12 Commits

9 changed files with 45 additions and 62 deletions

2
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "sumalibs"]
path = sumalibs
url = https://git.metabarcoding.org/obitools/sumalibs.git
url = https://forge.metabarcoding.org/obitools/sumalibs.git

View File

@ -1,16 +1,17 @@
EXEC=sumaclust
PREFIX=/usr/local
SUMACLUST_SRC= sumaclust.c \
mtcompare_sumaclust.c
CFLAGS=-I$(PREFIX)/include
SUMACLUST_OBJ= $(patsubst %.c,%.o,$(SUMACLUST_SRC))
EXEC = sumaclust
SUMACLUST_SRC = sumaclust.c \
mtcompare_sumaclust.c
SRCS= $(SUMACLUST_SRC)
SUMACLUST_OBJ = $(patsubst %.c,%.o,$(SUMACLUST_SRC))
LIB= -lfasta -llcs -lfile -lutils -lm #-ll
SRCS = $(SUMACLUST_SRC)
LIB = -lsuma -lm #-ll
include ./global.mk
@ -31,8 +32,8 @@ else
LFLAGS =
endif
sumaclust: $(SUMACLUST_OBJ) $(LIBFASTA) $(LIBLCS) $(LIBFILE) $(LIBUTILS)
$(CC) $(LDFLAGS) -o $@ $(LFLAGS) $(SUMACLUST_OBJ) $(LIB) $(LIBFASTAPATH) $(LIBLCSPATH) $(LIBFILEPATH) $(LIBUTILSPATH)
sumaclust: $(SUMACLUST_OBJ) $(LIBSUMA)
$(CC) $(LDFLAGS) -o $@ $(LFLAGS) $(SUMACLUST_OBJ) $(LIB) $(LIBSUMAPATH)
########
#
@ -41,12 +42,11 @@ sumaclust: $(SUMACLUST_OBJ) $(LIBFASTA) $(LIBLCS) $(LIBFILE) $(LIBUTILS)
########
clean:
rm -f *.o
rm -f *.P
rm -f $(SUMACLUST_OBJ)
rm -f $(EXEC)
$(MAKE) -C ./sumalibs/libfasta clean
$(MAKE) -C ./sumalibs/liblcs clean
$(MAKE) -C ./sumalibs/libfile clean
$(MAKE) -C ./sumalibs/libutils clean
$(MAKE) -C ./sumalibs clean
install: all
install -d $(DESTDIR)$(PREFIX)/bin/
install -m 755 $(EXEC) $(DESTDIR)$(PREFIX)/bin/

1
README.md Normal file
View File

@ -0,0 +1 @@
[See the wiki](https://git.metabarcoding.org/obitools/sumaclust/wikis/home)

View File

@ -1,13 +1,7 @@
LIBFASTAPATH = -L./sumalibs/libfasta
LIBLCSPATH = -L./sumalibs/liblcs
LIBFILEPATH = -L./sumalibs/libfile
LIBUTILSPATH = -L./sumalibs/libutils
LIBSUMAPATH = -L./sumalibs
LIBFASTA = ./sumalibs/libfasta/libfasta.a
LIBLCS = ./sumalibs/liblcs/liblcs.a
LIBFILE = ./sumalibs/libfile/libfile.a
LIBUTILS = ./sumalibs/libutils/libutils.a
LIBSUMA = ./sumalibs/libsuma.a
CC=gcc
LDFLAGS=
@ -32,14 +26,5 @@ default: all
#
########
./sumalibs/libfasta/libfasta.a:
$(MAKE) -C ./sumalibs/libfasta
./sumalibs/liblcs/liblcs.a:
$(MAKE) -C ./sumalibs/liblcs
./sumalibs/libfile/libfile.a:
$(MAKE) -C ./sumalibs/libfile
./sumalibs/libutils/libutils.a:
$(MAKE) -C ./sumalibs/libutils
./sumalibs/libsuma.a:
$(MAKE) -C ./sumalibs

View File

@ -15,10 +15,10 @@
#include <math.h>
#include <sys/time.h>
#include "./sumalibs/libfasta/sequence.h"
#include "./sumalibs/libutils/utilities.h"
#include "./sumalibs/liblcs/upperband.h"
#include "./sumalibs/liblcs/sse_banded_LCS_alignment.h"
#include "sumalibs/libfasta/sequence.h"
#include "sumalibs/libutils/utilities.h"
#include "sumalibs/liblcs/upperband.h"
#include "sumalibs/liblcs/sse_banded_LCS_alignment.h"
#include "sumaclust.h"

View File

@ -12,18 +12,18 @@
#include <math.h>
#include <sys/time.h>
#include "./sumalibs/libutils/utilities.h"
#include "./sumalibs/libfasta/sequence.h"
#include "./sumalibs/libfasta/fasta_header_parser.h"
#include "./sumalibs/libfasta/fasta_header_handler.h"
#include "./sumalibs/libfasta/fasta_seq_writer.h"
#include "./sumalibs/liblcs/upperband.h"
#include "./sumalibs/liblcs/sse_banded_LCS_alignment.h"
#include "sumalibs/libutils/utilities.h"
#include "sumalibs/libfasta/sequence.h"
#include "sumalibs/libfasta/fasta_header_parser.h"
#include "sumalibs/libfasta/fasta_header_handler.h"
#include "sumalibs/libfasta/fasta_seq_writer.h"
#include "sumalibs/liblcs/upperband.h"
#include "sumalibs/liblcs/sse_banded_LCS_alignment.h"
#include "mtcompare_sumaclust.h"
#include "sumaclust.h"
#define VERSION "1.0.10"
#define VERSION "1.0.36"
/* ----------------------------------------------- */
@ -73,9 +73,10 @@ static void PrintHelp()
PP " -f : Output in FASTA format is deactivated.\n");
PP "\n");
PP "------------------------------------------------------------\n");
PP " Argument : the nucleotide dataset to cluster\n");
PP " Argument : the nucleotide dataset to cluster (or nothing \n");
PP " if the standard input should be used). \n");
PP "------------------------------------------------------------\n");
PP " http://metabarcoding.org/sumatra\n");
PP " http://metabarcoding.org/sumaclust\n");
PP "------------------------------------------------------------\n\n");
}
@ -584,7 +585,7 @@ void putSeqInCluster(fastaSeqPtr* seq, fastaSeqPtr* center, double score)
}
int compare(fastaSeqPtr* db, int n, BOOL fastOption, double threshold, BOOL normalize, int reference, BOOL lcsmode,
int compare(fastaSeqPtr* db, int n, BOOL fastOption, double threshold, BOOL normalize, int reference, BOOL lcsmode,
double max_ratio)
{
double score;
@ -722,7 +723,7 @@ int compare(fastaSeqPtr* db, int n, BOOL fastOption, double threshold, BOOL nor
free(iseq1-sizeForSeqs+lmax);
free(iseq2-sizeForSeqs+lmax);
if (normalize && reference == ALILEN)
if (normalize && (reference == ALILEN))
free(address);
return(k);
@ -794,7 +795,6 @@ int main(int argc, char** argv)
BOOL reverse = FALSE;
BOOL onlyATGC = TRUE;
int reference = ALILEN;
int ndb = 0;
int nproc = 1;
BOOL printBIOM = FALSE;
BOOL printOTUtable = FALSE;
@ -941,10 +941,6 @@ int main(int argc, char** argv)
}
}
ndb = argc - optind;
if (ndb != 1)
errflag++;
if (errflag)
ExitUsage(errflag);
@ -965,6 +961,7 @@ int main(int argc, char** argv)
fprintf(stderr,"Reading dataset...");
db = seq_readAllSeq2(argv[optind], TRUE, onlyATGC);
fprintf(stderr,"\n%d sequences\n",db.count);
if (db.count == 0)
@ -1040,7 +1037,6 @@ int main(int argc, char** argv)
// FASTA file
if (printFASTA)
{
if (printFASTAtofile)
{
FASTA_output = fopen(FASTA_file_name, "w");

View File

@ -29,7 +29,8 @@ Untar the archive, go into the newly created directory and compile:
```
tar zxvf sumaclust_v[x.x.xx].tar.gz
cd sumaclust_v[x.x.xx]
make
make -C sumalibs install
make install
```
You can compile Sumaclust with `clang`, which deactivates `OpenMP`, with:
@ -46,12 +47,12 @@ Sumaclust clusters sequences using the same clustering algorithm as UCLUST and C
#### Input
Input file must be in FASTA format.
The input can be either the standard input (stdin) or a file in FASTA format.
#### Usage
```
sumaclust [-l|L|a|n|r|d|e|o|g|f] [-t threshold_value] [-s sorting_key] [-R maximum_ratio] [-p number_of_threads] [-B file_name_for_BIOM-formatted_output] [-O file_name_for_OTU_table-formatted_output] [-F file_name_for_FASTA-formatted_output] dataset
sumaclust [-l|L|a|n|r|d|e|o|g|f] [-t threshold_value] [-s sorting_key] [-R maximum_ratio] [-p number_of_threads] [-B file_name_for_BIOM-formatted_output] [-O file_name_for_OTU_table-formatted_output] [-F file_name_for_FASTA-formatted_output] [dataset]
```
Argument: the sequence dataset to cluster (optional; if omitted, the standard input is used).

Binary file not shown.