14 Commits

10 changed files with 64 additions and 61 deletions

2
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "sumalibs"]
path = sumalibs
url = https://git.metabarcoding.org/obitools/sumalibs.git
url = https://forge.metabarcoding.org/obitools/sumalibs.git

View File

@ -1,3 +1,7 @@
PREFIX=/usr/local
CFLAGS=-I$(PREFIX)/include
EXEC = sumatra
SUMATRA_SRC = sumatra.c \
@ -5,10 +9,9 @@ SUMATRA_SRC= sumatra.c \
SUMATRA_OBJ = $(patsubst %.c,%.o,$(SUMATRA_SRC))
SRCS = $(SUMATRA_SRC)
LIB= -lfasta -llcs -lfile -lutils -lz -lm
LIB = -lsuma -lz -lm
include ./global.mk
@ -23,8 +26,8 @@ all: $(EXEC)
# executable compilation and link
sumatra: $(SUMATRA_OBJ) $(LIBFASTA) $(LIBLCS) $(LIBFILE) $(LIBUTILS)
$(CC) $(LDFLAGS) -o $@ -pthread $(SUMATRA_OBJ) $(LIBFASTAPATH) $(LIBLCSPATH) $(LIBFILEPATH) $(LIBUTILSPATH) $(LIB)
sumatra: $(SUMATRA_OBJ) ./sumalibs/libsuma.a
$(CC) $(LDFLAGS) -o $@ -pthread $(SUMATRA_OBJ) $(LIBSUMAPATH) $(LIB)
########
#
@ -33,12 +36,10 @@ sumatra: $(SUMATRA_OBJ) $(LIBFASTA) $(LIBLCS) $(LIBFILE) $(LIBUTILS)
########
clean:
rm -f *.o
rm -f *.P
rm -f $(SUMATRA_OBJ)
rm -f $(EXEC)
$(MAKE) -C ./sumalibs/libfasta clean
$(MAKE) -C ./sumalibs/liblcs clean
$(MAKE) -C ./sumalibs/libfile clean
$(MAKE) -C ./sumalibs/libutils clean
$(MAKE) -C ./sumalibs clean
# Install the built executable: builds `all` first, creates
# $(DESTDIR)$(PREFIX)/bin/ if needed, then copies $(EXEC) there with mode 755.
install: all
install -d $(DESTDIR)$(PREFIX)/bin/
install -m 755 $(EXEC) $(DESTDIR)$(PREFIX)/bin/

1
README.md Normal file
View File

@ -0,0 +1 @@
[See the wiki](https://git.metabarcoding.org/obitools/sumatra/wikis/home)

View File

@ -1,22 +1,15 @@
LIBSUMAPATH = -L./sumalibs
LIBFASTAPATH = -L./sumalibs/libfasta
LIBLCSPATH = -L./sumalibs/liblcs
LIBFILEPATH = -L./sumalibs/libfile
LIBUTILSPATH = -L./sumalibs/libutils
LIBFASTA = ./sumalibs/libfasta/libfasta.a
LIBLCS = ./sumalibs/liblcs/liblcs.a
LIBFILE = ./sumalibs/libfile/libfile.a
LIBUTILS = ./sumalibs/libutils/libutils.a
LIBSUMA = ./sumalibs/libsuma.a
CC=gcc
LDFLAGS=
ifeq ($(CC),gcc)
CFLAGS = -O3 -s -DOMP_SUPPORT -fopenmp -w
CFLAGS = -I sumalibs -O3 -s -DOMP_SUPPORT -w
else
CFLAGS = -O3 -w
CFLAGS = -I sumalibs -O3 -w
endif
@ -32,14 +25,5 @@ default: all
#
########
./sumalibs/libfasta/libfasta.a:
$(MAKE) -C ./sumalibs/libfasta
./sumalibs/liblcs/liblcs.a:
$(MAKE) -C ./sumalibs/liblcs
./sumalibs/libfile/libfile.a:
$(MAKE) -C ./sumalibs/libfile
./sumalibs/libutils/libutils.a:
$(MAKE) -C ./sumalibs/libutils
# Build the libsuma static library by running make in the ./sumalibs
# sub-directory. The rule lists no prerequisites, so make only runs it when
# ./sumalibs/libsuma.a does not exist yet.
./sumalibs/libsuma.a:
$(MAKE) -C ./sumalibs

View File

@ -10,10 +10,10 @@
#include <stdio.h>
#include <string.h>
#include "sumatra.h"
#include "./sumalibs/libfasta/sequence.h"
#include "./sumalibs/libutils/utilities.h"
#include "./sumalibs/liblcs/upperband.h"
#include "./sumalibs/liblcs/sse_banded_LCS_alignment.h"
#include "libfasta/sequence.h"
#include "libutils/utilities.h"
#include "liblcs/upperband.h"
#include "liblcs/sse_banded_LCS_alignment.h"
typedef struct {

View File

@ -14,13 +14,13 @@
#include <sys/time.h>
#include "./sumalibs/libfasta/sequence.h"
#include "./sumalibs/liblcs/upperband.h"
#include "./sumalibs/liblcs/sse_banded_LCS_alignment.h"
#include "./sumalibs/libutils/utilities.h"
#include "libfasta/sequence.h"
#include "liblcs/upperband.h"
#include "liblcs/sse_banded_LCS_alignment.h"
#include "libutils/utilities.h"
#include "mtcompare_sumatra.h"
#define VERSION "1.0.10"
#define VERSION "1.0.36"
/* ----------------------------------------------- */
@ -57,7 +57,9 @@ static void PrintHelp()
PP " -g : n's are replaced with a's (default: sequences with n's are discarded).\n");
PP " -x : Adds four extra columns with the count and length of both sequences.\n");
PP "-----------------------------------------------------------------------------------------------------------------------------\n");
PP " First argument : the nucleotide dataset to analyze\n\n");
PP " First argument : the nucleotide dataset to analyze (or nothing \n");
PP " if there is only one dataset and the standard \n");
PP " input should be used). \n\n");
PP " Second argument : optionally the second nucleotide dataset\n");
PP "-----------------------------------------------------------------------------------------------------------------------------\n");
PP " Results table description : \n");
@ -84,7 +86,7 @@ static void PrintHelp()
static void ExitUsage(stat)
int stat;
{
PP "usage: sumatra [-l|L|a|n|r|d|g|x] [-t threshold_value] [-p number of threads] dataset1 [dataset2]\n");
PP "usage: sumatra [-l|L|a|n|r|d|g|x] [-t threshold_value] [-p number of threads] [dataset1] [dataset2]\n");
PP "type \"sumatra -h\" for help\n");
if (stat)
@ -212,7 +214,7 @@ int compare1(fastaSeqCount db1, double threshold, BOOL normalize, int reference,
BOOL always = TRUE;
int64_t pairs = (int64_t)(db1.count - 1) * (int64_t)db1.count /2;
BOOL print;
double score;
double score, scoreG;
int32_t i,j;
char* s1;
char* s2;
@ -237,7 +239,7 @@ int compare1(fastaSeqCount db1, double threshold, BOOL normalize, int reference,
calculateMaxAndMinLenDB(db1, &lmax, &lmin);
sizeForSeqs = prepareTablesForSumathings(lmax, lmin, threshold, normalize, reference, lcsmode, &address, &iseq1, &iseq2);
for (i=0; i < db1.count; i++) // ...??
for (i=0; i < db1.count; i++) // ...?? db1.count - 1 probably
for (j=i+1; j < db1.count; j++)
{
print = FALSE;
@ -250,7 +252,23 @@ int compare1(fastaSeqCount db1, double threshold, BOOL normalize, int reference,
l1 = (db1.fastaSeqs+i)->length;
s2 = (db1.fastaSeqs+j)->sequence;
l2 = (db1.fastaSeqs+j)->length;
/* fprintf(stderr, "\n%s", s1);
fprintf(stderr, "\n%s", s2);
fprintf(stderr, "\n%f", threshold);
fprintf(stderr, "\n%d", normalize);
fprintf(stderr, "\n%d", reference);
fprintf(stderr, "\n%d\n", lcsmode);
*/
// score = generic_sse_banded_lcs_align(s1, s2, threshold, normalize, reference, lcsmode);
// fprintf(stderr, "\nscore generic = %f", scoreG);
score = alignForSumathings(s1, iseq1, s2, iseq2, l1, l2, normalize, reference, lcsmode, address, sizeForSeqs, LCSmin);
// fprintf(stderr, "\nscore = %f\n", score);
// if (scoreG != score)
// {
// fprintf(stderr, "\nscores differents\n");
// exit(1);
// }
print = always || (((normalize || lcsmode) && (score >= threshold)) || ((!lcsmode && !normalize) && (score <= threshold)));
if (print && !lcsmode && normalize)
score = 1.0 - score;
@ -320,7 +338,7 @@ int compare2(fastaSeqCount db1, fastaSeqCount db2, double threshold, BOOL normal
score = alignForSumathings(s1, iseq1, s2, iseq2, l1, l2, normalize, reference, lcsmode, address, sizeForSeqs, LCSmin);
print = always || (((normalize || lcsmode) && (score >= threshold)) || ((!lcsmode && !normalize) && (score <= threshold)));
if (print && !lcsmode && normalize)
score = 1.0 - score;
score = 1.0 - score; // TODO isn't that already done?
}
printResults(db1.fastaSeqs+i, db2.fastaSeqs+j, score, extradata, pairs, print);
}
@ -425,8 +443,6 @@ int main(int argc, char **argv)
}
ndb = argc - optind;
if (ndb < 1)
errflag++;
if (errflag)
ExitUsage(errflag);

View File

@ -8,7 +8,7 @@
#ifndef SUMATRA_H_
#define SUMATRA_H_
#include "./sumalibs/libfasta/sequence.h"
#include "libfasta/sequence.h"
void printResults(fastaSeqPtr seq1, fastaSeqPtr seq2, double score, BOOL extradata, int64_t pairs, BOOL print);

View File

@ -29,7 +29,8 @@ Untar the archive, go into the newly created directory and compile:
```
tar zxvf sumatra_v[x.x.xx].tar.gz
cd sumatra_v[x.x.xx]
make
make -C sumalibs install
make install
```
## Documentation
@ -40,12 +41,12 @@ Sumatra computes the pairwise alignment scores from one dataset or between two d
#### Input
Files must be in FASTA format.
If there is only one dataset, it can be read either from the standard input (stdin) or from a file in FASTA format. If two datasets are compared, both must be given as files in FASTA format.
#### Usage
```
sumatra [-l|L|a|n|r|d|g|x] [-t threshold_value] [-p number of threads] dataset1 [dataset2]
sumatra [-l|L|a|n|r|d|g|x] [-t threshold_value] [-p number of threads] [dataset1] [dataset2]
```
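First argument: the sequence dataset in FASTA format to analyse. It may be omitted when there is only one dataset, in which case the sequences are read from the standard input.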

Binary file not shown.