mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
First commit
This commit is contained in:
14
pkg/obiapat/abiapat/CODES/dft_code.h
Normal file
14
pkg/obiapat/abiapat/CODES/dft_code.h
Normal file
@ -0,0 +1,14 @@
|
||||
/* ----------------------------------------------- */
|
||||
/* dft_code.h */
|
||||
/* default alphabet encoding for alpha */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */,
|
||||
0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
|
||||
0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
|
||||
0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
|
||||
0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */,
|
||||
0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
|
||||
0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */,
|
||||
0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */,
|
||||
0x01000000 /* Y */, 0x02000000 /* Z */
|
71
pkg/obiapat/abiapat/CODES/dna_code.h
Normal file
71
pkg/obiapat/abiapat/CODES/dna_code.h
Normal file
@ -0,0 +1,71 @@
|
||||
/* ----------------------------------------------- */
|
||||
/* dna_code.h */
|
||||
/* alphabet encoding for dna/rna */
|
||||
/* ----------------------------------------- */
|
||||
/* IUPAC encoding */
|
||||
/* ----------------------------------------- */
|
||||
/* G/A/T/C */
|
||||
/* U=T */
|
||||
/* R=AG */
|
||||
/* Y=CT */
|
||||
/* M=AC */
|
||||
/* K=GT */
|
||||
/* S=CG */
|
||||
/* W=AT */
|
||||
/* H=ACT */
|
||||
/* B=CGT */
|
||||
/* V=ACG */
|
||||
/* D=AGT */
|
||||
/* N=ACGT */
|
||||
/* X=ACGT */
|
||||
/* EFIJLOPQZ not recognized */
|
||||
/* ----------------------------------------- */
|
||||
/* dual encoding */
|
||||
/* ----------------------------------------- */
|
||||
/* A=ADHMNRVW */
|
||||
/* B=BCDGHKMNRSTUVWY */
|
||||
/* C=BCHMNSVY */
|
||||
/* D=ABDGHKMNRSTUVWY */
|
||||
/* G=BDGKNRSV */
|
||||
/* H=ABCDHKMNRSTUVWY */
|
||||
/* K=BDGHKNRSTUVWY */
|
||||
/* M=ABCDHMNRSVWY */
|
||||
/* N=ABCDGHKMNRSTUVWY */
|
||||
/* R=ABDGHKMNRSVW */
|
||||
/* S=BCDGHKMNRSVY */
|
||||
/* T=BDHKNTUWY */
|
||||
/* U=BDHKNTUWY */
|
||||
/* V=ABCDGHKMNRSVWY */
|
||||
/* W=ABDHKMNRTUVWY */
|
||||
/* X=ABCDGHKMNRSTUVWY */
|
||||
/* Y=BCDHKMNSTUVWY */
|
||||
/* EFIJLOPQZ not recognized */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#ifndef USE_DUAL
|
||||
|
||||
/* IUPAC */
|
||||
|
||||
0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */,
|
||||
0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
|
||||
0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */,
|
||||
0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */,
|
||||
0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */,
|
||||
0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */,
|
||||
0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */,
|
||||
0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */,
|
||||
0x00080004 /* Y */, 0x00000000 /* Z */
|
||||
|
||||
#else
|
||||
/* DUAL */
|
||||
|
||||
0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */,
|
||||
0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
|
||||
0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */,
|
||||
0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */,
|
||||
0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */,
|
||||
0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */,
|
||||
0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */,
|
||||
0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */,
|
||||
0x017c348e /* Y */, 0x00000000 /* Z */
|
||||
#endif
|
51
pkg/obiapat/abiapat/CODES/prot_code.h
Normal file
51
pkg/obiapat/abiapat/CODES/prot_code.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* ----------------------------------------------- */
|
||||
/* prot_code.h */
|
||||
/* alphabet encoding for proteins */
|
||||
/* ----------------------------------------- */
|
||||
/* IUPAC encoding */
|
||||
/* ----------------------------------------- */
|
||||
/* B=DN */
|
||||
/* Z=EQ */
|
||||
/* X=any - {X} */
|
||||
/* JOU not recognized */
|
||||
/* ----------------------------------------- */
|
||||
/* dual encoding */
|
||||
/* ----------------------------------------- */
|
||||
/* B=BDN */
|
||||
/* D=BD */
|
||||
/* E=EZ */
|
||||
/* N=BN */
|
||||
/* Q=QZ */
|
||||
/* X=any - {X} */
|
||||
/* Z=EQZ */
|
||||
/* JOU not recognized */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#ifndef USE_DUAL
|
||||
|
||||
/* IUPAC */
|
||||
|
||||
0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */,
|
||||
0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
|
||||
0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
|
||||
0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
|
||||
0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */,
|
||||
0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
|
||||
0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
|
||||
0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
|
||||
0x01000000 /* Y */, 0x00010010 /* Z */
|
||||
|
||||
#else
|
||||
/* DUAL */
|
||||
|
||||
0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */,
|
||||
0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */,
|
||||
0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
|
||||
0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
|
||||
0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */,
|
||||
0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */,
|
||||
0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
|
||||
0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
|
||||
0x01000000 /* Y */, 0x02010010 /* Z */
|
||||
|
||||
#endif
|
24
pkg/obiapat/abiapat/Makefile
Normal file
24
pkg/obiapat/abiapat/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
|
||||
# Builds the static library libapat.a from the apat C sources.

SOURCES = apat_parse.c \
          apat_search.c \
          libstki.c

SRCS = $(SOURCES)

OBJECTS = $(patsubst %.c,%.o,$(SOURCES))

LIBFILE = libapat.a
RANLIB  = ranlib

# NOTE(review): global.mk is not visible here; it is presumably where the
# compiler settings and the .c -> .o rule come from -- confirm.
include ../global.mk

# `all` and `clean` name actions, not files: declare them phony so that a
# stray file called "all" or "clean" can never make them look up to date.
.PHONY: all clean

all: $(LIBFILE)

clean:
	rm -rf $(OBJECTS) $(LIBFILE)

# $? = only the objects newer than the archive; ar replaces just those members.
$(LIBFILE): $(OBJECTS)
	ar -cr $@ $?
	$(RANLIB) $@
|
165
pkg/obiapat/apat.h
Normal file
165
pkg/obiapat/apat.h
Normal file
@ -0,0 +1,165 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Dec. 94 */
|
||||
/* File: apat.h */
|
||||
/* Purpose: pattern scan */
|
||||
/* History: */
|
||||
/* 28/12/94 : <Gloup> ascan first version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* 07/12/21 : <Zafacs> some cleaning for 2020 */
|
||||
/* ==================================================== */
|
||||
|
||||
|
||||
#ifndef H_apat
#define H_apat

/* The declarations below use bool and the fixed-width integer types, so   */
/* pull them in here instead of relying on another header to do it.        */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include "libstki.h"

/* ----------------------------------------------- */
/* constants                                       */
/* ----------------------------------------------- */

#ifndef BUFSIZ
#define BUFSIZ          1024            /* io buffer size               */
#endif

#define MAX_NAME_LEN    BUFSIZ          /* max length of sequence name  */

#define ALPHA_LEN        26             /* alphabet length              */
                                        /* *DO NOT* modify              */

#define MAX_PATTERN       1             /* max # of patterns            */
                                        /* *DO NOT* modify              */

#define MAX_PAT_LEN      64             /* max pattern length           */
                                        /* *DO NOT* modify              */

#define MAX_PAT_ERR      64             /* max # of errors              */
                                        /* *DO NOT* modify              */

#define PATMASK 0x3ffffff               /* mask for 26 symbols          */
                                        /* *DO NOT* modify              */

#define OBLIBIT 0x4000000               /* bit 27 to 1 -> oblig. pos    */
                                        /* *DO NOT* modify              */

                                        /* mask for position            */
#define ONEMASK 0x8000000000000000      /* mask for highest position    */

                                        /* masks for Levenshtein edit   */
#define OPER_IDT  0x0000000000000000    /* identity                     */
#define OPER_INS  0x4000000000000000    /* insertion                    */
#define OPER_DEL  0x8000000000000000    /* deletion                     */
#define OPER_SUB  0xc000000000000000    /* substitution                 */

#define OPER_SHFT 30                    /* <unused> shift               */

                                        /* Levenshtein opcodes          */
#define SOPER_IDT 0x0                   /* identity                     */
#define SOPER_INS 0x1                   /* insertion                    */
#define SOPER_DEL 0x2                   /* deletion                     */
#define SOPER_SUB 0x3                   /* substitution                 */

                                        /* Levenshtein opcode masks     */
#define OPERMASK  0xc000000000000000    /* mask for opcodes       /!\   */
#define NOPERMASK 0x3fffffffffffffff    /* negation of previous   /!\   */

                                        /* special chars in pattern     */
#define PATCHARS  "[]!#"

                                        /* 26 letter alphabet           */
                                        /* in alphabetical order        */

#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

                                        /* protein alphabet             */

#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY"

                                        /* dna/rna alphabet             */

#define DNA_ALPHA "ABCDGHKMNRSTUVWXY"


/* ----------------------------------------------- */
/* data structures                                 */
/* ----------------------------------------------- */

typedef uint64_t patword_t;             /* one bit per pattern position */

                                        /* -------------------- */
typedef enum {                          /* data encoding        */
                                        /* -------------------- */
        alpha = 0,                      /* [A-Z]                */
        dna,                            /* IUPAC DNA            */
        protein                         /* IUPAC proteins       */
} CodType;

                                        /* -------------------- */
typedef struct {                        /* sequence             */
                                        /* -------------------- */
    char      *name;                    /* sequence name        */
    int32_t   seqlen;                   /* sequence length      */
    int32_t   seqsiz;                   /* sequence buffer size */
    int32_t   datsiz;                   /* data buffer size     */
    int32_t   circular;                 /* added to seqlen as the scan limit */
    uint8_t   *data;                    /* data buffer          */
    char      *cseq;                    /* sequence buffer      */
    StackiPtr hitpos[MAX_PATTERN];      /* stack of hit pos.    */
    StackiPtr hiterr[MAX_PATTERN];      /* stack of errors      */
} Seq, *SeqPtr;

                                        /* -------------------- */
typedef struct {                        /* pattern              */
                                        /* -------------------- */
    int32_t    patlen;                  /* pattern length       */
    int32_t    maxerr;                  /* max # of errors      */
    char       *cpat;                   /* pattern string       */
    uint32_t   *patcode;                /* encoded pattern      */
    patword_t  *smat;                   /* S matrix             */
    patword_t  omask;                   /* oblig. bits mask     */
    bool       hasIndel;                /* are indels allowed   */
    bool       ok;                      /* is pattern ok        */
} Pattern, *PatternPtr;


/* ----------------------------------------------- */
/* prototypes                                      */
/* ----------------------------------------------- */

                                     /* apat_seq.c      */

SeqPtr  FreeSequence     (SeqPtr pseq);
SeqPtr  NewSequence      (void);
int32_t ReadNextSequence (SeqPtr pseq);
int32_t WriteSequence    (FILE *filou , SeqPtr pseq);

                                     /* apat_parse.c    */
uint32_t *GetCode          (CodType ctype);
int32_t  CheckPattern      (Pattern *ppat);
int32_t  EncodePattern     (Pattern *ppat, CodType ctype);
int32_t  ReadPattern       (Pattern *ppat);
void     PrintDebugPattern (Pattern *ppat);
int      lenPattern        (const char *pat);

                                     /* apat_search.c   */

int32_t CreateS          (Pattern *ppat, int32_t lalpha);
int32_t ManberNoErr      (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
int32_t ManberSub        (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
int32_t ManberIndel      (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
int32_t ManberAll        (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
int32_t NwsPatAlign      (Seq *pseq , Pattern *ppat, int32_t nerr ,
                          int32_t *reslen , int32_t *reserr);

                                     /* apat_sys.c      */

float   UserCpuTime     (int32_t reset);
float   SysCpuTime      (int32_t reset);
char    *StrCpuTime     (int32_t reset);
void    Erreur          (char *msg , int32_t stat);
int32_t AccessFile      (char *path, char *mode);

#endif /* H_apat */
|
15
pkg/obiapat/apat_mem.h
Normal file
15
pkg/obiapat/apat_mem.h
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
|
||||
#ifndef __APAT_MEM_H__
#define __APAT_MEM_H__

/* The macros expand to malloc/realloc/free and cast through uint64_t, so  */
/* include the headers that declare them.                                  */
#include <stdint.h>
#include <stdlib.h>

/* ----------------------------------------------- */
/* macros                                          */
/* ----------------------------------------------- */

/* NEW(typ)               : allocate one object of type typ               */
/* NEWN(typ, dim)         : allocate an array of dim objects of type typ  */
/* REALLOC(typ, ptr, dim) : resize ptr to hold dim objects of type typ    */
/* FREE(ptr)              : release memory obtained from the macros above */
/*                                                                         */
/* Contents are not initialised and the result is NULL when the           */
/* allocation fails. Arguments and whole expansions are parenthesised so  */
/* the macros stay correct inside larger expressions.                     */

#define NEW(typ)                ((typ*)malloc(sizeof(typ)))
#define NEWN(typ, dim)          ((typ*)malloc((uint64_t)(dim) * sizeof(typ)))
#define REALLOC(typ, ptr, dim)  ((typ*)realloc((void *) (ptr), (uint64_t)(dim) * sizeof(typ)))
#define FREE(ptr)               free((void *) (ptr))

#endif /* __APAT_MEM_H__ */
|
393
pkg/obiapat/apat_parse.c
Normal file
393
pkg/obiapat/apat_parse.c
Normal file
@ -0,0 +1,393 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: apat_parse.c */
|
||||
/* Purpose: pattern encoding */
|
||||
/* History: */
|
||||
/* 00/07/94 : <Gloup> first version (stanford) */
|
||||
/* 00/11/94 : <Gloup> revised for DNA/PROTEIN */
|
||||
/* 30/12/94 : <Gloup> modified EncodePattern */
|
||||
/* for manber search */
|
||||
/* 14/05/99 : <Gloup> indels added */
|
||||
/* 07/12/21 : <Zafacs> some cleaning for 2020 */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "apat.h"
|
||||
/* -------------------- */
|
||||
/* default char */
|
||||
/* encodings */
|
||||
/* -------------------- */
|
||||
|
||||
/* Default [A-Z] encoding: letter number k (A = 0 ... Z = 25) owns bit k. */
static uint32_t sDftCode[] = {
    (uint32_t)1 <<  0,  /* A */
    (uint32_t)1 <<  1,  /* B */
    (uint32_t)1 <<  2,  /* C */
    (uint32_t)1 <<  3,  /* D */
    (uint32_t)1 <<  4,  /* E */
    (uint32_t)1 <<  5,  /* F */
    (uint32_t)1 <<  6,  /* G */
    (uint32_t)1 <<  7,  /* H */
    (uint32_t)1 <<  8,  /* I */
    (uint32_t)1 <<  9,  /* J */
    (uint32_t)1 << 10,  /* K */
    (uint32_t)1 << 11,  /* L */
    (uint32_t)1 << 12,  /* M */
    (uint32_t)1 << 13,  /* N */
    (uint32_t)1 << 14,  /* O */
    (uint32_t)1 << 15,  /* P */
    (uint32_t)1 << 16,  /* Q */
    (uint32_t)1 << 17,  /* R */
    (uint32_t)1 << 18,  /* S */
    (uint32_t)1 << 19,  /* T */
    (uint32_t)1 << 20,  /* U */
    (uint32_t)1 << 21,  /* V */
    (uint32_t)1 << 22,  /* W */
    (uint32_t)1 << 23,  /* X */
    (uint32_t)1 << 24,  /* Y */
    (uint32_t)1 << 25   /* Z */
};
|
||||
/* -------------------- */
|
||||
/* char encodings */
|
||||
/* IUPAC */
|
||||
/* -------------------- */
|
||||
|
||||
/* IUPAC Proteins */
|
||||
/* IUPAC protein encoding, indexed by letter - 'A'.                        */
/* Plain residues carry their own bit; B and Z carry the bits of the two  */
/* residues they stand for; J, O and U are not recognized (no bit set).   */
static uint32_t sProtCode[] = {
    ['A' - 'A'] = 0x00000001,
    ['B' - 'A'] = 0x00002008,   /* D | N */
    ['C' - 'A'] = 0x00000004,
    ['D' - 'A'] = 0x00000008,
    ['E' - 'A'] = 0x00000010,
    ['F' - 'A'] = 0x00000020,
    ['G' - 'A'] = 0x00000040,
    ['H' - 'A'] = 0x00000080,
    ['I' - 'A'] = 0x00000100,
    ['J' - 'A'] = 0x00000000,   /* not recognized */
    ['K' - 'A'] = 0x00000400,
    ['L' - 'A'] = 0x00000800,
    ['M' - 'A'] = 0x00001000,
    ['N' - 'A'] = 0x00002000,
    ['O' - 'A'] = 0x00000000,   /* not recognized */
    ['P' - 'A'] = 0x00008000,
    ['Q' - 'A'] = 0x00010000,
    ['R' - 'A'] = 0x00020000,
    ['S' - 'A'] = 0x00040000,
    ['T' - 'A'] = 0x00080000,
    ['U' - 'A'] = 0x00000000,   /* not recognized */
    ['V' - 'A'] = 0x00200000,
    ['W' - 'A'] = 0x00400000,
    ['X' - 'A'] = 0x037fffff,   /* every bit except X's own */
    ['Y' - 'A'] = 0x01000000,
    ['Z' - 'A'] = 0x00010010    /* E | Q */
};
|
||||
/* IUPAC Dna/Rna */
|
||||
/* IUPAC DNA/RNA encoding, indexed by letter - 'A'.                        */
/* The four bases use the bits A=0x1, C=0x4, G=0x40, T=0x80000 (U shares  */
/* T's bit); an ambiguity code is the OR of the bases it stands for.      */
/* E F I J L O P Q Z are not recognized (no bit set).                     */
static uint32_t sDnaCode[] = {
    ['A' - 'A'] = 0x00000001,
    ['B' - 'A'] = 0x00080044,   /* C G T   */
    ['C' - 'A'] = 0x00000004,
    ['D' - 'A'] = 0x00080041,   /* A G T   */
    ['E' - 'A'] = 0x00000000,
    ['F' - 'A'] = 0x00000000,
    ['G' - 'A'] = 0x00000040,
    ['H' - 'A'] = 0x00080005,   /* A C T   */
    ['I' - 'A'] = 0x00000000,
    ['J' - 'A'] = 0x00000000,
    ['K' - 'A'] = 0x00080040,   /* G T     */
    ['L' - 'A'] = 0x00000000,
    ['M' - 'A'] = 0x00000005,   /* A C     */
    ['N' - 'A'] = 0x00080045,   /* A C G T */
    ['O' - 'A'] = 0x00000000,
    ['P' - 'A'] = 0x00000000,
    ['Q' - 'A'] = 0x00000000,
    ['R' - 'A'] = 0x00000041,   /* A G     */
    ['S' - 'A'] = 0x00000044,   /* C G     */
    ['T' - 'A'] = 0x00080000,
    ['U' - 'A'] = 0x00080000,   /* same as T */
    ['V' - 'A'] = 0x00000045,   /* A C G   */
    ['W' - 'A'] = 0x00080001,   /* A T     */
    ['X' - 'A'] = 0x00080045,   /* A C G T */
    ['Y' - 'A'] = 0x00080004,   /* C T     */
    ['Z' - 'A'] = 0x00000000
};
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* internal replacement of gets */
|
||||
/* -------------------------------------------- */
|
||||
/* Read one line from stdin into buffer and strip every trailing '\n' or  */
/* '\r'. Returns buffer, or NULL when fgets() reports end of input or an  */
/* error. fgets() is handed size-1, so at most size-2 characters are kept.*/
static char *sGets(char *buffer, int size) {

    size_t len;

    if (fgets(buffer, size-1, stdin) == NULL)
        return NULL;

                        /* remove trailing line feed */

    len = strlen(buffer);

    while ((len > 0) && ((buffer[len - 1] == '\n') || (buffer[len - 1] == '\r')))
        buffer[--len] = '\000';

    return buffer;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* returns actual code associated to type */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/* Return the 26-entry encoding table for ctype: sDnaCode for dna,        */
/* sProtCode for protein, sDftCode for anything else.                     */
uint32_t *GetCode(CodType ctype)
{
    if (ctype == dna)
        return sDnaCode;

    if (ctype == protein)
        return sProtCode;

    return sDftCode;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define BAD_IF(tst) if (tst) return 0
|
||||
|
||||
int CheckPattern(Pattern *ppat)
|
||||
{
|
||||
int lev;
|
||||
char *pat;
|
||||
|
||||
pat = ppat->cpat;
|
||||
|
||||
BAD_IF (*pat == '#');
|
||||
|
||||
for (lev = 0; *pat ; pat++)
|
||||
|
||||
switch (*pat) {
|
||||
|
||||
case '[' :
|
||||
BAD_IF (lev);
|
||||
BAD_IF (*(pat+1) == ']');
|
||||
lev++;
|
||||
break;
|
||||
|
||||
case ']' :
|
||||
lev--;
|
||||
BAD_IF (lev);
|
||||
break;
|
||||
|
||||
case '!' :
|
||||
BAD_IF (lev);
|
||||
BAD_IF (! *(pat+1));
|
||||
BAD_IF (*(pat+1) == ']');
|
||||
break;
|
||||
|
||||
case '#' :
|
||||
BAD_IF (lev);
|
||||
BAD_IF (*(pat-1) == '[');
|
||||
break;
|
||||
|
||||
default :
|
||||
if (! isupper(*pat))
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return (lev ? 0 : 1);
|
||||
}
|
||||
|
||||
#undef BAD_IF
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* -------------------------------------------- */
/* If the character following pat is '#', step  */
/* onto it; otherwise return pat unchanged.     */
/* pat must not point at the terminator.        */
/* -------------------------------------------- */
static const char *skipOblig(const char *pat)
{
    return (*(pat+1) == '#' ? pat+1 : pat);
}

/* -------------------------------------------- */
/* Return a pointer to the LAST character of    */
/* the pattern element starting at pat.         */
/* An element is: optional '!' prefix(es), then */
/* one symbol or a "[...]" set, then an         */
/* optional trailing '#'.                       */
/* Returns NULL when a '[' is never closed or   */
/* when nothing follows a '!' (previously the   */
/* latter read past the end of the string).     */
/* -------------------------------------------- */
static const char *splitPattern(const char *pat)
{
                        /* skip negation mark(s)        */
    while (*pat == '!')
        pat++;

                        /* nothing left to negate       */
    if (*pat == '\000')
        return NULL;

                        /* a set ends at its ']'        */
    if (*pat == '[') {
        pat = strchr(pat, ']');
        if (pat == NULL)
            return NULL;
    }

    return skipOblig(pat);
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
static uint32_t valPattern(char *pat, uint32_t *code)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
switch (*pat) {
|
||||
|
||||
case '[' :
|
||||
return valPattern(pat+1, code);
|
||||
break;
|
||||
|
||||
case '!' :
|
||||
val = valPattern(pat+1, code);
|
||||
return (~val & PATMASK);
|
||||
break;
|
||||
|
||||
default :
|
||||
val = 0x0;
|
||||
while (isupper(*pat)) {
|
||||
val |= code[*pat - 'A'];
|
||||
pat++;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
return 0x0;
|
||||
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
static uint32_t obliBitPattern(char *pat)
|
||||
{
|
||||
return (*(pat + strlen(pat) - 1) == '#' ? OBLIBIT : 0x0);
|
||||
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Count the elements of a pattern string (a symbol, a "[...]" set, with   */
/* their '!' / '#' decorations, each count as one).                        */
/* Returns 0 as soon as splitPattern() rejects an element.                 */
int lenPattern(const char *pat)
{
    int count;

    for (count = 0 ; *pat ; count++) {

        const char *last = splitPattern(pat);

        if (last == NULL)
            return 0;

        pat = last + 1;         /* first char of next element */
    }

    return count;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Interface */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* encode a pattern */
|
||||
/* -------------------------------------------- */
|
||||
/* Encode ppat->cpat into ppat->patcode using the table for ctype.         */
/* Sets ppat->patlen to the element count and returns it; returns 0 (with  */
/* ppat->ok left false) when the count is not positive.                    */
/* ppat->patcode is written but not allocated here (the allocation is      */
/* commented out in the original), so it must already point to storage.    */
int EncodePattern(Pattern *ppat, CodType ctype)
{
    uint32_t *table = GetCode(ctype);
    int      nelem  = lenPattern(ppat->cpat);
    int      slot   = 0;
    char     *elem, *stop, saved;

    ppat->ok     = false;
    ppat->patlen = nelem;

    if (nelem <= 0)
        return 0;

    for (elem = ppat->cpat ; *elem ; elem = stop) {

        /* isolate the current element by temporarily terminating the */
        /* string right after its last character                      */

        stop  = (char *) splitPattern(elem) + 1;
        saved = *stop;
        *stop = '\000';

        ppat->patcode[slot++] = valPattern(elem, table) | obliBitPattern(elem);

        *stop = saved;          /* restore the pattern string */
    }

    ppat->ok = true;

    return nelem;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* remove blanks */
|
||||
/* -------------------------------------------- */
|
||||
/* Remove every whitespace character from s, in place, and return s.       */
/* The compacted string is re-terminated: without this the characters      */
/* left behind the write cursor would remain as a stale tail.              */
static char *RemBlanks(char *s)
{
    char *src, *dst;

    for (src = dst = s ; *src ; src++) {
        /* ctype functions need an unsigned char value */
        if (! isspace((unsigned char) *src))
            *dst++ = *src;
    }

    *dst = '\000';

    return s;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* count non blanks */
|
||||
/* -------------------------------------------- */
|
||||
/* Return the number of non-whitespace characters in s. */
static uint32_t CountAlpha(char *s)
{
    uint32_t n = 0;

    for ( ; *s ; s++) {
        /* cast: isspace() is undefined for negative char values */
        if (! isspace((unsigned char) *s))
            n++;
    }

    return n;
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* read a pattern */
|
||||
/* <pattern> #mis */
|
||||
/* lines starting with '/' are comments */
|
||||
/* -------------------------------------------- */
|
||||
/* Read the next pattern line "<pattern> <errors>" from stdin.             */
/* Lines starting with '/' and lines holding only whitespace are skipped.  */
/* A negative error count means indels are allowed (hasIndel); maxerr      */
/* receives its absolute value. cpat receives a freshly allocated copy of  */
/* the pattern text.                                                       */
/* Returns 1 on success. Returns 0 with ok == true when input is           */
/* exhausted, and 0 with ok == false when the line has no error count or   */
/* the allocation fails.                                                   */
int ReadPattern(Pattern *ppat)
{
    int  val;
    char *spac;
    char buffer[BUFSIZ];

    ppat->ok = true;

    /* skip comment and blank lines with a loop: the former recursion     */
    /* stacked one BUFSIZ buffer per skipped line                         */

    do {
        if (! sGets(buffer, sizeof(buffer)))
            return 0;
    } while ((*buffer == '/') || (! CountAlpha(buffer)));

    /* locate the separator between pattern and error count */

    for (spac = buffer ; *spac ; spac++)
        if ((*spac == ' ') || (*spac == '\t'))
            break;

    ppat->ok = false;

    if (! *spac)
        return 0;

    if (sscanf(spac, "%d", &val) != 1)
        return 0;

    ppat->hasIndel = (val < 0);

    ppat->maxerr = ((val >= 0) ? val : -val);

    *spac = '\000';             /* keep the pattern part only */

    (void) RemBlanks(buffer);

    if ((ppat->cpat = NEWN(char, strlen(buffer)+1)))
        strcpy(ppat->cpat, buffer);

    ppat->ok = (ppat->cpat != NULL);

    return (ppat->ok ? 1 : 0);
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* print a pattern - Debug - */
|
||||
/* -------------------------------------------- */
|
||||
/* Debug helper: print the pattern string, its length and its encoded     */
/* words on stdout, four words per output line.                           */
void PrintDebugPattern(Pattern *ppat)
{
    int idx = 0;

    printf("Pattern  : %s (length : %d)\n", ppat->cpat, ppat->patlen);
    printf("Encoding : \n\t");

    while (idx < ppat->patlen) {
        printf("0x%8.8x ", ppat->patcode[idx]);
        idx++;
        if (idx % 4 == 0)       /* start a new row after every 4th word */
            printf("\n\t");
    }

    printf("\n");
}
|
||||
|
337
pkg/obiapat/apat_search.c
Normal file
337
pkg/obiapat/apat_search.c
Normal file
@ -0,0 +1,337 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Dec. 94 */
|
||||
/* File: apat_search.c */
|
||||
/* Purpose: pattern search */
|
||||
/* Baeza-Yates/Gonnet algorithm */
|
||||
/* Manber (agrep) */
|
||||
/* History: */
|
||||
/* 07/12/94 : <MFS> first version */
|
||||
/* 28/12/94 : <Gloup> revised version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* 07/12/21 : <Zafacs> some cleaning for 2020 */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libstki.h"
|
||||
#include "apat.h"
|
||||
|
||||
#define POP PopiOut
|
||||
#define PUSH PushiIn
|
||||
#define TOPCURS CursiToTop
|
||||
#define DOWNREAD ReadiDown
|
||||
|
||||
#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Build the S matrix */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/* Fill ppat->smat (lalpha entries, one per symbol) and ppat->omask from   */
/* the encoded pattern. The last pattern position maps to bit 0, the one   */
/* before it to bit 1, and so on: smat[s] has a position's bit set when    */
/* that position accepts symbol s; omask has it set when the position      */
/* carries OBLIBIT. ppat->smat must already point to storage (the          */
/* allocation is commented out in the original). Always returns 1.         */
int CreateS(Pattern *ppat, int32_t lalpha)
{
    patword_t *table = ppat->smat;
    patword_t oblig  = 0x0L;
    patword_t bit    = 0x1L;
    int32_t   col, sym;

    ppat->ok = false;

    for (sym = 0 ; sym < lalpha ; sym++)
        table[sym] = 0x0;

    for (col = ppat->patlen - 1 ; col >= 0 ; col--) {

        uint32_t code = ppat->patcode[col];

        if (code & OBLIBIT)
            oblig |= bit;

        for (sym = 0 ; sym < lalpha ; sym++)
            if ((code >> sym) & 0x1)
                table[sym] |= bit;

        bit <<= 1;
    }

    ppat->omask = oblig;

    ppat->ok = true;

    return 1;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* NoError */
|
||||
/* -------------------------------------------- */
|
||||
/* Exact search of ppat in pseq->data over positions [begin, begin+length) */
/* clipped to seqlen + circular. Both hit stacks of slot patnum are        */
/* emptied first; each hit pushes pos - patlen + 1 on hitpos and 0 on      */
/* hiterr. Returns the top of the hitpos stack, i.e. the number of hits.   */
int32_t ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int32_t    pos;
    patword_t  smask, r;
    uint8_t    *data;
    StackiPtr  *stkpos, *stkerr;
    int32_t    end;

    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

                                /* create local masks   */

    /* build the mask in patword_t width: a `long` constant is only */
    /* 32 bits wide on some platforms                               */
    smask = r = (patword_t) 0x1 << ppat->patlen;

                                /* init. scan           */
    data   = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    EmptyStacki(stkpos[0]);
    stkerr = pseq->hiterr + patnum;
    EmptyStacki(stkerr[0]);

                                /* loop on text data    */

    for (pos = begin ; pos < end ; pos++) {

        r = (r >> 1) & ppat->smat[*data++];

        if (r & 0x1L) {         /* bit 0 set: whole pattern matched */
            PUSH(stkpos, pos - ppat->patlen + 1);
            PUSH(stkerr, 0);
        }

        r |= smask;             /* allow a new match to start */
    }

    return (*stkpos)->top;      /* aka # of hits        */
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* Substitution only */
|
||||
/* */
|
||||
/* Note : r array is stored as : */
|
||||
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
|
||||
/* */
|
||||
/* -------------------------------------------- */
|
||||
/* Search of ppat in pseq->data allowing up to ppat->maxerr substitutions, */
/* over positions [begin, begin+length) clipped to seqlen + circular.      */
/* Both hit stacks of slot patnum are emptied first; at each text position */
/* the lowest error level that matches pushes pos - patlen + 1 on hitpos   */
/* and that level on hiterr. Returns the top of the hitpos stack.          */
/*                                                                         */
/* Masks and state words are patword_t, like smat and omask: 32-bit words  */
/* silently dropped the upper half for patterns of 32 positions or more.   */
int32_t ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int        e, emax, found;
    uint32_t   pos;
    patword_t  smask, cmask, sindx;
    patword_t  *pr, r[2 * MAX_PAT_ERR + 2];
    uint8_t    *data;
    StackiPtr  *stkpos, *stkerr;
    uint32_t   end;

    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

                                /* create local masks   */
    emax = ppat->maxerr;

    /* r[] holds two words per error level plus two leading zeros; */
    /* the highest index used is 2*emax + 3, so cap emax           */
    if (emax > MAX_PAT_ERR - 1)
        emax = MAX_PAT_ERR - 1;

    r[0] = r[1] = 0x0;

    cmask = smask = (patword_t) 0x1 << ppat->patlen;

    for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2)
        *pr = cmask;

    cmask = ~ ppat->omask;      /* no substitution on obligatory positions */

                                /* init. scan           */
    data   = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    EmptyStacki(stkpos[0]);
    stkerr = pseq->hiterr + patnum;
    EmptyStacki(stkerr[0]);

                                /* loop on text data    */

    for (pos = begin ; pos < end ; pos++) {

        sindx  = ppat->smat[*data++];

        for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {

            pr[2]  = pr[3] | smask;

            pr[3]  =   ((pr[0] >> 1) & cmask)       /* sub   */
                     | ((pr[2] >> 1) & sindx);      /* ident */

            if (pr[3] & 0x1L) {                     /* found */
                if (! found) {
                    PUSH(stkpos, pos - ppat->patlen + 1);
                    PUSH(stkerr, e);
                }
                found++;
            }
        }
    }

    return (*stkpos)->top;      /* aka # of hits        */
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* Substitution + Indels */
|
||||
/* */
|
||||
/* Note : r array is stored as : */
|
||||
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
|
||||
/* */
|
||||
/* Warning: may return shifted pos. */
|
||||
/* */
|
||||
/* -------------------------------------------- */
|
||||
/* Search of ppat in pseq->data allowing up to ppat->maxerr errors         */
/* (substitutions, insertions and deletions), over positions               */
/* [begin, begin+length) clipped to seqlen + circular. At each text        */
/* position the lowest error level that matches pushes pos - patlen + 1    */
/* on hitpos and that level on hiterr; with indels this position may be    */
/* shifted. Returns the top of the hitpos stack.                           */
/*                                                                         */
/* Masks and state words are patword_t, like smat and omask: 32-bit words  */
/* silently dropped the upper half for patterns of 32 positions or more.   */
/* The hit stacks are emptied before the scan, as ManberNoErr and          */
/* ManberSub do, so the returned stack top is the hit count of this call.  */
int32_t ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int        e, emax, found;
    uint32_t   pos;
    patword_t  smask, cmask, sindx;
    patword_t  *pr, r[2 * MAX_PAT_ERR + 2];
    uint8_t    *data;
    StackiPtr  *stkpos, *stkerr;
    uint32_t   end;

    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

                                /* create local masks   */
    emax = ppat->maxerr;

    /* r[] holds two words per error level plus two leading zeros; */
    /* the highest index used is 2*emax + 3, so cap emax           */
    if (emax > MAX_PAT_ERR - 1)
        emax = MAX_PAT_ERR - 1;

    r[0] = r[1] = 0x0;

    cmask = smask = (patword_t) 0x1 << ppat->patlen;

    for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) {
        *pr = cmask;
        cmask = (cmask >> 1) | smask;   /* level e starts with e leading bits set */
    }

    cmask = ~ ppat->omask;      /* no error on obligatory positions */

                                /* init. scan           */
    data   = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    EmptyStacki(stkpos[0]);
    stkerr = pseq->hiterr + patnum;
    EmptyStacki(stkerr[0]);

                                /* loop on text data    */

    for (pos = begin ; pos < end ; pos++) {

        sindx  = ppat->smat[*data++];

        for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {

            pr[2]  = pr[3] | smask;

            pr[3]  =  ((   pr[0]                    /* ins   */
                        | (pr[0] >> 1)              /* sub   */
                        | (pr[1] >> 1))             /* del   */
                       & cmask)
                    | ((pr[2] >> 1) & sindx);       /* ident */

            if (pr[3] & 0x1L) {                     /* found */
                if (! found) {
                    PUSH(stkpos, pos - ppat->patlen + 1);
                    PUSH(stkerr, e);
                }
                found++;
            }
        }
    }

    return (*stkpos)->top;      /* aka # of hits        */
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* API call to previous functions */
|
||||
/* -------------------------------------------- */
|
||||
/* Dispatch to the search routine matching the pattern settings:           */
/* ManberNoErr when maxerr is 0, ManberIndel when indels are allowed,      */
/* ManberSub otherwise. Returns whatever the selected routine returns.     */
int32_t ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    if (ppat->maxerr != 0) {

        if (ppat->hasIndel)
            return ManberIndel(pseq, ppat, patnum, begin, length);

        return ManberSub(pseq, ppat, patnum, begin, length);
    }

    return ManberNoErr(pseq, ppat, patnum, begin, length);
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* NWS alignment */
|
||||
/* used to edit the hits */
|
||||
/* (with mandatory substitution at the ends) */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
int32_t NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
|
||||
Seq *pseq;
|
||||
Pattern *ppat;
|
||||
int32_t nerr, *reslen, *reserr;
|
||||
{
|
||||
uint8_t *sseq, *px;
|
||||
int32_t i, j, lseq, lpat, npos, dindel, dsub,
|
||||
*pc, *pi, *pd, *ps;
|
||||
uint32_t amask;
|
||||
|
||||
static int32_t sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];
|
||||
|
||||
lseq = pseq->seqlen;
|
||||
|
||||
pc = sTab; /* |----|----| --> i */
|
||||
pi = pc - 1; /* | ps | pd | | */
|
||||
pd = pi - lseq; /* |----|----| | */
|
||||
ps = pd - 1; /* | pi | pc | v j */
|
||||
/* |---------| */
|
||||
|
||||
lseq = pseq->seqlen;
|
||||
lpat = ppat->patlen;
|
||||
|
||||
sseq = pseq->data - 1;
|
||||
|
||||
amask = ONEMASK >> lpat;
|
||||
|
||||
for (j = 0 ; j <= lpat ; j++) {
|
||||
|
||||
for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {
|
||||
|
||||
if (i && j) {
|
||||
dindel = MIN(*pi, *pd) + 1;
|
||||
dsub = *ps + KRONECK(ppat->smat[*px], amask);
|
||||
*pc = MIN(dindel, dsub);
|
||||
}
|
||||
else if (i) /* j == 0 */
|
||||
*pc = *pi + 1;
|
||||
else if (j) /* i == 0 */
|
||||
*pc = *pd + 1;
|
||||
else /* root */
|
||||
*pc = 0;
|
||||
|
||||
pc++;
|
||||
pi++;
|
||||
pd++;
|
||||
ps++;
|
||||
}
|
||||
|
||||
amask <<= 1;
|
||||
}
|
||||
|
||||
pc--;
|
||||
|
||||
for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) {
|
||||
if (*pc <= nerr) {
|
||||
*reslen++ = i;
|
||||
*reserr++ = *pc;
|
||||
npos++;
|
||||
}
|
||||
}
|
||||
|
||||
return npos;
|
||||
}
|
82
pkg/obiapat/ecoMalloc.c
Normal file
82
pkg/obiapat/ecoMalloc.c
Normal file
@ -0,0 +1,82 @@
|
||||
#include "obiapat.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
static int eco_log_malloc = 0;
|
||||
|
||||
void eco_trace_memory_allocation()
|
||||
{
|
||||
eco_log_malloc=1;
|
||||
}
|
||||
|
||||
void eco_untrace_memory_allocation()
|
||||
{
|
||||
eco_log_malloc=0;
|
||||
}
|
||||
|
||||
|
||||
void *eco_malloc(int32_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line,
|
||||
int *errno, char **errmsg)
|
||||
{
|
||||
void * chunk;
|
||||
|
||||
chunk = calloc(1,chunksize);
|
||||
|
||||
if (!chunk)
|
||||
ecoError(ECO_MEM_ERROR,error_message,filename,line,errno,errmsg);
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
|
||||
chunk,
|
||||
chunksize,
|
||||
filename,
|
||||
line);
|
||||
|
||||
return chunk;
|
||||
}
|
||||
|
||||
void *eco_realloc(void *chunk,
|
||||
int32_t newsize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line,
|
||||
int *errno, char **errmsg)
|
||||
{
|
||||
void *newchunk;
|
||||
|
||||
newchunk = realloc(chunk,newsize);
|
||||
|
||||
if (!newchunk)
|
||||
ecoError(ECO_MEM_ERROR,error_message,filename,line,errno,errmsg);
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
|
||||
chunk,
|
||||
newchunk,
|
||||
newsize,
|
||||
filename,
|
||||
line);
|
||||
|
||||
return newchunk;
|
||||
}
|
||||
|
||||
void eco_free(void *chunk,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line,
|
||||
int *errno, char **errmsg)
|
||||
{
|
||||
free(chunk);
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Memory segment %p is released => %s (file : %s [%d])",
|
||||
chunk,
|
||||
error_message,
|
||||
filename,
|
||||
line);
|
||||
}
|
391
pkg/obiapat/libstki.c
Normal file
391
pkg/obiapat/libstki.c
Normal file
@ -0,0 +1,391 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: libstki.c */
|
||||
/* Purpose: A library to deal with 'stacks' of */
|
||||
/* integers */
|
||||
/* Note: 'stacks' are dynamic (i.e. size is */
|
||||
/* automatically readjusted when needed) */
|
||||
/* History: */
|
||||
/* 00/03/92 : <Gloup> first draft */
|
||||
/* 15/08/93 : <Gloup> revised version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* 07/12/21 : <Zafacs> some cleaning for 2020's */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// #include "Gtypes.h"
|
||||
#include "libstki.h"
|
||||
|
||||
|
||||
/* ============================ */
|
||||
/* Constantes et Macros locales */
|
||||
/* ============================ */
|
||||
|
||||
#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1)
|
||||
|
||||
#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
|
||||
|
||||
|
||||
static int16_t sStkiLastError = kStkiNoErr;
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* gestion des erreurs */
|
||||
/* get/reset erreur flag */
|
||||
/* */
|
||||
/* @function: StkiError */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
int16_t StkiError(bool reset)
|
||||
{
|
||||
int16_t err;
|
||||
|
||||
err = sStkiLastError;
|
||||
|
||||
if (reset)
|
||||
sStkiLastError = kStkiNoErr;
|
||||
|
||||
return err;
|
||||
|
||||
} /* end of StkiError */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* creation d'un stack */
|
||||
/* */
|
||||
/* @function: NewStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiPtr NewStacki(int32_t size)
|
||||
{
|
||||
StackiPtr stki;
|
||||
|
||||
if (! (stki = NEW(Stacki)))
|
||||
return NULL;
|
||||
|
||||
stki->size = size;
|
||||
stki->top = 0;
|
||||
stki->cursor = 0;
|
||||
|
||||
if ( ! (stki->val = NEWN(int32_t, size))) {
|
||||
sStkiLastError = kStkiMemErr;
|
||||
return FreeStacki(stki);
|
||||
}
|
||||
|
||||
return stki;
|
||||
|
||||
} /* end of NewStacki */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* liberation d'un stack */
|
||||
/* */
|
||||
/* @function: FreeStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Release a stack and its value array.  Accepts NULL.
 * Always returns NULL so callers can write `stk = FreeStacki(stk);`.
 */
StackiPtr FreeStacki(StackiPtr stki)
{
    if (stki == NULL)
        return NULL;

    if (stki->val != NULL)
        FREE(stki->val);

    FREE(stki);

    return NULL;

} /* end of FreeStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* creation d'un vecteur de stacks */
|
||||
/* */
|
||||
/* @function: NewStackiVector */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiHdle NewStackiVector(int32_t vectSize, int32_t stackSize)
|
||||
{
|
||||
int32_t i;
|
||||
StackiHdle stkh;
|
||||
|
||||
if (! (stkh = NEWN(StackiPtr, vectSize))) {
|
||||
sStkiLastError = kStkiMemErr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < vectSize ; i++)
|
||||
if (! (stkh[i] = NewStacki(stackSize)))
|
||||
return FreeStackiVector(stkh, i);
|
||||
|
||||
return stkh;
|
||||
|
||||
} /* end of NewStackiVector */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* liberation d'un vecteur de stacks */
|
||||
/* */
|
||||
/* @function: FreeStackiVector */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Release the first `vectSize` stacks of the array `stkh`, then the
 * array itself.  Accepts NULL.  Always returns NULL.
 */
StackiHdle FreeStackiVector(StackiHdle stkh, int32_t vectSize)
{
    int32_t n;

    if (stkh == NULL)
        return NULL;

    for (n = 0 ; n < vectSize ; n++)
        (void) FreeStacki(stkh[n]);

    FREE(stkh);

    return NULL;

} /* end of FreeStackiVector */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* resize d'un stack */
|
||||
/* */
|
||||
/* @function: ResizeStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Change the capacity of the stack *stkh to `size` values.
 *
 * Returns the new capacity, or 0 on failure (error flag set to
 * kStkiMemErr, stack left untouched).
 *
 * A non-positive `size` is rejected up front: doubling a capacity of 0,
 * or one large enough to overflow, would otherwise hand a zero or
 * negative size to REALLOC.
 */
int32_t ResizeStacki(StackiHdle stkh, int32_t size)
{
    int32_t *val;

    if (size <= 0) {
        sStkiLastError = kStkiMemErr;
        return 0;
    }

    val = REALLOC(int32_t, (*stkh)->val, size);

    if (! val) {
        sStkiLastError = kStkiMemErr;
        return 0;
    }

    (*stkh)->val  = val;
    (*stkh)->size = size;

    return size;

} /* end of ResizeStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* empilage(/lement) */
|
||||
/* */
|
||||
/* @function: PushiIn */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Push `val` on top of the stack *stkh, doubling its capacity first when
 * it is full.  Returns false (value not stored) when the stack could not
 * be expanded, true otherwise.
 */
bool PushiIn(StackiHdle stkh, int32_t val)
{
    StackiPtr stk = *stkh;

    if (stk->top >= stk->size) {
        if (! ExpandStack(stkh))
            return false;
    }

    stk->val[stk->top] = val;
    stk->top++;

    return true;

} /* end of PushiIn */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* depilage(/lement) */
|
||||
/* */
|
||||
/* @function: PopiOut */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Pop the top value of the stack *stkh into *val.
 * Returns false (and leaves *val untouched) when the stack is empty.
 *
 * After the pop, the capacity is halved when fewer than half of the
 * slots are in use and more than kMinStackiSize values remain; a failed
 * shrink is ignored.
 */
bool PopiOut(StackiHdle stkh, int32_t *val)
{
    StackiPtr stk = *stkh;

    if (stk->top <= 0)
        return false;

    stk->top--;
    *val = stk->val[stk->top];

    if ((stk->top < (stk->size >> 1)) && (stk->top > kMinStackiSize))
        (void) ShrinkStack(stkh);

    return true;

} /* end of PopiOut */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* lecture descendante */
|
||||
/* */
|
||||
/* @function: ReadiDown */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Move the cursor one position down and read the value found there
 * into *val.  Returns false when the cursor is already at the bottom.
 */
bool ReadiDown(StackiPtr stki, int32_t *val)
{
    if (stki->cursor <= 0)
        return false;

    stki->cursor--;
    *val = stki->val[stki->cursor];

    return true;

} /* end of ReadiDown */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* lecture ascendante */
|
||||
/* */
|
||||
/* @function: ReadiUp */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Read the value under the cursor into *val, then move the cursor one
 * position up.  Returns false when the cursor has reached the top.
 */
bool ReadiUp(StackiPtr stki, int32_t *val)
{
    if (stki->cursor >= stki->top)
        return false;

    *val = stki->val[stki->cursor];
    stki->cursor++;

    return true;

} /* end of ReadiUp */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* remontee/descente du curseur */
|
||||
/* */
|
||||
/* @function: CursiToTop */
|
||||
/* @function: CursiToBottom */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/* Place the read cursor on the top of the stack (first free position). */
void CursiToTop(StackiPtr stki)
{
    stki->cursor = stki->top;

} /* end of CursiToTop */
|
||||
|
||||
void CursiToBottom(stki)
|
||||
StackiPtr stki;
|
||||
{
|
||||
stki->cursor = 0;
|
||||
|
||||
} /* end of CursiToBottom */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* echange des valeurs cursor <-> (top - 1) */
|
||||
/* */
|
||||
/* @function: CursiSwap */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Exchange the value under the cursor with the value at (top - 1).
 *
 * Does nothing when the stack is empty or when the cursor does not
 * designate a stored value (cursor < 0 or cursor >= top; the latter is
 * the state left by CursiToTop and would otherwise touch a slot past
 * the stored values).
 */
void CursiSwap(StackiPtr stki)
{
    int32_t tmp;

    if (   (stki->top <= 0)
        || (stki->cursor < 0)
        || (stki->cursor >= stki->top))
        return;

    tmp = stki->val[stki->cursor];
    stki->val[stki->cursor]  = stki->val[stki->top - 1];
    stki->val[stki->top - 1] = tmp;

} /* end of CursiSwap */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Recherche d'une valeur en stack a partir du */
|
||||
/* curseur courant en descendant. */
|
||||
/* on laisse le curseur a l'endroit trouve */
|
||||
/* */
|
||||
/* @function: SearchDownStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Look for `sval` by reading downwards from the current cursor with
 * ReadiDown.  Returns true as soon as a matching value has been read
 * (the cursor is left where ReadiDown put it), false when the bottom is
 * reached without a match.
 */
bool SearchDownStacki(StackiPtr stki, int32_t sval)
{
    int32_t current;

    for (;;) {
        if (! ReadiDown(stki, &current))
            return false;
        if (current == sval)
            return true;
    }

} /* end of SearchDownStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Recherche dichotomique d'une valeur en stack */
|
||||
/* le stack est suppose trie par valeurs */
|
||||
/* croissantes. */
|
||||
/* on place le curseur a l'endroit trouve */
|
||||
/* */
|
||||
/* @function: BinSearchStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Binary search of `sval` in a stack whose values are assumed to be
 * sorted in increasing order.
 *
 * On success the cursor is placed on the matching position and true is
 * returned; otherwise the cursor is left untouched and false is returned.
 *
 * Values are compared directly rather than through their difference, and
 * the midpoint is computed as low + (high - low) / 2, so that neither
 * step can overflow int32_t.
 */
bool BinSearchStacki(StackiPtr stki, int32_t sval)
{
    int32_t low  = 0;
    int32_t high = stki->top - 1;
    int32_t midd, current;

    while (low <= high) {

        midd    = low + (high - low) / 2;
        current = stki->val[midd];

        if (current == sval) {
            stki->cursor = midd;
            return true;
        }

        if (current > sval)
            high = midd - 1;
        else
            low  = midd + 1;
    }

    return false;

} /* end of BinSearchStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* teste l'egalite *physique* de deux stacks */
|
||||
/* */
|
||||
/* @function: SameStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Tell whether two stacks hold the same values: same number of stored
 * values and byte-identical content (memcmp) over those values.
 */
bool SameStacki(StackiPtr stki1, StackiPtr stki2)
{
    if (stki1->top != stki2->top)
        return false;

    if (memcmp(stki1->val, stki2->val, stki1->top * sizeof(int32_t)) != 0)
        return false;

    return true;

} /* end of SameStacki */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* inverse l'ordre des elements dans un stack */
|
||||
/* */
|
||||
/* @function: ReverseStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Reverse, in place, the order of the values stored in the stack.
 * Returns false when the stack is empty, true otherwise.
 */
bool ReverseStacki(StackiPtr stki)
{
    int32_t lo, hi, tmp;

    if (stki->top <= 0)
        return false;

    /* swap symmetric positions, walking towards the middle */
    for (lo = 0, hi = stki->top - 1 ; lo < hi ; lo++, hi--) {
        tmp           = stki->val[hi];
        stki->val[hi] = stki->val[lo];
        stki->val[lo] = tmp;
    }

    return true;

} /* end of ReverseStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Remove every values from a stack by moving */
|
||||
/* back the top member to 0. */
|
||||
/* */
|
||||
/* @function: EmptyStacki */
|
||||
/* -------------------------------------------- */
|
||||
/*
 * Discard every stored value by resetting `top` to 0.  The capacity, the
 * value array and the cursor are left as they are.  Always returns true.
 */
bool EmptyStacki(StackiPtr stki)
{
    stki->top = 0;

    return true;
}
|
81
pkg/obiapat/libstki.h
Normal file
81
pkg/obiapat/libstki.h
Normal file
@ -0,0 +1,81 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: libstki.h */
|
||||
/* Purpose: library of dynamic stacks holding */
|
||||
/* integer values */
|
||||
/* History: */
|
||||
/* 00/03/92 : <Gloup> first draft */
|
||||
/* 07/07/93 : <Gloup> complete revision */
|
||||
/* 10/03/94 : <Gloup> added xxxVector funcs */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* 07/12/21 : <Zafacs> some cleaning for the 2020's */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef _H_libstki
|
||||
#define _H_libstki
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "apat_mem.h"
|
||||
|
||||
/* ==================================================== */
|
||||
/* Constantes de dimensionnement */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef kMinStackiSize
|
||||
#define kMinStackiSize 2 /* taille mini stack */
|
||||
#endif
|
||||
|
||||
|
||||
#define kStkiNoErr 0 /* ok */
|
||||
#define kStkiMemErr 1 /* not enough memory */
|
||||
|
||||
#define kStkiReset true
|
||||
#define kStkiGet false
|
||||
|
||||
|
||||
/* ==================================================== */
|
||||
/* Types & Structures de donnees */
|
||||
/* ==================================================== */
|
||||
|
||||
/* -------------------- */
|
||||
/* structure : pile */
|
||||
/* -------------------- */
|
||||
/*
 * Dynamic stack of int32_t values.
 *   Stacki      the structure itself
 *   StackiPtr   pointer to a stack
 *   StackiHdle  pointer to a stack pointer (also used as stack array)
 */
struct Stacki {
    int32_t  size;      /* stack size (allocated slots) */
    int32_t  top;       /* current free position        */
    int32_t  cursor;    /* current cursor               */
    int32_t *val;       /* values                       */
};

typedef struct Stacki   Stacki;
typedef struct Stacki  *StackiPtr;
typedef struct Stacki **StackiHdle;
|
||||
|
||||
|
||||
|
||||
/* ==================================================== */
|
||||
/* Prototypes (generated by mproto) */
|
||||
/* ==================================================== */
|
||||
|
||||
/* libstki.c */
|
||||
|
||||
int16_t StkiError (bool reset );
|
||||
StackiPtr NewStacki (int32_t size );
|
||||
StackiPtr FreeStacki (StackiPtr stki );
|
||||
StackiHdle NewStackiVector (int32_t vectSize, int32_t stackSize );
|
||||
StackiHdle FreeStackiVector (StackiHdle stkh , int32_t vectSize );
|
||||
int32_t ResizeStacki (StackiHdle stkh , int32_t size );
|
||||
bool PushiIn (StackiHdle stkh , int32_t val );
|
||||
bool PopiOut (StackiHdle stkh , int32_t *val );
|
||||
bool ReadiDown (StackiPtr stki , int32_t *val );
|
||||
bool ReadiUp (StackiPtr stki , int32_t *val );
|
||||
void CursiToTop (StackiPtr stki );
|
||||
void CursiToBottom (StackiPtr stki );
|
||||
void CursiSwap (StackiPtr stki );
|
||||
bool SearchDownStacki (StackiPtr stki , int32_t sval );
|
||||
bool BinSearchStacki (StackiPtr stki , int32_t sval );
|
||||
bool SameStacki (StackiPtr stki1 , StackiPtr stki2 );
|
||||
bool ReverseStacki (StackiPtr stki );
|
||||
bool EmptyStacki (StackiPtr stki );
|
||||
|
||||
#endif /* _H_libstki */
|
417
pkg/obiapat/obiapat.c
Normal file
417
pkg/obiapat/obiapat.c
Normal file
@ -0,0 +1,417 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "libstki.h"
|
||||
#include "apat.h"
|
||||
|
||||
#include "obiapat.h"
|
||||
|
||||
static void EncodeSequence(SeqPtr seq);
|
||||
static void UpperSequence(char *seq);
|
||||
|
||||
/*
|
||||
* print the message given as argument and exit the program
|
||||
* @param error error number
|
||||
* @param message the text explaining what's going on
|
||||
* @param filename the file source where the program failed
|
||||
* @param linenumber the line where it has failed
|
||||
* filename and linenumber are written at pre-processing
|
||||
* time by a macro
|
||||
*/
|
||||
/*
 * Record an error for the caller.
 *
 * A message "Error <error> in file <filename> line <linenumber> :
 * <message>" is allocated with asprintf and stored in *error_msg (the
 * caller owns it and must free() it); the numeric code is stored in
 * *errno.
 *
 * If the message cannot be allocated, *error_msg is set to NULL rather
 * than left indeterminate.  Either output pointer may be NULL, in which
 * case that output is skipped.
 *
 * Always returns NULL, so that callers can `return ecoError(...)`.
 */
void* ecoError(int error,
               const char* message,
               const char * filename,
               int linenumber,
               int *errno,
               char **error_msg)
{
    if (error_msg
        && asprintf(error_msg,
                    "Error %d in file %s line %d : %s",
                    error,
                    filename,
                    linenumber,
                    message) < 0)
        *error_msg = NULL;

    if (errno)
        *errno = error;

    return NULL;
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* @doc: DNA alphabet (IUPAC)
|
||||
*/
|
||||
#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
|
||||
|
||||
/*
|
||||
* @doc: complementary DNA alphabet (IUPAC)
|
||||
*/
|
||||
#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
|
||||
|
||||
|
||||
static char sNuc[] = LX_BIO_DNA_ALPHA;
|
||||
static char sAnuc[] = LX_BIO_CDNA_ALPHA;
|
||||
|
||||
static char LXBioBaseComplement(char nucAc);
|
||||
static char *LXBioSeqComplement(char *nucAcSeq);
|
||||
static char *reverseSequence(char *str,char isPattern);
|
||||
|
||||
|
||||
/* ---------------------------- */
|
||||
|
||||
char LXBioBaseComplement(char nucAc)
|
||||
{
|
||||
char *c;
|
||||
|
||||
if ((c = strchr(sNuc, nucAc)))
|
||||
return sAnuc[(c - sNuc)];
|
||||
else
|
||||
return nucAc;
|
||||
}
|
||||
|
||||
/* ---------------------------- */
|
||||
|
||||
/*
 * Complement, in place, every character of the NUL-terminated string
 * `nucAcSeq` (the order of the characters is not changed).
 * Returns `nucAcSeq`.
 */
char *LXBioSeqComplement(char *nucAcSeq)
{
    char *cursor = nucAcSeq;

    while (*cursor) {
        *cursor = LXBioBaseComplement(*cursor);
        cursor++;
    }

    return nucAcSeq;
}
|
||||
|
||||
|
||||
/*
 * Reverse the NUL-terminated string `str` in place and return it.
 *
 * When `isPattern` is non-zero, the pattern modifiers are moved back to
 * the side of the symbol they were attached to before the reversal:
 *
 *   "#X"      -> "X#"        ('#' following its symbol again)
 *   "#[..]"   -> "[..]#"     ('#' following its bracket group again)
 *   "#X!"     -> "!X#"
 *   "X!"      -> "!X"        ('!' preceding its symbol again)
 *
 * Malformed input (a '#' or '!' with no symbol next to it, or a '['
 * without ']') is left in place instead of being read or written outside
 * the string.
 *
 * NOTE(review): the "#X!" rule is only applied when at least one more
 * character follows the '!' ((se - sb) > 2); when "#X!" ends the string
 * the result is "X!#".  This is kept as found - confirm whether the test
 * was meant to be >= 2.
 */
char *reverseSequence(char *str,char isPattern)
{
    char   *sb, *se, c;
    size_t  len;

    if (! str)
        return str;

    len = strlen(str);
    if (len == 0)
        return str;

    /* plain in-place reversal */
    sb = str;
    se = str + len - 1;

    while (sb < se) {
        c     = *sb;
        *sb++ = *se;
        *se-- = c;
    }

    if (! isPattern)
        return str;

    /* put the modifiers back on the right side of their symbol */
    se = str + len - 1;

    for (sb = str ; sb <= se ; sb++) {

        if (*sb == '#') {

            if (*(sb+1) == '[') {
                /* shift the whole bracket group one step left */
                while ((*sb != ']') && (*(sb+1) != '\0')) {
                    *sb = *(sb+1);
                    sb++;
                }
                *sb = '#';
            }
            else if (((se - sb) > 2) && (*(sb+2) == '!')) {
                *sb = '!';
                sb += 2;
                *sb = '#';
            }
            else if (sb < se) {
                *sb = *(sb+1);
                sb++;
                *sb = '#';
            }
            /* else: trailing '#', nothing to attach it to */
        }
        else if ((*sb == '!') && (sb > str)) {
            *sb     = *(sb-1);
            *(sb-1) = '!';
        }
    }

    return str;
}
|
||||
|
||||
/*
 * Reverse-complement a pattern string in place: every symbol is
 * complemented, then the string is reversed in pattern mode.
 * Returns `nucAcSeq`.
 */
char *ecoComplementPattern(char *nucAcSeq)
{
    char *complemented = LXBioSeqComplement(nucAcSeq);

    return reverseSequence(complemented, 1);
}
|
||||
|
||||
/*
 * Reverse-complement a sequence string in place: every symbol is
 * complemented, then the string is reversed (no pattern handling).
 * Returns `nucAcSeq`.
 */
char *ecoComplementSequence(char *nucAcSeq)
{
    char *complemented = LXBioSeqComplement(nucAcSeq);

    return reverseSequence(complemented, 0);
}
|
||||
|
||||
|
||||
/*
 * Extract the subsequence [begin,end[ of nucAcSeq.
 *
 * When begin < end the result is the plain slice.  Otherwise the
 * sequence is treated as circular: the result is nucAcSeq[begin..] 
 * followed by nucAcSeq[..end[.
 *
 * The result lives in a static buffer owned by this function: it is
 * overwritten by the next call and must not be freed by the caller
 * (which also makes the function non-reentrant).
 *
 * Returns the buffer, or NULL when the buffer cannot be grown; the
 * failure has then been reported through errno/errmsg by the allocator
 * and the previous buffer is kept for later calls.
 */
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end,
                     int *errno, char **errmsg)
{
    static char   *buffer   = NULL;
    static int32_t buffSize = 0;

    int32_t length;     /* total number of characters returned        */
    int32_t head;       /* characters copied starting at begin        */
    char   *grown;

    if (begin < end) {
        length = end - begin;
        head   = length;
    }
    else {
        length = end + strlen(nucAcSeq) - begin;
        head   = length - end;
    }

    if (length >= buffSize) {
        if (buffer)
            grown = ECOREALLOC(buffer,length+1,
                               "Error in reallocating sub sequence buffer",errno,errmsg);
        else
            grown = ECOMALLOC(length+1,
                              "Error in allocating sub sequence buffer",errno,errmsg);

        if (! grown)
            return NULL;

        buffer   = grown;
        buffSize = length+1;
    }

    strncpy(buffer, nucAcSeq + begin, head);
    strncpy(buffer + head, nucAcSeq, length - head);   /* wrapped part, if any */
    buffer[length] = 0;

    return buffer;
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* uppercase sequence */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
#define TO_UPPER(c) ((c) - 'a' + 'A')

/*
 * Convert, in place, every character 'a'..'z' of the NUL-terminated
 * string `seq` to its upper-case counterpart.  Any other character is
 * left untouched.
 */
void UpperSequence(char *seq)
{
    char *cursor = seq;

    while (*cursor) {
        if (IS_LOWER(*cursor))
            *cursor = TO_UPPER(*cursor);
        cursor++;
    }
}

#undef IS_LOWER
#undef TO_UPPER
|
||||
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* encode sequence */
|
||||
/* IS_UPPER is slightly faster than isupper */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
|
||||
|
||||
|
||||
|
||||
void EncodeSequence(SeqPtr seq)
|
||||
{
|
||||
int i;
|
||||
uint8_t *data;
|
||||
char *cseq;
|
||||
char nuc;
|
||||
|
||||
data = seq->data;
|
||||
cseq = seq->cseq;
|
||||
|
||||
while (*cseq) {
|
||||
nuc = *cseq & (~32);
|
||||
*data = (IS_UPPER(nuc) ? nuc - 'A' : 0x0);
|
||||
data++;
|
||||
cseq++;
|
||||
}
|
||||
|
||||
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++) {
|
||||
nuc = *cseq & (~32);
|
||||
*data = (IS_UPPER(nuc) ? nuc - 'A' : 0x0);
|
||||
}
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++)
|
||||
seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
|
||||
|
||||
}
|
||||
|
||||
#undef IS_UPPER
|
||||
|
||||
|
||||
/*
 * Build (or recycle) an apat Seq for the text `in`.
 *
 * in        NUL-terminated sequence text; NOT copied, the returned Seq
 *           keeps pointing to it through `cseq`
 * circular  non-zero for a circular sequence; the encoded buffer is then
 *           extended by MAX_PAT_LEN positions
 * seqlen    length of `in`
 * out       Seq to recycle, or NULL to allocate a new one together with
 *           its MAX_PATTERN hit-position and hit-error stacks
 * errno/errmsg  error outputs
 *
 * Returns the ready-to-search Seq, or NULL on allocation failure (the
 * failure is reported through errno/errmsg).  A Seq allocated by this
 * call is released before returning NULL; a recycled `out` is left to
 * its owner, with its previous data buffer intact.
 */
SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
                   SeqPtr out,
                   int *errno, char **errmsg)
{
    int      i;
    int      owned = 0;     /* set when `out` was allocated by this call */
    int32_t  need;          /* number of encoded positions required      */
    uint8_t *data;

    if (circular != 0) circular=MAX_PAT_LEN;

    if (!out)
    {
        out = ECOMALLOC(sizeof(Seq),
                        "Error in Allocation of a new Seq structure",errno,errmsg);
        if (!out)
            return NULL;

        owned = 1;

        /* the structure comes back zero-filled, so delete_apatseq can
           safely release a partially built Seq                        */
        for (i = 0 ; i < MAX_PATTERN ; i++)
        {
            if (! (out->hitpos[i] = NewStacki(kMinStackiSize))) {
                delete_apatseq(out,errno,errmsg);
                ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation",errno,errmsg);
            }

            if (! (out->hiterr[i] = NewStacki(kMinStackiSize))) {
                delete_apatseq(out,errno,errmsg);
                ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation",errno,errmsg);
            }
        }
    }

    out->seqsiz   = out->seqlen = seqlen;
    out->circular = circular;

    need = seqlen + circular;

    if (!out->data || (need >= out->datsiz))
    {
        /* always request at least one byte: a zero-sized request may
           legitimately yield NULL and would be mistaken for a failure */
        if (out->data)
            data = ECOREALLOC(out->data,(need > 0 ? need : 1) * sizeof(uint8_t),
                              "Error during Seq data buffer realloc",
                              errno,errmsg);
        else
            data = ECOMALLOC((need > 0 ? need : 1) * sizeof(uint8_t),
                             "Error in Allocation of a new Seq data member",
                             errno,errmsg);

        if (!data) {
            if (owned)
                delete_apatseq(out,errno,errmsg);
            return NULL;
        }

        out->data   = data;
        out->datsiz = need;
    }

    out->cseq = (char *)in;

    EncodeSequence(out);

    return out;
}
|
||||
|
||||
/*
 * Release a Seq built by new_apatseq: its encoded data buffer, every
 * non-NULL hit-position / hit-error stack, then the structure itself.
 * The text referenced by `cseq` is not released.
 *
 * Returns 0 when something was released, 1 when `pseq` is NULL.
 */
int32_t delete_apatseq(SeqPtr pseq,
                       int *errno, char **errmsg)
{
    int slot;

    if (!pseq)
        return 1;

    if (pseq->data)
        ECOFREE(pseq->data,"Freeing sequence data buffer",
                errno,errmsg);

    for (slot = 0 ; slot < MAX_PATTERN ; slot++) {
        if (pseq->hitpos[slot])
            FreeStacki(pseq->hitpos[slot]);
        if (pseq->hiterr[slot])
            FreeStacki(pseq->hiterr[slot]);
    }

    ECOFREE(pseq,"Freeing apat sequence structure",
            errno,errmsg);

    return 0;
}
|
||||
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max,
|
||||
int *errno, char **errmsg)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
int32_t patlen;
|
||||
int32_t patlen2;
|
||||
|
||||
patlen = strlen(pat);
|
||||
patlen2 = lenPattern(pat);
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern) + // Space for struct Pattern
|
||||
sizeof(char)*patlen+1 + // Space for cpat
|
||||
sizeof(uint32_t) * patlen2 + // Space for patcode
|
||||
sizeof(patword_t) * ALPHA_LEN , // Space for smat
|
||||
"Error in pattern allocation",
|
||||
errno,errmsg);
|
||||
|
||||
pattern->ok = true;
|
||||
pattern->hasIndel= false;
|
||||
pattern->maxerr = error_max;
|
||||
|
||||
pattern->cpat = (char*)pattern + sizeof(Pattern);
|
||||
pattern->patcode = (uint32_t*)(pattern->cpat + patlen + 1);
|
||||
pattern->smat = (patword_t*)(pattern->patcode + patlen2);
|
||||
|
||||
strncpy(pattern->cpat,pat,patlen);
|
||||
pattern->cpat[patlen]=0;
|
||||
UpperSequence(pattern->cpat);
|
||||
|
||||
if (!CheckPattern(pattern))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking",errno,errmsg);
|
||||
|
||||
if (! EncodePattern(pattern, dna))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding",errno,errmsg);
|
||||
|
||||
if (! CreateS(pattern, ALPHA_LEN))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling",errno,errmsg);
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Build the reverse-complement of the compiled pattern `pat`.
 *
 * A new single-block Pattern is allocated; hasIndel, maxerr and patlen
 * are copied from `pat`, the pattern text is copied and
 * reverse-complemented with ecoComplementPattern, then checked, encoded
 * and compiled again.  Each section of the block starts on a boundary
 * suitable for uint32_t and patword_t.  `pat` itself is not modified.
 *
 * Returns the new pattern (released with a single free()), or NULL on
 * failure; the failure is reported through errno/errmsg and the block is
 * released.
 */
PatternPtr complementPattern(PatternPtr pat, int *errno,
                             char **errmsg)
{
    PatternPtr pattern;
    size_t     cpat_len = strlen(pat->cpat);
    size_t     align, hdr_size, cpat_size, code_size;

    /* round every section up so that the next one is correctly aligned */
    align     = sizeof(patword_t) > sizeof(uint32_t) ? sizeof(patword_t)
                                                     : sizeof(uint32_t);
    hdr_size  = (sizeof(Pattern)                 + align - 1) / align * align;
    cpat_size = (sizeof(char) * cpat_len + 1     + align - 1) / align * align;
    code_size = (sizeof(uint32_t) * pat->patlen  + align - 1) / align * align;

    pattern = ECOMALLOC(hdr_size  +
                        cpat_size +
                        code_size +
                        sizeof(patword_t) * ALPHA_LEN,
                        "Error in pattern allocation",
                        errno,errmsg);

    if (!pattern)
        return NULL;

    pattern->ok      = true;
    pattern->hasIndel= pat->hasIndel;
    pattern->maxerr  = pat->maxerr;
    pattern->patlen  = pat->patlen;

    pattern->cpat    = (char*)pattern + hdr_size;
    pattern->patcode = (uint32_t*)(pattern->cpat + cpat_size);
    pattern->smat    = (patword_t*)((char*)pattern->patcode + code_size);

    strcpy(pattern->cpat,pat->cpat);

    ecoComplementPattern(pattern->cpat);

    if (!CheckPattern(pattern)) {
        ECOFREE(pattern,"Freeing rejected pattern",errno,errmsg);
        ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking",errno,errmsg);
    }

    if (! EncodePattern(pattern, dna)) {
        ECOFREE(pattern,"Freeing rejected pattern",errno,errmsg);
        ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding",errno,errmsg);
    }

    if (! CreateS(pattern, ALPHA_LEN)) {
        ECOFREE(pattern,"Freeing rejected pattern",errno,errmsg);
        ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling",errno,errmsg);
    }

    return pattern;
}
|
134
pkg/obiapat/obiapat.h
Normal file
134
pkg/obiapat/obiapat.h
Normal file
@ -0,0 +1,134 @@
|
||||
#ifndef __obiapat_h__
|
||||
#define __obiapat_h__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "apat.h"
|
||||
|
||||
/*****************************************************
|
||||
*
|
||||
* Data type declarations
|
||||
*
|
||||
*****************************************************/
|
||||
|
||||
/*
|
||||
*
|
||||
* Sequence types
|
||||
*
|
||||
*/
|
||||
|
||||
/* Sequence record made of fixed-size fields followed by a `data` member. */
/* NOTE(review): presumably an on-disk record layout with `data` used as  */
/* a variable-length tail - confirm against the reader code.              */
typedef struct {
|
||||
|
||||
int32_t taxid; /* taxon identifier */
|
||||
char AC[20]; /* fixed 20-byte field; presumably the accession - TODO confirm */
|
||||
int32_t DE_length; /* presumably length of the DE (description) part - TODO confirm */
|
||||
int32_t SQ_length; /* presumably length of the SQ (sequence) part - TODO confirm */
|
||||
int32_t CSQ_length; /* presumably length of the stored/compressed sequence - TODO confirm */
|
||||
|
||||
char data[1]; /* first byte of the trailing data */
|
||||
|
||||
} ecoseqformat_t;
|
||||
|
||||
/* Sequence record holding its text fields through pointers. */
typedef struct {
|
||||
int32_t taxid; /* taxon identifier */
|
||||
int32_t SQ_length; /* presumably length of SQ - TODO confirm */
|
||||
char *AC; /* presumably the accession - TODO confirm */
|
||||
char *DE; /* presumably the description - TODO confirm */
|
||||
char *SQ; /* presumably the sequence text - TODO confirm */
|
||||
} ecoseq_t;
|
||||
|
||||
|
||||
|
||||
/*****************************************************
|
||||
*
|
||||
* Function declarations
|
||||
*
|
||||
*****************************************************/
|
||||
|
||||
void* ecoError(int error,
|
||||
const char* message,
|
||||
const char * filename,
|
||||
int linenumber,
|
||||
int *errno,
|
||||
char **error_msg);
|
||||
|
||||
#define ECOERROR(code,message,errno,errmsg) \
|
||||
{ return ecoError((code),(message),__FILE__,__LINE__,errno,errmsg); }
|
||||
|
||||
#define ECO_IO_ERROR (1)
|
||||
#define ECO_MEM_ERROR (2)
|
||||
#define ECO_ASSERT_ERROR (3)
|
||||
#define ECO_NOTFOUND_ERROR (4)
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Low level system functions
|
||||
*
|
||||
*/
|
||||
|
||||
int32_t is_big_endian();
|
||||
int32_t swap_int32_t(int32_t);
|
||||
|
||||
void *eco_malloc(int32_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line,
|
||||
int *errno, char **errmsg);
|
||||
|
||||
|
||||
void *eco_realloc(void *chunk,
|
||||
int32_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line,
|
||||
int *errno, char **errmsg);
|
||||
|
||||
void eco_free(void *chunk,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line,
|
||||
int *errno, char **errmsg);
|
||||
|
||||
void eco_trace_memory_allocation();
|
||||
void eco_untrace_memory_allocation();
|
||||
|
||||
#define ECOMALLOC(size,error_message,errno,errmsg) \
|
||||
eco_malloc((size),(error_message),__FILE__,__LINE__,errno,errmsg)
|
||||
|
||||
#define ECOREALLOC(chunk,size,error_message,errno,errmsg) \
|
||||
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__,errno,errmsg)
|
||||
|
||||
#define ECOFREE(chunk,error_message,errno,errmsg) \
|
||||
eco_free((chunk),(error_message),__FILE__,__LINE__,errno,errmsg)
|
||||
|
||||
|
||||
|
||||
|
||||
ecoseq_t *new_ecoseq();
|
||||
int32_t delete_ecoseq(ecoseq_t *);
|
||||
ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
char *DE,
|
||||
char *SQ,
|
||||
int32_t taxid
|
||||
);
|
||||
|
||||
|
||||
|
||||
int32_t delete_apatseq(SeqPtr pseq,
|
||||
int *errno, char **errmsg);
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max, int *errno, char **errmsg);
|
||||
PatternPtr complementPattern(PatternPtr pat, int *errno, char **errmsg);
|
||||
|
||||
SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
||||
SeqPtr out,
|
||||
int *errno, char **errmsg);
|
||||
|
||||
char *ecoComplementPattern(char *nucAcSeq);
|
||||
char *ecoComplementSequence(char *nucAcSeq);
|
||||
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end,
|
||||
int *errno, char **errmsg);
|
||||
|
||||
|
||||
#endif /* __obiapat_h__ */
|
168
pkg/obiapat/pattern.go
Normal file
168
pkg/obiapat/pattern.go
Normal file
@ -0,0 +1,168 @@
|
||||
package obiapat
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -g -Wall
|
||||
#include <stdlib.h>
|
||||
#include "obiapat.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"errors"
|
||||
"unsafe"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
var MAX_PAT_LEN = int(C.MAX_PAT_LEN)
|
||||
|
||||
// ApatPattern wraps a pointer to a C Pattern structure.
// The memory it points to is allocated on the C side and is released
// with Free.
type ApatPattern struct {
|
||||
pointer *C.Pattern // C-allocated pattern; nil in NilApatPattern
|
||||
}
|
||||
|
||||
// ApatSequence wraps a pointer to a C Seq structure.
type ApatSequence struct {
|
||||
pointer *C.Seq // C-side sequence; nil in NilApatSequence
|
||||
}
|
||||
|
||||
var NilApatPattern = ApatPattern{nil}
|
||||
var NilApatSequence = ApatSequence{nil}
|
||||
|
||||
func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) {
|
||||
cpattern := C.CString(pattern)
|
||||
defer C.free(unsafe.Pointer(cpattern))
|
||||
cerrormax := C.int32_t(errormax)
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
|
||||
ap := C.buildPattern(cpattern, cerrormax, &errno, &errmsg)
|
||||
|
||||
if ap == nil {
|
||||
message := C.GoString(errmsg)
|
||||
C.free(unsafe.Pointer(errmsg))
|
||||
return NilApatPattern, errors.New(message)
|
||||
}
|
||||
|
||||
return ApatPattern{pointer: ap}, nil
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
ap := C.complementPattern((*C.Pattern)(pattern.pointer), &errno, &errmsg)
|
||||
|
||||
if ap == nil {
|
||||
message := C.GoString(errmsg)
|
||||
C.free(unsafe.Pointer(errmsg))
|
||||
return ApatPattern{nil}, errors.New(message)
|
||||
}
|
||||
|
||||
return ApatPattern{pointer: ap}, nil
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) String() string {
|
||||
return C.GoString(pattern.pointer.cpat)
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) Length() int {
|
||||
return int(pattern.pointer.patlen)
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) Free() {
|
||||
C.free(unsafe.Pointer(pattern.pointer))
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) Print() {
|
||||
C.PrintDebugPattern(C.PatternPtr(pattern.pointer))
|
||||
}
|
||||
|
||||
func MakeApatSequence(sequence obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) {
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
seqlen := sequence.Length()
|
||||
p := C.malloc(C.size_t(seqlen) + 1)
|
||||
ic := 0
|
||||
if circular {
|
||||
ic = 1
|
||||
}
|
||||
|
||||
// copy the data into the buffer, by converting it to a Go array
|
||||
cBuf := (*[1 << 30]byte)(p)
|
||||
copy(cBuf[:], sequence.Sequence())
|
||||
cBuf[sequence.Length()] = 0
|
||||
|
||||
var out *C.Seq
|
||||
|
||||
if len(recycle) > 0 {
|
||||
out = recycle[0].pointer
|
||||
} else {
|
||||
out = nil
|
||||
}
|
||||
|
||||
pseq := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
|
||||
(*C.Seq)(out),
|
||||
&errno, &errmsg)
|
||||
|
||||
if pseq == nil {
|
||||
message := C.GoString(errmsg)
|
||||
C.free(unsafe.Pointer(errmsg))
|
||||
return NilApatSequence, errors.New(message)
|
||||
}
|
||||
|
||||
seq := ApatSequence{pointer: pseq}
|
||||
|
||||
//log.Println(C.GoString(pseq.cseq))
|
||||
// runtime.SetFinalizer(&seq, __free_apat_sequence__)
|
||||
|
||||
return seq, nil
|
||||
}
|
||||
|
||||
func (sequence ApatSequence) Length() int {
|
||||
return int(sequence.pointer.seqlen)
|
||||
}
|
||||
|
||||
func (sequence ApatSequence) Free() {
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
|
||||
C.delete_apatseq(sequence.pointer,
|
||||
&errno, &errmsg)
|
||||
|
||||
sequence.pointer = nil
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (loc [][3]int) {
|
||||
begin := 0
|
||||
length := sequence.Length()
|
||||
|
||||
if len(limits) > 0 {
|
||||
begin = limits[0]
|
||||
}
|
||||
|
||||
if len(limits) > 1 {
|
||||
length = limits[1]
|
||||
}
|
||||
|
||||
nhits := int(C.ManberAll(sequence.pointer,
|
||||
pattern.pointer,
|
||||
0,
|
||||
C.int32_t(begin),
|
||||
C.int32_t(length+C.MAX_PAT_LEN)))
|
||||
|
||||
//log.Printf("match count : %d\n", nhits)
|
||||
|
||||
if nhits == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
stktmp := (*[1 << 30]int32)(unsafe.Pointer(sequence.pointer.hitpos[0].val))
|
||||
errtmp := (*[1 << 30]int32)(unsafe.Pointer(sequence.pointer.hiterr[0].val))
|
||||
patlen := int(pattern.pointer.patlen)
|
||||
|
||||
for i := 0; i < nhits; i++ {
|
||||
start := int(stktmp[i])
|
||||
err := int(errtmp[i])
|
||||
|
||||
loc = append(loc, [3]int{start, start + patlen, err})
|
||||
}
|
||||
|
||||
return loc
|
||||
}
|
370
pkg/obiapat/pcr.go
Normal file
370
pkg/obiapat/pcr.go
Normal file
@ -0,0 +1,370 @@
|
||||
package obiapat
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/goutils"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
// __options__ stores the settings of a PCR run. It is shared through the
// Options handle so that WithOption setters can modify it in place.
type __options__ struct {
	// Amplicon length bounds; 0 means "no bound" in __pcr__.
	min_length, max_length int
	// circular is read by __pcr__ and forwarded to MakeApatSequence.
	circular bool
	// Error values handed to MakeApatPattern for each primer.
	forward_error, reverse_error int
	// Processing parameters; they are only stored and returned here.
	buffer_size, batch_size, parallel_workers int
}
|
||||
|
||||
type Options struct {
|
||||
pointer *__options__
|
||||
}
|
||||
|
||||
// WithOption is a setter applied by MakeOptions to the Options being built.
type WithOption func(Options)
|
||||
|
||||
func (options Options) MinLength() int {
|
||||
return options.pointer.min_length
|
||||
}
|
||||
|
||||
func (options Options) MaxLength() int {
|
||||
return options.pointer.max_length
|
||||
}
|
||||
|
||||
func (options Options) ForwardError() int {
|
||||
return options.pointer.forward_error
|
||||
}
|
||||
|
||||
func (options Options) ReverseError() int {
|
||||
return options.pointer.reverse_error
|
||||
}
|
||||
|
||||
func (options Options) Circular() bool {
|
||||
return options.pointer.circular
|
||||
}
|
||||
|
||||
func (opt Options) BufferSize() int {
|
||||
return opt.pointer.buffer_size
|
||||
}
|
||||
|
||||
func (opt Options) BatchSize() int {
|
||||
return opt.pointer.batch_size
|
||||
}
|
||||
|
||||
func (opt Options) ParallelWorkers() int {
|
||||
return opt.pointer.parallel_workers
|
||||
}
|
||||
|
||||
func MakeOptions(setters []WithOption) Options {
|
||||
o := __options__{
|
||||
min_length: 0,
|
||||
max_length: 0,
|
||||
forward_error: 0,
|
||||
reverse_error: 0,
|
||||
circular: false,
|
||||
parallel_workers: 4,
|
||||
batch_size: 100,
|
||||
buffer_size: 100,
|
||||
}
|
||||
|
||||
opt := Options{&o}
|
||||
|
||||
for _, set := range setters {
|
||||
set(opt)
|
||||
}
|
||||
|
||||
return opt
|
||||
}
|
||||
|
||||
func OptionMinLength(min_length int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.min_length = min_length
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionMaxLength(max_length int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.max_length = max_length
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionForwardError(max int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.forward_error = max
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionReverseError(max int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.reverse_error = max
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionCircular(circular bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.circular = circular
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionBufferSize(size int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.buffer_size = size
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionParallelWorkers(nworkers int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.parallel_workers = nworkers
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionBatchSize(size int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.batch_size = size
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func __pcr__(seq ApatSequence, sequence obiseq.BioSequence,
|
||||
forward, cfwd, reverse, crev ApatPattern,
|
||||
opt Options) obiseq.BioSequenceSlice {
|
||||
results := make(obiseq.BioSequenceSlice, 0, 10)
|
||||
|
||||
forward_matches := forward.FindAllIndex(seq)
|
||||
|
||||
if forward_matches != nil {
|
||||
|
||||
begin := forward_matches[0][0]
|
||||
length := seq.Length() - begin
|
||||
|
||||
if opt.pointer.max_length > 0 {
|
||||
length = forward_matches[len(forward_matches)-1][2] - begin + opt.MaxLength() + reverse.Length()
|
||||
}
|
||||
|
||||
if opt.Circular() {
|
||||
begin = 0
|
||||
length = seq.Length() + MAX_PAT_LEN
|
||||
}
|
||||
|
||||
reverse_matches := crev.FindAllIndex(seq, begin, length)
|
||||
|
||||
if reverse_matches != nil {
|
||||
for _, fm := range forward_matches {
|
||||
|
||||
posi := fm[0]
|
||||
|
||||
if posi < seq.Length() {
|
||||
|
||||
erri := fm[2]
|
||||
|
||||
for _, rm := range reverse_matches {
|
||||
posj := rm[0]
|
||||
if posj < seq.Length() {
|
||||
posj := rm[1]
|
||||
errj := rm[2]
|
||||
length = 0
|
||||
|
||||
if posj > posi {
|
||||
length = rm[0] - fm[1]
|
||||
} else {
|
||||
if opt.Circular() {
|
||||
length = rm[0] + seq.Length() - posi - forward.Length()
|
||||
}
|
||||
}
|
||||
if length > 0 && // For when primers touch or overlap
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
annot := amplicon.Annotations()
|
||||
goutils.CopyMap(annot, sequence.Annotations())
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
annot["forward_match"] = match.String()
|
||||
match.Revoke()
|
||||
|
||||
annot["forward_error"] = erri
|
||||
|
||||
annot["reverse_primer"] = reverse.String()
|
||||
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
match = match.ReverseComplement(true)
|
||||
annot["reverse_match"] = match.String()
|
||||
match.Revoke()
|
||||
|
||||
annot["reverse_error"] = errj
|
||||
results = append(results, amplicon)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
forward_matches = reverse.FindAllIndex(seq)
|
||||
|
||||
if forward_matches != nil {
|
||||
|
||||
begin := forward_matches[0][0]
|
||||
length := seq.Length() - begin
|
||||
|
||||
if opt.pointer.max_length > 0 {
|
||||
length = forward_matches[len(forward_matches)-1][2] - begin + opt.MaxLength() + reverse.Length()
|
||||
}
|
||||
|
||||
if opt.Circular() {
|
||||
begin = 0
|
||||
length = seq.Length() + MAX_PAT_LEN
|
||||
}
|
||||
|
||||
reverse_matches := cfwd.FindAllIndex(seq, begin, length)
|
||||
|
||||
if reverse_matches != nil {
|
||||
for _, fm := range forward_matches {
|
||||
|
||||
posi := fm[0]
|
||||
|
||||
if posi < seq.Length() {
|
||||
|
||||
erri := fm[2]
|
||||
|
||||
for _, rm := range reverse_matches {
|
||||
posj := rm[0]
|
||||
if posj < seq.Length() {
|
||||
posj := rm[1]
|
||||
errj := rm[2]
|
||||
length = 0
|
||||
|
||||
if posj > posi {
|
||||
length = rm[0] - fm[1]
|
||||
} else {
|
||||
if opt.Circular() {
|
||||
length = rm[0] + seq.Length() - posi - forward.Length()
|
||||
}
|
||||
}
|
||||
if length > 0 && // For when primers touch or overlap
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
amplicon = amplicon.ReverseComplement(true)
|
||||
|
||||
annot := amplicon.Annotations()
|
||||
goutils.CopyMap(annot, sequence.Annotations())
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
match.ReverseComplement(true)
|
||||
annot["forward_match"] = match.String()
|
||||
match.Revoke()
|
||||
|
||||
annot["forward_error"] = errj
|
||||
|
||||
annot["reverse_primer"] = reverse.String()
|
||||
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
annot["reverse_match"] = match.String()
|
||||
match.Revoke()
|
||||
|
||||
annot["reverse_error"] = erri
|
||||
results = append(results, amplicon)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func PCR(sequence obiseq.BioSequence,
|
||||
forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
|
||||
seq, _ := MakeApatSequence(sequence, opt.Circular())
|
||||
|
||||
fwd, _ := MakeApatPattern(forward, opt.ForwardError())
|
||||
rev, _ := MakeApatPattern(reverse, opt.ReverseError())
|
||||
cfwd, _ := fwd.ReverseComplement()
|
||||
crev, _ := rev.ReverseComplement()
|
||||
|
||||
results := __pcr__(seq, sequence,
|
||||
fwd, cfwd, rev, crev,
|
||||
opt)
|
||||
|
||||
seq.Free()
|
||||
|
||||
fwd.Free()
|
||||
rev.Free()
|
||||
cfwd.Free()
|
||||
crev.Free()
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice {
|
||||
|
||||
results := make(obiseq.BioSequenceSlice, 0, len(sequences))
|
||||
|
||||
opt := MakeOptions(options)
|
||||
|
||||
fwd, _ := MakeApatPattern(forward, opt.ForwardError())
|
||||
rev, _ := MakeApatPattern(reverse, opt.ReverseError())
|
||||
cfwd, _ := fwd.ReverseComplement()
|
||||
crev, _ := rev.ReverseComplement()
|
||||
|
||||
if len(sequences) > 0 {
|
||||
seq, _ := MakeApatSequence(sequences[0], opt.Circular())
|
||||
amplicons := __pcr__(seq, sequences[0],
|
||||
fwd, cfwd, rev, crev,
|
||||
opt)
|
||||
|
||||
if len(amplicons) > 0 {
|
||||
results = append(results, amplicons...)
|
||||
}
|
||||
|
||||
for _, sequence := range sequences[1:] {
|
||||
seq, _ := MakeApatSequence(sequence, opt.Circular(), seq)
|
||||
amplicons = __pcr__(seq, sequence,
|
||||
fwd, cfwd, rev, crev,
|
||||
opt)
|
||||
if len(amplicons) > 0 {
|
||||
results = append(results, amplicons...)
|
||||
}
|
||||
}
|
||||
|
||||
seq.Free()
|
||||
}
|
||||
|
||||
fwd.Free()
|
||||
rev.Free()
|
||||
cfwd.Free()
|
||||
crev.Free()
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func PCRSliceWorker(forward, reverse string,
|
||||
options ...WithOption) obiseq.SeqSliceWorker {
|
||||
|
||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
return PCRSlice(sequences, forward, reverse, options...)
|
||||
}
|
||||
|
||||
return worker
|
||||
}
|
Reference in New Issue
Block a user