Cython API: added API to use apat (pattern search) C library
This commit is contained in:
61
python/obitools3/dms/capi/apat.pxd
Executable file
61
python/obitools3/dms/capi/apat.pxd
Executable file
@ -0,0 +1,61 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.dms.capi.obiview cimport Obiview_p
|
||||
from obitools3.dms.capi.obidmscolumn cimport OBIDMS_column_p
|
||||
from obitools3.dms.capi.obitypes cimport index_t
|
||||
|
||||
from libc.stdint cimport int32_t, uint32_t, uint8_t
|
||||
|
||||
|
||||
cdef extern from "libecoPCR/libapat/libstki.h" nogil:
|
||||
|
||||
struct Stacki :
|
||||
int32_t size
|
||||
int32_t top
|
||||
int32_t cursor
|
||||
int32_t* val
|
||||
|
||||
ctypedef Stacki* StackiPtr
|
||||
|
||||
|
||||
cdef extern from "libecoPCR/libapat/apat.h" nogil:
|
||||
|
||||
extern int MAX_PATTERN
|
||||
|
||||
struct Seq :
|
||||
char* name
|
||||
int32_t seqlen
|
||||
int32_t seqsiz
|
||||
int32_t datsiz
|
||||
int32_t circular
|
||||
uint8_t* data
|
||||
char* cseq
|
||||
StackiPtr* hitpos
|
||||
StackiPtr* hiterr
|
||||
|
||||
ctypedef Seq* SeqPtr
|
||||
|
||||
|
||||
struct Pattern :
|
||||
int patlen
|
||||
int maxerr
|
||||
char* cpat
|
||||
int32_t* patcode
|
||||
uint32_t* smat
|
||||
uint32_t omask
|
||||
bint hasIndel
|
||||
bint ok
|
||||
|
||||
ctypedef Pattern* PatternPtr
|
||||
|
||||
|
||||
int32_t ManberAll(Seq *pseq, Pattern *ppat, int patnum, int begin, int length)
|
||||
|
||||
|
||||
cdef extern from "libecoPCR/ecoPCR.h" nogil:
|
||||
|
||||
SeqPtr ecoseq2apatseq(char* sequence, SeqPtr out, int32_t circular)
|
||||
int32_t delete_apatseq(SeqPtr pseq)
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max)
|
||||
PatternPtr complementPattern(PatternPtr pat)
|
||||
|
28
python/obitools3/libalign/apat_pattern.pxd
Executable file
28
python/obitools3/libalign/apat_pattern.pxd
Executable file
@ -0,0 +1,28 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.dms.capi.apat cimport SeqPtr
|
||||
|
||||
|
||||
cdef class One_primer_search_result:
|
||||
cdef SeqPtr _pointer
|
||||
cdef int pattern_ref
|
||||
cdef int hit_count
|
||||
cdef inline SeqPtr pointer(self)
|
||||
@staticmethod
|
||||
cdef new(SeqPtr apat_seq_p, int pattern_ref, int hit_count)
|
||||
cpdef first_encountered(self)
|
||||
|
||||
|
||||
cdef class Primer_search:
|
||||
cdef SeqPtr apat_seq_p
|
||||
cdef list direct_primers
|
||||
cdef list revcomp_primers
|
||||
cpdef One_primer_search_result search_one_primer(self, bytes sequence,
|
||||
int primer_pair_index, int primer_index,
|
||||
bint reverse_comp=*,
|
||||
bint same_sequence=*,
|
||||
int pattern_ref=*,
|
||||
int begin=*)
|
||||
cpdef free(self)
|
||||
|
||||
|
150
python/obitools3/libalign/apat_pattern.pyx
Executable file
150
python/obitools3/libalign/apat_pattern.pyx
Executable file
@ -0,0 +1,150 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from libc.stdint cimport uintptr_t
|
||||
|
||||
from obitools3.dms.capi.apat cimport SeqPtr, \
|
||||
PatternPtr, \
|
||||
ecoseq2apatseq, \
|
||||
ManberAll, \
|
||||
delete_apatseq, \
|
||||
buildPattern, \
|
||||
complementPattern
|
||||
|
||||
|
||||
cdef class One_primer_search_result:
|
||||
|
||||
cdef inline SeqPtr pointer(self) :
|
||||
return <SeqPtr>(self._pointer)
|
||||
|
||||
@staticmethod
|
||||
cdef new(SeqPtr apat_seq_p, int pattern_ref, int hit_count) : # not __init__ method because need to pass pointer
|
||||
result = One_primer_search_result()
|
||||
result._pointer = apat_seq_p
|
||||
result.pattern_ref = pattern_ref
|
||||
result.hit_count = hit_count
|
||||
return result
|
||||
|
||||
cpdef first_encountered(self):
|
||||
cdef int i
|
||||
cdef int first
|
||||
cdef int first_index
|
||||
cdef int hit_pos
|
||||
cdef int error_count
|
||||
cdef SeqPtr pointer
|
||||
|
||||
pointer = self.pointer()
|
||||
first = -1
|
||||
|
||||
for i in range(self.hit_count):
|
||||
hit_pos = pointer.hitpos[self.pattern_ref].val[i]
|
||||
if first == -1 or hit_pos < first:
|
||||
first = hit_pos
|
||||
first_index = i
|
||||
error_count = pointer.hiterr[self.pattern_ref].val[first_index]
|
||||
return error_count, first
|
||||
|
||||
|
||||
cdef class Primer_search:
|
||||
|
||||
def __init__(self, list primers, int error_max) :
|
||||
cdef PatternPtr p1
|
||||
cdef PatternPtr p2
|
||||
cdef PatternPtr p3
|
||||
cdef PatternPtr p4
|
||||
|
||||
cdef PatternPtr test
|
||||
cdef uintptr_t test_i
|
||||
cdef bytes test_b
|
||||
|
||||
self.apat_seq_p = NULL
|
||||
self.direct_primers = []
|
||||
self.revcomp_primers = []
|
||||
for i in range(len(primers)):
|
||||
p1 = buildPattern(primers[i][0], error_max)
|
||||
p2 = buildPattern(primers[i][1], error_max)
|
||||
p3 = complementPattern(p1)
|
||||
p4 = complementPattern(p2)
|
||||
self.direct_primers.append((<uintptr_t>p1, <uintptr_t>p2))
|
||||
self.revcomp_primers.append((<uintptr_t>p3, <uintptr_t>p4))
|
||||
|
||||
|
||||
cpdef One_primer_search_result search_one_primer(self, bytes sequence,
|
||||
int primer_pair_index, int primer_index,
|
||||
bint reverse_comp=False,
|
||||
bint same_sequence=False,
|
||||
int pattern_ref=0,
|
||||
int begin=0) :
|
||||
'''
|
||||
begin = start of direct pattern if it was already searched + its length
|
||||
'''
|
||||
cdef One_primer_search_result result = None
|
||||
cdef PatternPtr primer_p
|
||||
cdef SeqPtr apat_seq_p
|
||||
cdef int hit_count
|
||||
|
||||
if not same_sequence:
|
||||
self.apat_seq_p = <SeqPtr>(ecoseq2apatseq(sequence, self.apat_seq_p, 0))
|
||||
|
||||
apat_seq_p = <SeqPtr>(self.apat_seq_p)
|
||||
|
||||
if reverse_comp:
|
||||
primer_p = <PatternPtr>(<uintptr_t>(self.revcomp_primers[primer_pair_index][primer_index]))
|
||||
else:
|
||||
primer_p = <PatternPtr>(<uintptr_t>(self.direct_primers[primer_pair_index][primer_index]))
|
||||
|
||||
begin = begin
|
||||
seqlen = (apat_seq_p.seqlen) - begin
|
||||
|
||||
hit_count = ManberAll(apat_seq_p, primer_p, pattern_ref, begin, seqlen)
|
||||
|
||||
if hit_count:
|
||||
result = One_primer_search_result.new(apat_seq_p, pattern_ref, hit_count)
|
||||
else:
|
||||
result = None
|
||||
return result
|
||||
|
||||
|
||||
cpdef free(self):
|
||||
delete_apatseq(self.apat_seq_p)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# min_error_count = -1
|
||||
# best_hit = -1
|
||||
# for i in range(hit_count):
|
||||
# error_count = apat_seq_p.hiterr[pattern_ref].val[i]
|
||||
# hit_pos = apat_seq_p.hitpos[pattern_ref].val[i]
|
||||
# if min_error_count < 0 or error_count < min_error_count:
|
||||
# self.min_error_count = error_count
|
||||
# self.best_hit = i
|
||||
|
||||
# def __call__(self, bytes sequence):
|
||||
# # TODO declare things
|
||||
# # Search ALL primers in the direct direction
|
||||
# p1 = search_one_primer(self, sequence, True, False, same_sequence=False)
|
||||
# p2 = search_one_primer(self, sequence, False, False, same_sequence=True)
|
||||
#
|
||||
# # Search each primer in both directions
|
||||
# # 1st direction
|
||||
# p1 = search_one_primer(self, sequence, True, False, same_sequence=False)
|
||||
# p2_revcomp = search_one_primer(self, sequence, False, True, same_sequence=True)
|
||||
# # 2nd direction
|
||||
# p2 = search_one_primer(self, sequence, False, False, same_sequence=True)
|
||||
# p1_revcomp = search_one_primer(self, sequence, True, True, same_sequence=True)
|
||||
# # Choose best hit (best score for direction and longest amplicon if multiple hits in one direction)
|
||||
# if p1.min_error_count + p2_revcomp.min_error_count < p2.min_error_count + p1_revcomp.min_error_count:
|
||||
# direct_hit = True
|
||||
# if p1.hit_count > 1:
|
||||
# pos1 = min((pos, idx) for (idx, pos) in enumerate(p1.hit_pos))[0]
|
||||
# if p2_revcomp.hit_count > 1:
|
||||
# pos2 = max((pos, idx) for (idx, pos) in enumerate(p2_revcomp.hit_pos))[0]
|
||||
# else:
|
||||
# direct_hit = False
|
||||
# if p2.hit_count > 1:
|
||||
# pos1 = min((pos, idx) for (idx, pos) in enumerate(p2.hit_pos))[0]
|
||||
# if p2_revcomp.hit_count > 1:
|
||||
# pos2 = max((pos, idx) for (idx, pos) in enumerate(p2_revcomp.hit_pos))[0]
|
||||
|
Reference in New Issue
Block a user