New command: obi ecopcr

This commit is contained in:
Celine Mercier
2018-07-28 17:13:45 +02:00
parent 275d85dc5d
commit 2ba6d16147
24 changed files with 4523 additions and 1 deletions

View File

@ -0,0 +1,188 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.capi.obidms cimport OBIDMS_p
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption, addTaxonomyInputOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from libc.stdlib cimport malloc, free
from libc.stdint cimport int32_t
# Short human-readable description of this command.
# NOTE(review): presumably picked up by the obi command loader for help output - confirm.
__title__ = "in silico PCR"
# TODO: add option to output unique ids
def addOptions(parser):
    """Register the command-line options of ``obi ecopcr`` on *parser*.

    The shared sequence-input, minimal-output and taxonomy-input option
    groups are added first, followed by an argument group holding the
    options specific to the in silico PCR. Every specific option is stored
    under a ``ecopcr:<name>`` destination.
    """
    # Option groups shared with other commands.
    addSequenceInputOption(parser)
    addMinimalOutputOption(parser)
    addTaxonomyInputOption(parser)

    ecopcr_group = parser.add_argument_group('obi ecopcr specific options')
    add_option = ecopcr_group.add_argument

    # --- Primers -----------------------------------------------------------
    add_option('--primer1', '-F',
               dest="ecopcr:primer1",
               action="store",
               type=str,
               metavar='<PRIMER>',
               help="Forward primer.")

    add_option('--primer2', '-R',
               dest="ecopcr:primer2",
               action="store",
               type=str,
               metavar='<PRIMER>',
               help="Reverse primer.")

    add_option('--error', '-e',
               dest="ecopcr:error",
               action="store",
               type=int,
               default=0,
               metavar='<ERROR>',
               help="Maximum number of errors (mismatches) allowed per primer. Default: 0.")

    # --- Amplicon length bounds --------------------------------------------
    add_option('--min-length', '-l',
               dest="ecopcr:min-length",
               action="store",
               type=int,
               default=0,
               metavar="<MINIMUM LENGTH>",
               help="Minimum length of the in silico amplified DNA fragment, excluding primers.")

    add_option('--max-length', '-L',
               dest="ecopcr:max-length",
               action="store",
               type=int,
               default=0,
               metavar="<MAXIMUM LENGTH>",
               help="Maximum length of the in silico amplified DNA fragment, excluding primers.")

    # --- Taxonomic filters (both options may be repeated) ------------------
    add_option('--restrict-to-taxid', '-r',
               dest="ecopcr:restrict-to-taxid",
               action="append",
               type=int,
               default=[],
               metavar="<TAXID>",
               help="Only the sequence records corresponding to the taxonomic group "
                    "identified by TAXID are considered for the in silico PCR. "
                    "The TAXID is an integer that can be found in the NCBI taxonomic database.")

    add_option('--ignore-taxid', '-i',
               dest="ecopcr:ignore-taxid",
               action="append",
               type=int,
               default=[],
               metavar="<TAXID>",
               help="The sequences of the taxonomic group identified by TAXID are not considered for the in silico PCR.")

    # --- Sequence topology and thermodynamics ------------------------------
    add_option('--circular', '-c',
               dest="ecopcr:circular",
               action="store_true",
               default=False,
               help="Considers that the input sequences are circular (e.g. mitochondrial or chloroplastic DNA).")

    add_option('--salt-concentration', '-a',
               dest="ecopcr:salt-concentration",
               action="store",
               type=float,
               default=0.05,
               metavar="<FLOAT>",
               help="Salt concentration used for estimating the Tm. Default: 0.05.")

    add_option('--salt-correction-method', '-m',
               dest="ecopcr:salt-correction-method",
               action="store",
               type=int,
               default=1,
               metavar="<1|2>",
               help="Defines the method used for estimating the Tm (melting temperature) "
                    "between the primers and their corresponding target sequences. "
                    "SANTALUCIA: 1, or OWCZARZY: 2. Default: 1.")

    # --- Output shaping ----------------------------------------------------
    add_option('--keep-nucs', '-D',
               dest="ecopcr:keep-nucs",
               action="store",
               type=int,
               default=0,
               metavar="<INTEGER>",
               help="Keeps the specified number of nucleotides on each side of the "
                    "in silico amplified sequences, (already including the amplified "
                    "DNA fragment plus the two target sequences of the primers).")

    add_option('--kingdom-mode', '-k',
               dest="ecopcr:kingdom-mode",
               action="store_true",
               default=False,
               help="Print in the output the kingdom of the in silico amplified sequences (default: print the superkingdom).")
def run(config):
    """Run the in silico PCR described by *config* and print the output view.

    Reads the ``ecopcr`` options plus ``config['obi']['inputURI']``,
    ``config['obi']['outputURI']`` and ``config['obi']['taxoURI']``, calls the
    C function ``obi_ecopcr`` and, on success, prints the ``repr`` of the
    output view before closing the output DMS.

    Raises:
        MemoryError: if a taxid array cannot be allocated.
        Exception: if ``obi_ecopcr`` returns a negative value.
    """
    cdef int32_t* restrict_to_taxids_p = NULL
    cdef int32_t* ignore_taxids_p = NULL

    ecopcr_config = config['ecopcr']
    restrict_to_taxids = ecopcr_config['restrict-to-taxid']
    ignore_taxids = ecopcr_config['ignore-taxid']
    restrict_to_taxids_len = len(restrict_to_taxids)
    ignore_taxids_len = len(ignore_taxids)

    # Everything that can raise runs inside the try block so that the C arrays
    # are always released, whichever step fails.
    try:
        # +1 for the -1 flagging the end of the array
        restrict_to_taxids_p = <int32_t*> malloc((restrict_to_taxids_len + 1) * sizeof(int32_t))
        if restrict_to_taxids_p == NULL:
            raise MemoryError("Could not allocate the array of taxids to restrict to")
        for i in range(restrict_to_taxids_len):
            restrict_to_taxids_p[i] = restrict_to_taxids[i]
        restrict_to_taxids_p[restrict_to_taxids_len] = -1

        # +1 for the -1 flagging the end of the array
        ignore_taxids_p = <int32_t*> malloc((ignore_taxids_len + 1) * sizeof(int32_t))
        if ignore_taxids_p == NULL:
            raise MemoryError("Could not allocate the array of taxids to ignore")
        for i in range(ignore_taxids_len):
            ignore_taxids_p[i] = ignore_taxids[i]
        ignore_taxids_p[ignore_taxids_len] = -1

        DMS.obi_atexit()

        logger("info", "obi ecopcr")

        # TODO Bad URI reading because current one is not adapted
        # The input URI is read as "<DMS path>/<view name>".
        i_uri = config['obi']['inputURI'].split('/')
        i_dms_name = i_uri[0]
        i_view_name = i_uri[1]

        # The output URI is either "<DMS path>/<view name>" or a bare view
        # name, in which case the output view goes into the input DMS.
        o_uri = config['obi']['outputURI'].split('/')
        if len(o_uri) == 2:
            o_dms_name = o_uri[0]
            o_view_name = o_uri[1]
        else:
            o_dms_name = i_dms_name
            o_view_name = o_uri[0]
        o_dms = open_uri(o_dms_name, input=False)[0]

        # The taxonomy name is the third '/'-separated component of the URI.
        taxonomy_name = config['obi']['taxoURI'].split('/')[2]

        # TODO: input DMS, taxonomy and primers in comments
        status = obi_ecopcr(tobytes(i_dms_name), tobytes(i_view_name), tobytes(taxonomy_name),
                            tobytes(o_dms_name), tobytes(o_view_name), b"ecopcr",
                            tobytes(ecopcr_config['primer1']), tobytes(ecopcr_config['primer2']),
                            ecopcr_config['error'],
                            ecopcr_config['min-length'], ecopcr_config['max-length'],
                            restrict_to_taxids_p, ignore_taxids_p,
                            ecopcr_config['circular'], ecopcr_config['salt-concentration'],
                            ecopcr_config['salt-correction-method'],
                            ecopcr_config['keep-nucs'], ecopcr_config['kingdom-mode'])
        if status < 0:
            raise Exception("Error running ecopcr")
    finally:
        # free(NULL) is a no-op, so this is safe even if an allocation failed
        # or was never reached.
        free(restrict_to_taxids_p)
        free(ignore_taxids_p)

    print("\n")
    print(repr(o_dms[o_view_name]))

    o_dms.close()

View File

@ -0,0 +1,28 @@
#cython: language_level=3
from obitools3.dms.capi.obidms cimport OBIDMS_p
from libc.stdint cimport int32_t
# C-level declaration of the ecoPCR entry point, exposed to Cython from
# "obi_ecopcr.h". The extern block is declared ``nogil``.
#
# Notes grounded in the `obi ecopcr` command that calls this function:
#   - a negative return value is treated as an error by the caller;
#   - `restrict_to_taxids` and `ignore_taxids` are passed as int32_t arrays
#     whose last element is -1, flagging the end of the array;
#   - `o_view_comments` receives the literal b"ecopcr";
#   - `salt_correction_method` is fed from an option documented as
#     SANTALUCIA: 1, OWCZARZY: 2.
# NOTE(review): the exact semantics of each parameter are defined by the C
# header, which is not visible here - confirm against obi_ecopcr.h.
cdef extern from "obi_ecopcr.h" nogil:
int obi_ecopcr(const char* input_dms_name,
const char* i_view_name,
const char* taxonomy_name,
const char* output_dms_name,
const char* o_view_name,
const char* o_view_comments,
const char* primer1,
const char* primer2,
int error_max,
int min_len,
int max_len,
int32_t* restrict_to_taxids,
int32_t* ignore_taxids,
int circular,
double salt_concentration,
int salt_correction_method,
int keep_nucleotides,
bint kingdom_mode)

View File

@ -9,6 +9,15 @@
../../../src/murmurhash2.c
../../../src/obi_align.c
../../../src/obi_clean.c
../../../src/obi_ecopcr.c
../../../src/libecoPCR/libthermo/nnparams.c
../../../src/libecoPCR/libapat/apat_parse.c
../../../src/libecoPCR/libapat/apat_search.c
../../../src/libecoPCR/libapat/libstki.c
../../../src/libecoPCR/ecoapat.c
../../../src/libecoPCR/ecodna.c
../../../src/libecoPCR/ecoError.c
../../../src/libecoPCR/ecoMalloc.c
../../../src/obiavl.c
../../../src/obiblob_indexer.c
../../../src/obiblob.c

View File

@ -9,6 +9,15 @@
../../src/murmurhash2.c
../../src/obi_align.c
../../src/obi_clean.c
../../src/obi_ecopcr.c
../../src/libecoPCR/libthermo/nnparams.c
../../src/libecoPCR/libapat/apat_parse.c
../../src/libecoPCR/libapat/apat_search.c
../../src/libecoPCR/libapat/libstki.c
../../src/libecoPCR/ecoapat.c
../../src/libecoPCR/ecodna.c
../../src/libecoPCR/ecoError.c
../../src/libecoPCR/ecoMalloc.c
../../src/obiavl.c
../../src/obiblob_indexer.c
../../src/obiblob.c