Compare commits
244 Commits
v3.0.0-bet
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3db93ee9c4 | ||
|
|
4844b20770 | ||
|
|
0d98a4f717 | ||
|
|
837ff1a1ba | ||
|
|
aeed42456a | ||
|
|
fb6e27bb5d | ||
|
|
6d94cdcc0d | ||
|
|
8a1f844645 | ||
|
|
791ccfb92e | ||
|
|
1c9a906f5b | ||
|
|
55b2679b23 | ||
|
|
9ea2124adc | ||
|
|
2130a949c7 | ||
|
|
eeb93afa7d | ||
|
|
755ce179ad | ||
|
|
7e492578b3 | ||
|
|
02e9df3ad1 | ||
|
|
55ada80500 | ||
|
|
ef9d9674b0 | ||
|
|
4f39bb2418 | ||
|
|
0a2b8adb50 | ||
|
|
f9b99a9397 | ||
|
|
ce2833c04b | ||
|
|
f64b3da30b | ||
|
|
388b3e0410 | ||
|
|
c9db990b83 | ||
|
|
3f253feb5e | ||
|
|
85d2bab607 | ||
|
|
53b3d81137 | ||
|
|
f6353fbf28 | ||
|
|
5a8b9dca5d | ||
|
|
8bd6d6c8e9 | ||
|
|
405e6ef420 | ||
|
|
fedacfafe7 | ||
|
|
2d66e0e965 | ||
|
|
f43856b712 | ||
|
|
9e0c319806 | ||
|
|
58b42cd977 | ||
|
|
34de90bce6 | ||
|
|
4be9f36f99 | ||
|
|
f10e78ba3c | ||
|
|
88c8463ed7 | ||
|
|
89168271ef | ||
|
|
82d2642000 | ||
|
|
99c1cd60d6 | ||
|
|
ce7ae4ac55 | ||
|
|
0b4283bb58 | ||
|
|
747f3efbb2 | ||
|
|
6c1a3aff47 | ||
|
|
e2932b05f2 | ||
|
|
32345b9ec4 | ||
|
|
9334cf6cc6 | ||
|
|
8ec13a294c | ||
|
|
3e45c34491 | ||
|
|
c2f3d90dc1 | ||
|
|
6b732d11d3 | ||
|
|
9eb833a0af | ||
|
|
6b7b0e3bd1 | ||
|
|
47691a8f58 | ||
|
|
b908b581c8 | ||
|
|
03c174fd7a | ||
|
|
2156588ff6 | ||
|
|
6ff29c6a6a | ||
|
|
51a3c68fb5 | ||
|
|
da91ffc2c7 | ||
|
|
c884615522 | ||
|
|
cb53381863 | ||
|
|
72b3e5d872 | ||
|
|
238e9f70f3 | ||
|
|
e099a16624 | ||
|
|
847c9c816d | ||
|
|
6026129ca8 | ||
|
|
169b6514b4 | ||
|
|
89b0c48141 | ||
|
|
7c02782e3c | ||
|
|
ecc4c2c78b | ||
|
|
f5413381fd | ||
|
|
3e93cfff7b | ||
|
|
6d445fe3ad | ||
|
|
824deb7e21 | ||
|
|
d579bb2749 | ||
|
|
10e5ebdbc0 | ||
|
|
8833110490 | ||
|
|
bd38449f2d | ||
|
|
904823c827 | ||
|
|
af68a1024c | ||
|
|
425fe25bd2 | ||
|
|
d48aed38d4 | ||
|
|
5e32f8523e | ||
|
|
8f1d94fd24 | ||
|
|
38f42cb0fb | ||
|
|
7f0f63cf26 | ||
|
|
cba78111c9 | ||
|
|
41fbae7b6c | ||
|
|
ad1fd3c341 | ||
|
|
fbf0f7dfb6 | ||
|
|
fda0edd0d8 | ||
|
|
382e37a6ae | ||
|
|
5cc3e29f75 | ||
|
|
a8e2aee281 | ||
| 13adb479d3 | |||
|
|
8ba7acdfe1 | ||
|
|
38051b1e4f | ||
|
|
52a2e21b38 | ||
|
|
d27a5b9115 | ||
|
|
20bd3350b4 | ||
|
|
2e191372d7 | ||
|
|
112e12cab0 | ||
|
|
b9b4cec5b5 | ||
|
|
199f3772e8 | ||
|
|
422a6450fa | ||
|
|
137c109f86 | ||
|
|
b6648ae81e | ||
|
|
f6dffbecfe | ||
|
|
c4696ac865 | ||
|
|
11a0945a9b | ||
|
|
f23c40c905 | ||
|
|
f99fc13b75 | ||
|
|
1da6aac1b8 | ||
|
|
159803b40a | ||
|
|
7dcbc34017 | ||
|
|
db2202c8b4 | ||
|
|
d33ff97846 | ||
|
|
1dcdf69f1f | ||
|
|
dec114eed6 | ||
|
|
f36691053b | ||
|
|
f2aa5fcf8b | ||
|
|
bccb3e6874 | ||
|
|
f5a17bea68 | ||
|
|
e28507639a | ||
|
|
e6feac93fe | ||
|
|
50b292b489 | ||
|
|
24a737aa55 | ||
|
|
8aa455ad8a | ||
|
|
46ca693ca9 | ||
|
|
9a9afde113 | ||
|
|
8dd403a118 | ||
|
|
9672f01c6a | ||
|
|
ed9549acfb | ||
|
|
9ace9989c4 | ||
|
|
a3ebe5f118 | ||
|
|
9100e14899 | ||
|
|
ccda0661ce | ||
|
|
aab59f2214 | ||
|
|
ade1107b42 | ||
|
|
9c7d24406f | ||
|
|
03bc9915f2 | ||
|
|
24b1dab573 | ||
|
|
7593673f3f | ||
|
|
aa01236cae | ||
|
|
49b8810a76 | ||
|
|
7a39df54c0 | ||
|
|
09e483b0d6 | ||
|
|
14a2579173 | ||
|
|
36a8aaa92e | ||
|
|
a17eb445c2 | ||
|
|
e4a32788c2 | ||
|
|
2442cc80bf | ||
|
|
aa836b2ace | ||
|
|
8776ce22e6 | ||
|
|
4aa772c405 | ||
|
|
b0b96ac37a | ||
|
|
687e42ad22 | ||
|
|
5fbbb6d304 | ||
|
|
359a9fe237 | ||
|
|
f9b6851f75 | ||
|
|
29a2652bbf | ||
|
|
2a2c233936 | ||
|
|
faf8ea9d86 | ||
|
|
ffe2485e94 | ||
|
|
6094ce2bbc | ||
|
|
a7dcf16c06 | ||
|
|
f13f8f6165 | ||
|
|
b5a29ac413 | ||
|
|
efd2b9d338 | ||
|
|
ca6e3e7aad | ||
|
|
76ed8e18e5 | ||
|
|
1d17f28aec | ||
|
|
fa834e4b8b | ||
|
|
a72fea3cc9 | ||
|
|
e9a37d8a6e | ||
|
|
ef074f8455 | ||
|
|
aec5e69f2c | ||
|
|
170ef3f1ba | ||
|
|
f999946582 | ||
|
|
773b36ec37 | ||
|
|
69cb434a6c | ||
|
|
55d4f98d60 | ||
|
|
0bec2631e8 | ||
|
|
e6b6c6fa84 | ||
|
|
974528b2e6 | ||
|
|
1b346b54f9 | ||
|
|
058f2ad8b3 | ||
|
|
60bfd3ae8d | ||
|
|
67bdee105a | ||
|
|
0f745e0113 | ||
|
|
da8de52ba4 | ||
|
|
4d36538c6e | ||
|
|
8d0b17d87d | ||
|
|
343999a627 | ||
|
|
e9a40630e9 | ||
|
|
8dbcd3025a | ||
|
|
4cf635d001 | ||
|
|
b7e7cc232a | ||
|
|
b6ab792ceb | ||
|
|
ddea5a2964 | ||
|
|
30852ab7d5 | ||
|
|
4d0299904e | ||
|
|
eef5156d95 | ||
|
|
e62c991bbc | ||
|
|
1218eed7fd | ||
|
|
cd9cea8c97 | ||
|
|
98cfb70d73 | ||
|
|
b9f68c76c8 | ||
|
|
0b98371688 | ||
|
|
f0d152fcbd | ||
|
|
8019dee68e | ||
|
|
0b4a234671 | ||
|
|
d32cfdcce5 | ||
|
|
219c0d6fdc | ||
|
|
dc9f897917 | ||
|
|
bb72682f7d | ||
|
|
52920c3c71 | ||
|
|
18c22cecf9 | ||
|
|
1bfb96023c | ||
|
|
c67d668989 | ||
|
|
db0ac37d41 | ||
|
|
d0c21ecd39 | ||
|
|
53212168a2 | ||
|
|
b4b2e62195 | ||
|
|
ced82c4242 | ||
|
|
a524f8829e | ||
|
|
5c9091e9eb | ||
|
|
822000cb70 | ||
|
|
b9cd9bee9a | ||
|
|
b1f3e082f9 | ||
|
|
6c018b403c | ||
|
|
694d1934a8 | ||
|
|
fc3ac03630 | ||
|
|
d75e54a078 | ||
|
|
6bfd7441f3 | ||
|
|
81a179239c | ||
|
|
35ce37c0f7 | ||
|
|
53f18316b0 |
@@ -6,7 +6,7 @@ recursive-include doc/sphinx/source *.txt *.rst *.py
|
||||
recursive-include doc/sphinx/sphinxext *.py
|
||||
include doc/sphinx/Makefile
|
||||
include doc/sphinx/Doxyfile
|
||||
include README.txt
|
||||
include README.md
|
||||
include requirements.txt
|
||||
include scripts/obi
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#/usr/bin/env bash
|
||||
|
||||
_obi_comp ()
|
||||
{
|
||||
@@ -1,3 +1,9 @@
|
||||
import codecs
|
||||
|
||||
def unescaped_str(arg_str):
|
||||
return arg_str.encode('latin-1', 'backslashreplace').decode('unicode-escape')
|
||||
|
||||
|
||||
def __addInputOption(optionManager):
|
||||
|
||||
optionManager.add_argument(
|
||||
@@ -39,6 +45,30 @@ def __addImportInputOption(optionManager):
|
||||
const=b'fastq',
|
||||
help="Input file is in fastq format")
|
||||
|
||||
group.add_argument('--silva-input',
|
||||
action="store_const", dest="obi:inputformat",
|
||||
default=None,
|
||||
const=b'silva',
|
||||
help="Input file is in SILVA fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence.")
|
||||
|
||||
group.add_argument('--rdp-input',
|
||||
action="store_const", dest="obi:inputformat",
|
||||
default=None,
|
||||
const=b'rdp',
|
||||
help="Input file is in RDP training set fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence.")
|
||||
|
||||
group.add_argument('--unite-input',
|
||||
action="store_const", dest="obi:inputformat",
|
||||
default=None,
|
||||
const=b'unite',
|
||||
help="Input file is in UNITE fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence.")
|
||||
|
||||
group.add_argument('--sintax-input',
|
||||
action="store_const", dest="obi:inputformat",
|
||||
default=None,
|
||||
const=b'sintax',
|
||||
help="Input file is in SINTAX fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence.")
|
||||
|
||||
group.add_argument('--embl-input',
|
||||
action="store_const", dest="obi:inputformat",
|
||||
default=None,
|
||||
@@ -55,7 +85,7 @@ def __addImportInputOption(optionManager):
|
||||
action="store_const", dest="obi:inputformat",
|
||||
default=None,
|
||||
const=b'ngsfilter',
|
||||
help="Input file is an ngsfilter file")
|
||||
help="Input file is an ngsfilter file. If not using tags, use ':' or 'None:None' or '-:-' or any combination")
|
||||
|
||||
group.add_argument('--ecopcr-result-input',
|
||||
action="store_const", dest="obi:inputformat",
|
||||
@@ -119,15 +149,15 @@ def __addImportInputOption(optionManager):
|
||||
def __addTabularOption(optionManager):
|
||||
group = optionManager.add_argument_group("Input and output format options for tabular files")
|
||||
|
||||
group.add_argument('--header',
|
||||
action="store_true", dest="obi:header",
|
||||
default=False,
|
||||
help="First line of tabular file contains column names")
|
||||
group.add_argument('--no-header',
|
||||
action="store_false", dest="obi:header",
|
||||
default=True,
|
||||
help="Don't print the header (first line with column names")
|
||||
|
||||
group.add_argument('--sep',
|
||||
action="store", dest="obi:sep",
|
||||
default="\t",
|
||||
type=str,
|
||||
type=unescaped_str,
|
||||
help="Column separator")
|
||||
|
||||
|
||||
@@ -159,6 +189,16 @@ def __addTabularInputOption(optionManager):
|
||||
help="Lines starting by this char are considered as comment")
|
||||
|
||||
|
||||
def __addTabularOutputOption(optionManager):
|
||||
group = optionManager.add_argument_group("Output format options for tabular files")
|
||||
|
||||
__addTabularOption(optionManager)
|
||||
|
||||
group.add_argument('--na-int-stay-na',
|
||||
action="store_false", dest="obi:na_int_to_0",
|
||||
help="NA (Non available) integer values should be exported as NA in tabular output (default: they are converted to 0 for tabular output).") # TODO
|
||||
|
||||
|
||||
def __addTaxdumpInputOption(optionManager): # TODO maybe not the best way to do it
|
||||
group = optionManager.add_argument_group("Input format options for taxdump")
|
||||
|
||||
@@ -192,6 +232,10 @@ def addTabularInputOption(optionManager):
|
||||
__addTabularInputOption(optionManager)
|
||||
|
||||
|
||||
def addTabularOutputOption(optionManager):
|
||||
__addTabularOutputOption(optionManager)
|
||||
|
||||
|
||||
def addTaxonomyOption(optionManager):
|
||||
__addTaxonomyOption(optionManager)
|
||||
|
||||
@@ -204,6 +248,7 @@ def addAllInputOption(optionManager):
|
||||
__addInputOption(optionManager)
|
||||
__addImportInputOption(optionManager)
|
||||
__addTabularInputOption(optionManager)
|
||||
__addTabularOutputOption(optionManager)
|
||||
__addTaxonomyOption(optionManager)
|
||||
__addTaxdumpInputOption(optionManager)
|
||||
|
||||
@@ -222,7 +267,7 @@ def __addDMSOutputOption(optionManager):
|
||||
group.add_argument('--no-create-dms',
|
||||
action="store_true", dest="obi:nocreatedms",
|
||||
default=False,
|
||||
help="Don't create an output DMS it does not already exist")
|
||||
help="Don't create an output DMS if it does not already exist")
|
||||
|
||||
|
||||
def __addEltLimitOption(optionManager):
|
||||
@@ -264,6 +309,35 @@ def __addExportOutputOption(optionManager):
|
||||
const=b'tabular',
|
||||
help="Output file is in tabular format")
|
||||
|
||||
group.add_argument('--metabaR-output',
|
||||
action="store_const", dest="obi:outputformat",
|
||||
default=None,
|
||||
const=b'metabaR',
|
||||
help="Export the files needed by the obifiles_to_metabarlist function of the metabaR package")
|
||||
|
||||
group.add_argument('--metabaR-prefix',
|
||||
action="store", dest="obi:metabarprefix",
|
||||
type=str,
|
||||
help="Prefix for the files when using --metabaR-output option")
|
||||
|
||||
group.add_argument('--metabaR-ngsfilter',
|
||||
action="store", dest="obi:metabarngsfilter",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the ngsfilter view when using --metabaR-output option (if not provided, it is not exported)")
|
||||
|
||||
group.add_argument('--metabaR-samples',
|
||||
action="store", dest="obi:metabarsamples",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the sample metadata view when using --metabaR-output option (if not provided, it is built as just a list of the sample names)")
|
||||
|
||||
group.add_argument('--only-keys',
|
||||
action="append", dest="obi:only_keys",
|
||||
type=str,
|
||||
default=[],
|
||||
help="Only export the given keys (columns).")
|
||||
|
||||
group.add_argument('--print-na',
|
||||
action="store_true", dest="obi:printna",
|
||||
default=False,
|
||||
@@ -296,14 +370,14 @@ def addTabularOutputOption(optionManager):
|
||||
|
||||
def addExportOutputOption(optionManager):
|
||||
__addExportOutputOption(optionManager)
|
||||
__addTabularOption(optionManager)
|
||||
__addTabularOutputOption(optionManager)
|
||||
|
||||
|
||||
def addAllOutputOption(optionManager):
|
||||
__addOutputOption(optionManager)
|
||||
__addDMSOutputOption(optionManager)
|
||||
__addExportOutputOption(optionManager)
|
||||
__addTabularOption(optionManager)
|
||||
__addTabularOutputOption(optionManager)
|
||||
|
||||
|
||||
def addNoProgressBarOption(optionManager):
|
||||
|
||||
@@ -30,12 +30,12 @@ cdef class ProgressBar:
|
||||
off_t maxi,
|
||||
dict config={},
|
||||
str head="",
|
||||
double seconde=0.1,
|
||||
double seconds=5,
|
||||
cut=False):
|
||||
|
||||
self.starttime = self.clock()
|
||||
self.lasttime = self.starttime
|
||||
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
|
||||
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
|
||||
self.freq = 1
|
||||
self.cycle = 0
|
||||
self.arrow = 0
|
||||
|
||||
231
python/obitools3/commands/addtaxids.pyx
Executable file
231
python/obitools3/commands/addtaxids.pyx
Executable file
@@ -0,0 +1,231 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.column.column cimport Column
|
||||
from functools import reduce
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes, tostr
|
||||
from io import BufferedWriter
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN, \
|
||||
QUALITY_COLUMN, \
|
||||
COUNT_COLUMN, \
|
||||
TAXID_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_INT
|
||||
from obitools3.dms.capi.obitaxonomy cimport MIN_LOCAL_TAXID
|
||||
import time
|
||||
import math
|
||||
import sys
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Annotate sequences with their corresponding NCBI taxid found from the taxon scientific name"
|
||||
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi addtaxids specific options')
|
||||
|
||||
group.add_argument('-t', '--taxid-tag',
|
||||
action="store",
|
||||
dest="addtaxids:taxid_tag",
|
||||
metavar="<TAXID_TAG>",
|
||||
default=b"TAXID",
|
||||
help="Name of the tag to store the found taxid "
|
||||
"(default: 'TAXID').")
|
||||
|
||||
group.add_argument('-n', '--taxon-name-tag',
|
||||
action="store",
|
||||
dest="addtaxids:taxon_name_tag",
|
||||
metavar="<SCIENTIFIC_NAME_TAG>",
|
||||
default=b"SCIENTIFIC_NAME",
|
||||
help="Name of the tag giving the scientific name of the taxon "
|
||||
"(default: 'SCIENTIFIC_NAME').")
|
||||
|
||||
group.add_argument('-g', '--try-genus-match',
|
||||
action="store_true", dest="addtaxids:try_genus_match",
|
||||
default=False,
|
||||
help="Try matching the first word of <SCIENTIFIC_NAME_TAG> when can't find corresponding taxid for a taxon. "
|
||||
"If there is a match it is added in the 'parent_taxid' tag. (Can be used by 'obi taxonomy' to add the taxon under that taxid).")
|
||||
|
||||
group.add_argument('-a', '--restricting-ancestor',
|
||||
action="store",
|
||||
dest="addtaxids:restricting_ancestor",
|
||||
metavar="<RESTRICTING_ANCESTOR>",
|
||||
default=None,
|
||||
help="Enables to restrict the search of taxids under an ancestor specified by its taxid.")
|
||||
|
||||
group.add_argument('-l', '--log-file',
|
||||
action="store",
|
||||
dest="addtaxids:log_file",
|
||||
metavar="<LOG_FILE>",
|
||||
default='',
|
||||
help="Path to a log file to write informations about not found taxids.")
|
||||
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info", "obi addtaxids")
|
||||
|
||||
# Open the input
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
if input is None:
|
||||
raise Exception("Could not read input view")
|
||||
i_dms = input[0]
|
||||
i_view = input[1]
|
||||
i_view_name = input[1].name
|
||||
|
||||
# Open the output: only the DMS, as the output view is going to be created by cloning the input view
|
||||
# (could eventually be done via an open_uri() argument)
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
input=False,
|
||||
dms_only=True)
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view_name = output[1]
|
||||
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in output DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
imported_view_name = o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
||||
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
||||
if i_dms != o_dms:
|
||||
imported_view_name = i_view_name
|
||||
i=0
|
||||
while imported_view_name in o_dms: # Making sure view name is unique in output DMS
|
||||
imported_view_name = i_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
||||
i_view = o_dms[imported_view_name]
|
||||
|
||||
# Clone output view from input view
|
||||
o_view = i_view.clone(o_view_name)
|
||||
if o_view is None:
|
||||
raise Exception("Couldn't create output view")
|
||||
i_view.close()
|
||||
|
||||
# Open taxonomy
|
||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||
if taxo_uri is None or taxo_uri[2] == bytes:
|
||||
raise Exception("Couldn't open taxonomy")
|
||||
taxo = taxo_uri[1]
|
||||
|
||||
# Initialize the progress bar
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
if config['addtaxids']['log_file']:
|
||||
logfile = open(config['addtaxids']['log_file'], 'w')
|
||||
else:
|
||||
logfile = None
|
||||
if config['addtaxids']['try_genus_match']:
|
||||
try_genus = True
|
||||
else:
|
||||
try_genus = False
|
||||
if 'restricting_ancestor' in config['addtaxids']:
|
||||
res_anc = int(config['addtaxids']['restricting_ancestor'])
|
||||
else:
|
||||
res_anc = None
|
||||
taxid_column_name = config['addtaxids']['taxid_tag']
|
||||
parent_taxid_column_name = "PARENT_TAXID" # TODO macro
|
||||
taxon_name_column_name = config['addtaxids']['taxon_name_tag']
|
||||
taxid_column = Column.new_column(o_view, taxid_column_name, OBI_INT)
|
||||
parent_taxid_column = Column.new_column(o_view, parent_taxid_column_name, OBI_INT)
|
||||
taxon_name_column = o_view[taxon_name_column_name]
|
||||
|
||||
found_count = 0
|
||||
not_found_count = 0
|
||||
parent_found_count = 0
|
||||
|
||||
for i in range(len(o_view)):
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
taxon_name = taxon_name_column[i]
|
||||
taxon = taxo.get_taxon_by_name(taxon_name, res_anc)
|
||||
if taxon is not None:
|
||||
taxid_column[i] = taxon.taxid
|
||||
found_count+=1
|
||||
elif try_genus: # try finding genus or other parent taxon from the first word
|
||||
#print(i, o_view[i].id)
|
||||
taxon_name_sp = taxon_name.split(b" ")
|
||||
taxon = taxo.get_taxon_by_name(taxon_name_sp[0], res_anc)
|
||||
if taxon is not None:
|
||||
parent_taxid_column[i] = taxon.taxid
|
||||
parent_found_count+=1
|
||||
if logfile:
|
||||
print("Found parent taxon for", tostr(taxon_name), file=logfile)
|
||||
else:
|
||||
not_found_count+=1
|
||||
if logfile:
|
||||
print("No taxid found for", tostr(taxon_name), file=logfile)
|
||||
else:
|
||||
not_found_count+=1
|
||||
if logfile:
|
||||
print("No taxid found for", tostr(taxon_name), file=logfile)
|
||||
|
||||
except Exception, e:
|
||||
raise RollbackException("obi addtaxids error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
logger("info", "\nTaxids found: "+str(found_count)+"/"+str(len(o_view))+" ("+str(round(found_count*100.0/len(o_view), 2))+"%)")
|
||||
if config['addtaxids']['try_genus_match']:
|
||||
logger("info", "\nParent taxids found: "+str(parent_found_count)+"/"+str(len(o_view))+" ("+str(round(parent_found_count*100.0/len(o_view), 2))+"%)")
|
||||
logger("info", "\nTaxids not found: "+str(not_found_count)+"/"+str(len(o_view))+" ("+str(round(not_found_count*100.0/len(o_view), 2))+"%)")
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
input_dms_name=[input[0].name]
|
||||
input_view_name=[i_view_name]
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "addtaxids", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(o_dms, imported_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
@@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@@ -12,17 +12,21 @@ from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
|
||||
obi_lcs_align_two_columns
|
||||
|
||||
from io import BufferedWriter
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
__title__="Aligns one sequence column with itself or two sequence columns"
|
||||
__title__="Align one sequence column with itself or two sequence columns"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi align specific options')
|
||||
|
||||
@@ -154,7 +158,7 @@ def run(config):
|
||||
i_view_name = i_uri.split(b"/")[0]
|
||||
i_column_name = b""
|
||||
i_element_name = b""
|
||||
if len(i_uri.split(b"/")) == 2:
|
||||
if len(i_uri.split(b"/")) >= 2:
|
||||
i_column_name = i_uri.split(b"/")[1]
|
||||
if len(i_uri.split(b"/")) == 3:
|
||||
i_element_name = i_uri.split(b"/")[2]
|
||||
@@ -177,7 +181,7 @@ def run(config):
|
||||
i_dms_name_2 = i_dms_2.name
|
||||
i_uri_2 = input_2[1]
|
||||
original_i_view_name_2 = i_uri_2.split(b"/")[0]
|
||||
if len(i_uri_2.split(b"/")) == 2:
|
||||
if len(i_uri_2.split(b"/")) >= 2:
|
||||
i_column_name_2 = i_uri_2.split(b"/")[1]
|
||||
if len(i_uri_2.split(b"/")) == 3:
|
||||
i_element_name_2 = i_uri_2.split(b"/")[2]
|
||||
@@ -201,20 +205,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_dms_name = o_dms.name
|
||||
final_o_view_name = output[1]
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@@ -263,12 +267,19 @@ def run(config):
|
||||
View.delete_view(i_dms, i_view_name_2)
|
||||
i_dms_2.close()
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close()
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -6,7 +6,7 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
||||
@@ -14,14 +14,16 @@ from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
|
||||
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
|
||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
import os
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
__title__="Aligns paired-ended reads"
|
||||
__title__="Align paired-ended reads"
|
||||
|
||||
|
||||
|
||||
@@ -29,6 +31,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi alignpairedend specific options')
|
||||
|
||||
@@ -39,12 +42,13 @@ def addOptions(parser):
|
||||
type=str,
|
||||
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||
|
||||
group.add_argument('--score-min',
|
||||
action="store", dest="alignpairedend:smin",
|
||||
metavar="#.###",
|
||||
default=None,
|
||||
type=float,
|
||||
help="Minimum score for keeping alignments")
|
||||
# group.add_argument('--score-min',
|
||||
# action="store", dest="alignpairedend:smin",
|
||||
# metavar="#.###",
|
||||
# default=None,
|
||||
# type=float,
|
||||
# help="Minimum score for keeping alignments. "
|
||||
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
|
||||
|
||||
# group.add_argument('-A', '--true-ali',
|
||||
# action="store_true", dest="alignpairedend:trueali",
|
||||
@@ -102,7 +106,7 @@ def alignmentIterator(entries, aligner):
|
||||
seqR = reverse[i]
|
||||
else:
|
||||
seqF = Nuc_Seq.new_from_stored(entries[i])
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality=seqF[REVERSE_QUALITY_COLUMN])
|
||||
seqR.index = i
|
||||
|
||||
ali = aligner(seqF, seqR)
|
||||
@@ -170,18 +174,29 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
view = output[1]
|
||||
output_0 = output[0]
|
||||
o_dms = output[0]
|
||||
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
|
||||
|
||||
if 'smin' in config['alignpairedend']:
|
||||
smin = config['alignpairedend']['smin']
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
i_dms = forward.dms # using any dms
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
|
||||
else:
|
||||
smin = 0
|
||||
o_view = output[1]
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entries_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
#if config['alignpairedend']['trueali']:
|
||||
# kmer_ali = False
|
||||
# aligner = buildAlignment
|
||||
@@ -190,67 +205,89 @@ def run(config):
|
||||
if type(entries) == list:
|
||||
forward = entries[0]
|
||||
reverse = entries[1]
|
||||
aligner = Kmer_similarity(forward, \
|
||||
view2=reverse, \
|
||||
kmer_size=config['alignpairedend']['kmersize'], \
|
||||
reversed_column=None)
|
||||
if len(forward) == 0 or len(reverse) == 0:
|
||||
aligner = None
|
||||
else:
|
||||
aligner = Kmer_similarity(forward, \
|
||||
view2=reverse, \
|
||||
kmer_size=config['alignpairedend']['kmersize'], \
|
||||
reversed_column=None)
|
||||
else:
|
||||
aligner = Kmer_similarity(entries, \
|
||||
column2=entries[REVERSE_SEQ_COLUMN_NAME], \
|
||||
qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], \
|
||||
kmer_size=config['alignpairedend']['kmersize'], \
|
||||
reversed_column=entries[b'reversed']) # column created by the ngsfilter tool
|
||||
if len(entries) == 0:
|
||||
aligner = None
|
||||
else:
|
||||
aligner = Kmer_similarity(entries, \
|
||||
column2=entries[REVERSE_SEQUENCE_COLUMN], \
|
||||
qual_column2=entries[REVERSE_QUALITY_COLUMN], \
|
||||
kmer_size=config['alignpairedend']['kmersize'], \
|
||||
reversed_column=entries[b'reversed']) # column created by the ngsfilter tool
|
||||
|
||||
ba = alignmentIterator(entries, aligner)
|
||||
|
||||
|
||||
i = 0
|
||||
for ali in ba:
|
||||
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
PyErr_CheckSignals()
|
||||
|
||||
consensus = view[i]
|
||||
consensus = o_view[i]
|
||||
|
||||
if two_views:
|
||||
consensus[b"R1_parent"] = forward[i].id
|
||||
consensus[b"R2_parent"] = reverse[i].id
|
||||
|
||||
if not two_views:
|
||||
seqF = entries[i]
|
||||
else:
|
||||
seqF = forward[i]
|
||||
|
||||
if ali.score > smin and ali.consensus_len > 0 :
|
||||
if ali.overlap_len > 0 :
|
||||
buildConsensus(ali, consensus, seqF)
|
||||
else:
|
||||
if not two_views:
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality = seqF[REVERSE_QUALITY_COLUMN_NAME])
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality = seqF[REVERSE_QUALITY_COLUMN])
|
||||
else:
|
||||
seqR = reverse[i]
|
||||
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
||||
|
||||
consensus[b"smin"] = smin
|
||||
|
||||
|
||||
if kmer_ali :
|
||||
ali.free()
|
||||
|
||||
i+=1
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if kmer_ali :
|
||||
if kmer_ali and aligner is not None:
|
||||
aligner.free()
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
input[0].close()
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If stdout output, delete the temporary imported view used to create the final file
|
||||
if type(output_0)==BufferedWriter:
|
||||
View_NUC_SEQS.delete_view(o_dms, o_view_name)
|
||||
output_0.close()
|
||||
|
||||
# Close all DMS
|
||||
input[0].close(force=True)
|
||||
if two_views:
|
||||
rinput[0].close()
|
||||
output[0].close()
|
||||
rinput[0].close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -4,16 +4,20 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from functools import reduce
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from io import BufferedWriter
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN, \
|
||||
QUALITY_COLUMN, \
|
||||
COUNT_COLUMN
|
||||
COUNT_COLUMN, \
|
||||
TAXID_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_STR
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
import time
|
||||
import math
|
||||
@@ -33,6 +37,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi annotate specific options')
|
||||
|
||||
@@ -175,8 +180,8 @@ def sequenceTaggerGenerator(config, taxo=None):
|
||||
counter[0]+=1
|
||||
|
||||
for rank in annoteRank:
|
||||
if 'taxid' in seq:
|
||||
taxid = seq['taxid']
|
||||
if TAXID_COLUMN in seq:
|
||||
taxid = seq[TAXID_COLUMN]
|
||||
if taxid is not None:
|
||||
rtaxid = taxo.get_taxon_at_rank(taxid, rank)
|
||||
if rtaxid is not None:
|
||||
@@ -184,64 +189,58 @@ def sequenceTaggerGenerator(config, taxo=None):
|
||||
else:
|
||||
scn=None
|
||||
seq[rank]=rtaxid
|
||||
if "%s_name"%rank not in seq.view:
|
||||
Column.new_column(seq.view, "%s_name"%rank, OBI_STR)
|
||||
seq["%s_name"%rank]=scn
|
||||
|
||||
if add_rank:
|
||||
seq['seq_rank']=counter[0]
|
||||
|
||||
for i,v in toSet:
|
||||
#try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(v, environ, seq)
|
||||
#except Exception,e: # TODO discuss usefulness of this
|
||||
# if options.onlyValid:
|
||||
# raise e
|
||||
# val = v
|
||||
try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(v, environ, seq)
|
||||
except Exception: # set string if not a valid expression
|
||||
val = v
|
||||
seq[i]=val
|
||||
|
||||
if length:
|
||||
seq['seq_length']=len(seq)
|
||||
|
||||
if newId is not None:
|
||||
# try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(newId, environ, seq)
|
||||
# except Exception,e:
|
||||
# if options.onlyValid:
|
||||
# raise e
|
||||
# val = newId
|
||||
try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(newId, environ, seq)
|
||||
except Exception: # set string if not a valid expression
|
||||
val = newId
|
||||
seq.id=val
|
||||
|
||||
if newDef is not None:
|
||||
# try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(newDef, environ, seq)
|
||||
# except Exception,e:
|
||||
# if options.onlyValid:
|
||||
# raise e
|
||||
# val = newDef
|
||||
try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(newDef, environ, seq)
|
||||
except Exception: # set string if not a valid expression
|
||||
val = newDef
|
||||
seq.definition=val
|
||||
#
|
||||
|
||||
if newSeq is not None:
|
||||
# try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(newSeq, environ, seq)
|
||||
# except Exception,e:
|
||||
# if options.onlyValid:
|
||||
# raise e
|
||||
# val = newSeq
|
||||
try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
val = eval(newSeq, environ, seq)
|
||||
except Exception: # set string if not a valid expression
|
||||
val = newSeq
|
||||
seq.seq=val
|
||||
if 'seq_length' in seq:
|
||||
seq['seq_length']=len(seq)
|
||||
@@ -251,15 +250,14 @@ def sequenceTaggerGenerator(config, taxo=None):
|
||||
seq.view.delete_column(QUALITY_COLUMN)
|
||||
|
||||
if run is not None:
|
||||
# try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
eval(run, environ, seq)
|
||||
# except Exception,e:
|
||||
# if options.onlyValid:
|
||||
# raise e
|
||||
try:
|
||||
if taxo is not None:
|
||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
else:
|
||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||
eval(run, environ, seq)
|
||||
except Exception,e:
|
||||
raise e
|
||||
|
||||
return sequenceTagger
|
||||
|
||||
@@ -286,8 +284,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view_name = output[1]
|
||||
|
||||
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in output DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
imported_view_name = o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
||||
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
||||
if i_dms != o_dms:
|
||||
@@ -298,7 +307,7 @@ def run(config):
|
||||
i+=1
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
||||
i_view = o_dms[imported_view_name]
|
||||
|
||||
|
||||
# Clone output view from input view
|
||||
o_view = i_view.clone(o_view_name)
|
||||
if o_view is None:
|
||||
@@ -315,7 +324,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
|
||||
@@ -354,14 +366,16 @@ def run(config):
|
||||
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
|
||||
for i in range(len(o_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
sequenceTagger(o_view[i])
|
||||
|
||||
except Exception, e:
|
||||
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@@ -371,15 +385,21 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(o_dms, imported_view_name)
|
||||
o_dms.close()
|
||||
i_dms.close()
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -4,17 +4,19 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.build_reference_db cimport build_reference_db
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
||||
__title__="Build a reference database for ecotag"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
@@ -22,16 +24,16 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi build_ref_db specific options')
|
||||
|
||||
group.add_argument('--threshold','-t',
|
||||
action="store", dest="build_ref_db:threshold",
|
||||
metavar='<THRESHOLD>',
|
||||
default=0.0,
|
||||
default=0.99,
|
||||
type=float,
|
||||
help="Score threshold as a normalized identity, e.g. 0.95 for an identity of 95%%. Default: 0.00"
|
||||
" (no threshold).")
|
||||
help="Score threshold as a normalized identity, e.g. 0.95 for an identity of 95%%. Default: 0.99.")
|
||||
|
||||
|
||||
def run(config):
|
||||
@@ -56,17 +58,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@@ -80,26 +85,33 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
comments = View.print_config(config, "build_ref_db", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
|
||||
|
||||
if build_reference_db(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), tobytes(o_view_name), comments, config['build_ref_db']['threshold']) < 0:
|
||||
raise Exception("Error building a reference database")
|
||||
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
if i_dms != o_dms:
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close()
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
|
||||
168
python/obitools3/commands/cat.pyx
Executable file
168
python/obitools3/commands/cat.pyx
Executable file
@@ -0,0 +1,168 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_COLUMN, \
|
||||
QUALITY_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
from io import BufferedWriter
|
||||
import time
|
||||
import sys
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Concatenate views"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi cat specific options')
|
||||
|
||||
group.add_argument("-c",
|
||||
action="append", dest="cat:views_to_cat",
|
||||
metavar="<VIEW_NAME>",
|
||||
default=[],
|
||||
type=str,
|
||||
help="URI of a view to concatenate. (e.g. 'my_dms/my_view'). "
|
||||
"Several -c options can be used on the same "
|
||||
"command line.")
|
||||
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info", "obi cat")
|
||||
|
||||
# Check the views to concatenate
|
||||
idms_list = []
|
||||
iview_list = []
|
||||
total_len = 0
|
||||
remove_qual = False
|
||||
remove_rev_qual = False
|
||||
v_type = View_NUC_SEQS
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
input = open_uri(v_uri)
|
||||
if input is None:
|
||||
raise Exception("Could not read input view")
|
||||
i_dms = input[0]
|
||||
i_view = input[1]
|
||||
if input[2] != View_NUC_SEQS: # Check view type (output view is nuc_seqs view if all input view are nuc_seqs view)
|
||||
v_type = View
|
||||
if QUALITY_COLUMN not in i_view: # Check if keep quality column in output view (if all input views have it)
|
||||
remove_qual = True
|
||||
if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
|
||||
remove_rev_qual = True
|
||||
total_len += len(i_view)
|
||||
idms_list.append(i_dms)
|
||||
iview_list.append(i_view.name)
|
||||
i_view.close()
|
||||
|
||||
# Open the output: only the DMS
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
input=False,
|
||||
newviewtype=v_type)
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view = output[1]
|
||||
|
||||
# stdout output
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
|
||||
# Initialize quality columns and their associated sequence columns if needed
|
||||
if type(output_0) != BufferedWriter:
|
||||
if not remove_qual:
|
||||
if NUC_SEQUENCE_COLUMN not in o_view:
|
||||
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
||||
if not remove_rev_qual:
|
||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||
|
||||
# Initialize multiple elements columns
|
||||
if type(output_0)!=BufferedWriter:
|
||||
dict_cols = {}
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
v = open_uri(v_uri)[1]
|
||||
for coln in v.keys():
|
||||
col = v[coln]
|
||||
if v[coln].nb_elements_per_line > 1:
|
||||
if coln not in dict_cols:
|
||||
dict_cols[coln] = {}
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||
else:
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||
v.close()
|
||||
for coln in dict_cols:
|
||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']), dict_column=True)
|
||||
|
||||
# Initialize the progress bar
|
||||
if not config['obi']['noprogressbar']:
|
||||
pb = ProgressBar(total_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
i = 0
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
v = open_uri(v_uri)[1]
|
||||
for entry in v:
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if type(output_0)==BufferedWriter:
|
||||
rep = repr(entry)
|
||||
output_0.write(str2bytes(rep)+b"\n")
|
||||
else:
|
||||
try:
|
||||
o_view[i] = entry
|
||||
except:
|
||||
print("\nError with entry:", repr(entry))
|
||||
print(repr(o_view))
|
||||
i+=1
|
||||
v.close()
|
||||
|
||||
# Deletes quality columns if needed
|
||||
if type(output_0)!=BufferedWriter:
|
||||
if QUALITY_COLUMN in o_view and remove_qual :
|
||||
o_view.delete_column(QUALITY_COLUMN)
|
||||
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
for d in idms_list:
|
||||
d.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
@@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiclean cimport obi_clean
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
|
||||
|
||||
@@ -21,7 +22,8 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi clean specific options')
|
||||
|
||||
group.add_argument('--distance', '-d',
|
||||
@@ -36,8 +38,7 @@ def addOptions(parser):
|
||||
dest="clean:sample-tag-name",
|
||||
metavar="<SAMPLE TAG NAME>",
|
||||
type=str,
|
||||
default="merged_sample",
|
||||
help="Name of the tag where sample counts are kept.")
|
||||
help="Name of the tag where merged sample count informations are kept (typically generated by obi uniq, usually MERGED_sample, default: None).")
|
||||
|
||||
group.add_argument('--ratio', '-r',
|
||||
action="store", dest="clean:ratio",
|
||||
@@ -53,11 +54,11 @@ def addOptions(parser):
|
||||
default=False,
|
||||
help="Only sequences labeled as heads are kept in the output. Default: False")
|
||||
|
||||
group.add_argument('--cluster-tags', '-C',
|
||||
action="store_true",
|
||||
dest="clean:cluster-tags",
|
||||
default=False,
|
||||
help="Adds tags for each sequence giving its cluster's head and weight for each sample.")
|
||||
# group.add_argument('--cluster-tags', '-C',
|
||||
# action="store_true",
|
||||
# dest="clean:cluster-tags",
|
||||
# default=False,
|
||||
# help="Adds tags for each sequence giving its cluster's head and weight for each sample.")
|
||||
|
||||
group.add_argument('--thread-count','-p', # TODO should probably be in a specific option group
|
||||
action="store", dest="clean:thread-count",
|
||||
@@ -89,17 +90,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, run obiclean creating a temporary view that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@@ -107,6 +111,9 @@ def run(config):
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
|
||||
|
||||
if 'sample-tag-name' not in config['clean']:
|
||||
config['clean']['sample-tag-name'] = ""
|
||||
|
||||
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
|
||||
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
|
||||
raise Exception("Error running obiclean")
|
||||
@@ -114,18 +121,26 @@ def run(config):
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
if i_dms != o_dms:
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close()
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -21,7 +21,10 @@ def run(config):
|
||||
|
||||
logger("info", "obi clean_dms")
|
||||
|
||||
if obi_clean_dms(tobytes(config['obi']['inputURI'])) < 0 :
|
||||
dms_path = tobytes(config['obi']['inputURI'])
|
||||
if b'.obidms' in dms_path:
|
||||
dms_path = dms_path.split(b'.obidms')[0]
|
||||
if obi_clean_dms(dms_path) < 0 :
|
||||
raise Exception("Error cleaning DMS", config['obi']['inputURI'])
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -10,7 +10,7 @@ from obitools3.dms.capi.obiview cimport COUNT_COLUMN
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Counts sequence records"
|
||||
__title__="Count sequence records"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
@@ -22,13 +22,19 @@ def addOptions(parser):
|
||||
group.add_argument('-s','--sequence',
|
||||
action="store_true", dest="count:sequence",
|
||||
default=False,
|
||||
help="Prints only the number of sequence records.")
|
||||
help="Prints only the number of sequence records (much faster, default: False).")
|
||||
|
||||
group.add_argument('-a','--all',
|
||||
action="store_true", dest="count:all",
|
||||
default=False,
|
||||
help="Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False).")
|
||||
|
||||
group.add_argument('-c','--count-tag',
|
||||
action="store", dest="count:countcol",
|
||||
default='COUNT',
|
||||
type=str,
|
||||
help="Name of the tag/column associated with the count information (default: COUNT).")
|
||||
|
||||
|
||||
def run(config):
|
||||
|
||||
@@ -41,18 +47,22 @@ def run(config):
|
||||
if input is None:
|
||||
raise Exception("Could not read input")
|
||||
entries = input[1]
|
||||
|
||||
|
||||
countcol = config['count']['countcol']
|
||||
|
||||
count1 = len(entries)
|
||||
count2 = 0
|
||||
|
||||
if COUNT_COLUMN in entries and ((config['count']['sequence'] == config['count']['all']) or (config['count']['all'])) :
|
||||
if countcol in entries and ((config['count']['sequence'] == config['count']['all']) or (config['count']['all'])) :
|
||||
for e in entries:
|
||||
PyErr_CheckSignals()
|
||||
count2+=e[COUNT_COLUMN]
|
||||
count2+=e[countcol]
|
||||
|
||||
if COUNT_COLUMN in entries and (config['count']['sequence'] == config['count']['all']):
|
||||
if countcol in entries and (config['count']['sequence'] == config['count']['all']):
|
||||
print(count1,count2)
|
||||
elif COUNT_COLUMN in entries and config['count']['all']:
|
||||
elif countcol in entries and config['count']['all']:
|
||||
print(count2)
|
||||
else:
|
||||
print(count1)
|
||||
|
||||
input[0].close(force=True)
|
||||
|
||||
@@ -5,10 +5,10 @@ from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.view import View
|
||||
|
||||
@@ -16,6 +16,7 @@ from libc.stdlib cimport malloc, free
|
||||
from libc.stdint cimport int32_t
|
||||
|
||||
import sys
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="in silico PCR"
|
||||
@@ -27,6 +28,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
|
||||
group = parser.add_argument_group('obi ecopcr specific options')
|
||||
@@ -35,13 +37,15 @@ def addOptions(parser):
|
||||
action="store", dest="ecopcr:primer1",
|
||||
metavar='<PRIMER>',
|
||||
type=str,
|
||||
help="Forward primer.")
|
||||
required=True,
|
||||
help="Forward primer, length must be less than or equal to 32")
|
||||
|
||||
group.add_argument('--primer2', '-R',
|
||||
action="store", dest="ecopcr:primer2",
|
||||
metavar='<PRIMER>',
|
||||
type=str,
|
||||
help="Reverse primer.")
|
||||
required=True,
|
||||
help="Reverse primer, length must be less than or equal to 32")
|
||||
|
||||
group.add_argument('--error', '-e',
|
||||
action="store", dest="ecopcr:error",
|
||||
@@ -107,14 +111,20 @@ def addOptions(parser):
|
||||
help="Defines the method used for estimating the Tm (melting temperature) between the primers and their corresponding "
|
||||
"target sequences. SANTALUCIA: 1, or OWCZARZY: 2. Default: 1.")
|
||||
|
||||
group.add_argument('--keep-primers', '-p',
|
||||
action="store_true",
|
||||
dest="ecopcr:keep-primers",
|
||||
default=False,
|
||||
help="Whether to keep the primers attached to the output sequences (default: the primers are cut out).")
|
||||
|
||||
group.add_argument('--keep-nucs', '-D',
|
||||
action="store",
|
||||
dest="ecopcr:keep-nucs",
|
||||
metavar="<INTEGER>",
|
||||
metavar="<N>",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Keeps the specified number of nucleotides on each side of the in silico amplified sequences, "
|
||||
"(already including the amplified DNA fragment plus the two target sequences of the primers).")
|
||||
help="Keeps N nucleotides on each side of the in silico amplified sequences, "
|
||||
"not including the primers (implying that primers are automatically kept if N > 0).")
|
||||
|
||||
group.add_argument('--kingdom-mode', '-k',
|
||||
action="store_true",
|
||||
@@ -161,11 +171,28 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_dms_name = output[0].name
|
||||
o_view_name = output[1]
|
||||
|
||||
# Open the taxonomy DMS
|
||||
taxdms = open_uri(config['obi']['taxoURI'],
|
||||
dms_only=True)
|
||||
if taxdms is None:
|
||||
raise Exception("Could not open taxonomy DMS")
|
||||
tax_dms = taxdms[0]
|
||||
tax_dms_name = taxdms[0].name
|
||||
|
||||
# Read taxonomy name
|
||||
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
|
||||
|
||||
# If stdout output create a temporary view in the input dms that will be deleted afterwards.
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@@ -178,14 +205,15 @@ def run(config):
|
||||
|
||||
# TODO: primers in comments?
|
||||
|
||||
if obi_ecopcr(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), \
|
||||
if obi_ecopcr(i_dms.name_with_full_path, tobytes(i_view_name),
|
||||
tax_dms.name_with_full_path, tobytes(taxonomy_name), \
|
||||
o_dms.name_with_full_path, tobytes(o_view_name), comments, \
|
||||
tobytes(config['ecopcr']['primer1']), tobytes(config['ecopcr']['primer2']), \
|
||||
config['ecopcr']['error'], \
|
||||
config['ecopcr']['min-length'], config['ecopcr']['max-length'], \
|
||||
restrict_to_taxids_p, ignore_taxids_p, \
|
||||
config['ecopcr']['circular'], config['ecopcr']['salt-concentration'], config['ecopcr']['salt-correction-method'], \
|
||||
config['ecopcr']['keep-nucs'], config['ecopcr']['kingdom-mode']) < 0:
|
||||
config['ecopcr']['keep-nucs'], config['ecopcr']['keep-primers'], config['ecopcr']['kingdom-mode']) < 0:
|
||||
raise Exception("Error running ecopcr")
|
||||
|
||||
# Save command config in DMS comments
|
||||
@@ -193,10 +221,22 @@ def run(config):
|
||||
|
||||
free(restrict_to_taxids_p)
|
||||
free(ignore_taxids_p)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
||||
|
||||
o_dms.close()
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiecotag cimport obi_ecotag
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@@ -12,6 +12,7 @@ from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
import sys
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Taxonomic assignment of sequences"
|
||||
@@ -22,6 +23,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi ecotag specific options')
|
||||
|
||||
@@ -39,6 +41,17 @@ def addOptions(parser):
|
||||
help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
|
||||
"Default: 0.00 (no threshold).")
|
||||
|
||||
group.add_argument('--minimum-circle','-c',
|
||||
action="store", dest="ecotag:bubble_threshold",
|
||||
metavar='<CIRCLE_THRESHOLD>',
|
||||
default=0.99,
|
||||
type=float,
|
||||
help="Minimum identity considered for the assignment circle "
|
||||
"(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; "
|
||||
"the threshold for this circle is the highest value between <CIRCLE_THRESHOLD> and the best assignment score found for the query sequence). "
|
||||
"Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. "
|
||||
"Default: 0.99.")
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
@@ -64,10 +77,9 @@ def run(config):
|
||||
ref_view_name = ref[1]
|
||||
|
||||
# Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
|
||||
if config['ecotag']['threshold'] < eval(i_dms[ref_view_name].comments["ref_db_threshold"]) :
|
||||
print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).",
|
||||
config['ecotag']['threshold'], i_dms[ref_view_name].comments["ref_db_threshold"])
|
||||
|
||||
if config['ecotag']['bubble_threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
|
||||
raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({float(ref_dms[ref_view_name].comments['ref_db_threshold'])}).")
|
||||
|
||||
# Open the output: only the DMS
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
input=False,
|
||||
@@ -75,17 +87,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@@ -107,10 +121,11 @@ def run(config):
|
||||
comments = View.print_config(config, "ecotag", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
|
||||
if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
|
||||
tobytes(ref_dms_name), tobytes(ref_view_name), \
|
||||
tobytes(taxo_dms_name), tobytes(taxonomy_name), \
|
||||
tobytes(o_view_name), comments,
|
||||
config['ecotag']['threshold']) < 0:
|
||||
ref_dms.name_with_full_path, tobytes(ref_view_name), \
|
||||
taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
|
||||
tobytes(o_view_name), comments, \
|
||||
config['ecotag']['threshold'], \
|
||||
config['ecotag']['bubble_threshold']) < 0:
|
||||
raise Exception("Error running ecotag")
|
||||
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
@@ -120,15 +135,24 @@ def run(config):
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close()
|
||||
taxo_dms.close(force=True)
|
||||
ref_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
|
||||
@@ -6,6 +6,9 @@ from obitools3.apps.config import logger
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||
from obitools3.writers.tab import TabWriter
|
||||
from obitools3.format.tab import TabFormat
|
||||
from obitools3.utils cimport tobytes, tostr
|
||||
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||
addExportOutputOption, \
|
||||
@@ -59,13 +62,30 @@ def run(config):
|
||||
# Check that the input view has the type NUC_SEQS if needed # TODO discuss, maybe bool property
|
||||
if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") : # Nuc_Seq_Stored? TODO
|
||||
raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
||||
|
||||
|
||||
if config['obi']['only'] is not None:
|
||||
withoutskip = min(input[4], config['obi']['only'])
|
||||
else:
|
||||
withoutskip = input[4]
|
||||
|
||||
if config['obi']['skip'] is not None:
|
||||
skip = min(input[4], config['obi']['skip'])
|
||||
else:
|
||||
skip = 0
|
||||
|
||||
# Initialize the progress bar
|
||||
if config['obi']['noprogressbar']:
|
||||
pb = None
|
||||
else:
|
||||
pb = ProgressBar(len(iview), config, seconde=5)
|
||||
|
||||
pb = ProgressBar(withoutskip - skip, config)
|
||||
|
||||
if config['obi']['outputformat'] == b'metabaR':
|
||||
# Check prefix
|
||||
if "metabarprefix" not in config["obi"]:
|
||||
raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
|
||||
else:
|
||||
metabaRprefix = config["obi"]["metabarprefix"]
|
||||
|
||||
i=0
|
||||
for seq in iview :
|
||||
PyErr_CheckSignals()
|
||||
@@ -79,14 +99,89 @@ def run(config):
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if config['obi']['outputformat'] == b'metabaR':
|
||||
|
||||
# Export ngsfilter file if view provided
|
||||
if 'metabarngsfilter' in config['obi']:
|
||||
ngsfilter_input = open_uri(config['obi']['metabarngsfilter'])
|
||||
if ngsfilter_input is None:
|
||||
raise Exception("Could not read ngsfilter view for metabaR output")
|
||||
ngsfilter_view = ngsfilter_input[1]
|
||||
|
||||
ngsfilter_output = open(config['obi']['metabarprefix']+'.ngsfilter', 'w')
|
||||
|
||||
for line in ngsfilter_view:
|
||||
|
||||
line_to_print = b""
|
||||
line_to_print += line[b'experiment']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'sample']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'forward_tag']
|
||||
line_to_print += b":"
|
||||
line_to_print += line[b'reverse_tag']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'forward_primer']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'reverse_primer']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'additional_info']
|
||||
|
||||
print(tostr(line_to_print), file=ngsfilter_output)
|
||||
|
||||
if ngsfilter_input[0] != input[0]:
|
||||
ngsfilter_input[0].close()
|
||||
ngsfilter_output.close()
|
||||
|
||||
# Export sample metadata
|
||||
samples_output = open(config['obi']['metabarprefix']+'_samples.csv', 'w')
|
||||
|
||||
# Export sample metadata file if view provided
|
||||
if 'metabarsamples' in config['obi']:
|
||||
samples_input = open_uri(config['obi']['metabarsamples'])
|
||||
if samples_input is None:
|
||||
raise Exception("Could not read sample view for metabaR output")
|
||||
samples_view = samples_input[1]
|
||||
|
||||
# Export with tab formatter
|
||||
TabWriter(TabFormat(header=True, sep='\t',),
|
||||
samples_output,
|
||||
header=True)
|
||||
|
||||
if samples_input[0] != input[0]:
|
||||
samples_input[0].close()
|
||||
|
||||
# Else export just sample names from main view
|
||||
else:
|
||||
|
||||
sample_list = []
|
||||
if 'MERGED_sample' in iview:
|
||||
sample_list = iview['MERGED_sample'].keys()
|
||||
elif 'sample' not in iview:
|
||||
for seq in iview:
|
||||
sample = seq['sample']
|
||||
if sample not in sample_list:
|
||||
sample_list.append(sample)
|
||||
else:
|
||||
logger("warning", "Can not read sample list from main view for metabaR sample list export")
|
||||
|
||||
print("sample_id", file=samples_output)
|
||||
for sample in sample_list:
|
||||
line_to_print = b""
|
||||
line_to_print += sample
|
||||
line_to_print += b"\t"
|
||||
print(tostr(line_to_print), file=samples_output)
|
||||
|
||||
samples_output.close()
|
||||
|
||||
# TODO save command in input dms?
|
||||
|
||||
if not BrokenPipeError and not IOError:
|
||||
output_object.close()
|
||||
iview.close()
|
||||
input[0].close()
|
||||
input[0].close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@@ -14,6 +14,7 @@ import time
|
||||
import re
|
||||
import sys
|
||||
import ast
|
||||
from io import BufferedWriter
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@@ -28,6 +29,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group("obi grep specific options")
|
||||
|
||||
@@ -36,14 +38,13 @@ def addOptions(parser):
|
||||
metavar="<PREDICATE>",
|
||||
default=[],
|
||||
type=str,
|
||||
help="Warning: use bytes for character strings (b'text' instead of 'text'). "
|
||||
"Python boolean expression to be evaluated in the "
|
||||
help="Python boolean expression to be evaluated in the "
|
||||
"sequence/line context. The attribute name can be "
|
||||
"used in the expression as a variable name. "
|
||||
"An extra variable named 'sequence' or 'line' refers "
|
||||
"to the sequence or line object itself. "
|
||||
"Several -p options can be used on the same "
|
||||
"commande line.")
|
||||
"command line.")
|
||||
|
||||
group.add_argument("-S", "--sequence",
|
||||
action="store", dest="grep:seq_pattern",
|
||||
@@ -90,7 +91,7 @@ def addOptions(parser):
|
||||
metavar="<ATTRIBUTE_NAME>",
|
||||
help="Select records with the attribute <ATTRIBUTE_NAME> "
|
||||
"defined (not set to NA value). "
|
||||
"Several -a options can be used on the same "
|
||||
"Several -A options can be used on the same "
|
||||
"command line.")
|
||||
|
||||
group.add_argument("-L", "--lmax",
|
||||
@@ -162,8 +163,7 @@ def obi_eval(compiled_expr, loc_env, line):
|
||||
return obi_eval_result
|
||||
|
||||
|
||||
def Filter_generator(options, tax_filter):
|
||||
#taxfilter = taxonomyFilterGenerator(options)
|
||||
def Filter_generator(options, tax_filter, i_view):
|
||||
|
||||
# Initialize conditions
|
||||
predicates = None
|
||||
@@ -172,6 +172,9 @@ def Filter_generator(options, tax_filter):
|
||||
attributes = None
|
||||
if "attributes" in options and len(options["attributes"]) > 0:
|
||||
attributes = options["attributes"]
|
||||
for attribute in attributes:
|
||||
if attribute not in i_view:
|
||||
return None
|
||||
lmax = None
|
||||
if "lmax" in options:
|
||||
lmax = options["lmax"]
|
||||
@@ -181,7 +184,7 @@ def Filter_generator(options, tax_filter):
|
||||
invert_selection = options["invert_selection"]
|
||||
id_set = None
|
||||
if "id_list" in options:
|
||||
id_set = set(x.strip() for x in open(options["id_list"]))
|
||||
id_set = set(x.strip() for x in open(options["id_list"], 'rb'))
|
||||
|
||||
# Initialize the regular expression patterns
|
||||
seq_pattern = None
|
||||
@@ -197,6 +200,8 @@ def Filter_generator(options, tax_filter):
|
||||
if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0:
|
||||
for p in options["attribute_patterns"]:
|
||||
attribute, pattern = p.split(":", 1)
|
||||
if attribute not in i_view:
|
||||
return None
|
||||
attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))
|
||||
|
||||
def filter(line, loc_env):
|
||||
@@ -253,6 +258,13 @@ def Filter_generator(options, tax_filter):
|
||||
|
||||
|
||||
def Taxonomy_filter_generator(taxo, options):
|
||||
|
||||
if (("required_ranks" in options and options["required_ranks"]) or \
|
||||
("required_taxids" in options and options["required_taxids"]) or \
|
||||
("ignored_taxids" in options and options["ignored_taxids"])) and \
|
||||
(taxo is None):
|
||||
raise RollbackException("obi grep error: can't use taxonomy options without providing a taxonomy. Rollbacking view")
|
||||
|
||||
if taxo is not None:
|
||||
def tax_filter(seq):
|
||||
good = True
|
||||
@@ -301,16 +313,21 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config["obi"]["taxoURI"])
|
||||
@@ -321,33 +338,49 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
# Apply filter
|
||||
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
|
||||
filter = Filter_generator(config["grep"], tax_filter)
|
||||
filter = Filter_generator(config["grep"], tax_filter, i_view)
|
||||
selection = Line_selection(i_view)
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
line = i_view[i]
|
||||
|
||||
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
|
||||
|
||||
good = filter(line, loc_env)
|
||||
|
||||
if good :
|
||||
|
||||
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
line = i_view[i]
|
||||
|
||||
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
|
||||
|
||||
good = filter(line, loc_env)
|
||||
|
||||
if good :
|
||||
selection.append(i)
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the line selection
|
||||
try:
|
||||
o_view = selection.materialize(o_view_name)
|
||||
except Exception, e:
|
||||
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
|
||||
logger("info", "Grepped %d entries" % len(o_view))
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
input_dms_name=[input[0].name]
|
||||
@@ -362,16 +395,22 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
i_dms.close()
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -4,24 +4,28 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
from obitools3.apps.optiongroups import addExportOutputOption
|
||||
|
||||
import time
|
||||
import sys
|
||||
|
||||
from io import BufferedWriter
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Keep the N first lines of a view."
|
||||
__title__="Keep the N first lines of a view"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addExportOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi head specific options')
|
||||
|
||||
@@ -53,31 +57,41 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
n = min(config['head']['count'], len(i_view))
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(n, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(n, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
|
||||
for i in range(n):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the line selection
|
||||
try:
|
||||
@@ -94,16 +108,22 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
i_dms.close()
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -54,4 +54,5 @@ def run(config):
|
||||
print(bytes2str(entries.ascii_history))
|
||||
else:
|
||||
raise Exception("ASCII history only available for views")
|
||||
|
||||
|
||||
input[0].close(force=True)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.view.view cimport View
|
||||
@@ -11,6 +12,7 @@ from obitools3.dms.column.column cimport Column
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.taxo.taxo cimport Taxonomy
|
||||
from obitools3.files.uncompress cimport CompressedFile
|
||||
|
||||
|
||||
from obitools3.utils cimport tobytes, \
|
||||
@@ -24,20 +26,26 @@ from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
|
||||
DEFINITION_COLUMN, \
|
||||
QUALITY_COLUMN, \
|
||||
COUNT_COLUMN, \
|
||||
TAXID_COLUMN
|
||||
TAXID_COLUMN, \
|
||||
MERGED_PREFIX, \
|
||||
SCIENTIFIC_NAME_COLUMN
|
||||
|
||||
from obitools3.dms.capi.obidms cimport obi_import_view
|
||||
|
||||
from obitools3.dms.capi.obitypes cimport obitype_t, \
|
||||
OBI_VOID, \
|
||||
OBI_QUAL
|
||||
OBI_QUAL, \
|
||||
OBI_STR, \
|
||||
OBI_INT
|
||||
|
||||
from obitools3.dms.capi.obierrno cimport obi_errno
|
||||
|
||||
from obitools3.apps.optiongroups import addImportInputOption, \
|
||||
addTabularInputOption, \
|
||||
addTaxdumpInputOption, \
|
||||
addMinimalOutputOption
|
||||
addMinimalOutputOption, \
|
||||
addNoProgressBarOption, \
|
||||
addTaxonomyOption
|
||||
|
||||
from obitools3.uri.decode import open_uri
|
||||
|
||||
@@ -45,8 +53,11 @@ from obitools3.apps.config import logger
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
from io import BufferedWriter
|
||||
import ast
|
||||
|
||||
__title__="Imports sequences from different formats into a DMS"
|
||||
|
||||
__title__="Import sequences from different formats into a DMS"
|
||||
|
||||
|
||||
default_config = { 'destview' : None,
|
||||
@@ -63,8 +74,27 @@ def addOptions(parser):
|
||||
addImportInputOption(parser)
|
||||
addTabularInputOption(parser)
|
||||
addTaxdumpInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi import specific options')
|
||||
|
||||
group.add_argument('--preread',
|
||||
action="store_true", dest="import:preread",
|
||||
default=False,
|
||||
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
|
||||
"a much faster import. This option is not recommended and will slow down the import in any other case.")
|
||||
|
||||
group.add_argument('--space-priority',
|
||||
action="store_true", dest="import:space_priority",
|
||||
default=False,
|
||||
help="If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
|
||||
"has a line selection associated.")
|
||||
|
||||
# group.add_argument('--only-id',
|
||||
# action="store", dest="import:onlyid",
|
||||
# help="only id")
|
||||
|
||||
def run(config):
|
||||
|
||||
@@ -77,6 +107,10 @@ def run(config):
|
||||
cdef obitype_t new_type
|
||||
cdef bint get_quality
|
||||
cdef bint NUC_SEQS_view
|
||||
cdef bint silva
|
||||
cdef bint rdp
|
||||
cdef bint unite
|
||||
cdef bint sintax
|
||||
cdef int nb_elts
|
||||
cdef object d
|
||||
cdef View view
|
||||
@@ -87,6 +121,8 @@ def run(config):
|
||||
cdef Column seq_col
|
||||
cdef Column qual_col
|
||||
cdef Column old_column
|
||||
cdef Column sci_name_col
|
||||
cdef bytes sci_name
|
||||
cdef bint rewrite
|
||||
cdef dict dcols
|
||||
cdef int skipping
|
||||
@@ -120,7 +156,7 @@ def run(config):
|
||||
if entry_count > 0:
|
||||
logger("info", "Importing %d entries", entry_count)
|
||||
else:
|
||||
logger("info", "Importing an unknow number of entries")
|
||||
logger("info", "Importing an unknown number of entries")
|
||||
|
||||
# TODO a bit dirty?
|
||||
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
||||
@@ -130,7 +166,7 @@ def run(config):
|
||||
else:
|
||||
v = None
|
||||
|
||||
if config['obi']['taxdump'] or isinstance(input[1], View):
|
||||
if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']):
|
||||
dms_only=True
|
||||
else:
|
||||
dms_only=False
|
||||
@@ -154,23 +190,37 @@ def run(config):
|
||||
taxo.write(taxo_name)
|
||||
taxo.close()
|
||||
o_dms.record_command_line(" ".join(sys.argv[1:]))
|
||||
o_dms.close()
|
||||
o_dms.close(force=True)
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
# If importing a view between two DMS, use C API
|
||||
if isinstance(input[1], View):
|
||||
# Open taxonomy if there is one
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||
if taxo_uri is None or taxo_uri[2] == bytes:
|
||||
raise Exception("Couldn't open taxonomy")
|
||||
taxo = taxo_uri[1]
|
||||
|
||||
else :
|
||||
taxo = None
|
||||
|
||||
# If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
|
||||
if isinstance(input[1], View) and not config['import']['space_priority']:
|
||||
if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
raise Exception("Error importing a view in a DMS")
|
||||
o_dms.record_command_line(" ".join(sys.argv[1:]))
|
||||
o_dms.close()
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
|
||||
entries = input[1]
|
||||
# Reinitialize the progress bar
|
||||
if entry_count >= 0 and config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entry_count, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
NUC_SEQS_view = False
|
||||
if isinstance(output[1], View) :
|
||||
@@ -179,15 +229,106 @@ def run(config):
|
||||
NUC_SEQS_view = True
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
# Save basic columns in variables for optimization
|
||||
if NUC_SEQS_view :
|
||||
id_col = view[ID_COLUMN]
|
||||
def_col = view[DEFINITION_COLUMN]
|
||||
seq_col = view[NUC_SEQUENCE_COLUMN]
|
||||
|
||||
|
||||
# Prepare taxon scientific name and taxid refs if RDP/SILVA/UNITE/SINTAX formats
|
||||
silva = False
|
||||
rdp = False
|
||||
unite = False
|
||||
sintax=False
|
||||
if 'inputformat' in config['obi'] and (config['obi']['inputformat'] == b"silva" or \
|
||||
config['obi']['inputformat'] == b"rdp" or \
|
||||
config['obi']['inputformat'] == b"unite" or \
|
||||
config['obi']['inputformat'] == b"sintax"):
|
||||
#if taxo is None:
|
||||
# raise Exception("A taxonomy (as built by 'obi import --taxdump') must be provided for SILVA and RDP files")
|
||||
if config['obi']['inputformat'] == b"silva":
|
||||
silva = True
|
||||
elif config['obi']['inputformat'] == b"rdp":
|
||||
rdp = True
|
||||
elif config['obi']['inputformat'] == b"unite":
|
||||
unite = True
|
||||
elif config['obi']['inputformat'] == b"sintax":
|
||||
sintax = True
|
||||
sci_name_col = Column.new_column(view, SCIENTIFIC_NAME_COLUMN, OBI_STR)
|
||||
if taxo is not None:
|
||||
taxid_col = Column.new_column(view, TAXID_COLUMN, OBI_INT)
|
||||
|
||||
dcols = {}
|
||||
|
||||
# First read through the entries to prepare columns with dictionaries as they are very time-expensive to rewrite
|
||||
if config['import']['preread']:
|
||||
logger("info", "First readthrough...")
|
||||
entries = input[1]
|
||||
i = 0
|
||||
dict_dict = {}
|
||||
for entry in entries:
|
||||
PyErr_CheckSignals()
|
||||
|
||||
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
|
||||
if config['obi']['skiperror']:
|
||||
i-=1
|
||||
continue
|
||||
else:
|
||||
raise Exception("obi import error in first readthrough")
|
||||
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
elif not i%50000:
|
||||
logger("info", "Read %d entries", i)
|
||||
|
||||
for tag in entry :
|
||||
newtag = tag
|
||||
if tag[:7] == b"merged_":
|
||||
newtag = MERGED_PREFIX+tag[7:]
|
||||
if type(entry[tag]) == dict :
|
||||
if tag in dict_dict:
|
||||
dict_dict[newtag][0].update(entry[tag].keys())
|
||||
else:
|
||||
dict_dict[newtag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
|
||||
i+=1
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
for tag in dict_dict:
|
||||
dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
|
||||
nb_elements_per_line=len(dict_dict[tag][0]), \
|
||||
elements_names=list(dict_dict[tag][0]), \
|
||||
dict_column=True), \
|
||||
dict_dict[tag][1])
|
||||
|
||||
|
||||
# Reinitialize the input
|
||||
if isinstance(input[0], CompressedFile):
|
||||
input_is_file = True
|
||||
|
||||
# Reinitialize the progress bar
|
||||
if entry_count >= 0 and config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entry_count, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
input[0].close()
|
||||
except AttributeError:
|
||||
pass
|
||||
input = open_uri(config['obi']['inputURI'], force_file=input_is_file)
|
||||
if input is None:
|
||||
raise Exception("Could not open input URI")
|
||||
|
||||
# if 'onlyid' in config['import']:
|
||||
# onlyid = tobytes(config['import']['onlyid'])
|
||||
# else:
|
||||
# onlyid = None
|
||||
|
||||
entries = input[1]
|
||||
i = 0
|
||||
for entry in entries :
|
||||
|
||||
@@ -195,7 +336,6 @@ def run(config):
|
||||
|
||||
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
|
||||
if config['obi']['skiperror']:
|
||||
i-=1
|
||||
continue
|
||||
else:
|
||||
raise RollbackException("obi import error, rollbacking view", view)
|
||||
@@ -204,121 +344,193 @@ def run(config):
|
||||
pb(i)
|
||||
elif not i%50000:
|
||||
logger("info", "Imported %d entries", i)
|
||||
|
||||
if NUC_SEQS_view:
|
||||
id_col[i] = entry.id
|
||||
def_col[i] = entry.definition
|
||||
seq_col[i] = entry.seq
|
||||
# Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet
|
||||
if i == 0:
|
||||
get_quality = QUALITY_COLUMN in entry
|
||||
|
||||
# if onlyid is not None and entry.id != onlyid:
|
||||
# continue
|
||||
|
||||
try:
|
||||
|
||||
if NUC_SEQS_view:
|
||||
id_col[i] = entry.id
|
||||
def_col[i] = entry.definition
|
||||
seq_col[i] = entry.seq
|
||||
# Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet
|
||||
if i == 0:
|
||||
get_quality = QUALITY_COLUMN in entry
|
||||
if get_quality:
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
|
||||
qual_col = view[QUALITY_COLUMN]
|
||||
if get_quality:
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
|
||||
qual_col = view[QUALITY_COLUMN]
|
||||
if get_quality:
|
||||
qual_col[i] = entry.quality
|
||||
|
||||
for tag in entry :
|
||||
|
||||
if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN : # TODO dirty
|
||||
|
||||
value = entry[tag]
|
||||
if tag == b"taxid":
|
||||
tag = TAXID_COLUMN
|
||||
if tag == b"count":
|
||||
tag = COUNT_COLUMN
|
||||
qual_col[i] = entry.quality
|
||||
|
||||
if tag not in dcols :
|
||||
|
||||
value_type = type(value)
|
||||
nb_elts = 1
|
||||
value_obitype = OBI_VOID
|
||||
|
||||
if value_type == dict or value_type == list :
|
||||
nb_elts = len(value)
|
||||
elt_names = list(value)
|
||||
else :
|
||||
nb_elts = 1
|
||||
elt_names = None
|
||||
|
||||
value_obitype = get_obitype(value)
|
||||
|
||||
if value_obitype != OBI_VOID :
|
||||
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
||||
|
||||
# Fill value
|
||||
dcols[tag][0][i] = value
|
||||
|
||||
# TODO else log error?
|
||||
|
||||
else :
|
||||
|
||||
rewrite = False
|
||||
|
||||
# Check type adequation
|
||||
old_type = dcols[tag][1]
|
||||
new_type = OBI_VOID
|
||||
new_type = update_obitype(old_type, value)
|
||||
if old_type != new_type :
|
||||
rewrite = True
|
||||
|
||||
try:
|
||||
# Fill value
|
||||
dcols[tag][0][i] = value
|
||||
|
||||
except IndexError :
|
||||
|
||||
value_type = type(value)
|
||||
old_column = dcols[tag][0]
|
||||
old_nb_elements_per_line = old_column.nb_elements_per_line
|
||||
new_nb_elements_per_line = 0
|
||||
old_elements_names = old_column.elements_names
|
||||
new_elements_names = None
|
||||
|
||||
#####################################################################
|
||||
|
||||
# Check the length and keys of column lines if needed
|
||||
if value_type == dict : # Check dictionary keys
|
||||
for k in value :
|
||||
if k not in old_elements_names :
|
||||
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
||||
rewrite = True
|
||||
# Parse taxon scientific name if RDP or Silva or Unite file
|
||||
if (rdp or silva or unite or sintax):
|
||||
if rdp or silva:
|
||||
sci_names = entry.definition.split(b";")
|
||||
sci_name_col[i] = sci_names[-1]
|
||||
elif unite:
|
||||
sci_names = entry.id.split(b'|')[-1].split(b';')
|
||||
sci_name_col[i] = re.sub(b'[a-zA-Z]__', b'', sci_names[-1])
|
||||
elif sintax:
|
||||
reconstructed_line = entry.id+b' '+entry.definition[:-1]
|
||||
splitted_reconstructed_line = reconstructed_line.split(b';')
|
||||
taxa = splitted_reconstructed_line[1].split(b'=')[1]
|
||||
taxa = splitted_reconstructed_line[1].split(b',')
|
||||
sci_names = []
|
||||
for t in taxa:
|
||||
tf = t.split(b':')[1]
|
||||
sci_names.append(tf)
|
||||
sci_name_col[i] = sci_names[-1]
|
||||
id_col[i] = reconstructed_line.split(b';')[0]
|
||||
def_col[i] = reconstructed_line
|
||||
|
||||
# Fond taxid if taxonomy provided
|
||||
if taxo is not None :
|
||||
for sci_name in reversed(sci_names):
|
||||
if unite:
|
||||
sci_name = re.sub(b'[a-zA-Z]__', b'', sci_name)
|
||||
if sci_name.split()[0] != b'unidentified' and sci_name.split()[0] != b'uncultured' and sci_name.split()[0] != b'metagenome':
|
||||
taxon = taxo.get_taxon_by_name(sci_name)
|
||||
if taxon is not None:
|
||||
sci_name_col[i] = taxon.name
|
||||
taxid_col[i] = taxon.taxid
|
||||
#print(taxid_col[i], sci_name_col[i])
|
||||
break
|
||||
|
||||
elif value_type == list or value_type == tuple : # Check vector length
|
||||
if old_nb_elements_per_line < len(value) :
|
||||
new_nb_elements_per_line = len(value)
|
||||
rewrite = True
|
||||
|
||||
#####################################################################
|
||||
|
||||
if rewrite :
|
||||
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
||||
new_nb_elements_per_line = len(new_elements_names)
|
||||
|
||||
# Reset obierrno
|
||||
obi_errno = 0
|
||||
|
||||
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
||||
new_data_type=new_type,
|
||||
new_nb_elements_per_line=new_nb_elements_per_line,
|
||||
new_elements_names=new_elements_names,
|
||||
rewrite_last_line=False),
|
||||
new_type)
|
||||
|
||||
# Update the dictionary:
|
||||
for t in dcols :
|
||||
dcols[t] = (view[t], dcols[t][1])
|
||||
|
||||
|
||||
for tag in entry :
|
||||
|
||||
if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN : # TODO dirty
|
||||
|
||||
value = entry[tag]
|
||||
if tag == b"taxid":
|
||||
tag = TAXID_COLUMN
|
||||
if tag == b"count":
|
||||
tag = COUNT_COLUMN
|
||||
if tag == b"scientific_name":
|
||||
tag = SCIENTIFIC_NAME_COLUMN
|
||||
if tag[:7] == b"merged_":
|
||||
tag = MERGED_PREFIX+tag[7:]
|
||||
|
||||
if type(value) == bytes and value[:1]==b"[" :
|
||||
try:
|
||||
if type(eval(value)) == list:
|
||||
value = eval(value)
|
||||
#print(value)
|
||||
except:
|
||||
pass
|
||||
|
||||
if tag not in dcols :
|
||||
|
||||
value_type = type(value)
|
||||
nb_elts = 1
|
||||
value_obitype = OBI_VOID
|
||||
dict_col = False
|
||||
|
||||
if value_type == dict :
|
||||
nb_elts = len(value)
|
||||
elt_names = list(value)
|
||||
dict_col = True
|
||||
else :
|
||||
nb_elts = 1
|
||||
elt_names = None
|
||||
|
||||
if value_type == list :
|
||||
tuples = True
|
||||
else:
|
||||
tuples = False
|
||||
|
||||
value_obitype = get_obitype(value)
|
||||
|
||||
if value_obitype != OBI_VOID :
|
||||
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names, dict_column=dict_col, tuples=tuples), value_obitype)
|
||||
|
||||
# Fill value
|
||||
dcols[tag][0][i] = value
|
||||
|
||||
|
||||
# TODO else log error?
|
||||
|
||||
else :
|
||||
|
||||
rewrite = False
|
||||
|
||||
# Check type adequation
|
||||
old_type = dcols[tag][1]
|
||||
new_type = OBI_VOID
|
||||
new_type = update_obitype(old_type, value)
|
||||
if old_type != new_type :
|
||||
rewrite = True
|
||||
|
||||
try:
|
||||
# Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key
|
||||
if type(value) == dict and \
|
||||
dcols[tag][0].nb_elements_per_line == 1 \
|
||||
and set(dcols[tag][0].elements_names) != set(value.keys()) :
|
||||
raise IndexError # trigger column rewrite
|
||||
|
||||
# Fill value
|
||||
dcols[tag][0][i] = value
|
||||
|
||||
except (IndexError, OverflowError):
|
||||
|
||||
value_type = type(value)
|
||||
old_column = dcols[tag][0]
|
||||
old_nb_elements_per_line = old_column.nb_elements_per_line
|
||||
new_nb_elements_per_line = 0
|
||||
old_elements_names = old_column.elements_names
|
||||
new_elements_names = None
|
||||
|
||||
#####################################################################
|
||||
|
||||
# Check the length and keys of column lines if needed
|
||||
if value_type == dict : # Check dictionary keys
|
||||
for k in value :
|
||||
if k not in old_elements_names :
|
||||
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
||||
rewrite = True
|
||||
break
|
||||
|
||||
elif value_type == list or value_type == tuple : # Check vector length
|
||||
if old_nb_elements_per_line < len(value) :
|
||||
new_nb_elements_per_line = len(value)
|
||||
rewrite = True
|
||||
|
||||
#####################################################################
|
||||
|
||||
if rewrite :
|
||||
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
||||
new_nb_elements_per_line = len(new_elements_names)
|
||||
|
||||
# Reset obierrno
|
||||
obi_errno = 0
|
||||
|
||||
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
||||
new_data_type=new_type,
|
||||
new_nb_elements_per_line=new_nb_elements_per_line,
|
||||
new_elements_names=new_elements_names,
|
||||
rewrite_last_line=False),
|
||||
new_type)
|
||||
|
||||
# Update the dictionary:
|
||||
for t in dcols :
|
||||
dcols[t] = (view[t], dcols[t][1])
|
||||
|
||||
# Fill value
|
||||
dcols[tag][0][i] = value
|
||||
|
||||
except Exception as e:
|
||||
print("\nCould not import sequence:\n", repr(entry), "\nError raised:", e, "\n/!\ Check if '--input-na-string' option needs to be set")
|
||||
if 'skiperror' in config['obi'] and not config['obi']['skiperror']:
|
||||
raise e
|
||||
else:
|
||||
pass
|
||||
i-=1 # overwrite problematic entry
|
||||
|
||||
i+=1
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
logger("info", "Imported %d entries", i)
|
||||
logger("info", "Imported %d entries", len(view))
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@@ -333,7 +545,7 @@ def run(config):
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
output[0].close()
|
||||
output[0].close(force=True)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
|
||||
@@ -46,5 +46,5 @@ def run(config):
|
||||
process.wait()
|
||||
|
||||
iview.close()
|
||||
input[0].close()
|
||||
input[0].close(force=True)
|
||||
|
||||
|
||||
@@ -31,24 +31,11 @@ def run(config):
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
if input is None:
|
||||
raise Exception("Could not read input")
|
||||
if input[2] == DMS and not config['ls']['longformat']:
|
||||
dms = input[0]
|
||||
l = []
|
||||
for view in input[0]:
|
||||
l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
|
||||
l.sort()
|
||||
for v in l:
|
||||
print(v)
|
||||
|
||||
# Print representation
|
||||
if config['ls']['longformat']:
|
||||
print(input[1].repr_longformat())
|
||||
else:
|
||||
print(repr(input[1]))
|
||||
if input[2] == DMS:
|
||||
taxolist = ["\n### Taxonomies:"]
|
||||
for t in Taxonomy.list_taxos(input[0]):
|
||||
taxolist.append("\t"+tostr(t))
|
||||
if len(taxolist) > 1:
|
||||
for t in taxolist:
|
||||
print(t)
|
||||
if config['ls']['longformat'] and len(input[1].comments) > 0:
|
||||
print("\n### Comments:")
|
||||
print(str(input[1].comments))
|
||||
|
||||
|
||||
input[0].close(force=True)
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.view import RollbackException, View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column, Column_line
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
|
||||
@@ -13,27 +13,29 @@ from obitools3.libalign.apat_pattern import Primer_search
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||
from obitools3.dms.capi.apat cimport MAX_PATTERN
|
||||
from obitools3.utils cimport tobytes
|
||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
|
||||
from libc.stdint cimport INT32_MAX
|
||||
from functools import reduce
|
||||
import math
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
|
||||
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # used by alignpairedend tool
|
||||
MAX_PAT_LEN = 31
|
||||
|
||||
|
||||
__title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
||||
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi ngsfilter specific options')
|
||||
|
||||
group.add_argument('-t','--info-view',
|
||||
@@ -41,7 +43,9 @@ def addOptions(parser):
|
||||
metavar="<URI>",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
|
||||
required=True,
|
||||
help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
|
||||
"\nWarning: primer lengths must be less than or equal to 32")
|
||||
|
||||
group.add_argument('-R', '--reverse-reads',
|
||||
action="store", dest="ngsfilter:reverse",
|
||||
@@ -55,7 +59,12 @@ def addOptions(parser):
|
||||
metavar="<URI>",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the view used to store the sequences unassigned to any sample")
|
||||
help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")
|
||||
|
||||
group.add_argument('--no-tags',
|
||||
action="store_true", dest="ngsfilter:notags",
|
||||
default=False,
|
||||
help="Use this option if your experiment does not use tags to identify samples")
|
||||
|
||||
group.add_argument('-e','--error',
|
||||
action="store", dest="ngsfilter:error",
|
||||
@@ -78,6 +87,8 @@ class Primer:
|
||||
@type direct:
|
||||
'''
|
||||
|
||||
assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence
|
||||
|
||||
assert sequence not in Primer.collection \
|
||||
or Primer.collection[sequence]==taglength, \
|
||||
"Primer %s must always be used with tags of the same length" % sequence
|
||||
@@ -166,8 +177,15 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
||||
primer_list = []
|
||||
i=0
|
||||
for p in info_view:
|
||||
|
||||
# Check primer length: should not be longer than 32, the max allowed by the apat lib
|
||||
if len(p[b'forward_primer']) > 32:
|
||||
raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
|
||||
if len(p[b'reverse_primer']) > 32:
|
||||
raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
|
||||
|
||||
forward=Primer(p[b'forward_primer'],
|
||||
len(p[b'forward_tag']) if p[b'forward_tag']!=b'-' else None,
|
||||
len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
|
||||
True,
|
||||
max_errors=max_errors,
|
||||
verbose=verbose,
|
||||
@@ -178,7 +196,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
||||
infos[forward]=fp
|
||||
|
||||
reverse=Primer(p[b'reverse_primer'],
|
||||
len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
|
||||
len(p[b'reverse_tag']) if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None,
|
||||
False,
|
||||
max_errors=max_errors,
|
||||
verbose=verbose,
|
||||
@@ -213,10 +231,11 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
||||
rpp=rp.get(cf,{})
|
||||
rp[cf]=rpp
|
||||
|
||||
tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
|
||||
p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)
|
||||
tags = (p[b'forward_tag'] if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
|
||||
p[b'reverse_tag'] if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None)
|
||||
|
||||
assert tags not in dpp, \
|
||||
if tags != (None, None):
|
||||
assert tags not in dpp, \
|
||||
"Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)
|
||||
|
||||
# Save additional data
|
||||
@@ -234,7 +253,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
||||
return infos, primer_list
|
||||
|
||||
|
||||
cdef tuple annotate(sequences, infos, verbose=False):
|
||||
cdef tuple annotate(sequences, infos, no_tags, verbose=False):
|
||||
|
||||
def sortMatch(match):
|
||||
if match[1] is None:
|
||||
@@ -249,20 +268,19 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
||||
return match[1][1]
|
||||
|
||||
not_aligned = len(sequences) > 1
|
||||
sequenceF = sequences[0]
|
||||
sequenceR = None
|
||||
if not not_aligned:
|
||||
final_sequence = sequenceF
|
||||
else:
|
||||
final_sequence = sequenceF.clone() # TODO maybe not cloning and then deleting quality tags is more efficient
|
||||
sequences[0] = sequences[0].clone()
|
||||
|
||||
if not_aligned:
|
||||
sequences[0][b"R1_parent"] = sequences[0].id
|
||||
sequences[0][b"R2_parent"] = sequences[1].id
|
||||
|
||||
if not_aligned:
|
||||
sequenceR = sequences[1]
|
||||
final_sequence[REVERSE_SEQ_COLUMN_NAME] = sequenceR.seq # used by alignpairedend tool
|
||||
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = sequenceR.quality # used by alignpairedend tool
|
||||
sequences[1] = sequences[1].clone()
|
||||
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
|
||||
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
|
||||
|
||||
for seq in sequences:
|
||||
if hasattr(seq, "quality_array"):
|
||||
if hasattr(seq, "quality_array") and seq.quality_array is not None:
|
||||
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array),0)/len(seq.quality_array)*10
|
||||
seq[b'avg_quality']=q
|
||||
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[0:10]),0)
|
||||
@@ -275,8 +293,6 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
||||
|
||||
# Try direct matching:
|
||||
directmatch = []
|
||||
first_matched_seq = None
|
||||
second_matched_seq = None
|
||||
for seq in sequences:
|
||||
new_seq = True
|
||||
pattern = 0
|
||||
@@ -288,70 +304,106 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
||||
directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p))
|
||||
new_seq = False
|
||||
pattern+=1
|
||||
|
||||
|
||||
# Choose match closer to the start of (one of the) sequence(s)
|
||||
directmatch = sorted(directmatch, key=sortMatch)
|
||||
all_direct_matches = directmatch
|
||||
directmatch = directmatch[0] if directmatch[0][1] is not None else None
|
||||
|
||||
if directmatch is None:
|
||||
final_sequence[b'error']=b'No primer match'
|
||||
return False, final_sequence
|
||||
if not_aligned:
|
||||
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
|
||||
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
|
||||
sequences[0][b'error']=b'No primer match'
|
||||
return False, sequences[0]
|
||||
|
||||
first_matched_seq = directmatch[2]
|
||||
if id(first_matched_seq) == id(sequenceF) and not_aligned:
|
||||
second_matched_seq = sequenceR
|
||||
if id(directmatch[2]) == id(sequences[0]):
|
||||
first_match_first_seq = True
|
||||
else:
|
||||
second_matched_seq = sequenceF
|
||||
|
||||
match = first_matched_seq[directmatch[1][1]:directmatch[1][2]]
|
||||
first_match_first_seq = False
|
||||
|
||||
match = directmatch[2][directmatch[1][1]:directmatch[1][2]]
|
||||
|
||||
if not not_aligned:
|
||||
final_sequence[b'seq_length_ori']=len(final_sequence)
|
||||
sequences[0][b'seq_length_ori']=len(sequences[0])
|
||||
|
||||
if not not_aligned or id(first_matched_seq) == id(sequenceF):
|
||||
final_sequence = final_sequence[directmatch[1][2]:]
|
||||
if not not_aligned or first_match_first_seq:
|
||||
sequences[0] = sequences[0][directmatch[1][2]:]
|
||||
else:
|
||||
cut_seq = sequenceR[directmatch[1][2]:]
|
||||
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq # used by alignpairedend tool
|
||||
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality # used by alignpairedend tool
|
||||
sequences[1] = sequences[1][directmatch[1][2]:]
|
||||
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
|
||||
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
|
||||
|
||||
if directmatch[0].forward:
|
||||
final_sequence[b'direction']=b'forward'
|
||||
final_sequence[b'forward_errors']=directmatch[1][0]
|
||||
final_sequence[b'forward_primer']=directmatch[0].raw
|
||||
final_sequence[b'forward_match']=match.seq
|
||||
sequences[0][b'direction']=b'forward'
|
||||
sequences[0][b'forward_errors']=directmatch[1][0]
|
||||
sequences[0][b'forward_primer']=directmatch[0].raw
|
||||
sequences[0][b'forward_match']=match.seq
|
||||
|
||||
else:
|
||||
final_sequence[b'direction']=b'reverse'
|
||||
final_sequence[b'reverse_errors']=directmatch[1][0]
|
||||
final_sequence[b'reverse_primer']=directmatch[0].raw
|
||||
final_sequence[b'reverse_match']=match.seq
|
||||
sequences[0][b'direction']=b'reverse'
|
||||
sequences[0][b'reverse_errors']=directmatch[1][0]
|
||||
sequences[0][b'reverse_primer']=directmatch[0].raw
|
||||
sequences[0][b'reverse_match']=match.seq
|
||||
|
||||
# Keep only paired reverse primer
|
||||
infos = infos[directmatch[0]]
|
||||
|
||||
infos = infos[directmatch[0]]
|
||||
reverse_primer = list(infos.keys())[0]
|
||||
direct_primer = directmatch[0]
|
||||
|
||||
# If not aligned, look for other match in already computed matches (choose the one that makes the biggest amplicon)
|
||||
if not_aligned:
|
||||
i=1
|
||||
# TODO comment
|
||||
while i<len(all_direct_matches) and (all_direct_matches[i][1] is None or all_direct_matches[i][0].forward == directmatch[0].forward or all_direct_matches[i][0] == directmatch[0]):
|
||||
while i<len(all_direct_matches) and \
|
||||
(all_direct_matches[i][1] is None or \
|
||||
all_direct_matches[i][0].forward == directmatch[0].forward or \
|
||||
all_direct_matches[i][0] == directmatch[0] or \
|
||||
reverse_primer != all_direct_matches[i][0]) :
|
||||
i+=1
|
||||
if i < len(all_direct_matches):
|
||||
reversematch = all_direct_matches[i]
|
||||
else:
|
||||
reversematch = None
|
||||
|
||||
# Cut reverse primer out of 1st matched seq if it contains it, because if it's also in the other sequence, the next step will "choose" only the one on the other sequence
|
||||
if not_aligned:
|
||||
# do it on same seq
|
||||
if first_match_first_seq:
|
||||
r = reverse_primer.revcomp(sequences[0])
|
||||
else:
|
||||
r = reverse_primer.revcomp(sequences[1])
|
||||
if r is not None: # found
|
||||
if first_match_first_seq :
|
||||
sequences[0] = sequences[0][:r[1]]
|
||||
else:
|
||||
sequences[1] = sequences[1][:r[1]]
|
||||
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
|
||||
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
|
||||
# do the same on the other seq
|
||||
if first_match_first_seq:
|
||||
r = direct_primer.revcomp(sequences[1])
|
||||
else:
|
||||
r = direct_primer.revcomp(sequences[0])
|
||||
if r is not None: # found
|
||||
if first_match_first_seq:
|
||||
sequences[1] = sequences[1][:r[1]]
|
||||
else:
|
||||
sequences[0] = sequences[0][:r[1]]
|
||||
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq
|
||||
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality
|
||||
|
||||
|
||||
# Look for other primer in the other direction on the sequence, or
|
||||
# If sequences are not already aligned and reverse primer not found in most likely sequence (the one without the forward primer), try matching on the same sequence than the first match (primer in the other direction)
|
||||
if not not_aligned or (not_aligned and (reversematch is None or reversematch[1] is None)):
|
||||
if not not_aligned:
|
||||
sequence_to_match = second_matched_seq
|
||||
if not_aligned and first_match_first_seq:
|
||||
seq_to_match = sequences[1]
|
||||
else:
|
||||
sequence_to_match = first_matched_seq
|
||||
seq_to_match = sequences[0]
|
||||
reversematch = []
|
||||
# Compute begin
|
||||
begin=directmatch[1][2]+1 # end of match + 1 on the same sequence
|
||||
#begin=directmatch[1][2]+1 # end of match + 1 on the same sequence -- No, already cut out forward primer
|
||||
# Try reverse matching on the other sequence:
|
||||
new_seq = True
|
||||
pattern = 0
|
||||
@@ -365,7 +417,7 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
||||
primer=p
|
||||
# Saving original primer as 4th member of the tuple to serve as correct key in infos dict even if it might have been reversed complemented
|
||||
# (3rd member already used by directmatch)
|
||||
reversematch.append((primer, primer(sequence_to_match, same_sequence=not new_seq, pattern=pattern, begin=begin), None, p))
|
||||
reversematch.append((primer, primer(seq_to_match, same_sequence=not new_seq, pattern=pattern, begin=0), None, p))
|
||||
new_seq = False
|
||||
pattern+=1
|
||||
# Choose match closer to the end of the sequence
|
||||
@@ -378,11 +430,11 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
||||
message = b'No reverse primer match'
|
||||
else:
|
||||
message = b'No direct primer match'
|
||||
final_sequence[b'error']=message
|
||||
return False, final_sequence
|
||||
sequences[0][b'error']=message
|
||||
return False, sequences[0]
|
||||
|
||||
if reversematch is None:
|
||||
final_sequence[b'status']=b'partial'
|
||||
sequences[0][b'status']=b'partial'
|
||||
|
||||
if directmatch[0].forward:
|
||||
tags=(directmatch[1][3],None)
|
||||
@@ -392,78 +444,86 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
||||
samples = infos[None]
|
||||
|
||||
else:
|
||||
final_sequence[b'status']=b'full'
|
||||
sequences[0][b'status']=b'full'
|
||||
|
||||
if not not_aligned or first_match_first_seq:
|
||||
match = sequences[0][reversematch[1][1]:reversematch[1][2]]
|
||||
else:
|
||||
match = sequences[1][reversematch[1][1]:reversematch[1][2]]
|
||||
|
||||
match = second_matched_seq[reversematch[1][1]:reversematch[1][2]]
|
||||
match = match.reverse_complement
|
||||
|
||||
if not not_aligned or id(second_matched_seq) == id(sequenceF):
|
||||
final_sequence = final_sequence[0:reversematch[1][1]]
|
||||
else:
|
||||
cut_seq = sequenceR[reversematch[1][2]:]
|
||||
if not not_aligned:
|
||||
sequences[0] = sequences[0][0:reversematch[1][1]]
|
||||
elif first_match_first_seq:
|
||||
sequences[1] = sequences[1][reversematch[1][2]:]
|
||||
if not directmatch[0].forward:
|
||||
cut_seq = cut_seq.reverse_complement
|
||||
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq # used by alignpairedend tool
|
||||
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality # used by alignpairedend tool
|
||||
|
||||
sequences[1] = sequences[1].reverse_complement
|
||||
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
|
||||
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
|
||||
else:
|
||||
sequences[0] = sequences[0][reversematch[1][2]:]
|
||||
|
||||
if directmatch[0].forward:
|
||||
tags=(directmatch[1][3], reversematch[1][3])
|
||||
final_sequence[b'reverse_errors'] = reversematch[1][0]
|
||||
final_sequence[b'reverse_primer'] = reversematch[0].raw
|
||||
final_sequence[b'reverse_match'] = match.seq
|
||||
sequences[0][b'reverse_errors'] = reversematch[1][0]
|
||||
sequences[0][b'reverse_primer'] = reversematch[0].raw
|
||||
sequences[0][b'reverse_match'] = match.seq
|
||||
|
||||
else:
|
||||
tags=(reversematch[1][3], directmatch[1][3])
|
||||
final_sequence[b'forward_errors'] = reversematch[1][0]
|
||||
final_sequence[b'forward_primer'] = reversematch[0].raw
|
||||
final_sequence[b'forward_match'] = match.seq
|
||||
sequences[0][b'forward_errors'] = reversematch[1][0]
|
||||
sequences[0][b'forward_primer'] = reversematch[0].raw
|
||||
sequences[0][b'forward_match'] = match.seq
|
||||
|
||||
if tags[0] is not None:
|
||||
final_sequence[b'forward_tag'] = tags[0]
|
||||
sequences[0][b'forward_tag'] = tags[0]
|
||||
if tags[1] is not None:
|
||||
final_sequence[b'reverse_tag'] = tags[1]
|
||||
sequences[0][b'reverse_tag'] = tags[1]
|
||||
|
||||
samples = infos[reversematch[3]]
|
||||
|
||||
if not directmatch[0].forward:
|
||||
final_sequence = final_sequence.reverse_complement
|
||||
final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||
sequences[0] = sequences[0].reverse_complement
|
||||
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||
else:
|
||||
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
|
||||
|
||||
sample=None
|
||||
|
||||
if tags[0] is not None: # Direct tag known
|
||||
if tags[1] is not None: # Reverse tag known
|
||||
sample = samples.get(tags, None)
|
||||
else: # Only direct tag known
|
||||
s=[samples[x] for x in samples if x[0]==tags[0]]
|
||||
if len(s)==1:
|
||||
sample=s[0]
|
||||
elif len(s)>1:
|
||||
final_sequence[b'error']=b'multiple samples match tags'
|
||||
return False, final_sequence
|
||||
else:
|
||||
sample=None
|
||||
else:
|
||||
if tags[1] is not None: # Only reverse tag known
|
||||
s=[samples[x] for x in samples if x[1]==tags[1]]
|
||||
if len(s)==1:
|
||||
sample=s[0]
|
||||
elif len(s)>1:
|
||||
final_sequence[b'error']=b'multiple samples match tags'
|
||||
return False, final_sequence
|
||||
else:
|
||||
sample=None
|
||||
if not no_tags:
|
||||
if tags[0] is not None: # Direct tag known
|
||||
if tags[1] is not None: # Reverse tag known
|
||||
sample = samples.get(tags, None)
|
||||
else: # Only direct tag known
|
||||
s=[samples[x] for x in samples if x[0]==tags[0]]
|
||||
if len(s)==1:
|
||||
sample=s[0]
|
||||
elif len(s)>1:
|
||||
sequences[0][b'error']=b'Did not found reverse tag'
|
||||
return False, sequences[0]
|
||||
else:
|
||||
sample=None
|
||||
else:
|
||||
if tags[1] is not None: # Only reverse tag known
|
||||
s=[samples[x] for x in samples if x[1]==tags[1]]
|
||||
if len(s)==1:
|
||||
sample=s[0]
|
||||
elif len(s)>1:
|
||||
sequences[0][b'error']=b'Did not found forward tag'
|
||||
return False, sequences[0]
|
||||
else:
|
||||
sample=None
|
||||
|
||||
if sample is None:
|
||||
sequences[0][b'error']=b"No sample with that tag combination"
|
||||
return False, sequences[0]
|
||||
|
||||
if sample is None:
|
||||
final_sequence[b'error']=b"Cannot assign sequence to a sample"
|
||||
return False, final_sequence
|
||||
|
||||
final_sequence.update(sample)
|
||||
sequences[0].update(sample)
|
||||
|
||||
if not not_aligned:
|
||||
final_sequence[b'seq_length']=len(final_sequence)
|
||||
sequences[0][b'seq_length']=len(sequences[0])
|
||||
|
||||
return True, final_sequence
|
||||
return True, sequences[0]
|
||||
|
||||
|
||||
def run(config):
|
||||
@@ -486,7 +546,8 @@ def run(config):
|
||||
raise Exception("Could not open input reads")
|
||||
if input[2] != View_NUC_SEQS:
|
||||
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||
|
||||
i_dms = input[0]
|
||||
|
||||
if "reverse" in config["ngsfilter"]:
|
||||
|
||||
forward = input[1]
|
||||
@@ -527,8 +588,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view = output[1]
|
||||
|
||||
# If stdout output, create a temporary view in the input dms that will be deleted afterwards.
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||
|
||||
# Open the view containing the informations about the tags and the primers
|
||||
info_input = open_uri(config['ngsfilter']['info_view'])
|
||||
if info_input is None:
|
||||
@@ -549,10 +621,19 @@ def run(config):
|
||||
unidentified = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entries_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
# Check and store primers and tags
|
||||
infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned) # TODO obi verbose option
|
||||
try:
|
||||
infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned) # TODO obi verbose option
|
||||
except RollbackException, e:
|
||||
if unidentified is not None:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
||||
else:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
aligner = Primer_search(primer_list, config['ngsfilter']['error'])
|
||||
|
||||
@@ -564,50 +645,76 @@ def run(config):
|
||||
paired_p.revcomp.aligner = aligner
|
||||
|
||||
if not_aligned: # create columns used by alignpairedend tool
|
||||
Column.new_column(o_view, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=o_view[REVERSE_SEQ_COLUMN_NAME].version)
|
||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||
|
||||
Column.new_column(unidentified, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
|
||||
Column.new_column(unidentified, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=unidentified[REVERSE_SEQ_COLUMN_NAME].version)
|
||||
if unidentified is not None:
|
||||
Column.new_column(unidentified, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(unidentified, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=unidentified[REVERSE_SEQUENCE_COLUMN].version)
|
||||
|
||||
g = 0
|
||||
u = 0
|
||||
i = 0
|
||||
no_tags = config['ngsfilter']['notags']
|
||||
try:
|
||||
for i in range(entries_len):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if not_aligned:
|
||||
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
||||
else:
|
||||
modseq = [Nuc_Seq.new_from_stored(entries[i])]
|
||||
good, oseq = annotate(modseq, infos)
|
||||
good, oseq = annotate(modseq, infos, no_tags)
|
||||
if good:
|
||||
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||
g+=1
|
||||
elif unidentified is not None:
|
||||
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||
# Untrim sequences (put original back)
|
||||
if len(modseq) > 1:
|
||||
oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
|
||||
oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
|
||||
unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
|
||||
else:
|
||||
unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
|
||||
u+=1
|
||||
except Exception, e:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
||||
if unidentified is not None:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
||||
else:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
# Add comment about unidentified seqs
|
||||
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
||||
output[0].record_command_line(command_line)
|
||||
if unidentified is not None:
|
||||
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
# Add comment about unidentified seqs
|
||||
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
input[0].close()
|
||||
output[0].close()
|
||||
info_input[0].close()
|
||||
unidentified_input[0].close()
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
info_input[0].close(force=True)
|
||||
if unidentified is not None:
|
||||
unidentified_input[0].close(force=True)
|
||||
aligner.free()
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
87
python/obitools3/commands/rm.pyx
Normal file
87
python/obitools3/commands/rm.pyx
Normal file
@@ -0,0 +1,87 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.utils cimport tostr
|
||||
import os
|
||||
import shutil
|
||||
|
||||
__title__="Delete a view"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info", "obi rm")
|
||||
|
||||
# Open the input
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
if input is None:
|
||||
raise Exception("Could not read input")
|
||||
|
||||
# Check that it's a view
|
||||
if isinstance(input[1], View) :
|
||||
view = input[1]
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
dms = input[0]
|
||||
|
||||
# Get the path to the view file to remove
|
||||
path = dms.full_path # dms path
|
||||
view_path=path+b"/VIEWS/"
|
||||
view_path+=view.name
|
||||
view_path+=b".obiview"
|
||||
|
||||
to_remove = {}
|
||||
# For each column:
|
||||
for col_alias in view.keys():
|
||||
col = view[col_alias]
|
||||
col_name = col.original_name
|
||||
col_version = col.version
|
||||
col_type = col.data_type
|
||||
col_ref = (col_name, col_version)
|
||||
# build file name and AVL file names
|
||||
col_file_name = f"{tostr(path)}/{tostr(col.original_name)}.obicol/{tostr(col.original_name)}@{col.version}.odc"
|
||||
if col_type in [b'OBI_CHAR', b'OBI_QUAL', b'OBI_STR', b'OBI_SEQ']:
|
||||
avl_file_name = f"{tostr(path)}/OBIBLOB_INDEXERS/{tostr(col.original_name)}_{col.version}_indexer"
|
||||
else:
|
||||
avl_file_name = None
|
||||
to_remove[col_ref] = [col_file_name, avl_file_name]
|
||||
|
||||
# For each view:
|
||||
do_not_remove = []
|
||||
for vn in dms:
|
||||
v = dms[vn]
|
||||
# ignore the one being deleted
|
||||
if v.name != view.name:
|
||||
# check that none of the column is referenced, if referenced, remove from list to remove
|
||||
cols = [(v[c].original_name, v[c].version) for c in v.keys()]
|
||||
for col_ref in to_remove:
|
||||
if col_ref in cols:
|
||||
do_not_remove.append(col_ref)
|
||||
|
||||
for nr in do_not_remove:
|
||||
to_remove.pop(nr)
|
||||
|
||||
# Close the view and the DMS
|
||||
view.close()
|
||||
input[0].close(force=True)
|
||||
|
||||
#print(to_remove)
|
||||
|
||||
# rm AFTER view and DMS close
|
||||
os.remove(view_path)
|
||||
for col in to_remove:
|
||||
os.remove(to_remove[col][0])
|
||||
if to_remove[col][1] is not None:
|
||||
shutil.rmtree(to_remove[col][1])
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@@ -24,6 +24,7 @@ from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
|
||||
import time
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
|
||||
@@ -35,13 +36,14 @@ NULL_VALUE = {OBI_BOOL: OBIBool_NA,
|
||||
OBI_STR: b""}
|
||||
|
||||
|
||||
__title__="Sort view lines according to the value of a given attribute."
|
||||
__title__="Sort view lines according to the value of a given attribute"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi sort specific options')
|
||||
|
||||
@@ -59,7 +61,7 @@ def addOptions(parser):
|
||||
|
||||
|
||||
def line_cmp(line, key, pb):
|
||||
pb
|
||||
pb
|
||||
if line[key] is None:
|
||||
return NULL_VALUE[line.view[key].data_type_int]
|
||||
else:
|
||||
@@ -86,20 +88,28 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
keys = config['sort']['keys']
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
@@ -110,10 +120,14 @@ def run(config):
|
||||
|
||||
for k in keys: # TODO order?
|
||||
PyErr_CheckSignals()
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||
else:
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=config['sort']['reverse'])
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the sorted line selection
|
||||
try:
|
||||
@@ -132,16 +146,23 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
i_dms.close()
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
105
python/obitools3/commands/split.pyx
Normal file
105
python/obitools3/commands/split.pyx
Normal file
@@ -0,0 +1,105 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes
|
||||
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Split"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group("obi split specific options")
|
||||
|
||||
group.add_argument('-p','--prefix',
|
||||
action="store", dest="split:prefix",
|
||||
metavar="<PREFIX>",
|
||||
help="Prefix added to each subview name (included undefined)")
|
||||
|
||||
group.add_argument('-t','--tag-name',
|
||||
action="store", dest="split:tagname",
|
||||
metavar="<TAG_NAME>",
|
||||
help="Attribute/tag used to split the input")
|
||||
|
||||
group.add_argument('-u','--undefined',
|
||||
action="store", dest="split:undefined",
|
||||
default=b'UNDEFINED',
|
||||
metavar="<VIEW_NAME>",
|
||||
help="Name of the view where undefined sequenced are stored (will be PREFIX_VIEW_NAME)")
|
||||
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info", "obi split")
|
||||
|
||||
# Open the input
|
||||
input = open_uri(config["obi"]["inputURI"])
|
||||
if input is None:
|
||||
raise Exception("Could not read input view")
|
||||
i_dms = input[0]
|
||||
i_view = input[1]
|
||||
|
||||
# Initialize the progress bar
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
tag_to_split = config["split"]["tagname"]
|
||||
undefined = tobytes(config["split"]["undefined"])
|
||||
selections = {}
|
||||
|
||||
# Go through input view and split
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
line = i_view[i]
|
||||
if tag_to_split not in line or line[tag_to_split] is None or len(line[tag_to_split])==0:
|
||||
value = undefined
|
||||
else:
|
||||
value = line[tag_to_split]
|
||||
if value not in selections:
|
||||
selections[value] = Line_selection(i_view)
|
||||
selections[value].append(i)
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output views with the line selection
|
||||
try:
|
||||
for cat in selections:
|
||||
o_view_name = config["split"]["prefix"].encode()+cat
|
||||
o_view = selections[cat].materialize(o_view_name)
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
input_dms_name=[input[0].name]
|
||||
input_view_name=[input[1].name]
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "split", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
o_view.close()
|
||||
except Exception, e:
|
||||
raise RollbackException("obi split error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
i_dms.record_command_line(command_line)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -16,7 +16,7 @@ import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Compute basic statistics for attribute values."
|
||||
__title__="Compute basic statistics for attribute values"
|
||||
|
||||
'''
|
||||
`obi stats` computes basic statistics for attribute values of sequence records.
|
||||
@@ -119,9 +119,12 @@ def mean(values, options):
|
||||
|
||||
def variance(v):
|
||||
if len(v)==1:
|
||||
return 0
|
||||
return 0
|
||||
s = reduce(lambda x,y:(x[0]+y,x[1]+y**2),v,(0.,0.))
|
||||
return s[1]/(len(v)-1) - s[0]**2/len(v)/(len(v)-1)
|
||||
var = round(s[1]/(len(v)-1) - s[0]**2/len(v)/(len(v)-1), 5) # round to go around shady python rounding stuff when var is actually 0
|
||||
if var == -0.0: # then fix -0 to +0 if was rounded to -0
|
||||
var = 0.0
|
||||
return var
|
||||
|
||||
|
||||
def varpop(values, options):
|
||||
@@ -154,7 +157,7 @@ def run(config):
|
||||
else :
|
||||
taxo = None
|
||||
|
||||
statistics = set(config['stats']['minimum']) | set(config['stats']['maximum']) | set(config['stats']['mean'])
|
||||
statistics = set(config['stats']['minimum']) | set(config['stats']['maximum']) | set(config['stats']['mean']) | set(config['stats']['var']) | set(config['stats']['sd'])
|
||||
total = 0
|
||||
catcount={}
|
||||
totcount={}
|
||||
@@ -162,7 +165,7 @@ def run(config):
|
||||
lcat=0
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
@@ -195,7 +198,7 @@ def run(config):
|
||||
except KeyError:
|
||||
totcount[category]=totcount.get(category,0)+1
|
||||
for var in statistics:
|
||||
if var in line:
|
||||
if var in line and line[var] is not None:
|
||||
v = line[var]
|
||||
if var not in values:
|
||||
values[var]={}
|
||||
@@ -238,20 +241,40 @@ def run(config):
|
||||
else:
|
||||
sdvar= "%s"
|
||||
|
||||
hcat = "\t".join([pcat % x for x in config['stats']['categories']]) + "\t" +\
|
||||
"\t".join([minvar % x for x in config['stats']['minimum']]) + "\t" +\
|
||||
"\t".join([maxvar % x for x in config['stats']['maximum']]) + "\t" +\
|
||||
"\t".join([meanvar % x for x in config['stats']['mean']]) + "\t" +\
|
||||
"\t".join([varvar % x for x in config['stats']['var']]) + "\t" +\
|
||||
"\t".join([sdvar % x for x in config['stats']['sd']]) + \
|
||||
"\t count" + \
|
||||
"\t total"
|
||||
hcat = ""
|
||||
|
||||
for x in config['stats']['categories']:
|
||||
hcat += pcat % x
|
||||
hcat += "\t"
|
||||
|
||||
for x in config['stats']['minimum']:
|
||||
hcat += minvar % x
|
||||
hcat += "\t"
|
||||
|
||||
for x in config['stats']['maximum']:
|
||||
hcat += maxvar % x
|
||||
hcat += "\t"
|
||||
|
||||
for x in config['stats']['mean']:
|
||||
hcat += meanvar % x
|
||||
hcat += "\t"
|
||||
|
||||
for x in config['stats']['var']:
|
||||
hcat += varvar % x
|
||||
hcat += "\t"
|
||||
|
||||
for x in config['stats']['sd']:
|
||||
hcat += sdvar % x
|
||||
hcat += "\t"
|
||||
|
||||
hcat += "count\ttotal"
|
||||
|
||||
print(hcat)
|
||||
sorted_stats = sorted(catcount.items(), key = lambda kv:(totcount[kv[0]]), reverse=True)
|
||||
for i in range(len(sorted_stats)):
|
||||
c = sorted_stats[i][0]
|
||||
for v in c:
|
||||
if v is not None:
|
||||
if type(v) == bytes:
|
||||
print(pcat % tostr(v)+"\t", end="")
|
||||
else:
|
||||
print(pcat % str(v)+"\t", end="")
|
||||
@@ -265,9 +288,9 @@ def run(config):
|
||||
print((("%%%df" % lvarp[m]) % varp[m][c])+"\t", end="")
|
||||
for m in config['stats']['sd']:
|
||||
print((("%%%df" % lsigma[m]) % sigma[m][c])+"\t", end="")
|
||||
print("%7d" %catcount[c], end="")
|
||||
print("%9d" %totcount[c])
|
||||
print("%d" %catcount[c]+"\t", end="")
|
||||
print("%d" %totcount[c]+"\t")
|
||||
|
||||
input[0].close()
|
||||
input[0].close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
@@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@@ -12,15 +12,17 @@ from obitools3.utils cimport str2bytes
|
||||
import time
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Keep the N last lines of a view."
|
||||
__title__="Keep the N last lines of a view"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi tail specific options')
|
||||
|
||||
@@ -52,31 +54,41 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
start = max(len(i_view) - config['tail']['count'], 0)
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view) - start, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view) - start, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
|
||||
for i in range(start, len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@@ -97,16 +109,22 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close()
|
||||
i_dms.close()
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
230
python/obitools3/commands/taxonomy.pyx
Normal file
230
python/obitools3/commands/taxonomy.pyx
Normal file
@@ -0,0 +1,230 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.column.column cimport Column
|
||||
from functools import reduce
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes, tostr
|
||||
from io import BufferedWriter
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN, \
|
||||
QUALITY_COLUMN, \
|
||||
COUNT_COLUMN, \
|
||||
TAXID_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_INT
|
||||
from obitools3.dms.capi.obitaxonomy cimport MIN_LOCAL_TAXID
|
||||
import time
|
||||
import math
|
||||
import sys
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Add taxa with a new generated taxid to an NCBI taxonomy database"
|
||||
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi taxonomy specific options')
|
||||
|
||||
group.add_argument('-n', '--taxon-name-tag',
|
||||
action="store",
|
||||
dest="taxonomy:taxon_name_tag",
|
||||
metavar="<SCIENTIFIC_NAME_TAG>",
|
||||
default=b"SCIENTIFIC_NAME",
|
||||
help="Name of the tag giving the scientific name of the taxon "
|
||||
"(default: 'SCIENTIFIC_NAME').")
|
||||
|
||||
# group.add_argument('-g', '--try-genus-match',
|
||||
# action="store_true", dest="taxonomy:try_genus_match",
|
||||
# default=False,
|
||||
# help="Try matching the first word of <SCIENTIFIC_NAME_TAG> when can't find corresponding taxid for a taxon. "
|
||||
# "If there is a match it is added in the 'parent_taxid' tag. (Can be used by 'obi taxonomy' to add the taxon under that taxid).")
|
||||
|
||||
group.add_argument('-a', '--restricting-ancestor',
|
||||
action="store",
|
||||
dest="taxonomy:restricting_ancestor",
|
||||
metavar="<RESTRICTING_ANCESTOR>",
|
||||
default=None,
|
||||
help="Enables to restrict the addition of taxids under an ancestor specified by its taxid.")
|
||||
|
||||
group.add_argument('-t', '--taxid-tag',
|
||||
action="store",
|
||||
dest="taxonomy:taxid_tag",
|
||||
metavar="<TAXID_TAG>",
|
||||
default=b"TAXID",
|
||||
help="Name of the tag to store the new taxid "
|
||||
"(default: 'TAXID').")
|
||||
|
||||
group.add_argument('-l', '--log-file',
|
||||
action="store",
|
||||
dest="taxonomy:log_file",
|
||||
metavar="<LOG_FILE>",
|
||||
default='',
|
||||
help="Path to a log file to write informations about added taxids.")
|
||||
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info", "obi taxonomy")
|
||||
|
||||
# Open the input
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
if input is None:
|
||||
raise Exception("Could not read input view")
|
||||
i_dms = input[0]
|
||||
i_view = input[1]
|
||||
i_view_name = input[1].name
|
||||
|
||||
# Open the output: only the DMS, as the output view is going to be created by cloning the input view
|
||||
# (could eventually be done via an open_uri() argument)
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
input=False,
|
||||
dms_only=True)
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view_name = output[1]
|
||||
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in output DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
imported_view_name = o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
||||
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
||||
if i_dms != o_dms:
|
||||
imported_view_name = i_view_name
|
||||
i=0
|
||||
while imported_view_name in o_dms: # Making sure view name is unique in output DMS
|
||||
imported_view_name = i_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
||||
i_view = o_dms[imported_view_name]
|
||||
|
||||
# Clone output view from input view
|
||||
o_view = i_view.clone(o_view_name)
|
||||
if o_view is None:
|
||||
raise Exception("Couldn't create output view")
|
||||
i_view.close()
|
||||
|
||||
# Open taxonomy
|
||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||
if taxo_uri is None or taxo_uri[2] == bytes:
|
||||
raise Exception("Couldn't open taxonomy")
|
||||
taxo = taxo_uri[1]
|
||||
|
||||
# Initialize the progress bar
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
if config['taxonomy']['log_file']:
|
||||
logfile = open(config['taxonomy']['log_file'], 'w')
|
||||
else:
|
||||
logfile = sys.stdout
|
||||
if 'restricting_ancestor' in config['taxonomy']:
|
||||
res_anc = int(config['taxonomy']['restricting_ancestor'])
|
||||
else:
|
||||
res_anc = None
|
||||
taxid_column_name = config['taxonomy']['taxid_tag']
|
||||
parent_taxid_column_name = "PARENT_TAXID" # TODO macro
|
||||
taxon_name_column_name = config['taxonomy']['taxon_name_tag']
|
||||
taxid_column = Column.new_column(o_view, taxid_column_name, OBI_INT)
|
||||
if parent_taxid_column_name in o_view:
|
||||
parent_taxid_column = o_view[parent_taxid_column_name]
|
||||
else:
|
||||
parent_taxid_column = None
|
||||
#parent_taxid_column = Column.new_column(o_view, parent_taxid_column_name, OBI_INT)
|
||||
taxon_name_column = o_view[taxon_name_column_name]
|
||||
|
||||
for i in range(len(o_view)):
|
||||
PyErr_CheckSignals()
|
||||
#if pb is not None:
|
||||
# pb(i)
|
||||
taxon_name = taxon_name_column[i]
|
||||
taxon = taxo.get_taxon_by_name(taxon_name, res_anc)
|
||||
if taxon is not None:
|
||||
taxid_column[i] = taxon.taxid
|
||||
if logfile:
|
||||
print(f"Found taxon '{tostr(taxon_name)}' already existing with taxid {taxid_column[i]}", file=logfile)
|
||||
else: # try finding genus or other parent taxon from the first word
|
||||
#print(i, o_view[i].id)
|
||||
if parent_taxid_column is not None and parent_taxid_column[i] is not None:
|
||||
taxid_column[i] = taxo.add_taxon(taxon_name, 'species', parent_taxid_column[i])
|
||||
if logfile:
|
||||
print(f"Adding taxon '{tostr(taxon_name)}' under provided parent {parent_taxid_column[i]} with taxid {taxid_column[i]}", file=logfile)
|
||||
else:
|
||||
taxon_name_sp = taxon_name.split(b" ")
|
||||
taxon = taxo.get_taxon_by_name(taxon_name_sp[0], res_anc)
|
||||
if taxon is not None:
|
||||
parent_taxid_column[i] = taxon.taxid
|
||||
taxid_column[i] = taxo.add_taxon(taxon_name, 'species', taxon.taxid)
|
||||
if logfile:
|
||||
print(f"Adding taxon '{tostr(taxon_name)}' under '{tostr(taxon.name)}' ({taxon.taxid}) with taxid {taxid_column[i]}", file=logfile)
|
||||
else:
|
||||
taxid_column[i] = taxo.add_taxon(taxon_name, 'species', res_anc)
|
||||
if logfile:
|
||||
print(f"Adding taxon '{tostr(taxon_name)}' under provided restricting ancestor {res_anc} with taxid {taxid_column[i]}", file=logfile)
|
||||
|
||||
taxo.write(taxo.name, update=True)
|
||||
|
||||
except Exception, e:
|
||||
raise RollbackException("obi taxonomy error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
#if pb is not None:
|
||||
# pb(i, force=True)
|
||||
# print("", file=sys.stderr)
|
||||
|
||||
#logger("info", "\nTaxa already in the taxonomy: "+str(found_count)+"/"+str(len(o_view))+" ("+str(round(found_count*100.0/len(o_view), 2))+"%)")
|
||||
#logger("info", "\nParent taxids found: "+str(parent_found_count)+"/"+str(len(o_view))+" ("+str(round(parent_found_count*100.0/len(o_view), 2))+"%)")
|
||||
#logger("info", "\nTaxids not found: "+str(not_found_count)+"/"+str(len(o_view))+" ("+str(round(not_found_count*100.0/len(o_view), 2))+"%)")
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
input_dms_name=[input[0].name]
|
||||
input_view_name=[i_view_name]
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "taxonomy", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(o_dms, imported_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
@@ -23,6 +23,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
import shutil
|
||||
import string
|
||||
import random
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@@ -38,7 +39,7 @@ COL_COMMENTS_MAX_LEN = 2048
|
||||
MAX_INT = 2147483647 # used to generate random float values
|
||||
|
||||
|
||||
__title__="Tests if the obitools are working properly"
|
||||
__title__="Test if the obitools are working properly"
|
||||
|
||||
|
||||
default_config = {
|
||||
@@ -300,8 +301,11 @@ def fill_column(config, infos, col) :
|
||||
def create_random_column(config, infos) :
|
||||
alias = random.choice([b'', random_unique_name(infos)])
|
||||
tuples = random.choice([True, False])
|
||||
dict_column = False
|
||||
if not tuples :
|
||||
nb_elements_per_line=random.randint(1, config['test']['maxelts'])
|
||||
if nb_elements_per_line > 1:
|
||||
dict_column = True
|
||||
elements_names = []
|
||||
for i in range(nb_elements_per_line) :
|
||||
elements_names.append(random_unique_element_name(config, infos))
|
||||
@@ -317,6 +321,7 @@ def create_random_column(config, infos) :
|
||||
data_type,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=random_comments(config),
|
||||
alias=alias
|
||||
@@ -366,7 +371,7 @@ def random_new_view(config, infos, first=False):
|
||||
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||
else :
|
||||
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||
|
||||
infos['view'].write_config(config, "test", infos["command_line"], input_dms_name=[infos['dms'].name], input_view_name=["random"])
|
||||
print_test(config, repr(infos['view']))
|
||||
if v_to_clone is not None :
|
||||
if line_selection is None:
|
||||
@@ -441,7 +446,7 @@ def addOptions(parser):
|
||||
default=20,
|
||||
type=int,
|
||||
help="Maximum length of tuples. "
|
||||
"Default: 200")
|
||||
"Default: 20")
|
||||
|
||||
group.add_argument('--max_ini_col_count','-o',
|
||||
action="store", dest="test:maxinicolcount",
|
||||
@@ -454,10 +459,10 @@ def addOptions(parser):
|
||||
group.add_argument('--max_line_nb','-l',
|
||||
action="store", dest="test:maxlinenb",
|
||||
metavar='<MAX_LINE_NB>',
|
||||
default=10000,
|
||||
default=1000,
|
||||
type=int,
|
||||
help="Maximum number of lines in a column. "
|
||||
"Default: 10000")
|
||||
"Default: 1000")
|
||||
|
||||
group.add_argument('--max_elts_per_line','-e',
|
||||
action="store", dest="test:maxelts",
|
||||
@@ -497,7 +502,8 @@ def run(config):
|
||||
(b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
|
||||
(b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
|
||||
},
|
||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
|
||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view],
|
||||
'command_line': " ".join(sys.argv[1:])
|
||||
}
|
||||
|
||||
# TODO ???
|
||||
@@ -529,7 +535,7 @@ def run(config):
|
||||
test_taxo(config, infos)
|
||||
|
||||
infos['view'].close()
|
||||
infos['dms'].close()
|
||||
infos['dms'].close(force=True)
|
||||
shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
|
||||
|
||||
print("Done.")
|
||||
|
||||
@@ -5,5 +5,5 @@ from obitools3.dms.taxo.taxo cimport Taxonomy
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
|
||||
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy)
|
||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
|
||||
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config)
|
||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
|
||||
|
||||
@@ -8,22 +8,24 @@ from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column, Column_line
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN, TAXID_COLUMN, \
|
||||
TAXID_DIST_COLUMN, MERGED_TAXID_COLUMN, MERGED_COLUMN, MERGED_PREFIX
|
||||
TAXID_DIST_COLUMN, MERGED_TAXID_COLUMN, MERGED_COLUMN, MERGED_PREFIX, \
|
||||
REVERSE_QUALITY_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||
addMinimalOutputOption, \
|
||||
addTaxonomyOption, \
|
||||
addEltLimitOption
|
||||
addEltLimitOption, \
|
||||
addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, tostr
|
||||
from obitools3.utils cimport tobytes, tostr, str2bytes
|
||||
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Group sequence records together"
|
||||
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
@@ -32,6 +34,7 @@ def addOptions(parser):
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addEltLimitOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi uniq specific options')
|
||||
|
||||
@@ -56,7 +59,7 @@ def addOptions(parser):
|
||||
"(option can be used several times).")
|
||||
|
||||
|
||||
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config) :
|
||||
|
||||
cdef int taxid
|
||||
cdef Nuc_Seq_Stored seq
|
||||
@@ -69,7 +72,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
cdef object gn_sn
|
||||
cdef object fa_sn
|
||||
|
||||
# Create columns
|
||||
# Create columns and save them for efficiency
|
||||
if b"species" in o_view and o_view[b"species"].data_type_int != OBI_INT :
|
||||
o_view.delete_column(b"species")
|
||||
if b"species" not in o_view:
|
||||
@@ -77,6 +80,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"species",
|
||||
OBI_INT
|
||||
)
|
||||
species_column = o_view[b"species"]
|
||||
|
||||
if b"genus" in o_view and o_view[b"genus"].data_type_int != OBI_INT :
|
||||
o_view.delete_column(b"genus")
|
||||
@@ -85,6 +89,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"genus",
|
||||
OBI_INT
|
||||
)
|
||||
genus_column = o_view[b"genus"]
|
||||
|
||||
if b"family" in o_view and o_view[b"family"].data_type_int != OBI_INT :
|
||||
o_view.delete_column(b"family")
|
||||
@@ -93,6 +98,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"family",
|
||||
OBI_INT
|
||||
)
|
||||
family_column = o_view[b"family"]
|
||||
|
||||
if b"species_name" in o_view and o_view[b"species_name"].data_type_int != OBI_STR :
|
||||
o_view.delete_column(b"species_name")
|
||||
@@ -101,6 +107,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"species_name",
|
||||
OBI_STR
|
||||
)
|
||||
species_name_column = o_view[b"species_name"]
|
||||
|
||||
if b"genus_name" in o_view and o_view[b"genus_name"].data_type_int != OBI_STR :
|
||||
o_view.delete_column(b"genus_name")
|
||||
@@ -109,6 +116,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"genus_name",
|
||||
OBI_STR
|
||||
)
|
||||
genus_name_column = o_view[b"genus_name"]
|
||||
|
||||
if b"family_name" in o_view and o_view[b"family_name"].data_type_int != OBI_STR :
|
||||
o_view.delete_column(b"family_name")
|
||||
@@ -117,6 +125,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"family_name",
|
||||
OBI_STR
|
||||
)
|
||||
family_name_column = o_view[b"family_name"]
|
||||
|
||||
if b"rank" in o_view and o_view[b"rank"].data_type_int != OBI_STR :
|
||||
o_view.delete_column(b"rank")
|
||||
@@ -125,6 +134,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"rank",
|
||||
OBI_STR
|
||||
)
|
||||
rank_column = o_view[b"rank"]
|
||||
|
||||
if b"scientific_name" in o_view and o_view[b"scientific_name"].data_type_int != OBI_STR :
|
||||
o_view.delete_column(b"scientific_name")
|
||||
@@ -133,9 +143,19 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
b"scientific_name",
|
||||
OBI_STR
|
||||
)
|
||||
|
||||
for seq in o_view:
|
||||
PyErr_CheckSignals()
|
||||
scientific_name_column = o_view[b"scientific_name"]
|
||||
|
||||
# Initialize the progress bar
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
i=0
|
||||
for seq in o_view:
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if MERGED_TAXID_COLUMN in seq :
|
||||
m_taxids = []
|
||||
m_taxids_dict = seq[MERGED_TAXID_COLUMN]
|
||||
@@ -165,20 +185,24 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
else:
|
||||
fa_sn = None
|
||||
tfa = None
|
||||
|
||||
seq[b"species"] = tsp
|
||||
seq[b"genus"] = tgn
|
||||
seq[b"family"] = tfa
|
||||
|
||||
seq[b"species_name"] = sp_sn
|
||||
seq[b"genus_name"] = gn_sn
|
||||
seq[b"family_name"] = fa_sn
|
||||
|
||||
seq[b"rank"] = taxonomy.get_rank(taxid)
|
||||
seq[b"scientific_name"] = taxonomy.get_scientific_name(taxid)
|
||||
species_column[i] = tsp
|
||||
genus_column[i] = tgn
|
||||
family_column[i] = tfa
|
||||
|
||||
species_name_column[i] = sp_sn
|
||||
genus_name_column[i] = gn_sn
|
||||
family_name_column[i] = fa_sn
|
||||
|
||||
rank_column[i] = taxonomy.get_rank(taxid)
|
||||
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
|
||||
i+=1
|
||||
|
||||
if pb is not None:
|
||||
pb(len(o_view), force=True)
|
||||
|
||||
|
||||
|
||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
|
||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
|
||||
|
||||
cdef int i
|
||||
cdef int k
|
||||
@@ -187,6 +211,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
cdef int u_idx
|
||||
cdef int i_idx
|
||||
cdef int i_count
|
||||
cdef int o_count
|
||||
cdef str key_str
|
||||
cdef bytes key
|
||||
cdef bytes mkey
|
||||
@@ -209,7 +234,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
cdef Nuc_Seq_Stored i_seq
|
||||
cdef Nuc_Seq_Stored o_seq
|
||||
cdef Nuc_Seq_Stored u_seq
|
||||
cdef Column i_col
|
||||
cdef Column i_seq_col
|
||||
cdef Column i_id_col
|
||||
cdef Column i_taxid_col
|
||||
@@ -217,6 +241,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
cdef Column o_id_col
|
||||
cdef Column o_taxid_dist_col
|
||||
cdef Column o_merged_col
|
||||
cdef Column o_count_col
|
||||
cdef Column i_count_col
|
||||
cdef Column_line i_mcol
|
||||
cdef object taxid_dist_dict
|
||||
cdef object iter_view
|
||||
@@ -252,7 +278,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
mergedKeys_m = []
|
||||
for k in range(k_count):
|
||||
mergedKeys_m.append(MERGED_PREFIX + mergedKeys[k])
|
||||
|
||||
|
||||
# Check that not trying to remerge without total count information
|
||||
for key in mergedKeys_m:
|
||||
if key in view and COUNT_COLUMN not in view:
|
||||
raise Exception("\n>>>>\nError: trying to re-merge tags without total count tag. Run obi annotate to add the count tag from the relevant merged tag, i.e.: \nobi annotate --set-tag COUNT:'sum([value for key,value in sequence['MERGED_sample'].items()])' dms/input dms/output\n")
|
||||
|
||||
if categories is None:
|
||||
categories = []
|
||||
|
||||
@@ -274,7 +305,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
iter_view = iter(view)
|
||||
for i_seq in iter_view :
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
|
||||
# where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates
|
||||
@@ -284,6 +316,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
for x in categories :
|
||||
catl.append(i_seq[x])
|
||||
|
||||
#unique_id = tuple(catl) + (i_seq_col[i],)
|
||||
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
|
||||
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
|
||||
|
||||
@@ -320,7 +353,14 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
for k in range(k_count):
|
||||
key = mergedKeys[k]
|
||||
merged_col_name = mergedKeys_m[k]
|
||||
i_col = view[key]
|
||||
|
||||
# if merged_infos[merged_col_name]['nb_elts'] == 1:
|
||||
# raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
|
||||
|
||||
if merged_col_name in view:
|
||||
i_col = view[merged_col_name]
|
||||
else:
|
||||
i_col = view[key]
|
||||
|
||||
if merged_infos[merged_col_name]['nb_elts'] > max_elts:
|
||||
str_merged_cols.append(merged_col_name)
|
||||
@@ -338,6 +378,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
OBI_INT,
|
||||
nb_elements_per_line=merged_infos[merged_col_name]['nb_elts'],
|
||||
elements_names=list(merged_infos[merged_col_name]['elt_names']),
|
||||
dict_column=True,
|
||||
comments=i_col.comments,
|
||||
alias=merged_col_name
|
||||
)
|
||||
@@ -360,6 +401,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
OBI_INT,
|
||||
nb_elements_per_line=len(view),
|
||||
elements_names=[id for id in i_id_col],
|
||||
dict_column=True,
|
||||
alias=TAXID_DIST_COLUMN
|
||||
)
|
||||
|
||||
@@ -374,23 +416,35 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
alias=MERGED_COLUMN
|
||||
)
|
||||
|
||||
# Keep columns that are going to be used a lot in variables
|
||||
# Keep columns in variables for efficiency
|
||||
o_id_col = o_view[ID_COLUMN]
|
||||
if TAXID_DIST_COLUMN in o_view:
|
||||
o_taxid_dist_col = o_view[TAXID_DIST_COLUMN]
|
||||
if MERGED_COLUMN in o_view:
|
||||
o_merged_col = o_view[MERGED_COLUMN]
|
||||
if COUNT_COLUMN not in o_view:
|
||||
Column.new_column(o_view,
|
||||
COUNT_COLUMN,
|
||||
OBI_INT)
|
||||
o_count_col = o_view[COUNT_COLUMN]
|
||||
if COUNT_COLUMN in view:
|
||||
i_count_col = view[COUNT_COLUMN]
|
||||
|
||||
if pb is not None:
|
||||
pb(len(view), force=True)
|
||||
print("")
|
||||
|
||||
pb(len(view), force=True)
|
||||
print("")
|
||||
logger("info", "Second browsing through the input")
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(uniques), seconde=5)
|
||||
if pb is not None:
|
||||
pb = ProgressBar(len(view))
|
||||
|
||||
o_idx = 0
|
||||
total_treated = 0
|
||||
|
||||
for unique_id in uniques :
|
||||
PyErr_CheckSignals()
|
||||
pb(o_idx)
|
||||
|
||||
merged_sequences = uniques[unique_id]
|
||||
|
||||
@@ -407,7 +461,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
merged_list = list(set(merged_list)) # deduplicate the list
|
||||
o_merged_col[o_idx] = merged_list
|
||||
|
||||
o_seq[COUNT_COLUMN] = 0
|
||||
o_count = 0
|
||||
|
||||
if TAXID_DIST_COLUMN in u_seq and i_taxid_dist_col[u_idx] is not None:
|
||||
taxid_dist_dict = i_taxid_dist_col[u_idx]
|
||||
@@ -419,16 +473,20 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
merged_dict[mkey] = {}
|
||||
|
||||
for i_idx in merged_sequences:
|
||||
|
||||
PyErr_CheckSignals()
|
||||
|
||||
if pb is not None:
|
||||
pb(total_treated)
|
||||
|
||||
i_id = i_id_col[i_idx]
|
||||
i_seq = view[i_idx]
|
||||
|
||||
if COUNT_COLUMN not in i_seq or i_seq[COUNT_COLUMN] is None:
|
||||
if COUNT_COLUMN not in i_seq or i_count_col[i_idx] is None:
|
||||
i_count = 1
|
||||
else:
|
||||
i_count = i_seq[COUNT_COLUMN]
|
||||
i_count = i_count_col[i_idx]
|
||||
|
||||
o_seq[COUNT_COLUMN] += i_count
|
||||
o_count += i_count
|
||||
|
||||
for k in range(k_count):
|
||||
|
||||
@@ -463,42 +521,55 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
mcol[key2] = i_mcol[key2]
|
||||
else:
|
||||
mcol[key2] = mcol[key2] + i_mcol[key2]
|
||||
|
||||
# Write taxid_dist
|
||||
if mergeIds and TAXID_COLUMN in mergedKeys:
|
||||
if TAXID_DIST_COLUMN in str_merged_cols:
|
||||
o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
|
||||
else:
|
||||
o_taxid_dist_col[o_idx] = taxid_dist_dict
|
||||
|
||||
# Write merged dicts
|
||||
for mkey in merged_dict:
|
||||
if mkey in str_merged_cols:
|
||||
mkey_cols[mkey][o_idx] = str(merged_dict[mkey])
|
||||
else:
|
||||
mkey_cols[mkey][o_idx] = merged_dict[mkey]
|
||||
# Sets NA values to 0 # TODO discuss, for now keep as None and test for None instead of testing for 0 in tools
|
||||
#for key in mkey_cols[mkey][o_idx]:
|
||||
# if mkey_cols[mkey][o_idx][key] is None:
|
||||
# mkey_cols[mkey][o_idx][key] = 0
|
||||
|
||||
|
||||
for key in i_seq.keys():
|
||||
# Delete informations that differ between the merged sequences
|
||||
# TODO make special columns list?
|
||||
# TODO make special columns list? // could be more efficient
|
||||
if key != COUNT_COLUMN and key != ID_COLUMN and key != NUC_SEQUENCE_COLUMN and key in o_seq and o_seq[key] != i_seq[key] \
|
||||
and key not in merged_dict :
|
||||
o_seq[key] = None
|
||||
|
||||
total_treated += 1
|
||||
|
||||
# Write merged dicts
|
||||
for mkey in merged_dict:
|
||||
if mkey in str_merged_cols:
|
||||
mkey_cols[mkey][o_idx] = str(merged_dict[mkey])
|
||||
else:
|
||||
mkey_cols[mkey][o_idx] = merged_dict[mkey]
|
||||
# Sets NA values to 0 # TODO discuss, for now keep as None and test for None instead of testing for 0 in tools
|
||||
#for key in mkey_cols[mkey][o_idx]:
|
||||
# if mkey_cols[mkey][o_idx][key] is None:
|
||||
# mkey_cols[mkey][o_idx][key] = 0
|
||||
|
||||
# Write taxid_dist
|
||||
if mergeIds and TAXID_COLUMN in mergedKeys:
|
||||
if TAXID_DIST_COLUMN in str_merged_cols:
|
||||
o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
|
||||
else:
|
||||
o_taxid_dist_col[o_idx] = taxid_dist_dict
|
||||
|
||||
o_count_col[o_idx] = o_count
|
||||
o_idx += 1
|
||||
|
||||
# Deletes quality column if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
|
||||
if pb is not None:
|
||||
pb(len(view), force=True)
|
||||
|
||||
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
|
||||
if QUALITY_COLUMN in view:
|
||||
o_view.delete_column(QUALITY_COLUMN)
|
||||
if REVERSE_QUALITY_COLUMN in view:
|
||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||
|
||||
# Delete old columns that are now merged
|
||||
for k in range(k_count):
|
||||
if mergedKeys[k] in o_view:
|
||||
o_view.delete_column(mergedKeys[k])
|
||||
|
||||
if taxonomy is not None:
|
||||
print("") # TODO because in the middle of progress bar. Better solution?
|
||||
logger("info", "Merging taxonomy classification")
|
||||
merge_taxonomy_classification(o_view, taxonomy)
|
||||
merge_taxonomy_classification(o_view, taxonomy, config)
|
||||
|
||||
|
||||
|
||||
@@ -530,8 +601,23 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
i_dms = input[0]
|
||||
entries = input[1]
|
||||
o_view = output[1]
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
|
||||
# If stdout output create a temporary view that will be exported and deleted.
|
||||
if type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
o_dms = i_dms
|
||||
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||
else:
|
||||
o_view = output[1]
|
||||
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||
@@ -542,15 +628,19 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(entries), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(entries), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])
|
||||
except Exception, e:
|
||||
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
|
||||
if len(entries) > 0:
|
||||
try:
|
||||
uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])
|
||||
except Exception, e:
|
||||
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(len(entries), force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@@ -560,13 +650,23 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
input[0].close()
|
||||
output[0].close()
|
||||
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ cdef extern from "obidms.h" nogil:
|
||||
int obi_close_dms(OBIDMS_p dms, bint force)
|
||||
char* obi_dms_get_dms_path(OBIDMS_p dms)
|
||||
char* obi_dms_get_full_path(OBIDMS_p dms, const_char_p path_name)
|
||||
char* obi_dms_formatted_infos(OBIDMS_p dms, bint detailed)
|
||||
void obi_close_atexit()
|
||||
|
||||
obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number)
|
||||
|
||||
@@ -31,6 +31,7 @@ cdef extern from "obidmscolumn.h" nogil:
|
||||
const_char_p elements_names
|
||||
OBIType_t returned_data_type
|
||||
OBIType_t stored_data_type
|
||||
bint dict_column
|
||||
bint tuples
|
||||
bint to_eval
|
||||
time_t creation_date
|
||||
@@ -68,3 +69,6 @@ cdef extern from "obidmscolumn.h" nogil:
|
||||
int obi_column_write_comments(OBIDMS_column_p column, const char* comments)
|
||||
|
||||
int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value)
|
||||
|
||||
char* obi_column_formatted_infos(OBIDMS_column_p column, bint detailed)
|
||||
|
||||
@@ -8,6 +8,7 @@ cdef extern from "obi_ecopcr.h" nogil:
|
||||
|
||||
int obi_ecopcr(const char* input_dms_name,
|
||||
const char* i_view_name,
|
||||
const char* tax_dms_name,
|
||||
const char* taxonomy_name,
|
||||
const char* output_dms_name,
|
||||
const char* o_view_name,
|
||||
@@ -23,6 +24,7 @@ cdef extern from "obi_ecopcr.h" nogil:
|
||||
double salt_concentration,
|
||||
int salt_correction_method,
|
||||
int keep_nucleotides,
|
||||
bint keep_primers,
|
||||
bint kingdom_mode)
|
||||
|
||||
|
||||
|
||||
@@ -11,4 +11,5 @@ cdef extern from "obi_ecotag.h" nogil:
|
||||
const char* taxonomy_name,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
double ecotag_threshold)
|
||||
double ecotag_threshold,
|
||||
double bubble_threshold)
|
||||
|
||||
@@ -7,6 +7,8 @@ from libc.stdint cimport int32_t
|
||||
|
||||
cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
extern int MIN_LOCAL_TAXID
|
||||
|
||||
struct ecotxnode :
|
||||
int32_t taxid
|
||||
int32_t rank
|
||||
@@ -18,6 +20,13 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
ctypedef ecotxnode ecotx_t
|
||||
|
||||
|
||||
struct econame_t : # can't get this struct to be accepted by Cython ('unknown size')
|
||||
char* name
|
||||
char* class_name
|
||||
int32_t is_scientific_name
|
||||
ecotxnode* taxon
|
||||
|
||||
|
||||
struct ecotxidx_t :
|
||||
int32_t count
|
||||
int32_t max_taxid
|
||||
@@ -30,9 +39,14 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
char** label
|
||||
|
||||
|
||||
struct econameidx_t :
|
||||
int32_t count
|
||||
econame_t* names
|
||||
|
||||
|
||||
struct OBIDMS_taxonomy_t :
|
||||
ecorankidx_t* ranks
|
||||
# econameidx_t* names
|
||||
econameidx_t* names
|
||||
ecotxidx_t* taxa
|
||||
|
||||
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
|
||||
@@ -44,14 +58,18 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)
|
||||
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p tax_name)
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p tax_name, bint update)
|
||||
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
|
||||
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
|
||||
|
||||
char* obi_taxo_get_name_from_name_idx(OBIDMS_taxonomy_p taxonomy, int32_t idx)
|
||||
|
||||
ecotx_t* obi_taxo_get_taxon_from_name_idx(OBIDMS_taxonomy_p taxonomy, int32_t idx)
|
||||
|
||||
bint obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
|
||||
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
@@ -71,4 +89,4 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
|
||||
|
||||
const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
|
||||
|
||||
|
||||
|
||||
@@ -53,6 +53,8 @@ cdef extern from "obitypes.h" nogil:
|
||||
extern const_char_p OBIQual_char_NA
|
||||
extern uint8_t* OBIQual_int_NA
|
||||
extern void* OBITuple_NA
|
||||
|
||||
extern obiint_t OBI_INT_MAX
|
||||
|
||||
const_char_p name_data_type(int data_type)
|
||||
|
||||
|
||||
@@ -24,7 +24,10 @@ cdef extern from "obiview.h" nogil:
|
||||
extern const_char_p ID_COLUMN
|
||||
extern const_char_p DEFINITION_COLUMN
|
||||
extern const_char_p QUALITY_COLUMN
|
||||
extern const_char_p REVERSE_QUALITY_COLUMN
|
||||
extern const_char_p REVERSE_SEQUENCE_COLUMN
|
||||
extern const_char_p COUNT_COLUMN
|
||||
extern const_char_p SCIENTIFIC_NAME_COLUMN
|
||||
extern const_char_p TAXID_COLUMN
|
||||
extern const_char_p MERGED_TAXID_COLUMN
|
||||
extern const_char_p MERGED_PREFIX
|
||||
@@ -92,6 +95,7 @@ cdef extern from "obiview.h" nogil:
|
||||
index_t nb_elements_per_line,
|
||||
char* elements_names,
|
||||
bint elt_names_formatted,
|
||||
bint dict_column,
|
||||
bint tuples,
|
||||
bint to_eval,
|
||||
const_char_p indexer_name,
|
||||
@@ -100,14 +104,18 @@ cdef extern from "obiview.h" nogil:
|
||||
const_char_p comments,
|
||||
bint create)
|
||||
|
||||
int obi_view_delete_column(Obiview_p view, const_char_p column_name)
|
||||
|
||||
int obi_view_delete_column(Obiview_p view, const_char_p column_name, bint delete_file)
|
||||
|
||||
OBIDMS_column_p obi_view_get_column(Obiview_p view, const_char_p column_name)
|
||||
|
||||
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
|
||||
|
||||
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
|
||||
|
||||
|
||||
char* obi_view_formatted_infos(Obiview_p view, bint detailed)
|
||||
|
||||
char* obi_view_formatted_infos_one_line(Obiview_p view)
|
||||
|
||||
int obi_view_write_comments(Obiview_p view, const_char_p comments)
|
||||
|
||||
int obi_view_add_comment(Obiview_p view, const_char_p key, const_char_p value)
|
||||
|
||||
@@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
|
||||
|
||||
cdef inline OBIDMS_column_p pointer(self)
|
||||
cdef read_elements_names(self)
|
||||
cpdef list keys(self)
|
||||
|
||||
@staticmethod
|
||||
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
|
||||
|
||||
@@ -7,13 +7,15 @@ __OBIDMS_COLUMN_CLASS__ = {}
|
||||
from ..capi.obitypes cimport name_data_type, \
|
||||
obitype_t, \
|
||||
obiversion_t, \
|
||||
OBI_QUAL
|
||||
OBI_QUAL, \
|
||||
OBI_STR
|
||||
|
||||
from ..capi.obidms cimport obi_import_column
|
||||
|
||||
from ..capi.obidmscolumn cimport OBIDMS_column_header_p, \
|
||||
obi_close_column, \
|
||||
obi_get_elements_names, \
|
||||
obi_column_formatted_infos, \
|
||||
obi_column_write_comments
|
||||
|
||||
from ..capi.obiutils cimport obi_format_date
|
||||
@@ -21,7 +23,11 @@ from ..capi.obiutils cimport obi_format_date
|
||||
from ..capi.obiview cimport obi_view_add_column, \
|
||||
obi_view_get_pointer_on_column_in_view, \
|
||||
Obiview_p, \
|
||||
NUC_SEQUENCE_COLUMN
|
||||
NUC_SEQUENCE_COLUMN, \
|
||||
QUALITY_COLUMN, \
|
||||
REVERSE_SEQUENCE_COLUMN, \
|
||||
REVERSE_QUALITY_COLUMN
|
||||
|
||||
|
||||
from ..object cimport OBIDeactivatedInstanceError
|
||||
|
||||
@@ -34,8 +40,9 @@ from obitools3.utils cimport tobytes, \
|
||||
|
||||
from obitools3.dms.column import typed_column
|
||||
|
||||
from libc.stdlib cimport free
|
||||
|
||||
from libc.stdlib cimport free
|
||||
from libc.string cimport strcpy
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
@@ -84,6 +91,7 @@ cdef class Column(OBIWrapper) :
|
||||
obitype_t data_type,
|
||||
index_t nb_elements_per_line=1,
|
||||
list elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
bint to_eval=False,
|
||||
object associated_column_name=b"",
|
||||
@@ -92,6 +100,7 @@ cdef class Column(OBIWrapper) :
|
||||
object alias=b""):
|
||||
# TODO indexer_name?
|
||||
|
||||
cdef Column column
|
||||
cdef bytes column_name_b = tobytes(column_name)
|
||||
cdef bytes alias_b = tobytes(alias)
|
||||
cdef bytes comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
|
||||
@@ -120,13 +129,25 @@ cdef class Column(OBIWrapper) :
|
||||
else:
|
||||
elements_names_p = NULL
|
||||
|
||||
if column_name_b == b"SAMPLE" or column_name_b == b"sample":
|
||||
# force str type
|
||||
data_type = OBI_STR
|
||||
|
||||
if data_type == OBI_QUAL:
|
||||
if associated_column_name_b == b"":
|
||||
if NUC_SEQUENCE_COLUMN not in view:
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = NUC_SEQUENCE_COLUMN
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
if column_name == QUALITY_COLUMN:
|
||||
if NUC_SEQUENCE_COLUMN not in view:
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = NUC_SEQUENCE_COLUMN
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
if REVERSE_SEQUENCE_COLUMN not in view:
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = REVERSE_SEQUENCE_COLUMN
|
||||
associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
|
||||
|
||||
|
||||
if (obi_view_add_column(view = view.pointer(),
|
||||
column_name = column_name_b,
|
||||
@@ -137,6 +158,7 @@ cdef class Column(OBIWrapper) :
|
||||
nb_elements_per_line = nb_elements_per_line,
|
||||
elements_names = elements_names_p,
|
||||
elt_names_formatted = False,
|
||||
dict_column = dict_column,
|
||||
tuples = tuples,
|
||||
to_eval = to_eval,
|
||||
indexer_name = NULL,
|
||||
@@ -146,8 +168,19 @@ cdef class Column(OBIWrapper) :
|
||||
create = True)<0):
|
||||
raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
|
||||
return Column.open(view, alias_b)
|
||||
|
||||
column = Column.open(view, alias_b)
|
||||
|
||||
# Automatically associate nuc sequence column to quality column if necessary
|
||||
if data_type == OBI_QUAL:
|
||||
if column_name == QUALITY_COLUMN:
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
|
||||
return column
|
||||
|
||||
|
||||
@staticmethod
|
||||
@@ -174,7 +207,7 @@ cdef class Column(OBIWrapper) :
|
||||
|
||||
column_p = column_pp[0]
|
||||
column_type = column_p.header.returned_data_type
|
||||
column_class = Column.get_column_class(column_type, (column_p.header.nb_elements_per_line > 1), column_p.header.tuples)
|
||||
column_class = Column.get_column_class(column_type, (column_p.header.nb_elements_per_line > 1 or column_p.header.dict_column == True), column_p.header.tuples)
|
||||
column = OBIWrapper.new_wrapper(column_class, column_pp)
|
||||
|
||||
column._view = view
|
||||
@@ -210,6 +243,7 @@ cdef class Column(OBIWrapper) :
|
||||
nb_elements_per_line = -1,
|
||||
elements_names = NULL,
|
||||
elt_names_formatted = False,
|
||||
dict_column = False,
|
||||
tuples = False,
|
||||
to_eval = False,
|
||||
indexer_name = NULL,
|
||||
@@ -276,10 +310,25 @@ cdef class Column(OBIWrapper) :
|
||||
|
||||
@OBIWrapper.checkIsActive
|
||||
def __repr__(self) :
|
||||
cdef bytes s
|
||||
s = self._alias + b", data type: " + self.data_type
|
||||
return bytes2str(s)
|
||||
cdef str s
|
||||
cdef char* sc
|
||||
cdef OBIDMS_column_p pointer = self.pointer()
|
||||
sc = obi_column_formatted_infos(pointer, False)
|
||||
s = bytes2str(sc)
|
||||
free(sc)
|
||||
return s
|
||||
|
||||
|
||||
@OBIWrapper.checkIsActive
|
||||
def repr_longformat(self) :
|
||||
cdef str s
|
||||
cdef char* sc
|
||||
cdef OBIDMS_column_p pointer = self.pointer()
|
||||
sc = obi_column_formatted_infos(pointer, True)
|
||||
s = bytes2str(sc)
|
||||
free(sc)
|
||||
return s
|
||||
|
||||
|
||||
def close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C
|
||||
|
||||
@@ -305,7 +354,10 @@ cdef class Column(OBIWrapper) :
|
||||
free(elts_names_b)
|
||||
return elts_names_list
|
||||
|
||||
|
||||
cpdef list keys(self):
|
||||
return self._elements_names
|
||||
|
||||
|
||||
# Column alias property getter and setter
|
||||
@property
|
||||
def name(self):
|
||||
@@ -322,7 +374,7 @@ cdef class Column(OBIWrapper) :
|
||||
@property
|
||||
def elements_names(self):
|
||||
return self._elements_names
|
||||
|
||||
|
||||
# nb_elements_per_line property getter
|
||||
@property
|
||||
def nb_elements_per_line(self):
|
||||
@@ -330,6 +382,13 @@ cdef class Column(OBIWrapper) :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.nb_elements_per_line
|
||||
|
||||
# dict_column property getter
|
||||
@property
|
||||
def dict_column(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.dict_column
|
||||
|
||||
# data_type property getter
|
||||
@property
|
||||
def data_type(self):
|
||||
@@ -386,6 +445,31 @@ cdef class Column(OBIWrapper) :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return obi_format_date(self.pointer().header.creation_date)
|
||||
|
||||
|
||||
# associated_column name property getter and setter
|
||||
@property
|
||||
def associated_column_name(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.column_name
|
||||
|
||||
@associated_column_name.setter
|
||||
def associated_column_name(self, object new_name):
|
||||
strcpy(self.pointer().header.associated_column.column_name, tobytes(new_name))
|
||||
|
||||
|
||||
# associated_column version property getter and setter
|
||||
@property
|
||||
def associated_column_version(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.version
|
||||
|
||||
@associated_column_version.setter
|
||||
def associated_column_version(self, int new_version):
|
||||
self.pointer().header.associated_column.version = new_version
|
||||
|
||||
|
||||
# comments property getter
|
||||
@property
|
||||
def comments(self):
|
||||
|
||||
@@ -38,11 +38,13 @@ cdef class Column_bool(Column):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
object comments={}):
|
||||
return Column.new_column(view, column_name, OBI_BOOL,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -36,12 +36,14 @@ cdef class Column_char(Column):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
object comments={}):
|
||||
|
||||
return Column.new_column(view, column_name, OBI_CHAR,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -36,12 +36,14 @@ cdef class Column_float(Column):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
object comments={}):
|
||||
|
||||
return Column.new_column(view, column_name, OBI_FLOAT,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -38,12 +38,14 @@ cdef class Column_int(Column):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
object comments={}):
|
||||
|
||||
return Column.new_column(view, column_name, OBI_INT,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ cdef class Column_qual(Column_idx):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
object associated_column_name=b"",
|
||||
int associated_column_version=-1,
|
||||
object comments={}):
|
||||
@@ -45,6 +46,7 @@ cdef class Column_qual(Column_idx):
|
||||
return Column.new_column(view, column_name, OBI_QUAL,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=False,
|
||||
associated_column_name=associated_column_name,
|
||||
associated_column_version=associated_column_name,
|
||||
|
||||
@@ -39,12 +39,14 @@ cdef class Column_seq(Column_idx):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
object comments={}):
|
||||
|
||||
return Column.new_column(view, column_name, OBI_SEQ,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -38,12 +38,14 @@ cdef class Column_str(Column_idx):
|
||||
object column_name,
|
||||
index_t nb_elements_per_line=1,
|
||||
object elements_names=None,
|
||||
bint dict_column=False,
|
||||
bint tuples=False,
|
||||
object comments={}):
|
||||
|
||||
return Column.new_column(view, column_name, OBI_STR,
|
||||
nb_elements_per_line=nb_elements_per_line,
|
||||
elements_names=elements_names,
|
||||
dict_column=dict_column,
|
||||
tuples=tuples,
|
||||
comments=comments)
|
||||
|
||||
@@ -72,6 +74,9 @@ cdef class Column_str(Column_idx):
|
||||
if value is None :
|
||||
value_b = <char*>OBIStr_NA
|
||||
else :
|
||||
if self.name == b'sample' or self.name == b'SAMPLE':
|
||||
if type(value) == int:
|
||||
value = str(value) # force sample ids to be str
|
||||
value_bytes = tobytes(value)
|
||||
value_b = <char*>value_bytes
|
||||
|
||||
@@ -135,6 +140,9 @@ cdef class Column_multi_elts_str(Column_multi_elts_idx):
|
||||
if value is None :
|
||||
value_b = <char*>OBIStr_NA
|
||||
else :
|
||||
if self.name == b'sample' or self.name == b'SAMPLE':
|
||||
if type(value) == int:
|
||||
value = str(value) # force sample ids to be str
|
||||
value_bytes = tobytes(value)
|
||||
value_b = <char*>value_bytes
|
||||
|
||||
@@ -204,6 +212,9 @@ cdef class Column_tuples_str(Column_idx):
|
||||
i = 0
|
||||
for elt in value :
|
||||
if elt is not None and elt != '':
|
||||
if self.name == b'sample' or self.name == b'SAMPLE':
|
||||
if type(elt) == int:
|
||||
elt = str(elt) # force sample ids to be str
|
||||
elt_b = tobytes(elt)
|
||||
strcpy(array+i, <char*>elt_b)
|
||||
i = i + len(elt_b) + 1
|
||||
|
||||
@@ -10,7 +10,8 @@ from .capi.obidms cimport obi_open_dms, \
|
||||
obi_dms_exists, \
|
||||
obi_dms_get_full_path, \
|
||||
obi_close_atexit, \
|
||||
obi_dms_write_comments
|
||||
obi_dms_write_comments, \
|
||||
obi_dms_formatted_infos
|
||||
|
||||
from .capi.obitypes cimport const_char_p
|
||||
|
||||
@@ -32,6 +33,8 @@ from .object import OBIWrapper
|
||||
import json
|
||||
import time
|
||||
|
||||
from libc.stdlib cimport free
|
||||
|
||||
|
||||
cdef class DMS(OBIWrapper):
|
||||
|
||||
@@ -94,16 +97,16 @@ cdef class DMS(OBIWrapper):
|
||||
return dms
|
||||
|
||||
|
||||
def close(self) :
|
||||
def close(self, force=False) :
|
||||
'''
|
||||
Closes the DMS instance and free the associated memory
|
||||
Closes the DMS instance and free the associated memory (no counter, closing is final)
|
||||
|
||||
The `close` method is automatically called by the object destructor.
|
||||
'''
|
||||
cdef OBIDMS_p pointer = self.pointer()
|
||||
if self.active() :
|
||||
OBIWrapper.close(self)
|
||||
if (obi_close_dms(pointer, False)) < 0 :
|
||||
if (obi_close_dms(pointer, force=force)) < 0 :
|
||||
raise Exception("Problem closing an OBIDMS")
|
||||
|
||||
|
||||
@@ -223,11 +226,24 @@ cdef class DMS(OBIWrapper):
|
||||
|
||||
|
||||
@OBIWrapper.checkIsActive
|
||||
def __repr__(self):
|
||||
cdef str s
|
||||
s=""
|
||||
for view_name in self.keys():
|
||||
s = s + repr(self.get_view(view_name)) + "\n"
|
||||
def __repr__(self) :
|
||||
cdef str s
|
||||
cdef char* sc
|
||||
cdef OBIDMS_p pointer = self.pointer()
|
||||
sc = obi_dms_formatted_infos(pointer, False)
|
||||
s = bytes2str(sc)
|
||||
free(sc)
|
||||
return s
|
||||
|
||||
|
||||
@OBIWrapper.checkIsActive
|
||||
def repr_longformat(self) :
|
||||
cdef str s
|
||||
cdef char* sc
|
||||
cdef OBIDMS_p pointer = self.pointer()
|
||||
sc = obi_dms_formatted_infos(pointer, True)
|
||||
s = bytes2str(sc)
|
||||
free(sc)
|
||||
return s
|
||||
|
||||
|
||||
@@ -254,12 +270,13 @@ cdef class DMS(OBIWrapper):
|
||||
# bash command history property getter
|
||||
@property
|
||||
def bash_history(self):
|
||||
s = b"#!/bin/bash\n\n"
|
||||
#s = b"#!${bash}/bin/bash\n\n"
|
||||
s = b""
|
||||
first = True
|
||||
for command in self.command_line_history:
|
||||
s+=b"#"
|
||||
s+=command[b"time"]
|
||||
s+=b"\n"
|
||||
s+=b"\nobi "
|
||||
s+=command[b"command"]
|
||||
s+=b"\n"
|
||||
return s
|
||||
|
||||
@@ -39,4 +39,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||
cpdef object build_quality_array(self, list quality)
|
||||
cpdef bytes build_reverse_complement(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef repr_bytes(self)
|
||||
|
||||
@@ -431,9 +431,12 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
||||
|
||||
def __repr__(self):
|
||||
return bytes2str(self.repr_bytes())
|
||||
|
||||
cpdef repr_bytes(self):
|
||||
if self.quality is None:
|
||||
formatter = FastaFormat()
|
||||
else:
|
||||
formatter = FastqFormat()
|
||||
return bytes2str(formatter(self))
|
||||
return formatter(self)
|
||||
|
||||
|
||||