Compare commits
297 Commits
Eric_new_P
...
pip-standa
Author | SHA1 | Date | |
---|---|---|---|
7c2787b6b3 | |||
14eca43eac | |||
0b4ea49539 | |||
cd88c37a7e | |||
1095a617a3 | |||
5a05258fcb | |||
10ab557259 | |||
06178d9d61 | |||
3abe1b7ace | |||
802a3f5933 | |||
7e20870719 | |||
e8090a44c9 | |||
832f582802 | |||
58d0c850c2 | |||
7737211ac2 | |||
c953f0cb00 | |||
bb045c3ae9 | |||
2a4f1b8feb | |||
24a63f8732 | |||
478d19ab43 | |||
e3c565d6be | |||
d88390c6d8 | |||
50e7cd61a6 | |||
49d5f6fb1e | |||
b45c2ee653 | |||
6afd1294a7 | |||
a9ba7744cf | |||
185a95e667 | |||
8835a1a983 | |||
1ee50b7222 | |||
720bb65b24 | |||
2a1ab9db29 | |||
4bc52c08c2 | |||
306da846e3 | |||
af57e532da | |||
52de6f2717 | |||
29c56572cf | |||
de3d12af17 | |||
9ccddd5280 | |||
e026e9ec83 | |||
4ddd1a1c37 | |||
3015310535 | |||
08bcbcd357 | |||
04a3682307 | |||
6ca6d27ecb | |||
8f18907566 | |||
0b62619e4e | |||
c7f5b8d980 | |||
59017c0d6b | |||
9f6bba183f | |||
2a6a112d29 | |||
c437931a35 | |||
eb586b2f53 | |||
9556130b11 | |||
005aaeec06 | |||
579f56bb54 | |||
da445066f3 | |||
0a407436da | |||
54efff36c4 | |||
6acb21712a | |||
12087a6c3a | |||
fbabbceb5a | |||
6f27734d71 | |||
b3bfa9ca65 | |||
ece942e771 | |||
ef8dc85f3c | |||
f942dd856f | |||
730ea99f85 | |||
4d51f4f015 | |||
e9c1d5e48d | |||
7fc1b578cf | |||
31053591b5 | |||
b0da36cb48 | |||
d1f1fd432e | |||
75a28929a7 | |||
d076ea9900 | |||
6b1c41f3fb | |||
362df50fe9 | |||
b1090574da | |||
8faabd3ebf | |||
35f3e7c30b | |||
8a8e9e50b2 | |||
c7ff53b948 | |||
1b7bccb236 | |||
d09aa43133 | |||
123e5dc0ac | |||
320561a582 | |||
92c0fbc9bf | |||
b11d52d630 | |||
6305282305 | |||
d53323e7f4 | |||
e18b762d81 | |||
0a0f0682a9 | |||
4802e32f72 | |||
b027762059 | |||
da0e3d4043 | |||
da76f911db | |||
61ad2deeca | |||
eb6d5581bd | |||
343dbc7e4d | |||
6d018a2d28 | |||
2c2df4e098 | |||
8ce6dd6d1a | |||
df70086384 | |||
32d8396ee2 | |||
6a8670d24a | |||
ec73fa840a | |||
11032ec90b | |||
8a9ba8b0a8 | |||
135d3b6e67 | |||
58589e04be | |||
e6bbe13d81 | |||
61b00d6013 | |||
8029493c10 | |||
aa5ee53478 | |||
e31c8ea57a | |||
9e700ddc21 | |||
e9a41c5b97 | |||
35cf2962cc | |||
74be3c39f0 | |||
c6ee0bade9 | |||
ffd5bc76bf | |||
704d9b0474 | |||
86bb582a17 | |||
bc8c394061 | |||
cef458f570 | |||
2736a92699 | |||
79f4185757 | |||
1b6b6d825a | |||
3847850a9d | |||
b57e938cc4 | |||
2dc7fcceac | |||
e096b929dc | |||
2c634dae7c | |||
7a4cdc0cfe | |||
e8dc5eb123 | |||
3fcf29a76f | |||
080a97cccf | |||
9c9aec2556 | |||
303648bd47 | |||
2ba6d16147 | |||
275d85dc5d | |||
a39f9697be | |||
b98880b7fa | |||
895d09b133 | |||
c02c15b93f | |||
3e8c187f0b | |||
7f6d1597fc | |||
1de308a856 | |||
892ed83a33 | |||
6911bf4d70 | |||
f0c147c252 | |||
4aef20add8 | |||
62614a8538 | |||
ffebc6acfb | |||
b91b3176b0 | |||
31d8ba5085 | |||
a166a169cf | |||
8a10072d99 | |||
b380368264 | |||
1f4e82e6f6 | |||
6825fc13ab | |||
49c17ab7b4 | |||
2684535e26 | |||
123fb9d7ba | |||
4c3478d8f8 | |||
4a815785c4 | |||
75b54c83ca | |||
53cb3354b8 | |||
ea58e254da | |||
9fb63d4894 | |||
d4f7e02c85 | |||
15e43bb9a1 | |||
8a0b95c1d6 | |||
dd225a255f | |||
dad21823ff | |||
96bf2daae8 | |||
e6c49b7941 | |||
4960662332 | |||
b2cfa4b52f | |||
94a899de12 | |||
b48330a5c9 | |||
74d880b817 | |||
00993d4215 | |||
370fb9272c | |||
c8097e14e1 | |||
01ef85658c | |||
f5a00c9322 | |||
156fb04e88 | |||
428c4eb5e6 | |||
1a5b499b5c | |||
b7b8ba7e5a | |||
e9e7fac999 | |||
1fd3323372 | |||
2df5932b67 | |||
b93b982a18 | |||
ea73047fc7 | |||
0998268955 | |||
31726407a3 | |||
d21f4a6f90 | |||
9e3ac477eb | |||
ee5d647d0d | |||
38fef5b9d4 | |||
3ba7ce1c91 | |||
9a50803c00 | |||
1684f96b79 | |||
43f65e7fd0 | |||
dfd51939a0 | |||
1ae634d56b | |||
04e065094a | |||
5ddd1d9ae6 | |||
9fc6868341 | |||
f2ece573ff | |||
fb9b219abe | |||
09a5f89849 | |||
535692b020 | |||
0ab081f79e | |||
1cb05de7e3 | |||
532d8e9cd7 | |||
b4088a7928 | |||
ae24a807da | |||
75c15594c4 | |||
5ed6835e0e | |||
41dec03448 | |||
7c57bd33e5 | |||
a776e46e6d | |||
0e140df0fb | |||
4bb071c048 | |||
5045d0c2e9 | |||
73bca6288f | |||
6a2759eee6 | |||
38029b1f77 | |||
663a1a1091 | |||
c6d5436a58 | |||
47cad285d6 | |||
74f15d1a23 | |||
c559ddf487 | |||
93cff94e7f | |||
9744a48a67 | |||
6afdc9fb5f | |||
6f202363f4 | |||
7f1ff49aa2 | |||
4b86aa67a8 | |||
a3e81930c2 | |||
644b55b49f | |||
927c684fc2 | |||
344566d9e9 | |||
407f61a408 | |||
09ddd74652 | |||
7c0d882bc9 | |||
35b0c55a8c | |||
b9c65a871f | |||
84bb93096f | |||
01c69e7e25 | |||
adf5cbef97 | |||
da48a9d1af | |||
9482c663c0 | |||
c5f3fdc295 | |||
89e2f80fd8 | |||
7112f44fb7 | |||
b2fc1f4611 | |||
75f691d55a | |||
0655063bb0 | |||
9701b1230c | |||
f8a4428674 | |||
1a0f18a11a | |||
3d7aa52c90 | |||
69c50ff922 | |||
c91969126b | |||
15d383fa8b | |||
99ceed5fff | |||
fa8f826cdc | |||
dc91174a5e | |||
ec65f00cf2 | |||
8d9cdb4d03 | |||
949e5f9baf | |||
3c6a05be54 | |||
8781ecab1f | |||
0f6ae7dfa6 | |||
28259cd88b | |||
b24be84b0a | |||
59dd0a8a8c | |||
c88df2e12c | |||
1e57bfacb4 | |||
3e6aecc635 | |||
ced9a268a1 | |||
df2ad41150 | |||
f8895e879d | |||
b729b8928f | |||
b6b95f26b6 | |||
b94ec9557f | |||
143bddf1d1 | |||
a718081ebd | |||
740d021276 | |||
906343187b | |||
c3cd57a9e3 | |||
f03928c679 |
0
MANIFEST.in
Normal file → Executable file
0
c-sandbox/obicount/Makefile
Normal file → Executable file
0
c-sandbox/obicount/obicount.c
Normal file → Executable file
@ -6,12 +6,28 @@ Created on 20 oct. 2012
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from distutils import sysconfig
|
||||||
from distutils.core import Command
|
from distutils.core import Command
|
||||||
from distutils.sysconfig import customize_compiler
|
from distutils.sysconfig import customize_compiler as customize_compiler_ori
|
||||||
from distutils.errors import DistutilsSetupError
|
from distutils.errors import DistutilsSetupError
|
||||||
from distutils import log
|
from distutils import log
|
||||||
from distutils.ccompiler import show_compilers
|
from distutils.ccompiler import show_compilers
|
||||||
|
|
||||||
|
def customize_compiler(compiler):
|
||||||
|
customize_compiler_ori(compiler)
|
||||||
|
compilername = compiler.compiler[0]
|
||||||
|
if ("gcc" in compilername or "g++" in compilername):
|
||||||
|
cc_cmd = ' '.join(compiler.compiler + ['-fopenmp'])
|
||||||
|
ccshared= ' '.join(x for x in sysconfig.get_config_vars("ccshared") if x is not None)
|
||||||
|
|
||||||
|
compiler.set_executables(
|
||||||
|
compiler=cc_cmd,
|
||||||
|
compiler_so=cc_cmd + ' ' + ccshared
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class build_exe(Command):
|
class build_exe(Command):
|
||||||
|
|
||||||
description = "build an executable -- Abstract command "
|
description = "build an executable -- Abstract command "
|
||||||
@ -80,6 +96,7 @@ class build_exe(Command):
|
|||||||
else:
|
else:
|
||||||
self.extra_compile_args.append('-m%s' % self.sse)
|
self.extra_compile_args.append('-m%s' % self.sse)
|
||||||
|
|
||||||
|
|
||||||
# XXX same as for build_ext -- what about 'self.define' and
|
# XXX same as for build_ext -- what about 'self.define' and
|
||||||
# 'self.undef' ?
|
# 'self.undef' ?
|
||||||
|
|
||||||
|
@ -7,9 +7,21 @@ Created on 13 fevr. 2014
|
|||||||
from distutils import log
|
from distutils import log
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from distutils import sysconfig
|
||||||
from distutils.errors import DistutilsSetupError
|
from distutils.errors import DistutilsSetupError
|
||||||
|
|
||||||
|
def _customize_compiler(compiler):
|
||||||
|
compilername = compiler.compiler[0]
|
||||||
|
if ("gcc" in compilername or "g++" in compilername):
|
||||||
|
cc_cmd = ' '.join(compiler.compiler + ['-fopenmp'])
|
||||||
|
ccshared= ' '.join(x for x in sysconfig.get_config_vars("ccshared") if x is not None)
|
||||||
|
|
||||||
|
compiler.set_executables(
|
||||||
|
compiler=cc_cmd,
|
||||||
|
compiler_so=cc_cmd + ' ' + ccshared
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
|
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
|
||||||
from Cython.Compiler import Options as cython_options # @UnresolvedImport
|
from Cython.Compiler import Options as cython_options # @UnresolvedImport
|
||||||
@ -17,6 +29,10 @@ try:
|
|||||||
|
|
||||||
|
|
||||||
def modifyDocScripts(self):
|
def modifyDocScripts(self):
|
||||||
|
try:
|
||||||
|
os.mkdir("doc/sphinx")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
build_dir_file=open("doc/sphinx/build_dir.txt","w")
|
build_dir_file=open("doc/sphinx/build_dir.txt","w")
|
||||||
print(self.build_lib,file=build_dir_file)
|
print(self.build_lib,file=build_dir_file)
|
||||||
build_dir_file.close()
|
build_dir_file.close()
|
||||||
@ -28,7 +44,8 @@ try:
|
|||||||
|
|
||||||
|
|
||||||
def finalize_options(self):
|
def finalize_options(self):
|
||||||
ori_build_ext.finalize_options(self) # @UndefinedVariable
|
super(build_ext, self).finalize_options()
|
||||||
|
|
||||||
|
|
||||||
self.set_undefined_options('littlebigman',
|
self.set_undefined_options('littlebigman',
|
||||||
('littlebigman', 'littlebigman'))
|
('littlebigman', 'littlebigman'))
|
||||||
@ -80,11 +97,23 @@ try:
|
|||||||
|
|
||||||
self.check_extensions_list(self.extensions)
|
self.check_extensions_list(self.extensions)
|
||||||
|
|
||||||
|
print("pouic")
|
||||||
|
print(ext.sources)
|
||||||
|
print("pouac")
|
||||||
|
|
||||||
for ext in self.extensions:
|
for ext in self.extensions:
|
||||||
log.info("%s :-> %s",ext.name,ext.sources)
|
log.info("%s :-> %s",ext.name,ext.sources)
|
||||||
ext.sources = self.cython_sources(ext.sources, ext)
|
ext.sources = self.cython_sources(ext.sources, ext)
|
||||||
self.build_extension(ext)
|
self.build_extension(ext)
|
||||||
|
|
||||||
|
def build_extensions(self): # TODO what?? double? is it supposed to be build_extension?
|
||||||
|
if hasattr(self, 'compiler'):
|
||||||
|
_customize_compiler(self.compiler)
|
||||||
|
if hasattr(self, 'shlib_compiler'):
|
||||||
|
_customize_compiler(self.shlib_compiler)
|
||||||
|
|
||||||
|
ori_build_ext.build_extensions(self)
|
||||||
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.modifyDocScripts()
|
self.modifyDocScripts()
|
||||||
@ -104,8 +133,7 @@ try:
|
|||||||
|
|
||||||
sub_commands = [('build_files',has_files),
|
sub_commands = [('build_files',has_files),
|
||||||
('build_cexe', has_executables)
|
('build_cexe', has_executables)
|
||||||
] + \
|
] + ori_build_ext.sub_commands
|
||||||
ori_build_ext.sub_commands
|
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from distutils.command import build_ext # @UnusedImport
|
from distutils.command import build_ext # @UnusedImport
|
||||||
|
@ -9,12 +9,12 @@ import os.path
|
|||||||
import glob
|
import glob
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# try:
|
try:
|
||||||
# from setuptools.extension import Extension
|
from setuptools.extension import Extension
|
||||||
# except ImportError:
|
except ImportError:
|
||||||
# from distutils.extension import Extension
|
from distutils.extension import Extension
|
||||||
|
|
||||||
from distutils.extension import Extension
|
# from distutils.extension import Extension
|
||||||
|
|
||||||
from obidistutils.serenity.checkpackage import install_requirements,\
|
from obidistutils.serenity.checkpackage import install_requirements,\
|
||||||
check_requirements, \
|
check_requirements, \
|
||||||
@ -40,7 +40,6 @@ def findPackage(root,base=None):
|
|||||||
|
|
||||||
def findCython(root,base=None,pyrexs=None):
|
def findCython(root,base=None,pyrexs=None):
|
||||||
setupdir = os.path.dirname(sys.argv[0])
|
setupdir = os.path.dirname(sys.argv[0])
|
||||||
csourcedir = os.path.join(setupdir,"src")
|
|
||||||
pyrexs=[]
|
pyrexs=[]
|
||||||
|
|
||||||
if base is None:
|
if base is None:
|
||||||
@ -48,13 +47,18 @@ def findCython(root,base=None,pyrexs=None):
|
|||||||
for module in (path.basename(path.dirname(x))
|
for module in (path.basename(path.dirname(x))
|
||||||
for x in glob.glob(path.join(root,'*','__init__.py'))):
|
for x in glob.glob(path.join(root,'*','__init__.py'))):
|
||||||
|
|
||||||
|
|
||||||
for pyrex in glob.glob(path.join(root,module,'*.pyx')):
|
for pyrex in glob.glob(path.join(root,module,'*.pyx')):
|
||||||
|
libabspath = os.path.abspath('obi_libdir')
|
||||||
|
obiabspath = os.path.abspath('.')
|
||||||
pyrexs.append(Extension('.'.join(base+[module,path.splitext(path.basename(pyrex))[0]]),
|
pyrexs.append(Extension('.'.join(base+[module,path.splitext(path.basename(pyrex))[0]]),
|
||||||
[pyrex]
|
[pyrex],
|
||||||
|
library_dirs=[libabspath],
|
||||||
|
include_dirs=[libabspath],
|
||||||
|
libraries=["obi3"],
|
||||||
|
runtime_library_dirs=[libabspath],
|
||||||
|
extra_link_args=["-Wl,-rpath,"+libabspath, "-L"+libabspath]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
pyrexs[-1].include_dirs.append(csourcedir)
|
|
||||||
try:
|
try:
|
||||||
cfiles = os.path.splitext(pyrex)[0]+".cfiles"
|
cfiles = os.path.splitext(pyrex)[0]+".cfiles"
|
||||||
cfilesdir = os.path.dirname(cfiles)
|
cfilesdir = os.path.dirname(cfiles)
|
||||||
@ -65,13 +69,15 @@ def findCython(root,base=None,pyrexs=None):
|
|||||||
|
|
||||||
log.info("Cython module : %s",cfiles)
|
log.info("Cython module : %s",cfiles)
|
||||||
incdir = set(os.path.dirname(x) for x in cfiles if x[-2:]==".h")
|
incdir = set(os.path.dirname(x) for x in cfiles if x[-2:]==".h")
|
||||||
cfiles = [x for x in cfiles if x[-2:]==".c"]
|
#cfiles = [x for x in cfiles if x[-2:]==".c"]
|
||||||
pyrexs[-1].sources.extend(cfiles)
|
#pyrexs[-1].sources.extend(cfiles)
|
||||||
pyrexs[-1].include_dirs.extend(incdir)
|
pyrexs[-1].include_dirs.extend(incdir)
|
||||||
pyrexs[-1].extra_compile_args.extend(['-msse2',
|
pyrexs[-1].extra_compile_args.extend(['-msse2',
|
||||||
'-Wno-unused-function',
|
'-Wno-unused-function',
|
||||||
'-Wmissing-braces',
|
'-Wmissing-braces',
|
||||||
'-Wchar-subscripts'])
|
'-Wchar-subscripts',
|
||||||
|
'-fPIC'
|
||||||
|
])
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
pass
|
pass
|
||||||
@ -137,7 +143,7 @@ def setup(**attrs):
|
|||||||
|
|
||||||
log.set_threshold(log.INFO)
|
log.set_threshold(log.INFO)
|
||||||
|
|
||||||
minversion = attrs.get("pythonmin",'3.4')
|
minversion = attrs.get("pythonmin",'3.7')
|
||||||
maxversion = attrs.get('pythonmax',None)
|
maxversion = attrs.get('pythonmax',None)
|
||||||
fork = attrs.get('fork',False)
|
fork = attrs.get('fork',False)
|
||||||
requirementfile = attrs.get('requirements','requirements.txt')
|
requirementfile = attrs.get('requirements','requirements.txt')
|
||||||
@ -225,4 +231,4 @@ def setup(**attrs):
|
|||||||
|
|
||||||
from distutils.core import setup as ori_setup
|
from distutils.core import setup as ori_setup
|
||||||
|
|
||||||
ori_setup(**attrs)
|
return ori_setup(**attrs)
|
||||||
|
@ -4,12 +4,12 @@ Created on 20 oct. 2012
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# try:
|
try:
|
||||||
# from setuptools.dist import Distribution as ori_Distribution
|
from setuptools.dist import Distribution as ori_Distribution
|
||||||
# except ImportError:
|
except ImportError:
|
||||||
# from distutils.dist import Distribution as ori_Distribution
|
from distutils.dist import Distribution as ori_Distribution
|
||||||
|
|
||||||
from distutils.dist import Distribution as ori_Distribution
|
# from distutils.dist import Distribution as ori_Distribution
|
||||||
|
|
||||||
class Distribution(ori_Distribution):
|
class Distribution(ori_Distribution):
|
||||||
|
|
||||||
|
@ -81,9 +81,15 @@ def serenity_mode(package,version):
|
|||||||
argparser.add_argument('--serenity',
|
argparser.add_argument('--serenity',
|
||||||
dest='serenity',
|
dest='serenity',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
default=False,
|
default=True,
|
||||||
help='Switch the installer in serenity mode. Everythings are installed in a virtualenv')
|
help='Switch the installer in serenity mode. Everythings are installed in a virtualenv')
|
||||||
|
|
||||||
|
argparser.add_argument('--no-serenity',
|
||||||
|
dest='serenity',
|
||||||
|
action='store_false',
|
||||||
|
default=True,
|
||||||
|
help='Switch the installer in the no serenity mode.')
|
||||||
|
|
||||||
argparser.add_argument('--virtualenv',
|
argparser.add_argument('--virtualenv',
|
||||||
dest='virtual',
|
dest='virtual',
|
||||||
type=str,
|
type=str,
|
||||||
|
@ -5,32 +5,35 @@ Created on 2 oct. 2014
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
|
|
||||||
try:
|
|
||||||
import pip # @UnresolvedImport
|
|
||||||
from pip.utils import get_installed_distributions # @UnresolvedImport
|
|
||||||
except ImportError:
|
|
||||||
from .bootstrappip import bootstrap
|
|
||||||
bootstrap()
|
|
||||||
|
|
||||||
from distutils.version import StrictVersion # @UnusedImport
|
from distutils.version import StrictVersion # @UnusedImport
|
||||||
from distutils.errors import DistutilsError
|
from distutils.errors import DistutilsError
|
||||||
from distutils import log
|
from distutils import log
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
class RequirementError(Exception):
|
class RequirementError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def is_installed(requirement):
|
def is_installed(requirement):
|
||||||
|
pipcommand = os.path.join(os.path.dirname(sys.executable),'pip')
|
||||||
|
pipjson = subprocess.run([pipcommand,"list","--format=json"],
|
||||||
|
capture_output=True).stdout
|
||||||
|
packages = eval(pipjson)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
requirement_project,requirement_relation,requirement_version = parse_package_requirement(requirement)
|
requirement_project,requirement_relation,requirement_version = parse_package_requirement(requirement)
|
||||||
|
|
||||||
package = [x for x in get_installed_distributions() if x.project_name==requirement_project]
|
package = [x for x in packages if x["name"]==requirement_project]
|
||||||
|
|
||||||
if len(package)==1:
|
if len(package)==1:
|
||||||
if requirement_version is not None and requirement_relation is not None:
|
if ( requirement_version is not None
|
||||||
rep = (len(package)==1) and eval("StrictVersion('%s') %s StrictVersion('%s')" % (package[0].version,
|
and requirement_relation is not None):
|
||||||
|
rep = (len(package)==1) and eval("StrictVersion('%s') %s StrictVersion('%s')" % (package[0]["version"],
|
||||||
requirement_relation,
|
requirement_relation,
|
||||||
requirement_version)
|
requirement_version)
|
||||||
)
|
)
|
||||||
@ -44,20 +47,23 @@ def is_installed(requirement):
|
|||||||
log.info("Look for package %s (%s%s) : ok version %s installed" % (requirement_project,
|
log.info("Look for package %s (%s%s) : ok version %s installed" % (requirement_project,
|
||||||
requirement_relation,
|
requirement_relation,
|
||||||
requirement_version,
|
requirement_version,
|
||||||
package[0].version))
|
package[0]["version"]))
|
||||||
else:
|
else:
|
||||||
log.info("Look for package %s : ok version %s installed" % (requirement_project,
|
log.info("Look for package %s : ok version %s installed" % (requirement_project,
|
||||||
package[0].version))
|
package[0]["version"]))
|
||||||
else:
|
else:
|
||||||
if len(package)!=1:
|
if len(package)!=1:
|
||||||
log.info("Look for package %s (%s%s) : not installed" % (requirement_project,
|
if requirement_version is not None and requirement_relation is not None:
|
||||||
requirement_relation,
|
log.info("Look for package %s (%s%s) : not installed" % (requirement_project,
|
||||||
requirement_version))
|
requirement_relation,
|
||||||
|
requirement_version))
|
||||||
|
else:
|
||||||
|
log.info("Look for package %s : not installed" % requirement_project)
|
||||||
else:
|
else:
|
||||||
log.info("Look for package %s (%s%s) : failed only version %s installed" % (requirement_project,
|
log.info("Look for package %s (%s%s) : failed only version %s installed" % (requirement_project,
|
||||||
requirement_relation,
|
requirement_relation,
|
||||||
requirement_version,
|
requirement_version,
|
||||||
package[0].version))
|
package[0]["version"]))
|
||||||
|
|
||||||
return rep
|
return rep
|
||||||
|
|
||||||
@ -86,7 +92,7 @@ def install_requirements(requirementfile='requirements.txt'):
|
|||||||
ok = is_installed(x)
|
ok = is_installed(x)
|
||||||
if not ok:
|
if not ok:
|
||||||
log.info(" Installing requirement : %s" % x)
|
log.info(" Installing requirement : %s" % x)
|
||||||
pip_install_package(x)
|
pip_install_package(x,requirement=requirementfile)
|
||||||
install_something=True
|
install_something=True
|
||||||
if x[0:3]=='pip':
|
if x[0:3]=='pip':
|
||||||
return True
|
return True
|
||||||
@ -139,8 +145,9 @@ def get_package_requirement(package,requirementfile='requirements.txt'):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def pip_install_package(package,directory=None,upgrade=True):
|
def pip_install_package(package,directory=None,requirement=None):
|
||||||
|
|
||||||
|
pipcommand = os.path.join(os.path.dirname(sys.executable),'pip')
|
||||||
if directory is not None:
|
if directory is not None:
|
||||||
log.info(' installing %s in directory %s' % (package,str(directory)))
|
log.info(' installing %s in directory %s' % (package,str(directory)))
|
||||||
|
|
||||||
@ -150,8 +157,9 @@ def pip_install_package(package,directory=None,upgrade=True):
|
|||||||
|
|
||||||
args = ['install']
|
args = ['install']
|
||||||
|
|
||||||
if upgrade:
|
if requirement:
|
||||||
args.append('--upgrade')
|
args.append('--requirement')
|
||||||
|
args.append(requirement)
|
||||||
|
|
||||||
if 'https_proxy' in os.environ:
|
if 'https_proxy' in os.environ:
|
||||||
args.append('--proxy=%s' % os.environ['https_proxy'])
|
args.append('--proxy=%s' % os.environ['https_proxy'])
|
||||||
@ -161,5 +169,7 @@ def pip_install_package(package,directory=None,upgrade=True):
|
|||||||
|
|
||||||
args.append(package)
|
args.append(package)
|
||||||
|
|
||||||
return pip.main(args)
|
pip = subprocess.run([pipcommand] + args)
|
||||||
|
|
||||||
|
return pip
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
* Author: coissac
|
* Author: coissac
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include<stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
0
doc/.gitignore
vendored
Normal file → Executable file
0
doc/Doxyfile
Normal file → Executable file
0
doc/Makefile
Normal file → Executable file
2
doc/conf.py
Normal file → Executable file
@ -33,7 +33,7 @@ extensions = [
|
|||||||
'sphinx.ext.autodoc',
|
'sphinx.ext.autodoc',
|
||||||
'sphinx.ext.todo',
|
'sphinx.ext.todo',
|
||||||
'sphinx.ext.coverage',
|
'sphinx.ext.coverage',
|
||||||
'sphinx.ext.pngmath',
|
'sphinx.ext.imgmath',
|
||||||
'sphinx.ext.ifconfig',
|
'sphinx.ext.ifconfig',
|
||||||
'sphinx.ext.viewcode',
|
'sphinx.ext.viewcode',
|
||||||
'breathe',
|
'breathe',
|
||||||
|
0
doc/source/DMS.rst
Normal file → Executable file
0
doc/source/UML/OBIDMS_UML.png
Normal file → Executable file
Before Width: | Height: | Size: 67 KiB After Width: | Height: | Size: 67 KiB |
0
doc/source/UML/OBITypes_UML.class.violet.html
Normal file → Executable file
0
doc/source/UML/OBITypes_UML.png
Normal file → Executable file
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
0
doc/source/UML/ObiDMS_UML.class.violet.html
Normal file → Executable file
0
doc/source/containers.rst
Normal file → Executable file
0
doc/source/data.rst
Normal file → Executable file
0
doc/source/elementary.rst
Normal file → Executable file
0
doc/source/guidelines.rst
Normal file → Executable file
0
doc/source/images/history.png
Normal file → Executable file
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
0
doc/source/images/version_control.png
Normal file → Executable file
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 48 KiB |
0
doc/source/index.rst
Normal file → Executable file
0
doc/source/specialvalues.rst
Normal file → Executable file
0
doc/source/types.rst
Normal file → Executable file
@ -1 +0,0 @@
|
|||||||
build/lib.macosx-10.6-intel-3.4
|
|
21
python/obi.py
Normal file → Executable file
@ -21,9 +21,28 @@ default_config = { 'software' : "The OBITools",
|
|||||||
'log' : False,
|
'log' : False,
|
||||||
'loglevel' : 'INFO',
|
'loglevel' : 'INFO',
|
||||||
'progress' : True,
|
'progress' : True,
|
||||||
|
'inputURI' : None,
|
||||||
|
'outputURI' : None,
|
||||||
'defaultdms' : None,
|
'defaultdms' : None,
|
||||||
'inputview' : None,
|
'inputview' : None,
|
||||||
'outputview' : None
|
'outputview' : None,
|
||||||
|
'skip' : 0,
|
||||||
|
'only' : None,
|
||||||
|
'fileformat' : None,
|
||||||
|
'skiperror' : True,
|
||||||
|
'qualityformat' : b'sanger',
|
||||||
|
'offset' : -1,
|
||||||
|
'noquality' : False,
|
||||||
|
'seqtype' : b'nuc',
|
||||||
|
"header" : False,
|
||||||
|
"sep" : None,
|
||||||
|
"quote" : [b"'",b'"'],
|
||||||
|
"dec" : b".",
|
||||||
|
"nastring" : b"NA",
|
||||||
|
"stripwhite" : True,
|
||||||
|
"blanklineskip" : True,
|
||||||
|
"commentchar" : b"#",
|
||||||
|
"nocreatedms" : False
|
||||||
}
|
}
|
||||||
|
|
||||||
root_config_name='obi'
|
root_config_name='obi'
|
||||||
|
0
python/obitools3/__init__.py
Normal file → Executable file
0
python/obitools3/__init__.pyc
Normal file → Executable file
0
python/obitools3/apps/__init__.py
Normal file → Executable file
110
python/obitools3/apps/arguments.cfiles
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
0
python/obitools3/apps/arguments.pxd
Normal file → Executable file
1
python/obitools3/apps/arguments.pyx
Normal file → Executable file
@ -57,5 +57,6 @@ cpdef buildArgumentParser(str configname,
|
|||||||
module.addOptions(sub)
|
module.addOptions(sub)
|
||||||
|
|
||||||
sub.set_defaults(**{'%s:module' % configname : module})
|
sub.set_defaults(**{'%s:module' % configname : module})
|
||||||
|
sub.set_defaults(**{'%s:modulename' % configname : c})
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
110
python/obitools3/apps/command.cfiles
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
0
python/obitools3/apps/command.pxd
Normal file → Executable file
0
python/obitools3/apps/command.pyx
Normal file → Executable file
110
python/obitools3/apps/config.cfiles
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
0
python/obitools3/apps/config.pxd
Normal file → Executable file
11
python/obitools3/apps/config.pyx
Normal file → Executable file
@ -101,3 +101,14 @@ cpdef dict getConfiguration(str root_config_name="__default__",
|
|||||||
config['__done__']=True
|
config['__done__']=True
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
def logger(level, *messages):
|
||||||
|
try:
|
||||||
|
config=getConfiguration()
|
||||||
|
root = config["__root_config__"]
|
||||||
|
l = config[root]['logger']
|
||||||
|
if config[root]['verbose']:
|
||||||
|
getattr(l, level)(*messages)
|
||||||
|
except:
|
||||||
|
print(*messages,file=sys.stderr)
|
||||||
|
|
||||||
|
110
python/obitools3/apps/logging.cfiles
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
0
python/obitools3/apps/logging.pxd
Normal file → Executable file
4
python/obitools3/apps/logging.pyx
Normal file → Executable file
@ -22,7 +22,7 @@ cpdef getLogger(dict config):
|
|||||||
logfile= config[root]['log']
|
logfile= config[root]['log']
|
||||||
|
|
||||||
rootlogger = logging.getLogger()
|
rootlogger = logging.getLogger()
|
||||||
logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")
|
logFormatter = logging.Formatter("%%(asctime)s [%s : %%(levelname)-5.5s] %%(message)s" % config[root]['modulename'])
|
||||||
|
|
||||||
stderrHandler = logging.StreamHandler(sys.stderr)
|
stderrHandler = logging.StreamHandler(sys.stderr)
|
||||||
stderrHandler.setFormatter(logFormatter)
|
stderrHandler.setFormatter(logFormatter)
|
||||||
@ -42,5 +42,7 @@ cpdef getLogger(dict config):
|
|||||||
rootlogger.setLevel(loglevel)
|
rootlogger.setLevel(loglevel)
|
||||||
|
|
||||||
config[root]['logger']=rootlogger
|
config[root]['logger']=rootlogger
|
||||||
|
config[root]['verbose']=True
|
||||||
|
|
||||||
return rootlogger
|
return rootlogger
|
||||||
|
|
||||||
|
272
python/obitools3/apps/optiongroups/__init__.py
Executable file
@ -0,0 +1,272 @@
|
|||||||
|
def __addInputOption(optionManager):
|
||||||
|
|
||||||
|
optionManager.add_argument(
|
||||||
|
dest='obi:inputURI',
|
||||||
|
metavar='INPUT',
|
||||||
|
help='Data source URI')
|
||||||
|
|
||||||
|
|
||||||
|
group = optionManager.add_argument_group("Restriction to a sub-part options",
|
||||||
|
"Allows to limit analysis to a sub-part of the input")
|
||||||
|
|
||||||
|
group.add_argument('--skip',
|
||||||
|
action="store", dest="obi:skip",
|
||||||
|
metavar='<N>',
|
||||||
|
default=None,
|
||||||
|
type=int,
|
||||||
|
help="skip the N first sequences")
|
||||||
|
|
||||||
|
group.add_argument('--only',
|
||||||
|
action="store", dest="obi:only",
|
||||||
|
metavar='<N>',
|
||||||
|
default=None,
|
||||||
|
type=int,
|
||||||
|
help="treat only N sequences")
|
||||||
|
|
||||||
|
|
||||||
|
def __addImportInputOption(optionManager):
|
||||||
|
group = optionManager.add_argument_group("Input format options for imported files")
|
||||||
|
|
||||||
|
group.add_argument('--fasta-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'fasta',
|
||||||
|
help="Input file is in sanger fasta format")
|
||||||
|
|
||||||
|
group.add_argument('--fastq-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'fastq',
|
||||||
|
help="Input file is in fastq format")
|
||||||
|
|
||||||
|
group.add_argument('--embl-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'embl',
|
||||||
|
help="Input file is in embl nucleic format")
|
||||||
|
|
||||||
|
group.add_argument('--genbank-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'genbank',
|
||||||
|
help="Input file is in genbank nucleic format")
|
||||||
|
|
||||||
|
group.add_argument('--ngsfilter-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'ngsfilter',
|
||||||
|
help="Input file is an ngsfilter file")
|
||||||
|
|
||||||
|
group.add_argument('--ecopcr-result-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'ecopcr',
|
||||||
|
help="Input file is the result of an ecoPCR (version 2)")
|
||||||
|
|
||||||
|
group.add_argument('--ecoprimers-result-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'ecoprimers',
|
||||||
|
help="Input file is the result of an ecoprimers")
|
||||||
|
|
||||||
|
group.add_argument('--tabular-input',
|
||||||
|
action="store_const", dest="obi:inputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'tabular',
|
||||||
|
help="Input file is a tabular file")
|
||||||
|
|
||||||
|
group.add_argument('--no-skip-on-error',
|
||||||
|
action="store_false", dest="obi:skiperror",
|
||||||
|
default=True,
|
||||||
|
help="Don't skip sequence entries with parsing errors (default: they are skipped)")
|
||||||
|
|
||||||
|
group.add_argument('--no-quality',
|
||||||
|
action="store_true", dest="obi:noquality",
|
||||||
|
default=False,
|
||||||
|
help="Do not import fastQ quality")
|
||||||
|
|
||||||
|
group.add_argument('--quality-sanger',
|
||||||
|
action="store_const", dest="obi:qualityformat",
|
||||||
|
default=None,
|
||||||
|
const=b'sanger',
|
||||||
|
help="Fastq quality is encoded following sanger format (standard fastq)")
|
||||||
|
|
||||||
|
group.add_argument('--quality-solexa',
|
||||||
|
action="store_const", dest="obi:qualityformat",
|
||||||
|
default=None,
|
||||||
|
const=b'solexa',
|
||||||
|
help="Fastq quality is encoded following solexa sequencer format")
|
||||||
|
|
||||||
|
group.add_argument('--nuc',
|
||||||
|
action="store_const", dest="obi:moltype",
|
||||||
|
default=None,
|
||||||
|
const=b'nuc',
|
||||||
|
help="Input file contains nucleic sequences")
|
||||||
|
|
||||||
|
group.add_argument('--prot',
|
||||||
|
action="store_const", dest="obi:moltype",
|
||||||
|
default=None,
|
||||||
|
const=b'pep',
|
||||||
|
help="Input file contains protein sequences")
|
||||||
|
|
||||||
|
group.add_argument('--input-na-string',
|
||||||
|
action="store", dest="obi:inputnastring",
|
||||||
|
default="NA",
|
||||||
|
type=str,
|
||||||
|
help="String associated with Non Available (NA) values in the input")
|
||||||
|
|
||||||
|
|
||||||
|
def __addTabularInputOption(optionManager):
|
||||||
|
group = optionManager.add_argument_group("Input format options for tabular files")
|
||||||
|
|
||||||
|
group.add_argument('--header',
|
||||||
|
action="store_true", dest="obi:header",
|
||||||
|
default=False,
|
||||||
|
help="First line of tabular file contains column names")
|
||||||
|
|
||||||
|
group.add_argument('--sep',
|
||||||
|
action="store", dest="obi:sep",
|
||||||
|
default=None,
|
||||||
|
type=str,
|
||||||
|
help="Column separator")
|
||||||
|
|
||||||
|
group.add_argument('--dec',
|
||||||
|
action="store", dest="obi:dec",
|
||||||
|
default=".",
|
||||||
|
type=str,
|
||||||
|
help="Decimal separator")
|
||||||
|
|
||||||
|
group.add_argument('--strip-white',
|
||||||
|
action="store_false", dest="obi:stripwhite",
|
||||||
|
default=True,
|
||||||
|
help="Remove white chars at the beginning and the end of values")
|
||||||
|
|
||||||
|
group.add_argument('--blank-line-skip',
|
||||||
|
action="store_false", dest="obi:blanklineskip",
|
||||||
|
default=True,
|
||||||
|
help="Skip empty lines")
|
||||||
|
|
||||||
|
group.add_argument('--comment-char',
|
||||||
|
action="store", dest="obi:commentchar",
|
||||||
|
default="#",
|
||||||
|
type=str,
|
||||||
|
help="Lines starting by this char are considered as comment")
|
||||||
|
|
||||||
|
|
||||||
|
def __addTaxdumpInputOption(optionManager): # TODO maybe not the best way to do it
|
||||||
|
group = optionManager.add_argument_group("Input format options for taxdump")
|
||||||
|
|
||||||
|
group.add_argument('--taxdump',
|
||||||
|
action="store_true", dest="obi:taxdump",
|
||||||
|
default=False,
|
||||||
|
help="Whether the input is a taxdump")
|
||||||
|
|
||||||
|
|
||||||
|
def __addTaxonomyOption(optionManager):
|
||||||
|
group = optionManager.add_argument_group("Input format options for taxonomy")
|
||||||
|
|
||||||
|
group.add_argument('--taxonomy',
|
||||||
|
action="store", dest="obi:taxoURI",
|
||||||
|
default=None,
|
||||||
|
help="Taxonomy URI")
|
||||||
|
|
||||||
|
#TODO option bool to download taxo if URI doesn't exist
|
||||||
|
|
||||||
|
|
||||||
|
def addMinimalInputOption(optionManager):
|
||||||
|
__addInputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def addImportInputOption(optionManager):
|
||||||
|
__addInputOption(optionManager)
|
||||||
|
__addImportInputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def addTabularInputOption(optionManager):
|
||||||
|
__addTabularInputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def addTaxonomyOption(optionManager):
|
||||||
|
__addTaxonomyOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def addTaxdumpInputOption(optionManager):
|
||||||
|
__addTaxdumpInputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def addAllInputOption(optionManager):
|
||||||
|
__addInputOption(optionManager)
|
||||||
|
__addImportInputOption(optionManager)
|
||||||
|
__addTabularInputOption(optionManager)
|
||||||
|
__addTaxonomyOption(optionManager)
|
||||||
|
__addTaxdumpInputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def __addOutputOption(optionManager):
|
||||||
|
|
||||||
|
optionManager.add_argument(
|
||||||
|
dest='obi:outputURI',
|
||||||
|
metavar='OUTPUT',
|
||||||
|
help='Data destination URI')
|
||||||
|
|
||||||
|
|
||||||
|
def __addDMSOutputOption(optionManager):
|
||||||
|
group = optionManager.add_argument_group("Output options for DMS data")
|
||||||
|
|
||||||
|
group.add_argument('--no-create-dms',
|
||||||
|
action="store_true", dest="obi:nocreatedms",
|
||||||
|
default=False,
|
||||||
|
help="Don't create an output DMS is it is not existing")
|
||||||
|
|
||||||
|
group.add_argument('--max-elts',
|
||||||
|
action="store", dest="obi:maxelts",
|
||||||
|
metavar='<N>',
|
||||||
|
default=1000,
|
||||||
|
type=int,
|
||||||
|
help="Maximum number of elements per line in a column "
|
||||||
|
"(e.g. the number of different keys in a dictionary-type "
|
||||||
|
"key from sequence headers). If the number of different keys "
|
||||||
|
"is greater than N, the values are stored as character strings")
|
||||||
|
|
||||||
|
|
||||||
|
def __addExportOutputOption(optionManager):
|
||||||
|
group = optionManager.add_argument_group("Output format options for exported files")
|
||||||
|
|
||||||
|
group.add_argument('--fasta-output',
|
||||||
|
action="store_const", dest="obi:outputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'fasta',
|
||||||
|
help="Output file is in sanger fasta format")
|
||||||
|
|
||||||
|
group.add_argument('--fastq-output',
|
||||||
|
action="store_const", dest="obi:outputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'fastq',
|
||||||
|
help="Output file is in fastq format")
|
||||||
|
|
||||||
|
group.add_argument('--print-na',
|
||||||
|
action="store_true", dest="obi:printna",
|
||||||
|
default=False,
|
||||||
|
help="Print Non Available (NA) values in the output")
|
||||||
|
|
||||||
|
group.add_argument('--output-na-string',
|
||||||
|
action="store", dest="obi:outputnastring",
|
||||||
|
default="NA",
|
||||||
|
type=str,
|
||||||
|
help="String associated with Non Available (NA) values in the output")
|
||||||
|
|
||||||
|
|
||||||
|
def addMinimalOutputOption(optionManager):
|
||||||
|
__addOutputOption(optionManager)
|
||||||
|
__addDMSOutputOption(optionManager)
|
||||||
|
|
||||||
|
def addExportOutputOption(optionManager):
|
||||||
|
__addOutputOption(optionManager)
|
||||||
|
__addExportOutputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def addAllOutputOption(optionManager):
|
||||||
|
__addOutputOption(optionManager)
|
||||||
|
__addDMSOutputOption(optionManager)
|
||||||
|
__addExportOutputOption(optionManager)
|
||||||
|
|
110
python/obitools3/apps/progress.cfiles
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
1
python/obitools3/apps/progress.pxd
Normal file → Executable file
@ -51,6 +51,7 @@ cdef class ProgressBar:
|
|||||||
cdef int lastlog
|
cdef int lastlog
|
||||||
cdef bint ontty
|
cdef bint ontty
|
||||||
cdef int fd
|
cdef int fd
|
||||||
|
cdef bint cut
|
||||||
|
|
||||||
cdef bytes _head
|
cdef bytes _head
|
||||||
cdef char *chead
|
cdef char *chead
|
||||||
|
48
python/obitools3/apps/progress.pyx
Normal file → Executable file
@ -6,11 +6,14 @@ Created on 27 mars 2016
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
from ..utils cimport str2bytes, bytes2str
|
from ..utils cimport str2bytes, bytes2str
|
||||||
from .config cimport getConfiguration
|
from .config cimport getConfiguration
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
cdef class ProgressBar:
|
cdef class ProgressBar:
|
||||||
|
|
||||||
cdef clock_t clock(self):
|
cdef clock_t clock(self):
|
||||||
cdef clock_t t
|
cdef clock_t t
|
||||||
cdef timeval tp
|
cdef timeval tp
|
||||||
@ -22,11 +25,14 @@ cdef class ProgressBar:
|
|||||||
|
|
||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
off_t maxi,
|
off_t maxi,
|
||||||
dict config={},
|
dict config={},
|
||||||
str head="",
|
str head="",
|
||||||
double seconde=0.1):
|
double seconde=0.1,
|
||||||
|
cut=False):
|
||||||
|
|
||||||
self.starttime = self.clock()
|
self.starttime = self.clock()
|
||||||
self.lasttime = self.starttime
|
self.lasttime = self.starttime
|
||||||
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
|
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
|
||||||
@ -38,16 +44,15 @@ cdef class ProgressBar:
|
|||||||
if not config:
|
if not config:
|
||||||
config=getConfiguration()
|
config=getConfiguration()
|
||||||
|
|
||||||
|
|
||||||
self.ontty = sys.stderr.isatty()
|
self.ontty = sys.stderr.isatty()
|
||||||
|
|
||||||
if (maxi<=0):
|
if (maxi<=0):
|
||||||
maxi=1
|
maxi=1
|
||||||
|
|
||||||
self.maxi = maxi
|
self.maxi = maxi
|
||||||
self.head = head
|
self.head = head
|
||||||
self.chead= self._head
|
self.chead = self._head
|
||||||
|
self.cut = cut
|
||||||
|
|
||||||
self.logger=config[config["__root_config__"]]["logger"]
|
self.logger=config[config["__root_config__"]]["logger"]
|
||||||
self.wheel = '|/-\\'
|
self.wheel = '|/-\\'
|
||||||
@ -62,7 +67,8 @@ cdef class ProgressBar:
|
|||||||
'##########' \
|
'##########' \
|
||||||
'##########'
|
'##########'
|
||||||
|
|
||||||
def __call__(self,object pos):
|
|
||||||
|
def __call__(self, object pos, bint force=False):
|
||||||
cdef off_t ipos
|
cdef off_t ipos
|
||||||
cdef clock_t elapsed
|
cdef clock_t elapsed
|
||||||
cdef clock_t newtime
|
cdef clock_t newtime
|
||||||
@ -76,7 +82,7 @@ cdef class ProgressBar:
|
|||||||
|
|
||||||
self.cycle+=1
|
self.cycle+=1
|
||||||
|
|
||||||
if self.cycle % self.freq == 0:
|
if self.cycle % self.freq == 0 or force:
|
||||||
self.cycle=1
|
self.cycle=1
|
||||||
newtime = self.clock()
|
newtime = self.clock()
|
||||||
delta = newtime - self.lasttime
|
delta = newtime - self.lasttime
|
||||||
@ -111,7 +117,7 @@ cdef class ProgressBar:
|
|||||||
self.arrow=(self.arrow+1) % 4
|
self.arrow=(self.arrow+1) % 4
|
||||||
|
|
||||||
if days:
|
if days:
|
||||||
<void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %d days %02d:%02d:%02d',
|
<void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %d days %02d:%02d:%02d\033[K',
|
||||||
self.chead,
|
self.chead,
|
||||||
percent*100,
|
percent*100,
|
||||||
fraction,self.diese,
|
fraction,self.diese,
|
||||||
@ -119,7 +125,7 @@ cdef class ProgressBar:
|
|||||||
50-fraction,self.spaces,
|
50-fraction,self.spaces,
|
||||||
days,hour,minu,sec)
|
days,hour,minu,sec)
|
||||||
else:
|
else:
|
||||||
<void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %02d:%02d:%02d',
|
<void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %02d:%02d:%02d\033[K',
|
||||||
self.chead,
|
self.chead,
|
||||||
percent*100.,
|
percent*100.,
|
||||||
fraction,self.diese,
|
fraction,self.diese,
|
||||||
@ -127,25 +133,25 @@ cdef class ProgressBar:
|
|||||||
50-fraction,self.spaces,
|
50-fraction,self.spaces,
|
||||||
hour,minu,sec)
|
hour,minu,sec)
|
||||||
|
|
||||||
tenth = int(percent * 10)
|
if self.cut:
|
||||||
if tenth != self.lastlog:
|
tenth = int(percent * 10)
|
||||||
|
if tenth != self.lastlog:
|
||||||
|
|
||||||
if self.ontty:
|
if self.ontty:
|
||||||
<void>fputs(b'\n',stderr)
|
<void>fputs(b'\n',stderr)
|
||||||
|
|
||||||
self.logger.info('%s %5.1f %% remain : %02d:%02d:%02d' % (
|
self.logger.info('%s %5.1f %% remain : %02d:%02d:%02d\033[K' % (
|
||||||
bytes2str(self._head),
|
bytes2str(self._head),
|
||||||
percent*100.,
|
percent*100.,
|
||||||
hour,minu,sec))
|
hour,minu,sec))
|
||||||
self.lastlog=tenth
|
self.lastlog=tenth
|
||||||
else:
|
else:
|
||||||
self.cycle+=1
|
self.cycle+=1
|
||||||
|
|
||||||
property head:
|
|
||||||
|
|
||||||
|
property head:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self._head
|
return self._head
|
||||||
|
|
||||||
def __set__(self,str value):
|
def __set__(self,str value):
|
||||||
self._head=str2bytes(value)
|
self._head=str2bytes(value)
|
||||||
self.chead=self._head
|
self.chead=self._head
|
||||||
|
110
python/obitools3/apps/temp.cfiles
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
8
python/obitools3/apps/temp.pxd
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
'''
|
||||||
|
Created on 28 juillet 2017
|
||||||
|
|
||||||
|
@author: coissac
|
||||||
|
'''
|
||||||
|
|
99
python/obitools3/apps/temp.pyx
Executable file
@ -0,0 +1,99 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
'''
|
||||||
|
Created on 28 juillet 2017
|
||||||
|
|
||||||
|
@author: coissac
|
||||||
|
'''
|
||||||
|
|
||||||
|
from os import environb,getpid
|
||||||
|
from os.path import join, isdir
|
||||||
|
from tempfile import TemporaryDirectory, _get_candidate_names
|
||||||
|
from shutil import rmtree
|
||||||
|
from atexit import register
|
||||||
|
|
||||||
|
from obitools3.dms.dms import DMS
|
||||||
|
|
||||||
|
from obitools3.apps.config import getConfiguration
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
|
||||||
|
from obitools3.dms.dms cimport DMS
|
||||||
|
from obitools3.utils cimport tobytes,tostr
|
||||||
|
|
||||||
|
cpdef get_temp_dir():
    """
    Returns the name of the temporary directory specific to this instance
    of obitools.

    This is an application function. It cannot be called out of an obi command.
    It requires a valid configuration (as returned by getConfiguration()).

    The first call creates a tempfile.TemporaryDirectory and stores that object
    in the root section of the configuration under the "tempdir" key. If the
    function is called several times from the same obi session, the name of
    that same directory is returned.

    If the OBITMP environment variable exists, the temporary directory is
    created inside this directory.

    The TemporaryDirectory object is kept alive by the configuration; the
    directory is presumably removed when that object is finalized at the end
    of the process (standard tempfile behaviour, not enforced here).

    @return: the name (path) of the temporary directory. Note that this is the
             name, not the TemporaryDirectory object itself. The name is bytes
             when OBITMP is set (environb yields bytes) and str otherwise.
    """
    cdef dict config = getConfiguration()

    root = config["__root_config__"]

    # Reuse the directory already registered for this session, if any.
    try:
        return config[root]["tempdir"].name
    except KeyError:
        pass

    # None lets tempfile pick its default location.
    basedir = environb.get(b'OBITMP')

    tmp = TemporaryDirectory(dir=basedir)

    config[root]["tempdir"] = tmp

    return tmp.name
|
||||||
|
|
||||||
|
cpdef get_temp_dir_name():
    """
    Returns the name of the temporary directory
    specific to this instance of obitools.

    @return: the name of the temporary directory.

    @see get_temp_dir
    """
    # get_temp_dir() already returns the directory name (it returns the
    # `.name` of the TemporaryDirectory it manages). The previous body called
    # get_temp_dir_name() itself and therefore recursed without end.
    return get_temp_dir()
|
||||||
|
|
||||||
|
|
||||||
|
cpdef get_temp_dms():
    """
    Returns the temporary DMS specific to this instance of obitools.

    The first call creates a new DMS inside the session temporary directory
    (see get_temp_dir), named "obi.<pid>.<random candidate name>", and stores
    it in the root section of the configuration under the "tempdms" key.
    Later calls from the same session return that same DMS.

    @return: the temporary DMS object.
    """
    cdef bytes tmpdirname
    cdef dict config = getConfiguration()
    cdef DMS tmpdms

    root = config["__root_config__"]

    # Reuse the DMS already registered for this session, if any.
    try:
        return config[root]["tempdms"]
    except KeyError:
        pass

    # get_temp_dir() returns a str when OBITMP is not set, which can neither be
    # stored in a `bytes` variable nor joined with the bytes file name below.
    # NOTE(review): assumes tobytes() returns bytes input unchanged - confirm
    # in obitools3.utils.
    tmpdirname = tobytes(get_temp_dir())

    tempname = join(tmpdirname,
                    b"obi.%d.%s" % (getpid(),
                                    tobytes(next(_get_candidate_names())))
                    )

    tmpdms = DMS.new(tempname)

    config[root]["tempdms"] = tmpdms

    return tmpdms
|
||||||
|
|
||||||
|
|
0
python/obitools3/commands/__init__.py
Normal file → Executable file
@ -1,65 +1,103 @@
|
|||||||
../../../src/bloom.h
|
../../../src/obi_lcs.h
|
||||||
../../../src/bloom.c
|
../../../src/obi_lcs.c
|
||||||
../../../src/char_str_indexer.h
|
|
||||||
../../../src/char_str_indexer.c
|
|
||||||
../../../src/crc64.h
|
|
||||||
../../../src/crc64.c
|
|
||||||
../../../src/dna_seq_indexer.h
|
|
||||||
../../../src/dna_seq_indexer.c
|
|
||||||
../../../src/encode.h
|
|
||||||
../../../src/encode.c
|
|
||||||
../../../src/hashtable.h
|
|
||||||
../../../src/hashtable.c
|
|
||||||
../../../src/murmurhash2.h
|
|
||||||
../../../src/murmurhash2.c
|
|
||||||
../../../src/obi_align.h
|
|
||||||
../../../src/obi_align.c
|
|
||||||
../../../src/obiavl.h
|
|
||||||
../../../src/obiavl.c
|
|
||||||
../../../src/obiblob_indexer.h
|
|
||||||
../../../src/obiblob_indexer.c
|
|
||||||
../../../src/obiblob.h
|
|
||||||
../../../src/obiblob.c
|
|
||||||
../../../src/obidebug.h
|
|
||||||
../../../src/obidms_taxonomy.h
|
|
||||||
../../../src/obidms_taxonomy.c
|
|
||||||
../../../src/obidms.h
|
|
||||||
../../../src/obidms.c
|
|
||||||
../../../src/obidmscolumn_blob.c
|
|
||||||
../../../src/obidmscolumn_blob.h
|
|
||||||
../../../src/obidmscolumn_bool.c
|
|
||||||
../../../src/obidmscolumn_bool.h
|
|
||||||
../../../src/obidmscolumn_char.c
|
|
||||||
../../../src/obidmscolumn_char.h
|
|
||||||
../../../src/obidmscolumn_float.c
|
|
||||||
../../../src/obidmscolumn_float.h
|
|
||||||
../../../src/obidmscolumn_idx.h
|
|
||||||
../../../src/obidmscolumn_idx.c
|
|
||||||
../../../src/obidmscolumn_int.c
|
|
||||||
../../../src/obidmscolumn_int.h
|
|
||||||
../../../src/obidmscolumn_qual.h
|
|
||||||
../../../src/obidmscolumn_qual.c
|
|
||||||
../../../src/obidmscolumn_seq.c
|
|
||||||
../../../src/obidmscolumn_seq.h
|
|
||||||
../../../src/obidmscolumn_str.c
|
|
||||||
../../../src/obidmscolumn_str.h
|
|
||||||
../../../src/obidmscolumn.h
|
|
||||||
../../../src/obidmscolumn.c
|
|
||||||
../../../src/obidmscolumndir.h
|
|
||||||
../../../src/obidmscolumndir.c
|
|
||||||
../../../src/obierrno.h
|
../../../src/obierrno.h
|
||||||
../../../src/obierrno.c
|
../../../src/obierrno.c
|
||||||
../../../src/obilittlebigman.h
|
|
||||||
../../../src/obilittlebigman.c
|
|
||||||
../../../src/obitypes.h
|
|
||||||
../../../src/obitypes.c
|
|
||||||
../../../src/obiview.h
|
|
||||||
../../../src/obiview.c
|
|
||||||
../../../src/sse_banded_LCS_alignment.h
|
|
||||||
../../../src/sse_banded_LCS_alignment.c
|
|
||||||
../../../src/uint8_indexer.h
|
|
||||||
../../../src/uint8_indexer.c
|
|
||||||
../../../src/upperband.h
|
../../../src/upperband.h
|
||||||
../../../src/upperband.c
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
../../../src/utils.h
|
../../../src/utils.h
|
||||||
../../../src/utils.c
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
18
python/obitools3/commands/align.pxd
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
|
||||||
|
cpdef align_columns(bytes dms_n,
|
||||||
|
bytes input_view_1_n,
|
||||||
|
bytes output_view_n,
|
||||||
|
bytes input_view_2_n=*,
|
||||||
|
bytes input_column_1_n=*,
|
||||||
|
bytes input_column_2_n=*,
|
||||||
|
bytes input_elt_1_n=*,
|
||||||
|
bytes input_elt_2_n=*,
|
||||||
|
bytes id_column_1_n=*,
|
||||||
|
bytes id_column_2_n=*,
|
||||||
|
double threshold=*, bint normalize=*,
|
||||||
|
int reference=*, bint similarity_mode=*,
|
||||||
|
bint print_seq=*, bint print_count=*,
|
||||||
|
bytes comments=*,
|
||||||
|
int thread_count=*)
|
274
python/obitools3/commands/align.pyx
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view.view cimport View
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
|
||||||
|
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
|
||||||
|
obi_lcs_align_two_columns
|
||||||
|
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Aligns one sequence column with itself or two sequence columns"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """
    Register the command line options of `obi align` on `parser`.

    The minimal input and output options are added first, then an
    'obi align specific options' argument group holding the options below.
    Every destination is written as "align:<name>".
    """
    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)

    align_options = parser.add_argument_group('obi align specific options')

    # Optional second input.
    align_options.add_argument(
        '--input-2', '-I',
        dest="align:inputuri2",
        action="store",
        type=str,
        default="",
        metavar='<INPUT URI>',
        help="Eventually, the URI of the second input to align with the first one.")

    # Score threshold; its meaning depends on --raw and --distance.
    align_options.add_argument(
        '--threshold','-t',
        dest="align:threshold",
        action="store",
        type=float,
        default=0.0,
        metavar='<THRESHOLD>',
        help="Score threshold. If the score is normalized and expressed in similarity (default),"
             " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
             " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
             " If the score is not normalized and expressed in similarity, it is the length of the"
             " Longest Common Subsequence. If the score is not normalized and expressed in distance,"
             " it is (reference length - LCS length)."
             " Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
             " (no threshold).")

    # Both reference-length flags write to the same destination:
    # 0 (default) = alignment length, 1 = longest sequence, 2 = shortest sequence.
    align_options.add_argument(
        '--longest-length','-L',
        dest="align:reflength",
        action="store_const",
        const=1,
        default=0,
        help="The reference length is the length of the longest sequence."
             " Default: the reference length is the length of the alignment.")

    align_options.add_argument(
        '--shortest-length','-l',
        dest="align:reflength",
        action="store_const",
        const=2,
        default=0,
        help="The reference length is the length of the shortest sequence."
             " Default: the reference length is the length of the alignment.")

    # Score presentation switches (both default to True and are turned off by the flag).
    align_options.add_argument(
        '--raw','-r',
        dest="align:normalize",
        action="store_false",
        default=True,
        help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")

    align_options.add_argument(
        '--distance','-D',
        dest="align:similarity",
        action="store_false",
        default=True,
        help="Score is expressed in distance. Default: score is expressed in similarity.")

    # Optional extra content of the output view.
    align_options.add_argument(
        '--print-seq','-s',
        dest="align:printseq",
        action="store_true",
        default=False,
        help="The nucleotide sequences are written in the output view. Default: they are not written.")

    align_options.add_argument(
        '--print-count','-n',
        dest="align:printcount",
        action="store_true",
        default=False,
        help="Sequence counts are written in the output view. Default: they are not written.")

    # TODO should probably be in a specific option group
    align_options.add_argument(
        '--thread-count','-p',
        dest="align:threadcount",
        action="store",
        type=int,
        default=1,
        metavar='<THREAD COUNT>',
        help="Number of threads to use for the computation. Default: one.")
|
||||||
|
|
||||||
|
|
||||||
|
cpdef align_columns(bytes dms_n,
                    bytes input_view_1_n,
                    bytes output_view_n,
                    bytes input_view_2_n=b"",
                    bytes input_column_1_n=b"",
                    bytes input_column_2_n=b"",
                    bytes input_elt_1_n=b"",
                    bytes input_elt_2_n=b"",
                    bytes id_column_1_n=b"",
                    bytes id_column_2_n=b"",
                    double threshold=0.0, bint normalize=True,
                    int reference=0, bint similarity_mode=True,
                    bint print_seq=False, bint print_count=False,
                    bytes comments=b"{}",
                    int thread_count=1) :
    """
    Align one sequence column with itself, or two sequence columns together.

    When neither a second view name nor a second column name is given, the
    call is forwarded to obi_lcs_align_one_column(); otherwise it is forwarded
    to obi_lcs_align_two_columns(). `thread_count` is only forwarded to the
    one-column function.

    Raises Exception("Error aligning sequences") when the C function called
    returns a negative value.
    """
    # A second view or a second column selects the two-column alignment.
    has_second_input = input_view_2_n != b"" or input_column_2_n != b""

    if has_second_input:
        status = obi_lcs_align_two_columns(dms_n,
                                           input_view_1_n,
                                           input_view_2_n,
                                           input_column_1_n,
                                           input_column_2_n,
                                           input_elt_1_n,
                                           input_elt_2_n,
                                           id_column_1_n,
                                           id_column_2_n,
                                           output_view_n,
                                           comments,
                                           print_seq,
                                           print_count,
                                           threshold, normalize, reference, similarity_mode)
    else:
        status = obi_lcs_align_one_column(dms_n,
                                          input_view_1_n,
                                          input_column_1_n,
                                          input_elt_1_n,
                                          id_column_1_n,
                                          output_view_n,
                                          comments,
                                          print_seq,
                                          print_count,
                                          threshold, normalize, reference, similarity_mode,
                                          thread_count)

    if status < 0:
        raise Exception("Error aligning sequences")
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """
    Entry point of the `obi align` command.

    Opens the input DMS (and the optional second input given with --input-2),
    runs the alignment through align_columns() inside the first input DMS,
    then moves the result to the output DMS when it is a different one.
    Temporary views created in the first input DMS along the way are deleted
    before the DMS are closed.

    @param config: the obitools configuration; the 'obi' section provides
                   'inputURI' and 'outputURI', the 'align' section provides
                   the options registered by addOptions().

    Raises Exception when an input or the output cannot be opened, when an
    input URI has more than three "/"-separated elements, or when the
    alignment itself fails.
    """
    DMS.obi_atexit()

    logger("info", "obi align")

    # Open the input: only the DMS
    input_1 = open_uri(config['obi']['inputURI'],
                       dms_only=True)
    if input_1 is None:
        raise Exception("Could not read input")
    i_dms = input_1[0]
    i_dms_name = i_dms.name
    i_view_name, i_column_name, i_element_name = _split_view_uri(input_1[1],
                                                                 config['obi']['inputURI'])

    # Open the second input if there is one
    i_dms_2 = None
    i_dms_name_2 = b""
    original_i_view_name_2 = b""
    i_view_name_2 = b""
    i_column_name_2 = b""
    i_element_name_2 = b""
    if config['align']['inputuri2']:
        input_2 = open_uri(config['align']['inputuri2'],
                           dms_only=True)
        if input_2 is None:
            raise Exception("Could not read second input")
        i_dms_2 = input_2[0]
        i_dms_name_2 = i_dms_2.name
        original_i_view_name_2, i_column_name_2, i_element_name_2 = _split_view_uri(input_2[1],
                                                                                    config['align']['inputuri2'])

        # If the 2 input DMS are not the same, temporarily import 2nd input view in first input DMS
        # NOTE(review): when both inputs live in the same DMS, i_view_name_2
        # stays empty here, as in the original code - confirm this is what the
        # alignment functions expect.
        if i_dms != i_dms_2:
            i_view_name_2 = _unique_view_name(i_dms, original_i_view_name_2)
            # NOTE(review): [:-7] presumably strips a 7-character DMS suffix
            # from the path - confirm against DMS.full_path.
            View.import_view(i_dms_2.full_path[:-7], i_dms.full_path[:-7], original_i_view_name_2, i_view_name_2)

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output")
    o_dms = output[0]
    final_o_view_name = output[1]

    # If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
    # the right DMS and deleted in the other afterwards.
    if i_dms != o_dms:
        o_view_name = _unique_view_name(i_dms, final_o_view_name)
    else:
        o_view_name = final_o_view_name

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])

    i_dms_list = [i_dms_name]
    if i_dms_name_2:
        i_dms_list.append(i_dms_name_2)
    i_view_list = [i_view_name]
    if original_i_view_name_2:
        i_view_list.append(original_i_view_name_2)
    comments = View.print_config(config, "align", command_line, input_dms_name=i_dms_list, input_view_name=i_view_list)

    # Call cython alignment function
    # Using default ID columns of the view. TODO discuss adding option
    align_columns(i_dms_name,
                  i_view_name,
                  o_view_name,
                  input_view_2_n = i_view_name_2,
                  input_column_1_n = i_column_name,
                  input_column_2_n = i_column_name_2,
                  input_elt_1_n = i_element_name,
                  input_elt_2_n = i_element_name_2,
                  id_column_1_n = b"",
                  id_column_2_n = b"",
                  threshold = config['align']['threshold'],
                  normalize = config['align']['normalize'],
                  reference = config['align']['reflength'],
                  similarity_mode = config['align']['similarity'],
                  print_seq = config['align']['printseq'],
                  print_count = config['align']['printcount'],
                  comments = comments,
                  thread_count = config['align']['threadcount'])

    # If the input and output DMS are not the same, export result view to output DMS
    if i_dms != o_dms:
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # Save command config in output DMS comments
    o_dms.record_command_line(command_line)

    # If the two input DMS are different, delete the temporary input view in the first input DMS
    if i_dms_2 and i_dms != i_dms_2:
        View.delete_view(i_dms, i_view_name_2)
        i_dms_2.close()

    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
        o_dms.close()

    i_dms.close()

    logger("info", "Done.")


def _split_view_uri(uri, raw_uri):
    """
    Split "view[/column[/element]]" (bytes) into (view, column, element).

    Missing parts are returned as b"". `raw_uri` is only used in the error
    raised when `uri` has more than three "/"-separated elements.
    """
    parts = uri.split(b"/")
    if len(parts) > 3:
        raise Exception("Input URI contains too many elements:", raw_uri)
    view_name = parts[0]
    # The column is present as soon as there are at least two parts, including
    # when an element follows it (it used to be dropped in that case).
    column_name = parts[1] if len(parts) >= 2 else b""
    element_name = parts[2] if len(parts) == 3 else b""
    return view_name, column_name, element_name


def _unique_view_name(dms, base_name):
    """
    Return `base_name`, or `base_name` + b"_<i>" for the first i >= 0 that is
    not already found in `dms`.
    """
    candidate = base_name
    i = 0
    while candidate in dms:    # Making sure view name is unique in the DMS
        candidate = base_name + b"_" + str2bytes(str(i))
        i += 1
    return candidate
|
||||||
|
|
103
python/obitools3/commands/alignpairedend.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
4
python/obitools3/commands/alignpairedend.pxd
Executable file
@ -0,0 +1,4 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
|
||||||
|
# C-level declaration of buildAlignment(); the implementation is in alignpairedend.pyx.
# Takes the direct and the reverse read and returns a Python object (the chosen
# alignment, or None when one of the reads is empty).
cdef object buildAlignment(object direct, object reverse)
|
249
python/obitools3/commands/alignpairedend.pyx
Executable file
@ -0,0 +1,249 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
from obitools3.dms.column.column cimport Column
|
||||||
|
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||||
|
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
||||||
|
from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
|
||||||
|
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
|
||||||
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||||
|
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Aligns paired-ended reads"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi alignpairedend`` on *parser*.

    The minimal input and output option groups (imported from
    ``obitools3.apps.optiongroups``) are added first, followed by an argument
    group holding the options specific to this command.
    """
    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)

    specific = parser.add_argument_group('obi alignpairedend specific options')

    # Optional second view holding the reverse reads.
    specific.add_argument(
        '-R', '--reverse-reads',
        dest="alignpairedend:reverse",
        action="store",
        type=str,
        default=None,
        metavar="<URI>",
        help="URI to the reverse reads if they are in a different view than the forward reads",
    )

    # Score threshold under which an alignment is not kept.
    specific.add_argument(
        '--score-min',
        dest="alignpairedend:smin",
        action="store",
        type=float,
        default=None,
        metavar="#.###",
        help="Minimum score for keeping alignments",
    )

    # Switch between the kmer-based aligner (default) and the true alignment.
    specific.add_argument(
        '-A', '--true-ali',
        dest="alignpairedend:trueali",
        action="store_true",
        default=False,
        help="Performs gap free end alignment of sequences instead of using kmers to compute alignments (slower).",
    )

    specific.add_argument(
        '-k', '--kmer-size',
        dest="alignpairedend:kmersize",
        action="store",
        type=int,
        default=3,
        metavar="#",
        help="K-mer size for kmer comparisons, between 1 and 4 (not when using -A option; default: 3)",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level aligner instances, reused for every pair of reads.
la = QSolexaReverseAssemble()
ra = QSolexaRightReverseAssemble()


cdef object buildAlignment(object direct, object reverse):
    """Align *direct* against *reverse* with both aligners and keep the best.

    Returns None when either read is empty. Otherwise the pair is aligned
    with ``la`` (result tagged ``direction='left'``) and with ``ra`` (result
    tagged ``direction='right'``); the right alignment is returned only when
    its score is strictly greater than the left one.
    """
    if not (len(direct) and len(reverse)):
        return None

    la.seqA, la.seqB = direct, reverse
    left_ali = la()
    left_ali.direction = 'left'

    ra.seqA, ra.seqB = direct, reverse
    right_ali = ra()
    right_ali.direction = 'right'

    # On equal scores the left alignment wins.
    return right_ali if left_ali.score < right_ali.score else left_ali
|
||||||
|
|
||||||
|
|
||||||
|
def alignmentIterator(entries, aligner):
    """Yield the alignment of every forward/reverse pair found in *entries*.

    Parameters:
        entries: either a list ``[forward, reverse]`` of two indexable
            collections of equal length (reads stored in two views), or a
            single indexable collection whose elements carry the reverse read
            in the REVERSE_SEQ_COLUMN_NAME / REVERSE_QUALITY_COLUMN_NAME
            columns.
        aligner: callable taking ``(seqF, seqR)`` and returning an alignment
            object, or None when the pair cannot be aligned.

    Yields:
        Each non-None alignment, in input order. Pairs for which *aligner*
        returns None are silently skipped, so the n-th yielded alignment is
        not necessarily the alignment of the n-th input pair.
    """
    # isinstance() rather than an exact type comparison, so that list
    # subclasses are also recognised as a [forward, reverse] pair.
    two_views = isinstance(entries, list)

    if two_views:
        forward, reverse = entries[0], entries[1]
        entries_len = len(forward)
    else:
        entries_len = len(entries)

    for i in range(entries_len):

        if two_views:
            seqF = forward[i]
            seqR = reverse[i]
        else:
            # Single view: rebuild the reverse read from the columns stored
            # alongside the forward read.
            seqF = Nuc_Seq.new_from_stored(entries[i])
            seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
            seqR.index = i

        ali = aligner(seqF, seqR)

        if ali is not None:
            yield ali
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Entry point of ``obi alignpairedend``.

    Opens the forward reads (and the reverse reads when they are stored in a
    separate view), creates the output NUC_SEQS view with a quality column,
    aligns every pair of reads and writes one consensus (or joined sequence
    when the alignment score does not exceed the minimum score) per
    alignment, then records the command in the view and DMS and closes
    everything.

    Raises:
        Exception: an input or the output could not be opened, or the two
            input views do not hold the same number of reads.
        NotImplementedError: an input view is not a NUC_SEQS view.
    """
    DMS.obi_atexit()

    logger("info", "obi alignpairedend")

    options = config['alignpairedend']

    # Open the forward (or only) input view
    fwd_input = open_uri(config['obi']['inputURI'])
    if fwd_input is None:
        raise Exception("Could not open input reads")
    if fwd_input[2] != View_NUC_SEQS:
        raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')

    forward = None
    reverse = None
    two_views = "reverse" in options

    if two_views:
        # Reverse reads live in their own view
        forward = fwd_input[1]

        rev_input = open_uri(options["reverse"])
        if rev_input is None:
            raise Exception("Could not open reverse reads")
        if rev_input[2] != View_NUC_SEQS:
            raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')

        reverse = rev_input[1]

        if len(forward) != len(reverse):
            raise Exception("Error: the number of forward and reverse reads are different")

        entries = [forward, reverse]
        input_dms_name = [forward.dms.name, reverse.dms.name]
        input_view_name = [forward.name, reverse.name]
        entries_len = len(forward)
    else:
        # Forward and reverse reads are stored in the same view
        entries = fwd_input[1]
        input_dms_name = [entries.dms.name]
        input_view_name = [entries.name]
        entries_len = len(entries)

    # Open the output
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=View_NUC_SEQS)
    if output is None:
        raise Exception("Could not create output view")

    view = output[1]

    Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)  # TODO output URI quality option?

    smin = options['smin'] if 'smin' in options else 0

    # Initialize the progress bar
    pb = ProgressBar(entries_len, config, seconde=5)

    # Choose the aligner: true alignment on request, kmer similarity otherwise
    kmer_ali = not options['trueali']
    if not kmer_ali:
        aligner = buildAlignment
    elif type(entries) == list:
        aligner = Kmer_similarity(entries[0], view2=entries[1], kmer_size=options['kmersize'])
    else:
        aligner = Kmer_similarity(entries,
                                  column2=entries[REVERSE_SEQ_COLUMN_NAME],
                                  qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME],
                                  kmer_size=options['kmersize'])

    # NOTE(review): `written` indexes the output view AND the input reads.
    # alignmentIterator() skips pairs whose alignment is None, so after a skip
    # the forward/reverse reads fetched below are no longer the ones that
    # produced `ali` -- to be confirmed and fixed together with the iterator.
    written = 0
    for ali in alignmentIterator(entries, aligner):

        pb(written)

        consensus = view[written]
        seqF = forward[written] if two_views else entries[written]

        if smin > 0:
            if ali.score > smin:
                buildConsensus(ali, consensus, seqF)
            else:
                # Score too low: join the two reads instead of merging them
                if two_views:
                    seqR = reverse[written]
                else:
                    seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
                buildJoinedSequence(ali, seqR, consensus, forward=seqF)

            consensus[b"smin"] = smin
        else:
            buildConsensus(ali, consensus, seqF)

        if kmer_ali:
            ali.free()

        written += 1

    pb(written, force=True)
    print("", file=sys.stderr)

    if kmer_ali:
        aligner.free()

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    view.write_config(config, "alignpairedend", command_line,
                      input_dms_name=input_dms_name,
                      input_view_name=input_view_name)
    output[0].record_command_line(command_line)

    fwd_input[0].close()
    if two_views:
        rev_input[0].close()
    output[0].close()

    logger("info", "Done.")
|
||||||
|
|
103
python/obitools3/commands/annotate.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
382
python/obitools3/commands/annotate.pyx
Executable file
@ -0,0 +1,382 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from functools import reduce
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||||
|
ID_COLUMN, \
|
||||||
|
DEFINITION_COLUMN, \
|
||||||
|
QUALITY_COLUMN, \
|
||||||
|
COUNT_COLUMN
|
||||||
|
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Annotate views with new tags and edit existing annotations"
|
||||||
|
|
||||||
|
|
||||||
|
# Columns that run() never deletes when the --clear / --keep options are used.
SPECIAL_COLUMNS = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN]
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi annotate`` on *parser*.

    The minimal input, taxonomy and minimal output option groups (imported
    from ``obitools3.apps.optiongroups``) are added first; the options
    specific to this command are then declared, in order, from a table of
    ``(flags, add_argument keyword arguments)`` pairs.
    """
    addMinimalInputOption(parser)
    addTaxonomyOption(parser)
    addMinimalOutputOption(parser)

    group = parser.add_argument_group('obi annotate specific options')

    option_table = (
        # TODO seq/elt/line???
        (('--seq-rank',),
         dict(action="store_true",
              dest="annotate:add_rank",
              default=False,
              help="Add a rank attribute to the sequence "
                   "indicating the sequence position in the data.")),

        (('-R', '--rename-tag'),
         dict(action="append",
              dest="annotate:rename_tags",
              metavar="<OLD_NAME:NEW_NAME>",
              type=str,
              default=[],
              help="Change tag name from OLD_NAME to NEW_NAME.")),

        (('-D', '--delete-tag'),
         dict(action="append",
              dest="annotate:delete_tags",
              metavar="<TAG_NAME>",
              type=str,
              default=[],
              help="Delete tag TAG_NAME.")),

        (('-S', '--set-tag'),
         dict(action="append",
              dest="annotate:set_tags",
              metavar="<TAG_NAME:PYTHON_EXPRESSION>",
              type=str,
              default=[],
              help="Add a new tag named TAG_NAME with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--set-identifier',),
         dict(action="store",
              dest="annotate:set_identifier",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Set sequence identifier with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--set-sequence',),
         dict(action="store",
              dest="annotate:set_sequence",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Change the sequence itself with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--set-definition',),
         dict(action="store",
              dest="annotate:set_definition",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Set sequence definition with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--run',),
         dict(action="store",
              dest="annotate:run",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Run a python expression on each element.")),

        (('-C', '--clear'),
         dict(action="store_true",
              dest="annotate:clear",
              default=False,
              help="Clear all tags except the obligatory ones.")),

        (('-k', '--keep'),
         dict(action='append',
              dest="annotate:keep",
              metavar="<TAG>",
              default=[],
              type=str,
              help="Only keep this tag. (Can be specified several times.)")),

        (('--length',),
         dict(action="store_true",
              dest="annotate:length",
              default=False,
              help="Add 'seq_length' tag with sequence length.")),

        (('--with-taxon-at-rank',),
         dict(action='append',
              dest="annotate:taxon_at_rank",
              metavar="<RANK_NAME>",
              default=[],
              type=str,
              help="Add taxonomy annotation at the specified rank level RANK_NAME.")),
    )

    for flags, settings in option_table:
        group.add_argument(*flags, **settings)
|
||||||
|
|
||||||
|
|
||||||
|
def sequenceTaggerGenerator(config, taxo=None):
    """Build the per-sequence annotation function of ``obi annotate``.

    Parameters:
        config: configuration mapping; the options are read from
            ``config['annotate']`` (``set_tags``, ``set_identifier``,
            ``set_definition``, ``set_sequence``, ``length``, ``add_rank``,
            ``run``, ``taxon_at_rank``). An option that is absent is treated
            as "not requested".
        taxo: taxonomy object, or None. Required when ``taxon_at_rank`` is
            not empty; when given it is also exposed to the evaluated
            expressions as ``taxonomy``.

    Returns:
        A function ``sequenceTagger(seq)`` that edits *seq* in place.

    Raises:
        Exception: ``taxon_at_rank`` is requested without a taxonomy.
    """
    options = config['annotate']

    # Start from an empty list so that a missing 'set_tags' option means
    # "no tag to set" instead of failing on len(None).
    toSet = []
    if 'set_tags' in options:  # TODO default option problem, to fix
        # Only well-formed TAG_NAME:PYTHON_EXPRESSION items are kept.
        toSet = [[tobytes(part) for part in spec.split(':', 1)]
                 for spec in options['set_tags']
                 if len(spec.split(':', 1)) == 2]

    newId = options['set_identifier'] if 'set_identifier' in options else None
    newDef = options['set_definition'] if 'set_definition' in options else None
    newSeq = options['set_sequence'] if 'set_sequence' in options else None
    length = options['length'] if 'length' in options else None
    add_rank = options['add_rank'] if 'add_rank' in options else None
    run_expr = options['run'] if 'run' in options else None

    # Number of sequences processed so far; kept in a list so that the
    # closures below can update it.
    counter = [0]

    annoteRank = []
    if 'taxon_at_rank' in options and options['taxon_at_rank']:
        if taxo is None:
            raise Exception("A taxonomy must be provided to annotate taxon ranks")
        annoteRank = options['taxon_at_rank']

    def _evaluate(expression, seq):
        """Evaluate a user expression with *seq* as its local namespace."""
        # NOTE(security): the expressions come straight from the command line
        # and are evaluated with eval(); they must never come from an
        # untrusted source.
        environ = {'sequence': seq, 'counter': counter[0], 'math': math}
        if taxo is not None:
            environ['taxonomy'] = taxo
        return eval(expression, environ, seq)

    def sequenceTagger(seq):
        """Apply every requested edition to *seq*, in place."""
        counter[0] += 1

        # Taxon (and its scientific name) at each requested rank
        for rank in annoteRank:
            if 'taxid' in seq:
                taxid = seq['taxid']
                if taxid is not None:
                    rtaxid = taxo.get_taxon_at_rank(taxid, rank)
                    if rtaxid is not None:
                        scn = taxo.get_scientific_name(rtaxid)
                    else:
                        scn = None
                    seq[rank] = rtaxid
                    seq["%s_name" % rank] = scn

        if add_rank:
            seq['seq_rank'] = counter[0]

        for tag, expression in toSet:
            seq[tag] = _evaluate(expression, seq)

        if length:
            seq['seq_length'] = len(seq)

        if newId is not None:
            seq.id = _evaluate(newId, seq)

        if newDef is not None:
            seq.definition = _evaluate(newDef, seq)

        if newSeq is not None:
            seq.seq = _evaluate(newSeq, seq)
            if 'seq_length' in seq:
                seq['seq_length'] = len(seq)
            # Delete quality since it must match the sequence.
            # TODO discuss deleting for each sequence separately
            if QUALITY_COLUMN in seq:
                seq.view.delete_column(QUALITY_COLUMN)

        if run_expr is not None:
            _evaluate(run_expr, seq)

    return sequenceTagger
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Entry point of ``obi annotate``.

    Clones the input view into the output view (importing it first into the
    output DMS when the two DMS differ), applies the view-level editions
    (clear/keep, delete, rename) and then the line-level editions built by
    sequenceTaggerGenerator(), records the command in the view and DMS, and
    closes both DMS.

    Raises:
        Exception: the input, the output or the taxonomy could not be opened,
            or the output view could not be created.
        RollbackException: any error raised while editing the output view.
    """
    DMS.obi_atexit()

    logger("info", "obi annotate")

    # Open the input
    input_uri = open_uri(config['obi']['inputURI'])
    if input_uri is None:
        raise Exception("Could not read input view")
    i_dms = input_uri[0]
    i_view = input_uri[1]
    i_view_name = input_uri[1].name

    # Open the output: only the DMS, as the output view is going to be created by cloning the input view
    # (could eventually be done via an open_uri() argument)
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    o_view_name = output[1]

    # If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
    # (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
    if i_dms != o_dms:
        imported_view_name = i_view_name
        suffix = 0
        while imported_view_name in o_dms:  # Making sure view name is unique in output DMS
            imported_view_name = i_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
        i_view = o_dms[imported_view_name]

    # Clone output view from input view
    o_view = i_view.clone(o_view_name)
    if o_view is None:
        raise Exception("Couldn't create output view")
    i_view.close()

    # Open taxonomy if there is one
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config['obi']['taxoURI'])
        if taxo_uri is None:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else:
        taxo = None

    # Initialize the progress bar
    pb = ProgressBar(len(o_view), config, seconde=5)

    try:

        # Apply editions
        # Editions at view level
        options = config['annotate']

        # Every option falls back to a neutral value so that a missing key
        # means "not requested" rather than an unbound variable.
        toDelete = options['delete_tags'][:] if 'delete_tags' in options else []
        if 'rename_tags' in options:
            # Only well-formed OLD_NAME:NEW_NAME items are kept.
            toRename = [x.split(':', 1) for x in options['rename_tags'] if len(x.split(':', 1)) == 2]
        else:
            toRename = []
        clear = options['clear'] if 'clear' in options else False
        keep = options['keep'] if 'keep' in options else []

        toDelete = [tobytes(tag) for tag in toDelete]
        toRename = [[tobytes(name) for name in pair] for pair in toRename]
        # Converted in place, as before, so the list stored in the config
        # holds the same values as the ones actually used.
        for idx in range(len(keep)):
            keep[idx] = tobytes(keep[idx])
        keep = set(keep)

        if clear or keep:
            # Snapshot the keys: columns are deleted while iterating.
            for k in list(o_view.keys()):
                if k not in keep and k not in SPECIAL_COLUMNS:
                    o_view.delete_column(k)
        else:
            for k in toDelete:
                o_view.delete_column(k)
            for old_name, new_name in toRename:
                if old_name in o_view:
                    o_view.rename_column(old_name, new_name)

        # Editions at line level
        sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
        for i in range(len(o_view)):
            pb(i)
            sequenceTagger(o_view[i])

    except Exception as e:
        raise RollbackException("obi annotate error, rollbacking view: " + str(e), o_view)

    # Report completion from the view length: the loop index is unbound when
    # the view is empty and one short of the total otherwise.
    pb(len(o_view), force=True)
    print("", file=sys.stderr)

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name = [i_dms.name]
    input_view_name = [i_view_name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_path = config['obi']['taxoURI'].split("/")
        input_dms_name.append(taxo_path[-3])
        input_view_name.append("taxonomy/" + taxo_path[-1])
    o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(o_dms, imported_view_name)
        o_dms.close()
    i_dms.close()

    logger("info", "Done.")
|
103
python/obitools3/commands/build_ref_db.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
105
python/obitools3/commands/build_ref_db.pyx
Executable file
@ -0,0 +1,105 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms.dms cimport DMS
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.capi.build_reference_db cimport build_reference_db
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
from obitools3.dms.view.view cimport View
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# One-line description of the command. The previous text was a verbatim copy
# of the `obi clean` title and did not describe this command.
__title__="Build a reference database for ecotag"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi build_ref_db`` on *parser*.

    Delegates to the shared ``addMinimalInputOption``, ``addTaxonomyOption``
    and ``addMinimalOutputOption`` helpers (in that order), then adds a
    command-specific group with the ``--threshold`` / ``-t`` option, stored
    under the ``build_ref_db:threshold`` destination.

    :param parser: argument parser to populate in place.
    """
    # Shared option groups, registered in the same order as before.
    for add_common_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption):
        add_common_options(parser)

    specific = parser.add_argument_group('obi build_ref_db specific options')

    # '%%' is an escaped percent sign for argparse's help formatting.
    threshold_help = ("Score threshold as a normalized identity, e.g. 0.95 for an identity of 95%%. Default: 0.00"
                      " (no threshold).")

    specific.add_argument('--threshold', '-t',
                          dest="build_ref_db:threshold",
                          action="store",
                          type=float,
                          default=0.0,
                          metavar='<THRESHOLD>',
                          help=threshold_help)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run ``obi build_ref_db``.

    Reads the input, output and taxonomy URIs from ``config['obi']`` and the
    score threshold from ``config['build_ref_db']['threshold']``, then calls
    ``build_reference_db`` to do the actual work.

    When the input and output DMS differ, the result is written to a
    temporary view of the input DMS, imported into the output DMS, and the
    temporary view is then deleted from the input DMS.

    :param config: nested configuration mapping of the command.
    :raises Exception: if the input cannot be read, the output cannot be
        created, or ``build_reference_db`` returns a negative value.
    """

    DMS.obi_atexit()

    logger("info", "obi build_ref_db")

    # Open the input: only the DMS
    opened_input = open_uri(config['obi']['inputURI'], dms_only=True)
    if opened_input is None:
        raise Exception("Could not read input")
    i_dms = opened_input[0]
    i_dms_name = i_dms.name
    i_view_name = opened_input[1]

    # Open the output: only the DMS
    opened_output = open_uri(config['obi']['outputURI'], input=False, dms_only=True)
    if opened_output is None:
        raise Exception("Could not create output")
    o_dms = opened_output[0]
    final_o_view_name = opened_output[1]

    # If the input and output DMS are not the same, the database is built in
    # a temporary view that is exported to the right DMS and deleted in the
    # other afterwards.
    different_dms = i_dms != o_dms

    o_view_name = final_o_view_name
    if different_dms:
        suffix = 0
        while o_view_name in i_dms:  # Making sure view name is unique in input DMS
            o_view_name = final_o_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1

    # Read taxonomy name: last component of the taxonomy URI
    taxo_uri_parts = config['obi']['taxoURI'].split("/")
    taxonomy_name = taxo_uri_parts[-1]  # Robust in theory

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    # The third-from-last URI component is recorded as the taxonomy's DMS name.
    input_dms_name = [i_dms_name, taxo_uri_parts[-3]]
    input_view_name = [i_view_name, "taxonomy/" + taxo_uri_parts[-1]]
    comments = View.print_config(config, "build_ref_db", command_line,
                                 input_dms_name=input_dms_name,
                                 input_view_name=input_view_name)

    status = build_reference_db(tobytes(i_dms_name),
                                tobytes(i_view_name),
                                tobytes(taxonomy_name),
                                tobytes(o_view_name),
                                comments,
                                config['build_ref_db']['threshold'])
    if status < 0:
        raise Exception("Error building a reference database")

    # If the input and output DMS are not the same, export result view to output DMS
    if different_dms:
        # [:-7] drops the last 7 characters of each DMS path
        # (presumably the '.obidms' extension -- TODO confirm).
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # If the input and the output DMS are different, delete the temporary
    # result view in the input DMS and close the output DMS.
    if different_dms:
        View.delete_view(i_dms, o_view_name)
        o_dms.close()

    i_dms.close()

    logger("info", "Done.")
|
||||||
|
|
@ -1,68 +0,0 @@
|
|||||||
#cython: language_level=3
|
|
||||||
|
|
||||||
from obitools3.dms.dms import DMS # TODO cimport doesn't work
|
|
||||||
from obitools3.dms.view.view import View # TODO cimport doesn't work
|
|
||||||
|
|
||||||
|
|
||||||
__title__="Print a preview of a DMS, view, column...."
|
|
||||||
|
|
||||||
default_config = { 'inputview' : None,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# TODO make it work with URIs
|
|
||||||
|
|
||||||
def addOptions(parser):
    """Register the DMS and view selection options on *parser*.

    Adds a 'DMS and view options' group with two optional string options,
    both defaulting to ``None``:

    * ``--default-dms`` / ``-d``, stored under ``obi:defaultdms``;
    * ``--view`` / ``-v``, stored under ``obi:inputview``.

    :param parser: argument parser to populate in place.
    """

    # TODO put this common group somewhere else but I don't know where
    group = parser.add_argument_group('DMS and view options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data.")

    # Stored under 'inputview' (not 'view'): that is the key run() reads from
    # config['obi'] and the one declared in default_config. With the former
    # 'obi:view' destination the value given on the command line was never
    # looked at.
    group.add_argument('--view', '-v',
                       action="store", dest="obi:inputview",
                       metavar='<VIEW NAME>',
                       default=None,
                       type=str,
                       help="Name of the view.")
|
|
||||||
|
|
||||||
|
|
||||||
# group=parser.add_argument_group('obi check specific options')
|
|
||||||
|
|
||||||
# group.add_argument('--print',
|
|
||||||
# action="store", dest="less:print",
|
|
||||||
# metavar='<N>',
|
|
||||||
# default=None,
|
|
||||||
# type=int,
|
|
||||||
# help="Print N sequences (default: 10)")
|
|
||||||
|
|
||||||
|
|
||||||
def run(config):
    """Print a preview of one view, or of every item of the default DMS.

    Opens the DMS named by ``config['obi']['defaultdms']``. If
    ``config['obi']['inputview']`` is not ``None``, that view is opened and
    its ``repr`` is printed; otherwise the ``repr`` of every item yielded by
    iterating over the DMS is printed.

    The DMS is closed in all cases, including when opening or printing a
    view raises.

    :param config: nested configuration mapping of the command.
    """

    # Open DMS
    d = DMS.open(config['obi']['defaultdms'])

    try:
        view_name = config['obi']['inputview']

        if view_name is not None:
            # Open input view if there is one
            iview = View.open(d, view_name)
            print(repr(iview))
        else:
            for v in d:
                print(repr(v))
    finally:
        # Release the DMS even if something above failed.
        d.close()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
103
python/obitools3/commands/clean.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
124
python/obitools3/commands/clean.pyx
Executable file
@ -0,0 +1,124 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms.dms cimport DMS
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.capi.obiclean cimport obi_clean
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
from obitools3.dms.view.view cimport View
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi clean`` on *parser*.

    Delegates to the shared ``addMinimalInputOption`` and
    ``addMinimalOutputOption`` helpers, then adds a command-specific group
    with ``--distance``, ``--sample-tag``, ``--ratio``, ``--heads-only`` and
    ``--cluster-tags``. Each value is stored under a ``clean:<name>``
    destination.

    :param parser: argument parser to populate in place.
    """

    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)

    group = parser.add_argument_group('obi clean specific options')

    ratio_help = ("Maximum ratio between the counts of two sequences so that the less abundant one can be considered"
                  " a variant of the more abundant one. Default: 0.5.")

    # (flags, keyword arguments) pairs, registered in this order.
    option_specs = (
        (('--distance', '-d'),
         dict(dest="clean:distance", action="store",
              type=float, default=1.0,
              metavar='<DISTANCE>',
              help="Maximum numbers of errors between two variant sequences. Default: 1.")),
        (('--sample-tag', '-s'),
         dict(dest="clean:sample-tag-name", action="store",
              type=str, default="merged_sample",
              metavar="<SAMPLE TAG NAME>",
              help="Name of the tag where sample counts are kept.")),
        (('--ratio', '-r'),
         dict(dest="clean:ratio", action="store",
              type=float, default=0.5,
              metavar='<RATIO>',
              help=ratio_help)),
        (('--heads-only', '-H'),
         dict(dest="clean:heads-only", action="store_true",
              default=False,
              help="Only sequences labeled as heads are kept in the output. Default: False")),
        (('--cluster-tags', '-C'),
         dict(dest="clean:cluster-tags", action="store_true",
              default=False,
              help="Adds tags for each sequence giving its cluster's head and weight for each sample.")),
    )

    for flags, settings in option_specs:
        group.add_argument(*flags, **settings)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run ``obi clean``.

    Reads the input and output URIs from ``config['obi']`` and the
    ``sample-tag-name``, ``distance``, ``ratio`` and ``heads-only`` settings
    from ``config['clean']``, then calls ``obi_clean`` to do the actual work.

    When the input and output DMS differ, the result is written to a
    temporary view of the input DMS, imported into the output DMS, and the
    temporary view is then deleted from the input DMS.

    :param config: nested configuration mapping of the command.
    :raises Exception: if the input cannot be read, the output cannot be
        created, or ``obi_clean`` returns a negative value.
    """

    DMS.obi_atexit()

    logger("info", "obi clean")

    # Open the input: only the DMS
    opened_input = open_uri(config['obi']['inputURI'], dms_only=True)
    if opened_input is None:
        raise Exception("Could not read input")
    i_dms = opened_input[0]
    i_dms_name = i_dms.name
    i_view_name = opened_input[1]

    # Open the output: only the DMS
    opened_output = open_uri(config['obi']['outputURI'], input=False, dms_only=True)
    if opened_output is None:
        raise Exception("Could not create output")
    o_dms = opened_output[0]
    final_o_view_name = opened_output[1]

    # If the input and output DMS are not the same, obiclean writes to a
    # temporary view that is exported to the right DMS and deleted in the
    # other afterwards.
    different_dms = i_dms != o_dms

    o_view_name = final_o_view_name
    if different_dms:
        suffix = 0
        while o_view_name in i_dms:  # Making sure view name is unique in input DMS
            o_view_name = final_o_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    comments = View.print_config(config, "clean", command_line,
                                 input_dms_name=[i_dms_name],
                                 input_view_name=[i_view_name])

    clean_options = config['clean']
    # The trailing literal 1 is passed through unchanged (presumably a
    # thread count -- TODO confirm against the obi_clean declaration).
    status = obi_clean(tobytes(i_dms_name),
                       tobytes(i_view_name),
                       tobytes(clean_options['sample-tag-name']),
                       tobytes(o_view_name),
                       comments,
                       clean_options['distance'],
                       clean_options['ratio'],
                       clean_options['heads-only'],
                       1)
    if status < 0:
        raise Exception("Error running obiclean")

    # If the input and output DMS are not the same, export result view to output DMS
    if different_dms:
        # [:-7] drops the last 7 characters of each DMS path
        # (presumably the '.obidms' extension -- TODO confirm).
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # If the input and the output DMS are different, delete the temporary
    # result view in the input DMS and close the output DMS.
    if different_dms:
        View.delete_view(i_dms, o_view_name)
        o_dms.close()

    i_dms.close()

    logger("info", "Done.")
|
103
python/obitools3/commands/count.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
@ -1,44 +0,0 @@
|
|||||||
'''
|
|
||||||
Created on 8 mars 2016
|
|
||||||
|
|
||||||
@author: coissac
|
|
||||||
'''
|
|
||||||
|
|
||||||
from obitools3.apps.progress import ProgressBar # @UnresolvedImport
|
|
||||||
import time
|
|
||||||
|
|
||||||
__title__="Counts sequences in a sequence set"
|
|
||||||
|
|
||||||
|
|
||||||
default_config = { 'countmode' : None
|
|
||||||
}
|
|
||||||
|
|
||||||
def addOptions(parser):
    """Register the options of the count command on *parser*.

    Adds one optional positional argument stored under ``obi:input``
    (default ``None``) and an 'Obicount specific options' group with two
    boolean flags, ``-s`` / ``--sequence`` and ``-a`` / ``--all``, stored
    under ``count:sequence`` and ``count:all`` (both default to ``False``).

    :param parser: argument parser to populate in place.
    """
    # Optional positional argument: the data set to count.
    parser.add_argument(dest='obi:input',
                        metavar='obi:input',
                        nargs='?',
                        default=None,
                        help='input data set')

    count_group = parser.add_argument_group('Obicount specific options')

    # (flags, destination, help text) of the two boolean switches.
    switches = (
        (('-s', '--sequence'), "count:sequence",
         "Prints only the number of sequence records."),
        (('-a', '--all'), "count:all",
         "Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."),
    )
    for flags, destination, description in switches:
        count_group.add_argument(*flags,
                                 action="store_true",
                                 dest=destination,
                                 default=False,
                                 help=description)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def run(config):
    """Drive a progress bar through 1000 steps, sleeping 10 ms per step.

    The body does not read any input data: it builds a ``ProgressBar`` for
    1000 steps and updates it once per step.

    :param config: configuration mapping, forwarded to ``ProgressBar``.
    """
    # The code of my command
    total_steps = 1000
    progress = ProgressBar(total_steps, config, seconde=1)

    for step in range(1, total_steps + 1):
        progress(step)
        time.sleep(0.01)
|
|
||||||
|
|
55
python/obitools3/commands/count.pyx
Executable file
@ -0,0 +1,55 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption
|
||||||
|
from obitools3.dms.capi.obiview cimport COUNT_COLUMN
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Counts sequence records"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi count`` on *parser*.

    Delegates to the shared ``addMinimalInputOption`` helper, then adds a
    command-specific group with two boolean flags, ``-s`` / ``--sequence``
    and ``-a`` / ``--all``, stored under ``count:sequence`` and ``count:all``
    (both default to ``False``).

    :param parser: argument parser to populate in place.
    """

    addMinimalInputOption(parser)

    count_group = parser.add_argument_group('obi count specific options')

    # (flags, destination, help text) of the two boolean switches.
    switches = (
        (('-s', '--sequence'), "count:sequence",
         "Prints only the number of sequence records."),
        (('-a', '--all'), "count:all",
         "Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."),
    )
    for flags, destination, description in switches:
        count_group.add_argument(*flags,
                                 action="store_true",
                                 dest=destination,
                                 default=False,
                                 help=description)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run ``obi count``: print the number of records and/or their summed counts.

    Opens ``config['obi']['inputURI']`` and takes the second item of the
    result as the collection of entries. What is printed depends on whether
    ``COUNT_COLUMN`` is in the entries and on the two flags in
    ``config['count']``:

    * no ``COUNT_COLUMN``: the number of entries;
    * ``sequence`` and ``all`` equal (both set or both unset): the number of
      entries followed by the sum of ``COUNT_COLUMN`` over all entries;
    * only ``all`` set: the sum alone;
    * only ``sequence`` set: the number of entries alone.

    :param config: nested configuration mapping of the command.
    :raises Exception: if the input cannot be read.
    """

    DMS.obi_atexit()

    logger("info", "obi count")

    # Open the input
    opened = open_uri(config['obi']['inputURI'])
    if opened is None:
        raise Exception("Could not read input")
    entries = opened[1]

    n_records = len(entries)

    # Without a count column only the number of records can be reported.
    if COUNT_COLUMN not in entries:
        print(n_records)
        return

    flags = config['count']
    records_only = flags['sequence']
    total_only = flags['all']
    # Both flags set, or neither: report both numbers.
    report_both = records_only == total_only

    # The sum is only computed when it is going to be printed.
    if report_both or total_only:
        total_count = sum(entry[COUNT_COLUMN] for entry in entries)

    if report_both:
        print(n_records, total_count)
    elif total_only:
        print(total_count)
    else:
        print(n_records)
|
103
python/obitools3/commands/ecopcr.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
202
python/obitools3/commands/ecopcr.pyx
Executable file
@ -0,0 +1,202 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms.dms cimport DMS
|
||||||
|
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
from obitools3.dms.view import View
|
||||||
|
|
||||||
|
from libc.stdlib cimport malloc, free
|
||||||
|
from libc.stdint cimport int32_t
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="in silico PCR"
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: add option to output unique ids
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi ecopcr`` on *parser*.

    Delegates to the shared ``addMinimalInputOption``, ``addTaxonomyOption``
    and ``addMinimalOutputOption`` helpers (in that order), then adds a
    command-specific group. Each option of that group is stored under an
    ``ecopcr:<name>`` destination.

    :param parser: argument parser to populate in place.
    """

    addMinimalInputOption(parser)
    addTaxonomyOption(parser)
    addMinimalOutputOption(parser)

    group = parser.add_argument_group('obi ecopcr specific options')

    # (flags, keyword arguments) pairs, registered in this order.
    option_specs = (
        (('--primer1', '-F'),
         dict(dest="ecopcr:primer1", action="store",
              type=str,
              metavar='<PRIMER>',
              help="Forward primer.")),

        (('--primer2', '-R'),
         dict(dest="ecopcr:primer2", action="store",
              type=str,
              metavar='<PRIMER>',
              help="Reverse primer.")),

        (('--error', '-e'),
         dict(dest="ecopcr:error", action="store",
              type=int, default=0,
              metavar='<ERROR>',
              help="Maximum number of errors (mismatches) allowed per primer. Default: 0.")),

        (('--min-length', '-l'),
         dict(dest="ecopcr:min-length", action="store",
              type=int, default=0,
              metavar="<MINIMUM LENGTH>",
              help="Minimum length of the in silico amplified DNA fragment, excluding primers.")),

        (('--max-length', '-L'),
         dict(dest="ecopcr:max-length", action="store",
              type=int, default=0,
              metavar="<MAXIMUM LENGTH>",
              help="Maximum length of the in silico amplified DNA fragment, excluding primers.")),

        # Repeatable: every occurrence appends one taxid to the list.
        (('--restrict-to-taxid', '-r'),
         dict(dest="ecopcr:restrict-to-taxid", action="append",
              type=int, default=[],
              metavar="<TAXID>",
              help="Only the sequence records corresponding to the taxonomic group identified "
                   "by TAXID are considered for the in silico PCR. The TAXID is an integer "
                   "that can be found in the NCBI taxonomic database.")),

        # Repeatable: every occurrence appends one taxid to the list.
        (('--ignore-taxid', '-i'),
         dict(dest="ecopcr:ignore-taxid", action="append",
              type=int, default=[],
              metavar="<TAXID>",
              help="The sequences of the taxonomic group identified by TAXID are not considered for the in silico PCR.")),

        (('--circular', '-c'),
         dict(dest="ecopcr:circular", action="store_true",
              default=False,
              help="Considers that the input sequences are circular (e.g. mitochondrial or chloroplastic DNA).")),

        (('--salt-concentration', '-a'),
         dict(dest="ecopcr:salt-concentration", action="store",
              type=float, default=0.05,
              metavar="<FLOAT>",
              help="Salt concentration used for estimating the Tm. Default: 0.05.")),

        (('--salt-correction-method', '-m'),
         dict(dest="ecopcr:salt-correction-method", action="store",
              type=int, default=1,
              metavar="<1|2>",
              help="Defines the method used for estimating the Tm (melting temperature) between the primers and their corresponding "
                   "target sequences. SANTALUCIA: 1, or OWCZARZY: 2. Default: 1.")),

        (('--keep-nucs', '-D'),
         dict(dest="ecopcr:keep-nucs", action="store",
              type=int, default=0,
              metavar="<INTEGER>",
              help="Keeps the specified number of nucleotides on each side of the in silico amplified sequences, "
                   "(already including the amplified DNA fragment plus the two target sequences of the primers).")),

        (('--kingdom-mode', '-k'),
         dict(dest="ecopcr:kingdom-mode", action="store_true",
              default=False,
              help="Print in the output the kingdom of the in silico amplified sequences (default: print the superkingdom).")),
    )

    for flags, settings in option_specs:
        group.add_argument(*flags, **settings)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run the ``obi ecopcr`` command.

    Builds two C arrays of taxids from ``config['ecopcr']['restrict-to-taxid']``
    and ``config['ecopcr']['ignore-taxid']`` (each terminated by -1), opens the
    DMS of the input and output URIs, calls ``obi_ecopcr`` with the options
    stored under ``config['ecopcr']`` and finally records the command line in
    the output DMS.

    Parameters:
        config: configuration mapping. The keys read here are
            ``config['obi']['inputURI']``, ``config['obi']['outputURI']``,
            ``config['obi']['taxoURI']`` and, under ``config['ecopcr']``,
            ``primer1``, ``primer2``, ``error``, ``min-length``,
            ``max-length``, ``restrict-to-taxid``, ``ignore-taxid``,
            ``circular``, ``salt-concentration``, ``salt-correction-method``,
            ``keep-nucs`` and ``kingdom-mode``.

    Raises:
        MemoryError: if one of the taxid arrays cannot be allocated.
        Exception: if the input cannot be read, the output cannot be created,
            or ``obi_ecopcr`` returns a negative value.
    """

    cdef int32_t* restrict_to_taxids_p = NULL
    cdef int32_t* ignore_taxids_p = NULL

    # Everything that can raise runs inside the try block so that the C arrays
    # are released on every exit path (free(NULL) is a no-op).
    try:
        restrict_to_taxids_len = len(config['ecopcr']['restrict-to-taxid'])
        restrict_to_taxids_p = <int32_t*> malloc((restrict_to_taxids_len + 1) * sizeof(int32_t))    # +1 for the -1 flagging the end of the array
        if restrict_to_taxids_p == NULL:
            raise MemoryError("Could not allocate memory for the taxids to restrict to")
        for i in range(restrict_to_taxids_len) :
            restrict_to_taxids_p[i] = config['ecopcr']['restrict-to-taxid'][i]
        restrict_to_taxids_p[restrict_to_taxids_len] = -1

        ignore_taxids_len = len(config['ecopcr']['ignore-taxid'])
        ignore_taxids_p = <int32_t*> malloc((ignore_taxids_len + 1) * sizeof(int32_t))    # +1 for the -1 flagging the end of the array
        if ignore_taxids_p == NULL:
            raise MemoryError("Could not allocate memory for the taxids to ignore")
        for i in range(ignore_taxids_len) :
            ignore_taxids_p[i] = config['ecopcr']['ignore-taxid'][i]
        ignore_taxids_p[ignore_taxids_len] = -1

        DMS.obi_atexit()

        logger("info", "obi ecopcr")

        # Open the input: only the DMS
        input = open_uri(config['obi']['inputURI'],
                         dms_only=True)
        if input is None:
            raise Exception("Could not read input")
        i_dms = input[0]
        i_dms_name = input[0].name
        i_view_name = input[1]

        # Open the output: only the DMS
        output = open_uri(config['obi']['outputURI'],
                          input=False,
                          dms_only=True)
        if output is None:
            raise Exception("Could not create output")
        o_dms = output[0]
        o_dms_name = output[0].name
        o_view_name = output[1]

        # Read taxonomy name: last component of the taxonomy URI
        taxonomy_name = config['obi']['taxoURI'].split("/")[-1]  # Robust in theory

        # Save command config in View comments
        command_line = " ".join(sys.argv[1:])
        input_dms_name=[i_dms_name]
        input_view_name= [i_view_name]
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])

        comments = View.print_config(config, "ecopcr", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

        # TODO: primers in comments?

        # A negative return value from the C function signals a failure
        if obi_ecopcr(tobytes(i_dms_name), tobytes(i_view_name), tobytes(taxonomy_name),
                      tobytes(o_dms_name), tobytes(o_view_name), comments,
                      tobytes(config['ecopcr']['primer1']), tobytes(config['ecopcr']['primer2']),
                      config['ecopcr']['error'],
                      config['ecopcr']['min-length'], config['ecopcr']['max-length'],
                      restrict_to_taxids_p, ignore_taxids_p,
                      config['ecopcr']['circular'], config['ecopcr']['salt-concentration'], config['ecopcr']['salt-correction-method'],
                      config['ecopcr']['keep-nucs'], config['ecopcr']['kingdom-mode']) < 0:
            raise Exception("Error running ecopcr")

        # Save command config in DMS comments
        o_dms.record_command_line(command_line)

        #print("\n\nOutput view:\n````````````", file=sys.stderr)
        #print(repr(o_dms[o_view_name]), file=sys.stderr)

        o_dms.close()

        logger("info", "Done.")

    finally:
        free(restrict_to_taxids_p)
        free(ignore_taxids_p)
|
103
python/obitools3/commands/ecotag.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
129
python/obitools3/commands/ecotag.pyx
Executable file
@ -0,0 +1,129 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms.dms cimport DMS
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.capi.obiecotag cimport obi_ecotag
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
from obitools3.dms.view.view cimport View
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Taxonomic assignment of sequences"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi ecotag`` on *parser*.

    The common option groups imported from ``obitools3.apps.optiongroups`` are
    added first, followed by an ``obi ecotag specific options`` group holding
    ``--ref-database/-R`` (stored as ``ecotag:ref_view``) and
    ``--minimum-identity/-m`` (stored as ``ecotag:threshold``, float,
    default 0.0).
    """

    # Common option groups, added in the same order as before
    for add_common_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption):
        add_common_options(parser)

    ecotag_group = parser.add_argument_group('obi ecotag specific options')

    # View holding the reference database
    ref_database_settings = dict(
        action="store",
        dest="ecotag:ref_view",
        metavar='<REF_VIEW>',
        type=str,
        help="URI of the view containing the reference database as built by the build_ref_db command.")
    ecotag_group.add_argument('--ref-database', '-R', **ref_database_settings)

    # Identity threshold ('%%' is argparse's escape for a literal percent sign)
    minimum_identity_settings = dict(
        action="store",
        dest="ecotag:threshold",
        metavar='<THRESHOLD>',
        default=0.0,
        type=float,
        help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
             "Default: 0.00 (no threshold).")
    ecotag_group.add_argument('--minimum-identity', '-m', **minimum_identity_settings)
|
||||||
|
|
||||||
|
def run(config):
    """Run the ``obi ecotag`` command.

    Opens the DMS of the query, reference, output and taxonomy URIs, calls
    ``obi_ecotag`` and records the command line in the output DMS. When the
    input and output DMS differ, the result is first written to a temporary
    view in the input DMS, imported into the output DMS with
    ``View.import_view`` and then deleted from the input DMS.

    Parameters:
        config: configuration mapping. The keys read here are
            ``config['obi']['inputURI']``, ``config['obi']['outputURI']``,
            ``config['obi']['taxoURI']``, ``config['ecotag']['ref_view']``
            and ``config['ecotag']['threshold']``.

    Raises:
        Exception: if the input, the reference view, the output or the
            taxonomy cannot be opened, or if ``obi_ecotag`` returns a
            negative value.
    """

    DMS.obi_atexit()

    logger("info", "obi ecotag")

    # Open the query view: only the DMS
    input = open_uri(config['obi']['inputURI'],
                     dms_only=True)
    if input is None:
        raise Exception("Could not read input")
    i_dms = input[0]
    i_dms_name = input[0].name
    i_view_name = input[1]

    # Open the reference view: only the DMS
    ref = open_uri(config['ecotag']['ref_view'],
                   dms_only=True)
    if ref is None:
        raise Exception("Could not read reference view URI")
    ref_dms = ref[0]
    ref_dms_name = ref[0].name
    ref_view_name = ref[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output")
    o_dms = output[0]
    final_o_view_name = output[1]

    # Evaluated once and reused for the temporary view, the export and the cleanup
    different_dms = i_dms != o_dms

    # If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
    # the right DMS and deleted in the other afterwards.
    if different_dms:
        temporary_view_name = final_o_view_name
        i=0
        while temporary_view_name in i_dms:   # Making sure view name is unique in input DMS
            temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view_name = temporary_view_name
    else:
        o_view_name = final_o_view_name

    # Read taxonomy DMS and name
    taxo = open_uri(config['obi']['taxoURI'],
                    dms_only=True)
    # Same None check as for the other URIs, instead of an opaque TypeError on taxo[0]
    if taxo is None:
        raise Exception("Could not open taxonomy URI")
    taxo_dms_name = taxo[0].name
    taxo_dms = taxo[0]
    taxonomy_name = config['obi']['taxoURI'].split("/")[-1]  # Robust in theory

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[i_dms_name]
    input_view_name= [i_view_name]
    input_dms_name.append(ref_dms_name)
    input_view_name.append(ref_view_name)
    input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
    input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    comments = View.print_config(config, "ecotag", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

    # A negative return value from the C function signals a failure
    if obi_ecotag(tobytes(i_dms_name), tobytes(i_view_name),
                  tobytes(ref_dms_name), tobytes(ref_view_name),
                  tobytes(taxo_dms_name), tobytes(taxonomy_name),
                  tobytes(o_view_name), comments,
                  config['ecotag']['threshold']) < 0:
        raise Exception("Error running ecotag")

    # If the input and output DMS are not the same, export result view to output DMS
    if different_dms:
        # NOTE(review): [:-7] presumably strips a 7-character suffix from the DMS path - confirm
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if different_dms:
        View.delete_view(i_dms, o_view_name)
        # Closed here only: when both URIs share one DMS, i_dms.close() below covers it
        o_dms.close()

    i_dms.close()

    logger("info", "Done.")
|
||||||
|
|
103
python/obitools3/commands/export.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
170
python/obitools3/commands/export.pyx
Normal file → Executable file
@ -1,109 +1,69 @@
|
|||||||
# from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
#cython: language_level=3
|
||||||
# from obitools3.dms.dms import OBIDMS # TODO cimport doesn't work
|
|
||||||
# from obitools3.utils cimport bytes2str
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
#
|
from obitools3.uri.decode import open_uri
|
||||||
# import time
|
from obitools3.apps.config import logger
|
||||||
# import re
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.obiseq import Nuc_Seq
|
||||||
|
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||||
|
addExportOutputOption
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
__title__="Export a view to a different file format"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi export`` on *parser*.

    Only the two option groups imported from ``obitools3.apps.optiongroups``
    are added: the minimal input options, then the export output options.
    """

    for add_option_group in (addMinimalInputOption, addExportOutputOption):
        add_option_group(parser)
|
||||||
|
|
||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
pass
|
|
||||||
|
|
||||||
# __title__="Export a NUC_SEQS view to a fasta or fastq file"
|
DMS.obi_atexit()
|
||||||
#
|
|
||||||
#
|
logger("info", "obi export : exports a view to a different file format")
|
||||||
# default_config = { 'inputview' : None,
|
|
||||||
# }
|
# Open the input
|
||||||
#
|
input = open_uri(config['obi']['inputURI'])
|
||||||
# def addOptions(parser):
|
if input is None:
|
||||||
#
|
raise Exception("Could not read input")
|
||||||
# # TODO put this common group somewhere else but I don't know where
|
iview = input[1]
|
||||||
# group=parser.add_argument_group('DMS and view options')
|
|
||||||
#
|
# Open the output
|
||||||
# group.add_argument('--default-dms','-d',
|
output = open_uri(config['obi']['outputURI'],
|
||||||
# action="store", dest="obi:defaultdms",
|
input=False)
|
||||||
# metavar='<DMS NAME>',
|
if output is None:
|
||||||
# default=None,
|
raise Exception("Could not open output URI")
|
||||||
# type=str,
|
|
||||||
# help="Name of the default DMS for reading and writing data.")
|
output_object = output[0]
|
||||||
#
|
writer = output[1]
|
||||||
# group.add_argument('--input-view','-i',
|
|
||||||
# action="store", dest="obi:inputview",
|
# Check that the input view has the type NUC_SEQS if needed # TODO discuss, maybe bool property
|
||||||
# metavar='<INPUT VIEW NAME>',
|
if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") : # Nuc_Seq_Stored? TODO
|
||||||
# default=None,
|
raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
||||||
# type=str,
|
|
||||||
# help="Name of the input view, either raw if the view is in the default DMS,"
|
# Initialize the progress bar
|
||||||
# " or in the form 'dms:view' if it is in another DMS.")
|
pb = ProgressBar(len(iview), config, seconde=5)
|
||||||
#
|
|
||||||
# group=parser.add_argument_group('obi export specific options')
|
i=0
|
||||||
#
|
for seq in iview :
|
||||||
# group.add_argument('--format','-f',
|
pb(i)
|
||||||
# action="store", dest="export:format",
|
try:
|
||||||
# metavar='<FORMAT>',
|
writer(seq)
|
||||||
# default="fasta",
|
except StopIteration:
|
||||||
# type=str,
|
break
|
||||||
# help="Export in the format <FORMAT>, 'fasta' or 'fastq'. Default: 'fasta'.") # TODO export in csv
|
i+=1
|
||||||
#
|
|
||||||
# def run(config):
|
pb(i, force=True)
|
||||||
#
|
print("", file=sys.stderr)
|
||||||
# # TODO import doesn't work
|
|
||||||
# NUC_SEQUENCE_COLUMN = "NUC_SEQ"
|
# TODO save command in input dms?
|
||||||
# ID_COLUMN = "ID"
|
|
||||||
# DEFINITION_COLUMN = "DEFINITION"
|
output_object.close()
|
||||||
# QUALITY_COLUMN = "QUALITY"
|
iview.close()
|
||||||
#
|
input[0].close()
|
||||||
# special_columns = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN]
|
|
||||||
#
|
logger("info", "Done.")
|
||||||
# # Open DMS
|
|
||||||
# d = OBIDMS(config['obi']['defaultdms'])
|
|
||||||
#
|
|
||||||
# # Open input view
|
|
||||||
# iview = d.open_view(config['obi']['inputview'])
|
|
||||||
#
|
|
||||||
# print(iview.type)
|
|
||||||
#
|
|
||||||
# # TODO check that the view has the type NUC_SEQS
|
|
||||||
# if ((config['export']['format'] == "fasta") or (config['export']['format'] == "fastq")) and (iview.type != "NUC_SEQS_VIEW") : # TODO find a way to import those macros
|
|
||||||
# raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
|
||||||
#
|
|
||||||
# # Initialize the progress bar
|
|
||||||
# pb = ProgressBar(len(iview), config, seconde=5)
|
|
||||||
#
|
|
||||||
# i=0
|
|
||||||
# for seq in iview :
|
|
||||||
# pb(i)
|
|
||||||
#
|
|
||||||
# toprint = ">"+seq.id+" "
|
|
||||||
#
|
|
||||||
# for col_name in seq :
|
|
||||||
# if col_name not in special_columns :
|
|
||||||
# toprint = toprint + col_name + "=" + str(seq[col_name]) + "; "
|
|
||||||
#
|
|
||||||
# if DEFINITION_COLUMN in seq :
|
|
||||||
# toprint = toprint + seq.definition
|
|
||||||
#
|
|
||||||
# nucseq = bytes2str(seq.nuc_seq)
|
|
||||||
#
|
|
||||||
# if config['export']['format'] == "fasta" :
|
|
||||||
# nucseq = re.sub("(.{60})", "\\1\n", nucseq, 0, re.DOTALL)
|
|
||||||
#
|
|
||||||
# toprint = toprint + "\n" + nucseq
|
|
||||||
#
|
|
||||||
# if config['export']['format'] == "fastq" :
|
|
||||||
# toprint = toprint + "\n" + "+" + "\n" + seq.get_str_quality()
|
|
||||||
#
|
|
||||||
# print(toprint)
|
|
||||||
# i+=1
|
|
||||||
#
|
|
||||||
# iview.close()
|
|
||||||
# d.close()
|
|
||||||
#
|
|
||||||
# print("Done.")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
103
python/obitools3/commands/grep.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
362
python/obitools3/commands/grep.pyx
Normal file → Executable file
@ -1,96 +1,352 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
from obitools3.dms.dms import DMS # TODO cimport doesn't work
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view import View, Line_selection # TODO cimport doesn't work
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
import time
|
import time
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
__title__="Grep view lines that match the given predicates"
|
__title__="Grep view lines that match the given predicates"
|
||||||
|
|
||||||
default_config = { 'inputview' : None,
|
|
||||||
'outputview' : None
|
# TODO should sequences that have a grepped attribute at None be grepped or not? (in obi1 they are but....)
|
||||||
}
|
|
||||||
|
|
||||||
def addOptions(parser):
    """
    Declare the command line options of `obi grep` on `parser`.

    Calls addMinimalInputOption, addTaxonomyOption and addMinimalOutputOption,
    then adds two argument groups: the grep selection options and the
    taxonomy filtering options. Every option is stored under a "grep:" dest.
    """

    addMinimalInputOption(parser)
    addTaxonomyOption(parser)
    addMinimalOutputOption(parser)

    group=parser.add_argument_group("obi grep specific options")

    group.add_argument("--predicate", "-p",
                       action="append", dest="grep:grep_predicates",
                       metavar="<PREDICATE>",
                       default=None,
                       type=str,
                       help="Python boolean expression to be evaluated in the "
                            "sequence/line context. The attribute name can be "
                            "used in the expression as a variable name. "
                            "An extra variable named 'sequence' or 'line' refers "
                            "to the sequence or line object itself. "
                            "Several -p options can be used on the same "
                            "command line.")

    group.add_argument("-S", "--sequence",
                       action="store", dest="grep:seq_pattern",
                       metavar="<REGULAR_PATTERN>",
                       type=str,
                       help="Regular expression pattern used to select "
                            "the sequence. The pattern is case insensitive.")

    # The definition and identifier patterns are compiled without re.I by
    # Filter_generator, hence "case sensitive" in their help texts.
    group.add_argument("-D", "--definition",
                       action="store", dest="grep:def_pattern",
                       metavar="<REGULAR_PATTERN>",
                       type=str,
                       help="Regular expression pattern used to select "
                            "the definition of the sequence. The pattern is case sensitive.")

    group.add_argument("-I", "--identifier",
                       action="store", dest="grep:id_pattern",
                       metavar="<REGULAR_PATTERN>",
                       type=str,
                       help="Regular expression pattern used to select "
                            "the identifier of the sequence. The pattern is case sensitive.")

    group.add_argument("--id-list",
                       action="store", dest="grep:id_list",
                       metavar="<FILE_NAME>",
                       type=str,
                       help="File containing the identifiers of the sequences to select.")

    group.add_argument("-a", "--attribute",
                       action="append", dest="grep:attribute_patterns",
                       type=str,
                       default=[],
                       metavar="<ATTRIBUTE_NAME>:<REGULAR_PATTERN>",
                       help="Regular expression pattern matched against "
                            "the attributes of the sequence. "
                            "The pattern is case sensitive. "
                            "Several -a options can be used on the same "
                            "command line.")

    group.add_argument("-A", "--has-attribute",
                       action="append", dest="grep:attributes",
                       type=str,
                       default=[],
                       metavar="<ATTRIBUTE_NAME>",
                       help="Select records with the attribute <ATTRIBUTE_NAME> "
                            "defined (not set to NA value). "
                            "Several -A options can be used on the same "
                            "command line.")

    group.add_argument("-L", "--lmax",
                       action="store", dest="grep:lmax",
                       metavar="<MAX_LENGTH>",
                       type=int,
                       help="Keep sequences shorter than MAX_LENGTH.")

    group.add_argument("-l", "--lmin",
                       action="store", dest="grep:lmin",
                       metavar="<MIN_LENGTH>",
                       type=int,
                       help="Keep sequences longer than MIN_LENGTH.")

    group.add_argument("-v", "--invert-selection",
                       action="store_true", dest="grep:invert_selection",
                       default=False,
                       help="Invert the selection.")

    group=parser.add_argument_group("Taxonomy filtering specific options") #TODO put somewhere else? not in grep

    group.add_argument('--require-rank',
                       action="append", dest="grep:required_ranks",
                       metavar="<RANK_NAME>",
                       type=str,
                       default=[],
                       help="Select sequences with a taxid that is or has "
                            "a parent of rank <RANK_NAME>.")

    group.add_argument('-r', '--required',
                       action="append", dest="grep:required_taxids",
                       metavar="<TAXID>",
                       type=int,
                       default=[],
                       help="Select the sequences having the ancestor of taxid <TAXID>. "
                            "If several ancestors are specified (with \n'-r taxid1 -r taxid2'), "
                            "the sequences having at least one of them are selected.")

    # TODO useless option equivalent to -r -v?
    group.add_argument('-i','--ignore',
                       action="append", dest="grep:ignored_taxids",
                       metavar="<TAXID>",
                       type=int,
                       default=[],
                       help="Ignore the sequences having the ancestor of taxid <TAXID>. "
                            "If several ancestors are specified (with \n'-i taxid1 -i taxid2'), "
                            "the sequences having at least one of them are ignored.")
|
||||||
|
|
||||||
|
|
||||||
|
def Filter_generator(options, tax_filter):
    """
    Build the function deciding whether a view line is selected by `obi grep`.

    options:    mapping holding the grep options (run() passes config["grep"]).
                An option that is missing and an option stored as None are
                both treated as "not set".
    tax_filter: callable taking a line and returning whether the line passes
                the taxonomy criteria.

    Returns a function `filter(line, loc_env)` that returns True when the line
    satisfies every configured criterion (or fails one of them when
    `invert_selection` is set). `loc_env` is used as the globals of the
    evaluated predicates, the line itself being their locals.
    """

    def option(name):
        # Absent and None are handled the same way
        return options[name] if name in options else None

    # Initialize conditions
    # The -p option is declared with dest "grep:grep_predicates"; the former
    # "predicates" key is still honoured as a fallback.
    predicates = option("grep_predicates")
    if predicates is None:
        predicates = option("predicates")
    attributes = option("attributes")
    lmax = option("lmax")
    lmin = option("lmin")
    invert_selection = options["invert_selection"]

    id_set = None
    if option("id_list") is not None:
        # Read as bytes: line.id is searched with patterns compiled from
        # tobytes(), so identifiers are presumably bytes -- TODO confirm.
        with open(options["id_list"], "rb") as id_file:
            id_set = set(x.strip() for x in id_file)

    # Initialize the regular expression patterns
    # NOTE(review): only the sequence pattern is compiled case insensitive.
    seq_pattern = None
    if option("seq_pattern") is not None:
        seq_pattern = re.compile(tobytes(options["seq_pattern"]), re.I)
    id_pattern = None
    if option("id_pattern") is not None:
        id_pattern = re.compile(tobytes(options["id_pattern"]))
    def_pattern = None
    if option("def_pattern") is not None:
        def_pattern = re.compile(tobytes(options["def_pattern"]))
    attribute_patterns = {}
    for p in (option("attribute_patterns") or []):
        # "<ATTRIBUTE_NAME>:<REGULAR_PATTERN>", the pattern may itself contain ':'
        attribute, pattern = p.split(":", 1)
        attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))

    def filter(line, loc_env):
        cdef bint good = True

        if seq_pattern is not None and hasattr(line, "seq"):
            good = seq_pattern.search(line.seq) is not None

        if good and id_pattern is not None and hasattr(line, "id"):
            good = id_pattern.search(line.id) is not None

        if good and id_set is not None and hasattr(line, "id"):
            good = line.id in id_set

        if good and def_pattern is not None and hasattr(line, "definition"):
            good = def_pattern.search(line.definition) is not None

        if good and attributes: # TODO discuss that we test not None
            good = all(line[attribute] is not None for attribute in attributes)

        if good and attribute_patterns:
            # Each attribute must be defined and its string form must match
            good = all(line[attribute] is not None
                       and attribute_patterns[attribute].search(tobytes(str(line[attribute]))) is not None
                       for attribute in attribute_patterns)

        if good and predicates:
            # NOTE: predicates are Python expressions given on the command
            # line and are evaluated as is; never feed untrusted input here.
            good = all(bool(eval(p, loc_env, line)) for p in predicates)

        if good and lmin:
            good = len(line) >= lmin

        if good and lmax:
            good = len(line) <= lmax

        if good:
            good = tax_filter(line)

        if invert_selection :
            good = not good

        return good

    return filter
|
||||||
|
|
||||||
|
|
||||||
|
def Taxonomy_filter_generator(taxo, options):
    """
    Build the taxonomy part of the `obi grep` filter.

    taxo:    taxonomy object providing `get_taxon_at_rank(taxid, rank)` and
             `is_ancestor(ancestor_taxid, taxid)`, or None.
    options: mapping that may hold "required_ranks", "required_taxids" and
             "ignored_taxids".

    Returns a function taking a line and returning whether it is kept.
    Without a taxonomy, every line is kept. Lines without a b'TAXID' value
    are kept too, whatever the taxonomy options are.
    """
    if taxo is None:
        def tax_filter(seq):
            return True
        return tax_filter

    # Read the options once rather than for every line
    required_ranks = options["required_ranks"] if "required_ranks" in options else None
    required_taxids = options["required_taxids"] if "required_taxids" in options else None
    ignored_taxids = options["ignored_taxids"] if "ignored_taxids" in options else None

    def tax_filter(seq):
        if b'TAXID' not in seq or seq[b'TAXID'] is None: # TODO use macro
            return True
        taxid = seq[b'TAXID']
        # The taxid must have a taxon at every required rank
        if required_ranks and not all(taxo.get_taxon_at_rank(taxid, rank) is not None
                                      for rank in required_ranks):
            return False
        # At least one of the required taxids must be an ancestor
        if required_taxids and not any(taxo.is_ancestor(r, taxid)
                                       for r in required_taxids):
            return False
        # None of the ignored taxids may be an ancestor
        if ignored_taxids and any(taxo.is_ancestor(r, taxid)
                                  for r in ignored_taxids):
            return False
        return True

    return tax_filter
|
||||||
|
|
||||||
|
|
||||||
def run(config):
    """
    Run `obi grep`: select the lines of the input view that pass the filter
    built from config["grep"] and materialize them as the output view.

    config: configuration mapping; reads config["obi"]["inputURI"],
            config["obi"]["outputURI"], the optional config["obi"]["taxoURI"]
            and config["grep"].

    Raises Exception when the input, the output or the taxonomy can not be
    opened, and RollbackException when the output view can not be created.
    """

    DMS.obi_atexit()

    logger("info", "obi grep")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    o_view_name_final = output[1]
    o_view_name = o_view_name_final

    # If the input and output DMS are not the same, create output view in input DMS first, then export it
    # to output DMS, making sure the temporary view name is unique in the input DMS
    if i_dms != o_dms:
        suffix = 0
        while o_view_name in i_dms:
            o_view_name = o_view_name_final+b"_"+str2bytes(str(suffix))
            suffix += 1

    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config["obi"]["taxoURI"])
        if taxo_uri is None:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else :
        taxo = None

    # Initialize the progress bar
    line_count = len(i_view)
    pb = ProgressBar(line_count, config, seconde=5)

    # Apply filter
    tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
    line_filter = Filter_generator(config["grep"], tax_filter)
    selection = Line_selection(i_view)
    for i in range(line_count):
        pb(i)
        line = i_view[i]

        loc_env = {"sequence": line, "line": line, "taxonomy": taxo}

        if line_filter(line, loc_env):
            selection.append(i)

    # With an empty view the loop never runs and there is no last index to report
    if line_count > 0:
        pb(line_count - 1, force=True)
    print("", file=sys.stderr)

    # Create output view with the line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # NOTE(review): o_view is not bound when materialize() itself raised;
        # confirm what RollbackException expects before changing this call.
        raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[i_dms.name]
    input_view_name=[i_view.name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    o_view.write_config(config, "grep", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    if i_dms != o_dms:
        o_view.close()
        # [:-7] presumably strips a 7-character DMS directory suffix -- TODO confirm
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
        o_view = o_dms[o_view_name_final]

        #print("\n\nOutput view:\n````````````", file=sys.stderr)
        #print(repr(o_view), file=sys.stderr)

        View.delete_view(i_dms, o_view_name)
        o_dms.close()
    i_dms.close()

    logger("info", "Done.")
|
||||||
|
103
python/obitools3/commands/head.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
106
python/obitools3/commands/head.pyx
Executable file
@ -0,0 +1,106 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport str2bytes
|
||||||
|
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Keep the N first lines of a view."
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """
    Declare the command line options of `obi head` on `parser`.

    Calls addMinimalInputOption and addMinimalOutputOption, then adds the
    -n/--sequence-count option (an int stored under "head:count", 10 by
    default) in its own argument group.
    """
    for add_common_options in (addMinimalInputOption, addMinimalOutputOption):
        add_common_options(parser)

    count_option = dict(action="store",
                        dest="head:count",
                        metavar='<N>',
                        default=10,
                        type=int,
                        help="Number of first records to keep.")

    head_group = parser.add_argument_group('obi head specific options')
    head_group.add_argument('-n', '--sequence-count', **count_option)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """
    Run `obi head`: keep the first config['head']['count'] lines of the input
    view (or all of them when the view is shorter) in a new output view.

    config: configuration mapping; reads config["obi"]["inputURI"],
            config["obi"]["outputURI"] and config["head"]["count"].

    Raises Exception when the input or the output can not be opened, and
    RollbackException when the output view can not be created.
    """

    DMS.obi_atexit()

    logger("info", "obi head")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    o_view_name_final = output[1]
    o_view_name = o_view_name_final

    # If the input and output DMS are not the same, create output view in input DMS first, then export it
    # to output DMS, making sure the temporary view name is unique in the input DMS
    if i_dms != o_dms:
        suffix = 0
        while o_view_name in i_dms:
            o_view_name = o_view_name_final+b"_"+str2bytes(str(suffix))
            suffix += 1

    # A negative count selects nothing
    n = max(0, min(config['head']['count'], len(i_view)))

    # Initialize the progress bar
    pb = ProgressBar(n, config, seconde=5)

    selection = Line_selection(i_view)

    for i in range(n):
        pb(i)
        selection.append(i)

    # When nothing is selected the loop never runs and there is no last index to report
    if n > 0:
        pb(n - 1, force=True)
    print("", file=sys.stderr)

    # Create output view with the line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # NOTE(review): o_view is not bound when materialize() itself raised;
        # confirm what RollbackException expects before changing this call.
        raise RollbackException("obi head error, rollbacking view: "+str(e), o_view)

    # Save command config in DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "head", command_line, input_dms_name=[i_dms.name], input_view_name=[i_view.name])
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    if i_dms != o_dms:
        o_view.close()
        # [:-7] presumably strips a 7-character DMS directory suffix -- TODO confirm
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
        o_view = o_dms[o_view_name_final]

        #print("\n\nOutput view:\n````````````", file=sys.stderr)
        #print(repr(view), file=sys.stderr)

        View.delete_view(i_dms, o_view_name)
        o_dms.close()
    i_dms.close()

    logger("info", "Done.")
|
103
python/obitools3/commands/history.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
57
python/obitools3/commands/history.pyx
Executable file
@ -0,0 +1,57 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view import View
|
||||||
|
from obitools3.utils cimport bytes2str
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Command line histories and view history graphs"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """
    Declare the command line options of `obi history` on `parser`.

    Calls addMinimalInputOption, then adds three flags that all store a
    constant under "history:format" ("bash" when none of them is given).
    """
    addMinimalInputOption(parser)

    history_group = parser.add_argument_group('obi history specific options')

    # (flags, stored constant, help text) of each output format
    output_formats = (
        (('--bash', '-b'), "bash",
         "Print history in bash format"),
        (('--dot', '-d'), "dot",
         "Print history in DOT format (default: bash format)"),
        (('--ascii', '-a'), "ascii",
         "Print history in ASCII format (only for views; default: bash format)"),
    )

    for flags, format_name, description in output_formats:
        history_group.add_argument(*flags,
                                   action="store_const", dest="history:format",
                                   default="bash",
                                   const=format_name,
                                   help=description)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """
    Run `obi history`: print the history of the element designated by
    config['obi']['inputURI'] in the format given by
    config['history']['format'] ("bash", "dot" or "ascii").

    Raises Exception when the input can not be opened, or when the ASCII
    format is requested for something that is not a View.
    """

    cdef object entries

    DMS.obi_atexit()

    input = open_uri(config['obi']['inputURI'])
    # Same guard as the other commands, instead of failing on input[1]
    if input is None:
        raise Exception("Could not read input")

    entries = input[1]

    history_format = config['history']['format']

    if history_format == "bash" :
        print(bytes2str(entries.bash_history))
    elif history_format == "dot" :
        print(bytes2str(entries.dot_history_graph))
    elif history_format == "ascii" :
        if isinstance(entries, View):
            print(bytes2str(entries.ascii_history_graph))
        else:
            raise Exception("ASCII history only available for views")
|
||||||
|
|
103
python/obitools3/commands/import.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
273
python/obitools3/commands/import.pyx
Normal file → Executable file
@ -1,29 +1,44 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
# TODO cimport generate errors with argument numbers, but without them some variables can't be declared
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
from obitools3.files.universalopener cimport uopen
|
|
||||||
from obitools3.parsers.fasta import fastaIterator
|
|
||||||
from obitools3.parsers.fastq import fastqIterator
|
|
||||||
from obitools3.dms.dms import DMS # TODO cimport doesn't work
|
|
||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS import View_NUC_SEQS # TODO cimport doesn't work
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
from obitools3.dms.column.column cimport Column
|
from obitools3.dms.column.column cimport Column
|
||||||
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.taxo.taxo cimport Taxonomy
|
||||||
|
|
||||||
|
|
||||||
from obitools3.utils cimport tobytes, \
|
from obitools3.utils cimport tobytes, \
|
||||||
get_obitype, \
|
get_obitype, \
|
||||||
update_obitype
|
update_obitype
|
||||||
|
|
||||||
|
from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
|
||||||
|
NUC_SEQUENCE_COLUMN, \
|
||||||
|
ID_COLUMN, \
|
||||||
|
DEFINITION_COLUMN, \
|
||||||
|
QUALITY_COLUMN, \
|
||||||
|
COUNT_COLUMN, \
|
||||||
|
TAXID_COLUMN
|
||||||
|
|
||||||
from obitools3.dms.capi.obitypes cimport obitype_t, \
|
from obitools3.dms.capi.obitypes cimport obitype_t, \
|
||||||
OBI_VOID
|
OBI_VOID, \
|
||||||
|
OBI_QUAL
|
||||||
|
|
||||||
from obitools3.dms.capi.obierrno cimport obi_errno
|
from obitools3.dms.capi.obierrno cimport obi_errno
|
||||||
|
|
||||||
import time
|
from obitools3.apps.optiongroups import addImportInputOption, \
|
||||||
|
addTabularInputOption, \
|
||||||
|
addTaxdumpInputOption, \
|
||||||
|
addMinimalOutputOption
|
||||||
|
|
||||||
import pickle
|
from obitools3.uri.decode import open_uri
|
||||||
|
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
|
||||||
__title__="Imports sequences from different formats into a DMS"
|
__title__="Imports sequences from different formats into a DMS"
|
||||||
|
|
||||||
@ -34,81 +49,21 @@ default_config = { 'destview' : None,
|
|||||||
'skiperror' : False,
|
'skiperror' : False,
|
||||||
'seqinformat' : None,
|
'seqinformat' : None,
|
||||||
'moltype' : 'nuc',
|
'moltype' : 'nuc',
|
||||||
'filename' : None
|
'source' : None
|
||||||
}
|
}
|
||||||
|
|
||||||
def addOptions(parser):
|
def addOptions(parser):
|
||||||
parser.add_argument(dest='import:filename',
|
|
||||||
metavar='<FILENAME>',
|
|
||||||
nargs='?',
|
|
||||||
default=None,
|
|
||||||
help='Name of the sequence file to import' )
|
|
||||||
|
|
||||||
group=parser.add_argument_group('obi import specific options')
|
addImportInputOption(parser)
|
||||||
|
addTabularInputOption(parser)
|
||||||
group.add_argument('--default-dms','-d',
|
addTaxdumpInputOption(parser)
|
||||||
action="store", dest="obi:defaultdms",
|
addMinimalOutputOption(parser)
|
||||||
metavar='<DMS NAME>',
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
help="Name of the default DMS for reading and writing data")
|
|
||||||
|
|
||||||
group.add_argument('--destination-view','-v',
|
|
||||||
action="store", dest="import:destview",
|
|
||||||
metavar='<VIEW NAME>',
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
required=True,
|
|
||||||
help="Name of the default DMS for reading and writing data")
|
|
||||||
|
|
||||||
group.add_argument('--skip',
|
|
||||||
action="store", dest="import:skip",
|
|
||||||
metavar='<N>',
|
|
||||||
default=0,
|
|
||||||
type=int,
|
|
||||||
help="Skip the N first sequences")
|
|
||||||
|
|
||||||
group.add_argument('--only',
|
|
||||||
action="store", dest="import:only",
|
|
||||||
metavar='<N>',
|
|
||||||
default=None,
|
|
||||||
type=int,
|
|
||||||
help="Treat only N sequences")
|
|
||||||
|
|
||||||
group.add_argument('--skip-on-error',
|
|
||||||
action="store_true", dest="import:skiperror",
|
|
||||||
default=None,
|
|
||||||
help="Skip sequence entries with parse error")
|
|
||||||
|
|
||||||
group.add_argument('--fasta',
|
|
||||||
action="store_const", dest="import:seqinformat",
|
|
||||||
default=None,
|
|
||||||
const='fasta',
|
|
||||||
help="Input file is in fasta nucleic format (including obitools fasta extentions)")
|
|
||||||
|
|
||||||
group.add_argument('--fastq',
|
|
||||||
action="store_const", dest="import:seqinformat",
|
|
||||||
default=None,
|
|
||||||
const='fastq',
|
|
||||||
help="Input file is in sanger fastq nucleic format (standard fastq)")
|
|
||||||
|
|
||||||
group.add_argument('--nuc',
|
|
||||||
action="store_const", dest="import:moltype",
|
|
||||||
default=None,
|
|
||||||
const='nuc',
|
|
||||||
help="Input file contains nucleic sequences")
|
|
||||||
|
|
||||||
group.add_argument('--prot',
|
|
||||||
action="store_const", dest="import:moltype",
|
|
||||||
default=None,
|
|
||||||
const='pep',
|
|
||||||
help="Input file contains protein sequences")
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: Handling of NA values. Check None. Specify in doc? None or NA? Possiblity to specify in option?
|
|
||||||
# look in R read.table option to specify NA value
|
|
||||||
def run(config):
|
def run(config):
|
||||||
|
|
||||||
|
cdef tuple input
|
||||||
|
cdef tuple output
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef type value_type
|
cdef type value_type
|
||||||
cdef obitype_t value_obitype
|
cdef obitype_t value_obitype
|
||||||
@ -119,9 +74,8 @@ def run(config):
|
|||||||
cdef int nb_elts
|
cdef int nb_elts
|
||||||
cdef object d
|
cdef object d
|
||||||
cdef View view
|
cdef View view
|
||||||
cdef object iseq
|
cdef object entries
|
||||||
cdef object seq
|
cdef object entry
|
||||||
cdef object inputs
|
|
||||||
cdef Column id_col
|
cdef Column id_col
|
||||||
cdef Column def_col
|
cdef Column def_col
|
||||||
cdef Column seq_col
|
cdef Column seq_col
|
||||||
@ -130,7 +84,7 @@ def run(config):
|
|||||||
cdef bint rewrite
|
cdef bint rewrite
|
||||||
cdef dict dcols
|
cdef dict dcols
|
||||||
cdef int skipping
|
cdef int skipping
|
||||||
cdef str tag
|
cdef bytes tag
|
||||||
cdef object value
|
cdef object value
|
||||||
cdef list elt_names
|
cdef list elt_names
|
||||||
cdef int old_nb_elements_per_line
|
cdef int old_nb_elements_per_line
|
||||||
@ -140,60 +94,101 @@ def run(config):
|
|||||||
cdef ProgressBar pb
|
cdef ProgressBar pb
|
||||||
global obi_errno
|
global obi_errno
|
||||||
|
|
||||||
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
DMS.obi_atexit()
|
||||||
|
|
||||||
inputs = uopen(config['import']['filename'])
|
logger("info", "obi import: imports an object (file(s), obiview, taxonomy...) into a DMS")
|
||||||
|
|
||||||
# Create or open DMS
|
entry_count = -1
|
||||||
try:
|
|
||||||
d = DMS.test_open(config['obi']['defaultdms'])
|
|
||||||
except :
|
|
||||||
d = DMS.new(config['obi']['defaultdms'])
|
|
||||||
|
|
||||||
get_quality = False
|
if not config['obi']['taxdump']:
|
||||||
NUC_SEQS_view = False
|
input = open_uri(config['obi']['inputURI'])
|
||||||
if config['import']['seqinformat']=='fasta':
|
if input is None: # TODO check for bytes instead now?
|
||||||
get_quality = False
|
raise Exception("Could not open input URI")
|
||||||
NUC_SEQS_view = True
|
|
||||||
iseq = fastaIterator(inputs)
|
entry_count = input[4]
|
||||||
view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
logger("info", "Importing %d entries", entry_count)
|
||||||
elif config['import']['seqinformat']=='fastq':
|
|
||||||
get_quality = True
|
# TODO a bit dirty?
|
||||||
NUC_SEQS_view = True
|
if input[2]==Nuc_Seq:
|
||||||
iseq = fastqIterator(inputs)
|
v = View_NUC_SEQS
|
||||||
view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
else:
|
||||||
|
v = View
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('File format not handled')
|
v = None
|
||||||
|
|
||||||
|
output = open_uri(config['obi']['outputURI'],
|
||||||
|
input=False,
|
||||||
|
newviewtype=v)
|
||||||
|
if output is None:
|
||||||
|
raise Exception("Could not create output view")
|
||||||
|
|
||||||
|
# Read taxdump
|
||||||
|
if config['obi']['taxdump']: # The input is a taxdump to import in a DMS
|
||||||
|
taxo = Taxonomy.open_taxdump(output[0], config['obi']['inputURI'])
|
||||||
|
taxo.write(output[1])
|
||||||
|
taxo.close()
|
||||||
|
output[0].record_command_line(" ".join(sys.argv[1:]))
|
||||||
|
output[0].close()
|
||||||
|
return
|
||||||
|
|
||||||
|
if entry_count >= 0:
|
||||||
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
|
entries = input[1]
|
||||||
|
|
||||||
|
NUC_SEQS_view = False
|
||||||
|
if isinstance(output[1], View) :
|
||||||
|
view = output[1]
|
||||||
|
if output[2] == View_NUC_SEQS :
|
||||||
|
NUC_SEQS_view = True
|
||||||
|
else:
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
# Save basic columns in variables for optimization
|
# Save basic columns in variables for optimization
|
||||||
if NUC_SEQS_view :
|
if NUC_SEQS_view :
|
||||||
id_col = view["ID"]
|
id_col = view[ID_COLUMN]
|
||||||
def_col = view["DEFINITION"]
|
def_col = view[DEFINITION_COLUMN]
|
||||||
seq_col = view["NUC_SEQ"]
|
seq_col = view[NUC_SEQUENCE_COLUMN]
|
||||||
if get_quality :
|
|
||||||
qual_col = view["QUALITY"]
|
|
||||||
|
|
||||||
dcols = {}
|
dcols = {}
|
||||||
|
|
||||||
skipping = 0
|
|
||||||
i = 0
|
i = 0
|
||||||
for seq in iseq :
|
for entry in entries :
|
||||||
if skipping < config['import']['skip'] : # TODO not efficient because sequences are parsed
|
|
||||||
skipping+=1
|
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
|
||||||
elif i == config['import']['only'] :
|
if config['obi']['skiperror']:
|
||||||
break
|
i-=1
|
||||||
else :
|
continue
|
||||||
|
else:
|
||||||
|
raise RollbackException("obi import error, rollbacking view", view)
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
pb(i)
|
pb(i)
|
||||||
if NUC_SEQS_view :
|
|
||||||
id_col[i] = seq['id']
|
|
||||||
def_col[i] = seq['definition']
|
|
||||||
seq_col[i] = seq['sequence']
|
|
||||||
if get_quality :
|
|
||||||
qual_col[i] = seq['quality']
|
|
||||||
|
|
||||||
for tag in seq['tags'] :
|
if NUC_SEQS_view:
|
||||||
|
id_col[i] = entry.id
|
||||||
|
def_col[i] = entry.definition
|
||||||
|
seq_col[i] = entry.seq
|
||||||
|
# Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet
|
||||||
|
if i == 0:
|
||||||
|
get_quality = QUALITY_COLUMN in entry
|
||||||
|
if get_quality:
|
||||||
|
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
|
||||||
|
qual_col = view[QUALITY_COLUMN]
|
||||||
|
if get_quality:
|
||||||
|
qual_col[i] = entry.quality
|
||||||
|
|
||||||
value = seq['tags'][tag]
|
for tag in entry :
|
||||||
|
|
||||||
|
if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN : # TODO dirty
|
||||||
|
|
||||||
|
value = entry[tag]
|
||||||
|
if tag == b"taxid":
|
||||||
|
tag = TAXID_COLUMN
|
||||||
|
if tag == b"count":
|
||||||
|
tag = COUNT_COLUMN
|
||||||
|
|
||||||
if tag not in dcols :
|
if tag not in dcols :
|
||||||
|
|
||||||
@ -248,7 +243,7 @@ def run(config):
|
|||||||
if value_type == dict : # Check dictionary keys
|
if value_type == dict : # Check dictionary keys
|
||||||
for k in value :
|
for k in value :
|
||||||
if k not in old_elements_names :
|
if k not in old_elements_names :
|
||||||
new_elements_names = list(value)
|
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
||||||
rewrite = True
|
rewrite = True
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -263,22 +258,44 @@ def run(config):
|
|||||||
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
||||||
new_nb_elements_per_line = len(new_elements_names)
|
new_nb_elements_per_line = len(new_elements_names)
|
||||||
|
|
||||||
|
# Reset obierrno
|
||||||
|
obi_errno = 0
|
||||||
|
|
||||||
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
||||||
new_data_type=new_type,
|
new_data_type=new_type,
|
||||||
new_nb_elements_per_line=new_nb_elements_per_line,
|
new_nb_elements_per_line=new_nb_elements_per_line,
|
||||||
new_elements_names=new_elements_names),
|
new_elements_names=new_elements_names,
|
||||||
|
rewrite_last_line=False),
|
||||||
value_obitype)
|
value_obitype)
|
||||||
|
|
||||||
# Reset obierrno
|
# Update the dictionary:
|
||||||
obi_errno = 0
|
for t in dcols :
|
||||||
|
dcols[t] = (view[t], dcols[t][1])
|
||||||
|
|
||||||
# Fill value
|
# Fill value
|
||||||
dcols[tag][0][i] = value
|
dcols[tag][0][i] = value
|
||||||
|
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
print("\n")
|
if pb is not None:
|
||||||
print(view.__repr__())
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
d.close()
|
# Save command config in View and DMS comments
|
||||||
|
command_line = " ".join(sys.argv[1:])
|
||||||
|
view.write_config(config, "import", command_line, input_str=[os.path.abspath(config['obi']['inputURI'])])
|
||||||
|
output[0].record_command_line(command_line)
|
||||||
|
|
||||||
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
|
#print(repr(view), file=sys.stderr)
|
||||||
|
|
||||||
|
try:
|
||||||
|
input[0].close()
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
output[0].close()
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
logger("info", "Done.")
|
||||||
|
@ -1,236 +0,0 @@
|
|||||||
#cython: language_level=3
|
|
||||||
#
|
|
||||||
# from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|
||||||
# from obitools3.dms.dms import OBIDMS # TODO cimport doesn't work
|
|
||||||
# from obitools3.utils cimport str2bytes
|
|
||||||
#
|
|
||||||
# from obitools3.dms.capi.obialign cimport obi_lcs_align_one_column, \
|
|
||||||
# obi_lcs_align_two_columns
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# import time
|
|
||||||
#
|
|
||||||
# __title__="Aligns one sequence column with itself or two sequence columns"
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# default_config = { 'inputview' : None,
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# def addOptions(parser):
|
|
||||||
#
|
|
||||||
# # TODO put this common group somewhere else but I don't know where.
|
|
||||||
# # Also some options should probably be in another group
|
|
||||||
# group=parser.add_argument_group('DMS and view options')
|
|
||||||
#
|
|
||||||
# group.add_argument('--default-dms', '-d',
|
|
||||||
# action="store", dest="obi:defaultdms",
|
|
||||||
# metavar='<DMS NAME>',
|
|
||||||
# default=None,
|
|
||||||
# type=str,
|
|
||||||
# help="Name of the default DMS for reading and writing data.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--input-view-1', '-i',
|
|
||||||
# action="store", dest="obi:inputview1",
|
|
||||||
# metavar='<INPUT VIEW NAME>',
|
|
||||||
# default=None,
|
|
||||||
# type=str,
|
|
||||||
# help="Name of the (first) input view.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--input-view-2', '-I',
|
|
||||||
# action="store", dest="obi:inputview2",
|
|
||||||
# metavar='<INPUT VIEW NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="Eventually, the name of the second input view.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--input-column-1', '-c',
|
|
||||||
# action="store", dest="obi:inputcolumn1",
|
|
||||||
# metavar='<INPUT COLUMN NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="Name of the (first) input column. "
|
|
||||||
# " Default: the default nucleotide sequence column of the view if there is one.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--input-column-2', '-C',
|
|
||||||
# action="store", dest="obi:inputcolumn2",
|
|
||||||
# metavar='<INPUT COLUMN NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="Eventually, the name of the second input column.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--input-elt-1', '-e',
|
|
||||||
# action="store", dest="obi:inputelement1",
|
|
||||||
# metavar='<INPUT ELEMENT NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="If the first input column has multiple elements per line, name of the element referring to the sequence to align. "
|
|
||||||
# " Default: the first element of the line.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--input-elt-2', '-E',
|
|
||||||
# action="store", dest="obi:inputelement2",
|
|
||||||
# metavar='<INPUT ELEMENT NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="If the second input column has multiple elements per line, name of the element referring to the sequence to align. "
|
|
||||||
# " Default: the first element of the line.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--id-column-1', '-f',
|
|
||||||
# action="store", dest="obi:idcolumn1",
|
|
||||||
# metavar='<ID COLUMN NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="Name of the (first) column containing the identifiers of the sequences to align. "
|
|
||||||
# " Default: the default ID column of the view if there is one.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--id-column-2', '-F',
|
|
||||||
# action="store", dest="obi:idcolumn2",
|
|
||||||
# metavar='<ID COLUMN NAME>',
|
|
||||||
# default="",
|
|
||||||
# type=str,
|
|
||||||
# help="Eventually, the name of the second ID column.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--output-view', '-o',
|
|
||||||
# action="store", dest="obi:outputview",
|
|
||||||
# metavar='<OUTPUT VIEW NAME>',
|
|
||||||
# default=None,
|
|
||||||
# type=str,
|
|
||||||
# help="Name of the output view.")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# group=parser.add_argument_group('obi lcs specific options')
|
|
||||||
#
|
|
||||||
# group.add_argument('--threshold','-t',
|
|
||||||
# action="store", dest="align:threshold",
|
|
||||||
# metavar='<THRESHOLD>',
|
|
||||||
# default=0.0,
|
|
||||||
# type=float,
|
|
||||||
# help="Score threshold. If the score is normalized and expressed in similarity (default),"
|
|
||||||
# " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
|
|
||||||
# " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
|
|
||||||
# " If the score is not normalized and expressed in similarity, it is the length of the"
|
|
||||||
# " Longest Common Subsequence. If the score is not normalized and expressed in distance,"
|
|
||||||
# " it is (reference length - LCS length)."
|
|
||||||
# " Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
|
|
||||||
# " (no threshold).")
|
|
||||||
#
|
|
||||||
# group.add_argument('--longest-length','-L',
|
|
||||||
# action="store_const", dest="align:reflength",
|
|
||||||
# default=0,
|
|
||||||
# const=1,
|
|
||||||
# help="The reference length is the length of the longest sequence."
|
|
||||||
# " Default: the reference length is the length of the alignment.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--shortest-length','-l',
|
|
||||||
# action="store_const", dest="align:reflength",
|
|
||||||
# default=0,
|
|
||||||
# const=2,
|
|
||||||
# help="The reference length is the length of the shortest sequence."
|
|
||||||
# " Default: the reference length is the length of the alignment.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--raw','-r',
|
|
||||||
# action="store_false", dest="align:normalize",
|
|
||||||
# default=True,
|
|
||||||
# help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--distance','-D',
|
|
||||||
# action="store_false", dest="align:similarity",
|
|
||||||
# default=True,
|
|
||||||
# help="Score is expressed in distance. Default: score is expressed in similarity.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--print-seq','-s',
|
|
||||||
# action="store_true", dest="align:printseq",
|
|
||||||
# default=False,
|
|
||||||
# help="The nucleotide sequences are written in the output view. Default: they are not written.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--print-count','-n',
|
|
||||||
# action="store_true", dest="align:printcount",
|
|
||||||
# default=False,
|
|
||||||
# help="Sequence counts are written in the output view. Default: they are not written.")
|
|
||||||
#
|
|
||||||
# group.add_argument('--thread-count','-p', # TODO should probably be in a specific option group
|
|
||||||
# action="store", dest="align:threadcount",
|
|
||||||
# metavar='<THREAD COUNT>',
|
|
||||||
# default=1,
|
|
||||||
# type=int,
|
|
||||||
# help="Number of threads to use for the computation. Default: one.")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# # cpdef align(str dms_n,
|
|
||||||
# # str input_view_1_n, str output_view_n,
|
|
||||||
# # str input_view_2_n="",
|
|
||||||
# # str input_column_1_n="", str input_column_2_n="",
|
|
||||||
# # str input_elt_1_n="", str input_elt_2_n="",
|
|
||||||
# # str id_column_1_n="", str id_column_2_n="",
|
|
||||||
# # double threshold=0.0, bint normalize=True,
|
|
||||||
# # int reference=0, bint similarity_mode=True,
|
|
||||||
# # bint print_seq=False, bint print_count=False,
|
|
||||||
# # comments="",
|
|
||||||
# # int thread_count=1) :
|
|
||||||
# #
|
|
||||||
# # cdef OBIDMS d
|
|
||||||
# # d = OBIDMS(dms_n)
|
|
||||||
# #
|
|
||||||
# # if input_view_2_n == "" and input_column_2_n == "" :
|
|
||||||
# # if obi_lcs_align_one_column(d._pointer, \
|
|
||||||
# # str2bytes(input_view_1_n), \
|
|
||||||
# # str2bytes(input_column_1_n), \
|
|
||||||
# # str2bytes(input_elt_1_n), \
|
|
||||||
# # str2bytes(id_column_1_n), \
|
|
||||||
# # str2bytes(output_view_n), \
|
|
||||||
# # str2bytes(comments), \
|
|
||||||
# # print_seq, \
|
|
||||||
# # print_count, \
|
|
||||||
# # threshold, normalize, reference, similarity_mode,
|
|
||||||
# # thread_count) < 0 :
|
|
||||||
# # raise Exception("Error aligning sequences")
|
|
||||||
# # else :
|
|
||||||
# # if obi_lcs_align_two_columns(d._pointer, \
|
|
||||||
# # str2bytes(input_view_1_n), \
|
|
||||||
# # str2bytes(input_view_2_n), \
|
|
||||||
# # str2bytes(input_column_1_n), \
|
|
||||||
# # str2bytes(input_column_2_n), \
|
|
||||||
# # str2bytes(input_elt_1_n), \
|
|
||||||
# # str2bytes(input_elt_2_n), \
|
|
||||||
# # str2bytes(id_column_1_n), \
|
|
||||||
# # str2bytes(id_column_2_n), \
|
|
||||||
# # str2bytes(output_view_n), \
|
|
||||||
# # str2bytes(comments), \
|
|
||||||
# # print_seq, \
|
|
||||||
# # print_count, \
|
|
||||||
# # threshold, normalize, reference, similarity_mode) < 0 :
|
|
||||||
# # raise Exception("Error aligning sequences")
|
|
||||||
# #
|
|
||||||
# # d.close()
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
def run(config):
|
|
||||||
pass
|
|
||||||
# TODO: Build formatted comments with all parameters etc
|
|
||||||
# comments = "Obi align"
|
|
||||||
#
|
|
||||||
# # Call cython alignment function
|
|
||||||
# align(config['obi']['defaultdms'], \
|
|
||||||
# config['obi']['inputview1'], \
|
|
||||||
# config['obi']['outputview'], \
|
|
||||||
# input_view_2_n = config['obi']['inputview2'], \
|
|
||||||
# input_column_1_n = config['obi']['inputcolumn1'], \
|
|
||||||
# input_column_2_n = config['obi']['inputcolumn2'], \
|
|
||||||
# input_elt_1_n = config['obi']['inputelement1'], \
|
|
||||||
# input_elt_2_n = config['obi']['inputelement2'], \
|
|
||||||
# id_column_1_n = config['obi']['idcolumn1'], \
|
|
||||||
# id_column_2_n = config['obi']['idcolumn2'], \
|
|
||||||
# threshold = config['align']['threshold'], \
|
|
||||||
# normalize = config['align']['normalize'], \
|
|
||||||
# reference = config['align']['reflength'], \
|
|
||||||
# similarity_mode = config['align']['similarity'], \
|
|
||||||
# print_seq = config['align']['printseq'], \
|
|
||||||
# print_count = config['align']['printcount'], \
|
|
||||||
# comments = comments, \
|
|
||||||
# thread_count = config['align']['threadcount'])
|
|
||||||
#
|
|
||||||
# print("Done.")
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
# #
|
|
103
python/obitools3/commands/less.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
51
python/obitools3/commands/less.pyx
Normal file → Executable file
@ -1,35 +1,16 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
from obitools3.dms.dms import DMS # TODO cimport doesn't work
|
from obitools3.apps.optiongroups import addMinimalInputOption
|
||||||
from obitools3.dms.view.view import View # TODO cimport doesn't work
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
|
||||||
|
|
||||||
# TODO with URIs
|
|
||||||
|
|
||||||
__title__="Less equivalent"
|
__title__="Less equivalent"
|
||||||
|
|
||||||
default_config = { 'inputview' : None,
|
|
||||||
}
|
|
||||||
|
|
||||||
def addOptions(parser):
|
def addOptions(parser):
|
||||||
|
|
||||||
# TODO put this common group somewhere else but I don't know where
|
addMinimalInputOption(parser)
|
||||||
group=parser.add_argument_group('DMS and view options')
|
|
||||||
|
|
||||||
group.add_argument('--default-dms','-d',
|
|
||||||
action="store", dest="obi:defaultdms",
|
|
||||||
metavar='<DMS NAME>',
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
help="Name of the default DMS for reading and writing data.")
|
|
||||||
|
|
||||||
group.add_argument('--view','-v',
|
|
||||||
action="store", dest="obi:view",
|
|
||||||
metavar='<VIEW NAME>',
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
help="Name of the view to print.")
|
|
||||||
|
|
||||||
|
|
||||||
group=parser.add_argument_group('obi less specific options')
|
group=parser.add_argument_group('obi less specific options')
|
||||||
|
|
||||||
@ -38,20 +19,26 @@ def addOptions(parser):
|
|||||||
metavar='<N>',
|
metavar='<N>',
|
||||||
default=10,
|
default=10,
|
||||||
type=int,
|
type=int,
|
||||||
help="Print N sequences (default: 10)")
|
help="Print N entries (default: 10)")
|
||||||
|
|
||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
|
|
||||||
# Open DMS
|
cdef object entries
|
||||||
d = DMS.open(config['obi']['defaultdms'])
|
cdef int n
|
||||||
|
|
||||||
# Open input view
|
DMS.obi_atexit()
|
||||||
iview = View.open(d, config['obi']['inputview'])
|
|
||||||
|
input = open_uri(config['obi']['inputURI'])
|
||||||
|
|
||||||
|
entries = input[1]
|
||||||
|
|
||||||
|
if config['less']['print'] > len(entries) :
|
||||||
|
n = len(entries)
|
||||||
|
else :
|
||||||
|
n = config['less']['print']
|
||||||
|
|
||||||
# Print
|
# Print
|
||||||
for i in range(config['less']['print']) :
|
for i in range(n) :
|
||||||
print(repr(iview[i]))
|
print(repr(entries[i]))
|
||||||
|
|
||||||
d.close()
|
|
||||||
|
|
||||||
|
103
python/obitools3/commands/ls.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
28
python/obitools3/commands/ls.pyx
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Print a preview of a DMS, view, column...."
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """
    Register the command line options of the `obi ls` command on `parser`.

    The command declares no option of its own: it only delegates to
    addMinimalInputOption (presumably the option group holding the input URI
    read by run() - confirm in obitools3.apps.optiongroups).
    """
    addMinimalInputOption(parser)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """
    Entry point of the `obi ls` command: print the representation of the
    object designated by the input URI.

    @param config: configuration mapping; config['obi']['inputURI'] is read

    @raise Exception: if the input URI could not be opened
    """

    DMS.obi_atexit()

    logger("info", "obi ls")

    # Open the input
    opened = open_uri(config['obi']['inputURI'])
    if opened is None:
        raise Exception("Could not read input")

    # NOTE(review): element 1 of the open_uri() result is presumably the
    # opened object itself (DMS, view, column...) - confirm against open_uri
    target = opened[1]
    print(repr(target))
|
||||||
|
|
103
python/obitools3/commands/ngsfilter.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
604
python/obitools3/commands/ngsfilter.pyx
Executable file
@ -0,0 +1,604 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
from obitools3.dms.column.column cimport Column, Column_line
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
|
||||||
|
from obitools3.libalign.apat_pattern import Primer_search
|
||||||
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||||
|
from obitools3.dms.capi.apat cimport MAX_PATTERN
|
||||||
|
from obitools3.utils cimport tobytes
|
||||||
|
|
||||||
|
from libc.stdint cimport INT32_MAX
|
||||||
|
from functools import reduce
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
|
||||||
|
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # used by alignpairedend tool
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """
    Register the command line options of the `obi ngsfilter` command on `parser`.

    On top of the minimal input and output option groups, a dedicated group
    declares three URI options (samples definition view, reverse reads,
    view for unidentified sequences) and the number of errors tolerated
    when matching primers.
    """

    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)

    group = parser.add_argument_group('obi ngsfilter specific options')

    # The three URI options only differ by their flags, destination and help text
    uri_options = (
        (('-t', '--info-view'),
         "ngsfilter:info_view",
         "URI to the view containing the samples definition (with tags, primers, sample names,...)"),
        (('-R', '--reverse-reads'),
         "ngsfilter:reverse",
         "URI to the reverse reads if the paired-end reads haven't been aligned yet"),
        (('-u', '--unidentified'),
         "ngsfilter:unidentified",
         "URI to the view used to store the sequences unassigned to any sample"),
    )

    for flags, destination, help_text in uri_options:
        group.add_argument(*flags,
                           action="store",
                           dest=destination,
                           metavar="<URI>",
                           type=str,
                           default=None,
                           help=help_text)

    group.add_argument('-e', '--error',
                       action="store",
                       dest="ngsfilter:error",
                       metavar="###",
                       type=int,
                       default=2,
                       help="Number of errors allowed for matching primers [default = 2]")
|
||||||
|
|
||||||
|
|
||||||
|
class Primer:
    """
    One primer of a primer pair, searched for in reads in order to locate
    the sample tag next to it.

    Primers are hashed and compared on their raw sequence only, which lets
    them be used as dictionary keys whatever their orientation.

    The `aligner` attribute used by __call__ is not set by the constructor:
    it has to be assigned on the instance before the primer is called.
    """

    # Registry shared by all instances: raw primer sequence -> tag length used with it
    collection={}

    def __init__(self, sequence, taglength, forward=True, max_errors=2, verbose=False, primer_pair_idx=0, primer_idx=0):
        '''
        @param sequence: raw sequence of the primer
        @param taglength: length of the tag used with this primer, or None if there is no tag
        @param forward: True for a forward primer, False for a reverse primer
        @param max_errors: maximum number of errors for a match to be accepted
        @param verbose: stored on the instance, not used by the class itself
        @param primer_pair_idx: index of the primer pair, handed over to the aligner
        @param primer_idx: index of the primer inside its pair, handed over to the aligner

        @raise AssertionError: if the same primer sequence was already declared with another tag length
        '''

        assert sequence not in Primer.collection \
            or Primer.collection[sequence]==taglength, \
            "Primer %s must always be used with tags of the same length" % sequence

        Primer.collection[sequence]=taglength

        self.primer_pair_idx = primer_pair_idx
        self.primer_idx = primer_idx
        self.is_revcomp = False
        self.revcomp = None
        self.raw=sequence
        self.sequence = Nuc_Seq(b"primer", sequence)
        self.lseq = len(self.sequence)
        self.max_errors=max_errors
        self.taglength=taglength
        self.forward = forward
        self.verbose=verbose

    def reverse_complement(self):
        '''
        Build the reverse complement of this primer.

        @return: a new Primer with the same raw sequence and indexes, the opposite
                 `forward` flag, a reverse-complemented `sequence` and `is_revcomp` set to True
        '''
        p = Primer(self.raw,
                   self.taglength,
                   not self.forward,
                   verbose=self.verbose,
                   max_errors=self.max_errors,
                   primer_pair_idx=self.primer_pair_idx,
                   primer_idx=self.primer_idx)
        p.sequence=p.sequence.reverse_complement
        p.is_revcomp = True
        p.revcomp = None
        return p

    def __hash__(self):
        # Consistent with __eq__: only the raw sequence identifies a primer
        return hash(str(self.raw))

    def __eq__(self,primer):
        # Objects without a raw sequence (e.g. None, which is used as a key
        # alongside primers) are "not comparable" rather than an error, so
        # that Python falls back to its default comparison.
        try:
            return self.raw==primer.raw
        except AttributeError:
            return NotImplemented

    def __call__(self, sequence, same_sequence=False, pattern=0, begin=0):
        '''
        Search the primer in a sequence using `self.aligner`.

        @param sequence: the sequence to search the primer in
        @param same_sequence: handed over to the aligner as is
        @param pattern: handed over to the aligner as `pattern_ref`
        @param begin: handed over to the aligner as is

        @return: None if the sequence is not longer than the primer, if the aligner
                 finds nothing or if the match has more than `max_errors` errors;
                 otherwise the tuple (errors, start, end, tag) where tag is None
                 when the primer has no tag or when the sequence is too short to hold it
        '''

        # The sequence has to be strictly longer than the primer
        if len(sequence) <= self.lseq:
            return None

        ali = self.aligner.search_one_primer(sequence.seq,
                                             self.primer_pair_idx,
                                             self.primer_idx,
                                             reverse_comp=self.is_revcomp,
                                             same_sequence=same_sequence,
                                             pattern_ref=pattern,
                                             begin=begin)

        if ali is None:  # no match
            return None

        errors, start = ali.first_encountered()

        if errors <= self.max_errors:
            end = start + self.lseq
            if self.taglength is not None:
                if self.sequence.is_revcomp:
                    # The tag follows the primer: read it on the reverse complement of the sequence
                    if (len(sequence)-end) >= self.taglength:
                        tag_start = len(sequence) - end - self.taglength
                        tag = sequence.reverse_complement[tag_start:tag_start+self.taglength].seq
                    else:
                        tag=None
                else:
                    # The tag precedes the primer
                    if start >= self.taglength:
                        tag = tobytes((sequence[start - self.taglength:start].seq).lower())  # turn back to lowercase because apat turned to uppercase
                    else:
                        tag=None
            else:
                tag=None

            return errors,start,end,tag

        return None

    def __str__(self):
        # 'D' (direct) for a forward primer, 'R' for a reverse one
        return "%s: %s" % ('D' if self.forward else 'R', self.raw)

    __repr__=__str__
|
||||||
|
|
||||||
|
|
||||||
|
cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
    """
    Build the primer/tag lookup structure from the samples definition view.

    @param info_view: iterable of lines giving, for each sample, the b'forward_primer',
                      b'reverse_primer', b'forward_tag' and b'reverse_tag' values
                      (b'-' meaning "no tag") plus any additional sample data
    @param max_errors: maximum number of errors, handed over to every Primer
    @param verbose: handed over to every Primer
    @param not_aligned: if True, the forward and reverse primers themselves are used as
                        paired keys and their reverse complements are stored in their
                        `revcomp` attribute; if False, the reverse complements are the paired keys

    @return: (infos, primer_list) where infos[primer][paired primer][(forward tag, reverse tag)]
             is the dictionary of additional data of the sample, and primer_list holds
             one (forward primer, reverse primer) tuple per line of the view

    @raise AssertionError: if a tag pair is declared twice for the same primer pair
    """

    # Columns describing the primers and tags: everything else is sample data
    special_keys = [b'forward_primer', b'reverse_primer', b'forward_tag', b'reverse_tag']

    infos = {}
    primer_list = []

    for pair_idx, p in enumerate(info_view):

        forward = Primer(p[b'forward_primer'],
                         len(p[b'forward_tag']) if p[b'forward_tag']!=b'-' else None,
                         True,
                         max_errors=max_errors,
                         verbose=verbose,
                         primer_pair_idx=pair_idx,
                         primer_idx=0)

        fp = infos.setdefault(forward, {})

        reverse = Primer(p[b'reverse_primer'],
                         len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
                         False,
                         max_errors=max_errors,
                         verbose=verbose,
                         primer_pair_idx=pair_idx,
                         primer_idx=1)

        primer_list.append((p[b'forward_primer'], p[b'reverse_primer']))

        rp = infos.setdefault(reverse, {})

        if not_aligned:
            # Paired keys are the primers themselves, their reverse complements are kept aside
            cf = forward
            cr = reverse
            cf.revcomp = forward.reverse_complement()
            cr.revcomp = reverse.reverse_complement()
        else:
            # Paired keys are the reverse complements
            cf = forward.reverse_complement()
            cr = reverse.reverse_complement()

        dpp = fp.setdefault(cr, {})
        rpp = rp.setdefault(cf, {})

        tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
                p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)

        assert tags not in dpp, \
            "Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)

        # Save additional data (the same dictionary is shared by both directions)
        data = {key: p[key] for key in p if key not in special_keys}

        dpp[tags] = data
        rpp[tags] = data

    return infos, primer_list
|
||||||
|
|
||||||
|
|
||||||
|
cdef tuple annotate(sequences, infos, verbose=False):
    """
    Look for the primers and tags in a read (or a pair of reads) and assign it to a sample.

    @param sequences: one sequence (reads already aligned) or two sequences
                      (forward read then reverse read, not aligned yet)
    @param infos: primer lookup structure: infos[primer][paired primer][(forward tag, reverse tag)]
                  gives the data of the sample (presumably the first value returned by
                  read_info_view - confirm against the caller)
    @param verbose: not used

    @return: (True, annotated sequence) if the sequence was assigned to a sample,
             (False, sequence with a b'error' annotation) otherwise
    """

    def sortMatch(match):
        # Unmatched primers go last when sorting on the start of the match
        if match[1] is None:
            return INT32_MAX
        else:
            return match[1][1]

    def sortReverseMatch(match):
        # Unmatched primers go last when sorting in reverse on the start of the match
        if match[1] is None:
            return -1
        else:
            return match[1][1]

    not_aligned = len(sequences) > 1
    sequenceF = sequences[0]
    sequenceR = None
    if not not_aligned:
        final_sequence = sequenceF
    else:
        final_sequence = sequenceF.clone()  # TODO maybe not cloning and then deleting quality tags is more efficient

    if not_aligned:
        sequenceR = sequences[1]
        final_sequence[REVERSE_SEQ_COLUMN_NAME] = sequenceR.seq            # used by alignpairedend tool
        final_sequence[REVERSE_QUALITY_COLUMN_NAME] = sequenceR.quality    # used by alignpairedend tool

    # Quality statistics (whole read, first 10 positions, middle part, last 10 positions)
    for seq in sequences:
        if hasattr(seq, "quality_array"):
            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array),0)/len(seq.quality_array)*10
            seq[b'avg_quality']=q
            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[0:10]),0)
            seq[b'head_quality']=q
            if len(seq.quality_array[10:-10]) :
                q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[10:-10]),0)/len(seq.quality_array[10:-10])*10
                seq[b'mid_quality']=q
            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[-10:]),0)
            seq[b'tail_quality']=q

    # Try direct matching:
    directmatch = []
    first_matched_seq = None
    second_matched_seq = None
    for seq in sequences:
        new_seq = True
        pattern = 0
        for p in infos:
            # Pattern references wrap around once MAX_PATTERN is reached
            if pattern == MAX_PATTERN:
                new_seq = True
                pattern = 0
            directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq))
            new_seq = False
            pattern+=1

    # Choose match closer to the start of (one of the) sequence(s)
    directmatch = sorted(directmatch, key=sortMatch)
    all_direct_matches = directmatch
    directmatch = directmatch[0] if directmatch[0][1] is not None else None

    if directmatch is None:
        final_sequence[b'error']=b'No primer match'
        return False, final_sequence

    first_matched_seq = directmatch[2]
    if first_matched_seq is sequenceF and not_aligned:
        second_matched_seq = sequenceR
    else:
        second_matched_seq = sequenceF

    match = first_matched_seq[directmatch[1][1]:directmatch[1][2]]

    if not not_aligned:
        final_sequence[b'seq_length_ori']=len(final_sequence)

    # Cut the primer (and what precedes it) off the read it was found on
    if not not_aligned or first_matched_seq is sequenceF:
        final_sequence = final_sequence[directmatch[1][2]:]
    else:
        cut_seq = sequenceR[directmatch[1][2]:]
        final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq           # used by alignpairedend tool
        final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality   # used by alignpairedend tool

    if directmatch[0].forward:
        final_sequence[b'direction']=b'forward'
        final_sequence[b'forward_errors']=directmatch[1][0]
        final_sequence[b'forward_primer']=directmatch[0].raw
        final_sequence[b'forward_match']=match.seq
    else:
        final_sequence[b'direction']=b'reverse'
        final_sequence[b'reverse_errors']=directmatch[1][0]
        final_sequence[b'reverse_primer']=directmatch[0].raw
        final_sequence[b'reverse_match']=match.seq

    # Keep only paired reverse primer
    infos = infos[directmatch[0]]

    # If not aligned, look for other match in already computed matches (choose the one that makes the biggest amplicon)
    if not_aligned:
        i=1
        # The bound is tested first so that the list is never indexed past its end
        while i<len(all_direct_matches) and all_direct_matches[i][1] is None and all_direct_matches[i][0].forward:
            i+=1
        if i < len(all_direct_matches):
            reversematch = all_direct_matches[i]
        else:
            reversematch = None

    # Look for other primer in the other direction on the sequence, or
    # if sequences are not already aligned and reverse primer not found in most likely sequence
    # (the one without the forward primer), try matching on the same sequence as the first match
    # (primer in the other direction)
    if not not_aligned or reversematch is None or reversematch[1] is None:
        if not not_aligned:
            sequence_to_match = second_matched_seq
        else:
            sequence_to_match = first_matched_seq
        reversematch = []
        # Compute begin
        begin=directmatch[1][2]+1  # end of match + 1 on the same sequence
        # Try reverse matching on the other sequence:
        new_seq = True
        pattern = 0
        for p in infos:
            if pattern == MAX_PATTERN:
                new_seq = True
                pattern = 0
            if not_aligned:
                primer=p.revcomp
            else:
                primer=p
            reversematch.append((primer, primer(sequence_to_match, same_sequence=not new_seq, pattern=pattern, begin=begin)))
            new_seq = False
            pattern+=1
        # Choose match closer to the end of the sequence
        reversematch = sorted(reversematch, key=sortReverseMatch, reverse=True)
        reversematch = reversematch[0] if reversematch[0][1] is not None else None

    # Without a second primer, the read is only kept if partial matches are declared (None key)
    if reversematch is None and None not in infos:
        if directmatch[0].forward:
            message = b'No reverse primer match'
        else:
            message = b'No direct primer match'
        final_sequence[b'error']=message
        return False, final_sequence

    if reversematch is None:
        final_sequence[b'status']=b'partial'

        if directmatch[0].forward:
            tags=(directmatch[1][3],None)
        else:
            tags=(None,directmatch[1][3])

        samples = infos[None]

    else:
        final_sequence[b'status']=b'full'

        # NOTE(review): the match is always read on second_matched_seq, even when the
        # second primer was searched on first_matched_seq (not aligned case) - confirm
        match = second_matched_seq[reversematch[1][1]:reversematch[1][2]]
        match = match.reverse_complement

        if not not_aligned or second_matched_seq is sequenceF:
            final_sequence = final_sequence[0:reversematch[1][1]]
        else:
            cut_seq = sequenceR[reversematch[1][2]:]
            final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq           # used by alignpairedend tool
            final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality   # used by alignpairedend tool

        if directmatch[0].forward:
            tags=(directmatch[1][3], reversematch[1][3])
            final_sequence[b'reverse_errors'] = reversematch[1][0]
            final_sequence[b'reverse_primer'] = reversematch[0].raw
            final_sequence[b'reverse_match'] = match.seq
        else:
            tags=(reversematch[1][3], directmatch[1][3])
            final_sequence[b'forward_errors'] = reversematch[1][0]
            final_sequence[b'forward_primer'] = reversematch[0].raw
            final_sequence[b'forward_match'] = match.seq

        if tags[0] is not None:
            final_sequence[b'forward_tag'] = tags[0]
        if tags[1] is not None:
            final_sequence[b'reverse_tag'] = tags[1]

        samples = infos[reversematch[0]]

    if not directmatch[0].forward and not not_aligned:  # don't reverse complement if not_aligned
        final_sequence = final_sequence.reverse_complement

    sample=None

    if tags[0] is not None:                                     # Direct tag known
        if tags[1] is not None:                                 # Reverse tag known
            sample = samples.get(tags, None)
        else:                                                   # Only direct tag known
            s=[samples[x] for x in samples if x[0]==tags[0]]
            if len(s)==1:
                sample=s[0]
            elif len(s)>1:
                final_sequence[b'error']=b'multiple samples match tags'
                return False, final_sequence
            else:
                sample=None
    else:
        if tags[1] is not None:                                 # Only reverse tag known
            s=[samples[x] for x in samples if x[1]==tags[1]]
            if len(s)==1:
                sample=s[0]
            elif len(s)>1:
                final_sequence[b'error']=b'multiple samples match tags'
                return False, final_sequence
            else:
                sample=None

    if sample is None:
        final_sequence[b'error']=b"Cannot assign sequence to a sample"
        return False, final_sequence

    final_sequence.update(sample)

    if not not_aligned:
        final_sequence[b'seq_length']=len(final_sequence)

    return True, final_sequence
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run the ``obi ngsfilter`` command.

    Opens the input view (and the optional reverse-read view), the output
    view, the view describing the tags and primers, and the optional view
    receiving the unidentified reads. Every entry is then passed to
    ``annotate()``: entries flagged as good are written to the output view,
    the others go to the unidentified view when one was requested.

    :param config: configuration mapping. The entries read here are
        ``config['obi']['inputURI']``, ``config['obi']['outputURI']`` and,
        under ``config['ngsfilter']``, ``info_view``, ``error`` and the
        optional ``reverse`` and ``unidentified``.
    :raises AssertionError: if ``config['ngsfilter']['info_view']`` is None.
    :raises Exception: if a view cannot be opened or created, or if the
        forward and reverse views do not contain the same number of reads.
    :raises NotImplementedError: if an input view is not a NUC_SEQS view.
    :raises RollbackException: if anything fails while annotating or storing
        the sequences.
    """

    DMS.obi_atexit()

    logger("info", "obi ngsfilter")

    assert config['ngsfilter']['info_view'] is not None, "Option -t must be specified"

    # Open the input
    forward = None
    reverse = None
    input = None
    not_aligned = False

    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not open input reads")
    if input[2] != View_NUC_SEQS:
        raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')

    if "reverse" in config["ngsfilter"]:
        # Paired-end mode: forward and reverse reads come from two views and
        # have not been aligned yet

        forward = input[1]

        rinput = open_uri(config["ngsfilter"]["reverse"])
        if rinput is None:
            raise Exception("Could not open reverse reads")
        if rinput[2] != View_NUC_SEQS:
            raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')

        reverse = rinput[1]

        if len(forward) != len(reverse):
            raise Exception("Error: the number of forward and reverse reads are different")

        entries = [forward, reverse]
        not_aligned = True

        input_dms_name = [forward.dms.name, reverse.dms.name]
        input_view_name = [forward.name, reverse.name]

    else:
        entries = input[1]
        input_dms_name = [entries.dms.name]
        input_view_name = [entries.name]

    if not_aligned:
        entries_len = len(forward)
    else:
        entries_len = len(entries)

    # Open the output
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=View_NUC_SEQS)

    if output is None:
        raise Exception("Could not create output view")

    o_view = output[1]

    # Open the view containing the information about the tags and the primers
    info_input = open_uri(config['ngsfilter']['info_view'])
    if info_input is None:
        raise Exception("Could not read the view containing the informations about the tags and the primers")
    info_view = info_input[1]
    input_dms_name.append(info_input[0].name)
    input_view_name.append(info_input[1].name)

    # Open the unidentified view (optional: stays None when not requested)
    unidentified_input = None
    unidentified = None
    if 'unidentified' in config['ngsfilter'] and config['ngsfilter']['unidentified'] is not None:  # TODO keyError if undefined problem
        unidentified_input = open_uri(config['ngsfilter']['unidentified'],
                                      input=False,
                                      newviewtype=View_NUC_SEQS)
        if unidentified_input is None:
            raise Exception("Could not open the view containing the unidentified reads")
        unidentified = unidentified_input[1]

    # Initialize the progress bar
    pb = ProgressBar(entries_len, config, seconde=5)

    # Check and store primers and tags
    infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned)  # TODO obi verbose option

    aligner = Primer_search(primer_list, config['ngsfilter']['error'])

    # Attach the shared aligner to every primer, its paired primers and
    # their reverse complements
    for p in infos:
        p.aligner = aligner
        for paired_p in infos[p]:
            paired_p.aligner = aligner
            if paired_p.revcomp is not None:
                paired_p.revcomp.aligner = aligner

    if not_aligned:  # create columns used by alignpairedend tool
        Column.new_column(o_view, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
        Column.new_column(o_view, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=o_view[REVERSE_SEQ_COLUMN_NAME].version)

        # The unidentified view only exists when it was requested
        if unidentified is not None:
            Column.new_column(unidentified, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
            Column.new_column(unidentified, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=unidentified[REVERSE_SEQ_COLUMN_NAME].version)

    g = 0  # next free line in the output view
    u = 0  # next free line in the unidentified view
    try:
        for i in range(entries_len):
            pb(i)
            if not_aligned:
                modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
            else:
                modseq = [Nuc_Seq.new_from_stored(entries[i])]
            good, oseq = annotate(modseq, infos)
            if good:
                o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
                g += 1
            elif unidentified is not None:
                unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
                u += 1
    except Exception as e:
        # Only hand the views that actually exist over for the rollback
        views = [o_view]
        if unidentified is not None:
            views.append(unidentified)
        raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), *views)

    # Force a last refresh of the bar; skipped for an empty input, where the
    # loop index was never bound
    if entries_len > 0:
        pb(entries_len - 1, force=True)
    print("", file=sys.stderr)

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    if unidentified is not None:
        unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
        # Add comment about unidentified seqs
        unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
    output[0].record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    input[0].close()
    output[0].close()
    info_input[0].close()
    if unidentified_input is not None:
        unidentified_input[0].close()
    # NOTE(review): the DMS opened for the reverse reads (rinput[0]) is never
    # closed here -- confirm whether that is intended.
    aligner.free()

    logger("info", "Done.")
|
103
python/obitools3/commands/sort.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
144
python/obitools3/commands/sort.pyx
Executable file
@ -0,0 +1,144 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport str2bytes
|
||||||
|
|
||||||
|
from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
|
||||||
|
OBI_CHAR, \
|
||||||
|
OBI_FLOAT, \
|
||||||
|
OBI_INT, \
|
||||||
|
OBI_QUAL, \
|
||||||
|
OBI_SEQ, \
|
||||||
|
OBI_STR, \
|
||||||
|
OBIBool_NA, \
|
||||||
|
OBIChar_NA, \
|
||||||
|
OBIFloat_NA, \
|
||||||
|
OBIInt_NA
|
||||||
|
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Replacement values, indexed by OBI data type, that line_cmp() returns as
# the sort key when the value stored in a line is None.
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
              OBI_CHAR: OBIChar_NA,
              OBI_FLOAT: OBIFloat_NA,
              OBI_INT: OBIInt_NA,
              OBI_QUAL: [],
              OBI_SEQ: b"",
              OBI_STR: b""}
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Sort view lines according to the value of a given attribute."
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi sort`` on *parser*.

    The minimal input and output options are added first, followed by an
    'obi sort specific options' group holding ``--key``/``-k`` (repeatable,
    stored under ``sort:keys``) and ``--reverse``/``-r`` (flag stored under
    ``sort:reverse``).
    """
    for add_common_options in (addMinimalInputOption, addMinimalOutputOption):
        add_common_options(parser)

    sort_options = parser.add_argument_group('obi sort specific options')

    key_option = dict(action="append",
                      dest="sort:keys",
                      metavar='<TAG NAME>',
                      default=[],
                      type=str,
                      help="Attribute used to sort the sequence records.")
    sort_options.add_argument('--key', '-k', **key_option)

    reverse_option = dict(action="store_true",
                          dest="sort:reverse",
                          default=False,
                          help="Sort in reverse order.")
    sort_options.add_argument('--reverse', '-r', **reverse_option)
|
||||||
|
|
||||||
|
|
||||||
|
def line_cmp(line, key, pb):
    """Return the sort key of *line* for the column *key*.

    :param line: view line; must support ``line[key]`` and, for None values,
        expose its view as ``line.view``.
    :param key: name of the column used for sorting.
    :param pb: ignored. The caller passes the result of a progress bar
        update here, so the bar advances each time a sort key is computed.
    :returns: ``line[key]``, or the entry of ``NULL_VALUE`` matching the
        column data type when the stored value is None.
    """
    # Read the value once: the original fetched line[key] twice
    value = line[key]
    if value is None:
        return NULL_VALUE[line.view[key].data_type_int]
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run the ``obi sort`` command.

    Builds a selection of all the lines of the input view, sorts it on each
    key of ``config['sort']['keys']`` and materializes the result as a new
    view. When the output DMS differs from the input DMS, the view is first
    created in the input DMS under a temporary unique name, then imported
    into the output DMS and the temporary view is deleted.

    :param config: configuration mapping. The entries read here are
        ``config['obi']['inputURI']``, ``config['obi']['outputURI']``,
        ``config['sort']['keys']`` and ``config['sort']['reverse']``.
    :raises Exception: if the input view cannot be read, or if the output
        view cannot be created.
    """

    DMS.obi_atexit()

    logger("info", "obi sort")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    o_view_name_final = output[1]
    o_view_name = o_view_name_final

    # If the input and output DMS are not the same, create output view in input DMS first, then export it
    # to output DMS, making sure the temporary view name is unique in the input DMS
    if i_dms != o_dms:
        i = 0
        while o_view_name in i_dms:
            o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
            i += 1

    # Initialize the progress bar
    pb = ProgressBar(len(i_view), config, seconde=5)

    keys = config['sort']['keys']

    selection = Line_selection(i_view)

    for i in range(len(i_view)):  # TODO special function?
        selection.append(i)

    # NOTE(review): the selection is re-sorted for each key in turn, so if
    # the sort is stable the LAST key given ends up as the primary one --
    # confirm this is the intended order.
    for k in keys:  # TODO order?
        # pb(line_idx) is evaluated only for its side effect: it advances
        # the progress bar each time a sort key is computed
        selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])

    pb(len(i_view), force=True)
    print("", file=sys.stderr)

    # Create output view with the sorted line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # materialize() failed, so there is no view object to roll back.
        # The original handler referenced the still unbound o_view, which
        # replaced the real error with an UnboundLocalError.
        raise Exception("obi sort error, could not create output view: "+str(e))

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name = [input[0].name]
    input_view_name = [input[1].name]
    o_view.write_config(config, "sort", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    if i_dms != o_dms:
        o_view.close()
        # NOTE(review): [:-7] presumably strips a 7-character suffix such as
        # ".obidms" from the DMS path -- confirm.
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
        o_view = o_dms[o_view_name_final]

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
        o_dms.close()
    i_dms.close()

    logger("info", "Done.")
|
103
python/obitools3/commands/stats.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
265
python/obitools3/commands/stats.pyx
Executable file
@ -0,0 +1,265 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.dms.capi.obiview cimport COUNT_COLUMN
|
||||||
|
|
||||||
|
from functools import reduce
|
||||||
|
import math
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Compute basic statistics for attribute values."
|
||||||
|
|
||||||
|
'''
|
||||||
|
`obi stats` computes basic statistics for attribute values of sequence records.
|
||||||
|
The sequence records can be categorized or not using one or several ``-c`` options.
|
||||||
|
By default, only the number of sequence records and the total count are computed for each category.
|
||||||
|
Additional statistics can be computed for attribute values in each category, such as:
|
||||||
|
|
||||||
|
- minimum value (``-m`` option)
|
||||||
|
- maximum value (``-M`` option)
|
||||||
|
- mean value (``-a`` option)
|
||||||
|
- variance (``-v`` option)
|
||||||
|
- standard deviation (``-s`` option)
|
||||||
|
|
||||||
|
The result is a contingency table with the different categories in rows, and the
|
||||||
|
computed statistics in columns.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: when is the taxonomy possibly used?
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of ``obi stats`` on *parser*.

    The minimal input options and the taxonomy option are added first,
    followed by an 'obi stats specific options' group. Every option of that
    group is repeatable and takes an attribute name.
    """
    addMinimalInputOption(parser)
    addTaxonomyOption(parser)

    stats_options = parser.add_argument_group('obi stats specific options')

    # (short flag, long flag, destination, help text), in registration order
    option_table = (
        ('-c', '--category-attribute', "stats:categories",
         "Attribute used to categorize the records."),
        ('-m', '--min', "stats:minimum",
         "Compute the minimum value of attribute for each category."),
        ('-M', '--max', "stats:maximum",
         "Compute the maximum value of attribute for each category."),
        ('-a', '--mean', "stats:mean",
         "Compute the mean value of attribute for each category."),
        ('-v', '--variance', "stats:var",
         "Compute the variance of attribute for each category."),
        ('-s', '--std-dev', "stats:sd",
         "Compute the standard deviation of attribute for each category."),
    )

    for short_flag, long_flag, destination, description in option_table:
        stats_options.add_argument(short_flag, long_flag,
                                   action="append", dest=destination,
                                   metavar="<Attribute Name>",
                                   default=[],
                                   help=description)
|
||||||
|
|
||||||
|
|
||||||
|
def statistics(values, attributes, func):
    """Apply *func* to the values collected for each attribute and category.

    :param values: mapping ``attribute -> {category -> list of values}``.
    :param attributes: attributes for which the statistic is requested;
        those absent from *values* are skipped.
    :param func: callable taking a list of values and returning the
        statistic.
    :returns: a ``(stat, lstat)`` tuple where ``stat[attribute][category]``
        is the computed statistic and ``lstat[attribute]`` is the length of
        the longest ``str()`` form of these statistics (0 when the attribute
        has no category).
    """
    stat = {}
    lstat = {}

    for var in attributes:
        if var not in values:
            continue
        per_category = {c: func(v) for c, v in values[var].items()}
        stat[var] = per_category
        lstat[var] = max((len(str(m)) for m in per_category.values()), default=0)

    return stat, lstat
|
||||||
|
|
||||||
|
|
||||||
|
def minimum(values, options):
    """Return the per-category minimum of each attribute in ``options['minimum']``.

    The result is the ``(stat, lstat)`` tuple built by ``statistics()`` with
    the built-in ``min``.
    """
    requested = options['minimum']
    return statistics(values, requested, min)
|
||||||
|
|
||||||
|
|
||||||
|
def maximum(values, options):
    """Return the per-category maximum of each attribute in ``options['maximum']``.

    The result is the ``(stat, lstat)`` tuple built by ``statistics()`` with
    the built-in ``max``.
    """
    requested = options['maximum']
    return statistics(values, requested, max)
|
||||||
|
|
||||||
|
|
||||||
|
def mean(values, options):
    """Return the per-category mean of each attribute in ``options['mean']``.

    The result is the ``(stat, lstat)`` tuple built by ``statistics()``.
    """
    def average(v):
        # Built-in sum() replaces the reduce()/lambda fold; float() keeps
        # the division a true division whatever the element type
        return float(sum(v)) / len(v)

    return statistics(values, options['mean'], average)
|
||||||
|
|
||||||
|
|
||||||
|
def variance(v):
    """Return the sample variance (``n - 1`` denominator) of the values in *v*.

    The mean is computed first and the squared deviations are summed in a
    second pass. Unlike the one-pass "sum of squares minus squared sum"
    formula, this does not lose precision when the values are large compared
    to their spread, and it can never return a negative number.

    :param v: sized iterable of numbers that can be traversed twice
        (e.g. a list).
    :returns: the sample variance, or 0 when *v* holds fewer than two values.
    """
    n = len(v)
    if n < 2:
        return 0
    center = float(sum(v)) / n
    return sum((x - center) ** 2 for x in v) / (n - 1)
|
||||||
|
|
||||||
|
|
||||||
|
def varpop(values, options):
    """Return the per-category variance of each attribute in ``options['var']``.

    The result is the ``(stat, lstat)`` tuple built by ``statistics()`` with
    ``variance()``. Despite the name of this function, ``variance()``
    divides by ``n - 1``.
    """
    requested = options['var']
    return statistics(values, requested, variance)
|
||||||
|
|
||||||
|
|
||||||
|
def sd(values, options):
    """Return the per-category standard deviation of each attribute in ``options['sd']``.

    The result is the ``(stat, lstat)`` tuple built by ``statistics()``.
    """
    def stddev(v):
        # Clamp at 0: floating-point rounding in the variance must not make
        # math.sqrt() fail with a domain error
        return math.sqrt(max(variance(v), 0.0))

    return statistics(values, options['sd'], stddev)
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run the ``obi stats`` command.

    Reads every line of the input view, groups the lines by the values of
    the category expressions, collects the values of the attributes for
    which a statistic was requested and prints one tab-separated row per
    category on the standard output (requested statistics, number of lines
    and total count).

    :param config: configuration mapping. The entries read here are
        ``config['obi']['inputURI']``, the optional
        ``config['obi']['taxoURI']`` and, under ``config['stats']``,
        ``categories``, ``minimum``, ``maximum``, ``mean``, ``var`` and
        ``sd``.
    :raises Exception: if the input view or the taxonomy cannot be opened.
    """

    DMS.obi_atexit()

    logger("info", "obi stats")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input view")
    i_view = input[1]

    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config['obi']['taxoURI'])
        if taxo_uri is None:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else:
        taxo = None

    # Every attribute for which at least one statistic is requested.
    # 'var' and 'sd' must be included too, otherwise their values are never
    # collected and printing the table fails with a KeyError.
    stat_attributes = (set(config['stats']['minimum'])
                       | set(config['stats']['maximum'])
                       | set(config['stats']['mean'])
                       | set(config['stats']['var'])
                       | set(config['stats']['sd']))
    catcount = {}   # category -> number of lines
    totcount = {}   # category -> sum of the COUNT column
    values = {}     # attribute -> category -> list of values
    lcat = 0        # width of the widest category value

    n_lines = len(i_view)

    # Initialize the progress bar
    pb = ProgressBar(n_lines, config, seconde=5)

    for i in range(n_lines):
        pb(i)
        line = i_view[i]

        category = []
        for c in config['stats']['categories']:
            try:
                if taxo is not None:
                    loc_env = {'sequence': line, 'line': line, 'taxonomy': taxo}
                else:
                    loc_env = {'sequence': line, 'line': line}

                # NOTE(security): the category expression comes from the
                # configuration (-c option) and is evaluated as Python code
                v = eval(c, loc_env, line)

                lv = len(str(v))
                if lv > lcat:
                    lcat = lv
                category.append(v)
            except Exception:
                # The expression cannot be evaluated for this line. A bare
                # except would also swallow KeyboardInterrupt/SystemExit.
                category.append(None)
                if 4 > lcat:  # 4 == len("None")
                    lcat = 4

        category = tuple(category)
        catcount[category] = catcount.get(category, 0)+1
        try:
            totcount[category] = totcount.get(category, 0)+line[COUNT_COLUMN]
        except KeyError:
            # No count column: each line counts for one
            totcount[category] = totcount.get(category, 0)+1
        for var in stat_attributes:
            if var in line:
                v = line[var]
                if var not in values:
                    values[var] = {}
                if category not in values[var]:
                    values[var][category] = []
                values[var][category].append(v)

    # Force a last refresh of the bar; skipped for an empty view, where the
    # loop index was never bound
    if n_lines > 0:
        pb(n_lines - 1, force=True)
    print("", file=sys.stderr)

    mini, lmini = minimum(values, config['stats'])
    maxi, lmaxi = maximum(values, config['stats'])
    avg, lavg = mean(values, config['stats'])
    varp, lvarp = varpop(values, config['stats'])
    sigma, lsigma = sd(values, config['stats'])

    # Build the header formats
    pcat = "%%-%ds" % lcat
    if config['stats']['minimum']:
        minvar = "min_%%-%ds" % max(len(x) for x in config['stats']['minimum'])
    else:
        minvar = "%s"

    if config['stats']['maximum']:
        maxvar = "max_%%-%ds" % max(len(x) for x in config['stats']['maximum'])
    else:
        maxvar = "%s"

    if config['stats']['mean']:
        meanvar = "mean_%%-%ds" % max(len(x) for x in config['stats']['mean'])
    else:
        meanvar = "%s"

    if config['stats']['var']:
        varvar = "var_%%-%ds" % max(len(x) for x in config['stats']['var'])
    else:
        varvar = "%s"

    if config['stats']['sd']:
        sdvar = "sd_%%-%ds" % max(len(x) for x in config['stats']['sd'])
    else:
        sdvar = "%s"

    hcat = "\t".join([pcat % x for x in config['stats']['categories']]) + "\t" +\
           "\t".join([minvar % x for x in config['stats']['minimum']]) + "\t" +\
           "\t".join([maxvar % x for x in config['stats']['maximum']]) + "\t" +\
           "\t".join([meanvar % x for x in config['stats']['mean']]) + "\t" +\
           "\t".join([varvar % x for x in config['stats']['var']]) + "\t" +\
           "\t".join([sdvar % x for x in config['stats']['sd']]) + \
           "\t   count" + \
           "\t   total"
    print(hcat)

    # NOTE(review): the lookups below raise KeyError when no line of a
    # category carries a requested attribute -- confirm whether such
    # categories can occur and how they should be displayed.
    for c in catcount:
        for v in c:
            print(pcat % str(v)+"\t", end="")
        for m in config['stats']['minimum']:
            print((("%%%dd" % lmini[m]) % mini[m][c])+"\t", end="")
        for m in config['stats']['maximum']:
            print((("%%%dd" % lmaxi[m]) % maxi[m][c])+"\t", end="")
        for m in config['stats']['mean']:
            print((("%%%df" % lavg[m]) % avg[m][c])+"\t", end="")
        for m in config['stats']['var']:
            print((("%%%df" % lvarp[m]) % varp[m][c])+"\t", end="")
        for m in config['stats']['sd']:
            print((("%%%df" % lsigma[m]) % sigma[m][c])+"\t", end="")
        print("%7d" % catcount[c], end="")
        print("%9d" % totcount[c])

    input[0].close()

    logger("info", "Done.")
|
103
python/obitools3/commands/tail.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
110
python/obitools3/commands/tail.pyx
Executable file
@ -0,0 +1,110 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport str2bytes
|
||||||
|
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Keep the N last lines of a view."
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of the ``obi tail`` command on *parser*.

    Adds the minimal input and output options, then an option group holding
    ``-n/--sequence-count``, stored under the ``tail:count`` destination
    (integer, default 10).
    """
    # Options shared with the other commands: input URI, then output URI.
    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)

    tail_options = parser.add_argument_group('obi tail specific options')

    tail_options.add_argument(
        '-n', '--sequence-count',
        action="store",
        dest="tail:count",
        metavar='<N>',
        default=10,
        type=int,
        help="Number of last records to keep.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Execute ``obi tail``: build a view holding the last N lines of the input view.

    Reads the input URI from ``config['obi']['inputURI']``, the output URI from
    ``config['obi']['outputURI']`` and the number of lines to keep from
    ``config['tail']['count']``.  When the input and output DMS differ, the
    view is first created in the input DMS under a temporary unique name,
    then imported into the output DMS and the temporary view is deleted.

    Raises:
        Exception: if the input view or the output DMS cannot be opened.
        RollbackException: if the line selection cannot be materialized.
    """

    DMS.obi_atexit()

    logger("info", "obi tail")

    # Open the input (local renamed so the builtin ``input`` is not shadowed)
    opened_input = open_uri(config["obi"]["inputURI"])
    if opened_input is None:
        raise Exception("Could not read input view")
    i_dms = opened_input[0]
    i_view = opened_input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    o_view_name_final = output[1]
    o_view_name = o_view_name_final

    # Evaluated once, while both DMS are still open.
    different_dms = i_dms != o_dms

    # If the input and output DMS are not the same, create output view in input DMS first, then export it
    # to output DMS, making sure the temporary view name is unique in the input DMS
    if different_dms:
        suffix = 0
        while o_view_name in i_dms:
            o_view_name = o_view_name_final+b"_"+str2bytes(str(suffix))
            suffix += 1

    line_count = len(i_view)
    # A negative count is treated as 0 so the progress bar never gets a negative size.
    count = max(config['tail']['count'], 0)
    start = max(line_count - count, 0)

    # Initialize the progress bar
    pb = ProgressBar(line_count - start, config, seconde=5)

    selection = Line_selection(i_view)

    # NOTE(review): the bar is sized with the number of kept lines but is fed
    # the absolute line index, as in the original code -- confirm that this is
    # what ProgressBar expects.
    last_idx = None
    for line_idx in range(start, line_count):
        pb(line_idx)
        selection.append(line_idx)
        last_idx = line_idx

    # Force the final refresh only if at least one line was selected; with an
    # empty selection there is no valid index to report.
    if last_idx is not None:
        pb(last_idx, force=True)
    print("", file=sys.stderr)

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    # NOTE(review): ``comments`` is computed but never passed on; the config is
    # written to the view by write_config() below.  Kept as in the original in
    # case get_config_dict has side effects -- confirm and remove if not.
    comments = View.get_config_dict(config, "tail", command_line, input_dms_name=[i_dms.name], input_view_name=[i_view.name])

    # Create output view with the line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # When materialize() fails, o_view was never bound, so there is no view
        # to hand over for rollback; referencing it here would only mask the
        # real error with an unbound-local error.
        # NOTE(review): assumes RollbackException accepts a message without any
        # view argument -- confirm against its definition.
        raise RollbackException("obi tail error, rollbacking view: "+str(e)) from e

    # Save command config in DMS comments
    o_view.write_config(config, "tail", command_line, input_dms_name=[i_dms.name], input_view_name=[i_view.name])
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    if different_dms:
        o_view.close()
        # NOTE(review): [:-7] presumably strips a 7-character directory suffix
        # from the DMS path -- confirm.
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
        o_view = o_dms[o_view_name_final]
        # Delete the temporary view used to create the final view
        View.delete_view(i_dms, o_view_name)
        o_dms.close()

    i_dms.close()

    logger("info", "Done.")
|
103
python/obitools3/commands/test.cfiles
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
../../../src/obi_lcs.h
|
||||||
|
../../../src/obi_lcs.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/libjson/json_utils.h
|
||||||
|
../../../src/libjson/json_utils.c
|
||||||
|
../../../src/libjson/cJSON.h
|
||||||
|
../../../src/libjson/cJSON.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/linked_list.h
|
||||||
|
../../../src/linked_list.c
|
||||||
|
../../../src/obidmscolumn_array.h
|
||||||
|
../../../src/obidmscolumn_array.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/array_indexer.h
|
||||||
|
../../../src/array_indexer.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/build_reference_db.c
|
||||||
|
../../../src/build_reference_db.h
|
||||||
|
../../../src/kmer_similarity.c
|
||||||
|
../../../src/kmer_similarity.h
|
||||||
|
../../../src/obi_clean.c
|
||||||
|
../../../src/obi_clean.h
|
||||||
|
../../../src/obi_ecopcr.c
|
||||||
|
../../../src/obi_ecopcr.h
|
||||||
|
../../../src/obi_ecotag.c
|
||||||
|
../../../src/obi_ecotag.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/_sse.h
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dft_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/dna_code.h
|
||||||
|
../../../src/libecoPCR/libapat/CODES/prot_code.h
|
||||||
|
../../../src/libecoPCR/libapat/apat_parse.c
|
||||||
|
../../../src/libecoPCR/libapat/apat_search.c
|
||||||
|
../../../src/libecoPCR/libapat/apat.h
|
||||||
|
../../../src/libecoPCR/libapat/Gmach.h
|
||||||
|
../../../src/libecoPCR/libapat/Gtypes.h
|
||||||
|
../../../src/libecoPCR/libapat/libstki.c
|
||||||
|
../../../src/libecoPCR/libapat/libstki.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.h
|
||||||
|
../../../src/libecoPCR/libthermo/nnparams.c
|
||||||
|
../../../src/libecoPCR/ecoapat.c
|
||||||
|
../../../src/libecoPCR/ecodna.c
|
||||||
|
../../../src/libecoPCR/ecoError.c
|
||||||
|
../../../src/libecoPCR/ecoMalloc.c
|
||||||
|
../../../src/libecoPCR/ecoPCR.h
|
180
python/obitools3/commands/test.pyx
Normal file → Executable file
@ -1,11 +1,11 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
from obitools3.apps.progress cimport ProgressBar # TODO I absolutely don't understand why it doesn't work without that line
|
from obitools3.apps.progress cimport ProgressBar # TODO I absolutely don't understand why it doesn't work without that line
|
||||||
from obitools3.dms.view.view import View, Line_selection
|
from obitools3.dms.view import View, Line_selection
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS import View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS import View_NUC_SEQS
|
||||||
from obitools3.dms.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.column import Column
|
from obitools3.dms.column import Column
|
||||||
from obitools3.dms.taxo.taxo import OBI_Taxonomy
|
from obitools3.dms.taxo import Taxonomy
|
||||||
from obitools3.utils cimport str2bytes
|
from obitools3.utils cimport str2bytes
|
||||||
from obitools3.dms.capi.obitypes cimport OBI_INT, \
|
from obitools3.dms.capi.obitypes cimport OBI_INT, \
|
||||||
OBI_FLOAT, \
|
OBI_FLOAT, \
|
||||||
@ -13,21 +13,24 @@ from obitools3.dms.capi.obitypes cimport OBI_INT, \
|
|||||||
OBI_CHAR, \
|
OBI_CHAR, \
|
||||||
OBI_STR, \
|
OBI_STR, \
|
||||||
OBI_SEQ
|
OBI_SEQ
|
||||||
|
|
||||||
|
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||||
|
ID_COLUMN, \
|
||||||
|
DEFINITION_COLUMN, \
|
||||||
|
QUALITY_COLUMN, \
|
||||||
|
COUNT_COLUMN
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
import string
|
import string
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
|
||||||
VIEW_TYPES = ["", "NUC_SEQS_VIEW"]
|
VIEW_TYPES = [b"", b"NUC_SEQS_VIEW"]
|
||||||
COL_TYPES = [OBI_INT, OBI_FLOAT, OBI_BOOL, OBI_CHAR, OBI_STR, OBI_SEQ]
|
COL_TYPES = [OBI_INT, OBI_FLOAT, OBI_BOOL, OBI_CHAR, OBI_STR, OBI_SEQ]
|
||||||
NUC_SEQUENCE_COLUMN = "NUC_SEQ"
|
|
||||||
ID_COLUMN = "ID"
|
|
||||||
DEFINITION_COLUMN = "DEFINITION"
|
|
||||||
QUALITY_COLUMN = "QUALITY"
|
|
||||||
SPECIAL_COLUMNS = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN]
|
SPECIAL_COLUMNS = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN]
|
||||||
|
|
||||||
#TAXDUMP = "" TODO path=?
|
#TAXDUMP = "" TODO path=?
|
||||||
TAXTEST = "taxtest"
|
TAXTEST = b"taxtest"
|
||||||
|
|
||||||
NAME_MAX_LEN = 200
|
NAME_MAX_LEN = 200
|
||||||
COL_COMMENTS_MAX_LEN = 2048
|
COL_COMMENTS_MAX_LEN = 2048
|
||||||
@ -42,17 +45,22 @@ default_config = {
|
|||||||
|
|
||||||
|
|
||||||
def test_taxo(config, infos):
|
def test_taxo(config, infos):
|
||||||
tax1 = OBI_Taxonomy.open(infos['dms'], config['obi']['taxo'], taxdump=True)
|
tax1 = Taxonomy.open_taxdump(infos['dms'], config['obi']['taxo'])
|
||||||
tax1.write(TAXTEST)
|
tax1.write(TAXTEST)
|
||||||
tax2 = OBI_Taxonomy.open(infos['dms'], TAXTEST, taxdump=False)
|
tax2 = Taxonomy.open(infos['dms'], TAXTEST)
|
||||||
assert len(tax1) == len(tax2), "Length of written taxonomy != length of read taxdump : "+str(len(tax2))+" != "+str(len(tax1))
|
assert len(tax1) == len(tax2), "Length of written taxonomy != length of read taxdump : "+str(len(tax2))+" != "+str(len(tax1))
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
for x in range(config['test']['nbtests']):
|
for x in range(config['test']['nbtests']):
|
||||||
idx = random.randint(0, len(tax1)-1)
|
idx = random.randint(0, len(tax1)-1)
|
||||||
t1 = tax1.get_taxon_by_idx(idx)
|
t1 = tax1.get_taxon_by_idx(idx)
|
||||||
|
taxid1 = t1.taxid
|
||||||
t2 = tax2.get_taxon_by_idx(idx)
|
t2 = tax2.get_taxon_by_idx(idx)
|
||||||
assert t1 == t2, "Taxon gotten from written taxonomy != taxon read from taxdump : "+str(t2)+" != "+str(t1)
|
taxid2 = t2.taxid
|
||||||
|
assert t1 == t2, "Taxon gotten from written taxonomy with index != taxon read from taxdump : "+str(t2)+" != "+str(t1)
|
||||||
|
t1 = tax1[taxid1]
|
||||||
|
t2 = tax2[taxid2]
|
||||||
|
assert t1 == t2, "Taxon gotten from written taxonomy with taxid != taxon read from taxdump : "+str(t2)+" != "+str(t1)
|
||||||
i+=1
|
i+=1
|
||||||
if (i%(config['test']['nbtests']/10)) == 0 :
|
if (i%(config['test']['nbtests']/10)) == 0 :
|
||||||
print("Testing taxonomy functions......"+str(i*100/config['test']['nbtests'])+"%")
|
print("Testing taxonomy functions......"+str(i*100/config['test']['nbtests'])+"%")
|
||||||
@ -69,32 +77,86 @@ def random_bool(config):
|
|||||||
return random.choice([True, False])
|
return random.choice([True, False])
|
||||||
|
|
||||||
|
|
||||||
|
def random_bool_tuples(config):
    """Return a random tuple whose items are each either None or a random boolean.

    The tuple length is drawn between 1 and config['test']['tuplemaxlen'] (inclusive).
    """
    size = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_bool(config)]) for _ in range(size))
|
||||||
|
|
||||||
|
|
||||||
def random_char(config):
|
def random_char(config):
|
||||||
return str2bytes(random.choice(string.ascii_lowercase))
|
return str2bytes(random.choice(string.ascii_lowercase))
|
||||||
|
|
||||||
|
|
||||||
|
def random_char_tuples(config):
    """Return a random tuple whose items are each either None or a random character.

    The tuple length is drawn between 1 and config['test']['tuplemaxlen'] (inclusive).
    """
    size = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_char(config)]) for _ in range(size))
|
||||||
|
|
||||||
|
|
||||||
def random_float(config):
|
def random_float(config):
|
||||||
return random.randint(0, MAX_INT) + random.random()
|
return random.randint(0, MAX_INT) + random.random()
|
||||||
|
|
||||||
|
|
||||||
|
def random_float_tuples(config):
    """Return a random tuple whose items are each either None or a random float.

    The tuple length is drawn between 1 and config['test']['tuplemaxlen'] (inclusive).
    """
    size = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_float(config)]) for _ in range(size))
|
||||||
|
|
||||||
|
|
||||||
def random_int(config):
|
def random_int(config):
|
||||||
return random.randint(0, config['test']['maxlinenb'])
|
return random.randint(0, config['test']['maxlinenb'])
|
||||||
|
|
||||||
|
|
||||||
|
def random_int_tuples(config):
    """Return a random tuple whose items are each either None or a random integer.

    The tuple length is drawn between 1 and config['test']['tuplemaxlen'] (inclusive).
    """
    size = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_int(config)]) for _ in range(size))
|
||||||
|
|
||||||
|
|
||||||
def random_seq(config):
|
def random_seq(config):
|
||||||
return str2bytes(''.join(random.choice(['a','t','g','c']) for i in range(random_length(config['test']['seqmaxlen']))))
|
return str2bytes(''.join(random.choice(['a','t','g','c']) for i in range(random_length(config['test']['seqmaxlen']))))
|
||||||
|
|
||||||
|
|
||||||
|
def random_seq_tuples(config):
    """Return a random tuple whose items are each either None or a random sequence.

    The tuple length is drawn between 1 and config['test']['tuplemaxlen'] (inclusive).
    """
    size = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_seq(config)]) for _ in range(size))
|
||||||
|
|
||||||
|
|
||||||
def random_bytes(config):
|
def random_bytes(config):
|
||||||
return random_bytes_with_max_len(config['test']['strmaxlen'])
|
return random_bytes_with_max_len(config['test']['strmaxlen'])
|
||||||
|
|
||||||
|
|
||||||
|
def random_bytes_tuples(config):
    """Return a random tuple whose items are each either None or a random bytes string.

    The tuple length is drawn between 1 and config['test']['tuplemaxlen'] (inclusive).
    """
    size = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_bytes(config)]) for _ in range(size))
|
||||||
|
|
||||||
|
|
||||||
def random_str_with_max_len(max_len):
|
def random_str_with_max_len(max_len):
|
||||||
return ''.join(random.choice(string.ascii_lowercase) for i in range(random_length(max_len)))
|
return ''.join(random.choice(string.ascii_lowercase) for i in range(random_length(max_len)))
|
||||||
|
|
||||||
|
|
||||||
def random_bytes_with_max_len(max_len):
|
def random_bytes_with_max_len(max_len):
|
||||||
return str2bytes(''.join(random.choice(string.ascii_lowercase) for i in range(random_length(max_len))))
|
return str2bytes(random_str_with_max_len(max_len))
|
||||||
|
|
||||||
|
|
||||||
|
RANDOM_FUNCTIONS = [random_bool, random_char, random_bytes, random_float, random_int]
|
||||||
|
def random_comments(config):
    """Build a random comments dictionary.

    Keys come from random_bytes(); each value comes from a generator picked at
    random in RANDOM_FUNCTIONS.  Entries are added until the attempt count drawn
    from random_length(1000) is exhausted, or until adding the next entry would
    bring the combined str() length to COL_COMMENTS_MAX_LEN or more.
    """
    comments = {}
    for _ in range(random_length(1000)):
        # Key first, then value: same evaluation order as a dict display.
        key = random_bytes(config)
        value = random.choice(RANDOM_FUNCTIONS)(config)
        candidate = {key: value}
        if len(str(comments)) + len(str(candidate)) >= COL_COMMENTS_MAX_LEN:
            break
        comments.update(candidate)
    return comments
|
||||||
|
|
||||||
|
|
||||||
def random_column(infos):
|
def random_column(infos):
|
||||||
@ -102,17 +164,17 @@ def random_column(infos):
|
|||||||
|
|
||||||
|
|
||||||
def random_unique_name(infos):
|
def random_unique_name(infos):
|
||||||
name = ""
|
name = b""
|
||||||
while name == "" or name in infos['unique_names'] :
|
while name == b"" or name in infos['unique_names'] :
|
||||||
name = random_str_with_max_len(NAME_MAX_LEN)
|
name = random_bytes_with_max_len(NAME_MAX_LEN)
|
||||||
infos['unique_names'].append(name)
|
infos['unique_names'].append(name)
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
def random_unique_element_name(config, infos):
|
def random_unique_element_name(config, infos):
|
||||||
name = ""
|
name = b""
|
||||||
while name == "" or name in infos['unique_names'] :
|
while name == b"" or name in infos['unique_names'] :
|
||||||
name = random_str_with_max_len(config['test']['elt_name_max_len'])
|
name = random_bytes_with_max_len(config['test']['elt_name_max_len'])
|
||||||
infos['unique_names'].append(name)
|
infos['unique_names'].append(name)
|
||||||
return name
|
return name
|
||||||
|
|
||||||
@ -128,15 +190,32 @@ def test_set_and_get(config, infos):
|
|||||||
col = infos['view'][col_name]
|
col = infos['view'][col_name]
|
||||||
element_names = col.elements_names
|
element_names = col.elements_names
|
||||||
data_type = col.data_type
|
data_type = col.data_type
|
||||||
if data_type == "OBI_QUAL" :
|
if data_type == b"OBI_QUAL" :
|
||||||
print_test(config, "-")
|
print_test(config, "-")
|
||||||
return
|
return
|
||||||
idx = random_int(config)
|
idx = random_int(config)
|
||||||
value = random.choice([None, infos['random_generator'][data_type](config)])
|
value = random.choice([None, infos['random_generator'][(data_type, col.tuples)](config)])
|
||||||
if col.nb_elements_per_line > 1 :
|
if col.nb_elements_per_line > 1 :
|
||||||
elt = random.choice(element_names)
|
elt = random.choice(element_names)
|
||||||
col[idx][elt] = value
|
col[idx][elt] = value
|
||||||
assert col[idx][elt] == value, "Column: "+repr(col)+"\nSet value != gotten value "+str(value)+" != "+str(col[idx][elt])
|
assert col[idx][elt] == value, "Column: "+repr(col)+"\nSet value != gotten value "+str(value)+" != "+str(col[idx][elt])
|
||||||
|
elif col.tuples:
|
||||||
|
col[idx] = value
|
||||||
|
if value is None:
|
||||||
|
totest = None
|
||||||
|
else:
|
||||||
|
totest = []
|
||||||
|
for e in value:
|
||||||
|
if e is not None and e != '':
|
||||||
|
totest.append(e)
|
||||||
|
if len(totest) == 0:
|
||||||
|
totest = None
|
||||||
|
else:
|
||||||
|
totest = tuple(totest)
|
||||||
|
assert col[idx] == totest, "Column: "+repr(col)+"\nSet value != gotten value "+str(totest)+" != "+str(col[idx])
|
||||||
|
if totest is not None:
|
||||||
|
for i in range(len(totest)) :
|
||||||
|
assert col[idx][i] == totest[i], "Column: "+repr(col)+"\nSet value[i] != gotten value[i] "+str(totest[i])+" != "+str(col[idx][i])
|
||||||
else:
|
else:
|
||||||
col[idx] = value
|
col[idx] = value
|
||||||
assert col[idx] == value, "Column: "+repr(col)+"\nSet value != gotten value "+str(value)+" != "+str(col[idx])
|
assert col[idx] == value, "Column: "+repr(col)+"\nSet value != gotten value "+str(value)+" != "+str(col[idx])
|
||||||
@ -210,19 +289,25 @@ def fill_column(config, infos, col) :
|
|||||||
if len(element_names) > 1 :
|
if len(element_names) > 1 :
|
||||||
for i in range(random_int(config)) :
|
for i in range(random_int(config)) :
|
||||||
for j in range(len(element_names)) :
|
for j in range(len(element_names)) :
|
||||||
col[i][element_names[j]] = random.choice([None, infos['random_generator'][data_type](config)])
|
col[i][element_names[j]] = random.choice([None, infos['random_generator'][(data_type, col.tuples)](config)])
|
||||||
else :
|
else :
|
||||||
for i in range(random_int(config)) :
|
for i in range(random_int(config)) :
|
||||||
col[i] = random.choice([None, infos['random_generator'][data_type](config)])
|
r = random.choice([None, infos['random_generator'][(data_type, col.tuples)](config)])
|
||||||
|
col[i] = r
|
||||||
|
|
||||||
|
|
||||||
def create_random_column(config, infos) :
|
def create_random_column(config, infos) :
|
||||||
alias = random.choice(['', random_unique_name(infos)])
|
alias = random.choice([b'', random_unique_name(infos)])
|
||||||
nb_elements_per_line=random.randint(1, config['test']['maxelts'])
|
tuples = random.choice([True, False])
|
||||||
elements_names = []
|
if not tuples :
|
||||||
for i in range(nb_elements_per_line) :
|
nb_elements_per_line=random.randint(1, config['test']['maxelts'])
|
||||||
elements_names.append(random_unique_element_name(config, infos))
|
elements_names = []
|
||||||
elements_names = random.choice([None, elements_names])
|
for i in range(nb_elements_per_line) :
|
||||||
|
elements_names.append(random_unique_element_name(config, infos))
|
||||||
|
elements_names = random.choice([None, elements_names])
|
||||||
|
else :
|
||||||
|
nb_elements_per_line = 1
|
||||||
|
elements_names = None
|
||||||
name = random_unique_name(infos)
|
name = random_unique_name(infos)
|
||||||
data_type = random_col_type()
|
data_type = random_col_type()
|
||||||
|
|
||||||
@ -231,11 +316,12 @@ def create_random_column(config, infos) :
|
|||||||
data_type,
|
data_type,
|
||||||
nb_elements_per_line=nb_elements_per_line,
|
nb_elements_per_line=nb_elements_per_line,
|
||||||
elements_names=elements_names,
|
elements_names=elements_names,
|
||||||
comments=random_str_with_max_len(COL_COMMENTS_MAX_LEN),
|
tuples=tuples,
|
||||||
|
comments=random_comments(config),
|
||||||
alias=alias
|
alias=alias
|
||||||
)
|
)
|
||||||
|
|
||||||
if alias != '' :
|
if alias != b'' :
|
||||||
assert infos['view'][alias] == column
|
assert infos['view'][alias] == column
|
||||||
else :
|
else :
|
||||||
assert infos['view'][name] == column
|
assert infos['view'][name] == column
|
||||||
@ -257,7 +343,7 @@ def random_new_view(config, infos, first=False):
|
|||||||
infos['view_names'].append(infos['view'].name)
|
infos['view_names'].append(infos['view'].name)
|
||||||
infos['view'].close()
|
infos['view'].close()
|
||||||
v_to_clone = View.open(infos['dms'], random.choice(infos["view_names"]))
|
v_to_clone = View.open(infos['dms'], random.choice(infos["view_names"]))
|
||||||
v_type = ""
|
v_type = b""
|
||||||
print_test(config, "View to clone: ")
|
print_test(config, "View to clone: ")
|
||||||
print_test(config, repr(v_to_clone))
|
print_test(config, repr(v_to_clone))
|
||||||
create_line_selection = random_bool(config)
|
create_line_selection = random_bool(config)
|
||||||
@ -271,14 +357,14 @@ def random_new_view(config, infos, first=False):
|
|||||||
v_type = random_view_type()
|
v_type = random_view_type()
|
||||||
|
|
||||||
if line_selection is not None :
|
if line_selection is not None :
|
||||||
infos['view'] = line_selection.materialize(random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen']))
|
infos['view'] = line_selection.materialize(random_unique_name(infos), comments=random_comments(config))
|
||||||
elif v_to_clone is not None :
|
elif v_to_clone is not None :
|
||||||
infos['view'] = v_to_clone.clone(random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen']))
|
infos['view'] = v_to_clone.clone(random_unique_name(infos), comments=random_comments(config))
|
||||||
else :
|
else :
|
||||||
if v_type == "NUC_SEQS_VIEW" :
|
if v_type == "NUC_SEQS_VIEW" :
|
||||||
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen'])) # TODO quality column
|
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||||
else :
|
else :
|
||||||
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen'])) # TODO quality column
|
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||||
|
|
||||||
print_test(config, repr(infos['view']))
|
print_test(config, repr(infos['view']))
|
||||||
if v_to_clone is not None :
|
if v_to_clone is not None :
|
||||||
@ -348,13 +434,13 @@ def addOptions(parser):
|
|||||||
help="Maximum length of character strings. "
|
help="Maximum length of character strings. "
|
||||||
"Default: 200")
|
"Default: 200")
|
||||||
|
|
||||||
group.add_argument('--comments_max_len','-c',
|
group.add_argument('--tuple_max_len','-u',
|
||||||
action="store", dest="test:commentsmaxlen",
|
action="store", dest="test:tuplemaxlen",
|
||||||
metavar='<COMMENTS_MAX_LEN>',
|
metavar='<TUPLE_MAX_LEN>',
|
||||||
default=10000,
|
default=20,
|
||||||
type=int,
|
type=int,
|
||||||
help="Maximum length of view comments. "
|
help="Maximum length of tuples. "
|
||||||
"Default: 10000")
|
"Default: 200")
|
||||||
|
|
||||||
group.add_argument('--max_ini_col_count','-o',
|
group.add_argument('--max_ini_col_count','-o',
|
||||||
action="store", dest="test:maxinicolcount",
|
action="store", dest="test:maxinicolcount",
|
||||||
@ -402,10 +488,18 @@ def run(config):
|
|||||||
'view': None,
|
'view': None,
|
||||||
'view_names': None,
|
'view_names': None,
|
||||||
'unique_names': [],
|
'unique_names': [],
|
||||||
'random_generator': {b"OBI_BOOL": random_bool, b"OBI_CHAR": random_char, b"OBI_FLOAT": random_float, b"OBI_INT": random_int, b"OBI_SEQ": random_seq, b"OBI_STR": random_bytes},
|
'random_generator': {
|
||||||
|
(b"OBI_BOOL", False): random_bool, (b"OBI_BOOL", True): random_bool_tuples,
|
||||||
|
(b"OBI_CHAR", False): random_char, (b"OBI_CHAR", True): random_char_tuples,
|
||||||
|
(b"OBI_FLOAT", False): random_float, (b"OBI_FLOAT", True): random_float_tuples,
|
||||||
|
(b"OBI_INT", False): random_int, (b"OBI_INT", True): random_int_tuples,
|
||||||
|
(b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
|
||||||
|
(b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
|
||||||
|
},
|
||||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
|
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# TODO ???
|
||||||
config['test']['elt_name_max_len'] = int((COL_COMMENTS_MAX_LEN - config['test']['maxelts']) / config['test']['maxelts'])
|
config['test']['elt_name_max_len'] = int((COL_COMMENTS_MAX_LEN - config['test']['maxelts']) / config['test']['maxelts'])
|
||||||
|
|
||||||
print("Initializing the DMS and the first view...")
|
print("Initializing the DMS and the first view...")
|
||||||
|