Compare commits: v3.0.0-beta17...v3.0.0b21 (11 commits)

SHA1: faf8ea9d86, ffe2485e94, 6094ce2bbc, a7dcf16c06, f13f8f6165, b5a29ac413, efd2b9d338, ca6e3e7aad, 76ed8e18e5, 1d17f28aec, fa834e4b8b
Packaging manifest: the source distribution now ships README.md instead of README.txt.

@@ -6,7 +6,7 @@ recursive-include doc/sphinx/source *.txt *.rst *.py
 recursive-include doc/sphinx/sphinxext *.py
 include doc/sphinx/Makefile
 include doc/sphinx/Doxyfile
-include README.txt
+include README.md
 include requirements.txt
 include scripts/obi
Import command, preread option: the help text no longer oversells the option.

@@ -73,7 +73,7 @@ def addOptions(parser):
                      action="store_true", dest="import:preread",
                      default=False,
                      help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
-                          "a much faster import.")
+                          "a much faster import. This option is not recommended and will slow down the import in any other case.")


 def run(config):
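The help text encodes a real trade-off: a first pass over the data lets the importer discover every dictionary key up front instead of reshaping columns mid-import. A toy sketch of the idea (not the obi implementation; the entries and the rebuild accounting are invented for illustration):

```python
# Toy illustration of the pre-read trade-off (not the obi implementation).
# Without a pre-read, every unseen dict key forces the column layout to be
# rebuilt; a cheap first readthrough collects all keys so the layout is
# built exactly once.

def one_pass(entries):
    layout, rebuilds = set(), 0
    for entry in entries:
        if not entry.keys() <= layout:   # unseen keys: rebuild the layout
            layout |= entry.keys()
            rebuilds += 1
    return rebuilds

def with_preread(entries):
    keys = set()
    for entry in entries:                # first readthrough: keys only
        keys |= entry.keys()
    return 1                             # layout built once

huge_dicts = [{f"k{i}": 0 for i in range(n)} for n in (10, 100, 1000)]
print(one_pass(huge_dicts), with_preread(huge_dicts))  # 3 1
```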
Column creation for dict-valued tags: the bookkeeping tuple now stores the type the column was created with rather than `value_obitype`.

@@ -236,7 +236,7 @@ def run(config):
                 dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
                                                 nb_elements_per_line=len(dict_dict[tag][0]), \
                                                 elements_names=list(dict_dict[tag][0])), \
-                              value_obitype)
+                              dict_dict[tag][1])


     # Reinitialize the input
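Reading the call, `dict_dict[tag]` appears to hold `(element_names, data_type)` for each tag, so the stored companion value now matches the type actually passed to `Column.new_column` instead of the enclosing `value_obitype`.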
The per-entry work in run() is wrapped in a try: (the matching except follows two hunks below; the re-indentation of the wrapped body is not visible in this view).

@@ -270,6 +270,8 @@ def run(config):
         elif not i%50000:
             logger("info", "Imported %d entries", i)

+        try:
+
         if NUC_SEQS_view:
             id_col[i] = entry.id
             def_col[i] = entry.definition
...and the matching except, which reports the failed entry and re-raises only when `skiperror` is explicitly disabled; by default the bad entry is skipped.

@@ -388,6 +390,13 @@ def run(config):
             # Fill value
             dcols[tag][0][i] = value

+        except Exception as e:
+            print("\nCould not import sequence id:", entry.id, "(error raised:", e, ")")
+            if 'skiperror' in config['obi'] and not config['obi']['skiperror']:
+                raise e
+            else:
+                pass
+
         i+=1

         if pb is not None:
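A minimal sketch of the pattern those two hunks add, assuming only the config shape visible in the diff (`config['obi']['skiperror']`); `process()` and the entries are hypothetical stand-ins:

```python
def process(entry):
    """Hypothetical per-entry import step; fails on entries without an id."""
    if entry.get("id") is None:
        raise ValueError("missing id")

def import_entries(entries, config):
    imported = 0
    for entry in entries:
        try:
            process(entry)
        except Exception as e:
            print("\nCould not import sequence id:", entry.get("id"), "(error raised:", e, ")")
            # Re-raise only when skiperror is explicitly disabled; the
            # default behaviour is to skip the bad entry and keep going.
            if 'skiperror' in config['obi'] and not config['obi']['skiperror']:
                raise e
            continue
        imported += 1
    return imported

print(import_entries([{"id": "seq1"}, {}], {'obi': {'skiperror': True}}))  # 1
```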
C API declarations: the cdef extern block for obidmscolumn.h gains the formatted-infos accessor.

@@ -63,6 +63,8 @@ cdef extern from "obidmscolumn.h" nogil:

     char* obi_get_elements_names(OBIDMS_column_p column)

+    char* obi_column_formatted_infos(OBIDMS_column_p column)
+
     index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)

     int obi_column_write_comments(OBIDMS_column_p column, const char* comments)
...which is then cimported alongside the other column functions:

@@ -14,6 +14,7 @@ from ..capi.obidms cimport obi_import_column
 from ..capi.obidmscolumn cimport OBIDMS_column_header_p, \
                                  obi_close_column, \
                                  obi_get_elements_names, \
+                                 obi_column_formatted_infos, \
                                  obi_column_write_comments

 from ..capi.obiutils cimport obi_format_date
Column.__repr__ keeps the plain alias/type string for now; the richer C-formatted variant is staged in comments:

@@ -288,7 +289,13 @@ cdef class Column(OBIWrapper) :
     @OBIWrapper.checkIsActive
     def __repr__(self) :
         cdef bytes s
+        #cdef char* s_b
+        #cdef str s_str
+        #s_b = obi_column_formatted_infos(self.pointer())
+        #s_str = bytes2str(s_b)
+        #free(s_b)
         s = self._alias + b", data type: " + self.data_type
+        #return s_str
         return bytes2str(s)
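The staged lines also document an ownership contract: judging by the commented `free(s_b)`, `obi_column_formatted_infos` returns a heap-allocated C string that the caller must copy into a Python object (`bytes2str`) and then release, so enabling this path without the `free` would leak one buffer per `repr()` call.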
EMBL directory iterator: the progress line becomes 1-based (file 1 of N, not 0 of N).

@@ -177,7 +177,7 @@ def emblIterator_dir(dir_path,
     for filename in files:
         if read==only:
             return
-        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files)))
+        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files+1, len(files)))
         f = uopen(filename)
         if only is not None:
             only_f = only-read
GenBank parser, sequence regexes: the lookbehind match gives way to a two-step cleanup that first deletes the whole ORIGIN line, then the spacing and position numbers.

@@ -25,8 +25,9 @@ from libc.string cimport strcpy, strlen
 _featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M)

 _headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
-_seqMatcher = re.compile(b'(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M)
-_cleanSeq = re.compile(b'[ \n0-9]+')
+_seqMatcher = re.compile(b'ORIGIN.+(?=//\n)', re.DOTALL + re.M)
+_cleanSeq1 = re.compile(b'ORIGIN.+\n')
+_cleanSeq2 = re.compile(b'[ \n0-9]+')
 _acMatcher = re.compile(b'(?<=^ACCESSION ).+',re.M)
 _deMatcher = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M)
 _cleanDe = re.compile(b'\n *')
...with the matching call-site change (the `.upper()` normalization is dropped along the way):

@@ -42,7 +43,8 @@ def genbankParser(bytes text):
         ft = _featureMatcher.search(text).group()

         s = _seqMatcher.search(text).group()
-        s = _cleanSeq.sub(b'', s).upper()
+        s = _cleanSeq1.sub(b'', s)
+        s = _cleanSeq2.sub(b'', s)

         acs = _acMatcher.search(text).group()
         acs = acs.split()
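One concrete case the new two-step cleanup handles: text trailing on the ORIGIN line itself survived the old `[ \n0-9]+` scrub and leaked into the sequence. A quick check with a toy record (invented content):

```python
import re

# New two-step cleanup from the hunks above:
_seqMatcher = re.compile(b'ORIGIN.+(?=//\n)', re.DOTALL + re.M)
_cleanSeq1 = re.compile(b'ORIGIN.+\n')
_cleanSeq2 = re.compile(b'[ \n0-9]+')

# Toy record (invented): note the stray text after ORIGIN.
record = b"LOCUS X\nORIGIN junk here\n        1 acgtacgt\n//\n"

s = _seqMatcher.search(record).group()
s = _cleanSeq1.sub(b'', s)  # drop the whole ORIGIN line, stray text included
s = _cleanSeq2.sub(b'', s)  # drop spacing and position numbers
print(s)  # b'acgtacgt'

# The old pattern, b'(?<=ORIGIN).+(?=//\n)' followed by the single
# [ \n0-9]+ scrub, would have kept b'junkhereacgtacgt' here.
```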
Error handling in genbankParser moves: the except block leaves its mid-parse position...

@@ -52,12 +54,6 @@ def genbankParser(bytes text):
         de = _deMatcher.search(header).group()
         de = _cleanDe.sub(b' ',de).strip().strip(b'.')

-    except Exception as e:
-        print("\nCould not import sequence id:", text.split()[1], "(error raised:", e, ")")
-        # Do not raise any Exception if you need the possibility to resume the generator
-        # (Python generators can't resume after any exception is raised)
-        return None
-
     tags = {}
     extractTaxon(ft, tags)
...and lands after the sequence object is built, so a failure anywhere in the parse is caught:

@@ -68,6 +64,12 @@ def genbankParser(bytes text):
                       offset=-1,
                       tags=tags)

+    except Exception as e:
+        print("\nCould not import sequence id:", text.split()[1], "(error raised:", e, ")")
+        # Do not raise any Exception if you need the possibility to resume the generator
+        # (Python generators can't resume after any exception is raised)
+        return None
+
     return seq
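The comment in that block is load-bearing and worth a demonstration: once an exception escapes a generator, the generator is finished for good, so the parser must swallow errors and return None if callers are to keep iterating. A minimal check:

```python
def gen():
    for i in range(3):
        if i == 1:
            raise ValueError("boom")
        yield i

g = gen()
print(next(g))          # 0
try:
    next(g)             # raises ValueError inside the generator
except ValueError:
    pass
print(next(g, "done"))  # "done": the generator cannot resume after raising
```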
GenBank directory iterator: pick up the newer *.seq* dumps alongside *.gbff*, deduplicate, and print 1-based progress like the EMBL iterator above.

@@ -171,10 +173,12 @@ def genbankIterator_dir(dir_path,
     read = 0
     read_files = 0
     files = [filename for filename in glob.glob(os.path.join(path, b'*.gbff*'))]
+    files.extend([filename for filename in glob.glob(os.path.join(path, b'*.seq*'))]) # new genbank extension
+    files = list(set(files))
     for filename in files:
         if read==only:
             return
-        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files)))
+        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files+1, len(files)))
         f = uopen(filename)
         if only is not None:
             only_f = only-read
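The `list(set(files))` is not decorative: a single name can match both globs, and without deduplication it would be parsed twice. A small check with invented filenames:

```python
import fnmatch

files = ['a.gbff', 'b.seq', 'c.seq.gbff']
hits = fnmatch.filter(files, '*.gbff*') + fnmatch.filter(files, '*.seq*')
print(hits)               # ['a.gbff', 'c.seq.gbff', 'b.seq', 'c.seq.gbff']
print(sorted(set(hits)))  # each file once
```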
Version module: the serial switches to the PEP 440-style `0b21`, and the minor number loses its zero-padding.

@@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0-beta17'
+serial= '0b21'

-version ="%d.%02d.%s" % (major,minor,serial)
+version ="%d.%d.%s" % (major,minor,serial)
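A quick look at what each format string renders with the new serial:

```python
major, minor, serial = 3, 0, '0b21'
print("%d.%02d.%s" % (major, minor, serial))  # 3.00.0b21  (old, zero-padded minor)
print("%d.%d.%s" % (major, minor, serial))    # 3.0.0b21   (new)
```

`3.0.0b21` is the canonical PEP 440 spelling for a beta pre-release, which pip-driven installs parse cleanly.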
requirements.txt (new executable file, 5 lines):

@@ -0,0 +1,5 @@
+--extra-index-url https://pypi.python.org/simple/
+Cython>=0.24
+Sphinx>=1.2.0
+ipython>=3.0.0
+breathe>=4.0.0
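With the `include requirements.txt` manifest rule above, these pins travel inside the source distribution, so `pip install -r requirements.txt` can set up the Cython/Sphinx toolchain before a build.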
setup.py (14 changed lines). The build goes through setuptools so the project installs with pip:

@@ -5,7 +5,8 @@ import re
 import subprocess

 from distutils import log
-from distutils.core import setup
+#from distutils.core import setup
+from setuptools import setup # to work with pip

 from distutils.core import Extension
 from distutils.sysconfig import get_python_lib
Metadata fixes: an https URL, and a DESCRIPTION that is a string rather than a one-element tuple (the old trailing comma made it a tuple).

@@ -88,9 +89,10 @@ PACKAGE = "OBITools3"
 VERSION = version
 AUTHOR = 'Celine Mercier'
 EMAIL = 'celine.mercier@metabarcoding.org'
-URL = "http://metabarcoding.org/obitools3"
+URL = "https://metabarcoding.org/obitools3"
 PLATFORMS = "posix"
 LICENSE = "CeCILL-V2"
-DESCRIPTION = "Tools and library for DNA metabarcoding",
+DESCRIPTION = "A package for the management of analyses and data in DNA metabarcoding."
 PYTHONMIN = '3.5'

 SRC = 'python'
...and the long description is read from README.md so PyPI can render it:

@@ -147,12 +149,18 @@ classifiers=['Development Status :: 4 - Beta',
              'Topic :: Utilities',
             ]

+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
 setup(name=PACKAGE,
       description=DESCRIPTION,
+      long_description=long_description,
+      long_description_content_type="text/markdown",
       classifiers=classifiers,
       version=VERSION,
       author=AUTHOR,
       author_email=EMAIL,
       platforms=PLATFORMS,
       license=LICENSE,
       url=URL,
       ext_modules=xx,
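The `long_description_content_type` line matters: PyPI treats a long description as plain text unless told otherwise, so without it the Markdown README would render unformatted. The field is also only understood by setuptools-based builds, one more reason for the `from setuptools import setup` switch above.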