Compare commits

...

1085 Commits

Author SHA1 Message Date
3db93ee9c4 Fixed stdout output 2024-01-12 16:13:30 +13:00
4844b20770 Merge branch 'master' of https://git.metabarcoding.org/obitools/obitools3 2024-01-12 15:36:58 +13:00
0d98a4f717 Switch to version 3.0.1b26 2024-01-10 16:40:08 +13:00
837ff1a1ba Taxonomy: fixed an issue related to StopIteration behaviour in new
versions of python
2024-01-10 15:53:15 +13:00
aeed42456a export: columns are now in alphabetical order when exporting to tab
format
2024-01-10 15:52:28 +13:00
fb6e27bb5d Revert "Testing RAM instead of mmap for blob alignment"
This reverts commit 6d94cdcc0d
2023-11-29 04:22:31 +01:00
6d94cdcc0d Testing RAM instead of mmap for blob alignment 2023-11-29 16:19:34 +13:00
8a1f844645 obi import: fixed bug caused by new behaviour of StopIteration
exceptions in Python>=3.7
2023-09-21 17:47:40 +12:00
791ccfb92e Fixed include bug in previous version and switch to version 3.0.1b24 2023-05-15 11:35:42 +12:00
1c9a906f5b ngsfilter and ecopcr: now check for primers too long for apat library to
handle (31bp max) and switch to version 3.0.1b23
2023-05-12 17:04:21 +12:00
55b2679b23 New command obi rm and switch to version 3.0.1b22 2023-05-08 17:48:50 +12:00
9ea2124adc Switch to version 3.0.1b21 2023-02-13 11:00:01 +13:00
2130a949c7 New command: obi taxonomy to add local taxa (closes #64) 2023-02-13 10:59:20 +13:00
eeb93afa7d import: now automatically renames scientific_name tag to
`SCIENTIFIC_NAME`, and suggests using `--input-na-string` when a
sequence import fails
2023-02-13 10:40:38 +13:00
755ce179ad head: added output format options 2023-02-13 10:31:26 +13:00
7e492578b3 Switch to version 3.0.1b20 2022-09-21 11:33:03 +12:00
02e9df3ad1 alignpairedend and ngsfilter: ids of original sequences are now kept 2022-09-21 11:32:19 +12:00
55ada80500 import: made ngsfilter file parsing more resilient and switching to
version 3.0.1b19
2022-07-15 16:02:21 +12:00
ef9d9674b0 obi import: added SINTAX format import and switch to version 3.0.1b18 2022-05-17 09:36:33 +12:00
4f39bb2418 switch to version 3.0.1b17 2022-05-03 10:55:36 +12:00
0a2b8adb50 import: added import of UNITE fasta format 2022-05-03 10:54:41 +12:00
f9b99a9397 annotate: fixed a bug where a column type could be wrongly guessed and
switch to version 3.0.1b16
2022-03-30 16:32:07 +13:00
ce2833c04b switch to version v3.0.1b15 2022-02-25 17:48:44 +13:00
f64b3da30b split command 2022-02-25 17:44:18 +13:00
388b3e0410 removed a trace 2021-11-11 15:53:27 +13:00
c9db990b83 switch to version 3.0.1b14 2021-11-11 15:28:00 +13:00
3f253feb5e Cython: View: fixed keys method to get list of view keys 2021-11-11 15:27:32 +13:00
85d2bab607 small fix 2021-11-11 15:26:48 +13:00
53b3d81137 small fixes and improvements 2021-11-11 15:26:09 +13:00
f6353fbf28 obi export: added options to export to metabaR compatible format 2021-11-11 15:24:12 +13:00
5a8b9dca5d goes with previous commit 2021-11-11 15:12:04 +13:00
8bd6d6c8e9 Python: URI decoding: now properly checking that paths can be encoded in
ASCII (issue #89)
2021-11-02 11:17:59 +13:00
405e6ef420 Python: URI decoding: added metabaR output 2021-11-02 11:16:29 +13:00
fedacfafe7 switch to version 3.0.1b13 2021-09-13 11:46:17 +12:00
2d66e0e965 python: genbank parser: better handling of white spaces 2021-09-13 11:44:38 +12:00
f43856b712 switch to version 3.0.1b12 2021-09-08 10:56:55 +12:00
9e0c319806 Cython: fixed rewriting of column when rewriting a 1 element dict column 2021-09-08 10:54:23 +12:00
58b42cd977 C: views: now correctly parses view names containing '.' when cleaning
unfinished views. Closes #115
2021-09-08 10:52:42 +12:00
34de90bce6 ngsfilter: checks better if there is an associated sequencing quality 2021-09-08 10:30:11 +12:00
4be9f36f99 stats: fixed the computation of variance when it is equal to 0 2021-08-05 11:32:16 +12:00
f10e78ba3c C: fixed the printing of view informations from a DMS (fixes #114) 2021-08-05 11:31:24 +12:00
88c8463ed7 Cython: taxonomy: improved logging 2021-08-05 11:29:20 +12:00
89168271ef ecopcr: now accepting taxonomy from a different DMS than the reference
sequences
2021-08-05 11:28:57 +12:00
82d2642000 Switch to version 3.0.1b11 2021-07-22 09:25:39 +12:00
99c1cd60d6 export: now exports header for tabular files by default and added option
to only export specific columns
2021-07-22 09:23:18 +12:00
ce7ae4ac55 export: fixed 'only' option printing one too many if printing header 2021-07-21 15:23:04 +12:00
0b4283bb58 cat: improved error handling 2021-07-21 15:22:08 +12:00
747f3efbb2 Improved taxonomy reading information display 2021-07-21 15:20:44 +12:00
6c1a3aff47 Fixed the handling of sample names that are numbers (forcing conversion) 2021-07-21 15:19:24 +12:00
e2932b05f2 Implements #108 export integer missing values as 0 for tables by default 2021-07-21 14:41:54 +12:00
32345b9ec4 Addresses #111 2021-07-19 15:55:25 +12:00
9334cf6cc6 import: improved genbank parser and switch to version 3.0.1.b10 2021-06-17 08:42:01 +12:00
8ec13a294c Switch to version 3.0.1b9 2021-06-01 09:21:43 +12:00
3e45c34491 import: now imports and adds taxids for SILVA and RDP files, added
import of lists, fixed skipping of errors (was not overwriting), and
fixed --no-progress-bar option
2021-06-01 09:21:07 +12:00
c2f3d90dc1 build_ref_db: set default threshold to 0.99 2021-06-01 09:11:17 +12:00
6b732d11d3 align: fixed column URI parsing 2021-06-01 09:10:21 +12:00
9eb833a0af typo fix 2021-06-01 09:09:16 +12:00
6b7b0e3bd1 cat: fixed the handling of dictionary columns 2021-06-01 09:06:13 +12:00
47691a8f58 count: added option to specify the count column 2021-06-01 09:05:14 +12:00
b908b581c8 clean: hid not implemented option 2021-06-01 09:04:22 +12:00
03c174fd7a grep: added taxonomy check 2021-05-31 17:03:39 +12:00
2156588ff6 added TODO comment 2021-05-31 17:01:57 +12:00
6ff29c6a6a Increased maximum line count to 10E12 2021-05-31 17:00:55 +12:00
51a3c68fb5 C: build_reference_db: fixed gcc warning/error 2021-05-31 16:59:17 +12:00
da91ffc2c7 URI decoding: fixed reading a taxonomy before any view 2021-05-31 16:57:20 +12:00
c884615522 obi stats: various fixes and improvements 2021-05-31 16:51:06 +12:00
cb53381863 ecotag: BEST_MATCH_TAXIDS now dereplicated (no repeated taxids in the
list) and switch to version 3.0.1b8
2021-05-10 16:02:06 +12:00
72b3e5d872 switch to version 3.0.1b7 2021-04-07 10:31:54 +12:00
238e9f70f3 alignpairedend: fixed bug that would cut out sequence ends when it
should not have
2021-04-07 10:31:12 +12:00
e099a16624 small fixes 2021-04-07 10:28:02 +12:00
847c9c816d import: fixed count estimation for tabular files with header 2021-03-30 09:07:14 +13:00
6026129ca8 Fixes 101 2021-03-30 09:06:08 +13:00
169b6514b4 small doc fixes 2021-03-29 13:07:48 +13:00
89b0c48141 switch to version 3.0.1b6 2021-03-29 11:18:44 +13:00
7c02782e3c import/export: workaround for issue where flake8(?) reads '\t' as
'\'+'t' when parsing an option value
2021-03-29 11:18:19 +13:00
ecc4c2c78b stats: improved the tabular display 2021-03-29 09:03:32 +13:00
f5413381fd C: taxonomy: fixed a bug where some taxa would not be stored in the
merged index
2021-03-29 09:02:18 +13:00
3e93cfff7b import: Columns are now rewritten in OBI_FLOAT if a value is > INT32_MAX 2021-03-29 09:00:52 +13:00
6d445fe3ad switch to version 3.0.1b5 2021-03-22 09:41:01 +13:00
824deb7e21 new command: obi rm: deletes any view (for now the user deleting a view
accepts that there will be missing information when running obi history
if other views came from the deleted view)
2021-03-18 09:17:06 +13:00
d579bb2749 switch to version 3.0.1b4 2021-03-16 17:40:58 +13:00
10e5ebdbc0 ngsfilter: fixed critical bug where barcodes shorter than the forward
primer would be missed
2021-03-16 15:09:28 +13:00
8833110490 import: fixed the import of tabular files with no header 2021-03-16 09:15:48 +13:00
bd38449f2d switch to version 3.0.1b3 2021-03-15 16:50:17 +13:00
904823c827 uniq: now OK to use -m option even if only one unique key in information
to merge (e.g. one sample)
2021-03-15 16:48:22 +13:00
af68a1024c Switch to version 3.0.1b2 2021-03-15 16:26:43 +13:00
425fe25bd2 Made the OBITools3 more 'empty file friendly' 2021-03-15 16:25:41 +13:00
d48aed38d4 switch to version 3.0.1b1 2021-03-11 17:11:23 +13:00
5e32f8523e Merge branch 'wsl_version' 2021-03-11 16:47:59 +13:00
8f1d94fd24 obi test: fixed bug introduced in ad1fd3c3 2021-03-11 16:31:31 +13:00
38f42cb0fb C: Made maximum file path length 2048 instead of 1024 2021-03-11 15:23:22 +13:00
7f0f63cf26 C: now completely unmapping files before truncating them to a smaller
size (#68)
2021-03-11 15:12:40 +13:00
cba78111c9 obi test: fixed bug introduced in previous version 2021-03-11 11:36:52 +13:00
41fbae7b6c Switch to version 3.0.0b43 2021-03-10 16:52:03 +13:00
ad1fd3c341 Now handling dictionaries with one key 2021-03-10 16:50:30 +13:00
fbf0f7dfb6 import: improved genbank parser and switch to version 3.0.0b42 2021-02-17 15:26:35 +13:00
fda0edd0d8 Switch to version 3.0.0b41 2021-02-10 17:29:08 +13:00
382e37a6ae Fixes #88 2021-02-10 17:28:49 +13:00
5cc3e29f75 obi test: made less heavy by default 2021-02-10 17:28:15 +13:00
a8e2aee281 Switch to version 3.0.0b40 2021-02-06 14:45:07 +13:00
13adb479d3 Adds an extern qualifier to the keep_running declaration. 2021-02-05 15:59:43 +01:00
8ba7acdfe1 export: fixed a bug where exporting to tab format with a header would
not export the first line of data and switch to version 3.0.0b39
2021-01-13 16:09:04 +01:00
38051b1e4f Removed spurious commentaries 2021-01-13 16:07:42 +01:00
52a2e21b38 grep: fixed --id-list option
and switch to version 3.0.0b38
2020-11-06 16:36:37 +01:00
d27a5b9115 Switch to version 3.0.0b37 2020-10-30 10:47:13 +01:00
20bd3350b4 New command: obi addtaxids to add NCBI taxids to sequences from their
taxon name.
2020-10-30 10:46:55 +01:00
2e191372d7 Now handling sequences with Uracil (U) nucleotides by converting to
Thymine (T)
2020-10-30 10:46:17 +01:00
112e12cab0 Taxonomy: new functions to find taxa by name 2020-10-30 10:45:20 +01:00
b9b4cec5b5 import: now can import SILVA fasta files 2020-10-30 10:43:04 +01:00
199f3772e8 Small fixes (potential compilation problems) 2020-10-30 10:41:58 +01:00
422a6450fa ecotag: clarified similarity circle documentation 2020-09-29 17:57:29 +02:00
137c109f86 obi ls: now done in C (preparing things for R packages to read DMS) and
switch to version 3.0.0b36
2020-09-29 17:51:39 +02:00
b6648ae81e Revert "Fixed version numbering mistake (should be b34 not b35)"
This reverts commit f6dffbecfe
2020-09-25 16:25:39 +02:00
f6dffbecfe Fixed version numbering mistake (should be b34 not b35) 2020-09-25 16:24:23 +02:00
c4696ac865 ecotag: added separate threshold for minimum circle identity (and switch
to version 3.0.0b35)
2020-09-25 16:22:09 +02:00
11a0945a9b obi cat: fixed open file descriptor leak and switch to version 3.0.0b34 2020-08-28 10:41:22 +02:00
f23c40c905 obi cat: fixed a bug introduced in 3.0.0b28 and switch to version
3.0.0b33
2020-08-27 18:38:16 +02:00
f99fc13b75 switch to version 3.0.0b32 2020-08-13 18:17:09 +02:00
1da6aac1b8 C: patch for failed creation of AVL with errno EEXIST 2020-08-12 17:55:08 +02:00
159803b40a export: now automatically sorts dictionary keys alphabetically for
tab/csv output
2020-07-31 16:43:35 +02:00
7dcbc34017 import: fixed entry count estimation when importing fastq files 2020-07-30 16:56:36 +02:00
db2202c8b4 uniq: added a check to make sure that there is more than one element for
one tag when merging its information
2020-07-30 16:14:37 +02:00
d33ff97846 switch to version 3.0.0b31 2020-07-28 09:31:19 +02:00
1dcdf69f1f export: fixed a bug introduced in version 3.0.0b28 2020-07-28 09:31:05 +02:00
dec114eed6 Python: added "date created" information in view representation 2020-07-27 17:38:45 +02:00
f36691053b Python: added the OBITools3 version that generated the view in view
comments
2020-07-27 16:50:00 +02:00
f2aa5fcf8b alignpairedend: fixed division by 0 bug and switch to version 3.0.0b30 2020-07-27 10:15:59 +02:00
bccb3e6874 switch to version 3.0.0b29 2020-07-26 17:40:26 +02:00
f5a17bea68 C: added a missing error check 2020-07-26 17:39:55 +02:00
e28507639a C and Cython: fixed and improved the associated columns system 2020-07-26 17:39:29 +02:00
e6feac93fe obi test: made less heavy to be faster 2020-07-26 17:37:21 +02:00
50b292b489 obi import: added --space-priority option to import a view line by line 2020-07-26 17:36:52 +02:00
24a737aa55 switch to version 3.0.0b28 2020-07-24 16:10:10 +02:00
8aa455ad8a Python: made all commands handle output to buffer object (e.g. stdout) 2020-07-24 16:09:48 +02:00
46ca693ca9 Cython: View: new method to print a view to a buffer (e.g. stdout) 2020-07-24 16:03:23 +02:00
9a9afde113 Cython: progress bar: set default refresh rate to 5 seconds 2020-07-24 15:29:12 +02:00
8dd403a118 grep: now prints the number of entries grepped 2020-07-13 17:08:13 +02:00
9672f01c6a alignpairedend: improved/fixed the output tags for the alignment score
and lengths. Removed minimum score option
2020-07-13 15:59:50 +02:00
ed9549acfb ngsfilter: unidentified sequences are now stored untrimmed 2020-07-13 15:56:40 +02:00
9ace9989c4 Switch to version 3.0.0b27 2020-07-07 16:47:21 +02:00
a3ebe5f118 C: AVL trees: fixed a bug where storing the difference between 2 crc64
values in an int64 would mess trees up resulting in failed data
dereplication
2020-07-07 16:47:00 +02:00
9100e14899 obi uniq: quick fix for bug where some sequences are not correctly
dereplicated
2020-07-03 17:36:57 +02:00
ccda0661ce small help documentation improvement 2020-07-01 18:20:38 +02:00
aab59f2214 obi clean: fixed a memory bug, fixed the behaviour when no sample info,
and added checks warnings and error handling when sample info not
dereplicated
2020-07-01 18:17:47 +02:00
ade1107b42 switch to version 3.0.0b26 2020-06-17 18:56:07 +02:00
9c7d24406f export: dictionaries are now formatted like in the original OBITools
when exporting in tabular format and tuple formatting is cleaner
2020-06-17 18:55:46 +02:00
03bc9915f2 Cython: utils: added handling of tuples to bytes2str_object function 2020-06-17 18:54:14 +02:00
24b1dab573 Cython: Columns: added a keys() method that returns all element names 2020-06-17 18:53:41 +02:00
7593673f3f ngsfilter: now setting 'reversed' tag to False instead of None when
false
2020-06-17 18:52:35 +02:00
aa01236cae switch to version 3.0.0b25 2020-06-13 21:48:49 +02:00
49b8810a76 C: made indexer opening/closing cleaner 2020-06-13 21:47:03 +02:00
7a39df54c0 ls: fixed an issue where big DMS couldn't be read by ls 2020-06-13 21:45:22 +02:00
09e483b0d6 switch to temporary version 3.0.0b24a 2020-06-10 17:47:56 +02:00
14a2579173 uniq: now outputs an empty view if input view is empty instead of
displaying an error
2020-06-10 17:47:26 +02:00
36a8aaa92e grep: now creating empty views instead of displaying an error when
selecting on a nonexistent column/tag
2020-06-10 16:57:42 +02:00
a17eb445c2 ngsfilter: made one of the tag error messages more accurate 2020-06-10 16:27:36 +02:00
e4a32788c2 Switch to version 3.0.0b24 2020-06-09 14:36:58 +02:00
2442cc80bf Cython: View: fixed bash history display 2020-06-09 14:36:37 +02:00
aa836b2ace uniq: improved progress bar of second browsing 2020-06-09 14:36:02 +02:00
8776ce22e6 C: fixed a bug where indexers referring to tuples of certain types were
not properly closed and imported
2020-06-09 14:34:43 +02:00
4aa772c405 ecotag: Added list of taxids for all best matches (closes #80) 2020-06-09 14:33:14 +02:00
b0b96ac37a version 3.0.0b23a 2020-06-05 16:10:24 +02:00
687e42ad22 C: kmer alignment: fixed a bug where scores of 0 were at
(0+kmer_length-1) (and now setting alignment direction to None if score
is 0)
2020-06-05 16:09:33 +02:00
5fbbb6d304 alignpairedend: fixed a bug when rebuilding joined (unaligned) sequences
where only the forward sequence was kept
2020-06-05 16:06:43 +02:00
359a9fe237 Switch to version 3.0.0b23 2020-06-04 15:35:03 +02:00
f9b6851f75 Python: correctly flagged some mandatory options as required 2020-06-04 15:34:24 +02:00
29a2652bbf Fixed installation on Ubuntu without pip 2020-06-04 15:06:35 +02:00
2a2c233936 obi import: fixed a bug when skipping an entry 2020-05-29 21:19:42 +02:00
faf8ea9d86 Switch to version 3.0.0b21 2020-05-28 20:42:09 +02:00
ffe2485e94 Genbank parser: now reading ORIGIN lines with comments without
triggering error
2020-05-28 20:41:34 +02:00
6094ce2bbc obi import: skip on error more robust 2020-05-28 20:40:36 +02:00
a7dcf16c06 Minor changes for pip release 2020-05-20 15:59:04 +02:00
f13f8f6165 obi import: minor doc/display improvements 2020-05-20 11:46:29 +02:00
b5a29ac413 Switch to version 3.0.0b19 2020-05-20 10:29:36 +02:00
efd2b9d338 Cleaner installation 2020-05-20 10:29:12 +02:00
ca6e3e7aad obi import: fixed to work with seq genbank extension 2020-05-20 10:28:14 +02:00
76ed8e18e5 Switch to version 3.0.0b18 with version formatting that fits setuptools 2020-05-18 17:08:55 +02:00
1d17f28aec setup: now using setuptools instead of distutils to work with pip 2020-05-18 17:08:09 +02:00
fa834e4b8b obi import: small bug fix 2020-05-18 17:06:58 +02:00
a72fea3cc9 Python: fasta parser: fixed a bug stopping the program when the last
line contained a single nucleotide
2020-05-12 11:24:12 +02:00
e9a37d8a6e Switch to version 3.0.0-beta16 2020-05-07 17:09:26 +02:00
ef074f8455 typo 2020-05-07 17:08:59 +02:00
aec5e69f2c C, views: no more automatic COUNT column if MERGED_sample column exists 2020-05-07 17:08:07 +02:00
170ef3f1ba Views: added obi prefix to commands in bash history 2020-05-07 17:07:01 +02:00
f999946582 obi uniq: fixed the remerging of already merged informations, and
efficiency improvements
2020-05-07 17:05:54 +02:00
773b36ec37 obi import: fixed the import of old obitools files with premerged
informations, and other minor improvements
2020-05-07 17:03:04 +02:00
69cb434a6c version 3.0.0-beta15c 2020-04-29 14:25:33 +02:00
55d4f98d60 obi annotate: fixed annotation at ranks 2020-04-29 14:24:40 +02:00
0bec2631e8 ecotag: fixed a bug where all the full DMS path weren't properly sent to
the C layer
2020-04-29 10:35:55 +02:00
e6b6c6fa84 AVLs: Made an error message more informative 2020-04-29 10:14:04 +02:00
974528b2e6 build_ref_db: fixed bug erasing some of the higher LCAs (i.e. lowest
similarities)
2020-04-28 15:56:06 +02:00
1b346b54f9 ecotag: better specificity by now correctly looking for similarities
within refs above best score instead of ecotag threshold
2020-04-28 15:10:07 +02:00
058f2ad8b3 ecopcr: fixed a bug where sequences were considered circular (generating
false positives)
2020-04-27 14:44:35 +02:00
60bfd3ae8d obi annotate: now defaults to setting str if expression is not valid 2020-04-24 11:35:20 +02:00
67bdee105a C: build_ref_db: added progress display for each step 2020-04-18 14:24:08 +02:00
0f745e0113 C: Columns: optimizing column file growth 2020-04-18 13:55:47 +02:00
da8de52ba4 export: fixed progress bar bug 2020-04-17 15:09:10 +02:00
4d36538c6e C: SSE lcs alignment: band-aid for memory bug I don't understand
(triggered on specific db on ubuntu)
2020-04-17 15:07:52 +02:00
8d0b17d87d Switch to version 3.0.0-beta14 2020-04-15 17:47:26 +02:00
343999a627 Taxonomy: fixed a critical memory bug when building the list of merged
taxids
2020-04-15 17:46:13 +02:00
e9a40630e9 C: Columns: rounding column growth to ceil to avoid looping on small
values
2020-04-13 19:02:10 +02:00
8dbcd3025a C: Columns: reduced column growth factor from 2 to 1.3 to avoid errno28 2020-04-13 14:47:56 +02:00
4cf635d001 Switch to version 3.0.0-beta13 2020-04-12 17:42:58 +02:00
b7e7cc232a Made completion script cleaner 2020-04-12 17:41:59 +02:00
b6ab792ceb C: made error message more detailed when checking that sequences and
qualities match
2020-04-12 17:40:24 +02:00
ddea5a2964 obi import: fixed inconsequential error when precomputing number of
entries in some formats
2020-04-12 17:38:42 +02:00
30852ab7d5 View bash history: removed useless shebang 2020-04-12 17:36:04 +02:00
4d0299904e all commands (almost): cleaner DMS closing at the end 2020-04-12 17:31:58 +02:00
eef5156d95 obi stats: fixed error when printing bool keys 2020-04-12 17:12:04 +02:00
e62c991bbc goes with previous commit 2020-04-10 11:22:26 +02:00
1218eed7fd ecopcr: now printing a warning instead of interrupting with an error
when a taxid is not found
2020-04-10 11:22:04 +02:00
cd9cea8c97 obi import: fixed critical bug where the last entry of embl and genbank
files was not imported
2020-04-09 19:26:27 +02:00
98cfb70d73 ecopcr: made some errors more informative 2020-04-09 09:15:28 +02:00
b9f68c76c8 ecopcr: added warnings and check of primer length (related to #75) 2020-04-05 18:40:56 +02:00
0b98371688 ngsfilter: added warning about primer length in -h (#75) 2020-04-05 18:39:20 +02:00
f0d152fcbd ngsfilter: now checking primer length (fixes #75) 2020-04-05 18:29:10 +02:00
8019dee68e ecotag: now closing all DMS properly 2020-04-05 13:20:49 +02:00
0b4a234671 Switch to version 3.0.0-beta11 2020-02-12 14:23:42 +01:00
d32cfdcce5 ecotag: fixed the generated column comments formatting that would
generate errors
2020-02-12 14:23:17 +01:00
219c0d6fdc obi cat: Fixed the handling when concatenating views with dictionaries
having different key sets
2020-02-12 14:21:39 +01:00
dc9f897917 switch to version 3.0.0-beta10 2020-02-02 21:15:27 +01:00
bb72682f7d obi import: new option --preread to do a first readthrough of the
dataset if it contains huge dictionaries for a much faster import.
2020-02-02 21:12:34 +01:00
52920c3c71 URI decoding: dirty temp fix for bug where default dms makes a mess when
should guess file
2020-02-02 21:11:05 +01:00
18c22cecf9 switch to version 3.0.0-beta9 2020-02-01 15:48:55 +01:00
1bfb96023c obi import: rewriting a column now deletes the old one to save disk
space
2020-02-01 15:31:14 +01:00
c67d668989 obi import: fixed a bug when the first entry would contain a dictionary
with one key. Switch to beta8
2020-01-29 20:23:39 +01:00
db0ac37d41 switch to version 3.0.0-beta7 2020-01-29 16:18:53 +01:00
d0c21ecd39 Removed an OpenMP clause that was not obligatory and triggered a known
gcc bug involving macros
2020-01-24 16:00:53 +01:00
53212168a2 History: added 'obi' in bash history for practical reasons 2020-01-23 16:51:49 +01:00
b4b2e62195 Cleaner handling of reverse quality columns 2020-01-18 19:28:12 +01:00
ced82c4242 Switching to version 3.0-beta6 2020-01-18 17:29:23 +01:00
a524f8829e New command: obi cat to concatenate views (not optimized yet) 2020-01-18 17:28:31 +01:00
5c9091e9eb C: closing DMS after cleaning it instead of counting on upper layer 2020-01-18 17:27:35 +01:00
822000cb70 Fixes in documentation 2020-01-18 17:26:18 +01:00
b9cd9bee9a C: Changed obibool definitions because of conflict with R 2020-01-06 15:11:31 +01:00
b1f3e082f9 ngsfilter: fixed a bug when there is only one tag introduced in latest
edit
2020-01-06 13:53:38 +01:00
6c018b403c ecopcr: fixed and improved the options to keep nucleotides around the
amplicon
2019-12-26 20:45:54 +01:00
694d1934a8 Tagging version beta3 2019-12-12 17:03:13 +01:00
fc3ac03630 clean_dms: now works with extension 2019-12-12 17:02:50 +01:00
d75e54a078 uniq: added forced deletion of reverse sequence quality 2019-12-12 17:02:36 +01:00
6bfd7441f3 ngsfilter: fixed sequence cutting when dealing with unaligned sequences.
Could use optimization
2019-12-12 17:01:31 +01:00
81a179239c ngsfilter: fixed sequence cut bug on aligned sequences. Still exists for
unaligned sequences
2019-12-10 18:13:27 +01:00
35ce37c0f7 ngsfilter: fixed a bug with unaligned chimeras (unpaired primers) and
made error annotations more explicit
2019-12-10 13:43:32 +01:00
53f18316b0 ngsfilter: made more robust and practical to use with empty tags 2019-11-29 15:21:08 +01:00
8bc249b2f4 Version 3.0.0-beta1 2019-09-27 14:52:05 +02:00
e308c2e822 versioning 1.0.beta 2019-09-26 21:05:05 +02:00
3b3cf9359d CMake: unset gcc for nix 2019-09-26 21:04:42 +02:00
be85c55c9e Python: URIs: fixed bug on linux systems 2019-09-25 14:41:52 +02:00
6d5b904888 Cleaning 2019-09-25 11:58:00 +02:00
50e8374f6f Added website URL in readme file 2019-09-25 11:40:00 +02:00
6282242a04 C: Views: fixed a bug when trying to add a comment after changing the
file name of a finished view
2019-09-25 11:39:32 +02:00
44517db51f Fixed gcc warnings 2019-09-25 11:38:00 +02:00
c3b9e46291 more cleaning 2019-09-24 13:58:53 +02:00
28b7fce59a Cython API: simpler column repr display 2019-09-22 20:23:31 +02:00
fa9555deb9 obi stats: fixed bug with None values 2019-09-22 20:21:53 +02:00
d30f7e7317 more cleaning 2019-09-22 18:52:05 +02:00
4fa38d9886 cleaning 2019-09-22 17:38:28 +02:00
71276537a6 obi import: fixed bug when importing a taxdump 2019-09-22 16:45:30 +02:00
ba9ba7aa60 obi grep: now able to convert str to bytes in predicate expressions 2019-09-22 16:44:45 +02:00
7b4046c288 Bash completion script for commands, dms and views 2019-09-21 23:46:08 +02:00
e2ba76002a Cleaned setup script and put to my name ;) 2019-09-21 23:44:24 +02:00
336100f716 obi less: now actually behaves like less 2019-09-21 18:29:12 +02:00
d83398c0e0 Cython: View: lines from simple View instances are now displayed in tab
instead of dict format
2019-09-21 18:28:56 +02:00
974d25b815 Cython: Fixed bug in tab formatter with header option always being set
to true
2019-09-21 18:27:47 +02:00
ec0737a600 Added signal catching and handling in C and Cython 2019-09-21 16:47:22 +02:00
06f9d6da60 obi import: importing a view to a DMS now uses the C API (more efficient
and imports all metadata)
2019-09-21 12:49:29 +02:00
f0f7edf152 Python API: small option improvements 2019-09-21 12:08:36 +02:00
9e72c8d16a obi ls: improved taxonomy list 2019-09-20 20:46:33 +02:00
7c3fa14789 obi import: fixed bug when reading output URI 2019-09-20 20:43:48 +02:00
ec874c095b new command: clean_dms to clean and unlock a DMS after a bad exit. 2019-09-20 20:38:25 +02:00
783a1343c4 DMS are now locked when used by a command. Added checks and changed
cleaning mechanisms.
2019-09-20 20:37:19 +02:00
eb6c59dc1e obi import: proper check for taxonomy name already existing in DMS when
importing a taxdump
2019-09-17 13:41:49 +02:00
ad46056179 obi export: if export format is not specified, it is guessed from the
view type
2019-09-17 13:22:41 +02:00
9063e9159d Export options: output option is now only non-positional for obi export 2019-09-17 13:19:17 +02:00
0159385943 URI decoding: fixed bug with dms-only URI 2019-09-17 12:50:37 +02:00
a0c8deb806 obi export: made output to stdout and pipe in less possible 2019-09-17 12:31:03 +02:00
f566618be6 Added option for no progress bar and made output URI option non
positional (for stdout output)
2019-09-17 12:29:33 +02:00
88451116e8 URIs: added stdout output (empty URI) 2019-09-17 12:28:10 +02:00
eb913b2742 ecotag: trying to use a threshold lower than the ref db threshold now
returns an error instead of a warning
2019-09-15 19:27:47 +02:00
f8d1fa678a obi stats: improved display with str instead of bytes 2019-09-10 16:20:36 +02:00
bc55c5ef8c obi clean: fixed an openmp bug where the share size would be 0 blocking
the program
2019-09-10 15:37:33 +02:00
f3b0e10c7f fixed a comment 2019-09-10 14:42:12 +02:00
8f9f2a2d10 obi ls: various improvements 2019-09-10 14:41:43 +02:00
045a751b0f Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2019-09-05 17:20:13 +02:00
ad3a72597f Little fixes for linux compilation 2019-09-05 17:19:29 +02:00
8899478237 Update README.md 2019-09-04 17:29:45 +02:00
ec614e5d15 Update README.md 2019-09-04 17:23:30 +02:00
f8cccebe19 Update README.md 2019-09-04 17:11:46 +02:00
5e3c41b058 C: Fixed opened DIR leak 2019-09-04 16:48:13 +02:00
b3a1011d36 C: fixed a bug when opening or creating a new column directory where the
DMS was not saved in the struct
2019-09-04 13:16:28 +02:00
a7fabff1c7 C: made it so column DIR* are not kept open to handle very large DMS 2019-09-04 12:55:21 +02:00
f296517716 Various display improvements 2019-09-03 21:46:39 +02:00
d491480af2 C: fixed remaining memory bug in array indexer 2019-09-01 17:24:57 +02:00
073d98db08 C: ecotag: now prints a warning if the demanded threshold is lower than
the db threshold
2019-08-31 18:30:06 +02:00
0ee728c4d0 C: build_ref_db: now adds a comment with the threshold used to build the
DB
2019-08-31 18:29:40 +02:00
7423bacac0 C: Json comments: added an obi_read_comment function to read one value
from comments
2019-08-31 18:28:51 +02:00
53dcbc8ea3 Fixed log to be in str instead of bytes 2019-08-29 18:26:51 +02:00
4e75514bad obi import: fixed entry count 2019-08-29 18:26:09 +02:00
1ed2d45ac4 obi grep: made an error message clearer (error could eventually be
handled by program, looking for str in bytes returned by a column)
2019-08-29 17:17:52 +02:00
e43e49d6f1 C: optimized dir opening 2019-08-29 16:35:10 +02:00
187053026f Better detection of missing taxonomy 2019-08-29 16:10:09 +02:00
dcf8cf1d64 Improved obi stats 2019-08-29 15:18:26 +02:00
3cfe3a9b00 Improved progress display when importing files in a DMS 2019-08-29 10:12:06 +02:00
728af51cb2 Python: better display of tuple values in fasta format 2019-08-28 15:55:36 +02:00
99a397b842 obi uniq: various improvements and fixes #66 2019-08-27 20:27:36 +02:00
f5c472ffd1 C: fixed a memory bug in the array indexer 2019-08-27 20:26:46 +02:00
580db2f710 minor comment 2019-08-27 20:25:54 +02:00
dbe09f83a2 Increased the threshold of elements per line in a column before they are
stored as a character string
2019-08-27 20:25:14 +02:00
3d1b2e8ed9 Better handling of column lines with all values at NA 2019-08-27 20:20:26 +02:00
ae5f42c260 fixes #61 : now reading merged taxids information when building a
reference database
2019-08-19 12:30:56 +02:00
af7cecf59f Fixed a bug where a directory was not closed properly resulting in errno
24 sometimes
2019-08-18 19:46:52 +02:00
5f20be44b2 Minor fixes 2019-08-18 19:45:53 +02:00
66441e0aef Fixed a bug when sending a DMS path to a C function from Cython 2019-08-18 19:43:51 +02:00
13952358b3 Fixed a bug where some commands wouldn't work if the input DMS was not
in the current directory
2019-07-25 11:59:19 +02:00
9f38cd8cf6 updated a comment 2019-07-23 19:03:24 +02:00
946f9723b8 ecotag: fixed a bug where the wrong taxid for the best match was
retrieved
2019-07-23 19:02:17 +02:00
9752ff8494 embl parser: information display about progress when parsing multiple
files
2019-07-23 18:59:07 +02:00
d99702f56f ngsfilter and alignpairedend: paired-end reads are now correctly
reversed and labeled to be aligned correctly by alignpairedend
2019-07-23 18:56:51 +02:00
1759302829 C: ecotag: fixed 2 memory bugs 2019-07-06 16:31:19 +02:00
86bfa96fbe C: kmer similarity: small improvements 2019-07-06 16:30:32 +02:00
f765c6f41e obi alignpairedend: fixed a bug where first seq was kept in result view
instead of consensus seq
2019-07-06 16:29:32 +02:00
a83bf43ab9 obi stats: result display is now sorted 2019-07-06 16:27:51 +02:00
3d9f0352ff obiclean parallelized 2019-06-20 19:44:04 +02:00
9b4c3537f9 multithreaded obiclean working but not cleaned 2019-06-19 17:29:58 +02:00
fd0b7a9177 j loop with critical (untested) 2019-06-04 17:14:36 +02:00
debf59b266 i loop parallelized: bad 2019-05-25 18:37:56 +02:00
a04588da31 openmp on j loop (i loop probably better) 2019-05-24 16:51:04 +02:00
ed5bb70c80 CMake: setting compiler higher to avoid conflicts, and linking libopenmp 2019-05-22 16:26:30 +02:00
22a5ae72d1 obi clean: not using tsearch library anymore, a simple byte array
instead. A lot more time and memory efficient. Closes #67
2019-05-19 17:39:53 +02:00
dc88181eeb Add a --cobitools3 options to setup.py 2019-04-12 14:55:05 +02:00
2f60e91d93 Comment the install of the packages 2019-04-12 13:03:53 +02:00
7ba27b6a99 Ask for python 3.7 2019-04-01 09:08:27 +02:00
d3937e1051 Add the cmakefile to the manifest 2019-04-01 09:01:45 +02:00
35eeb07f08 Build the C src in build/cobject 2019-04-01 08:52:38 +02:00
3afbbeb7e5 CMake: made required version 3.10 for ubuntu 2019-03-31 16:54:05 +02:00
d6056a8e50 dirty temporary fix for install 2019-03-31 16:19:05 +02:00
ac47bdce5d history: fixed DMS history when multiple inputs 2019-03-31 15:44:20 +02:00
7f8d1e7196 C: obi lcs: cleaner progress print 2019-03-31 15:42:58 +02:00
80068a3c19 ngsfilter: fixed parsing error 2019-03-31 15:42:30 +02:00
a3e6b7d913 obi import: fixed import of View_NUC_SEQS to another DMS 2019-03-31 15:42:07 +02:00
416c2d7ba0 Cython: made fasta formatter cleaner 2019-03-31 15:41:32 +02:00
26fb149efb C: made build_ref_db cleaner 2019-03-31 15:40:13 +02:00
2b8c066f8e Cython: added possibility to output in tabular format 2019-03-31 15:39:38 +02:00
e39c1a7fbf Cython: added tab formatter and parser (for obi export) 2019-03-31 15:38:34 +02:00
6841d879aa obi history: fixed a bug when displaying ascii history 2019-03-31 10:51:52 +02:00
f0ff585455 Removing trace 2019-03-30 20:52:54 +01:00
601a2cfd7d obi uniq: various fixes... 2019-03-30 20:34:53 +01:00
7c518300a0 C: Views: fixed a bug when creating automatic columns with unformatted
comments
2019-03-30 20:33:14 +01:00
f16bbca8e2 obi grep: fixed a bug where -p option didn't work 2019-03-30 19:10:42 +01:00
173483448a Merge commit '3d842ff7' 2019-03-30 15:29:52 +01:00
52b3a9fc39 C: taxonomy: fixed a segfault on linux when trying to fclose an unopened
file
2019-03-30 15:19:12 +01:00
ce686e9569 obi import: progress bar fixed when using --only option 2019-03-30 15:16:57 +01:00
c293cfabbb Python: embl parser: fixed a bug preventing taxids from being parsed 2019-03-30 15:15:49 +01:00
0847d618d6 fixed typo 2019-03-30 15:14:30 +01:00
9fcebd7643 C: build_reference_db: made some errors more explicit 2019-03-30 15:11:49 +01:00
5d842ff7e7 Clean the manifest of old files 2019-03-29 16:58:45 +01:00
3445579251 remove all the no more needed .cfiles 2019-03-29 16:56:58 +01:00
995a66b488 Add the new script emplacement 2019-03-29 16:55:23 +01:00
5007b02cbc cleaning stage 2 2019-03-29 16:46:17 +01:00
cdd5975e8b Cleaning first stage 2019-03-29 16:40:36 +01:00
0c466046f4 Merge branch 'pip-standard-orig-python' into 'master'
The new install version based on classical setup.py

See merge request obitools/obitools3!1
2019-03-29 16:25:01 +01:00
2774422224 Patch the way of installing the obi main command 2019-03-29 16:21:28 +01:00
bf62960ee8 Add some .ignore files 2019-03-29 15:52:54 +01:00
9c41b1e9da Move and rename the obi.py script 2019-03-29 15:45:38 +01:00
3a6d1be795 remove the old distutils 2019-03-29 15:42:45 +01:00
6c52966033 Switch back to the original python source from the paster branch 2019-03-29 15:24:31 +01:00
460187970f First version compiling the code correctly 2019-03-29 15:22:10 +01:00
52b8008bdc Add the location of install 2019-03-29 15:21:30 +01:00
6b9da38087 patch some relative imports 2019-03-29 15:19:44 +01:00
ee9947217c alignpairedend: fixed the worst memory leak and the handling of the case
where 0 common kmers are found
2019-03-29 11:16:25 +01:00
ceaafca427 ngsfilter: fixed a bug (maybe 2) in the algo for the choice of the
reverse primer when running on unaligned sequences
2019-03-29 10:56:17 +01:00
7c2787b6b3 trying to fix cython difficulties 2019-03-26 16:19:55 +01:00
14eca43eac Import taxo 2019-03-26 16:17:44 +01:00
0b4ea49539 Convert relative imports and delete cfiles 2019-03-26 16:14:03 +01:00
cd88c37a7e Merge branch 'pip-standard' of git@git.metabarcoding.org:obitools/obitools3.git into pip-standard 2019-03-26 15:54:49 +01:00
1095a617a3 Patch relative import to absolute 2019-03-26 15:54:33 +01:00
5a05258fcb fixed relative cython imports to be absolute 2019-03-26 15:52:59 +01:00
10ab557259 First version of the simplified setup.py script 2019-03-26 15:40:31 +01:00
8e70bf1ee1 obi import: fixed bug when rewriting a column (keeping wrong type in
import module)
2019-03-26 14:56:18 +01:00
d8a7bd42bd Cython API, taxonomy: fixed parental tree iterator (skipped second to
last taxon, in OBI1 too)
2019-03-26 14:08:54 +01:00
06178d9d61 Genbank file parser functions that should have been included in a
previous commit
2019-03-20 11:44:43 +01:00
3abe1b7ace obi_errno_to_exception function now properly reads obi_errno global
variable directly
2019-03-20 11:43:12 +01:00
802a3f5933 data import: entries now counted if there are multiple files 2019-03-18 18:16:39 +01:00
7e20870719 Added genbank parser 2019-03-15 16:06:27 +01:00
e8090a44c9 Fixed the ultimate bug with embl (and genbank) parsers: raising any
exception in a python generator makes it unable to resume. So now,
exceptions are not raised but printed, then functions return None and
that's handled at higher level.
2019-03-15 16:06:06 +01:00
832f582802 Fixed no-skip-on-error option :p 2019-03-15 16:04:04 +01:00
58d0c850c2 Made skip on error option True by default...... 2019-03-15 15:50:40 +01:00
7737211ac2 Small fix in embl and genbank features parser 2019-03-15 15:50:11 +01:00
c953f0cb00 Fixed embl import where sequences were not imported as Nuc_Seq objects 2019-03-15 11:41:07 +01:00
bb045c3ae9 added TAXID_COLUMN to C API declarations for Cython 2019-03-15 11:40:06 +01:00
2a4f1b8feb obi import: now properly uses macros for column names 2019-03-15 11:39:21 +01:00
24a63f8732 URIs: URIs built with autocomplete now work too 2019-03-15 10:52:27 +01:00
478d19ab43 Cleaner stderr prints 2019-03-13 18:36:31 +01:00
e3c565d6be Cleaner progress bar 2019-03-13 18:36:05 +01:00
d88390c6d8 Cython API: when importing a file in a DMS, its length is computed
beforehand for the progress bar
2019-03-13 18:35:32 +01:00
50e7cd61a6 added math.h import where needed 2019-03-13 11:17:25 +01:00
49d5f6fb1e removed deprecated comment 2019-03-13 11:17:04 +01:00
b45c2ee653 Cython API: cleaner column rewriting API 2019-03-13 11:13:55 +01:00
6afd1294a7 Cython API: Views: fixed a bug when rewriting a column with different
attributes (last line is not written anymore)
2019-03-12 16:40:30 +01:00
a9ba7744cf obidistutils: added fPIC flag needed for linux compilation and set
minimum python version to 3.7
2019-03-12 14:20:59 +01:00
185a95e667 cleaner Makefile 2019-03-11 15:20:10 +01:00
8835a1a983 removed -R compilation flag that gcc doesn't like 2019-03-07 15:55:46 +01:00
1ee50b7222 Fixed a bug when creating a column and checking the comments string if
it was NULL
2019-03-07 15:09:59 +01:00
720bb65b24 Installation: basic Makefile that creates the shared obi3 library used
by Cython
2019-03-07 14:01:37 +01:00
2a1ab9db29 Cython API, Views: guessing an obitype from a python value is now done
through the corresponding functions in utils
2019-03-07 13:57:37 +01:00
4bc52c08c2 minor changes 2019-03-07 13:53:37 +01:00
306da846e3 obidistutils: link obi3 C shared library instead of compiling all C
files with all modules (creating issues with global/static variables).
EXCEPT RUNTIME LINKING DOESN'T WORK YET
2019-03-07 13:50:29 +01:00
af57e532da obidistutils: create doc/sphinx dir if needed 2019-03-07 13:47:23 +01:00
52de6f2717 Update distutils for openmp and new version of pip 2019-02-19 17:30:53 +01:00
29c56572cf Add cfiles everywhere ;-) 2019-02-19 15:04:30 +01:00
de3d12af17 Renamed CAPI file 2019-02-19 14:50:30 +01:00
9ccddd5280 better cfiles 2019-02-19 14:11:29 +01:00
e026e9ec83 Fixed the new alignpaired end to work after ngsfilter with the 9879847
possible cases
2019-02-17 18:32:35 +01:00
4ddd1a1c37 embl iterator: only option on embl directories now works as intended 2019-02-12 16:46:08 +01:00
3015310535 Fixed a bug in kmer similarity computation where the fact that sequences
could be switched was not accounted for
2019-02-10 21:02:24 +01:00
08bcbcd357 ngsfilter: reworked to use apat library 2019-02-06 18:13:54 +01:00
04a3682307 Cython API: added API to use apat (pattern search) C library 2019-02-06 18:12:49 +01:00
6ca6d27ecb ecoPCR: fixed amplicon length computation bug 2019-02-06 18:11:20 +01:00
8f18907566 Cython API: changed revcomp attribute of Nuc_Seq class to is_revcomp to
be more explicit
2019-02-06 18:09:11 +01:00
0b62619e4e Various commentaries and insignificant fixes 2019-01-21 17:32:44 +01:00
c7f5b8d980 Alignpairedend: added alignment using shifting with best kmer similarity
(low level layer in C and Cython API)
2019-01-21 17:30:46 +01:00
59017c0d6b C: taxonomy: fixed a bug when checking for root node 2019-01-21 17:23:25 +01:00
9f6bba183f C: Added a function to get a nucleotide at a specific index in an
encoded sequence
2019-01-21 17:18:02 +01:00
2a6a112d29 obi import: fixed writing quality in views when appropriate (but still
not a satisfying solution)
2018-12-11 19:33:55 +01:00
c437931a35 Cython: fixed history dot graph for all views, and fixed history
recording for build_ref_db and ecotag
2018-12-10 17:09:00 +01:00
eb586b2f53 New command and C functions: obi ecotag 2018-12-09 19:19:32 +01:00
9556130b11 C obi_lcs: updated deprecated column names and associated comments 2018-12-09 19:17:13 +01:00
005aaeec06 C obi_lcs: fixed checking for identical sequences when aligning 2
columns
2018-12-09 19:16:44 +01:00
579f56bb54 obi align (pouic): fixed bug with the saved config when aligning 2
different views
2018-12-09 19:15:58 +01:00
da445066f3 C alignment filter: added a check for sequences not being equal when the
threshold requires that they are
2018-12-09 19:14:51 +01:00
0a407436da C Views: made an error message more specific 2018-12-09 19:14:05 +01:00
54efff36c4 C build ref db: fixed 2 bugs when setting arrays: size of an element in
bits not bytes and using view API instead of column API
2018-12-09 19:13:06 +01:00
6acb21712a Missing commit for build_ref_db: C API file for cython 2018-12-09 19:11:59 +01:00
12087a6c3a C, views: made 'view_exists' function public (now 'obi_view_exists') 2018-11-27 16:20:30 +01:00
fbabbceb5a Fixed a bug in the array indexer where the value's length was not
properly set to 0 if the value was NA (ignore previous commit with the
same message)
2018-11-27 16:18:34 +01:00
6f27734d71 Cython: fixed a bug in INT tuple columns where values were converted to
double instead of int
2018-11-27 16:14:56 +01:00
b3bfa9ca65 Fixed a bug in the array indexer where the value's length was not
properly set to 0 if the value was NA
2018-11-27 16:12:41 +01:00
ece942e771 new command: build_ref_db to build a reference database with metadata
for the taxonomic assignment of sequences
2018-11-27 16:11:18 +01:00
ef8dc85f3c C, taxonomy: new function to get the lowest common ancestor of two taxa 2018-11-27 16:00:29 +01:00
f942dd856f C: new function to build a reference database with LCA and score
metadata for the taxonomic assignment of sequences
2018-11-27 15:56:50 +01:00
730ea99f85 minor fixes and comments 2018-11-19 11:23:54 +01:00
4d51f4f015 obi import: better checking of whether to import quality 2018-11-19 11:23:39 +01:00
e9c1d5e48d AVLs: made maximum number of nodes per AVL 5 millions, as this combined
with keeping all AVLs mapped seems the most efficient. Now 1 million
sequences more or less constantly takes 1 minute.
2018-11-19 11:22:26 +01:00
7fc1b578cf AVLs: AVLs in a group are not unmapped and remapped constantly anymore
when adding new values, fixed a bug when calculating if an AVL data file
has reached the maximum size, fixed a casting bug, and added a boolean
so read-only AVLs files are not truncated
2018-11-19 11:19:07 +01:00
31053591b5 Fixed 2 bugs when checking qualities matching sequences predicate: now
closing and reopening indexers so that they are mapped properly, and
fixed memory leak when reading sequences
2018-11-19 11:05:53 +01:00
b0da36cb48 New command: obi align, except it's called obi pouic for now because of
a Cython compilation bug
2018-11-07 16:05:48 +01:00
d1f1fd432e Minor fixes 2018-11-07 16:04:17 +01:00
75a28929a7 Renamed Cython alignment library in an attempt to limit some compilation
bugs potentially involving name conflicts
2018-11-07 16:03:32 +01:00
d076ea9900 Alignment: updated functions to align columns (LCS) 2018-11-07 16:00:58 +01:00
6b1c41f3fb Changed an error message to be more specific 2018-11-07 13:37:25 +01:00
362df50fe9 Removed a deprecated element from the DMS structure 2018-11-07 13:36:08 +01:00
b1090574da View import: associated column informations are now correctly updated
with the new versions
2018-11-07 13:35:11 +01:00
8faabd3ebf Cython, URI: Fixed a bug when using an output URI with just a view name
to use the default DMS
2018-11-02 19:04:27 +01:00
35f3e7c30b All commands now handle outputting to another DMS + small fixes 2018-11-02 19:03:09 +01:00
8a8e9e50b2 Fixed declaration going with previous commit 2018-10-31 18:01:04 +01:00
c7ff53b948 obi clean: temporary views are now deleted 2018-10-31 17:52:51 +01:00
1b7bccb236 Small improvement when checking if a view exists 2018-10-31 17:51:10 +01:00
d09aa43133 Cython API: added a function to get the full path to the DMS directory 2018-10-31 14:46:25 +01:00
123e5dc0ac Cython URI API: added an argument to only open the DMS and return the
rest as a character string
2018-10-31 14:45:17 +01:00
320561a582 Views: Added argument to not automatically create default columns in
typed views, a function to delete a view and fixed view history bug
2018-10-31 14:38:05 +01:00
92c0fbc9bf Fixed a bug where an imported column was not flagged as finished,
resulting in its deletion when reopening the DMS.
2018-10-29 17:39:30 +01:00
b11d52d630 Fixed a bug with the DMS counter being wrongly initialized to 0 instead
of 1 (generating memory bugs when using the counter)
2018-10-29 16:12:37 +01:00
6305282305 obi clean: made more efficient with arrays (speed ~x15 compared with
OBI1)
2018-10-21 17:59:02 +02:00
d53323e7f4 Fixed comments bug with obi head and obi tail 2018-10-21 17:39:17 +02:00
e18b762d81 Weird buggy Eclipse commit with nothing changed 2018-10-21 17:35:18 +02:00
0a0f0682a9 Better handling of errors and exceptions when new view name already
exists
2018-10-17 19:47:40 +02:00
4802e32f72 Cython: Sequence objects: repr() method now returns a Fasta or Fastq
formatted string
2018-10-17 16:53:42 +02:00
b027762059 Cython: export: fixed exception raising when no quality data when
exporting to fastq
2018-10-17 16:52:51 +02:00
da0e3d4043 Cython: added full handling of NA strings when importing files 2018-10-17 16:41:15 +02:00
da76f911db Cython: Views: improved repr() method 2018-10-17 15:54:03 +02:00
61ad2deeca obi uniq: Added line breaks when printing informations to cut progress
bar properly
2018-10-17 15:53:28 +02:00
eb6d5581bd Cython: Progress bar: added a cut option to choose whether to do line
breaks every tenth of the full bar, set to False by default for lighter
printing
2018-10-17 15:52:26 +02:00
343dbc7e4d Cython: made the logger lighter (now prints just module name instead of
full module path)
2018-10-17 15:49:55 +02:00
6d018a2d28 Cython: Added 'modulename' in the config informations 2018-10-17 15:47:44 +02:00
2c2df4e098 C: Added a trick to suppress compilation warnings about an unused
function actually called in a macro
2018-10-17 13:13:23 +02:00
8ce6dd6d1a Updated prototypes with no arguments with a void argument as suggested
by compilation warnings
2018-10-17 12:00:40 +02:00
df70086384 New command: obi export 2018-10-17 11:27:50 +02:00
32d8396ee2 Cython: Added fasta and fastq writers 2018-10-17 11:27:15 +02:00
6a8670d24a Cython: minor fixes 2018-10-17 11:26:13 +02:00
ec73fa840a Cython: obi stats fixed to work with reworked options (forgotten in a
previous commit)
2018-10-17 11:25:53 +02:00
11032ec90b Cython: Sequence objects: Quality strings are now returned as bytes
instead of str
2018-10-17 11:24:44 +02:00
8a9ba8b0a8 Cython: Added Column line methods to get a Column line as a str or
bytes, and elements (keys, values) with None values are not returned
anymore
2018-10-17 11:23:07 +02:00
135d3b6e67 Cython: updated the URI decoding to handle outputs other than DMS 2018-10-17 11:21:29 +02:00
58589e04be Cython: rearranged input and output format options to have both and
updated commands accordingly
2018-10-17 11:19:48 +02:00
e6bbe13d81 Cython: fasta and fastq parsers now return bytes and take NA string
argument
2018-10-17 11:16:20 +02:00
61b00d6013 Cython: fastq formatter 2018-10-09 16:41:14 +02:00
8029493c10 Cython: fasta and fastq header formatter 2018-10-09 16:41:00 +02:00
aa5ee53478 Cython: fasta writer 2018-10-09 16:40:30 +02:00
e31c8ea57a New command: obi history to print DMS or view history in bash, dot or
ascii formats
2018-10-07 19:11:36 +02:00
9e700ddc21 obi test: updated to test comments 2018-10-07 19:10:46 +02:00
e9a41c5b97 Commands: updated for JSON formatted comments with history 2018-10-07 19:10:34 +02:00
35cf2962cc Cython: DMS: JSON formatted comments and history handling 2018-10-07 19:06:59 +02:00
74be3c39f0 Cython: Views: JSON formatted comments and history handling 2018-10-07 19:06:23 +02:00
c6ee0bade9 Cython: Columns: goes with handling of JSON formatted comments 2018-10-07 19:04:50 +02:00
ffd5bc76bf Cython utils: functions convert to bytes or str and to remove all empty
objects from a complex object
2018-10-07 19:03:38 +02:00
704d9b0474 Cython: Columns: added support for JSON formatted comments 2018-10-07 18:59:43 +02:00
86bb582a17 Views: implemented handling of JSON formatted comments 2018-10-07 18:56:46 +02:00
bc8c394061 Columns: implemented handling of JSON formatted comments 2018-10-07 18:54:51 +02:00
cef458f570 Obierrno: added errno for JSON related errors 2018-10-07 18:53:53 +02:00
2736a92699 DMS: implemented full information file with JSON formatted comments 2018-10-07 18:53:25 +02:00
79f4185757 C library to handle JSON formatted comments using the cJSON library 2018-10-07 18:51:27 +02:00
1b6b6d825a obi grep: added all the missing filtering options 2018-08-14 17:11:41 +02:00
3847850a9d Taxonomy Cython API: added is_ancestor() function 2018-08-14 17:09:40 +02:00
b57e938cc4 New command: obi stats 2018-08-13 15:08:10 +02:00
2dc7fcceac Minor fixes 2018-08-10 10:39:46 +02:00
e096b929dc New command: obi tail 2018-08-10 10:39:26 +02:00
2c634dae7c New command: obi head 2018-08-10 10:29:37 +02:00
7a4cdc0cfe New command: obi sort 2018-08-09 18:10:47 +02:00
e8dc5eb123 Commands: ngsfilter and alignpairedend can now be used in whichever
order
2018-08-08 19:53:26 +02:00
3fcf29a76f More explicit predicate error when checking that sequences and qualities
match
2018-08-08 19:51:05 +02:00
080a97cccf Cython API: more explicit "Can't guess type" exception 2018-08-08 19:50:26 +02:00
9c9aec2556 Cython API: the associated sequence column for a quality column can now
be specified at the Python level
2018-08-08 19:49:56 +02:00
303648bd47 Cython: embl file parser 2018-07-28 17:14:10 +02:00
2ba6d16147 New command: obi ecopcr 2018-07-28 17:13:45 +02:00
275d85dc5d Cython: fixed a bug when reading an uncompressed file in binary mode
where the first 4 characters would not be read
2018-07-28 17:11:51 +02:00
a39f9697be Views: added macro for taxid column name 2018-07-28 17:10:11 +02:00
b98880b7fa Various non-important fixes and comments 2018-07-28 17:07:17 +02:00
895d09b133 obi import: 'taxid' columns are imported as 'TAXID' to fit view
predicates, and fixed taxdump import and DMS closing
2018-07-28 17:03:00 +02:00
c02c15b93f Cython API: URI decoding now returns the character string with the
object path if it could not be opened
2018-07-28 17:00:42 +02:00
3e8c187f0b Cython API: added EMBL parser and files to import are now read in binary
mode
2018-07-28 16:57:01 +02:00
7f6d1597fc Taxonomy: added functions to check if a taxonomy already exists in a
DMS, and added taxdump import from a compressed file
2018-07-28 16:48:11 +02:00
1de308a856 obi clean: option to only keep heads now works, fixed a bug where last
sequence was not properly labelled, and code is cleaned, fixed and error
checked
2018-05-31 15:11:41 +02:00
892ed83a33 Removed deprecated function declarations 2018-05-31 15:08:11 +02:00
6911bf4d70 obi clean: first version 2018-05-18 14:26:54 +02:00
f0c147c252 C API: Added a function to set an entire column to a specified (atomic)
value.
2018-05-17 15:59:16 +02:00
4aef20add8 Fixed a bug where the line selection column of a view would not be
flagged as finished
2018-05-17 15:17:19 +02:00
62614a8538 Cython API: fixed a bug in URI decoding and option handling where the
quality offset would not be read properly
2018-05-17 15:10:52 +02:00
ffebc6acfb Cython API: better handling of default quality offset value 2018-05-17 15:01:25 +02:00
b91b3176b0 obi uniq: fixed a bug where merged values were wrongly reinitialized 2018-05-17 14:58:15 +02:00
31d8ba5085 obi test: minor change 2018-05-17 14:54:45 +02:00
a166a169cf obi ngsfilter: fixed a bug with -u option 2018-05-17 14:53:53 +02:00
8a10072d99 obi annotate: fixed a bug with --with-taxon-at-rank option and minor
improvements
2018-05-17 14:51:18 +02:00
b380368264 Obi count command 2018-04-04 15:51:23 +02:00
1f4e82e6f6 Fixed three bugs in obi uniq 2018-04-04 15:50:10 +02:00
6825fc13ab Cython API: added ngsfilter file parser 2018-03-21 16:41:25 +01:00
49c17ab7b4 Cython API: added tabular file parser 2018-03-21 16:41:09 +01:00
2684535e26 New command: obi annotate 2018-03-21 16:39:31 +01:00
123fb9d7ba Cython API: in taxonomy, added get_taxon_at_rank() function for Taxonomy
class and rank_idx property for Taxon class
2018-03-21 16:38:26 +01:00
4c3478d8f8 Removed the predicate to check for a quality column (because for example
with obi annotate, clone view so clone predicate, then modify seq, so
quality is deleted, and predicate becomes a problem)
2018-03-21 16:37:19 +01:00
4a815785c4 obi import: added basic taxdump import 2018-03-21 16:35:44 +01:00
75b54c83ca obi grep: fixed bug when reading URIs 2018-03-21 16:34:57 +01:00
53cb3354b8 obi ls command 2018-03-19 13:08:41 +01:00
ea58e254da Cython API: repr function for DMS 2018-03-19 13:08:06 +01:00
9fb63d4894 Minor fixes 2018-03-16 19:05:09 +01:00
d4f7e02c85 New obi grep working with URI API 2018-03-16 19:04:54 +01:00
15e43bb9a1 Cython API: obi import can now import ngsfilter files and tabular files 2018-03-12 18:10:43 +01:00
8a0b95c1d6 New command: obi ngsfilter 2018-03-12 18:09:22 +01:00
dd225a255f obi uniq: better error checking 2018-03-12 18:04:53 +01:00
dad21823ff Cython API: trying to guess the type of a column when adding a None
value does not generate an exception anymore, and RollbackException can
now rollback several views
2018-03-12 18:03:37 +01:00
96bf2daae8 Cython API: added slices in Seq classes and fixes 2018-03-12 17:51:41 +01:00
e6c49b7941 Cython API: moved an eval function to utils 2018-03-12 17:49:54 +01:00
4960662332 Cython API: tobytes() function now handles None values 2018-03-12 17:25:12 +01:00
b2cfa4b52f Cython Sequence classes: reworked improved etc 2018-02-12 14:54:47 +01:00
94a899de12 Cython View API: added small tools 2018-02-12 14:48:27 +01:00
b48330a5c9 Fixed a little bug when cleaning unfinished views 2018-02-12 14:44:56 +01:00
74d880b817 Fixed default quality offset 2018-02-12 14:43:44 +01:00
00993d4215 Cython API: fixed a bug where the quality format would not be read
properly from the configuration values
2018-02-12 14:42:30 +01:00
370fb9272c obi uniq: better typing 2018-02-12 14:38:07 +01:00
c8097e14e1 obi import: removed old traces 2018-02-12 14:36:56 +01:00
01ef85658c New command: obi alignpairedend 2018-02-12 13:30:06 +01:00
f5a00c9322 Cython alignment library 2018-02-12 13:28:20 +01:00
156fb04e88 Implemented functions to build reverse complement sequences 2018-01-05 16:08:36 +01:00
428c4eb5e6 obi import: fixed creation of quality columns (to discuss) 2017-12-19 11:07:00 +01:00
1a5b499b5c Cython API to add an OBI_QUAL column after creating a view 2017-12-19 11:06:24 +01:00
b7b8ba7e5a Better handling of elements names in Cython 2017-12-13 23:12:14 +01:00
e9e7fac999 New obi uniq: stores columns with too many elements per line as
character strings, and keeps a minimum of things in the memory
2017-12-13 22:49:08 +01:00
1fd3323372 Columns: elements names informations are now kept in a memory arena of
adapted size in the header, and added a boolean in the header indicating
whether the values should be evaluated (typically character strings to
be evaluated in Python)
2017-12-13 22:46:50 +01:00
2df5932b67 Cython column API: fixed a memory leak, optimized the reading of
elements names, added a __len__ method to Column_line, and the API for
columns with character strings to evaluate
2017-12-13 22:27:36 +01:00
b93b982a18 Cython: added an option for input taxdump and an option for the
maximum number of elements in columns with multiple elements per line
2017-12-13 22:25:15 +01:00
ea73047fc7 Added rewinddir before each readdir so that the directories are always
read properly
2017-11-24 18:04:58 +01:00
0998268955 Fixed two little potential bugs when cleaning unfinished columns and
deleted old trace
2017-11-24 18:03:59 +01:00
31726407a3 Taxonomy: fixed a bug where a pointer was not properly reallocated, and
a bug where the merged list of taxids was not built correctly
2017-11-24 18:01:30 +01:00
d21f4a6f90 Header parser: identifiers ending with ';' are now handled 2017-11-24 17:59:52 +01:00
9e3ac477eb OBIDMS: Opened DMS now have a counter associated so that DMS are not
actually opened several times by the same program, which triggers the
cleaning of unfinished views and columns (to discuss)
2017-11-24 17:58:47 +01:00
ee5d647d0d Taxonomy: fixed a bug in parental tree iterator 2017-11-24 17:55:17 +01:00
38fef5b9d4 obi test: better taxonomy testing 2017-11-24 17:54:10 +01:00
3ba7ce1c91 View rollback: version files and column directories aren't deleted
anymore to prevent indexer bug, and fixed a freeing bug
2017-11-15 17:27:26 +01:00
9a50803c00 Added tuple columns containing immutable indexed data arrays of any type 2017-11-15 13:48:59 +01:00
1684f96b79 Fixed a bug when flagging a read-only column as finished 2017-10-26 19:11:29 +02:00
43f65e7fd0 obi uniq: fixed bug where dictionary indexes were not read properly, and
added view rollback in case of an exception.
2017-10-26 19:00:05 +02:00
dfd51939a0 Views are now rollbacked if an error occurs, and unfinished views and
columns are deleted when an OBIDMS is opened.
2017-10-26 18:58:48 +02:00
1ae634d56b Added atexit command to obi import, obi uniq and obi less 2017-10-16 11:09:55 +02:00
04e065094a All DMS opened by a program are now listed and closed with atexit system 2017-10-16 10:35:07 +02:00
5ddd1d9ae6 obi uniq: added taxonomy handling 2017-10-04 16:13:07 +02:00
9fc6868341 Increased maximum length for elements names 2017-10-04 16:10:53 +02:00
f2ece573ff Removed deprecated command 2017-10-04 16:09:41 +02:00
fb9b219abe Fixed a bug with taxonomy URIs not being read correctly 2017-10-04 16:00:30 +02:00
09a5f89849 Column API: improvements to be more flexible when referring to elements
in columns with several elements per line.
2017-10-04 15:59:23 +02:00
535692b020 Taxonomy: new functions and improvements 2017-10-04 15:55:13 +02:00
0ab081f79e Updated obi test to work with changes in taxonomy API 2017-10-04 15:50:32 +02:00
1cb05de7e3 Basic obi less 2017-10-04 15:46:26 +02:00
532d8e9cd7 obi import: small efficiency improvement when dealing with NA values 2017-10-04 15:44:48 +02:00
b4088a7928 Cython API: Added basic taxonomy option 2017-10-04 15:42:17 +02:00
ae24a807da obi uniq: added the option to merge ids, except it only works on small
sets until lists are implemented properly using obiblobs
2017-09-25 17:28:03 +02:00
75c15594c4 obi uniq: added option to use categories additionally to the sequence to
determine uniqueness
2017-09-25 10:56:43 +02:00
5ed6835e0e Fixed a bug where the new line count when truncating a column would not
be computed correctly when dealing with high numbers (bad automatic type
for intermediate result)
2017-09-25 10:52:19 +02:00
41dec03448 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2017-09-18 16:08:31 +02:00
7c57bd33e5 Added check to prevent views from having the name 'taxonomy' (used for
URIs)
2017-09-15 14:54:55 +02:00
a776e46e6d Add the command name in the log 2017-09-15 14:51:13 +02:00
0e140df0fb Cython API: added some imports in __init__ files 2017-09-14 18:30:04 +02:00
4bb071c048 Merge branch 'master' of
git@git.metabarcoding.org:obitools/obitools3.git

Conflicts:
	python/obitools3/commands/import.pyx
2017-09-05 08:59:45 +02:00
5045d0c2e9 xxx 2017-09-05 08:58:07 +02:00
73bca6288f New obi uniq 2017-08-20 18:04:21 +02:00
6a2759eee6 obi import with new input/output API 2017-08-20 17:58:36 +02:00
38029b1f77 Forgot a ; 2017-08-20 17:56:18 +02:00
663a1a1091 Cython API: column elements: added possibility to check if an element
exists from its index, and a dict-like get() method
2017-08-20 17:44:05 +02:00
c6d5436a58 Cython API: fixed a bug where iteration on a NUC_SEQS view would not be
done correctly (bug appeared with optimization modifications done
lately)
2017-08-20 17:41:41 +02:00
47cad285d6 Cython API: fixed 2 little bugs in Seq API 2017-08-20 17:39:30 +02:00
74f15d1a23 Cython API: Various fixes in input handlers (parsers, openers etc).
Mostly working but not bug-free
2017-08-20 17:37:51 +02:00
c559ddf487 BUG FIX: creation of a new column would fail because of a case not
handled when a high number of elements per line would imply less than
one line per memory page
2017-08-20 17:30:23 +02:00
93cff94e7f Fixed some compilation warnings 2017-08-20 17:25:58 +02:00
9744a48a67 BUG FIX: seemingly identical obiblobs would have different hash values
because of the padding added by the compiler. Fixed by using calloc
instead of malloc for obiblob memory allocation.
2017-08-20 17:25:15 +02:00
6afdc9fb5f AVLs: Added an error check 2017-08-20 17:21:06 +02:00
6f202363f4 Fixed a typo in doc 2017-08-20 17:20:13 +02:00
7f1ff49aa2 Cython API to import a column and a view from a DMS to another DMS 2017-08-03 16:34:02 +02:00
4b86aa67a8 New C functions to import a column and a view from a DMS to another DMS 2017-08-03 16:33:12 +02:00
a3e81930c2 Views: finished handling and documenting the conditions for an existing
column to be added to a view
2017-08-03 16:32:22 +02:00
644b55b49f Fixed doc typo 2017-08-03 16:29:25 +02:00
927c684fc2 Utils: new function to copy the content of a file into another file 2017-08-03 16:28:54 +02:00
344566d9e9 AVLs: made some functions public and changed some rights to be able to
import AVLs from a DMS to another
2017-08-03 16:27:43 +02:00
407f61a408 Add the possibility to create temporary objects like a temporary
directory and a temporary DMS
2017-07-28 16:33:19 +02:00
09ddd74652 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2017-07-28 15:57:01 +02:00
7c0d882bc9 Patch a bug when creating a DMS not in the current directory. Use the
basename function to locate the DMS name instead of the loop...
2017-07-28 15:56:21 +02:00
35b0c55a8c Cython API: various improvements and checks 2017-07-28 13:15:13 +02:00
b9c65a871f Patch decoding of URL 2017-07-28 12:41:28 +02:00
84bb93096f Cython API: fixes and improvements in Column API 2017-07-28 10:27:04 +02:00
01c69e7e25 Cython API: fixed a bug when printing a column 2017-07-28 10:01:56 +02:00
adf5cbef97 Added DMS method to create a DMS if it doesn't already exist, otherwise
opens it
2017-07-28 09:55:43 +02:00
da48a9d1af Patch group of option : types must be callable not a string 2017-07-28 09:36:18 +02:00
9482c663c0 minor comments and changes 2017-07-27 19:46:34 +02:00
c5f3fdc295 Increased maximum element names length in columns 2017-07-27 19:44:49 +02:00
89e2f80fd8 Goes with previous commit 2017-07-27 19:43:00 +02:00
7112f44fb7 Bug fixes for input handlers, openers, parsers etc. Compiling but not
tested
2017-07-27 19:42:44 +02:00
b2fc1f4611 obi uniq: first version 2017-07-27 19:40:19 +02:00
75f691d55a Cython API: Seq classes reworked 2017-07-27 19:39:58 +02:00
0655063bb0 Cython API: view_NUC_SEQS changes to go with previous commits 2017-07-27 19:39:26 +02:00
9701b1230c Cython API: OBIWrapper.new method is now OBIWrapper.new_wrapper to avoid
mismatching method definitions with subclasses
2017-07-27 19:38:25 +02:00
f8a4428674 Cython API: DMS test_open method doesn't raise an exception anymore 2017-07-27 19:36:28 +02:00
1a0f18a11a Cython API: added a __setitem__ method to the View class that can detect
if the item is a Line and create the corresponding columns if needed +
minor changes
2017-07-27 19:35:28 +02:00
3d7aa52c90 Cython API: Fixed a bug when setting NA values in Column_multi_elts, and
added some properties
2017-07-27 19:31:15 +02:00
69c50ff922 Cython API: added a Column subclass to allow direct access to indexes
for columns that store indexes referring to other data
2017-07-27 19:29:10 +02:00
c91969126b Cython C API declarations to go with previous commit 2017-07-27 19:26:59 +02:00
15d383fa8b Added possibility to specify the offset for encoding and decoding
sequence quality character strings
2017-07-27 19:24:41 +02:00
99ceed5fff Cython API: renamed OBI_Taxonomy to Taxonomy and OBI_Taxon to Taxon 2017-07-27 19:21:45 +02:00
fa8f826cdc Cleanup the end of the file 2017-07-27 16:07:39 +02:00
dc91174a5e Complete the input option group functions 2017-07-27 16:06:48 +02:00
ec65f00cf2 Complete the fasta iterator to manage new input options 2017-07-27 16:05:30 +02:00
8d9cdb4d03 Complete the fastq iterator to manage new input options 2017-07-27 16:05:17 +02:00
949e5f9baf Make a first full version of the URI decoder 2017-07-27 16:04:31 +02:00
3c6a05be54 Add option to the default config corresponding to the parsing of the
inputs
2017-07-27 16:03:47 +02:00
8781ecab1f Add a factory checking the file format and returning the correct
iterator. First version working only with fasta and fastq nucleic
formats
2017-07-27 16:02:52 +02:00
0f6ae7dfa6 Options stuff... ;-) 2017-07-25 13:07:03 +02:00
28259cd88b Beginning of URI decoder -- !!! NOT YET FULLY IMPLEMENTED !!! 2017-07-25 13:05:58 +02:00
b24be84b0a Add a first group of options 2017-07-25 11:14:30 +02:00
59dd0a8a8c Standardized and improved the API to create new columns, updated the doc 2017-07-18 17:34:32 +02:00
c88df2e12c First version of automatic ID and COUNT columns, to discuss (for now,
columns created when NUC_SEQ views are closed if the columns don't
already exist)
2017-07-17 17:31:09 +02:00
1e57bfacb4 Fixed some C documentation 2017-07-17 16:45:08 +02:00
3e6aecc635 Added a C function to add a COUNT column to a view with all lines set to
1
2017-07-11 16:44:23 +02:00
ced9a268a1 obi import: added an option to specify the NA value in the input file
(default is 'NA', same as in R's read.table function)
2017-07-11 12:10:33 +02:00
df2ad41150 Cython API: Added a width property to views, corresponding to their
column count
2017-07-11 11:46:32 +02:00
f8895e879d Cython API: Added a function to get a column from its index in the view 2017-07-11 11:36:42 +02:00
b729b8928f obi less: fixed bug when the length of a view would be less than the
default number of lines printed
2017-07-10 17:04:02 +02:00
b6b95f26b6 obi import: Skipping sequences is now done through the iterators so that
sequences are not uselessly parsed
2017-07-10 17:02:30 +02:00
b94ec9557f Cython API: None values aren't included anymore in the dictionary
returned when getting a line from a column with multiple elements per
line, and reworked that function to be more optimized
2017-07-07 17:28:53 +02:00
143bddf1d1 Cython API: Added an __iter__ method to the class Column_line (iterating
on the elements names) (previously an iteration would work but with
unexpected results)
2017-07-07 15:41:10 +02:00
a718081ebd Bug with error handling: for now obi_errno needs to be passed to the
function handling errors and exceptions, as it can't read the right
value of the global obi_errno (Cython configuration problem?)
2017-07-07 15:36:11 +02:00
740d021276 obi import: fixed bugs when rewriting a column: a bug with new elements
names ignoring previous elements names found, a bug with the global
obi_errno being reset too late, and a bug with the column dictionary
used by obi import not being updated after rewriting a column
2017-07-07 15:33:43 +02:00
906343187b Fixed bug with view option in obi less and obi check 2017-07-06 16:42:27 +02:00
c3cd57a9e3 Removed deprecated file 2017-07-06 10:57:14 +02:00
f03928c679 Committing minor comments before merging branch with master 2017-07-06 10:56:39 +02:00
717ee46f08 Commented a loose print 2017-07-05 18:02:18 +02:00
313508cc94 Better *Seq* classes but still need work 2017-07-05 17:53:46 +02:00
535fc2af83 Column rewriter and optimized View getter 2017-07-05 17:49:05 +02:00
3bbc2ae469 More optimized Column item getter 2017-07-05 17:37:19 +02:00
5ee0b3989a Cython API: set_line of Column_multi_elts now accepts as values argument
any class where values are referenced by keys with an iterator
2017-07-05 17:32:32 +02:00
d10192ab0e C functions to detect IUPAC sequences 2017-07-05 17:26:03 +02:00
101f764cce New obi import with rewriting of columns when column type or line
elements (keys) change
2017-07-05 17:15:23 +02:00
cb5ad2ed2d Added functions to try to open a DMS if it exists 2017-07-05 15:38:22 +02:00
f5e992abbf Added a check on the element when setting a value in a column 2017-07-05 14:49:20 +02:00
1d2996c6c0 Better handling and tracing of Index Errors between C and Cython 2017-07-05 14:45:43 +02:00
f6631f3857 Removed deprecated declarations 2017-07-05 14:42:21 +02:00
3f5fef10b9 obi test: minor changes 2017-07-05 14:37:27 +02:00
20c72af697 Basic obi check command to check DMS and view information 2017-07-05 13:54:19 +02:00
d252131950 Basic obi less command 2017-07-05 13:44:12 +02:00
ca16ce0bb0 Basic obi grep with new Cython API 2017-07-05 11:58:10 +02:00
ac94b35336 Removed unused import 2017-07-05 11:52:31 +02:00
2d65db4ebc Goes with c2af955b : forgotten files for NUC_SEQS views 2017-04-21 15:15:12 +02:00
4b037ae236 Updated obi test to test NUC_SEQS views and the taxonomy API 2017-04-21 12:09:04 +02:00
c2af955b78 Cython view API: added NUC_SEQS views and sequence classes + changed
cloning API
2017-04-21 12:08:14 +02:00
71b1a43df8 Added functions to clone views with a simpler API 2017-04-21 11:58:15 +02:00
1725b8b80c Reworked taxonomy Cython API to be a subclass of OBIWrapper 2017-04-21 11:54:05 +02:00
ab0d08293e Cython API: removed unnecessary imports 2017-04-21 11:51:05 +02:00
2f0c4b90d7 Fixed a problem where a view would have a wrong line count after adding
a first column to it if there was already a Line selection associated
(happening when cloning), and fixed a bad error check.
2017-04-14 16:25:55 +02:00
537b9847da Minor C doc clarification 2017-04-14 16:23:17 +02:00
b998373be5 Cython API: updated the test command for the new API and deactivated the
other commands for now
2017-04-14 16:21:33 +02:00
6f780148e2 Cython API: added taxonomy API 2017-04-14 16:20:30 +02:00
0e08fc486a Cython API: fixed bug when deleting a column from a view where the
Cython wrapper wasn't closed, and fixed the Line selection
materialization
2017-04-14 16:19:18 +02:00
2bbee64e57 Cython API: fixed problems with Column class 2017-04-14 16:14:41 +02:00
693859eec2 Cython API: fixed conversion bugs when setting and getting values
(especially NA values) in OBI_CHAR, OBI_STR and OBI_SEQ columns
2017-04-14 16:07:23 +02:00
a3fad27190 Cython API: automatic importing of column classes now works 2017-04-06 15:45:02 +02:00
f351540b0b Merge branch 'Eric_new_Python_API' of git@git.metabarcoding.org:obitools/obitools3.git into Eric_new_Python_API 2017-04-06 15:39:52 +02:00
6dccaa0213 Patch the registering function : register_all_column_classes 2017-04-06 15:37:51 +02:00
5de9e0de51 Cython API: now using const char* instead of char* for the type of
values read from OBI_STR columns
2017-04-06 15:15:20 +02:00
ad8de80353 Views: better checks when adding an existing column to a view 2017-04-06 14:44:07 +02:00
8cd3e3604f Cython Column API 2017-04-06 14:42:11 +02:00
255f3c92ae Cython View API 2017-04-06 14:41:58 +02:00
08be4e231d Cython Object API 2017-04-06 14:41:43 +02:00
b5b7995411 new Cython DMS API 2017-04-06 14:41:26 +02:00
0dfb1eb3e6 Cython typed columns 2017-04-06 14:40:44 +02:00
381194194c Cython API: compiling but not working 2017-03-06 16:07:02 +01:00
778acc48cd Added linked lists to handle lists of column pointers in views (not
tested)
2017-03-06 16:06:17 +01:00
3319ede837 Views: Column dictionaries now store and return pointers on column
pointers instead of column pointers.
2017-02-22 13:49:50 +01:00
fc20b83ad1 Merging 2017-02-20 14:56:04 +01:00
431c1c8c6a Merge branch 'Eric_new_Python_API' of
git@git.metabarcoding.org:obitools/obitools3.git into
Eric_new_Python_API

Conflicts:
	python/obitools3/obidms/_obidms.pxd
	python/obitools3/obidms/_obidms.pyx
	python/obitools3/obidms/_obidmscolumn_bool.pyx
	python/obitools3/obidms/_obidmscolumn_str.pyx
	python/obitools3/obidms/_obiseq.pxd
	python/obitools3/obidms/_obiseq.pyx
	python/obitools3/obidms/_obitaxo.pxd
	python/obitools3/obidms/_obitaxo.pyx
	python/obitools3/obidms/_obiview.pxd
	python/obitools3/obidms/_obiview.pyx
	python/obitools3/obidms/_obiview_nuc_seq.pxd
	python/obitools3/obidms/_obiview_nuc_seq.pyx
	python/obitools3/obidms/_obiview_nuc_seq_qual.pxd
	python/obitools3/obidms/_obiview_nuc_seq_qual.pyx
	python/obitools3/obidms/capi/obialign.pxd
	python/obitools3/obidms/capi/obidmscolumn.pxd
	python/obitools3/obidms/capi/obitaxonomy.pxd
	python/obitools3/obidms/capi/obiview.pxd
2017-02-20 14:55:36 +01:00
f23315e26f New Cython API: compile but doesn't work 2017-02-17 15:14:06 +01:00
071a3b61ab Merged master fixed conflict. 2017-02-14 10:58:43 +01:00
e524041013 Views: Files for unfinished views now have the extension
'.obiview_unfinished', renamed to '.obiview' when the view is finished.
2017-02-07 17:16:09 +01:00
a9102620f5 Fixed missing email address 2017-02-07 17:14:10 +01:00
7e9932f488 Fixed a C function declaration 2017-02-07 17:12:56 +01:00
e50da64ea1 The elements names when a column contains several elements per line are
now formatted with '\0' as separator and handled in a more optimized way
2017-01-31 16:48:06 +01:00
651c1d7845 utilities: bsearch and qsort with additional user_data pointer argument 2017-01-31 16:45:47 +01:00
c0bcdce724 Taxonomy: documentation for all the functions, and fixed bugs when
closing the taxonomy (overwriting of .pdx files, missing freeing, and
re-placed a misplaced condition)
2017-01-18 18:22:49 +01:00
c065c1914a Taxonomy: adding, writing and reading preferred names, changed some
function names, and fixed a bug with taxa indices not being properly
initialized
2017-01-16 17:28:20 +01:00
0385a92e02 Taxonomy: Refactored the taxdump reading, and little fixes 2017-01-11 16:36:08 +01:00
cf7f2de016 Modify __init__ and close method to deal with registration process 2017-01-10 14:26:16 +01:00
5122ad52a7 Merge branch 'Eric_new_Python_API' of git@git.metabarcoding.org:obitools/obitools3.git into Eric_new_Python_API 2017-01-10 14:07:50 +01:00
4b02ba73ac Add the OBIObject concept 2017-01-10 14:07:10 +01:00
41ad3deec0 Taxonomy: information about deleted taxids is now read from
delnodes.dmp file and added to *.adx file
2017-01-09 17:28:49 +01:00
d68374018b Taxonomy: functions to read the *.adx file (containing the deprecated
and current taxids and their corresponding indices in the taxa
structure) and to find the taxa using the merged index.
2017-01-06 15:52:21 +01:00
f396625f98 Taxonomy: function to write *.adx files 2017-01-05 15:37:13 +01:00
897032387f Taxonomy: reading merged.dmp file in taxdump 2017-01-05 14:28:36 +01:00
4a1d3167a7 Last change on my branch 2017-01-02 16:46:52 +01:00
153c22257f Last change on my branch 2017-01-02 16:46:17 +01:00
2139bfc748 refactoring... 2017-01-02 13:05:22 +01:00
65f3b16e6d Refactoring ... 2016-12-29 18:22:05 +01:00
0526386337 first working DMS class 2016-12-27 06:17:45 +01:00
62caf1346e temporary remove some files 2016-12-26 15:03:24 +01:00
3ac6e85fb3 Big refactoring 4 2016-12-26 14:58:03 +01:00
5156f6bb9e Big refactoring 3 2016-12-26 14:18:01 +01:00
e6db2086d5 Big refactoring 2 2016-12-26 13:56:31 +01:00
daacd0df76 Strong refactoring 1 2016-12-26 13:35:31 +01:00
8e92bf6dac LCS alignment: it is now checked that sequences are not longer than what
a 16 bits integer can code for (as the LCS and alignment lengths are
kept in 16 bits registers)
2016-12-22 17:06:23 +01:00
30e4359c85 LCS alignment: documentation for all the lowest level functions 2016-12-22 17:03:51 +01:00
5c50e5b378 Embryo of code for openMP parallelization of LCS alignment but
deactivated for now because can't make it compile with cython/clang
2016-12-20 11:46:58 +01:00
3cedd00d7f Add register function for column type 2016-12-20 11:13:57 +01:00
82fbe43980 transfer method to obiviews 2016-12-20 08:18:47 +01:00
d1a972dfcb patch import 2016-12-20 08:15:42 +01:00
f43dc3e3ab separate the obicolumn classes in new files 2016-12-20 08:15:08 +01:00
9c71b06117 Removed deprecated TODOs 2016-12-19 14:36:40 +01:00
3bf5260174 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-12-19 10:31:18 +01:00
857a5198e4 Updated `obi lcs` for the LCS alignment of two columns 2016-12-16 19:40:36 +01:00
d99447c12b C function for LCS alignment of two columns, and optimized and fixed
line count bug in function to align one column
2016-12-16 19:39:02 +01:00
303bd6f445 Added function to build kmer table for 2 columns, and fixed bug (with
line count) when building kmer table of one column
2016-12-16 19:10:18 +01:00
490f5fe6b9 Updated deprecated code in cython API for columns (using line count of
view instead of column)
2016-12-16 19:04:21 +01:00
191c83aafc Added missing *.cfiles 2016-12-15 15:28:34 +01:00
04d39c62ab Try for a new API 2016-12-14 08:44:44 +01:00
9b24818fe2 Refactored alignment code for minimum redundancy between the function
that aligns 1 column and the function that aligns 2 columns
2016-12-13 17:18:12 +01:00
06cb7a9a58 Some change in the way to manage access to special items of the
dictionary like sequence or quality
2016-12-13 12:49:34 +01:00
fc55fc117d Some cosmetic on the code 2016-12-13 12:48:13 +01:00
4ef5cb0d87 Move the OBIView_NUC_SEQS class to files _obiview_nuc_seq.pxd and
_obiview_nuc_seq.pyx to avoid circular inclusion
2016-12-13 12:46:49 +01:00
fc805e5443 Remove some warnings in the editor 2016-12-13 08:29:22 +01:00
8d7ef7d3d1 patch the distutils to add the C source directory in the include path.
This should solve most of the compilation problems related to .h files
located in this directory
2016-12-13 08:02:09 +01:00
8afb1644e9 Alignment: API rework. 'obi align' is now 'obi lcs', and the results are
now written to columns automatically created in the output view, all
optimally handled at the C level.
2016-12-12 11:58:59 +01:00
fa4e4ffaff Changed the cython API to create new views so as to have different
functions for the different cases
2016-12-07 14:17:57 +01:00
936be64c34 Goes with 5e0c9f87 (missing ';' and fixed compilation warnings) 2016-12-05 11:18:29 +01:00
5e0c9f878b Added the doc for the function building the element names, and a missing
free
2016-12-05 10:46:21 +01:00
852e5488c8 The default element names for columns with multiple elements per line
are now "0;1;2;...;n"
2016-12-02 17:54:51 +01:00
e60497651c Updated the documentation for the functions to set and get in the
context of a view
2016-11-30 12:22:47 +01:00
4ad8c16a73 Finished adding all the functions to directly set and get indices in
columns containing indices referring to any type of data.
2016-11-30 11:08:11 +01:00
6f6099687d Sequence alignment: if no sequence column is given and the view has the
type NUC_SEQS_VIEW, the default sequence column is aligned
2016-11-29 16:52:41 +01:00
98d0849653 Sequence alignment: added the possibility to specify the index of the
sequences to align in a column containing multiple sequences per line (C
level for now)
2016-11-29 16:15:02 +01:00
5fb025f310 When aligning, it is now quickly checked whether the sequences are
identical using their indexes
2016-11-28 11:39:29 +01:00
8ce6f6c80b Added an argument to specify whether the two sequences can be identical
when applying filters before aligning
2016-11-28 11:38:02 +01:00
3e53f9418b Added functions to recover the indexes themselves from any column
referring to indexed values
2016-11-28 11:35:19 +01:00
d40d2d0c76 Fixed error in documentation 2016-11-28 10:55:23 +01:00
f897e87600 When closing a view, it is now automatically checked that all OBI_QUAL
columns correspond to their associated OBI_SEQ column
2016-11-25 12:04:57 +01:00
70e056a2aa It is now impossible to open or clone a view that is not finished (= has
been closed at least once)
2016-11-24 11:19:07 +01:00
8abbfa203a Good file for commit 6fa9a8bd: When a view is cloned, a comment is added
to the new view specifying the name of the cloned view
2016-11-23 11:32:39 +01:00
6fa9a8bd76 When a view is cloned, a comment is added to the new view specifying the
name of the cloned view
2016-11-23 11:29:21 +01:00
76a4c6b14e Fixed a bug when cloning a view and checking its type 2016-11-23 11:28:17 +01:00
0ab9e6c05a When adding an existing column to a view, it is checked that the
column's line count is at least the view's line count. This can't be
more stringent for reasons that need to be rediscussed
2016-11-23 11:04:53 +01:00
70c49e214a Added the kmer filter to LCS alignments, and now obiblobs containing
encoded sequences are directly put in int16_t arrays for the alignment
2016-11-18 16:29:28 +01:00
08e67a090f Changed the inline functions syntax, which should make it compatible
with more compilers
2016-11-18 16:21:26 +01:00
621b4972db Functions to get obiblobs through views 2016-11-18 15:59:50 +01:00
7d022c1a52 If the indexer name is NULL when creating a column, it now becomes the
column name
2016-11-18 15:56:51 +01:00
1c71c195fc Goes with a0ebc2d8 2016-11-10 15:01:29 +01:00
54cfeffd85 Goes with 8f724f4f, forgotten file 2016-11-10 14:48:31 +01:00
a0ebc2d871 Functions to directly retrieve Obiblobs from indexers 2016-11-10 14:45:28 +01:00
8f724f4f8e Some code refactoring 2016-11-09 16:48:00 +01:00
359578814b Added view type property to OBIView cython class and updated obi export
to use it
2016-11-08 17:49:59 +01:00
51b23915ca Added properties for Nuc_Seq cython classes (and updated commands using
them)
2016-11-08 16:59:32 +01:00
b5b889c4a2 Fixed the OBI_Nuc_Seq_Stored cython class not being up to date with the
new properties of its parent class
2016-11-08 11:26:37 +01:00
36ac315125 Fixed bugs with python view type when creating a new view, and a bug
when trying to guess the obi type of a nucleotide sequence when its type
was bytes
2016-11-08 11:23:54 +01:00
8291693309 obi grep: updated to work with the new line selection class and within
the local sequence environment, and progress bar functioning
2016-11-08 11:19:12 +01:00
4bc19c3e49 obi export: view type is now checked and progress bar functioning 2016-11-08 11:17:20 +01:00
2d2fe5279d Added functions to add new taxa to a taxonomy with handling of
associated *.ldx files
2016-11-03 17:59:21 +01:00
2504bf0fa9 Added an iterator to the OBI_Taxonomy cython class 2016-11-02 11:08:18 +01:00
d8a257e711 Taxonomy handling functions in C. Features: read taxdump, read binary
files, write binary files. Not fully handled yet: *.adx, *.pdx, *.ldx,
merged.dmp and delnodes.dmp files.
2016-10-27 18:56:11 +02:00
b63d0fb9fb Added C functions to write .rdx, .tdx, .ndx binary taxonomy files from a
taxonomy C structure
2016-10-14 17:03:10 +02:00
0dfd67ec89 The endianness of binary taxonomy files is now correctly checked 2016-10-10 17:04:29 +02:00
0faaac49cf The taxonomy directory of the DMS is now automatically created with the
DMS
2016-10-10 17:02:51 +02:00
1b07109e51 Removed deprecated code 2016-10-10 17:01:51 +02:00
60ab503a14 Added properties in the OBI_Taxonomy class 2016-10-10 17:01:17 +02:00
2dcfdc59fc When a new view is created with a line selection, the view to clone is
automatically found + compacted redundant code + fixed potential bug
when cloning a NUC_SEQS view by name
2016-10-06 17:55:18 +02:00
399fc2c051 Removed deprecated source files previously used for tests 2016-09-30 17:49:37 +02:00
9cd57deca9 Added OBIView_line_selection class to make new line selections
associated with the view to clone, and improved and renamed method
closing a view
2016-09-30 17:48:53 +02:00
d88811ed7d Added a seed option to the obi test command for reproducible tests 2016-09-29 17:34:48 +02:00
8c402101e4 Renamed private attributes as _* and removed some deprecated code 2016-09-28 16:56:44 +02:00
1a7b42018e Added some error checking when opening or creating a view 2016-09-28 14:28:34 +02:00
b717e8bb8b Added properties for the OBIView class and cleaned up deprecated code 2016-09-28 14:26:23 +02:00
03a2c8ef7c Finished restructuring the OBIDMS_column class properties 2016-09-27 14:16:30 +02:00
a7f891d1c9 Added a lines_used property to the OBIDMS_column class 2016-09-26 18:04:28 +02:00
bd50b3f972 Added version property to OBIDMS_column class 2016-09-26 17:45:10 +02:00
81380363b7 Added original_name property to OBIDMS_column class 2016-09-26 17:31:32 +02:00
a4b8349274 Added data_type property to OBIDMS_column class 2016-09-26 17:12:20 +02:00
a474391b27 Added nb_elements_per_line property to OBIDMS_column class 2016-09-26 17:01:13 +02:00
a0bc45cc92 Added elements_names property to OBIDMS_column class 2016-09-26 16:53:16 +02:00
76f89717fe Added alias property to OBIDMS_column cython class 2016-09-26 16:12:48 +02:00
b408a4f6eb Changed file name limits to adapt to system limits + minor changes 2016-09-22 18:05:07 +02:00
b083745f56 Deleted the "new line selection while editing a view" system 2016-09-22 11:19:29 +02:00
43f3c69a40 Fixed bug when cloning column with line selection 2016-09-21 17:50:21 +02:00
e79507b629 Fixed bugs in the process ensuring that all the columns of a view have
the same line count, fixed a bug when trying to set a value in a view
when a line selection exists, fixed a bug when adding a new column to a
view where line counts would be wrong
2016-09-21 17:42:17 +02:00
bb25723d99 Improved documentation of a function 2016-09-21 17:30:39 +02:00
a0da984003 Fixed bug where columns would not get truncated to the right size, and
fixed bug where column directories would be open and not closed in some
instances
2016-09-21 17:28:52 +02:00
802bae110b Removed deprecated function 2016-09-21 17:09:59 +02:00
dd55aef3e5 Added column class method to get the unique references (name and
version) of a column
2016-09-21 17:08:44 +02:00
9ac522fde1 Better obi test command 2016-09-21 17:06:35 +02:00
6adb9eb623 Should solve issue #56 2016-09-19 21:40:40 +02:00
8f49553d5a First version of the obi test command, testing that the OBITools3 work
correctly
2016-09-15 12:26:07 +02:00
986f90c59e Fixed bug where column directories weren't closed correctly, leading to
too many file descriptors open, and added error checking when closing
file descriptors
2016-09-15 12:18:40 +02:00
a240ec0169 Added error checking when closing file descriptors 2016-09-15 11:58:56 +02:00
0a3c23d9d0 Added a missing closedir 2016-09-15 11:58:34 +02:00
8724445fa1 Added error checking when closing files 2016-09-15 11:50:30 +02:00
de189fd7e0 Fixed major bug when cloning an AVL where the bloom filter was not
copied properly (because the structure copy via assignment does not
work for structures with a variable size)
2016-09-15 11:47:02 +02:00
9a97f1f633 View predicates are now carried over when cloning a view 2016-09-06 16:22:24 +02:00
00014eb023 View files now have the *.obiview extension 2016-09-06 14:19:13 +02:00
acc0da2d0b Readjusted some limits for file names and file numbers to be under OS
limits
2016-09-05 12:39:04 +02:00
668696fc5a Fixed major bug: when setting all the columns of a view to the same
number of lines, columns are now cloned before being enlarged if needed
+ predicate functions now print error messages if the predicates are not
respected
2016-09-05 12:37:36 +02:00
ba84ef4847 Fixed typo 2016-09-05 12:31:06 +02:00
c9dce03295 Fixed major bug when cloning an AVL group (last AVL of new group was not
correctly enlarged before copying the data) + minor improvements
2016-09-05 12:29:52 +02:00
eb82d088cb Added some view class methods 2016-09-05 12:20:00 +02:00
f46ea0b988 Finished fixing issues with DMS paths 2016-08-30 11:09:45 +02:00
5b2e370ffb Fixed a bug when using an absolute path for a DMS 2016-08-29 17:30:31 +02:00
8d360b0fac Minor improvements to obi export command 2016-08-19 17:49:22 +02:00
b34769b27c Minor improvements to obi export command 2016-08-19 17:46:55 +02:00
2d0a714e37 Basic obi export command exporting from view to fasta or fastq format,
for testing purposes
2016-08-19 17:40:58 +02:00
7b780ffb28 View files now have a dynamic size to allow unlimited comments size 2016-08-18 17:57:03 +02:00
e4129610cf Quality columns are now optional in NUC_SEQS views + minor fixes 2016-08-16 15:17:26 +02:00
cf839522e7 Minor update and fix to obi grep command 2016-08-12 17:45:44 +02:00
10b22f79da The cython subclass is now correctly chosen when cloning a view 2016-08-12 17:39:19 +02:00
ad8e10f2d1 Reworked a bit alignment API 2016-08-12 15:56:07 +02:00
92cad61417 Fixed bug when closing views with no associated predicate 2016-08-12 15:52:38 +02:00
64a745ce0b First very basic version of obi grep command 2016-08-11 17:32:08 +02:00
2d8ac2b035 Fixed bug when creating an OBI_IDX column 2016-08-11 17:30:32 +02:00
5b7917bb5a Fixed bug when writing predicates in view file 2016-08-11 17:30:09 +02:00
d3c58780a0 Added __len__ function to OBIViews that returns the line count 2016-08-10 17:20:23 +02:00
029d395da1 Added __iter__ function to OBIView lines 2016-08-10 17:08:22 +02:00
bea02cc7a5 Added (temporary?) check for the type of quality strings because the
import now seems to return them with bytes type
2016-08-10 16:25:45 +02:00
4ba01617af Fixed obscure compilation bug 2016-08-10 15:26:40 +02:00
bec684d5e2 Fixed merge conflict 2016-08-10 15:05:37 +02:00
2aaa87edcc 1st version of obi align command and reworked functions that handle
column alignment
2016-08-10 14:51:02 +02:00
400a3f9f3d Merge branch 'Eric_version_for_sequence'
Conflicts:
	python/obitools3/obidms/_obidmscolumn_seq.pyx
2016-08-04 09:42:42 +02:00
d1d26b9028 Simplify the code 2016-08-04 08:00:54 +02:00
465ea81c77 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-08-03 10:13:47 +02:00
1e6d6e32e0 Switch to Cython version >= 0.24 2016-08-03 10:13:10 +02:00
ccc877764e Patch a bug in the printing of the progress bar leading to a bus error
when compiled with some C compilers and Cython >= 0.24
2016-08-03 10:12:23 +02:00
8f0462c407 Merge branch 'master' into Eric_version_for_sequence
Conflicts:
	python/obitools3/obidms/_obidmscolumn_seq.pyx
2016-08-03 10:09:20 +02:00
26b8e1f215 Modified C API to set and get in columns: added functions to set and get
using column names instead of pointers, and changed function names
2016-08-02 16:33:19 +02:00
312f50ff0f Major update: Column aliases. Columns are now identified in the context
of a view by an alias that can be modified.
2016-08-01 18:25:30 +02:00
3843485a04 Deleted deprecated function declaration that would make compilation
impossible and fixed error in documentation
2016-07-22 16:21:02 +02:00
20425a5d2b Deleted deprecated structure declarations 2016-07-19 15:48:56 +02:00
56e4848ebd The predicates associated with a view are now described in its comments
field
2016-07-19 15:31:21 +02:00
8850e40b6e Minor changes for better presentation 2016-07-19 15:30:17 +02:00
b89af38109 Goes with 38718320 2016-07-18 13:57:49 +02:00
38718320f9 First version for the association of one column to another. Closes #55 2016-07-15 15:38:49 +02:00
8ee85c3005 A first version of predicate functions that are checked when a new view
is saved and closed
2016-07-12 14:54:11 +02:00
000b9999ad Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-07-03 09:22:22 +02:00
aff9831c13 Substitute fprintf call by fputs call to conform with the new ubuntu
compilation rules
2016-07-03 09:21:56 +02:00
448fa8d325 first trial for a fasta formatter 2016-07-03 09:18:52 +02:00
6af62d8124 Change a fprintf without argument to a fputs to comply with the new
default parameter on ubuntu
2016-07-03 08:25:06 +02:00
0869b9ba3f Closes issue #47 by storing each view in a separate file named with the
view's name and created upon view creation.
2016-06-30 11:41:30 +02:00
ad2af0b512 Some comments updated 2016-06-16 11:26:54 +02:00
38e603ed57 Deleted some redundant cython code 2016-06-10 10:34:47 +02:00
f438c3d913 OBIQUAL columns can now handle multiple elements per line 2016-06-09 15:54:36 +02:00
2a1ea3ba3f Setting NA values is now handled properly for OBI_SEQ, OBI_STR and
OBI_QUAL columns
2016-06-09 14:22:36 +02:00
fc3641d7ff Read-only AVLs are now hard-linked instead of copied when cloning an AVL
group to make it writable. Also fixed several bugs when handling AVL
groups.
2016-06-03 19:02:46 +02:00
799b942017 Deleted old debugging print 2016-06-03 18:57:32 +02:00
6e3f5b230e Fixed typo in doc 2016-06-03 18:56:45 +02:00
2f57f80c63 Fixed a bug where an unmapped variable would be read 2016-06-03 18:55:58 +02:00
2962c4d250 Goes with previous commit 2016-06-03 18:54:25 +02:00
69bf7ec2e7 NA value for OBI_STR and OBI_SEQ columns is now NULL 2016-06-03 18:53:22 +02:00
bac7ce7184 Start of the implementation of the export methods 2016-06-02 19:10:33 +02:00
f186395661 Trap potential exception generated by char* to bytes casts 2016-05-29 21:18:20 +02:00
85395dfc1a value returned for sequence is now bytes and no more str 2016-05-29 13:53:32 +02:00
f830389974 Add some comment on the location of the align method. 2016-05-29 12:58:31 +02:00
2e35229357 Add conversion checking on the value of a seq column 2016-05-29 12:54:13 +02:00
a8ed57dc6e few small changes 2016-05-21 12:29:55 +02:00
c3274d419c remove an extra debug log 2016-05-21 12:29:08 +02:00
cca0dbb46b Close issue #54 by adding a read1 method to the MagicKeyFile class 2016-05-21 12:24:48 +02:00
5a78157112 increase parsing speed of the header 2016-05-21 10:29:11 +02:00
0b9a41d952 Patch a bug about the reading of the last sequence 2016-05-21 10:28:03 +02:00
e681ca646d Fixed a problem with some columns being shorter in views and triggering
errors when trying to get values. Temporary fix that needs discussion
2016-05-20 18:45:29 +02:00
3b59043ea8 Major update: New column type to store sequence qualities. Closes #41 2016-05-20 16:45:22 +02:00
ffff91e76c Fixed variable name that had been accidentally changed for better
clarity
2016-05-18 13:27:41 +02:00
6a8df069ad Indexers are now cloned if needed to modify them after they've been
closed. Obligatory indexers' names now follow the same pattern as other
indexers (columnname_version). Closes #46 and #49.
2016-05-18 13:23:48 +02:00
8ae7644945 First version of quality handling (not working yet) and now it is
checked that a column is writable before enlarging it
2016-05-11 16:38:14 +02:00
b3c47809da First version of alignment functions (imported from suma* programs) 2016-05-11 16:36:23 +02:00
3567681339 Now when a column is added to a view, if there is a line selection, all
columns in the view are cloned first
2016-05-11 16:34:20 +02:00
757ef8509a Deleting CeCILL license duplicates 2016-05-09 11:17:45 +02:00
f961621f5d Minor improvements in _obidms Cython layer 2016-05-04 13:43:26 +02:00
bc12360490 Reworked and commented a bit the cython layer for dms, columns and views 2016-05-02 15:16:06 +02:00
872071b104 Removed a list of column pointers kept in the OBIView class that was not
really needed
2016-05-02 14:23:42 +02:00
32cc8968e8 Adding CeCILL license 2016-05-02 11:51:59 +02:00
d6481f0db8 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-04-29 17:46:59 +02:00
a32920e401 Relative paths when creating or opening a DMS now work 2016-04-29 17:46:36 +02:00
31cf27d676 Added indexer function that returns the name of the indexer 2016-04-29 16:18:56 +02:00
baba2d742e commenting _obidms.pyx 2016-04-29 16:07:03 +02:00
5bd12079ae Added comments about listing columns and indexers in obidms functions 2016-04-29 16:06:01 +02:00
072ee5ac03 Re-re-fixed line breaks in README file 2016-04-29 15:44:40 +02:00
9fe21316ff Refixed line breaks in README file 2016-04-29 15:39:46 +02:00
3dc3aaa46b Fixed line breaks in README file 2016-04-29 15:36:58 +02:00
b371030edd Adding README file 2016-04-29 15:35:08 +02:00
b3976fa461 Merge branch 'luke_tests' 2016-04-28 11:17:24 +02:00
6ea2cfb9ca Merging luke_tests branch without the commit turning inline functions in macros 2016-04-28 11:17:18 +02:00
0eca86107e Pseudo obihead for tests 2016-04-27 14:27:28 +02:00
0de953a3ef pseudo obigrep for tests 2016-04-27 14:19:55 +02:00
f3b20b809d Fixed bug with indexer names being defined and generating seg fault if
creating a column not using indexers
2016-04-27 14:01:36 +02:00
d159b921eb Fixed obi import trying to print all lines at the end (source of
segfault?)
2016-04-27 13:14:19 +02:00
4e4cf46b16 Added all C files as source files for all cython files to stop having
that kind of problem with linux systems
2016-04-27 10:44:24 +02:00
6b61533650 Added more C source files for _obiseq 2016-04-27 10:41:00 +02:00
419885485b Added files in _obitaxo C sources for cython 2016-04-27 10:30:16 +02:00
0c8504b6db Commented #ifdef directive for detect_bucket_size function because it
causes errors
2016-04-27 10:24:40 +02:00
654c34a1a6 changed inline functions to macros to make it work on Luke 2016-04-26 15:40:12 +02:00
2d8c06f7b7 Fixed variable initialization for error detection 2016-04-26 14:38:46 +02:00
a6c8d35491 import command a bit modified for tests 2016-04-26 14:29:54 +02:00
366264828e Renamed MurmurHash2.c file to murmurhash2.c as it could be a problem 2016-04-26 14:29:17 +02:00
d3a6ff6043 Removed deprecated code 2016-04-26 14:27:16 +02:00
5ca84b91dc Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-04-25 18:35:57 +02:00
87935c6678 Fixed all compilation problems with new function names, locations etc 2016-04-25 18:35:02 +02:00
92980508c0 Made the function to clone a column in the context of a view private 2016-04-25 18:15:25 +02:00
65880db422 Made function to update the line count of a view private 2016-04-25 18:11:37 +02:00
767d9c7804 Reordered view functions for better coherence 2016-04-25 18:07:58 +02:00
2566377e2a Updated the documentation for utils functions 2016-04-25 18:02:58 +02:00
1fbbdd43f9 Updated obiversion_t declaration 2016-04-25 17:58:37 +02:00
8cdfbb379e Documentation for views and reworked the code a little 2016-04-25 17:58:12 +02:00
0a55e26520 Reworked obiview code and added more comments 2016-04-25 11:37:53 +02:00
68a8509c12 Updated documentation in obitypes.h 2016-04-25 10:33:01 +02:00
5f98d2ed5c Fixed the calculation of the size of data for OBI_STR and OBI_SEQ
columns
2016-04-25 10:26:51 +02:00
ef1be141c1 Update Licence to english version 2016-04-23 18:03:50 +02:00
bbfd40d56d Add license 2016-04-23 18:03:10 +02:00
5d08da46a2 Updated the documentation in obidmscolumn.h 2016-04-22 17:55:53 +02:00
66045acf1d Creating a column now uses the function to create the indexer name if
one was not provided
2016-04-22 17:47:00 +02:00
6977c4315c Improved function to build an indexer name 2016-04-22 17:38:23 +02:00
839b3000a8 Added a function to build indexer names 2016-04-22 17:08:23 +02:00
ffa4557928 changed MAP_PRIVATE flags to MAP_SHARED when opening a column because it
seems a lot more efficient
2016-04-22 16:26:24 +02:00
003cd11362 Fixed initialization of NA values for OBI_STR and OBI_SEQ columns 2016-04-22 16:14:23 +02:00
c87227b65a Uncommented an error message that doesn't need to be commented anymore 2016-04-22 16:11:56 +02:00
c07e75f2ac Updated the documentation for OBI_STR columns 2016-04-22 15:59:32 +02:00
6b394a5cf7 Updated the documentation for OBI_SEQ columns 2016-04-22 15:58:20 +02:00
2416b8ccd8 Deleted more unused inclusions in OBI_STR and OBI_SEQ column types code 2016-04-22 15:56:09 +02:00
b9921e111d Removed unused inclusions and definitions in all column types code 2016-04-22 15:50:19 +02:00
8f5aa8841d Removed unused definition in OBI_IDX columns code 2016-04-22 15:44:30 +02:00
900d67de87 Updated the documentation for columns with the type OBI_IDX 2016-04-22 15:43:39 +02:00
22e3c3eeed Updated the documentation for obidms functions 2016-04-22 11:28:09 +02:00
4ead37ee48 Finished moving obiblob functions to obiblob files and documentation for
obiblob functions
2016-04-21 15:18:14 +02:00
bce360bbd5 Documentation for obiblob indexer API 2016-04-21 15:08:40 +02:00
2a68cb26f8 Improved AVL tree documentation 2016-04-21 15:07:27 +02:00
043e70ff49 Updated AVL documentation 2016-04-21 14:39:03 +02:00
66021367f6 Moved some blob functions to obiblob.c 2016-04-21 14:20:26 +02:00
e69f44ae3d Little annotations for the murmur hash function. 2016-04-21 13:53:29 +02:00
1941a3785e Updated encode functions documentation 2016-04-21 13:46:02 +02:00
c7b8db6a2e Replaced malloc+memset with calloc 2016-04-21 13:45:39 +02:00
1dc4a3be49 Documentation for DNA sequence indexing functions 2016-04-21 13:36:51 +02:00
09597016fd Short doc for crc function 2016-04-21 13:23:52 +02:00
1a2fa0923c Documented the functions indexing and retrieving character strings 2016-04-21 11:35:21 +02:00
00f2f2cc51 Documented changes made in bloom functions 2016-04-21 11:22:31 +02:00
7a88ca619a First obi import (doesn't import tags yet because NA values aren't
handled)
2016-04-15 17:00:08 +02:00
eddd19a245 Changes in obi commands 2016-04-15 16:59:21 +02:00
2aafecc3b5 Changed sequence 'description' to 'definition' everywhere 2016-04-15 16:31:43 +02:00
094b2371e9 Deleted obsolete directory 2016-04-15 14:44:31 +02:00
c1034d300d merging and fixed git conflict with obiavl.h 2016-04-15 13:23:29 +02:00
02d67c257f The default name of an AVL is now the column name + '_indexer', and when
an AVL is opened (as opposed to created), it is read-only
2016-04-15 12:55:26 +02:00
e04ea85d1e Fixed problematic __str__ method and useless declarations in the
OBI_Nuc_Seq_Stored class
2016-04-15 11:22:05 +02:00
527d3555f0 Moved the functions getting full paths for files and directories to
obidms.c/.h files
2016-04-15 11:11:13 +02:00
71492ad229 Made the handling of listing and unlisting opened columns and indexers
functions in the obidms files.
2016-04-15 10:49:12 +02:00
73d64e5aff Renamed 'unmap_header' function to 'close_header' 2016-04-14 15:19:27 +02:00
4cb52e1632 Made the truncating of columns automatic when closing them (note:
already the case for AVLs)
2016-04-14 15:13:30 +02:00
9d042f7bd0 Refactored and relocated the set and get functions of all column types,
both within and out of the context of a view
2016-04-13 15:10:24 +02:00
5ec2d8842e Character string indexer API 2016-04-12 17:21:01 +02:00
04c9470f7d Fixed and cleaned DNA_seq_indexer API 2016-04-12 17:20:24 +02:00
be05c889e2 DNA_seq_indexer API 2016-04-12 16:38:47 +02:00
04e3a7b5a9 Added more references in cython .cfiles files because it seems necessary
for linux distributions
2016-04-12 15:10:54 +02:00
d8107533d8 Obiblob_indexer API 2016-04-12 14:53:33 +02:00
cd4e65e190 Fixed typo and includes in obiblob files 2016-04-12 14:52:27 +02:00
375bfcce8a Renamed "Obi_byte_arrays" to "Obiblobs" and moved Obiblob functions to
separate obiblob.c and obiblob.h files
2016-04-12 11:21:14 +02:00
c225cfd8b6 Fixed bug with retrieval of values from AVLs (bad cast in byte array
structure)
2016-04-11 17:07:22 +02:00
6fe4c6134a Allows for calling getConfiguration without parameter for getting the
default configuration
2016-04-11 13:31:09 +02:00
966b1325ed Deleted declaration of obsolete public function 2016-04-11 11:14:20 +02:00
019dfc01b4 Branch to refactor and debug (AVLs bugged) 2016-04-08 15:38:57 +02:00
45c9c5075c A first version of the fasta parser 2016-04-01 18:15:54 +02:00
20b97c972b Add boolean type in the tag evaluation 2016-04-01 13:42:24 +02:00
efc4a4a3c6 Reduce the call count to eval. This reduces by 3 the time of fast(q|a)
header processing
2016-04-01 08:54:06 +02:00
ce6ea89c21 Add the missing bootstrappip module 2016-03-31 17:28:03 +02:00
4207db7c17 Transfers bug patch from orgasm 2016-03-31 16:53:09 +02:00
1cd35b3359 first version of a fastq parser 2016-03-31 10:47:12 +02:00
f51a6df5b2 Add a class buffering lines during a text file reading 2016-03-30 14:53:25 +02:00
94417e1330 patch the uncompress module to be able to deal with remote file 2016-03-29 20:57:39 +02:00
2e17dbce55 Adds a uopen function able to open transparently a local or a remote
file compressed or not
2016-03-29 20:56:54 +02:00
a9eed1f5d9 Adds class for uncompressing transparently compressed files on line 2016-03-29 18:21:04 +02:00
2dfab3f378 Some changes in relation with the new obitools3.apps module 2016-03-28 15:05:59 +02:00
e583098a96 change in the obi programme according to the new obitools3.apps module
creation
2016-03-28 15:05:02 +02:00
b926ca3997 A template for a command 2016-03-28 15:04:06 +02:00
aacfefad26 A set of utility functions for creating commands 2016-03-28 15:03:26 +02:00
edc4fd7b3e Fixed minor warning 2016-03-25 16:11:52 +01:00
ff6c27acf2 Implemented the retrieval of values with groups of AVLs 2016-03-25 15:35:16 +01:00
69856f18dd untested (and no possible retrieval) of CRC used to represent data in
AVL trees
2016-03-24 16:38:11 +01:00
2c084c8cf7 Switch to 10000000 per avl 2016-03-23 16:13:28 +01:00
58ac860cc7 Added macro for the bloom filter parameters and deleted old unused
macros for crc
2016-03-23 13:33:40 +01:00
d44117d625 obiimport function for testing purposes 2016-03-23 13:00:02 +01:00
6bd42132c4 Minor fixes to silence warnings and replaced two asprintf uses 2016-03-23 12:58:53 +01:00
4085904362 Merge branch 'multiple_avls_bloom' 2016-03-22 14:14:10 +01:00
b04b4b5902 made POSIX compliant 2016-03-21 11:33:06 +01:00
383e738ab7 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-03-18 15:49:53 +01:00
3681cecb4d Multiple AVLs with bloom filters (very raw test version) 2016-03-18 11:06:02 +01:00
545ed8111a Code for tests storing data in multiple AVLs.
(note: unretrievable data as implemented)
2016-03-11 15:34:55 +01:00
86071d30c9 Minor improvement in AVL initial size calculation 2016-03-11 14:07:40 +01:00
21d1b2ed3e First implementation of taxonomy reading 2016-03-11 13:56:38 +01:00
6157633137 prototype for the obi unix command and the count sub command 2016-03-08 16:06:00 +01:00
a08def47e6 It is now impossible to create a view with a name identical to one of an
existing written view
2016-03-01 13:36:54 +01:00
fc5a12bad7 Closes #34 2016-02-29 17:56:55 +01:00
e323d8e702 Cython classes for nucleotide sequences (outside or in the context of a
view)
2016-02-29 16:33:30 +01:00
b350ea0393 Fixed minor error 2016-02-29 16:28:34 +01:00
8e9e21a02e Increased the maximum depth of AVL trees 2016-02-29 16:27:23 +01:00
4df313c54a Added Obiviews specialized for the handling of nucleotide sequences 2016-02-25 09:43:27 +01:00
ffc68d448f Deleted a forgotten print statement 2016-02-18 15:15:42 +01:00
a8f03248a8 Major update : views 2016-02-18 10:38:51 +01:00
cfaf069095 Fixed more typos and formatting imperfections. 2015-12-11 17:37:25 +01:00
a6144eabe2 Fixed typos 2015-12-11 17:26:20 +01:00
c139367555 DNA sequences and character strings are now handled using AVL trees. 2015-12-11 17:24:44 +01:00
1586956d57 Added the lists of opened columns and arrays in the OBIDMS structure,
and a counter in the OBIDMS column structure; fixed some bugs and
created tests for referring columns that are bound to disappear anyway.
2015-12-02 17:32:07 +01:00
b45b496b0e Major update: new type of columns containing indices referring to lines
in other columns
2015-11-29 11:57:07 +01:00
2cf10cb6f0 Column type is now passed as a character string when creating the column
(either 'OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_STR' or
'OBI_SEQ')
2015-11-23 15:48:27 +01:00
5a5516303d deleting useless .pyc files 2015-11-23 14:43:34 +01:00
d6a99bafea Fixed a major bug with the versioning of columns that was introduced in
f6ec8ba9
2015-11-23 13:34:51 +01:00
08f2657e18 Increased maximum line count of columns to 1^9 2015-11-23 13:23:18 +01:00
6aa2f92930 DNA sequences are now encoded on 4 bits when they are in IUPAC 2015-11-20 15:32:09 +01:00
87044b41d8 modified the encoding function on 2 bits a little 2015-11-20 11:32:47 +01:00
6ab1c83302 New column type for DNA sequences. Only for those coded on 2 bits (only
'ATGCatgc') for now.
2015-11-19 18:12:48 +01:00
e371248567 changed version to 0.0.0 2015-11-19 18:11:21 +01:00
dbf9463238 The endianness of a DMS is now stored in the OBIDMS structure 2015-11-18 15:35:09 +01:00
eb12af4da4 Fixed minor error in the documentation of a function. 2015-11-16 15:38:01 +01:00
e8417b4f6f The endianness of an OBIDMS is now stored in an information file that
is read when opening the OBIDMS.
2015-11-16 14:37:51 +01:00
6579566c6e Minor changes in code to improve readability and fix C compilation
warnings
2015-11-10 14:37:58 +01:00
410e2e02a0 When retrieving the header of a column, the version number of the column
wanted can now be provided.
2015-11-10 13:30:10 +01:00
8ce4f264aa When enlarging a column, the function doesn't try anymore to keep the
mapped region at the same pointer (never works), and unmap/remap
instead.
2015-11-10 13:18:36 +01:00
d885eb48ff The header size when creating a column is now calculated according to
the size of the header structure and the page size of the platform.
2015-11-10 13:09:30 +01:00
661fe3606a In OBI_CHAR columns, characters are now given and retrieved as decoded
(unicode) characters.
2015-11-10 11:24:08 +01:00
c4b7e579cf Comments in column headers are now working. 2015-11-10 10:56:45 +01:00
f6ec8ba963 The header size is now directly read in the file when a column or an
array is opened.
2015-11-09 17:50:32 +01:00
0e3d6ed2d7 Methods __len__ (number of lines used) and __sizeof__ (total size in
bytes) implemented for columns.
2015-11-09 15:56:20 +01:00
01bfc14503 The data size in bytes is now stored in the header of a column. 2015-11-09 15:55:00 +01:00
65c1b1e8b2 Minor changes to make the creation of files and directories cleaner 2015-11-09 15:22:01 +01:00
b37bd8f21c File descriptors for dms, column and array directories are now stored in
structures.
2015-11-09 15:06:02 +01:00
05e3956a0c Minor changes in code to improve readability (freeing some character
strings earlier)
2015-11-09 11:22:51 +01:00
9b066f4327 Major update: obiarrays with columns containing indices referring to
character strings.
2015-11-06 17:55:15 +01:00
456551ffeb obi arrays that don't work because of cython bug passing wrong pointers 2015-11-03 14:22:00 +01:00
ecb9d97adb Reorganized the code to have less functions, and the functions to get
and format the creation date of a column are now working.
2015-10-15 15:12:45 +02:00
0eaa5aa784 Major changes : new cython subclasses to handle columns with multiple
elements per line in a more efficient way + now elements_names are
passed as a list + new function to recover only the header of a column
2015-10-14 18:05:34 +02:00
21923e213d The unit tests now test for None values 2015-10-12 18:02:40 +02:00
6877fc4892 Fixed a critical bug where values were initialized to NA at the wrong
location when there was multiple elements per line
2015-10-12 17:54:36 +02:00
dbed3d9d1d New module for unit testing with PyUnit 2015-10-09 15:42:57 +02:00
fc8bf16769 Fixed a critical bug in the computation of the new number of lines of a
column when truncating
2015-10-09 13:49:48 +02:00
e114a3c9cb fixed a critical bug where data size was not calculated correctly and
column directory is now closed when column is closed
2015-10-09 10:25:40 +02:00
ebc9f6f512 fixed a bug where Cython was casting doubles in floats 2015-10-08 15:28:30 +02:00
2b3f03ec28 Removed deprecated script 2015-10-08 10:46:46 +02:00
8fd9c06be2 Fixed missing file for documentation compilation 2015-10-08 10:45:54 +02:00
b553eef781 Method to close a DMS is uncommented but not complete yet (columns have
to be closed separately)
2015-10-08 10:44:13 +02:00
ee4c513fd4 Fixed a bug where cloning a column would fail if the data was empty 2015-10-08 10:36:02 +02:00
c013e6ad33 fixed typo in doxygen doc 2015-10-08 10:33:19 +02:00
c98d567e2f Updated the documentation and restructured a bit because it wasn't
compiling (note: Breathe not working)
2015-10-06 11:09:01 +02:00
392f110c8d new functions in the OBIDMS_column class to raise NotImplementedError
exceptions and to get the creation date of a column
2015-10-02 13:51:26 +02:00
6ced3c4896 new functions to get the creation date of a column 2015-10-02 13:47:53 +02:00
4b8bf41a71 closes #13, obi_errno is initialized to 0 2015-10-02 13:46:34 +02:00
c59a244e9d Fixed little typo 2015-09-30 12:07:13 +02:00
4b7f2d268b Doxygen documentation corrected and completed. 2015-09-30 12:03:46 +02:00
360 changed files with 59206 additions and 11143 deletions

518
LICENSE Executable file
View File

@ -0,0 +1,518 @@
CeCILL FREE SOFTWARE LICENSE AGREEMENT
Version 2.1 dated 2013-06-21
Notice
This Agreement is a Free Software license agreement that is the result
of discussions between its authors in order to ensure compliance with
the two main principles guiding its drafting:
* firstly, compliance with the principles governing the distribution
of Free Software: access to source code, broad rights granted to users,
* secondly, the election of a governing law, French law, with which it
is conformant, both as regards the law of torts and intellectual
property law, and the protection that it offers to both authors and
holders of the economic rights over software.
The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
license are:
Commissariat à l'énergie atomique et aux énergies alternatives - CEA, a
public scientific, technical and industrial research establishment,
having its principal place of business at 25 rue Leblanc, immeuble Le
Ponant D, 75015 Paris, France.
Centre National de la Recherche Scientifique - CNRS, a public scientific
and technological establishment, having its principal place of business
at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
Institut National de Recherche en Informatique et en Automatique -
Inria, a public scientific and technological establishment, having its
principal place of business at Domaine de Voluceau, Rocquencourt, BP
105, 78153 Le Chesnay cedex, France.
Preamble
The purpose of this Free Software license agreement is to grant users
the right to modify and redistribute the software governed by this
license within the framework of an open source distribution model.
The exercising of this right is conditional upon certain obligations for
users so as to preserve this status for all subsequent redistributions.
In consideration of access to the source code and the rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors only have limited liability.
In this respect, the risks associated with loading, using, modifying
and/or developing or reproducing the software by the user are brought to
the user's attention, given its Free Software status, which may make it
complicated to use, with the result that its use is reserved for
developers and experienced professionals having in-depth computer
knowledge. Users are therefore encouraged to load and test the
suitability of the software as regards their requirements in conditions
enabling the security of their systems and/or data to be ensured and,
more generally, to use and operate it in the same conditions of
security. This Agreement may be freely reproduced and published,
provided it is not altered, and that no provisions are either added or
removed herefrom.
This Agreement may apply to any or all software for which the holder of
the economic rights decides to submit the use thereof to its provisions.
Frequently asked questions can be found on the official website of the
CeCILL licenses family (http://www.cecill.info/index.en.html) for any
necessary clarification.
Article 1 - DEFINITIONS
For the purpose of this Agreement, when the following expressions
commence with a capital letter, they shall have the following meaning:
Agreement: means this license agreement, and its possible subsequent
versions and annexes.
Software: means the software in its Object Code and/or Source Code form
and, where applicable, its documentation, "as is" when the Licensee
accepts the Agreement.
Initial Software: means the Software in its Source Code and possibly its
Object Code form and, where applicable, its documentation, "as is" when
it is first distributed under the terms and conditions of the Agreement.
Modified Software: means the Software modified by at least one
Contribution.
Source Code: means all the Software's instructions and program lines to
which access is required so as to modify the Software.
Object Code: means the binary files originating from the compilation of
the Source Code.
Holder: means the holder(s) of the economic rights over the Initial
Software.
Licensee: means the Software user(s) having accepted the Agreement.
Contributor: means a Licensee having made at least one Contribution.
Licensor: means the Holder, or any other individual or legal entity, who
distributes the Software under the Agreement.
Contribution: means any or all modifications, corrections, translations,
adaptations and/or new functions integrated into the Software by any or
all Contributors, as well as any or all Internal Modules.
Module: means a set of sources files including their documentation that
enables supplementary functions or services in addition to those offered
by the Software.
External Module: means any or all Modules, not derived from the
Software, so that this Module and the Software run in separate address
spaces, with one calling the other when they are run.
Internal Module: means any or all Module, connected to the Software so
that they both execute in the same address space.
GNU GPL: means the GNU General Public License version 2 or any
subsequent version, as published by the Free Software Foundation Inc.
GNU Affero GPL: means the GNU Affero General Public License version 3 or
any subsequent version, as published by the Free Software Foundation Inc.
EUPL: means the European Union Public License version 1.1 or any
subsequent version, as published by the European Commission.
Parties: mean both the Licensee and the Licensor.
These expressions may be used both in singular and plural form.
Article 2 - PURPOSE
The purpose of the Agreement is the grant by the Licensor to the
Licensee of a non-exclusive, transferable and worldwide license for the
Software as set forth in Article 5 <#scope> hereinafter for the whole
term of the protection granted by the rights over said Software.
Article 3 - ACCEPTANCE
3.1 The Licensee shall be deemed as having accepted the terms and
conditions of this Agreement upon the occurrence of the first of the
following events:
* (i) loading the Software by any or all means, notably, by
downloading from a remote server, or by loading from a physical medium;
* (ii) the first time the Licensee exercises any of the rights granted
hereunder.
3.2 One copy of the Agreement, containing a notice relating to the
characteristics of the Software, to the limited warranty, and to the
fact that its use is restricted to experienced users has been provided
to the Licensee prior to its acceptance as set forth in Article 3.1
<#accepting> hereinabove, and the Licensee hereby acknowledges that it
has read and understood it.
Article 4 - EFFECTIVE DATE AND TERM
4.1 EFFECTIVE DATE
The Agreement shall become effective on the date when it is accepted by
the Licensee as set forth in Article 3.1 <#accepting>.
4.2 TERM
The Agreement shall remain in force for the entire legal term of
protection of the economic rights over the Software.
Article 5 - SCOPE OF RIGHTS GRANTED
The Licensor hereby grants to the Licensee, who accepts, the following
rights over the Software for any or all use, and for the term of the
Agreement, on the basis of the terms and conditions set forth hereinafter.
Besides, if the Licensor owns or comes to own one or more patents
protecting all or part of the functions of the Software or of its
components, the Licensor undertakes not to enforce the rights granted by
these patents against successive Licensees using, exploiting or
modifying the Software. If these patents are transferred, the Licensor
undertakes to have the transferees subscribe to the obligations set
forth in this paragraph.
5.1 RIGHT OF USE
The Licensee is authorized to use the Software, without any limitation
as to its fields of application, with it being hereinafter specified
that this comprises:
1. permanent or temporary reproduction of all or part of the Software
by any or all means and in any or all form.
2. loading, displaying, running, or storing the Software on any or all
medium.
3. entitlement to observe, study or test its operation so as to
determine the ideas and principles behind any or all constituent
elements of said Software. This shall apply when the Licensee
carries out any or all loading, displaying, running, transmission or
storage operation as regards the Software, that it is entitled to
carry out hereunder.
5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
The right to make Contributions includes the right to translate, adapt,
arrange, or make any or all modifications to the Software, and the right
to reproduce the resulting software.
The Licensee is authorized to make any or all Contributions to the
Software provided that it includes an explicit notice that it is the
author of said Contribution and indicates the date of the creation thereof.
5.3 RIGHT OF DISTRIBUTION
In particular, the right of distribution includes the right to publish,
transmit and communicate the Software to the general public on any or
all medium, and by any or all means, and the right to market, either in
consideration of a fee, or free of charge, one or more copies of the
Software by any means.
The Licensee is further authorized to distribute copies of the modified
or unmodified Software to third parties according to the terms and
conditions set forth hereinafter.
5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
The Licensee is authorized to distribute true copies of the Software in
Source Code or Object Code form, provided that said distribution
complies with all the provisions of the Agreement and is accompanied by:
1. a copy of the Agreement,
2. a notice relating to the limitation of both the Licensor's warranty
and liability as set forth in Articles 8 and 9,
and that, in the event that only the Object Code of the Software is
redistributed, the Licensee allows effective access to the full Source
Code of the Software for a period of at least three years from the
distribution of the Software, it being understood that the additional
acquisition cost of the Source Code shall not exceed the cost of the
data transfer.
5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
When the Licensee makes a Contribution to the Software, the terms and
conditions for the distribution of the resulting Modified Software
become subject to all the provisions of this Agreement.
The Licensee is authorized to distribute the Modified Software, in
source code or object code form, provided that said distribution
complies with all the provisions of the Agreement and is accompanied by:
1. a copy of the Agreement,
2. a notice relating to the limitation of both the Licensor's warranty
and liability as set forth in Articles 8 and 9,
and, in the event that only the object code of the Modified Software is
redistributed,
3. a note stating the conditions of effective access to the full source
code of the Modified Software for a period of at least three years
from the distribution of the Modified Software, it being understood
that the additional acquisition cost of the source code shall not
exceed the cost of the data transfer.
5.3.3 DISTRIBUTION OF EXTERNAL MODULES
When the Licensee has developed an External Module, the terms and
conditions of this Agreement do not apply to said External Module, that
may be distributed under a separate license agreement.
5.3.4 COMPATIBILITY WITH OTHER LICENSES
The Licensee can include a code that is subject to the provisions of one
of the versions of the GNU GPL, GNU Affero GPL and/or EUPL in the
Modified or unmodified Software, and distribute that entire code under
the terms of the same version of the GNU GPL, GNU Affero GPL and/or EUPL.
The Licensee can include the Modified or unmodified Software in a code
that is subject to the provisions of one of the versions of the GNU GPL,
GNU Affero GPL and/or EUPL and distribute that entire code under the
terms of the same version of the GNU GPL, GNU Affero GPL and/or EUPL.
Article 6 - INTELLECTUAL PROPERTY
6.1 OVER THE INITIAL SOFTWARE
The Holder owns the economic rights over the Initial Software. Any or
all use of the Initial Software is subject to compliance with the terms
and conditions under which the Holder has elected to distribute its work
and no one shall be entitled to modify the terms and conditions for the
distribution of said Initial Software.
The Holder undertakes that the Initial Software will remain ruled at
least by this Agreement, for the duration set forth in Article 4.2 <#term>.
6.2 OVER THE CONTRIBUTIONS
The Licensee who develops a Contribution is the owner of the
intellectual property rights over this Contribution as defined by
applicable law.
6.3 OVER THE EXTERNAL MODULES
The Licensee who develops an External Module is the owner of the
intellectual property rights over this External Module as defined by
applicable law and is free to choose the type of agreement that shall
govern its distribution.
6.4 JOINT PROVISIONS
The Licensee expressly undertakes:
1. not to remove, or modify, in any manner, the intellectual property
notices attached to the Software;
2. to reproduce said notices, in an identical manner, in the copies of
the Software modified or not.
The Licensee undertakes not to directly or indirectly infringe the
intellectual property rights on the Software of the Holder and/or
Contributors, and to take, where applicable, vis-à-vis its staff, any
and all measures required to ensure respect of said intellectual
property rights of the Holder and/or Contributors.
Article 7 - RELATED SERVICES
7.1 Under no circumstances shall the Agreement oblige the Licensor to
provide technical assistance or maintenance services for the Software.
However, the Licensor is entitled to offer this type of services. The
terms and conditions of such technical assistance, and/or such
maintenance, shall be set forth in a separate instrument. Only the
Licensor offering said maintenance and/or technical assistance services
shall incur liability therefor.
7.2 Similarly, any Licensor is entitled to offer to its licensees, under
its sole responsibility, a warranty, that shall only be binding upon
itself, for the redistribution of the Software and/or the Modified
Software, under terms and conditions that it is free to decide. Said
warranty, and the financial terms and conditions of its application,
shall be subject of a separate instrument executed between the Licensor
and the Licensee.
Article 8 - LIABILITY
8.1 Subject to the provisions of Article 8.2, the Licensee shall be
entitled to claim compensation for any direct loss it may have suffered
from the Software as a result of a fault on the part of the relevant
Licensor, subject to providing evidence thereof.
8.2 The Licensor's liability is limited to the commitments made under
this Agreement and shall not be incurred as a result of in particular:
(i) loss due the Licensee's total or partial failure to fulfill its
obligations, (ii) direct or consequential loss that is suffered by the
Licensee due to the use or performance of the Software, and (iii) more
generally, any consequential loss. In particular the Parties expressly
agree that any or all pecuniary or business loss (i.e. loss of data,
loss of profits, operating loss, loss of customers or orders,
opportunity cost, any disturbance to business activities) or any or all
legal proceedings instituted against the Licensee by a third party,
shall constitute consequential loss and shall not provide entitlement to
any or all compensation from the Licensor.
Article 9 - WARRANTY
9.1 The Licensee acknowledges that the scientific and technical
state-of-the-art when the Software was distributed did not enable all
possible uses to be tested and verified, nor for the presence of
possible defects to be detected. In this respect, the Licensee's
attention has been drawn to the risks associated with loading, using,
modifying and/or developing and reproducing the Software which are
reserved for experienced users.
The Licensee shall be responsible for verifying, by any or all means,
the suitability of the product for its requirements, its good working
order, and for ensuring that it shall not cause damage to either persons
or properties.
9.2 The Licensor hereby represents, in good faith, that it is entitled
to grant all the rights over the Software (including in particular the
rights set forth in Article 5 <#scope>).
9.3 The Licensee acknowledges that the Software is supplied "as is" by
the Licensor without any other express or tacit warranty, other than
that provided for in Article 9.2 <#good-faith> and, in particular,
without any warranty as to its commercial value, its secured, safe,
innovative or relevant nature.
Specifically, the Licensor does not warrant that the Software is free
from any error, that it will operate without interruption, that it will
be compatible with the Licensee's own equipment and software
configuration, nor that it will meet the Licensee's requirements.
9.4 The Licensor does not either expressly or tacitly warrant that the
Software does not infringe any third party intellectual property right
relating to a patent, software or any other property right. Therefore,
the Licensor disclaims any and all liability towards the Licensee
arising out of any or all proceedings for infringement that may be
instituted in respect of the use, modification and redistribution of the
Software. Nevertheless, should such proceedings be instituted against
the Licensee, the Licensor shall provide it with technical and legal
expertise for its defense. Such technical and legal expertise shall be
decided on a case-by-case basis between the relevant Licensor and the
Licensee pursuant to a memorandum of understanding. The Licensor
disclaims any and all liability as regards the Licensee's use of the
name of the Software. No warranty is given as regards the existence of
prior rights over the name of the Software or as regards the existence
of a trademark.
Article 10 - TERMINATION
10.1 In the event of a breach by the Licensee of its obligations
hereunder, the Licensor may automatically terminate this Agreement
thirty (30) days after notice has been sent to the Licensee and has
remained ineffective.
10.2 A Licensee whose Agreement is terminated shall no longer be
authorized to use, modify or distribute the Software. However, any
licenses that it may have granted prior to termination of the Agreement
shall remain valid subject to their having been granted in compliance
with the terms and conditions hereof.
Article 11 - MISCELLANEOUS
11.1 EXCUSABLE EVENTS
Neither Party shall be liable for any or all delay, or failure to
perform the Agreement, that may be attributable to an event of force
majeure, an act of God or an outside cause, such as defective
functioning or interruptions of the electricity or telecommunications
networks, network paralysis following a virus attack, intervention by
government authorities, natural disasters, water damage, earthquakes,
fire, explosions, strikes and labor unrest, war, etc.
11.2 Any failure by either Party, on one or more occasions, to invoke
one or more of the provisions hereof, shall under no circumstances be
interpreted as being a waiver by the interested Party of its right to
invoke said provision(s) subsequently.
11.3 The Agreement cancels and replaces any or all previous agreements,
whether written or oral, between the Parties and having the same
purpose, and constitutes the entirety of the agreement between said
Parties concerning said purpose. No supplement or modification to the
terms and conditions hereof shall be effective as between the Parties
unless it is made in writing and signed by their duly authorized
representatives.
11.4 In the event that one or more of the provisions hereof were to
conflict with a current or future applicable act or legislative text,
said act or legislative text shall prevail, and the Parties shall make
the necessary amendments so as to comply with said act or legislative
text. All other provisions shall remain effective. Similarly, invalidity
of a provision of the Agreement, for any reason whatsoever, shall not
cause the Agreement as a whole to be invalid.
11.5 LANGUAGE
The Agreement is drafted in both French and English and both versions
are deemed authentic.
Article 12 - NEW VERSIONS OF THE AGREEMENT
12.1 Any person is authorized to duplicate and distribute copies of this
Agreement.
12.2 So as to ensure coherence, the wording of this Agreement is
protected and may only be modified by the authors of the License, who
reserve the right to periodically publish updates or new versions of the
Agreement, each with a separate number. These subsequent versions may
address new issues encountered by Free Software.
12.3 Any Software distributed under a given version of the Agreement may
only be subsequently distributed under the same version of the Agreement
or a subsequent version, subject to the provisions of Article 5.3.4
<#compatibility>.
Article 13 - GOVERNING LAW AND JURISDICTION
13.1 The Agreement is governed by French law. The Parties agree to
endeavor to seek an amicable solution to any disagreements or disputes
that may arise during the performance of the Agreement.
13.2 Failing an amicable solution within two (2) months as from their
occurrence, and unless emergency proceedings are necessary, the
disagreements or disputes shall be referred to the Paris Courts having
jurisdiction, by the more diligent Party.

7
MANIFEST.in Normal file → Executable file
View File

@ -1,11 +1,12 @@
include setup.py
recursive-include distutils.ext *.py *.c *.pem
recursive-include python *.pyx *.pxd *.c *.h *.cfiles
recursive-include python *.pyx *.pxd *.c *.h
recursive-include src *.c *.h
include src/CMakeLists.txt
recursive-include doc/sphinx/source *.txt *.rst *.py
recursive-include doc/sphinx/sphinxext *.py
include doc/sphinx/Makefile
include doc/sphinx/Doxyfile
include README.txt
include README.md
include requirements.txt
include scripts/obi

41
README.md Executable file
View File

@ -0,0 +1,41 @@
The `OBITools3`: A package for the management of analyses and data in DNA metabarcoding
---------------------------------------------
**Website: <https://metabarcoding.org/obitools3>**
DNA metabarcoding offers new perspectives for biodiversity research [1]. This approach to ecosystem studies relies heavily on the use of Next-Generation Sequencing (NGS), and consequently requires the ability to process large volumes of data. The `OBITools` package satisfies this requirement thanks to a set of programs specifically designed for analyzing NGS data in a DNA metabarcoding context [2] - <https://metabarcoding.org/obitools>. Their capacity to filter and edit sequences while taking into account taxonomic annotation helps to set up tailor-made analysis pipelines for a broad range of DNA metabarcoding applications, including biodiversity surveys or diet analyses.
**The `OBITools3`.** This new version of the `OBITools` looks to significantly improve the storage efficiency and the data processing speed. To this end, the `OBITools3` rely on an ad hoc database system, inside which all the data that a DNA metabarcoding experiment must consider is stored: the sequences, the metadata (describing for instance the samples), the database containing the reference sequences used for the taxonomic annotation, as well as the taxonomic databases. Besides the gain in efficiency, this new structure allows an easier access to all the data associated with an experiment.
**Column-oriented storage.** An analysis pipeline corresponds to a succession of commands, each computing one step of the analysis, and where the result of the command *n* is used by the command *n+1*. DNA metabarcoding data can easily be represented in the form of tables, and each command can be regarded as an operation transforming one or several 'input' tables into one or several 'output' tables, which can be used by the next command. Many of the basic operations in a pipeline copy without modification an important part of the input tables to the result tables, and use for their calculations only a small part of the input data. In the original `OBITools`, those tables are kept in the form of annotated sequence files in the FASTA or FASTQ format. This has two consequences: i) keeping the transitional results of the analysis pipeline means using disk space for an important volume of redundant data, ii) the coding and decoding of information that is not actually used represents an important part of the treatment process. The new database system used by the `OBITools3` (called DMS for Data Management System) relies on column-oriented storage. The columns are immutable and can be assembled in views representing the data tables. This way, the data not modified by a command in an input table can easily be associated to the result table without duplicating any information; and the data not used at all by a command can be associated with the result table without being read. This strategy results in a gain in disk space efficiency by limiting data redundancy, as well as a gain in execution time by limiting data reading, writing and conversion operations. Finally, as a means to optimize data access, each column is stored in a binary file directly mapped in memory for reading and writing operations.
**Storage optimization.** DNA metabarcoding data is intrinsically very redundant. For example, the same sequence corresponding to a species will be present several thousand times across all samples. In order to limit the disk space used and make comparison operations more efficient, data in the form of character strings is stored in columns using a complex indexing structure, efficient on millions of values, coupling hash functions, Bloom filters and AVL trees. Finally, DNA sequences are compressed by encoding each nucleotide on two or four bits depending on whether the sequences contain only the four nucleotides (A, C, G, T) or use the IUPAC codes.
**Saving the data processing history.** All the information used by the `OBITools3` is stored in immutable data structures in the DMS. If a command has to modify a column used as input to produce its result, a new version of that column is created, leaving the initial version intact. This storage system makes it possible to keep, at minimal cost, all the transitional results produced by the pipeline. The storage of metadata describing all the operations that have produced a view (a result table) in the DMS makes possible the creation of an oriented hypergraph, where each node corresponds to a view and each arrow to an operation. By retracing the dependency relationships in this hypergraph, it is possible to rebuild *a posteriori* the entirety of the process that has produced a result table.
**Tools.** The `OBITools3` offer the same tools as the original `OBITools`, plus `ecoPCR` (*in silico* PCR) [4] and `Sumatra` (sequence alignment, not multithreaded yet) [5].
Eventually, new versions of `ecoPrimers` (PCR primer design) [3], as well as `Sumaclust` (sequence alignment and clustering) [5] will be added, taking advantage of the database structure developed for the `OBITools3`.
**Implementation and availability.** The lower layers managing the DMS as well as all the compute-intensive functions are coded in `C99` for efficiency reasons. A `Cython` (<http://www.cython.org>) object layer allows for a simple but efficient implementation of the `OBITools3` commands in `Python 3`. The `OBITools3` are now being released, check the wiki for more information.
**References.**
1. Taberlet P, Coissac E, Hajibabaei M, Rieseberg LH: Environmental DNA. Mol Ecol 2012, 21:1789-1793.
2. Boyer F, Mercier C, Bonin A, Le Bras Y, Taberlet P, Coissac E: OBITools: a Unix-inspired software package for DNA metabarcoding. Mol Ecol Resour, 2016: 176-182.
3. Riaz T, Shehzad W, Viari A, Pompanon F, Taberlet P, Coissac E: ecoPrimers: inference of new DNA barcode markers from whole genome sequence analysis. Nucleic Acids Res 2011, 39:e145.
4. Ficetola GF, Coissac E, Zundel S, Riaz T, Shehzad W, Bessière J, Taberlet P, Pompanon F: An in silico approach for the evaluation of DNA barcodes. BMC Genomics 2010, 11:434.
5. Mercier C, Boyer F, Bonin A, Coissac E (2013) SUMATRA and SUMACLUST: fast and exact comparison and clustering of sequences. Available: <http://metabarcoding.org/sumatra> and <http://metabarcoding.org/sumaclust>

View File

@ -1,18 +0,0 @@
# Builds the standalone `obicount` test program from obicount.c and the
# column management code located in the parent directory.

CC = gcc
CFLAGS = -c -Wall
LDFLAGS =

SOURCES = obicount.c ../obidmscolumn.c
OBJECTS = $(SOURCES:.c=.o)
EXECUTABLE = obicount

# Neither target produces a file bearing its own name.
.PHONY: all clean

all: $(SOURCES) $(EXECUTABLE)

$(EXECUTABLE): $(OBJECTS)
	$(CC) $(LDFLAGS) $(OBJECTS) -o $@

.c.o:
	$(CC) $(CFLAGS) $< -o $@

# Remove exactly what this Makefile builds: the object files (one of which
# lives in the parent directory) and the executable. `-f` keeps `clean`
# from failing when nothing has been built yet.
clean:
	rm -f $(OBJECTS) $(EXECUTABLE)

View File

@ -1,87 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h> /* mmap() is defined in this header */
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h> /* close() is defined in this header */
#include <sys/stat.h>
#include "../obitypes.h"
#include "../obidmscolumn.h"
/**
 * @brief Computes the size to map.
 *
 * The size to map is the size of the file minus the size of the header
 * (HEADER_SIZE).
 *
 * @param OBIDMSColumn_file The file descriptor of the file to map.
 * @return The size to map, or -1 if the file status could not be read or
 *         if the file is smaller than the header.
 */
int get_size_to_map(int OBIDMSColumn_file)
{
    struct stat s;

    // without a successful fstat(), the content of s is undefined
    if (fstat(OBIDMSColumn_file, &s) < 0)
        return(-1);

    // a file shorter than its header has nothing to map
    if (s.st_size < (off_t) HEADER_SIZE)
        return(-1);

    return((int) (s.st_size - (off_t) HEADER_SIZE));
}
/**
 * @brief Computes and prints the total number of sequences by summing their counts.
 *
 * The data following the header of the count file is read as a succession of
 * decimal numbers, each one terminated by SEPARATOR. The sum of those numbers
 * is printed on the standard error stream.
 *
 * @param argc The number of command line arguments.
 * @param argv The command line arguments; argv[1] is the path of the count file.
 * @return 0 on success, 1 if the count file is missing or can not be read.
 */
int main(int argc, char const *argv[])
{
    char* map;
    int size;
    int OBIDMSColumn_file;
    int count;
    char c;
    char num_str[32];   // large enough for any number that fits in an int
    int i,j;

    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <count file>\n", argv[0]);
        return(1);
    }

    // initialize variables
    OBIDMSColumn_file = open(argv[1], O_RDONLY); //read only
    if (OBIDMSColumn_file < 0)
    {
        perror(argv[1]);
        return(1);
    }
    count = 0;
    j = 0;
    num_str[0] = '\0';

    // compute size to map
    size = get_size_to_map(OBIDMSColumn_file);
    if (size < 0)
    {
        fprintf(stderr, "%s: can not compute the size to map\n", argv[1]);
        close(OBIDMSColumn_file);
        return(1);
    }

    // map the data
    map = obi_map_read_only(OBIDMSColumn_file, HEADER_SIZE, size);

    // sum the counts
    for (i=0; i<size; i++)
    {
        c = map[i];
        if (c != SEPARATOR) // reading lines
        {
            // characters that do not fit in the buffer are dropped
            // instead of being written past its end
            if (j < (int) sizeof(num_str) - 1)
            {
                num_str[j] = c;
                j++;
                // keep the string terminated, otherwise digits of a
                // previous longer number would be parsed again
                num_str[j] = '\0';
            }
        }
        else // end of a line
        {
            count = count + atoi(num_str); // add the number to the sum
            j = 0;
            num_str[0] = '\0';
        }
    }

    // print the final count of sequences
    fprintf(stderr, "Sequence count = %d\n", count);

    // unmap
    obi_unmap(size);

    // close file
    close(OBIDMSColumn_file);

    return(0);
}

View File

@ -1,38 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
from distutils.command.build import build as ori_build
from obidistutils.serenity.checksystem import is_mac_system
class build(ori_build):
    """Top-level ``build`` command extended with the OBITools sub-commands.

    The ``littlebigman`` and ``pidname`` sub-commands are scheduled before
    the standard distutils ones. ``build_sphinx`` is scheduled after them,
    but only when its command module can be imported.
    """

    def has_ext_modules(self):
        """Return what the distribution reports for extension modules."""
        return self.distribution.has_ext_modules()

    def has_pidname(self):
        """Return the result of ``is_mac_system()``."""
        return is_mac_system()

    def has_doc(self):
        """Always request the documentation sub-command."""
        return True

    def has_littlebigman(self):
        """Always request the ``littlebigman`` sub-command."""
        return True

    # Sub-commands that are scheduled whatever the environment.
    sub_commands = [("littlebigman", has_littlebigman),
                    ('pidname', has_pidname)
                    ] \
                   + ori_build.sub_commands

    # The documentation is only built when the sphinx command is importable.
    try:
        from obidistutils.command.build_sphinx import build_sphinx  # @UnusedImport
    except ImportError:
        pass
    else:
        sub_commands = sub_commands + [('build_sphinx', has_doc)]

View File

@ -1,85 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
from .build_ctools import build_ctools
from .build_exe import build_exe
from distutils.errors import DistutilsSetupError
from distutils import log
import os
class build_cexe(build_ctools):
    """Build the C/C++ executables declared in ``distribution.executables``."""

    description = "build C/C++ executable distributed with Python extensions"

    def initialize_options(self):
        """Reset the options, including the mapping of generated files."""
        build_ctools.initialize_options(self)
        self.built_files = None

    def finalize_options(self):
        """Resolve the output directory and the list of executables.

        When no ``build_cexe`` directory was given explicitly, the value
        installed by ``build_ctools.finalize_options`` is discarded so that
        the directory is taken from the ``build_scripts`` option of the
        ``build`` command instead.
        """
        requested_dir = self.build_cexe
        build_ctools.finalize_options(self)

        if requested_dir is None:
            self.build_cexe = None

        self.set_undefined_options('build',
                                   ('build_scripts', 'build_cexe'))
        self.set_undefined_options('build_files',
                                   ('files', 'built_files'))

        self.executables = self.distribution.executables

        if self.executables:
            self.check_executable_list(self.executables)

        # XXX same as for build_ext -- what about 'self.define' and
        # 'self.undef' ?

    def substitute_sources(self, exe_name, sources):
        """
        Substitutes source file name starting by an @ by the actual
        name of the built file (see --> build_files)
        """
        resolved = list(sources)

        for position, source in enumerate(resolved):
            message = "%s :-> %s" % (exe_name, source)

            if source[0] != '@':
                log.info("%s ok", message)
                continue

            try:
                filename = self.built_files[source[1:]]
            except KeyError:
                raise DistutilsSetupError(
                    'The %s filename declared in the source '
                    'files of the program %s have not been '
                    'built by the installation process' % (source,
                                                           exe_name))
            resolved[position] = filename
            log.info("%s changed to %s", message, filename)

        return resolved

    def run(self):
        """Run the sub-commands, then build the executables."""
        for cmd_name in self.get_sub_commands():
            self.run_command(cmd_name)

        build_exe.run(self)

View File

@ -1,63 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
from .build_exe import build_exe
from distutils import log
class build_ctools(build_exe):
    """Build the C/C++ tools declared in ``distribution.ctools``.

    After ``run()``, ``self.ctools`` holds the names of the tools that
    were built.
    """

    description = "build C/C++ executable not distributed with Python extensions"

    def initialize_options(self):
        """Reset the options specific to this command."""
        build_exe.initialize_options(self)

        # List of built tools
        self.ctools = None
        self.littlebigman = None

    def finalize_options(self):
        """Resolve directories, the tool list and the endianness macro."""
        # The tools are built in the temporary build directory of the
        # "build" command.
        build_exe.finalize_options(self)

        self.set_undefined_options('build',
                                   ('build_temp', 'build_cexe'))
        self.set_undefined_options('littlebigman',
                                   ('littlebigman', 'littlebigman'))

        self.executables = self.distribution.ctools
        self.check_executable_list(self.executables)

        if self.littlebigman == '-DLITTLE_END':
            if self.define is None:
                self.define = [('LITTLE_END', None)]
            else:
                # 'define' is a list of (name, value) tuples; list.append()
                # takes a single argument, so the tuple must be explicit.
                self.define.append(('LITTLE_END', None))

        log.info('Look for CPU architecture... %s', self.define)

        self.ctools = set()

    def run(self):
        """Run the sub-commands, build the tools and record their names."""
        for cmd_name in self.get_sub_commands():
            self.run_command(cmd_name)

        build_exe.run(self)

        for e, p in self.executables:  # @UnusedVariable
            self.ctools.add(e)

View File

@ -1,211 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
import os
from distutils.core import Command
from distutils.sysconfig import customize_compiler
from distutils.errors import DistutilsSetupError
from distutils import log
from distutils.ccompiler import show_compilers
class build_exe(Command):
    """Abstract command compiling and linking C/C++ executables.

    Subclasses set ``self.executables`` to a list of
    ``(executable_name, build_info_dict)`` pairs, where ``build_info_dict``
    has a ``'sources'`` entry and optional ``'macros'`` and
    ``'include_dirs'`` entries.
    """

    description = "build an executable -- Abstract command "

    user_options = [
        ('build-cexe', 'x',
         "directory to build C/C++ libraries to"),
        ('build-temp', 't',
         "directory to put temporary build by-products"),
        ('debug', 'g',
         "compile with debugging information"),
        ('force', 'f',
         "forcibly build everything (ignore file timestamps)"),
        ('compiler=', 'c',
         "specify the compiler type"),
    ]

    boolean_options = ['debug', 'force']

    help_options = [
        ('help-compiler', None,
         "list available compilers", show_compilers),
    ]

    def initialize_options(self):
        """Set every option to its undefined value."""
        self.build_cexe = None
        self.build_temp = None

        # List of executables to build
        self.executables = None

        # Compilation options for all libraries
        self.include_dirs = None
        self.define = None
        self.undef = None
        self.extra_compile_args = None
        self.debug = None
        self.force = 0
        self.compiler = None
        self.sse = None
        self.built_files = None

    def finalize_options(self):
        """Inherit undefined options from ``build`` and the distribution."""
        self.set_undefined_options('build',
                                   ('build_temp', 'build_temp'),
                                   ('compiler', 'compiler'),
                                   ('debug', 'debug'),
                                   ('force', 'force'))

        if self.include_dirs is None:
            self.include_dirs = self.distribution.include_dirs or []

        if isinstance(self.include_dirs, str):
            self.include_dirs = self.include_dirs.split(os.pathsep)

        # The distribution's 'sse' value is passed to the compiler as
        # a '-m<sse>' flag.
        self.sse = self.distribution.sse

        if self.sse is not None:
            if self.extra_compile_args is None:
                self.extra_compile_args = ['-m%s' % self.sse]
            else:
                self.extra_compile_args.append('-m%s' % self.sse)

        # XXX same as for build_ext -- what about 'self.define' and
        # 'self.undef' ?

    def run(self):
        """Configure a compiler and build every declared executable."""
        if not self.executables:
            return

        self.mkpath(self.build_cexe)

        # Yech -- this is cut 'n pasted from build_ext.py!
        from distutils.ccompiler import new_compiler
        self.compiler = new_compiler(compiler=self.compiler,
                                     dry_run=self.dry_run,
                                     force=self.force)
        customize_compiler(self.compiler)

        if self.include_dirs is not None:
            self.compiler.set_include_dirs(self.include_dirs)

        if self.define is not None:
            # 'define' option is a list of (name,value) tuples
            for (name, value) in self.define:
                self.compiler.define_macro(name, value)

        if self.undef is not None:
            for macro in self.undef:
                self.compiler.undefine_macro(macro)

        self.build_executables(self.executables)

    def check_executable_list(self, executables):
        """Ensure that the list of executables is valid.

        `executable` is presumably provided as a command option 'executables'.
        This method checks that it is a list of 2-tuples, where the tuples
        are (executable_name, build_info_dict).

        Raise DistutilsSetupError if the structure is invalid anywhere;
        just returns otherwise.
        """
        if not isinstance(executables, list):
            raise DistutilsSetupError("'executables' option must be a list of tuples")

        for exe in executables:
            # Both conditions are required: an entry must be a sequence AND
            # have exactly two items. Two-item lists are still accepted, as
            # they were before.
            if not isinstance(exe, (tuple, list)) or len(exe) != 2:
                raise DistutilsSetupError("each element of 'executables' must a 2-tuple")

            name, build_info = exe

            if not isinstance(name, str):
                raise DistutilsSetupError(
                    "first element of each tuple in 'executables' "
                    "must be a string (the executables name)")

            if '/' in name or (os.sep != '/' and os.sep in name):
                raise DistutilsSetupError(
                    "bad executable name '%s': "
                    "may not contain directory separators" % exe[0])

            if not isinstance(build_info, dict):
                raise DistutilsSetupError(
                    "second element of each tuple in 'executables' "
                    "must be a dictionary (build info)")

    def get_executable_names(self):
        """Return the names of the executables, or None if there are none."""
        # Assume the executables list is valid -- 'check_executable_list()' is
        # called from 'finalize_options()', so it should be!
        if not self.executables:
            return None

        return [exe_name for (exe_name, build_info) in self.executables]

    def get_source_files(self):
        """Return the source files of all the executables.

        Raise DistutilsSetupError if an executable has no valid 'sources'.
        """
        self.check_executable_list(self.executables)
        filenames = []

        for (exe_name, build_info) in self.executables:
            sources = build_info.get('sources')
            if sources is None or not isinstance(sources, (list, tuple)):
                raise DistutilsSetupError(
                    "in 'executables' option (library '%s'), "
                    "'sources' must be present and must be "
                    "a list of source filenames" % exe_name)

            filenames.extend(sources)

        return filenames

    def substitute_sources(self, exe_name, sources):
        """Return the sources as a new list, unchanged."""
        return list(sources)

    def build_executables(self, executables):
        """Compile and link each (executable_name, build_info) pair.

        Raise DistutilsSetupError if an executable has no valid 'sources'.
        """
        for (exe_name, build_info) in executables:
            sources = build_info.get('sources')
            if sources is None or not isinstance(sources, (list, tuple)):
                raise DistutilsSetupError(
                    "in 'executables' option (library '%s'), "
                    "'sources' must be present and must be "
                    "a list of source filenames" % exe_name)
            sources = self.substitute_sources(exe_name, sources)

            log.info("building '%s' program", exe_name)

            # First, compile the source code to object files in the
            # temporary build directory.
            macros = build_info.get('macros')
            include_dirs = build_info.get('include_dirs')
            extra_args = self.extra_compile_args or []

            objects = self.compiler.compile(sources,
                                            output_dir=self.build_temp,
                                            macros=macros,
                                            include_dirs=include_dirs,
                                            extra_postargs=extra_args,
                                            debug=self.debug)

            # Then link the object files into the executable.
            self.compiler.link_executable(objects, exe_name,
                                          output_dir=self.build_cexe,
                                          debug=self.debug)

View File

@ -1,113 +0,0 @@
'''
Created on 13 fevr. 2014
@author: coissac
'''
from distutils import log
import os
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
from Cython.Compiler import Options as cython_options # @UnresolvedImport
from distutils.errors import DistutilsSetupError
class build_ext(ori_build_ext):
    """Cython ``build_ext`` command aware of the generated source files.

    Source names starting with ``@`` are replaced by the path of the
    corresponding file produced by the ``build_files`` command.
    """

    def modifyDocScripts(self):
        """Write the library build directory to doc/sphinx/build_dir.txt."""
        # The context manager closes the file even if the write fails.
        with open("doc/sphinx/build_dir.txt", "w") as build_dir_file:
            print(self.build_lib, file=build_dir_file)

    def initialize_options(self):
        """Reset the options specific to this command."""
        ori_build_ext.initialize_options(self)  # @UndefinedVariable
        self.littlebigman = None
        self.built_files = None

    def finalize_options(self):
        """Resolve the endianness macro and the generated files mapping."""
        ori_build_ext.finalize_options(self)  # @UndefinedVariable

        self.set_undefined_options('littlebigman',
                                   ('littlebigman', 'littlebigman'))
        self.set_undefined_options('build_files',
                                   ('files', 'built_files'))

        self.cython_c_in_temp = 1

        if self.littlebigman == '-DLITTLE_END':
            if self.define is None:
                self.define = [('LITTLE_END', None)]
            else:
                # list.append() takes a single argument: the macro has to
                # be appended as one (name, value) tuple.
                self.define.append(('LITTLE_END', None))

    def substitute_sources(self, exe_name, sources):
        """
        Substitutes source file name starting by an @ by the actual
        name of the built file (see --> build_files)
        """
        sources = list(sources)

        for i, source in enumerate(sources):
            message = "%s :-> %s" % (exe_name, source)

            if source[0] == '@':
                try:
                    filename = self.built_files[source[1:]]
                except KeyError:
                    # Not registered by build_files: fall back on a file
                    # of that name in the temporary build directory.
                    tmpfilename = os.path.join(self.build_temp, source[1:])
                    if os.path.isfile(tmpfilename):
                        filename = tmpfilename
                    else:
                        raise DistutilsSetupError(
                            'The %s filename declared in the source '
                            'files of the program %s have not been '
                            'built by the installation process' % (source,
                                                                   exe_name))
                sources[i] = filename
                log.info("%s changed to %s", message, filename)
            else:
                log.info("%s ok", message)

        return sources

    def build_extensions(self):
        """Substitute the generated sources, then build every extension."""
        # First, sanity-check the 'extensions' list
        for ext in self.extensions:
            ext.sources = self.substitute_sources(ext.name, ext.sources)

        self.check_extensions_list(self.extensions)

        for ext in self.extensions:
            log.info("%s :-> %s", ext.name, ext.sources)
            ext.sources = self.cython_sources(ext.sources, ext)
            self.build_extension(ext)

    def run(self):
        """Run the sub-commands, then the standard extension build."""
        self.modifyDocScripts()

        for cmd_name in self.get_sub_commands():
            self.run_command(cmd_name)

        cython_options.annotate = True
        ori_build_ext.run(self)  # @UndefinedVariable

    def has_files(self):
        """Return what the distribution reports for generated files."""
        return self.distribution.has_files()

    def has_executables(self):
        """Return what the distribution reports for executables."""
        return self.distribution.has_executables()

    sub_commands = [('build_files', has_files),
                    ('build_cexe', has_executables)
                    ] + \
                   ori_build_ext.sub_commands

View File

@ -1,63 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
import os.path
from distutils.core import Command
from distutils import log
class build_files(Command):
    """
    Generate the files declared in `distribution.files`.

    Each declaration is a `(dest, prog, command)` triple. `command` is
    a shell command template expanded with the `prog` and `dest` keys;
    the path of every generated file is stored in `self.files` under
    its `dest` name.
    """

    def initialize_options(self):
        # All values are filled in by finalize_options()
        self.files = None
        self.ctools = None
        self.build_temp = None
        self.build_cexe = None

    def finalize_options(self):
        inherited = (('ctools', 'ctools'),
                     ('build_temp', 'build_temp'),
                     ('build_cexe', 'build_cexe'))
        self.set_undefined_options('build_ctools', *inherited)
        self.files = {}

    def run(self):
        for sub_command in self.get_sub_commands():
            self.run_command(sub_command)

        for dest, prog, command in self.distribution.files:
            target = os.path.join(self.build_temp, dest)

            # A program listed in ctools is run from the build_cexe
            # directory; any other program name is used as given.
            if prog in self.ctools:
                program = os.path.join(self.build_cexe, prog)
            else:
                program = prog

            log.info("Building file : %s" % dest)
            shell_line = command % {'prog': program, 'dest': target}
            log.info(" --> %s" % shell_line)

            os.system(shell_line)
            self.files[dest] = target
            log.info("Done.\n")

    def has_ctools(self):
        return self.distribution.has_ctools()

    sub_commands = [('build_ctools', has_ctools)] + Command.sub_commands

View File

@ -1,104 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
import os.path
from distutils.command.build_scripts import build_scripts as ori_build_scripts
from distutils.util import convert_path
from distutils import log, sysconfig
from distutils.dep_util import newer
from stat import ST_MODE
import re
# Matches a "#!" first line naming a python interpreter; group 1 holds
# whatever follows the interpreter name (its arguments), if anything.
first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
class build_scripts(ori_build_scripts):
    """`build_scripts` command that strips the script extension and
    keeps an unmodified copy of every script in a `raw_scripts`
    directory located beside the build directory.
    """

    def copy_scripts (self):
        """Copy each script listed in 'self.scripts'; if it's marked as a
        Python script in the Unix way (first line matches 'first_line_re',
        ie. starts with "#!" and contains "python"), then adjust the first
        line to refer to the current Python interpreter as we copy.

        Every processed script is also copied unchanged into the
        'raw_scripts' directory, and on posix systems the built scripts
        are made readable and executable by everybody.
        """
        self.mkpath(self.build_dir)
        # Directory receiving the unmodified copies of the scripts
        rawbuild_dir = os.path.join(os.path.dirname(self.build_dir),'raw_scripts')
        self.mkpath(rawbuild_dir)
        outfiles = []
        for script in self.scripts:
            adjust = 0
            script = convert_path(script)
            # The built script loses the extension of the source file
            outfile = os.path.join(self.build_dir, os.path.splitext(os.path.basename(script))[0])
            rawoutfile = os.path.join(rawbuild_dir, os.path.basename(script))
            outfiles.append(outfile)

            if not self.force and not newer(script, outfile):
                log.debug("not copying %s (up-to-date)", script)
                continue

            # Always open the file but ignore failures in dry-run mode --
            # that way, we'll get accurate feedback if we can read the
            # script.
            try:
                f = open(script, "r")
            except IOError:
                if not self.dry_run:
                    raise
                f = None
            else:
                first_line = f.readline()
                if not first_line:
                    self.warn("%s is an empty file (skipping)" % script)
                    # NOTE(review): `f` is left open on this path
                    continue

                match = first_line_re.match(first_line)
                if match:
                    adjust = 1
                    # Interpreter arguments found after "python..."
                    post_interp = match.group(1) or ''

            log.info("Store the raw script %s -> %s", script,rawoutfile)
            self.copy_file(script, rawoutfile)

            if adjust:
                log.info("copying and adjusting %s -> %s", script,
                         self.build_dir)
                if not self.dry_run:
                    outf = open(outfile, "w")
                    # Rewrite the "#!" line, then copy the remaining
                    # lines of the script unchanged.
                    if not sysconfig.python_build:
                        outf.write("#!%s%s\n" %
                                   (self.executable,
                                    post_interp))
                    else:
                        outf.write("#!%s%s\n" %
                                   (os.path.join(
                            sysconfig.get_config_var("BINDIR"),
                           "python%s%s" % (sysconfig.get_config_var("VERSION"),
                                           sysconfig.get_config_var("EXE"))),
                                    post_interp))
                    outf.writelines(f.readlines())
                    outf.close()
                if f:
                    f.close()
            else:
                if f:
                    f.close()
                self.copy_file(script, outfile)

        if os.name == 'posix':
            for F in outfiles:
                if self.dry_run:
                    log.info("changing mode of %s", F)
                else:
                    # Add the read and execute bits for user, group and
                    # others while keeping the other permission bits.
                    oldmode = os.stat(F)[ST_MODE]
                    oldmode = oldmode & 0o7777
                    newmode = (oldmode | 0o555) & 0o7777
                    if newmode != oldmode:
                        log.info("changing mode of %s from %o to %o",
                                 F, oldmode, newmode)
                        os.chmod(F, newmode)

View File

@ -1,27 +0,0 @@
'''
Created on 10 mars 2015
@author: coissac
'''
try:
    from sphinx.setup_command import BuildDoc as ori_build_sphinx # @UnresolvedImport

    class build_sphinx(ori_build_sphinx):
        '''Build Sphinx documentation in html, epub and man formats
        '''

        description = __doc__

        def run(self):
            # One complete run of the original command per output
            # format, options being finalized again for each builder.
            for builder in ('html', 'epub', 'man'):
                self.builder=builder
                self.finalize_options()
                ori_build_sphinx.run(self)

except ImportError:
    # Without sphinx the command is simply not defined
    pass

View File

@ -1,19 +0,0 @@
'''
Created on 6 oct. 2014
@author: coissac
'''
# try:
# from setuptools.command.install import install as install_ori
# except ImportError:
# from distutils.command.install import install as install_ori
from distutils.command.install import install as install_ori
class install(install_ori):
    """`install` command with an extra `install_sphinx` sub-command.

    The extra sub-command only runs when `distribution.serenity` is
    true.
    """

    def __init__(self,dist):
        install_ori.__init__(self, dist)
        # `sub_commands` is a class-level list inherited from the
        # distutils command: appending to it in place would modify the
        # original command and add one more entry at each
        # instantiation. Work on a per-instance copy instead.
        self.sub_commands = list(self.sub_commands) + [
            ('install_sphinx',lambda self: self.distribution.serenity)]

View File

@ -1,47 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
# try:
# from setuptools.command.install_scripts import install_scripts as ori_install_scripts
# except ImportError:
# from distutils.command.install_scripts import install_scripts as ori_install_scripts
from distutils.command.install_scripts import install_scripts as ori_install_scripts
import os.path
from distutils import log
class install_scripts(ori_install_scripts):
    """`install_scripts` command that, when `distribution.serenity` is
    true, also links the installed scripts into `../export/bin`
    (relative to the installation directory).
    """

    def initialize_options(self):
        ori_install_scripts.initialize_options(self)
        self.public_dir = None

    def install_public_link(self):
        """Create in `self.public_dir` one symbolic link per installed
        script, replacing any entry of the same name."""
        self.mkpath(self.public_dir)
        for script in self.get_outputs():
            dest = os.path.join(self.public_dir,
                                os.path.split(script)[1])
            log.info("exporting file %s -> %s", script, dest)
            if not self.dry_run:
                # lexists() also detects a dangling symbolic link,
                # which exists() reports as missing and which would
                # make os.symlink() fail.
                if os.path.lexists(dest):
                    os.unlink(dest)
                os.symlink(script,dest)

    def run(self):
        ori_install_scripts.run(self)
        if self.distribution.serenity:
            self.public_dir=os.path.join(self.install_dir,"../export/bin")
            self.public_dir=os.path.abspath(self.public_dir)
            self.install_public_link()

View File

@ -1,61 +0,0 @@
'''
Created on 10 mars 2015
@author: coissac
'''
from distutils.core import Command
import os.path
import glob
class install_sphinx(Command):
    '''
    Install the sphinx documentation
    '''

    description = "Install the sphinx documentation in serenity mode"

    boolean_options = ['force', 'skip-build']

    def initialize_options (self):
        self.install_doc = None
        self.build_dir = None

    def finalize_options (self):
        self.set_undefined_options('build_sphinx', ('build_dir', 'build_dir'))
        # The documentation directory is derived from the directory
        # where the scripts are installed.
        self.set_undefined_options('install',
                                   ('install_scripts', 'install_doc'))

    def run (self):
        """Copy the html, man and epub documentation from the build
        directory into `../export/share` when serenity mode is on."""
        if self.distribution.serenity:
            self.install_doc = os.path.join(self.install_doc,"../export/share")
            self.install_doc=os.path.abspath(self.install_doc)
            self.mkpath(self.install_doc)

            self.mkpath(os.path.join(self.install_doc,'html'))
            self.copy_tree(os.path.join(self.build_dir,'html'),
                           os.path.join(self.install_doc,'html'))

            self.mkpath(os.path.join(self.install_doc,'man','man1'))
            self.copy_tree(os.path.join(self.build_dir,'man'),
                           os.path.join(self.install_doc,'man','man1'))

            for epub in glob.glob(os.path.join(self.build_dir,'epub/*.epub')):
                self.copy_file(os.path.join(epub),
                               os.path.join(self.install_doc,os.path.split(epub)[1]))

    def get_outputs(self):
        """Return the list of the installed documentation file paths.

        Every element is a path string: the generators are merged
        with `extend` (with `append` the generator objects themselves
        ended up in the list). Epub files are reported under the
        file name used by `run`, directly in `self.install_doc`.
        """
        directory=os.path.join(self.install_doc,'html')
        files = [os.path.join(self.install_doc,'html', f)
                 for dp, dn, filenames in os.walk(directory) for f in filenames] # @UnusedVariable

        directory=os.path.join(self.build_dir,'man')
        files.extend(os.path.join(self.install_doc,'man','man1', f)
                     for dp, dn, filenames in os.walk(directory) for f in filenames) # @UnusedVariable

        directory=os.path.join(self.build_dir,'epub')
        files.extend(os.path.join(self.install_doc, os.path.basename(f))
                     for dp, dn, filenames in os.walk(directory) # @UnusedVariable
                     for f in glob.glob(os.path.join(dp, '*.epub')) )

        return files

View File

@ -1,59 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
import os
from obidistutils.command.build_exe import build_exe
from distutils import log
import subprocess
class littlebigman(build_exe):
    """Build and run the `littlebigman` program; its output is stored
    in `self.littlebigman`."""

    description = "build the littlebigman executable testing endianness of the CPU"

    def initialize_options(self):
        build_exe.initialize_options(self)
        self.littlebigman = None

    def finalize_options(self):
        build_exe.finalize_options(self)

        # The executable is a temporary by-product of the build: its
        # directory defaults to the build_temp of the "build" command.
        self.set_undefined_options('build',
                                   ('build_temp', 'build_cexe'))

        # self.ctools = self.distribution.ctools

        source = "distutils.ext/src/littlebigman.c"
        if not os.path.isfile(source):
            self.executables = []
            return
        self.executables = [('littlebigman',{"sources":[source]})]
        self.check_executable_list(self.executables)

    def run_littlebigman(self):
        """Run the executable and return what it wrote on stdout."""
        binary = os.path.join(self.build_temp,'littlebigman')
        process = subprocess.Popen("'%s'" % binary,
                                   shell=True,
                                   stdout=subprocess.PIPE)
        output = process.communicate()[0]
        return output.decode('latin1')

    def run(self):
        build_exe.run(self)
        self.littlebigman=self.run_littlebigman()
        log.info("Your CPU is in mode : %s" % self.littlebigman)

View File

@ -1,55 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
import os
from obidistutils.command.build_exe import build_exe
from obidistutils.serenity.checksystem import is_mac_system
class pidname(build_exe):
    """Build the `pidname` program on mac systems only;
    `self.pidname` tells whether it has been built."""

    description = "build the pidname executable returning the executable path from a PID on a mac"

    def initialize_options(self):
        build_exe.initialize_options(self)
        self.pidname = False

    def finalize_options(self):
        build_exe.finalize_options(self)

        # Here the executable directory defaults to the build_scripts
        # directory of the "build" command.
        self.set_undefined_options('build',
                                   ('build_scripts', 'build_cexe'))

        # self.ctools = self.distribution.ctools

        source = "distutils.ext/src/pidname.c"
        if not os.path.isfile(source):
            self.executables = []
            return
        self.executables = [('pidname',{"sources":[source]})]
        self.check_executable_list(self.executables)

        # self.build_cexe = os.path.join(os.path.dirname(self.build_cexe),'cbinaries')
        # self.mkpath(self.build_cexe)

    def run(self):
        if not is_mac_system():
            self.pidname=False
            return
        build_exe.run(self)
        self.pidname=True

View File

@ -1,42 +0,0 @@
'''
Created on 10 mars 2015
@author: coissac
'''
import os.path
from distutils.command.sdist import sdist as orig_sdist
from distutils import dir_util
class sdist(orig_sdist):
    """`sdist` command building its release tree under `tmp/`."""

    def make_distribution(self):
        """Create the source distribution(s).

        The release tree is created in 'tmp/<fullname>' with
        'make_release_tree()', then one archive per format of
        'self.formats' is made from it. The release tree is removed
        at the end unless 'self.keep_temp' is true. The list of the
        created archive files is stored in 'self.archive_files'.
        """
        # Don't warn about missing meta-data here -- should be (and is!)
        # done elsewhere.
        release_name = self.distribution.get_fullname()
        staging_dir = os.path.join('tmp',release_name)
        archive_base = os.path.join(self.dist_dir,release_name)

        self.make_release_tree(staging_dir, self.filelist.files)

        # tar archive must be created last to avoid overwrite and remove
        if 'tar' in self.formats:
            tar_position = self.formats.index('tar')
            self.formats.append(self.formats.pop(tar_position))

        produced = []
        for fmt in self.formats:
            archive = self.make_archive(archive_base, fmt,
                                        root_dir='tmp',base_dir=release_name,
                                        owner=self.owner, group=self.group)
            produced.append(archive)
            self.distribution.dist_files.append(('sdist', '', archive))

        self.archive_files = produced

        if not self.keep_temp:
            dir_util.remove_tree(staging_dir, dry_run=self.dry_run)

View File

@ -1,226 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
from os import path
import os.path
import glob
import sys
# try:
# from setuptools.extension import Extension
# except ImportError:
# from distutils.extension import Extension
from distutils.extension import Extension
from obidistutils.serenity.checkpackage import install_requirements,\
check_requirements, \
RequirementError
from obidistutils.serenity.rerun import enforce_good_python
from obidistutils.serenity.rerun import rerun_with_anothe_python
from distutils import log
from obidistutils.dist import Distribution
from obidistutils.serenity import is_serenity
def findPackage(root,base=None):
    """
    List the dotted names of the packages found below `root`.

    A package is a sub-directory containing an `__init__.py` file;
    sub-packages are looked for recursively.

    @param root: the directory to explore
    @param base: the list of the parent package names used as prefix
    @return: a list of dotted package names
    """
    prefix = [] if base is None else base
    found = []
    for init_file in glob.glob(path.join(root,'*','__init__.py')):
        name = path.basename(path.dirname(init_file))
        qualified = prefix+[name]
        found.append('.'.join(qualified))
        found.extend(findPackage(path.join(root,name),qualified))
    return found
def findCython(root,base=None,pyrexs=None):
    """
    Build the list of the Cython extensions found below `root`.

    One Extension is created for each `.pyx` file of each package.
    When a `.cfiles` file sits beside the `.pyx` file, the `.c` files
    it lists are added to the extension sources, the directories of
    the listed `.h` files to its include directories, and a fixed set
    of compiler flags to its compile arguments. Names starting by an
    @ are kept as they are; the other ones are made relative to the
    directory of the setup script.

    @param root: the directory to explore
    @param base: the list of the parent package names used as prefix
    @param pyrexs: ignored, the result list is always rebuilt
    @return: a list of Extension instances
    """
    setupdir = os.path.dirname(sys.argv[0])
    pyrexs=[]

    if base is None:
        base=[]

    for module in (path.basename(path.dirname(x))
                   for x in glob.glob(path.join(root,'*','__init__.py'))):

        for pyrex in glob.glob(path.join(root,module,'*.pyx')):
            extension = Extension('.'.join(base+[module,path.splitext(path.basename(pyrex))[0]]),
                                  [pyrex]
                                 )
            pyrexs.append(extension)

            cfiles_path = os.path.splitext(pyrex)[0]+".cfiles"
            cfilesdir = os.path.dirname(cfiles_path)
            try:
                # The context manager closes the file, which the
                # previous bare open() never did.
                with open(cfiles_path) as cfiles_file:
                    declared = cfiles_file.readlines()
            except IOError:
                # No .cfiles file: the extension only has its .pyx
                continue

            cfiles = [os.path.relpath(os.path.join(cfilesdir,y),setupdir).strip()
                      if y[0] !='@' else y.strip()
                      for y in declared]

            log.info("Cython module : %s",cfiles)
            incdir = set(os.path.dirname(x) for x in cfiles if x[-2:]==".h")
            cfiles = [x for x in cfiles if x[-2:]==".c"]
            extension.sources.extend(cfiles)
            extension.include_dirs.extend(incdir)
            extension.extra_compile_args.extend(['-msse2',
                                                 '-Wno-unused-function',
                                                 '-Wmissing-braces',
                                                 '-Wchar-subscripts'])

        pyrexs.extend(findCython(path.join(root,module),base+[module]))

    return pyrexs
def rootname(x):
    """Return the first source of `x` without its file extension."""
    first_source = x.sources[0]
    stem, _extension = os.path.splitext(first_source)
    return stem
def prepare_commands():
    """
    Import the obidistutils commands and return them as a dictionary
    mapping each command name to its class.

    The `build_sphinx` entry is only present when its module can be
    imported.
    """
    from obidistutils.command.build import build
    from obidistutils.command.littlebigman import littlebigman
#    from obidistutils.command.serenity import serenity
    from obidistutils.command.build_cexe import build_cexe
    from obidistutils.command.build_ext import build_ext
    from obidistutils.command.build_ctools import build_ctools
    from obidistutils.command.build_files import build_files
    from obidistutils.command.build_scripts import build_scripts
    from obidistutils.command.install_scripts import install_scripts
    from obidistutils.command.install_sphinx import install_sphinx
    from obidistutils.command.install import install
    from obidistutils.command.pidname import pidname
    from obidistutils.command.sdist import sdist

    COMMANDS = dict(build=build,
                    littlebigman=littlebigman,
                    pidname=pidname,
                    build_ctools=build_ctools,
                    build_files=build_files,
                    build_cexe=build_cexe,
                    build_ext=build_ext,
                    build_scripts=build_scripts,
                    install_scripts=install_scripts,
                    install_sphinx=install_sphinx,
                    install=install,
                    sdist=sdist)

    # Optional command: left out when the import fails
    try:
        from obidistutils.command.build_sphinx import build_sphinx
    except ImportError:
        pass
    else:
        COMMANDS['build_sphinx']=build_sphinx

    return COMMANDS
# Module-level default values that setup() uses for the 'ctools',
# 'executables' and 'files' attributes when the caller gives none.
CTOOLS =[]
CEXES =[]
FILES =[]
def setup(**attrs):
    """
    Front-end of the distutils `setup` function.

    The `pythonmin`, `pythonmax`, `fork`, `requirements` and
    `python_src` keywords are consumed here; every other keyword is
    passed to distutils after the missing ones have been given a
    default value (scripts, packages and Cython extensions are looked
    for in the python source directory).
    """
    log.set_threshold(log.INFO)

    # Keywords that distutils does not know about
    minversion = attrs.pop("pythonmin",'3.4')
    maxversion = attrs.pop('pythonmax',None)
    fork = attrs.pop('fork',False)
    requirementfile = attrs.pop('requirements','requirements.txt')

    if is_serenity():
        enforce_good_python(minversion, maxversion, fork)
        if (install_requirements(requirementfile)):
            rerun_with_anothe_python(sys.executable,minversion,maxversion,fork)

    try:
        check_requirements(requirementfile)
    except RequirementError as e :
        log.error(e)
        sys.exit(1)

    attrs.setdefault('distclass', Distribution)

    SRC = attrs.pop('python_src', 'python')

    if 'scripts' not in attrs:
        attrs['scripts'] = glob.glob('%s/*.py' % SRC)

    attrs.setdefault('package_dir', {'': SRC})

    if 'packages' not in attrs:
        attrs['packages'] = findPackage(SRC)

    if 'cmdclass' not in attrs:
        attrs['cmdclass'] = prepare_commands()

    attrs.setdefault('ctools', CTOOLS)
    attrs.setdefault('executables', CEXES)
    attrs.setdefault('files', FILES)
    attrs.setdefault('sse', None)
    attrs.setdefault('serenity', False)

    # The extensions are always looked for, even when not used
    EXTENTION=findCython(SRC)
    attrs.setdefault('ext_modules', EXTENTION)

#    try:
#        from setuptools.core import setup as ori_setup
#    except ImportError:
#        from distutils.core import setup as ori_setup

    from distutils.core import setup as ori_setup

    ori_setup(**attrs)

View File

@ -1,58 +0,0 @@
'''
Created on 20 oct. 2012
@author: coissac
'''
# try:
# from setuptools.dist import Distribution as ori_Distribution
# except ImportError:
# from distutils.dist import Distribution as ori_Distribution
from distutils.dist import Distribution as ori_Distribution
class Distribution(ori_Distribution):
    """Distribution knowing the obidistutils specific attributes
    (executables, ctools, files, sse, serenity...) and declaring the
    `serenity` and `virtualenv` global options."""

    def __init__(self,attrs=None):
        self.executables = None
        self.ctools = None
        self.files = None
        self.build_cexe = None
        self.deprecated_scripts = None
        self.zip_safe=False
        self.sse = None
        # `attrs` may be None (the default) or may lack the key:
        # serenity mode is then considered as switched off.
        self.serenity = attrs.get('serenity', False) if attrs else False

        ori_Distribution.__init__(self, attrs)

        # `global_options` is a class-level list of the distutils
        # Distribution: inserting into it in place would modify the
        # original class. Use a per-instance list, the two new options
        # coming first ('virtualenv' then 'serenity').
        self.global_options = [
            ('virtualenv', None, "if the installation is done using the serenity mode "
                                 "this option allows for specifying the virtualenv name. "
                                 "By default the name is PACKAGE-VERSION"
            ),
            ('serenity', None, "install or build the package in a python virtualenv "
                               "without polluting the installed python and with many "
                               "checks during the installation process"
            ),
        ] + list(self.global_options)

    def run_commands(self):
        """Run each command that was seen on the setup script command line.
        Uses the list of commands found and cache of command objects
        created by 'get_command_obj()'.
        """
        # self.run_command('littlebigman')
        ori_Distribution.run_commands(self)

    # The has_* predicates return a false value when the attribute is
    # None or empty, and the attribute itself otherwise.

    def has_executables(self):
        return self.executables is not None and self.executables

    def has_ctools(self):
        return self.ctools is not None and self.ctools

    def has_files(self):
        return self.files is not None and self.files

    def has_deprecated_scripts(self):
        return self.deprecated_scripts is not None and self.deprecated_scripts

View File

@ -1,112 +0,0 @@
import sys
from distutils import util
from distutils import sysconfig
from distutils import log
from distutils.version import LooseVersion, StrictVersion
import glob
import os
import subprocess
import re
from distutils.errors import DistutilsError
import tempfile
from importlib.util import spec_from_file_location # @UnresolvedImport
import zipimport
import argparse
import base64
from .checkpython import is_python_version
from obidistutils.serenity.rerun import enforce_good_python
from obidistutils.serenity.rerun import rerun_with_anothe_python
from obidistutils.serenity.virtual import serenity_virtualenv
from obidistutils.serenity.checksystem import is_mac_system, \
is_windows_system
from obidistutils.serenity.checkpackage import install_requirements
from obidistutils.serenity.checkpackage import check_requirements
from obidistutils.serenity.util import save_argv
from obidistutils.serenity.snake import snake
def serenity_snake(envname,package,version):
    """
    Run the serenity mode installation steps: check the python
    version, get the python of the `envname` virtualenv and install
    the requirements, restarting the installation with that python
    whenever the running one differs or something was installed.
    """
    previous_threshold = log.set_threshold(log.INFO)

    log.info("Installing %s (%s) in serenity mode" % (package,version))

    enforce_good_python()

    virtualpython=serenity_virtualenv(envname,package,version)
    running_python = os.path.realpath(sys.executable)

    if virtualpython!=running_python:
        log.info("Restarting installation within the %s virtualenv" % (envname))
        rerun_with_anothe_python(virtualpython)

    log.info("%s will be installed with python : %s" % (package,virtualpython))

    something_installed = install_requirements()
    if something_installed:
        log.info("Restarting installation with all dependencies ok")
        rerun_with_anothe_python(virtualpython)

    log.set_threshold(previous_threshold)
def serenity_assert(version):
    """Run `check_requirements()` with its default arguments.

    @param version: accepted but not used by this function
    """
    check_requirements()
def is_serenity():
    """
    Tell if the serenity mode is on.

    @return: the first value stored in `local_serenity`, or the empty
             `local_serenity` list itself when nothing is stored yet
    """
    from obidistutils.serenity.globals import local_serenity
    if not local_serenity:
        return local_serenity
    return local_serenity[0]
def serenity_mode(package,version):
    """
    Extract the `--serenity` and `--virtualenv` options from the
    command line and start the serenity installation when asked for.

    The original command line is saved first, the two options are
    removed from `sys.argv` and the requested mode is appended to
    `local_serenity`.

    @return: True if the `--serenity` option was given
    @rtype: bool
    """
    save_argv()

    from obidistutils.serenity.globals import saved_args
    from obidistutils.serenity.globals import local_serenity

    previous_threshold = log.set_threshold(log.INFO)

    argparser = argparse.ArgumentParser(add_help=False)
    argparser.add_argument('--serenity',
                           dest='serenity',
                           action='store_true',
                           default=False,
                           help='Switch the installer in serenity mode. Everythings are installed in a virtualenv')
    argparser.add_argument('--virtualenv',
                           dest='virtual',
                           type=str,
                           action='store',
                           default="%s-%s" % (package,version),
                           help='Specify the name of the virtualenv used by the serenity mode [default: %s-%s]' % (package,version))

    args, unknown = argparser.parse_known_args()
    # Only the options not recognized here are left on the command line
    sys.argv = [sys.argv[0]] + unknown

    requested = args.serenity
    local_serenity.append(True if requested else False)
    if requested:
        serenity_snake(args.virtual,package,version)

    log.set_threshold(previous_threshold)

    return args.serenity
def getVersion(source,main,version):
    """
    Read the version string of a package without importing the package.

    The file `<source>/<main>/<version>.py` is executed as a stand
    alone module and the value of its `version` variable is returned
    without the surrounding white spaces.

    @param source: the directory containing the package
    @param main: the name of the package directory
    @param version: the name (without .py) of the version module
    @return: the stripped version string
    """
    # Local import: requires python >= 3.5
    from importlib.util import module_from_spec

    path = os.path.join(source,main,'%s.py' % version)
    spec = spec_from_file_location('version',path)
    # exec_module() replaces the deprecated load_module(), which also
    # registered (and reused) a 'version' entry in sys.modules.
    module = module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.version.strip()

View File

@ -1,160 +0,0 @@
'''
Created on 2 oct. 2014
@author: coissac
'''
import re
import os
import pip # @UnresolvedImport
from pip.utils import get_installed_distributions # @UnresolvedImport
from distutils.version import StrictVersion # @UnusedImport
from distutils.errors import DistutilsError
from distutils import log
class RequirementError(Exception):
    """Raised by `check_requirements` when a required package is missing."""
def is_installed(requirement):
    """
    Check that a requirement is fulfilled by an installed package.

    @param requirement: a requirement string like `name`, `name>=1.0`
    @return: True if exactly one installed distribution has the
             required project name and, when a version constraint is
             given, its version satisfies the constraint
    @rtype: bool
    @raise DistutilsError: if the comparison operator is not one of
                           ==, >=, <=, > or <
    """
    import operator

    # The comparison is done through an explicit operator table
    # instead of eval() on a string built from the requirement text.
    comparators = {'==': operator.eq,
                   '>=': operator.ge,
                   '<=': operator.le,
                   '>' : operator.gt,
                   '<' : operator.lt}

    requirement_project,requirement_relation,requirement_version = parse_package_requirement(requirement)

    package = [x for x in get_installed_distributions() if x.project_name==requirement_project]
    constrained = requirement_version is not None and requirement_relation is not None

    if len(package)==1:
        if constrained:
            try:
                compare = comparators[requirement_relation]
            except KeyError:
                raise DistutilsError("Requirement : %s not correctly formated" % requirement)
            rep = compare(StrictVersion(package[0].version),
                          StrictVersion(requirement_version))
        else:
            rep=True
    else:
        rep=False

    if rep:
        if constrained:
            log.info("Look for package %s (%s%s) : ok version %s installed" % (requirement_project,
                                                                              requirement_relation,
                                                                              requirement_version,
                                                                              package[0].version))
        else:
            log.info("Look for package %s : ok version %s installed" % (requirement_project,
                                                                       package[0].version))
    else:
        if len(package)!=1:
            if constrained:
                log.info("Look for package %s (%s%s) : not installed" % (requirement_project,
                                                                        requirement_relation,
                                                                        requirement_version))
            else:
                # No constraint: do not print "(NoneNone)"
                log.info("Look for package %s : not installed" % requirement_project)
        else:
            log.info("Look for package %s (%s%s) : failed only version %s installed" % (requirement_project,
                                                                                       requirement_relation,
                                                                                       requirement_version,
                                                                                       package[0].version))

    return rep
def get_requirements(requirementfile='requirements.txt'):
    """
    Read the requirement strings declared in a requirement file.

    Blank lines, comment lines (starting by #) and option lines
    (starting by -) are ignored.

    @param requirementfile: the path of the requirement file
    @return: the list of the stripped requirement lines, empty if the
             file cannot be read
    """
    try:
        # The context manager closes the file in every case
        with open(requirementfile) as requirement_lines:
            requirements = [x.strip() for x in requirement_lines]
    except IOError:
        return []

    # Testing the first character of an empty line raised an
    # IndexError: empty lines are discarded first.
    return [x for x in requirements
            if x and x[0]!='-' and x[0]!='#']
def install_requirements(requirementfile='requirements.txt'):
    """
    Install with pip every requirement that is not yet fulfilled.

    @param requirementfile: the path of the requirement file
    @return: True if at least one package has been installed. The
             function returns at once after installing a requirement
             whose text starts by `pip`.
    @rtype: bool
    """
    installed_any=False

    requirements = get_requirements(requirementfile)

    log.info("Required packages for the installation :")
    for requirement in requirements:
        if is_installed(requirement):
            continue
        log.info(" Installing requirement : %s" % requirement)
        pip_install_package(requirement)
        installed_any=True
        if requirement[0:3]=='pip':
            return True

    return installed_any
def check_requirements(requirementfile='requirements.txt'):
    """
    Check that every requirement of the file is fulfilled.

    @param requirementfile: the path of the requirement file
    @raise RequirementError: on the first requirement not fulfilled
    """
    requirements = get_requirements(requirementfile)

    log.info("Required packages for the installation :")
    for requirement in requirements:
        if is_installed(requirement):
            continue
        raise RequirementError(" Missing requirement : %s -- Package installation stopped" % requirement)
def parse_package_requirement(requirement):
    """
    Split a requirement string into its three components.

    @param requirement: a string like `name`, `name>=1.0` or
                        `name >= 1.0`
    @return: a tuple (project, relation, version); relation and
             version are None when the requirement has no version
             constraint. White spaces around the project name and the
             version are removed.
    @raise DistutilsError: if no project name can be extracted
    """
    version_pattern = re.compile('[=><]+(.*)$')
    project_pattern = re.compile('[^=><]+')
    relationship_pattern = re.compile('[=><]+')

    try:
        requirement_project = project_pattern.search(requirement).group(0).strip()
        requirement_version = version_pattern.search(requirement)
        if requirement_version is not None:
            requirement_version=requirement_version.group(1).strip()
        requirement_relation= relationship_pattern.search(requirement)
        if requirement_relation is not None:
            requirement_relation=requirement_relation.group(0)
    except (AttributeError, TypeError):
        # AttributeError: no project name matched (search gave None);
        # TypeError: the requirement is not a string. Anything else
        # (KeyboardInterrupt included) is no longer swallowed.
        raise DistutilsError("Requirement : %s not correctly formated" % requirement)

    return requirement_project,requirement_relation,requirement_version
def get_package_requirement(package,requirementfile='requirements.txt'):
    """
    Look for the requirement line concerning a package.

    @param package: the beginning of the requirement line looked for
    @param requirementfile: the path of the requirement file
    @return: the requirement line if exactly one line starts by
             `package`, None otherwise
    """
    matching = [line for line in get_requirements(requirementfile)
                if line.startswith(package)]

    return matching[0] if len(matching)==1 else None
def pip_install_package(package,directory=None,upgrade=True):
    """
    Install a package with pip for the running python.

    pip is run as `<running python> -m pip install ...` in a child
    process: the in-process `pip.main` entry point used before is not
    available in pip >= 10.

    @param package: the requirement to install
    @param directory: an optional target directory (`--target`)
    @param upgrade: if True the `--upgrade` option is added
    @return: the exit status of pip (0 on success)
    @rtype: int
    """
    import subprocess
    import sys

    if directory is not None:
        log.info(' installing %s in directory %s' % (package,str(directory)))

    # Reuse the http proxy for https when only the former is declared
    if 'http_proxy' in os.environ and 'https_proxy' not in os.environ:
        os.environ['https_proxy']=os.environ['http_proxy']

    args = [sys.executable, '-m', 'pip', 'install']

    if upgrade:
        args.append('--upgrade')

    if 'https_proxy' in os.environ:
        args.append('--proxy=%s' % os.environ['https_proxy'])

    if directory is not None:
        args.append('--target=%s' % directory)

    args.append(package)

    return subprocess.call(args)

View File

@ -1,138 +0,0 @@
'''
Created on 24 mai 2015
@author: coissac
'''
from distutils.version import StrictVersion
from distutils import sysconfig
import subprocess
import os
import glob
import re
from obidistutils.serenity.checksystem import is_windows_system
import sys
def is_python_version(path=None,minversion='3.4',maxversion=None):
    '''
    Tell if a python interpreter has a version in the range
    [minversion,maxversion[.

    @param path: the path of the python to check, or None to check
                 the running python
    @param minversion: the minimum accepted version
    @param maxversion: the first rejected version, or None for no
                       upper limit
    @return: True if the python version is in the range
    @rtype: bool
    '''
    if path is None:
        version_text = sysconfig.get_python_version()
    else:
        # Ask the other interpreter for its own version
        command = """'%s' -c 'from distutils import sysconfig; """ \
                  """print(sysconfig.get_python_version())'""" % path

        p = subprocess.Popen(command,
                             shell=True,
                             stdout=subprocess.PIPE)
        version_text = str(p.communicate()[0],'utf8').strip()

    pythonversion = StrictVersion(version_text)

    if not pythonversion >= StrictVersion(minversion):
        return False

    return maxversion is None or pythonversion < StrictVersion(maxversion)
def lookfor_good_python(minversion='3.4',maxversion=None,followLink=False):
    '''
    Look for all python interpreters present in the system path that
    match the version constraints.

    Only the files named `python`, `pythonX` or `pythonX.Y` are
    considered (X and Y can have several digits, e.g. python3.10).
    Nothing is looked for on a windows system.

    @param minversion: the minimum version to consider
    @param maxversion: the maximum version to consider (strictly inferior to)
    @param followLink: a boolean value indicating if link must be substituted
                       by their real path.

    @return: a list of path to interpreters
    '''
    exe = []

    if is_windows_system():
        return exe

    # Raw string, compiled once, and matched against the file name
    # only so that names like `ipython` are not accepted.
    pythonpat=re.compile(r'python([0-9]+(\.[0-9]+)?)?$')

    for p in os.environ['PATH'].split(os.pathsep):
        pexe = []
        for e in glob.glob(os.path.join(p,'python*')):
            if pythonpat.match(os.path.basename(e)) is None:
                continue
            if followLink and os.path.islink(e):
                e = os.path.realpath(e)
            # Each interpreter is kept once per directory, in the
            # order in which it was found.
            if (e not in pexe and
                os.path.isfile(e) and
                os.access(e, os.X_OK) and
                is_python_version(e,minversion,maxversion)):
                pexe.append(e)
        exe.extend(pexe)

    return exe
def is_a_virtualenv_python(path=None):
    '''
    Check if the python is belonging a virtualenv

    @param path: the path pointing to the python executable.
                 if path is None then the running python is
                 considered.
    @type path: str or None

    @return: True if the python belongs a virtualenv
             False otherwise
    @rtype: bool
    '''
    if path is None:
        return sys.base_exec_prefix != sys.exec_prefix

    command = """'%s' -c 'import sys; print(sys.base_exec_prefix != sys.exec_prefix)'""" % path
    p = subprocess.Popen(command,
                         shell=True,
                         stdout=subprocess.PIPE)
    answer = str(p.communicate()[0],'utf8').strip()

    # The child prints True or False: compare the text instead of
    # passing the output of another process to eval(). Any other
    # output (e.g. nothing when the command fails) gives False.
    return answer == 'True'
def which_virtualenv(path=None,full=False):
    '''
    Give the name of the virtualenv a python belongs to.

    The virtualenv directory is the one containing the `bin`
    component of the python path.

    @param path: the path to a python binary or None
                 if you want to consider the running python
    @type path: str or None

    @param full: if set to True, returns the absolute path,
                 otherwise only return a simple directory name
    @type full: bool

    @return: the virtual environment name or None if the
             path does not belong a virtualenv
    @rtype: str or None
    '''
    if path is None:
        path = sys.executable

    if not is_a_virtualenv_python(path):
        return None

    parts = path.split(os.sep)
    try:
        bin_index = parts.index('bin')
    except ValueError:
        # No `bin` directory in the path
        return None

    if full:
        return os.path.realpath(os.sep.join(parts[0:bin_index]))
    return parts[bin_index-1]

View File

@ -1,18 +0,0 @@
'''
Created on 24 mai 2015
@author: coissac
'''
from distutils import util
def is_mac_system():
    """Tell if the distutils platform name starts by `macosx-`
    (or is exactly `macosx`)."""
    system_name, _sep, _rest = util.get_platform().partition('-')
    return system_name=='macosx'
def is_windows_system():
    """
    Tell if the installation runs on a windows system.

    The distutils platform names of windows are `win32`, `win-amd64`,
    `win-arm32` and `win-arm64`: the first component is therefore
    `win32` or `win`, never `Windows` as previously tested (the
    function always returned False).

    @rtype: bool
    """
    platform = util.get_platform().split('-')[0]

    return platform in ('win', 'win32')

View File

@ -1,14 +0,0 @@
'''
Created on 24 mai 2015
@author: coissac
'''
# Module-level lists shared by the modules importing them. They are
# created empty here and are filled in place by their users.
saved_args=[]
tmpdir=[]
local_pip=[]
local_virtualenv=[]
local_cython=[]
local_serenity=[]

View File

@ -1,61 +0,0 @@
'''
Created on 24 mai 2015
@author: coissac
'''
import sys
import os
from distutils import log
from distutils.errors import DistutilsError
from obidistutils.serenity.globals import saved_args
from obidistutils.serenity.checkpython import is_python_version,\
lookfor_good_python
def rerun_with_anothe_python(path, minversion='3.4',maxversion=None, fork=False):
    """
    Restart the running script with another python interpreter.

    The command line saved in `saved_args` is used when available,
    `sys.argv` otherwise. This function does not return.

    @param path: the path of the python interpreter to use
    @param minversion: the minimum version accepted for that python
    @param maxversion: the first rejected version or None
    @param fork: if True the script is run in a child process and the
                 current process exits with the status of the child,
                 otherwise the current process is replaced.
    @raise DistutilsError: if the python at `path` has not a suitable
                           version
    """
    import subprocess

    # Always work on a copy: inserting the interpreter path must not
    # modify the saved command line.
    if saved_args:
        args = list(saved_args)
    else:
        args = list(sys.argv)

    # An explicit test is used since assert statements are removed
    # when python runs with -O.
    if not is_python_version(path,minversion,maxversion):
        raise DistutilsError('the selected python is not adapted to the installation of this package')

    args.insert(0, path)

    sys.stderr.flush()
    sys.stdout.flush()

    if fork:
        log.info('Forking a new install process')
        # The argument list is given as it is, without going through
        # a shell: arguments containing spaces or quotes are kept.
        status = subprocess.call(args)
        log.info('External process ended')
        sys.exit(status)
    else:
        log.info('Install script restarting...')
        os.execv(path,args)
def enforce_good_python(minversion='3.4',maxversion=None, fork=False):
    """
    Make sure that the script runs with a suitable python version.

    When the running python is not in the requested version range,
    the script is restarted with the first suitable python found.

    @param minversion: the minimum version accepted
    @param maxversion: the first rejected version or None
    @param fork: passed to `rerun_with_anothe_python`
    @return: True if the running python is suitable
    @raise DistutilsError: if no suitable python is found
    """
    if is_python_version(minversion=minversion,maxversion=maxversion):
        log.info('You are running the good python')
        return True

    goodpython = lookfor_good_python(minversion,maxversion)

    if not goodpython:
        raise DistutilsError('No good python identified on your system')

    goodpython=goodpython[0]

    log.warn("========================================")
    log.warn("")
    log.warn(" Switching to python : %s" % goodpython)
    log.warn("")
    log.warn("========================================")

    # The version constraints and the fork flag were not passed on:
    # the rerun used its own default values whatever the caller asked.
    rerun_with_anothe_python(goodpython,minversion,maxversion,fork)

View File

@ -1,35 +0,0 @@
'''
Created on 2 oct. 2014
@author: coissac
'''
# ASCII art kept as a plain multi-line string.
snake ="""
___
,'._,`.
(-.___.-)
(-.___.-)
`-.___.-'
(( @ @| . __
\ ` | ,\ |`. @| | | _.-._
__`.`=-=mm===mm:: | | |`. | | | ,'=` '=`.
( `-'|:/ /:/ `/ @| | | |, @| @| /---)W(---\
\ \ / / / / @| | ' (----| |----) ,~
|\ \ / /| / / @| \---| |---/ |
| \ V /||/ / `.-| |-,' |
| `-' |V / \| |/ @'
| , |-' __| |__
| .;: _,-. ,--""..| |..""--.
;;:::' " ) (`--::__|_|__::--')
,-" _, / \`--...___...--'/
( -:--'/ / /`--...___...--'\
"-._ `"'._/ /`---...___...---'\
"-._ "---. (`---....___....---')
.' ",._ ,' ) |`---....___....---'|
/`._| `| | (`---....___....---')
( \ | / \`---...___...---'/
`. `, ^"" `:--...___...--;'
`.,' hh `-._______.-'
"""

View File

@ -1,27 +0,0 @@
'''
Created on 2 oct. 2014
@author: coissac
'''
import sys
import tempfile
from obidistutils.serenity.globals import tmpdir # @UnusedImport
from obidistutils.serenity.globals import saved_args # @UnusedImport
def get_serenity_dir():
    """
    Return the path of the serenity temporary directory.

    The directory is created with `tempfile.mkdtemp()` at the first
    call and its path is kept in the shared `tmpdir` list.
    """
    if len(tmpdir)==0:
        created = tempfile.mkdtemp()
        tmpdir.append(created)
    return tmpdir[0]
def save_argv():
    """Replace in place the content of the shared `saved_args` list
    by a copy of the current `sys.argv`."""
    saved_args[:] = list(sys.argv)

View File

@ -1,69 +0,0 @@
'''
Created on 2 oct. 2014
@author: coissac
'''
import os
import sys
import venv
from distutils.errors import DistutilsError
from .globals import local_virtualenv # @UnusedImport
from .checkpython import which_virtualenv,\
is_python_version, \
is_a_virtualenv_python
def serenity_virtualenv(envname, package, version, minversion='3.4', maxversion=None):
    '''
    Return the python interpreter of the virtualenv `envname`, creating the
    virtualenv when it does not exist yet.

    @param envname: directory of the virtualenv
    @param package: not used here, only forwarded to the recursive call
    @param version: not used here, only forwarded to the recursive call
    @param minversion: lowest accepted python version
    @param maxversion: highest accepted python version, `None` for no limit

    @return: `sys.executable` when already running inside `envname` with a
             suitable python, otherwise the real path of
             `<envname>/bin/python`
    @raise DistutilsError: when `envname` already exists as a directory but
             does not hold a virtualenv with a suitable python
    '''
    #
    # Checks if we are already running under the good virtualenv
    #
    ve = which_virtualenv(full=True)
    if (ve == os.path.realpath(envname)
            and is_python_version(minversion=minversion,
                                  maxversion=maxversion)):
        return sys.executable

    #
    # Check if the virtualenv exist
    #
    if os.path.isdir(envname):
        python = os.path.join(envname, 'bin', 'python')
        ok = (is_python_version(python,
                                minversion=minversion,
                                maxversion=maxversion)
              and is_a_virtualenv_python(python))

        #
        # The virtualenv already exist but it is not ok
        #
        if not ok:
            # The placeholder is now filled in with the offending directory.
            raise DistutilsError(
                "A virtualenv %s already exists but not with the required python"
                % (envname,))

        return os.path.realpath(python)

    #
    # Creates a new virtualenv
    #
    venv.create(envname,
                system_site_packages=False,
                clear=True,
                symlinks=False,
                with_pip=True)

    # Check the newly created virtualenv against the caller's version bounds
    # (they used to be dropped here, silently falling back to the defaults).
    return serenity_virtualenv(envname, package, version,
                               minversion=minversion,
                               maxversion=maxversion)

View File

@ -1,24 +0,0 @@
/*
* littlebigman.c
*
* Created on: 11 juil. 2012
* Author: coissac
*/
#include<stdio.h>
/*
 * Print the compiler flag describing the byte order of the machine:
 * "-DLITTLE_END" when the last byte of the probe integer holds 0x01,
 * "-DBIG_END" otherwise. No trailing newline is written.
 */
int main(int argc, char *argv[])
{
    /* The same storage viewed as one integer and as its separate bytes. */
    union {
        int  word;
        char bytes[4];
    } probe;
    const char *flag;

    probe.word = 0x01020304;

    if (probe.bytes[3] == 1)
        flag = "-DLITTLE_END";
    else
        flag = "-DBIG_END";

    fputs(flag, stdout);

    return 0;
}

View File

@ -1,24 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <libproc.h>
/*
 * Print the executable path of the process whose PID is given as the first
 * command-line argument, as reported by proc_pidpath().
 *
 * On success "proc <pid>: <path>" is written to stdout; on failure a
 * two-line diagnostic is written to stderr. Nothing is printed when no
 * argument is given. The exit status is 0 in every case.
 */
int main (int argc, char* argv[])
{
    pid_t pid;
    int   ret;
    int   saved_errno;
    char  pathbuf[PROC_PIDPATHINFO_MAXSIZE];

    if ( argc > 1 ) {
        pid = (pid_t) atoi(argv[1]);
        ret = proc_pidpath (pid, pathbuf, sizeof(pathbuf));
        if ( ret <= 0 ) {
            /* Save errno at once: the first fprintf() may overwrite it
               before strerror() gets a chance to read it. */
            saved_errno = errno;
            fprintf(stderr, "PID %d: proc_pidpath ();\n", pid);
            fprintf(stderr, " %s\n", strerror(saved_errno));
        } else {
            printf("proc %d: %s\n", pid, pathbuf);
        }
    }

    return 0;
}

3
doc/.gitignore vendored
View File

@ -1,3 +0,0 @@
/build/
/doxygen/
/build_dir.txt

File diff suppressed because it is too large Load Diff

View File

@ -1,203 +0,0 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = build
DOXYGENDIR = doxygen
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# Targets that are command names rather than files to build. The list now
# also covers applehelp, latexpdfja, texinfo, info, xml and pseudoxml.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest coverage xml pseudoxml
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " applehelp to make an Apple Help Book"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
@echo " coverage to run coverage check of the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
rm -rf $(DOXYGENDIR)/*
html:
@echo "Generating Doxygen documentation..."
doxygen Doxyfile
@echo "Doxygen documentation generated. \n"
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OBITools-3.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OBITools-3.qhc"
applehelp:
$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
@echo
@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
@echo "N.B. You won't be able to view it unless you put it in" \
"~/Library/Documentation/Help or install it in your application" \
"bundle."
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/OBITools-3"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/OBITools-3"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
@echo "Generating Doxygen documentation..."
doxygen Doxyfile
@echo "Doxygen documentation generated. \n"
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
@echo "Generating Doxygen documentation..."
doxygen Doxyfile
@echo "Doxygen documentation generated. \n"
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
# Build the Texinfo sources, then run them through makeinfo.
# The sub-make is started through $(MAKE), like the latexpdf targets do,
# so that command-line flags reach it.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
coverage:
$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
@echo "Testing of coverage in the sources finished, look at the " \
"results in $(BUILDDIR)/coverage/python.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

View File

@ -1,4 +0,0 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

View File

@ -1,160 +0,0 @@
*********************************************
The OBItools3 Data Management System (OBIDMS)
*********************************************
A complete DNA metabarcoding experiment relies on several kinds of data.
- The sequence data resulting from the sequencing of the PCR products,
- The description of the samples including all their metadata,
- One or several reference databases used for the taxonomic annotation,
- One or several taxonomy databases.
Up to now, each of these categories of data was stored in separate
files, and nothing made it mandatory to keep them together.
The `Data Management System` (DMS) of OBITools3 can be regarded as a basic
database system.
OBIDMS UML
==========
.. image:: ./UML/OBIDMS_UML.png
:download:`html version of the OBIDMS UML file <UML/ObiDMS_UML.class.violet.html>`
An OBIDMS directory contains :
* one `OBIDMS history file <#obidms-history-files>`_
* Two different kinds of directories :
* OBIDMS column directories
* OBIDMS column group directories containing OBIDMS column directories
OBIDMS column directories
=========================
OBIDMS column directories contain :
* all the different versions of one OBIDMS column, in the form of different files (`OBIDMS column files <#obidms-column-files>`_)
* one `OBIDMS version file <#obidms-version-files>`_
The directory name is the column attribute, or sub-attribute if the column directory is in a column group directory.
OBIDMS column group directories
===============================
OBIDMS column group directories contain OBIDMS column directories. They are used to store dictionary-like data, where
each key corresponds to an OBIDMS column.
The directory name is the dictionary attribute. Each key is considered a sub-attribute and is associated with its column.
OBIDMS column files
===================
Each OBIDMS column file contains :
* a header of a size equal to a multiple of PAGESIZE (PAGESIZE being equal to 4096 bytes
on most systems) containing metadata
* one column of data with the same `OBIType <types.html#obitypes>`_
Header
------
The header of an OBIDMS column contains :
* Endian byte order
* Header size (PAGESIZE multiple)
* Number of lines of data
* Number of lines of data used
* `OBIType <types.html#obitypes>`_ (type of the data)
* Date of creation of the file
* Version of the OBIDMS column
* The column name
* Optional comments
Data
----
A column of data with the same `OBIType <types.html#obitypes>`_.
Mandatory columns
-----------------
Some columns must exist in an OBIDMS directory :
* sequence identifiers column (type ``OBIStr_t``)
File name
---------
Each file is named with the attribute associated with the data it contains, and the number of
its version, separated by an ``@``, and with the extension ``.odc``.
Example : ``count@3.odc``
Modifications
-------------
An OBIDMS column file can only be modified by the process that created it, and while its status is set to Open.
When a process wants to modify an OBIDMS column file that is closed, it must first clone it. Cloning creates a new version of the
file that belongs to the process, i.e., only that process can modify that file, as long as its status is set to Open. Once the process
has finished writing the new version of the column file, it sets the column file's status to Closed, and the file can never be modified
again.
That means that one column is stored in one file (if there is only one version)
or more (if there are several versions), and that there is one file per version.
All the versions of one column are stored in one directory.
Versioning
----------
The first version of a column file is numbered 0, and each new version increments that
number by 1.
The number of the latest version of an OBIDMS column is stored in the `OBIDMS version file <#obidms-version-files>`_ of its directory.
OBIDMS version files
====================
Each OBIDMS column is associated with an OBIDMS version file in its directory, that contains the number of the latest
version of the column.
File name
---------
OBIDMS version files are named with the attribute associated with the data contained in the column, and
have the extension ``.odv``.
Example : ``count.odv``
OBIDMS views
============
An OBIDMS view consists of a list of OBIDMS columns and lines. A view includes one version
of each mandatory column. Only one version of each column is included. All the columns of
one view contain the same number of lines in the same order.
OBIDMS history file
===================
An OBIDMS history file consists of an ordered list of views and commands, those commands leading
from one view to the next one.
This history can be represented in the form of a diagram showing all the
operations ever done in the OBIDMS directory and the views in between them :
.. image:: ./images/history.png
:width: 150 px
:align: center

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

View File

@ -1,832 +0,0 @@
<HTML>
<HEAD>
<META name="description"
content="Violet UML Editor cross format document" />
<META name="keywords" content="Violet, UML" />
<META charset="UTF-8" />
<SCRIPT type="text/javascript">
function switchVisibility() {
var obj = document.getElementById("content");
obj.style.display = (obj.style.display == "block") ? "none" : "block";
}
</SCRIPT>
</HEAD>
<BODY>
This file was generated with Violet UML Editor 2.1.0.
&nbsp;&nbsp;(&nbsp;<A href=# onclick="switchVisibility()">View Source</A>&nbsp;/&nbsp;<A href="http://sourceforge.net/projects/violet/files/violetumleditor/" target="_blank">Download Violet</A>&nbsp;)
<BR />
<BR />
<SCRIPT id="content" type="text/xml"><![CDATA[<ClassDiagramGraph id="1">
<nodes id="2">
<ClassNode id="3">
<children id="4"/>
<location class="Point2D.Double" id="5" x="520.0" y="30.0"/>
<id id="6" value="a6688f6e-9346-46c6-9cf5-4fa6148f613f"/>
<revision>1</revision>
<backgroundColor id="7">
<red>255</red>
<green>255</green>
<blue>255</blue>
<alpha>255</alpha>
</backgroundColor>
<borderColor id="8">
<red>0</red>
<green>0</green>
<blue>0</blue>
<alpha>255</alpha>
</borderColor>
<textColor reference="8"/>
<name id="9" justification="1" size="3" underlined="false">
<text>OBIType_t
</text>
</name>
<attributes id="10" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="11" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="12">
<children id="13"/>
<location class="Point2D.Double" id="14" x="780.0" y="100.0"/>
<id id="15" value="7edd4f08-c5e5-4e41-bc05-8b357cb5e629"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="16" justification="1" size="3" underlined="false">
<text>OBIContainer_t</text>
</name>
<attributes id="17" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="18" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="19">
<children id="20"/>
<location class="Point2D.Double" id="21" x="330.0" y="110.0"/>
<id id="22" value="dbb15831-2f0b-4e97-83e7-5ecdda6d6075"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="23" justification="1" size="3" underlined="false">
<text>OBIElementary_t</text>
</name>
<attributes id="24" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="25" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="26">
<children id="27"/>
<location class="Point2D.Double" id="28" x="670.0" y="240.0"/>
<id id="29" value="9693da23-1b47-4bf3-9544-86390a533713"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="30" justification="1" size="3" underlined="false">
<text>OBIList_t</text>
</name>
<attributes id="31" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="32" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="33">
<children id="34"/>
<location class="Point2D.Double" id="35" x="780.0" y="240.0"/>
<id id="36" value="b2f4d561-0c10-4443-b8f6-d3628ab9bcfe"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="37" justification="1" size="3" underlined="false">
<text>OBISet_t</text>
</name>
<attributes id="38" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="39" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="40">
<children id="41"/>
<location class="Point2D.Double" id="42" x="890.0" y="240.0"/>
<id id="43" value="8cc209c6-18c7-4a90-a5d4-ab7246638b2f"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="44" justification="1" size="3" underlined="false">
<text>OBIDictionnary_t</text>
</name>
<attributes id="45" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="46" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="47">
<children id="48"/>
<location class="Point2D.Double" id="49" x="170.0" y="220.0"/>
<id id="50" value="cb77086b-7535-49dc-ab33-b58d16eec496"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="51" justification="1" size="3" underlined="false">
<text>OBIAtomic_t</text>
</name>
<attributes id="52" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="53" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="54">
<children id="55"/>
<location class="Point2D.Double" id="56" x="500.0" y="240.0"/>
<id id="57" value="5a32037d-eaf1-4bbc-977e-06589f1d2ca5"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="58" justification="1" size="3" underlined="false">
<text>OBIComposite_t
</text>
</name>
<attributes id="59" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="60" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="61">
<children id="62"/>
<location class="Point2D.Double" id="63" x="560.0" y="400.0"/>
<id id="64" value="84fa636d-0c5a-4df8-bac7-79df8e546c12"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="65" justification="1" size="3" underlined="false">
<text>OBIString_t</text>
</name>
<attributes id="66" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="67" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="68">
<children id="69"/>
<location class="Point2D.Double" id="70" x="450.0" y="400.0"/>
<id id="71" value="752b5f9b-0ece-4902-8de5-e2c465d281dd"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="72" justification="1" size="3" underlined="false">
<text>OBITaxid_t
</text>
</name>
<attributes id="73" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="74" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="75">
<children id="76"/>
<location class="Point2D.Double" id="77" x="220.0" y="400.0"/>
<id id="78" value="9b89f530-cedc-4b33-a36f-371ccfb2ffae"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="79" justification="1" size="3" underlined="false">
<text>OBIInteger_t
</text>
</name>
<attributes id="80" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="81" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="82">
<children id="83"/>
<location class="Point2D.Double" id="84" x="0.0" y="400.0"/>
<id id="85" value="01da8ca2-da98-4fde-9ffe-9753a3f202bf"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="86" justification="1" size="3" underlined="false">
<text>OBIFloat_t</text>
</name>
<attributes id="87" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="88" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="89">
<children id="90"/>
<location class="Point2D.Double" id="91" x="110.0" y="400.0"/>
<id id="92" value="1134dbf0-087e-4c9b-be7a-6157bb10ebf0"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="93" justification="1" size="3" underlined="false">
<text>OBIBool_t
</text>
</name>
<attributes id="94" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="95" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
<ClassNode id="96">
<children id="97"/>
<location class="Point2D.Double" id="98" x="330.0" y="400.0"/>
<id id="99" value="a78dbab0-8879-4149-b5c9-aed41e7da0bb"/>
<revision>1</revision>
<backgroundColor reference="7"/>
<borderColor reference="8"/>
<textColor reference="8"/>
<name id="100" justification="1" size="3" underlined="false">
<text>OBIChar_t</text>
</name>
<attributes id="101" justification="0" size="4" underlined="false">
<text></text>
</attributes>
<methods id="102" justification="0" size="4" underlined="false">
<text></text>
</methods>
</ClassNode>
</nodes>
<edges id="103">
<InheritanceEdge id="104">
<start class="ClassNode" reference="12"/>
<end class="ClassNode" reference="3"/>
<startLocation class="Point2D.Double" id="105" x="50.0" y="10.0"/>
<endLocation class="Point2D.Double" id="106" x="70.0" y="40.0"/>
<transitionPoints id="107"/>
<id id="108" value="debdd86e-d072-4413-9d4c-dcabf70e44f9"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="109">
<start class="ClassNode" reference="19"/>
<end class="ClassNode" reference="3"/>
<startLocation class="Point2D.Double" id="110" x="90.0" y="10.0"/>
<endLocation class="Point2D.Double" id="111" x="20.0" y="50.0"/>
<transitionPoints id="112"/>
<id id="113" value="1491704a-dd29-47dc-92e4-2b53c62fd634"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="114">
<start class="ClassNode" reference="47"/>
<end class="ClassNode" reference="19"/>
<startLocation class="Point2D.Double" id="115" x="70.0" y="10.0"/>
<endLocation class="Point2D.Double" id="116" x="40.0" y="40.0"/>
<transitionPoints id="117"/>
<id id="118" value="8475a565-b6dd-404b-8dd3-07f89b3a2853"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="119">
<start class="ClassNode" reference="54"/>
<end class="ClassNode" reference="19"/>
<startLocation class="Point2D.Double" id="120" x="60.0" y="20.0"/>
<endLocation class="Point2D.Double" id="121" x="80.0" y="40.0"/>
<transitionPoints id="122"/>
<id id="123" value="a8696d02-b718-4800-bacb-d076aa2ed3ce"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="124">
<start class="ClassNode" reference="68"/>
<end class="ClassNode" reference="54"/>
<startLocation class="Point2D.Double" id="125" x="50.0" y="20.0"/>
<endLocation class="Point2D.Double" id="126" x="40.0" y="30.0"/>
<transitionPoints id="127"/>
<id id="128" value="4c4010cd-e981-4051-8cf6-a28ebc154bc7"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="129">
<start class="ClassNode" reference="61"/>
<end class="ClassNode" reference="54"/>
<startLocation class="Point2D.Double" id="130" x="50.0" y="10.0"/>
<endLocation class="Point2D.Double" id="131" x="80.0" y="50.0"/>
<transitionPoints id="132"/>
<id id="133" value="b99845fa-28f7-4625-9228-313aa4a9cd17"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="134">
<start class="ClassNode" reference="26"/>
<end class="ClassNode" reference="12"/>
<startLocation class="Point2D.Double" id="135" x="50.0" y="10.0"/>
<endLocation class="Point2D.Double" id="136" x="50.0" y="50.0"/>
<transitionPoints id="137"/>
<id id="138" value="3410cc22-8aee-4667-9dcf-8f24edfe9ce2"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="139">
<start class="ClassNode" reference="33"/>
<end class="ClassNode" reference="12"/>
<startLocation class="Point2D.Double" id="140" x="60.0" y="10.0"/>
<endLocation class="Point2D.Double" id="141" x="70.0" y="50.0"/>
<transitionPoints id="142"/>
<id id="143" value="0316447c-b7b7-480a-80cd-87d66ab6452b"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="144">
<start class="ClassNode" reference="40"/>
<end class="ClassNode" reference="12"/>
<startLocation class="Point2D.Double" id="145" x="50.0" y="10.0"/>
<endLocation class="Point2D.Double" id="146" x="90.0" y="50.0"/>
<transitionPoints id="147"/>
<id id="148" value="d995fa9f-7a1d-4340-b519-d29256c972ce"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="149">
<start class="ClassNode" reference="82"/>
<end class="ClassNode" reference="47"/>
<startLocation class="Point2D.Double" id="150" x="60.0" y="20.0"/>
<endLocation class="Point2D.Double" id="151" x="30.0" y="20.0"/>
<transitionPoints id="152"/>
<id id="153" value="cdf54d62-5b64-46ab-8fdc-ee0184ea9a43"/>
<revision>1</revision>
<bentStyle name="STRAIGHT"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="154">
<start class="ClassNode" reference="89"/>
<end class="ClassNode" reference="47"/>
<startLocation class="Point2D.Double" id="155" x="80.0" y="20.0"/>
<endLocation class="Point2D.Double" id="156" x="20.0" y="30.0"/>
<transitionPoints id="157"/>
<id id="158" value="9d2d9603-b709-499b-8357-4d844bfa227d"/>
<revision>1</revision>
<bentStyle name="STRAIGHT"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="159">
<start class="ClassNode" reference="75"/>
<end class="ClassNode" reference="47"/>
<startLocation class="Point2D.Double" id="160" x="50.0" y="10.0"/>
<endLocation class="Point2D.Double" id="161" x="70.0" y="50.0"/>
<transitionPoints id="162"/>
<id id="163" value="4c3e06c4-c978-410f-8925-32a64eea92f8"/>
<revision>1</revision>
<bentStyle name="STRAIGHT"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<InheritanceEdge id="164">
<start class="ClassNode" reference="96"/>
<end class="ClassNode" reference="47"/>
<startLocation class="Point2D.Double" id="165" x="20.0" y="10.0"/>
<endLocation class="Point2D.Double" id="166" x="90.0" y="50.0"/>
<transitionPoints id="167"/>
<id id="168" value="37c12090-8ace-4fbe-b7f9-e86eb9bf2805"/>
<revision>1</revision>
<bentStyle name="STRAIGHT"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</InheritanceEdge>
<CompositionEdge id="169">
<start class="ClassNode" reference="12"/>
<end class="ClassNode" reference="19"/>
<startLocation class="Point2D.Double" id="170" x="10.0" y="50.0"/>
<endLocation class="Point2D.Double" id="171" x="70.0" y="20.0"/>
<transitionPoints id="172"/>
<id id="173" value="7df9ed88-49f1-4828-9f45-b9d693d877aa"/>
<revision>1</revision>
<bentStyle name="AUTO"/>
<startLabel></startLabel>
<middleLabel></middleLabel>
<endLabel></endLabel>
</CompositionEdge>
</edges>
</ClassDiagramGraph>]]></SCRIPT>
<BR />
<BR />
<IMG alt="embedded diagram image" src="
JyNFIJt0TFI0LNab7tDo3HTfbk1F5+akJpvUsh02qWilo7u2sa4VZ6PiVjY5yWlpmy1W2RZbtjwt
3WFZsbtkeKvBETbxNCIp50gL6Yh+Xuv7eH3nO78YmBnm1+PxB2fm4vr5vF7X+/24Zq65rssuAAAA
2HAZDAcKBgCCtDEnAgAAsLN8QiArAMDyAQAAcyUrAAAsHwAAMFeyAgDA8gEAAHMNzqy+uARpAACW
DwAAWD6WDwCA5QMAAJaP5QMAYPkAAIDlY/kAgOUDAABg+Vg+AGD5AACA5WP5WD4AYPkAAIDlY/kA
AFg+AABg+Vg+AACWDwAAWD6WDwA0UEQAAABYPpYPAFg+AABg+YDlAwCWDwAAWD6WDwCA5QMAAJaP
5QMAYPkAAIDlY/kAAFg+AABg+Vg+AGD5AACA5QOWDwBYPgAAYPlYPgAAlg8AAFg+lg8AgOUDAACW
j+UDAGD5AACA5WP5AIDlAwBA2NHV1eUnyz9//nxHR4eb+UeC5VssFmoMALB8AAAIgJ6WlZUNDAz4
0PLPnDmzadOmmJiYqKgomUlcXNzLL788ODio/nvllVdedpFZs2atXr36m2++0YYnJCTIi2uuueYy
Z3z55Zd+CuHs2bP+sPyamhq+DwEALB8AAAJj+UVFRQaDwWw2W61Wn1j+I488ItOWlJScPn365MmT
a9askbfPPvusZvMLFy5sbm5+7LHHZPiGDRvsLL+jo+PwRRYvXiwjtLS0qLfeu7hTli5dOmPGDN9a
fnt7e1ZWlslkwvIBAMsHAIDAWL6yUlHS9PT0pqYmLy3/yJEjUVFRkydPPnfunBrS09MzZsyYK664
oq+vT9n8zTffLC/eeecdWcRzzz1nZ/kaGRkZMsK3336r3q5bt85oNB48eFC9ffLJJ+X8RGn6XXfd
tWPHjqSkpOTk5D/84Q9qhAMHDtx6662xsbELFiz4+OOPna6tnHuMHz8+Ojpa5rx3716fWH5paamc
NdXV1V3gtw0AgOUDAEAALV8hip+Wlia6L9I/YkPdtWuXTHjnnXfaDly0aJEMbG5uVjY/e/bszZs3
y9+cnJxTp055aPlvvPGGvP3pT38qr3t7e+W0Yc+ePWrCKVOmyClKfn7+uHHjrr76ahkoU82YMeP6
66/fvXt3ampqYmKidsmQLa2trbNmzZo6daqsdnd3tzcxyqxkJvPnz5dzD1k9b86UAACwfAAA8Jnl
K2pqagwGg6jqyAx169at6nId24H333+/DKyvr1dSPmHChJiYGL1ebzabz58/76Hlywsx++uuu05e
v/TSS+LT2oQzZ85Uo6kFHThw4M0335QXzzzzTE9Pz4YNG+T1/v37na6wnAmoEwNvkFXNy8uTMw11
JuMmXgAALB8AAJx7uW9xXIQI8eTJk+VfnZ2dw109dR3OsmXLbAfOnj1bfdp94dIVOwMDAy+88IIM
XLdunYeWL+Tm5qrvBIxG4yuvvKJNuGDBAvW6tLRURvjd7373/PPPy4tx48ZNuIT64N8fli8pybLm
zJnj+CPmy8IXjkQALB8AAHxs+X6dW0NDQ0pKyqpVq0a2IIvFoqxaU96vvvpKZjVt2jQ1RLsuf3Bw
cMqUKXFxcerjfE8s/+2335YhMrlM1d/f72j5q1evlhGOHj2qLu8pLi4ecoXF8qdPn+59jMuXL7/p
ppsOHDjgv50VrkUIAFg+AAD40fJbWlqys7OzsrLkhTcLeuqpp2TalStXdnV1tbW13X777fJ227Zt
mpQbjca33npr3bp1So614UNa/pkzZ6ZOnSoD169frw2UCSdNmlRdXf3hhx/K67lz58rAkydPynmF
nAy8/vrrHR0ddXV1siZO11bWU2bY1NSknTaMLMYvvvjixRdfTExM3Lhxo3aGg+UDAJYPAAABs3zR
8fz8/NTU1Pr6eu1+miNe0Llz5zZv3hwTE6Mu7RDbrqqqspVyNVx8/d577/3ss888t3xB1jM6Ovro
0aO2M5w5c+aSJUtkZJ1Op67+F0T65XRCLWv69Onvv/++07Xdu3evnBjIONu3b/fS8oVPPvmkqKhI
lqsuEMLyAQDLBwCAwFj+xo0bDQZDZWWlr+6XrxgcHGxra/P5s2+XLVt233332Q7Rrtjp7u7Wbt+p
cerUKe2ON26QabXfAXtj+ep++c3NzWlpaSO+6gnLBwAsHwAAy/d2biUlJU49OAhNbseOHbJWdje2
t70u3z1zHfBhjHbPvpVTpvLyciwfALB8AAAIgGC5+aw9CE3ugw8+2L17t93AP/3pT3/5y188mdzi
gP8sX1siRQgAWD4AAASRYGFy3ls+RQgAWD4AAGD5WD5FCABYPgAAlo/JYflYPgBg+QAACBYmh+VT
GwBYPgAAYPmA5VMbAFg+AABg+Vg+RQgAWD4AAGD5WD5FCABYPgAAlo/JYflYPgBg+QAACBYmh+VT
GwBYPgAAIFiYHJZPbQBg+QAAgOVj+Vg+AGD5AACA5WP5FCEAYPkAAIDlY/lYPgBg+QAACBYmh+VT
GwBYPgAAIFiYHJZPbQBg+QAAgOUDlg8AWD4AAGD5WD5FCABYPgAAYPlYPpYPAFg+AACChclh+dQG
AGD5AAAIFiaH5VMbAFg+AABg+YDlAwCWDwAAWD6WTxECAJYPAABYPpaP5QMAlg8AgGD5dEHgOVg+
AGD5AAAQYoL1BXgMRQgAWD4AAGD5WD5FCABYPgAA7TWWj+Vj+QCA5QMAIFgAFCEAlg8AAAgWAEUI
gOUDAACCBRQhAGD5AACAYAFFCABYPgAAIFhAEQIAlg8AgGABUIQAHLBEAACAYAFQhE63FzyHAwTL
BwAABAsoQraXrADLBwAAOlGgCNlesgIsHwCAThSAImR7yQrLBwAAOlEAipCDbuRZfXEJ0sDyAQAA
4QCKkO3F8gHLBwBAsAAoQrYXywcsHwAA4QCgCNleLB/LBwAAhAOAIuSgw/KxfAAAQDiAImR7sXws
H8sHgFFrcyGcnviIcADWy/Zi+YDlAwD9U7jlzA4FWhW2F8sHLB8A6J+wfACKkO3F8rF8AKB/Aiwf
gCLkoMPysXwAoH+C0ezb2KFAq8L20hIClg8A9E9YPgBFyPZi+Vg+ANA/AZYPQBFy0GH5WD4A0D8B
lg+0KmwvYPlYPgDQP9G3sUOBVoXtpSUELB8A6J/o29ihQKvC9tISApYPAPRPWD6A/4qQJ1sDlo/l
AwCWT9/GDgWgFaUlBCwfAOif6NvYoQC0orSEgOUDAP0Tlg8AHHRYPmD5APRPgOUDwAgPuvPnz3d0
dHR1dY14njKHY8eOHTx48PTp035d+cbGxo8//hjLx/IBAMsHLB8AXB50Z86c2bRpU0xMTFRUlAyP
i4t7+eWXBwcH1X+vvPJK9WvdWbNmrV69+ptvvtGGJyQkqNf9/f0/+9nPJk2aJKONHTtW/ur1+hGv
29mzZ92PIItesGCBn5JxXDqWj+UDAJaP5bNDAUKvFX3kkUfkbUlJyenTp0+ePLlmzRp5++yzz2pK
vXDhwubm5scee0yGb9iwwdHy165dK/+S+Rw/ftxqte7fv/+1114b2YotXbp0xowZ7sc5duzY119/
7Y9YnC4dy8fyAQDLx/LZoQAh1ooeOXIkKipq8uTJ586dU0N6enrGjBlzxRVX9PX1KZu/+eab5cU7
77wjUz333HN2lt/W1ibjz58///z5844Lkv9mZ2dPmTIlMTGxuLh4YGBA8+m77rpr165dc+bMWbJk
yWeffSYD5dRi/Pjx0dHRRqNx7969MuSHP/zhDTfcMHXq1LvvvltriNS0buYjHDhw4NZbb42NjV2w
YIF2eY8a+dVXX501a9bnn39ut6qOS8fysXwAwPKxfHYoQEi2ouLH8vrOO++0/e+iRYtkYHNzs7L5
2bNnb968Wf7m5OScOnXKzvJ37twpIz/66KOOS+nv7xe5j4uLq66uLiwslNHkrza5TqdLSUkpKCiQ
4bm5uTKwtbVV/FucXtaqu7v7wsXvGeT1u+++e/nll99///3atNoVO07n8+23386YMeP666/fvXt3
amqqrIO6AElGlvMN2ZC1a9d2dHTYra3j0rF8LB8AsHwsnx0KEJKt6NatW9XlOrb/FZ+WgfX19cqM
J0yYEBMTo9frzWaz9oG9ZvlO56Cora2Vf4l/X7j421zR/fHjx6svDWRyeasuvLnqqquuvfZaNYmo
+dVXX203H3FukXXtWho7y3ecz5tvvinLfeaZZ3p6ejZs2CCv9+/fr0aWrbBYLK5icbp0LB/LBwAs
Pwxz5jGcAOHdiqrrcJYtW2b739mzZ8vA1tbWC5eu2BkYGHjhhRdk4Lp16+wsf/fu3TJ88eLFjkt5
/vnn5V+VlZXqbWZmprw9evSonanPvoijZw8ODq5fv37evHmXX375uHHjtOF2lu84H7VcmWTCJfbs
2XPBg5/tYvlYPgBg+ZGSM30bQHi3ohaLRXmwdsX8V199Jf+dNm2aGqJdly/OPWXKlLi4OPVxvmb5
x44dE5+WSRzvwllTUyPDn3jiCfU2KSkpKirqu+++c2/506dPV69/+9vfql8CWK3WjIwMzy3/jTfe
kAmLi4vt1scTy9eWTkuI5QMAlo/lA0AIt6JPPfWUvF25cqVoeltb2+233y5vt23bppmx0Wh86623
1q1bJ8OXL1+uDdfusSMeL/+68cYb33//fTlJkJEffvjhb7/9tru7W10HL22I2WyWcbRfzbqyfFkN
Ga2pqam/v/+VV16R11u3bt25c6fId0xMzD//+U9PLP/kyZNyliInJK+//npHR0ddXZ1slyeWb7t0
WkIsHwCwfCwfAEK4FT137tzmzZvFodXlc+LHVVVV2n+1++VPnTr13nvv1W5iY2v5Z8+e/cUvfhEb
G6vGnDx58i233KKM/L333ktMTJSBY8aMueOOO7Q7YLqy/L17986dO1fG3759+6lTpxYuXCivlyxZ
8tBDD8mL1atXe2L5wocffignJ2p95AxBTj88sXzbpdMSYvkA4C/cPILRE8v3/jmOdjz99NO/+tWv
sHzBarVSnwBh9lnJ4OBgW1ubl8++7ezsPHLkiOMtNY8fP2776fiQdHd3azPRnsN14sSJM2fODGuV
5Dyht7d3uBtiu3QsH8sHAL90QmVlZdqlop5bvvfPcdTGUcjcLvj5aYuuGPIZkH6ds9O+7cc//rEM
/4//+I8R9J0AELSWH2nMdWC4n3cAlg8AI++EioqKDAaD2Wy2+/DYff/k/XMc5XV8fPzhS6gzgdG3
fE+eAenXOdv1be+88860adO0k5/o6Ggxfj7XB8DyQxGLA1g+lg8Ao9oJtbe3m0ym9PT0pqYmT/on
75/jaPdaw9byXT1V8YEHHti+fXtSUpKcSLS2tv76179OTEyUMbXHr7h5HKM3z4BUj3JcuXKljHzw
4EH1ryeffFJOkxwjcvV8Rzd924cffvjv//7vEqPjHTAnTZq0Y8cOyhUAyw/jrLB8LB8A/NUJieKn
paWJ7ov0u++fvH+Oo3otMl19kZ07d9pZvpunKk6bNk3WUz1QxmAwyMnJPffco51LuJnQm2dAao9y
fOmll2Tyn/70pzK8t7dXTmzULaLtcPV8R6e7oKWl5cEHHxw7dqz7u93LtnzwwQcULQCWj+UDlg8A
w+6EampqRJ2Liorc9E/eP8dRvRavVbdrsL1tnLJ8N09VTEpKOn36dH9/f1RU1Lx58+TFiRMnZIQ1
a9a4n9CbZ0Bqj3KUswgx++uuu05ei/HPnz/fVUpO5+x0F8jJgIePtZJNXrp06bFjxyhdACwfywcs
HyDMW0afPw9VtHjy5Mnyr87OTqcL9f45jheGumLHzVMVMzMz1cjyX5PJJC9kQZrle/I4Rm+eASnk
5uaqby2MRuMrr7zijeVLwpcBQPhCJ4XlY/kAEJjPihwnb2hoSElJWbVqlZs5e/8cxyEt381TFd1b
viePY/TmGZDC22+/LaPJBsp2ublvndPnOzrdBbJFM2fO1G6k7QpZ4nXXXSe7Rl1SBQDh3T5j+YDl
A9CL+GbylpaW7OzsrKwseTHknL1/jqO6DObdS3z66ae2Pu3mqYruLd+TxzF68wzICxfvIqqusVm/
fr2biJw+39FV3/biiy+K6C9atCg6OtrR7w0Gw0033aTtGgDA8rF8wPIB6EWGnlxMPT8/PzU1tb6+
Xrtpo/s5e/8cR7v75aenp9v5tKunKrq3/AsePI7Rm2dAKiQu0fGjR4+6icjp8x3d9G2ffPJJUVFR
UlLSvHnztFjkdOXf/u3fZK1sdw0AYPlYPmD5APQiQ0y+ceNGg8FQWVk5rPvlK7x/juOQjOypisOd
cLjPgFy2bNl999033Dl70rc1NzenpaV973vfk9MhOfORsxHHXQMAWD6WD1g+AL3IEJOXlJQ4tWH6
J1fs2LFDwrG9C77nz3f0pG8Tpy8vL3ezawAAy8fyAcsHoBdxh5uP4emfXPHBBx/s3r3bdojnz3f0
vG/j83sALB/LBywfgF6E/om+DQCwfFpCwPIB6EXon+jbAADLpyUELB8Aywf6NgCgFaUlxPIBAMsH
+jYA2megJcTyAQDLp28DANpnWkLA8gEAy6dvAwAsn5YQsHwAehH6J/o2AMDyaQkBywfA8iEQfduw
broPAFg+lg9YPgC9CP1TsPdtNTU15A9A+4zlA5YPQC9C/xQmfVt7e3tWVpbJZCJ/ANpnLB+wfAB6
EfqncOjbSktLDQZDXV0d+QPQPmP5gOUD0IvQP4X2Hmxtbd21a9f8+fOLiop6e3vJH4D2GcsHLB+A
lhHLD20yMjLy8vLS09Obm5vJH4D22RM6OzsDMq3/5ozlY/kAgOWHFdIpSs5z5swZGBhwzH8EEClA
ULXP/iAjIyMg0/p7zlg+lg8AWH647cHly5ffdNNNBw4c8DJ/dhlA0PKFj2htbc3Ly7vhhhtuu+22
0Zx2NOdMtWD5AIDlh8kelF7txRdfTExM3Lhxo/ahPpYPgOU7smvXrvT0dKvVunTpUmk3Rm3a0Zwz
1YLlAwCWHz6WL3zyySdFRUVGo3HPnj1YPgCW75T58+er3/B8+eWXiYmJH3300ehMO5pzplqwfADA
8sPK8lXfJj1lWlraqlWrsHwAsKO0tLSoqMjVW/9NG6g5A5YPgOVj+eFj+YLVai0vL8fyAcCW9vZ2
g8Gg3WxXtRUpKSl2t+fy+bSBmjNg+QA4IpYfbpavsFgs7DIA0MjOzlaPzLOlqakpLS1NxNp/0wZq
zoDlA+CIWH54Wj67DAA0ampqTCaT03/l5+eXl5f7aVr/rRVg+QCA5WP57DKAiMZisQx5s/muri6f
T+u/tQIsHwCwfCyfXQYAvjzY6TUAywfA8mGUwPIBAMsHLB8AAtCqfgGjAl0sAGD5gOUDAJaP5dPF
AtDyY/mA5QPQ1mP5WD4AYPlYPmD5AFg+UAwAgOXTBGH5AIDYAcUAAFg+YPkAQKsKFAMAYPmA5QMA
rSpQDAAc7Fg+YPkAtPVAMQAABzuWD1g+AGIHFAMAYPk0QVg+ACB2QDEAAJYPWD4A0KoCxQAAWD5g
+QBAqwoUAwBg+YDlA9DWA8UAABzsWD5g+QCIHVAMAIDl0wQBlg+A2AHFAABYPk0Qlg8AtKpAMQAA
lg9YPgDQqgLFAABYPmD5ALT1QDEAAAc7lg9YPgBiBxQDAGD5NEGA5QMgdkAxAACWTxOE5QMArSpQ
DACA5QOWDwC0qkAxAACWD1g+AG09UAwAwMGO5QOWD4DYAcUAAFg+TRBg+QCIHVAMAIDl0wRh+QCA
2AHFAABYPmD5AECrChQDAGD5gOUD0NYDUAwAHOxYPmD5ALT1QDEAAJZPEwRYPgBiBxQDQEDLG2wJ
TssHn+wjLB8AEDugGIDyBpIkZywfgHYEKAYAypskgZyxfADaEaAYAChvkgQsHwBoR4BiAKC8SZKc
sXwAoL0GigGA8iZJcsbyAYD2GigGoLyBJMkZywegHQGKAYDyBpIkZywfgHYEKAYAypskAcsHANoR
oBgAKG+SBCwfAGhHgGIAoLxJkpyxfACgvQaKAShvIElyxvIBaEeAYgCgvIEkyRnLB6AdAYoBgPIm
SSBnLB+AdgQoBgDKmyQBywcA2hGgGAAob5IkZywfAGivgWIAypvyJklyxvIBaEeAYgCgvIEkyRnL
B6AdAYoBgPImSSBnLB+AdgQoBgDKmyQBy4cwrWDwHNoRoFMBoLxJkpyxfKCCyYq9ABQDAOVNkuSM
5QMVjOUDZQZAeQNJkjOWD1Qwlg+UGQDlTZJAzlg+0FKMPKsvLsFeAA5JAMqbJMkZywcqGMtnLwDF
AJQ35U2S5IzlAxWM5QOHJADlDSRJzlg+0FJg+cAhCUB5kySQM5YPtBRYPnBIAlDeJAlYPlDBWD6W
DxySAJQ3SZIzlg9UMJbPXgCKAYDyJklyxvKBCsbygUMSgPIGkiRnLB9oKbB84JAEoLxJEsgZywda
CiwfOCQBKG+SBCwfqGDA8oFDEoDyJklyxvKBCsby2QtAMQBQ3iRJzlg+UMFYPgDFAJQ3kCQ5Y/lA
BWP5wCEJQHmTJJAzlg+0FFg+cEgCUN4kCVg+UMGA5QOHJADlTZLkjOUDFYzlsxcgWIrhMhgtgrOQ
wCd7kHBIMqT3AnqBUgCWD+Fp+eQZsa0oe99XWZEkSYb0XiB0qgGwfMDyActnD5IkSWL5QP+E5bMX
AMsHLB83BZLE8iEM+qfz5893dHR0dXUF7bY0NjZ+/PHHWD6gjNRVeJz/s/eDYQ+SJEmG9F4gdJRi
CM6cObNp06aYmJioqCgZPy4u7uWXXx4cHFT/vfLKK9WvPWbNmrV69epvvvlGG56QkGA7n3vuuUdG
k1nZzf/s2bM+2RZZ4oIFC4Y1ybAWjeUDlg9YPm4KJInlQ/goxSOPPCKjlZSUnD59+uTJk2vWrJG3
zz77rObWCxcubG5ufuyxx2T4hg0bnFq+2P/48eOvuOKK5OTk8+fPa8OXLl06Y8YMn2zLsWPHvv76
a8/HH+6isXzA8gHLx02BJLF8CBOlOHLkSFRU1OTJk8+dO6eG9PT0jBkzRny9r69P2fzNN98sL955
5x2Z23PPPefU8l9++eV58+Y9/vjjMk5jY6MaKKcKov7R0dFGo3Hv3r0ypK2tLTs7e8qUKYmJicXF
xQMDA5qRP/DAA9u3b09KSpKTitbW1l//+tcyzq233trR0aGNc9ddd2mrfeedd06bNk2v169du9Zx
uxwXjeUDlg9YPnuQJEkSy4dIUYpdu3bJOGLMtgMXLVokA5ubm5XNz549e/PmzfI3Jyfn1KlTTi1/
8eLFW7Zs+fzzz2XCNWvWqIEi67NmzZo6daospbu7u7+/X8Q9Li6uurq6sLBQxpS/2txE2dPS0u6/
/34ZbjAY0tPT1SVAtucV6oodmU9ycvLEiRPXr1//5ptvVlZWOm6X3aKxfMDyActnD5IkSWL5EEFK
sXXrVnW5ju1Apdr19fXKrSdMmBATE6PX681ms3Y1jq3lS6lFRUWpX+4uXLhQRlbfAwjXX3/91Vdf
rV7X1tbKbAsKCi5c/LGv6P748ePVdwgyt6SkpNOnT4vBy6zmzZsnL06cOGF7zqBZ/u9//3sZLivp
ftNsF43lA5YPWD57kCRJEsuHCFIKdR3OsmXLbAfOnj1bBra2tl64dMXOwMDACy+8IAPXrVvnaPmP
P/54bGxszkUMBoOMVl1d7ajazz//vPxL++g9MzNT3h49elTNTd6q4ePGjTOZTPJCFurU8svKymS4
nJ/43PIj6tmZgOUDls8eJEmSxPIhbJXCYrFMuIh2ifxXX30lU02bNk0N0a7LHxwcnDJlSlxcnPo4
X7N8q9U6ffr0u+++e91FHn300bFjx956662aast/1euamhqZ8xNPPKHeJiUlRUVFfffdd8O1fDWf
Bx98cEjL1xYdun05YPl4HpbP3sdNSRKwfBjJUffUU0/JaCtXruzq6mpra7v99tvl7bZt2zS3NhqN
b731lhi8DF++fLk2XFn+H//4R9tLdITs7OwxY8aoX83KbGWqpqam/v7+7u5uOU+YPXu2lKbZbJbh
2q9ph2X5aj46na66ulrW+YMPPnC6XbaLxvIBywcsnz1IkiSJ5UNkKcW5c+c2b94spq6uLZk2bVpV
VZX2X+1++VOnTr333ns/++wzO8u/5557HnjgAdsZKoNXN87fu3fv3Llz5e327dvl7XvvvZeYmChv
5TTgjjvu0O6MOSzLV/NJSkpSK5aRkeF0u+wWjeUDlg9YPnuQJEkSy4eIU4rBwcG2tjY/Pfu2u7vb
9ib6x48f9/Dz9SFne/r06WEtGssHLB+wfPYgSZIklg8oRWgw14Gw6cuBQxLPw/LZ+7gpSQKWDxF6
1FkcwPIBywcsH3BTksTyAaUALB+wfMDycVMgSSwfsHz6cgAsn5aBth03JUmSxPKBnoC+HADLp2Vg
7+OmJAlYPtAT0JcDhyRHNy0Dex83JUnA8oGjDssHLB+wfMBNSRLLB5QCsHzA8gHLx02BJLF8wPLp
ywGwfFoG2nbclCRJEssHegL6coAAWf758+c7Ojq8eaa1zOHYsWMHDx4c8jnTo0ljY+PHH3+M5fsE
74skgHsnCN3Uyzxl8sOHD3/zzTdhZvk+L7OR1VuwFSeWD1g+lg8cksOe1ZkzZzZt2hQTExMVFSVj
xsXFvfzyy4ODg+q/V1555WUXmTVr1urVqzWlkOEJCQnqdX9//89+9rNJkybJaGPHjpW/er0+SGKU
9VywYIH29uzZsyOYiYdThbHle18ktq9d7Z2RhT+yfRpYy/c+z5///Ofx8fFq8ltuuSV4MvQmSZ+U
2WWXuPrqq7Ozsz/77DPP6802B0+KM7AMudewfMDysXyIdMt/5JFHZISSkpLTp0+fPHlyzZo18vbZ
Z5/VurqFCxc2Nzc/9thjMnzDhg2OPevatWvlXzKf48ePW63W/fv3v/baa0ES47Fjx77++mv1eunS
pTNmzBjuHDyfKowt3/sicWr5tntnZOGPbJ8G3PK9zPPw4cPiwfn5+QMDAx0dHe+//37wZOhNkj4p
Mzn5kXz27du3bdu2SZMmjR079q9//asn9WaXw5DFGVg82WtYPmD5WD5EtOUfOXJEdGHy5Mnnzp1T
Q3p6esaMGXPFFVf09fWpXvPmm2+WF++8847M57nnnrPrWdva2mT8+fPnnz9/3nH+8t/s7OwpU6Yk
JiYWFxeLlGhd1AMPPLB9+/akpCTpuVtbW3/961/LOLfeeqtYizbOXXfdtWPHDhknOTn5D3/4g/t5
fvTRR9///vfV8JqaGtuZyAtxhfHjx0dHRxuNxr1798qQAwcOyOJiY2MXLFjg6qt5x6ki0PK9LxJX
lq/tHae7b8jwh7V3gsfyvc/zz3/+swxfsWLFd999Zztnx5Ie/QxHnKQ/yqyyslLG/K//+i/HepPF
3XnnndOmTdPr9WvXrnXMwXZkN+2YjLNr1645c+YsWbJE+97A1fAf/vCHN9xww9SpU++++24tGTXy
q6++OmvWrJUrV8oKHDx4UP3rySefLCoqGvFew/IBy8fyIaItX/oh+a/0drYDFy1aJAObm5tVrzl7
9uzNmzfL35ycnFOnTtn1pjt37pSRH330UceZ9/f3S6cYFxdXXV1dWFgoo8lfbXLpX9PS0u6//34Z
bjAY0tPT77nnHrvOW7pVGZ6fnz9u3Lirr77a/TxvvPFG6VNFFt966y35q81Efe0uJxLSiUr/Kpvc
3d397bffzpgx4/rrr9+9e3dqaqrMU7swwBa7qSLT8r0vkgseXLHjuPuGDH9Yeyd4LN/7PM+ePSuH
jIwv6f3tb39T/3Va0qOf4YiT9EeZiZ3L5ElJSXb1Js1IcnLyxIkT169f/+abb8rJgGMOtiO7acd0
Ol1KSkpBQYEMz83NdT/8kUcekfm/++67l19+uTR9tg2dbJScbLz00ksy/k9/+lMZ3tvbK2c4e/bs
GfFew/IBy8fyIaItf+vWreorctuByrzr6+tVDzRhwoSYmBi9Xm82m7UP7LXe1OkcFLW1tfIv6ecu
XPxFnXST48ePVx/UyeTS9Z4+fVp60KioqHnz5smLEydOyPhr1qzRFjFz5kxxF22VDhw44Gqe8loU
R16/9tprtr5u65EiQOpUQZCuXebzzDPP9PT0bNiwQV7v37/faUS2U0Wm5XtfJENavqvdN2T4nu+d
4LF8n+Qpjnv77bfLJKKqv/nNb9yU9ChnOOIk/VRm48aNk1bCrt5+//vfy2w1z3aagzay+3ZM3qoL
e6666qprr71Wm9bpcIV4uZyGadfbyMiyRRaLRZ2qidlfd9118lqMf/78+d7sNSwfsHwsH4K3zHyI
q6Wo776XLVtmO3D27NkysLW19cKlb8kHBgZeeOEFGbhu3Tq73nT37t0yfPHixY4zf/755+VflZWV
6m1mZqa8PXr0qJpc3mrdsMlkkheyFDvL1wS9tLRU/vW73/3OzTz/+Mc/Tp8+Xd4uWbLk73//u3vL
V/ORRU+4hNPPzLB8nxTJBQ8+y3e6+8LS8n2SpzJOEUGpYTlPFnF0VdKhYvn+KDP1wUFaWppdvZWV
lclwOa/wxPLdt2NaAc++iGNha8Pl9HX9+vXz5s27/PLLtS8nLzj8zDc3N1d9fWE0Gl955RUsH7B8
LB9ghEe3xWJRQqBdafrVV1/J+NOmTVNDtGthpYuaMmVKXFyc+ghN602PHTsmPZZM4njnu5qaGhn+
xBNPqLdJSUmiI+pK4uFa/urVq1XP6mae6gPOjRs3yghqnR0tXzxSvX7jjTdktOLi4iHTs50qMi3f
+yK54Nk9dhx335Dhe753gsfyfZKnxoMPPijT1tbWuirpUc5wxEn6o8yUoP/kJz+xqzfVjEh0bnKw
G9lVO+a55f/2t79VVyRardaMjAxXlv/222+rQ0A2sL+/35u9huUDlo/lQ6Qf3U899ZSMsHLlStH0
trY2dRnAtm3btB7IaDS+9dZb69atk+HLly937E2l/5N/3Xjjje+//750zDLyww8//O2333Z3d6vr
TeVIMZvNMo72azYPLX/SpEnV1dUffvihvJ47d+6Fi192O52nLO7VV1/9xz/+cfLkyalTp952222O
Pahso4zf1NQkfaeMJvYg/ejrr7/e0dFRV1cn2+40H9upItPyfVIk6rKEdy+xb98+273javcNGb7n
eyd4LN/7PPfu3bt9+/YjR44cOnRIxhwzZkxra6urkh7lDL1J0odl9vvf/15GGzt2rMh0b2+vXWug
mhGdTifNiyzrgw8+cMzBbmRX7Zjnlv/KK6+oLxB27twpgh4TE/PPf/7T0fLPnDkjh4CMuX79ejcJ
e7LXsHzA8rF8iPSj+9y5c5s3b5YuR13bI6JQVVVl+4GWGi4dz7333qvdLMK2Zz179uwvfvGL2NhY
NebkyZNvueUW1YG99957iYmJMlBE5I477tDuTOeh5c+cOXPJkiUyUPpjdW2uq3meOnVKBsoQ6ddv
uummTz/91LG7FTeSUwWZUAxJ3srJg0iDWmfpdF3djtBuqsi0fO+LxPZG5tpFFNrecbX7hgzf870T
VJbvZZ47duxQI0hiixYt0i4mcVrSo5yhN0n6sMyioqIMBsMPfvCDEydO2M5Baw2kGUlKSlIjZ2Rk
OOZgN7Krdsxzy5ciX7hwobom7aGHHpIXq1evvuDsxvz5+fnR0dHqoiBXeLLXsHzA8rF84Oj+F4OD
g21tbV4++7azs/PIkSOOt9Q8fvz4CD4mtP0sTbu5nvt5yhDtzhuukLnZrqGMr33U5/lUkWb5vioS
97jafUOG78neCSrL9z5Pyerw4cNO76/itKRHLUPvk/R3mdlttd2zut3kMLJ2zA7tYV5y+nHmzBmn
4yxbtuy+++7zcP3d7DUsH7B8LB84uoOU0X/25FwHwqBlCJu23cu9E4SWH5YZRkiS/kN9UWN7F/wR
7zUsHzjqsHzg6A5S/vSnP/3lL38ZzSVaHMDygwcv9w5uOjoZYvle8sEHH+zevdsnew3LB446LB84
uiGcWwb2Pm5KkuwFLJ9qoACwfODoBiyfPYibkiSWD3jAJXp6eqxW68im/cc//jHi5R4+fHjE0x4/
fhzLB45uwPIBNyVJLB/Cvxo8JyYmJjExcd68efPnz584ceLIHsYpExYVFY34WZ7Tpk2Tv2PHjh3x
HK666irZhJkzZ45gJlg+YPmA5eOmQJJYPoQYX/z/+fDDD5XaXnfddbfccst///d/FxcXl5eXm83m
xsbG+Rf5YpgcPHhQFD89PV1mu3v37i+Gz7Zt26Kjo/V6/Y9+9KMRTH7ttdeWlpaWlZXl5OTEx8dP
mTJl+fLljz76aFVV1SeffOL5fKgWwPIBy8dNgSSxfAhJyxeqq6vT0tK0505r9PX1xcbGvvrqq8My
7E8//VT8vrCwUGa4ePHi7du3j0DTxc4rKio6OzvnzJnz/e9/v6WlZViT5+XlbdmyRdsQmY+ctMjZ
izqf2bVrF5YPWD5g+exBkiRJLB/C3PKFH/3oR+LldmOWl5cP94P8119/PTk5uba2Vs3h/vvvf/rp
p4er+J988sm0adPUUzPkTGP58uU33njjRx995PkcfvnLX/7nf/6n3ebs2bNnypQplZWVfJYPWD5g
+exBkiRJLB8iAqvVmp6eXldXZzvQYDDU1NR4Pofi4mKZSXt7uzZw48aNpaWlw10ZOUlQj6TW5pyf
n280Gm3n7J7Ozk69Xm/7o+GGhgadTldfX8++BiwfsHz2IEmSJJYPEURXV1diYqJm0mL8CQkJHt5d
R6bVrtKxHV5ZWVlQUDDcNcnNzS0vL7cbuGXLFhH3pqYmD2cSHx+vbcuePXsmTZr09ttvs5cBywcs
nz1IkiSJ5UNkIVo/ffr01NRUZeqZmZm2l7a7obm5WV3vXlNT09fXZ/uv+vr6FStWDGs1ZOmxsbF2
j3+Ts4jq6uro6GhZSktLiyfzycnJkUkuXPoUf+nSpVlZWY6/PQDA8gHLZw+SJEli+RC2lJaWxsfH
79mzp7i4uLCwUMRdVLu3t9fDyXt6eqqqqrKzs2Uqk8kkei1DZLgYuZw2DPdkQ7tcp7W1VVZs8eLF
er1eZi6LULP1BDlFKSgokC0SxRfRt1qteXl56enpns8BAMsHLB83BZLE8iFUEf0tKirSrnpXF+iL
rI/gShtBTgxWrlx5zTXXiFuvWLGivLx84sSJw5qDnCSI0MuZhsFgSE5Ovueee2688Ua7rwg8obGx
UY4Bu2vxZaNktnZfFABg+YDl46ZERJJYPoQVYs/i4pmZmbafcHd1dUn1eP5TVztSU1PVZ/nyV3xd
ZuX5dTKyPjJ+fHz8pk2b9u3bJ6ccMkRM3fNvFexm1dDQYDdcTmnk5GHEWweA5dOnsvdxU5IkSSwf
gprOzk4x8ry8PEcLH7EEt7S0iKPb/mZXBH1Ylu94RY3JZKqoqBjByhw6dMjp8PLycr1ej+gDlg9Y
PnuQJEkSy4dwo6mpSX1k7uFddDwkJyenpKTEt6uqvhPw+TxF9BsbG6kEwPIBy2cPkiRJYvkQJqjb
zmjPrvIVPT090dHRrj5BHzG9vb3D+imwh9TU1Mh5DqIPWD5g+exBkiRJLB/CgYqKiuTk5D179vh8
zqWlpbm5uf5Y5+zsbMc76HuPKH5CQoLnT/4CwPLpU7F83JQkSRLLh6DDarUWFhYajcbOzk5/zFyM
2fHXrj7BbDYP9777HtLa2mowGPxxCgGA52H57H3clCQBywe/09PTk5GRkZWV5fNLXxT19fWiy769
yt925XU6nZ9udd/e3h4fH19UVESRAJYPWD5uCiSJ5UMo0dXVJQqel5fnJwu/cPGimrKyMv9tQk5O
jv8+ce/s7ExJSSksLPRfPgB4HpbP3sdNSRKwfPAlzc3NCQkJpaWl/lPY9vb2kd3V3nNqamqysrL8
N39Z+czMTJPJRMEAlg9YPm4KJInlQ7BjNpvj4+P9/QPTgoKCvLw8vy6ir68vNja2q6vLf4sYGBjI
yMhYsWLFCB61C4DnYfnsfdyUJEkSy4dRoqKiQhS/ubnZr0sROdbpdD6/gaYjJpOpsrLSr4uwWq2y
lPT0dEQfsHz3+ONH/MG8XCwfNwWSxPIhWCgsLExJSRmFHlHOJUSLR2GLqqurMzMzR2FBRUVFqamp
FouFKoKQaOtHn4yMjIhariI4LR98sgcJhyRDei9g+RFEb29vTk5OVlbW6HwgLUIs/j062+Xvi3Y0
SktLk5OT29vbKScIIb4YFVpbW/Py8m644Ybbbrvti1EkUMt1JJL3fnhAkiQZZnsBy48UDh06ZDQa
8/PzBwYGRmFxLS0tCQkJo3Zrmuzs7IqKitFZlhJ92UCKCrB8W3bt2pWeni5H/dKlS1988cVR68kC
tVwsHzcFksTyIfA0NzfHx8ePmgdfuPi725KSklFbnNlsHp2rgxQ1NTWj8MMGgNDyvPnz56uD4ssv
v0xMTPzoo4/Ce7lYPm4KJInlQ4Cpra0VJa2vrx+1Jfb09ERHR4/mj+FkibGxsX56PJZTGhoaJFU/
PdMXIOQoLS21fYSc3dvwWy4AQPCD5Yc5W7ZsGf3ryMvLy0f/BvN+fTyWU/bs2aPX60fz9AkgOJEW
xmAw2D4Zw2q1pqSk+Pv7rkAtFwAAy4cAU1xcnJqaOso3mJNeVs4rxIBHeWNramqys7NHeaHq5wdV
VVUUG0QycujV1dXZDWxqakpLS/Prj3MCtVwAACwfAkZfX5/JZJIucHR+a2tLQ0ODwWAY/S5WNnni
xImjc6cdW2SJsr3+vmE/QNAiJ9iuvrvLz8/33zdsgVouAACWDwGjvb3daDQWFxcH5NOsnJycQCmv
dPkB6do7OztTUlIKCwv5+BAiDYvFMuTNm/1x7h2o5QIAYPkQMBobG+Pj4wPl2XKCodPpbC+THU2q
qqqysrICsmjZ5LS0tFWrViH6EOmdSoCecMmTNQEAsPxwpq6uThS/qakpUCtQUlJSUFAQqKWP5uOx
nC49MzMzIFdJAWD5WD4AAJYftmzZsiUhISGAN5cQu9XpdIcOHQpgCCLZAbxE3mq15ufnp6enj87T
hQGwfCwfAADLD3MKCwtTU1NH84bxjlRXVwfqghnbdcjMzAzsOhQUFAR8XwBg+QAAWD6ENgMDAyaT
KScnJ+CfH4tei2QHdh0Ce9GORmlpqdFoHOXHFABg+QAAgOWHCSK1GRkZJSUlAf/RZ2tra3JycjD8
9jSwF+1oBOR5ZABYPgAAYPkhT1NTk3hkaWlpMKxMfn6+eG0wrEkwXLSjMJvNer2+sbGRWgUsH8sH
AMDywSPq6up0Ol19fX0wrExPT4+sTJDcnTpILtrRdpOIfpDsJgAsHwAAy4egpqKiIrC307GjsrIy
JycnePIJkot2FPv27ZOdVVNTQ90Clo/lAwBg+eCSkpKS1NRUi8USJOtjtVoNBkMAb9LvSPBctKNo
aWnR6/XBc+IBgOUDAGD5EET09fWZTKZVq1YF1e3Y6+vrjUZjUAUVVBftKDo7O5OTk8vKyihjwPKx
fAAALB/+P8RZ09LSioqKguE+NrbIiUcQymtQXbSj7UE5HZI9SDEDlo/lAwBg+fAvWltbDQZDRUVF
EJ57xMfH9/b2BtuKBdtFOwoJStYqPz9/YGCAqgYsH8sHAMDyI5r6+nox6eC8T8umTZuKi4uDcMWC
8KIdRV9fn4h+VlYWog9YPpYPAIDlRy7l5eV6vT54bqdji3iqrFtra2twRheEF+1oueXm5qanpwfV
7ysAsHwAACwfRomioiJxwSD8QFphNpszMjKCNr3gvGhHYbVa8/LyUlJSOjs7qXPA8rF8AAAsP1JQ
t9PJysoK5o97xaHr6uqCdvWC9qIdjbKyMqPR2N7eTsEDlo/lAwBg+eGPup1OcXFxMF+63dTUlJyc
HGw3/LEjaC/a0aioqJAYW1paKHvA8rF8AAAsP5xpbW0V7Qv+JygVFBRs3LgxyFcymC/a0airqwva
n14AYPkAAFg++ID6+noRvtra2iBfz56eHp1OF8wXwyhkDWNjY4PwRp+O+z0+Pt5sNnMIAJaP5QMA
YPnhRnl5ucFgCNpb1titqslkColUMzMzq6urg389W1paRPRramo4EADLx/IBALD88KG4uDiEfogp
ZyONjY0hsaqVlZXZ2dkhsapK9OUMisMBsHwsHwAAyw95BgYG8vLy0tPTe3p6QmKF6+rq5IQkyH93
qxEqF+0oOjs7U1JSNm3axHEBWD6WDwCA5YcwYp+ZmZmFhYUh9CRUk8lUUVERQiGHykU7CovFkpqa
WlRUxNEBWD6WDwCA5Yck7e3tBoMhtD64lXXW6XSh8tG4IoQu2tHO/bKysvLy8kLo3A8AywcAwPLh
XzQ2NiYnJwf/7XTsKCkpKSwsDK11Dq2LdhR9fX1yZmIymRB9wPKxfAAALD9kMJvNCQkJIXeLdDFO
vV4fio9wCq2LdhRWqzU/Pz+EfrABgOUDAGD5EU1paanRaAyJO2baUVlZmZGREYqZh9xFOxoFBQUp
KSmdnZ0cOIDlY/kAAFh+kDIwMLBq1SrRzb6+vlBcf1H8EH1yUyhetGN3WojoA5aP5QMAYPnBiLqd
Tm5ubohead3Y2JiQkBAqN9B0JBQv2tHYsmVLcnJyKH7/A1g+lg8AgOWHM+p2OhUVFaFryXJ+EtL3
cQ/di3YUDQ0Ncpa1b98+jibA8rF8AAAsPyhobGyMj48P3Q+SL1y8ibtOp5O/obsJIX3RjqK5uVmv
14vuc0wBlo/lAwBg+QHGbDYnJyc3NTWF9FaUlpbm5OSE+r4I6Yt2FPv27ZMzxpqaGo4swPKxfAAA
LD+QcpySktLe3h7SW2G1Wg0GQ2NjY6jvjlC/aEdx6NAhOW8MrccPA5aP5QMAYPlhgpixyWQKj5ud
m81m2ZAw2CldXV0TJ04M6Yt2FBaLJTU1tbCwMHR/5gFYPpYPAIDlh6SEiRbn5eWFh4SFwYUuGitW
rKisrAyDDZFzFamxVatWIfqA5WP5AABY/migLqgoKSkJD/2SzdHpdCF6909HRPGzsrLCY1tkp5hM
puzs7LDZO4DlY/kAAFh+kGI2m8Psx5EFBQVyxhI2m2OxWMLjoh2NnJycjIyMcNoiwPKxfAAALD+4
2LJlS0JCQnNzc9hskbijXq8P9V8P22EymcLmAqQLF38BkpeXl5qaGtL3OQUsH8sHAMDyg5SCggKj
0djZ2RlOGxUeN6Wxo6KiImwu2tHYuHFjcnJymJUfYPlYPgAAlh9IBgYGVqxYkZmZGQa307FDzlvC
7xlMspvC7KIdRVlZWXx8fEtLC4ckYPkAAECz6C2dnZ0pKSl5eXnh9wvIffv2GQyGsLyFi8lkCo87
7dhRW1sroh/qj2ADX1lvoAi/7aWcAADLjzhaWlpEqrZs2RKWWyenLuG6aVVVVRkZGWG5aQ0NDXq9
Pvy+gYERWC8hkCQAYPkwcp0Kp9vp2NLb26vT6cL1zi0WiyU2NjZct665uTnMbvQEuClJAgBg+aNE
+N1Ox3ED8/LywngPhtOjvhxpaWmRU9CKigoOVdwUSBIAsHzwlLC8nY4tVqvVYDDs27cvjHdiOD0e
yynq6WylpaUcsLgpkCQAYPkwBGF8Ox1bGhoa0tPTw3tXWiyWML4kSSEnonK2VlhYGJY/oQbclCQB
ALB8n3lhWlpaQUFB2DtTdnZ2GF/NoiEnbGF5px1b5HRUTthWrVqF6OOmQJIAgOWDE8L7djq2tLe3
6/X68LsxqCOi+OF6px1bZFdmXyQS9ingpiQJAIDlD4Pwvp2OHSUXiYQtDdfHYzlitVpXrVolpzSR
sLGAm5IkAACW7xGVlZXhfTsdWwYGBuR8pr29PUJ2rslkioRrkxT5+fmpqakWi4WDGjcFkgQALD/S
Cfvb6dghypubmxtRp3DhfacdOwoLC5OTkyOnnnFTIEkAwPLBngi5nY4d6enpjY2NkbO9fX19Yfx4
LKeUlZXFx8e3tLRwjOOmQJIAgOVHHJFzOx1b9u3bZzQaI21fm0wms9kcUZtcXV2t1+ubmpo40nFT
IEkAwPIjCPU4oUi4nY4deXl5YX9nSUeqqqoyMzMjbavr6+tF9BsaGjjecVMgSQDA8iMCdTsdcaBI
2/De3t74+PgIvAeLbHKkXbSjaG5ulj0eIXeOwk2BJAEAy49oIup2OnZs2bKloKAgMvd7hDwFzJGW
lhY5p62oqODYx02BJAEAyw9bSkpKIup2OnbIth86dCgyt91sNkfUnXZskZ0uZ7alpaW0ALgpkCQA
YPnhhrqdTnZ2dl9fX2Qm0NjYGIHXpmv09PTodLqIupmSLXJmK+d4EfIoNNwUSBIAsPxIITJvp2NH
bm5upN1nxg45zYvkK9TlDEdO8/Ly8iL5KMBNgSQBAMsPHyL2djq2dHV1SQgRrneReacdW/r6+rKz
s00m08DAAC0jbgokCQBYfgjT2NiYkJAQgbfTsWPTRSI8hN7e3okTJ0bsRTsKdelaVlZWBN5xCDcF
kgQALD9MiOTb6dhitVqTk5O7uro4GEwmUwQ+LsCxHvLz81NTUyP8hAc3BZIEACw/JCkpKRGPsVgs
VIDZbM7NzSWHCxefCJudnU0OwsaNGzlAcFMgSQDA8kOJgYGBVatWRfLtdOzIzMxsbGwkhwsXL0yP
jY3lM2zFli1bDAZDxN5YFjcFkgQALD+UsFgsGRkZEX47HVsOHTpkNBrJQcNkMlVVVZGDoqKigqva
cFMgSQDA8oOdlpYWUZbIfMSpK/Lz85FaW2pra3NycshBQ44XnU7X0NBAFLgpSQIAYPnByJ49e+Lj
4yP8lvB2DAwMSCbcTcWWvr4+vV7PrSRtqampEdGX8x+iwE1JEgAAyw8uKioqRGdbW1vZ33axFBQU
kIMdOTk5nA3a0dLSIic/iD5uSpIAAFh+sGC1WgsLC9PS0trb29nZdhiNRs58HGloaMjKyiIHR9GX
U2U5MyQK3JQkAQCw/AAzMDCQm5ubnp7ORSmONDY2yskPOTg9MxSd5U47jsg5ocFg2LhxI1HgpiQJ
AIDlBwyLxSJ+X1hYyDXWTsnOzua6FFesWrWKHyU7RU6Y5eQwPz+fu1ThpiQJAIDlBwB1dcGmTZtw
EVenQJIPTwxwRW1trclkIgc3oi8nQhxcuClJAgBg+aNKY2NjcnLynj172LuuKC4ullMgcnCFnP/o
dDou9HIj+llZWdnZ2XxRhpuSJAAAlj9KlJeXi+LzMFc3WK1WvV7f1dVFFG5YsWJFXV0dObipory8
vIyMDM6FcFOSBADA8v1OQUGBKL7FYmG/uqG6ujo7O5sc3FNfX8+ddoYkPz8/NTWVIw43JUkAACzf
X/T19eXm5q5YsYJbowxJSkoKjzIdEqvVqtPp8NchKSkpSUhI4E61uClJAgBg+b6nq6srLS0tJyeH
q4SHpLm5WZyM3016gpw3cqcdT5CUDAbDoUOHiAI3JUkAACzfZ7S3t4u2lpWVYa6eUFhYyP3OPcRs
NnOnHQ+prq7W6/VyDkkUuClJAgBg+T6gtrY2Pj5eDIMd6Qk9PT06nY7f3XpIb2+vxMX9Rj2krq5O
RJ+LwXBTkgQAwPK9ZdOmTQkJCa2trexFDykrK8vNzSUHz1mxYgXPDvOcxsZGOesmMdyUJAEAsPwR
YrVa8/PzU1JSuBR4WBgMBj5qHRY1NTUi+uTgOS0tLXq9nt8z4KYkCQCA5Q+bvr4+Ea/09HQupRgW
9fX1qamp/HphuOeTXOM0XNrb25OTk8vKyogCNyVJAAAsfxgCYTAYCgoKuJ3OcDGZTIjXCOBOOyOg
s7NTRL+kpISzStyUJAEAsPyhaW1t1ev1xcXFqMNwOXTokE6n4zGlI6C2tpaHiI0Ai8WSlpYm50ic
kOOmJAkAgOW7w2w2x8fH19fXs89GgJwaFRQUkMMI6Ovri42N5QRpBIjfywlSTk4Op+W4KUkCAGD5
zlG302lsbGSHjQB1R0juZT5iRFW5b8zIEL9fsWKFBMgn+rgpSQIAYPn2llBUVGQwGCwWC3trZJSX
l2dmZpLDiKmqquJOO94cwvn5+RkZGT09PaSBm5IkAACW/y96e3uzL4Lie0NaWhoPDvOGgYEBnU5H
EXrDpk2bjEZjZ2cnUeCmJAkAEOmWL0JgMBhMJhMX9XpDQ0NDfHw810t4SW5ubkVFBTl4Q2lpqZQi
z7jATUkSACCiLb+lpUWEQLQAxfeSVatWlZSUkIOXmM3mrKwscvCS6upqOa737dtHFLgpSQIAjELz
BcOAKEmSJCMwSZpRKjacKhYAIsjyg3C1gvPGhUP2T0G4zsF5rQ5JRnKSoXh081lvJFdsKCYJABC8
lk//RJJAklg+FQtkBQBYPv0TSZIkSWL5VCxJAgBg+SMK64tL0OaSJElGTpL4FhUbThULAFg+0D+R
JEmSJJZPxWL5AIDl0z8BSZIklg9ULJYPAFg+/RNJkiRJYvnkTJJYPgBg+fRPJAkkieVTsSSJ5QMA
lk//RJIkCVg+FUvFAgCWD/RPJEmSJInlU7FYPgBg+bSq9E8kSZJYPlCxWD4AYPn0TyRJkiSJ5ZMz
SWL5AIDl0z+RJJAklk/FApYPAFg+/RNJkiRJYvlULBULAFg+0D+RJEmSJJZPxWL5AIDl06rSP5Ek
SeJM5EzFYvkAgOXTPwFJkiSWT85ULJYPAFg+/RNJAkli+VQsYPkAgOXTP5EkSZIklk/FUrEAQENB
q0r/RJIkSZJYPhWL5QMAlk+rSv9EkiSJM5EzFYvlAwCWT/8EJEmSWD5QsVg+AGD59E8kCSSJ5VOx
gOUDQIAs//z58x0dHV1dXb5aoaeffvpXv/pVZPZPPg8zaPP3VZLBkBg1OeQcjh07dvDgwdOnTwfz
YR4Qyw9sATc2Nn788ceOw7ds2fLLX/4yhCrWyxhl8sOHD3/zzTde5oblA0D4WP6ZM2c2bdoUExMT
FRUlM4mLi3v55ZcHBwfVf6+88srLLjJr1qzVq1drDagMT0hIsBtH8ZOf/EQNXLBgwchW6ezZs16O
ECij8mGYduMMlyHz9zJDXyXpk8S01/7eXt+WcajUZH9//89+9rNJkybJaGPHjpW/er3ewzLza2jB
YPlexnvNNddc5owvv/zS+4N99uzZycnJfkrbtxXrfZX+/Oc/j4+PV5PfcsstnmxjkFQvlg8AfrT8
Rx55RKYtKSk5ffr0yZMn16xZI2+fffZZrR1cuHBhc3PzY489JsM3bNjgVLOkeT18iRMnTnjTgC5d
unTGjBnejBBAo/JJmE7H8a3le5+hr5L0SWJDWr6vtteHZRxCNbl27Vr5l8zn+PHjVqt1//79r732
mj88ySe7aZQt38t4Ozo6VLO5ePFiGaGlpUW9HZYvHjt27Ouvvx6u5XuZtm8r1ssYJTHx+/z8/IGB
AYn0/fff92QbXeU2ytWL5QOAvyz/yJEj0jhOnjz53LlzakhPT8+YMWOuuOKKvr4+1YzefPPN8uKd
d96RRTz33HMeapZt99/W1padnT1lypTExMTi4mJpiNXwH/7whzfccMPUqVPvvvtu1cBJsz5+/Pjo
6Gij0bh3717HFR5yhAAala/CdDqOqwxdDXejXz7J0CdJ+rz8pKO96667du3aNWfOnCVLlnz22WdO
t/fAgQO33nprbGysRKR9Zf/pp5+mpqbqdLqCgoI777zztttuU8OdjqwW9Oqrr86aNevzzz/3PuGg
rUkpMBl//vz558+fd3UyaRe400PbH6EF3PJ9UsCKjIwMGeHbb7/VhjhmKPFKOD/4wQ/ktcVikeTf
ffddLVg11V//+ldVxjL5zJkzXVm+92n7sGK9j/HPf/6zDF+xYsV3333nZhvtKtA2N6dNh5tmwYd5
YvkA4C/Ll0ZNJpTGy3bgokWLZGBzc7NqRmfPnr1582b5m5OTc+rUKaeaFR8f336Rzs5OO8vs7+8X
AY2Li6uuri4sLJQ5y1/t8xtZAemoLr/88vvvv1+GtLa2SvsrHZsM7+7udlzhIUcIoFH5KkzHcVxl
6CZbN5bvkwx9kqSvErN9Lf1xSkqKdMkyk9zcXMftFZGaMWPG9ddfv3v3bum/JUB1YcDcuXPlZKmq
qurHP/7x2LFjlR65GlkWJCPLWq1du7ajo8P7hIO2Jnfu3CkjP/roo07n7zRwp4e2P0ILuOX7pIBd
Wb7TDO+55x4ZTYavXLnye9/7njr1sj3YpVbljPR//ud/Hn74YVFnV5bvfdo+rFjvYzx79qzBYJDx
JYe//e1vrrbRrgJtc3NVyU6bBd/mieUDgL8sf+vWrep7UtuB0qPIwPr6etX2TZgwISYmRq/Xm81m
7fM8V9fl2w5UDWhtba0Ml6bzwsVfR4mSjh8/XvvMRpA2UfxJ+65Teqmrr77azToPOUKgjMpXYTqO
4ypDN9m6v5TC+wx9kqSvErN9LSGob+Gvuuqqa6+91nF733zzTZn/M88809PTs2HDBnm9f/9+6fLl
RV5enhpHu9TB6chqQbJKFovFVwkHbU06nYOt5TsN3Omh7fPQAm75PilgV5bvNMOuri6ReLFJMc6/
//3vdo3tsWPHZCarV6+2K2N/pO3DivVJjKL+t99+u0wyceLE3/zmN0630a4C7SzfsZJdNQu+zRPL
BwB/Wb76AnTZsmW2A6Utk4Gtra0XLn1VOjAw8MILL8jAdevWOVWr+Pj4Ixf56quv7BrQ559/Xias
rKxUwzMzM+Xt0aNHBwcH169fP2/evMsvv3zcuHFa+xi6lu+rMB3HcZWhq+GhYvm+Sszx3FLNR3Dc
XhWalNyES+zZs2fHjh0yUJZi1507HfmCB9ejB4nle5/w7t27ZfjixYtdWb5j4K4ObZ+HFnDL90kB
O7V8VxkKjz/+uIyZnZ3tuBfEbuVf2l2PQsXyfRWj2P9LL70kcUVFRakP1B0t37YC7SzfsZJdNQtY
PgCEhuVbLBblLtr13KLpMqtp06apIdoFkdLrTJkyJS4uTvuO2MPr8mtqamSGTzzxhBqelJQkTfB3
333329/+Vl1habVapYeztfzp06e7VwH3IwTKqHwVpuM4rjJ0NdwTy/cyQ58k6fPyc2P52va+8cYb
soji4mLbNdm3b5/6gam87u3tlQWp7tzpyB4K67ASDtqaPHbsmGiTTOL0/oZOA3d1aPs8tIBbvk8K
2Knlu8rw5MmTEpGof3R0tPpayTZYOQXVJPjcuXMylXsr9SZtH1asD2MUHnzwQZm2trbWcRuHa/mu
mgXf5onlA4C/LF946qmnZNqVK1dKL97W1qa+9Ny2bZvW9hmNxrfeekt6Dhm+fPly9x+mOjag3d3d
6lJIacLMZrPMRP3g6ZVXXpHXW7du3blzpzSOMTEx//znP2W4rIkMb2pq6u/vd7rCQ44QKKPyVZiO
47jK0NXwIY3K+wx9laRvy8+V5dtur3iS2IN02K+//npHR0ddXZ0sV5RIp9NdccUVP/7xj5csWRIb
G6umdTqyJ8I63ISDuSblNFL+deONN77//vuiXzLyww8/rHzUaeCuDm2fhxZwy/dJvE4t31WGDzzw
wKJFi/73f/9XzudFfLVfiahgxYknTpx41VVXbd++/b777pPTMzdW6mXavq1YL2Pcu3evbPKRI0cO
HTokY44ZM0Z9CWC3jcO1fFfNgm/zxPIBwI+WLw3Z5s2bpRdRF9aL01RVVdm2g2r41KlT7733Xu3O
A8O6x857772XmJgoM5HG94477lDXPp46dWrhwoUyUFrPhx56SLucVNrruXPnyltptZ2u8JAjBNCo
fBKm03GcZuhmuHuj8j5DXyXp2/JzZfl22/vhhx+KCqg5i0Kp++698cYbs2bNuuaaa0ToxZPmz5+v
pnU68pDCOtyEg7kmz549+4tf/EIUR405efLkW265xVHctcBdHdo+Dy0YLN/7eJ1avtMM//znP4u4
q9sTiXeq0wC7YJ9++mk5AZgwYUJpaan7K0y8TNu3FetljOrSGtUMylmQdhGj3TYO1/LdNAs+zBPL
BwA/Wr5icHCwra3Nr89uPH78uOOHHNrzTU6cOHHmzBlteHd3t9M793k+QkCMyt9hOs3QzXD3eJOh
b5MchfJz3F4Rqd7eXu1tR0eH1WpVWi9rnpOTYzut3cg+Tzj4a1I2pLOz88iRIx5ukatD299lGZBn
3/qpgEeWYV9fnzoH82va/qhYb2KUBvDw4cNO72/jTUW5bxZ8siwsHwD8bvnByVwHfBWWX40qEjIM
vyQXLlwYGxs7Z84cWW2DwSDGMJoJR1RN+rUsA2L5EZh2hFSs02bBt3li+QAQoZZvcQA3DZIMwy/J
U6dO/eUvf9m+ffv//d//jX7CkWb5/itLLH900o6QinXaLPg2TywfACLU8v0XVkQZFUmSJElGsuVT
seFasQCA5QP9E0mSJEli+VQslg8AWD79E5AkSWL5QMVi+QCA5dM/kSRJkiSWT84kieUDAJZP/0SS
QJJYPhVLklg+AGD59E8kSZKA5VOxVCwAYPlA/0SSJEmSWD4Vi+UDAJZPq0r/RJIkieUDFYvlAwCW
T/9EkiRJklg+OZMklg8AWD79E0kCSWL5VCxg+QCA5dM/kSRJkiSWT8VSsQCA5QP9E0mSJEli+VQs
lg8AWD6tKv0TSZIkzkTOVCyWDwBYPv0TkCRJYvnkTMVi+QCA5dM/kSSQJJZPxQKWDwBYPv0TSZIk
SWL5VCwVCwA0FLSq9E8kSZIkieVTsVg+AGD5tKr0TyRJkjgTOVOxWD4AYPn0T0CSJInlAxWL5QMA
lk//RJJAklg+FQtYPgBg+fRPJEmSJInlU7EkCQCA5dM/kSRJApZPxWL5AIDlExb9E0mSJM5EzlQs
lg8AIWL54Dnu+ycgSZIMyyRpRqnYcKpYAIgUy7d98wV4jPtYyYckSTJckyRnKjacKhYAsHygfyJJ
kiRJLJ+KxfIBAMunfwKSJEksH6hYLB8AgsHyAQAAAAAgDPh/BFQraMwlB3wAAAAASUVORK5C" />
</BODY>
</HTML>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

File diff suppressed because it is too large Load Diff

View File

@ -1,298 +0,0 @@
# -*- coding: utf-8 -*-
#
# OBITools3 documentation build configuration file, created by
# sphinx-quickstart on Mon May 4 14:36:57 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
import shlex
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
#
# 'sphinx.ext.imgmath' replaces the former 'sphinx.ext.pngmath' entry:
# pngmath was deprecated in Sphinx 1.4 and removed in Sphinx 1.8, so keeping
# it makes the build fail on current Sphinx releases. imgmath requires
# Sphinx >= 1.4. Any 'pngmath_*' option would have to be renamed to
# 'imgmath_*'; none is set in the part of this file reviewed here.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.imgmath',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'breathe',
]
# Directories searched for templates, given relative to this directory.
templates_path = ["_templates"]
# Suffix of the source files. A list of strings may be given instead to
# accept several suffixes:
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"
# Encoding of the source files.
#source_encoding = 'utf-8-sig'
# Name of the document that holds the master toctree.
master_doc = "index"
# General information about the project.
project = u"OBITools3"
# The author list is written once and reused to build the copyright notice.
author = u"Céline Mercier, Eric Coissac, Frédéric Boyer"
copyright = u"2015, " + author
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# NOTE(review): both values below look like unedited placeholders and are not
# derived from the package version -- confirm whether they should track it.
#
# The short X.Y version.
version = '0.0'
# The full version, including alpha/beta/rc tags.
release = '0.0.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Sphinx >= 5.0 rejects `language = None` with a warning and falls back to
# 'en'; naming English explicitly yields the same output without the warning
# and is also accepted by older Sphinx releases.
language = 'en'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# Patterns (relative to the source directory) of files and directories that
# are skipped when looking for source files.
exclude_patterns = list()
# Default reST role, i.e. the role applied to `text` markup, for all
# documents.
#default_role = None
# When true, '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# When true, the current module name is prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# When true, sectionauthor and moduleauthor directives are shown in the
# output. They are ignored by default.
#show_authors = False
# Name of the Pygments (syntax highlighting) style.
pygments_style = "sphinx"
# Prefixes ignored when sorting the module index.
#modindex_common_prefix = []
# When true, warnings are kept as "system message" paragraphs in the built
# documents.
#keep_warnings = False
# When true, `todo` and `todoList` produce output; otherwise they produce
# nothing.
todo_include_todos = True
# -- Options for HTML output ----------------------------------------------
# Theme used for the HTML and HTML Help pages. The Sphinx documentation
# lists the builtin themes.
html_theme = "bizstyle"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Directories holding custom static files (such as style sheets), relative
# to this directory. They are copied after the builtin static files, so a
# file named "default.css" here overwrites the builtin "default.css".
html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
#html_search_language = 'en'
# A dictionary with options for the search language support, empty by default.
# Currently only 'ja' uses this config value.
#html_search_options = {'type': 'default'}
# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder.
htmlhelp_basename = 'OBITools3doc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
# Latex figure (float) alignment
#'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'OBITools3.tex', u'OBITools3 Documentation',
u'Céline Mercier, Eric Coissac, Frédéric Boyer', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'obitools3', u'OBITools3 Documentation',
[author], 1)
]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'OBITools3', u'OBITools3 Documentation',
author, 'OBITools3', 'One line description of project.',
'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
#Breathe configuration
sys.path.append( "../breathe/" )
breathe_projects = { "OBITools3": "../doxygen/xml/" }
breathe_default_project = "OBITools3"

View File

@ -1,29 +0,0 @@
===============
Container types
===============
Containers make it possible to manage collections of values of a homogeneous type.
Three container types exist.
A container is a non-mutable structure once it has been locked.
Consequently, only insertion procedures are needed.
Lists
-----
Correspond to an ordered collection of values belonging to an elementary type.
At its creation, ...
Sets
----
Correspond to an unordered collection of values belonging to an elementary type.
Dictionaries
------------
Dictionaries associate a `key` with a `value`. Values can be retrieved through their associated key.
Values must belong to an elementary type and keys must be *OBIStr_t*.

View File

@ -1,16 +0,0 @@
#################
Data in OBITools3
#################
OBITools3 introduces a new way to manage DNA metabarcoding data.
They rely on a `Data management System` (DMS) that can be considered as
a simplified database system.
.. toctree::
:maxdepth: 2
The data management system <DMS>
The data types <types>

View File

@ -1,40 +0,0 @@
================
Elementary types
================
They correspond to simple values.
Atomic types
------------
========= ========= ============ ==============================
Type C type OBIType Definition
========= ========= ============ ==============================
integer int32_t OBIInt_t a signed integer value
float double OBIFloat_t a floating value
boolean bool OBIBool_t a boolean true/false value
char char OBIChar_t a character
index size_t OBIIdx_t an index in a data structure
========= ========= ============ ==============================
The composite types
-------------------
Character string type
.....................
================ ====== ======== ==================
Type C type OBIType Definition
================ ====== ======== ==================
Character string ? OBIStr_t a character string
================ ====== ======== ==================
The taxid type
..............
==================== ====== ========== ======================
Type C type OBIType Definition
==================== ====== ========== ======================
Taxonomic identifier size_t OBITaxid_t a taxonomic identifier
==================== ====== ========== ======================

View File

@ -1,132 +0,0 @@
######################
Programming guidelines
######################
***************
Version control
***************
Version control is managed with `Git <http://git-scm.com/>`_.
Issue tracking and repository management are done using `GitLab <https://about.gitlab.com/>`_
at http://git.metabarcoding.org/.
Branching strategy
==================
Master branch
-------------
The master branch should only contain functional scripts.
Topic branches
--------------
Topic branches should correspond to development branches revolving around a topic corresponding
to the branch's name.
Release branches
----------------
Release branches should start with duplicates of tags and be used to patch them.
Tags
----
Tags should never be committed to.
Rebasing
--------
Rebasing should be avoided on the remote server.
Merging
-------
Merging should never overwrite a release branch or a tag.
Branching strategy diagram
--------------------------
.. image:: ./images/version_control.png
Issue tracking
==============
Issue tracking is done using `GitLab <https://about.gitlab.com/>`_ at http://git.metabarcoding.org/.
Tickets should always be labeled with the branches for which they are relevant.
*************
Documentation
*************
C functions are documented in the header files.
**************
OBITools3 wiki
**************
The OBITools3 wiki is managed with GitLab.
*********************
Programming languages
*********************
C99 :
* All the low-level input/output functions (e.g. all the `OBIDMS <formats.html#the-obitools3-data-management-system-obidms>`_ functions)
* Computing-intensive code (e.g. alignment or pattern matching)
`Cython <https://cython.org/>`_ :
* Object layer
* OBITools3 library
`Python 3 <https://www.python.org/>`_ :
* Top layer code (scripts)
For the documentation, `Sphinx <http://sphinx-doc.org/>`_ should be used for both the original
documentation and for the generation of documentation from the python code. `Doxygen <http://www.stack.nl/~dimitri/doxygen/>`_
should be used for the generation of documentation from the C code, which should be then integrated
in the Sphinx documentation using `Breathe <https://breathe.readthedocs.org/en/latest/>`_.
******************
Naming conventions
******************
Struct, Enum: ``Title_case``
Enum members, macros, constants: ``ALL_CAPS``
Functions, local variables: ``lower_case``
Functions that shouldn't be called directly: ``_lower_case`` (``_`` prefix)
Global variables: ``g_lower_case`` (``g_`` prefix)
Pointers: ``pointer_ptr`` (``_ptr`` suffix)
.. note::
Underscores are used to delimit 'words'.
.. todo::
``obi_function`` for public function names?
*****************
Programming rules
*****************
*

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

View File

@ -1,22 +0,0 @@
.. OBITools3 documentation master file, created by
sphinx-quickstart on Mon May 4 14:36:57 2015.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
OBITools3 documentation
==========================
.. toctree::
:maxdepth: 2
Programming guidelines <guidelines>
Data structures <data>
Pistes de reflexion <pistes>
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -1,23 +0,0 @@
###################
Pistes de reflexion
###################
******************************
Ce que l'on veut pouvoir faire
******************************
* Gerer les valeurs manquantes
* Modifier une colonne en cours d'ecriture (mmap)
* Ajouter des valeurs a la fin du fichier d'une colonne en cours d'ecriture (mmap)
******
Divers
******
* Si l'ordre d'une colonne est change, elle est reecrite (pas d'index).
* Utilisation de semaphores pour la lecture
* Utilisation de tas pour l'indexation des chaines de caracteres. Chaque colonne dont
le type est OBIStr_t est stockee dans 3 fichiers : un fichier contenant les chaines, un
fichier contenant les index, et un fichier contenant le tas.

View File

@ -1,52 +0,0 @@
==============
Special values
==============
NA values
=========
All OBITypes have an associated NA (Not Available) value.
NA values are implemented by specifying an explicit NA value for each type, corresponding to the R standards:
* For the types ``OBIInt_t``, ``OBIBool_t``, ``OBIIdx_t`` and ``OBITaxid_t``, the NA value is ``INT_MIN``.
* For the type ``OBIChar_t``: the NA value is ``\0`` (?).
* For the type ``OBIStr_t`` : the NA value is ``\0`` (?).
* For the type ``OBIFloat_t``::
typedef union
{
double value;
unsigned int word[2];
} ieee_double;
static double NA_value(void)
{
volatile ieee_double x;
x.word[hw] = 0x7ff00000;
x.word[lw] = 1954;
return x.value;
}
Minimum and maximum values for ``OBIInt_t``
===========================================
* Maximum value : ``INT_MAX``
* Minimum value : ``INT_MIN(-1?)``
Infinity values for the type ``OBIFloat_t``
===========================================
* Positive infinity : ``INFINITY`` (should be defined in ``<math.h>``)
* Negative infinity : ``-INFINITY``
NaN value for the type ``OBIFloat_t``
=====================================
* NaN (Not a Number) value : ``NAN`` (should be defined in ``<math.h>`` but probably needs to be tested)

View File

@ -1,17 +0,0 @@
********
OBITypes
********
.. image:: ./UML/OBITypes_UML.png
:download:`html version of the OBITypes UML file <UML/OBITypes_UML.class.violet.html>`
.. toctree::
:maxdepth: 2
The elementary types <elementary>
The containers <containers>
Special values <specialvalues>

25
obi_completion_script.bash Executable file
View File

@ -0,0 +1,25 @@
# Bash programmable-completion function for the `obi` command.
#
# Reads COMP_WORDS / COMP_CWORD (set by bash) and fills COMPREPLY:
#   * when only the sub-command is being typed, the candidates are the
#     obi sub-command names;
#   * otherwise the candidates are paths, filtered on *.obidms directories
#     or on *.obiview files depending on what has been typed so far;
#   * when the previous word is `import`, plain file names are added.
_obi_comp ()
{
    local cur prev
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    if [ "${#COMP_WORDS[@]}" = "2" ]; then
        COMPREPLY=($(compgen -W "align alignpairedend annotate build_ref_db clean_dms clean count ecopcr ecotag export grep head history import less ls ngsfilter sort stats tail test uniq" "${COMP_WORDS[1]}"))
    else
        if [[ "$cur" == *VIEWS* ]]; then
            COMPREPLY=($(compgen -o plusdirs -f -X '!*.obiview' -- "${COMP_WORDS[COMP_CWORD]}"))
        elif [[ -d $cur.obidms ]]; then
            # The two candidate lists are separated by whitespace only: inside
            # an array literal a comma is not a separator and would end up
            # glued to (or standing as) a completion candidate.
            COMPREPLY=($(compgen -o plusdirs -f $cur.obidms/VIEWS/ -- "${COMP_WORDS[COMP_CWORD]}") $(compgen -o plusdirs -f -X '!*.obidms/' -- "${COMP_WORDS[COMP_CWORD]}"))
        elif [[ "$cur" == *obidms* ]]; then
            COMPREPLY=($(compgen -o plusdirs -f $cur/VIEWS/ -- "${COMP_WORDS[COMP_CWORD]}"))
        else
            COMPREPLY=($(compgen -o plusdirs -f -X '!*.obidms/' -- "${COMP_WORDS[COMP_CWORD]}"))
        fi
        if [[ "$prev" == import ]]; then
            COMPREPLY+=($(compgen -f -- "${COMP_WORDS[COMP_CWORD]}"))
        fi
    fi
}
complete -o nospace -F _obi_comp obi

2
python/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/.DS_Store
/OBITools3.egg-info/

1
python/obitools3/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/.DS_Store

0
python/obitools3/__init__.py Normal file → Executable file
View File

0
python/obitools3/__init__.pyc Normal file → Executable file
View File

View File

@ -0,0 +1,3 @@
#cython: language_level=3
cpdef buildArgumentParser(str configname, str softname)

View File

@ -0,0 +1,58 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import argparse
import sys
from .command import getCommandsList
class ObiParser(argparse.ArgumentParser):
    """Argument parser that shows the whole help text after a usage error."""

    def error(self, message):
        """Write the error on stderr, print the parser help and exit with status 2.

        @param message: description of the parsing error
        @type message: str
        """
        report = 'error: %s\n' % message
        sys.stderr.write(report)
        self.print_help()
        sys.exit(2)
# Build the top-level command line parser of the application.
#
# @param configname: prefix of every option destination created here
#                    (destinations are named '<configname>:<key>')
# @param softname: software name inserted in the help of --version
# @return: the ObiParser instance
cpdef buildArgumentParser(str configname,
str softname):
parser = ObiParser()
# Global options, stored as '<configname>:version' and '<configname>:log'.
parser.add_argument('--version', dest='%s:version' % configname,
action='store_true',
default=False,
help='Print the version of %s' % softname)
parser.add_argument('--log', dest='%s:log' % configname,
action='store',
type=str,
default=None,
help='Create a logfile')
subparsers = parser.add_subparsers(title='subcommands',
description='valid subcommands',
help='additional help')
# Sub-parsers are created from the command modules returned by
# getCommandsList(); each module is inspected for the optional attributes
# `run`, `__title__` (sub-command help) and `addOptions` (option registration).
commands = getCommandsList()
for c in commands:
module = commands[c]
if hasattr(module, "run"):
if hasattr(module, "__title__"):
sub = subparsers.add_parser(c,help=module.__title__)
else:
sub = subparsers.add_parser(c)
if hasattr(module, "addOptions"):
module.addOptions(sub)
# The selected module and its name are exposed as parser defaults under
# '<configname>:module' and '<configname>:modulename'.
sub.set_defaults(**{'%s:module' % configname : module})
sub.set_defaults(**{'%s:modulename' % configname : c})
return parser

View File

@ -0,0 +1,3 @@
#cython: language_level=3
cdef object loadCommand(str name,loader)

View File

@ -0,0 +1,44 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import pkgutil
from obitools3 import commands
cdef object loadCommand(str name,loader):
    '''
    Load a command module from its name and the finder that located it.

    This function is for internal use.

    The module is loaded through its import spec (`find_spec`,
    `module_from_spec`, `exec_module`): the legacy
    `find_module()` / `load_module()` protocol was removed from the standard
    finders in Python 3.12. Finders that only implement the legacy protocol
    are still supported.

    @param name: name of the module
    @type name: str
    @param loader: the module finder (as yielded by `pkgutil.iter_modules`)
    @type loader: a finder object

    @return the loaded module
    @rtype: module

    @raise ImportError: if the finder cannot locate the module
    '''
    import importlib.util
    import sys

    if not hasattr(loader, "find_spec"):
        # Legacy finder: keep the historical behaviour.
        return loader.find_module(name).load_module(name)

    spec = loader.find_spec(name)
    if spec is None:
        raise ImportError("Cannot find command module %r" % name)

    module = importlib.util.module_from_spec(spec)
    # Like load_module() did, register the module under its short name
    # before executing it.
    sys.modules[name] = module
    spec.loader.exec_module(module)

    return module
def getCommandsList():
    '''
    Returns the sub-commands available to the main `obi` command.

    Every module found in the `commands` package that is not itself a
    package is loaded with `loadCommand`.

    @return: a dict instance with key corresponding to each command and
             value corresponding to the module
    @rtype: dict
    '''
    cdef dict cmds = {}

    for finder, modname, ispkg in pkgutil.iter_modules(commands.__path__):
        if not ispkg:
            cmds[modname] = loadCommand(modname, finder)

    return cmds

View File

@ -0,0 +1,10 @@
#cython: language_level=3
cpdef str setRootConfigName(str rootname)
cpdef str getRootConfigName()
cdef dict buildDefaultConfiguration(str root_config_name,
dict config)
cpdef dict getConfiguration(str root_config_name=?,
dict config=?)

114
python/obitools3/apps/config.pyx Executable file
View File

@ -0,0 +1,114 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import sys
from .command import getCommandsList
from .logging cimport getLogger
from .arguments cimport buildArgumentParser
from ..version import version
cdef dict __default_config__ = {}
cpdef str setRootConfigName(str rootname):
    '''
    Set the name of the root section of the default configuration.

    When a root section is already registered under a previous name, its
    content is moved to the new name.

    @param rootname: the new name of the root section
    @type rootname: str

    @return: rootname
    @rtype: str
    '''
    global __default_config__

    previous = __default_config__.get('__root_config__')
    if previous is not None and previous in __default_config__:
        # pop() first, then assign: renaming the section to the name it
        # already has must keep its content instead of deleting it.
        __default_config__[rootname] = __default_config__.pop(previous)

    __default_config__['__root_config__'] = rootname
    return rootname
cpdef str getRootConfigName():
    '''
    Return the name of the root configuration section.

    @return: the registered root section name, or None when none is set
    @rtype: str
    '''
    global __default_config__
    if '__root_config__' in __default_config__:
        return __default_config__['__root_config__']
    return None
cdef dict buildDefaultConfiguration(str root_config_name,
                                    dict config):
    '''
    Reset the default configuration and fill it again.

    The root section receives `config` (completed with the package version)
    and one section is created per command module, holding the module
    `default_config` attribute when it exists and an empty dict otherwise.

    @param root_config_name: name of the root section
    @param config: content of the root section
    @return: the module-level default configuration
    '''
    global __default_config__

    __default_config__.clear()
    setRootConfigName(root_config_name)

    config['version'] = version
    __default_config__[root_config_name] = config

    for name, module in getCommandsList().items():
        assert hasattr(module, "run")
        __default_config__[name] = getattr(module, 'default_config', {})

    return __default_config__
# Return the application configuration, building it on the first call.
#
# On the first call both arguments are required: the default configuration is
# built, the command line is parsed and the parsed options are merged into the
# configuration sections. Later calls return the already built configuration.
#
# @param root_config_name: name of the root section ("__default__" means "not given")
# @param config: content of the root section
# @return: the configuration dictionary
# @raise RuntimeError: on a first call without a root name or with an empty config
#
# NOTE(review): `config={}` is a mutable default; it is only read and rebound
# here, never mutated.
cpdef dict getConfiguration(str root_config_name="__default__",
dict config={}):
global __default_config__
# '__done__' is set at the end of the first successful call.
if '__done__' in __default_config__:
return __default_config__
if root_config_name=="__default__":
raise RuntimeError("No root_config_name specified")
if not config:
raise RuntimeError("Base configuration is empty")
config = buildDefaultConfiguration(root_config_name,
config)
parser = buildArgumentParser(root_config_name,
config[root_config_name]['software'])
options = vars(parser.parse_args())
# --version: print the version and stop.
if options['%s:version' % root_config_name]:
print("%s - Version %s" % (config[root_config_name]['software'],
config[root_config_name]['version']))
sys.exit(0)
# Option destinations are named '<section>:<key>'; options left to None do
# not overwrite the defaults of their section.
for k in options:
section,key = k.split(':')
s = config[section]
if options[k] is not None:
s[key]=options[k]
# The 'module' key is set through the sub-parser defaults, so it is missing
# when no sub-command was given.
if not 'module' in config[root_config_name]:
print('\nError: No command specified',file=sys.stderr)
parser.print_help()
sys.exit(2)
getLogger(config)
config['__done__']=True
return config
def logger(level, *messages):
    '''
    Log messages through the configured logger, with a fallback on stderr.

    @param level: name of the logger method to call ('info', 'warning', ...)
    @type level: str
    @param messages: positional arguments forwarded to that method

    When the configuration or its logger cannot be used, the messages are
    printed on the standard error instead.
    '''
    try:
        config = getConfiguration()
        root = config["__root_config__"]
        l = config[root]['logger']
        if config[root]['verbose']:
            getattr(l, level)(*messages)
    except Exception:
        # Best-effort fallback. `Exception` (and not a bare except) so that
        # SystemExit and KeyboardInterrupt are not swallowed.
        print(*messages, file=sys.stderr)

View File

@ -0,0 +1,3 @@
#cython: language_level=3
cpdef getLogger(dict config)

View File

@ -0,0 +1,48 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import logging
import sys
cpdef getLogger(dict config):
    '''
    Configure and return the root logger as defined by the command line
    options or by the config file.

    A stderr handler is always added; a file handler is added when the
    'log' entry of the root section is set. The logger is stored in the
    root section under 'logger' and 'verbose' is set to True.

    :param config: the configuration; its root section must provide the
                   'loglevel', 'log' and 'modulename' entries
    :return: the root logger
    '''
    root = config["__root_config__"]

    level = config[root]['loglevel']
    logfile= config[root]['log']

    rootlogger = logging.getLogger()
    logFormatter = logging.Formatter("%%(asctime)s [%s : %%(levelname)-5.5s] %%(message)s" % config[root]['modulename'])

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)
    rootlogger.addHandler(stderrHandler)

    if logfile:
        fileHandler = logging.FileHandler(logfile)
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Resolve the level name explicitly instead of hiding every error behind
    # a bare except: only integer level constants of the logging module are
    # accepted (a lower-case name such as "debug" designates a function there),
    # anything else falls back to INFO.
    loglevel = None
    if isinstance(level, str):
        loglevel = getattr(logging, level.upper(), None)
    if not isinstance(loglevel, int) or isinstance(loglevel, bool):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    config[root]['logger']=rootlogger
    config[root]['verbose']=True

    return rootlogger

View File

@ -0,0 +1,389 @@
import codecs
def unescaped_str(arg_str):
    """Interpret the backslash escape sequences written in *arg_str*.

    Used as an argparse `type` so that a separator typed as ``\\t`` on the
    command line is received as a real tabulation.
    """
    raw = arg_str.encode('latin-1', 'backslashreplace')
    return raw.decode('unicode-escape')
def __addInputOption(optionManager):
    """Register the positional input URI and the --skip / --only options."""
    optionManager.add_argument(dest='obi:inputURI',
                               metavar='INPUT',
                               help='Data source URI')

    restriction = optionManager.add_argument_group(
        "Restriction to a sub-part options",
        "Allows to limit analysis to a sub-part of the input")

    counters = (('--skip', "obi:skip", "skip the N first sequences"),
                ('--only', "obi:only", "treat only N sequences"))
    for flag, destination, text in counters:
        restriction.add_argument(flag,
                                 action="store", dest=destination,
                                 metavar='<N>',
                                 default=None,
                                 type=int,
                                 help=text)
def __addImportInputOption(optionManager):
    """Register the options describing the format of an imported file.

    Covers the input format flags (all stored in 'obi:inputformat'), error
    and quality handling, the quality encoding, the molecule type and the
    string standing for NA values in the input.
    """
    group = optionManager.add_argument_group("Input format options for imported files")

    def add_const_flag(flag, destination, value, text):
        # Mutually overriding flags: each one stores its constant in the
        # same destination.
        group.add_argument(flag,
                           action="store_const", dest=destination,
                           default=None,
                           const=value,
                           help=text)

    input_formats = (
        ('--fasta-input', b'fasta',
         "Input file is in sanger fasta format"),
        ('--fastq-input', b'fastq',
         "Input file is in fastq format"),
        ('--silva-input', b'silva',
         "Input file is in SILVA fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence."),
        ('--rdp-input', b'rdp',
         "Input file is in RDP training set fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence."),
        ('--unite-input', b'unite',
         "Input file is in UNITE fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence."),
        ('--sintax-input', b'sintax',
         "Input file is in SINTAX fasta format. If NCBI taxonomy provided with --taxonomy, taxid and scientific name will be added for each sequence."),
        ('--embl-input', b'embl',
         "Input file is in embl nucleic format"),
        ('--genbank-input', b'genbank',
         "Input file is in genbank nucleic format"),
        ('--ngsfilter-input', b'ngsfilter',
         "Input file is an ngsfilter file. If not using tags, use ':' or 'None:None' or '-:-' or any combination"),
        ('--ecopcr-result-input', b'ecopcr',
         "Input file is the result of an ecoPCR (version 2)"),
        ('--ecoprimers-result-input', b'ecoprimers',
         "Input file is the result of an ecoprimers"),
        ('--tabular-input', b'tabular',
         "Input file is a tabular file"),
    )
    for flag, value, text in input_formats:
        add_const_flag(flag, "obi:inputformat", value, text)

    group.add_argument('--no-skip-on-error',
                       action="store_false", dest="obi:skiperror",
                       default=True,
                       help="Don't skip sequence entries with parsing errors (default: they are skipped)")

    group.add_argument('--no-quality',
                       action="store_true", dest="obi:noquality",
                       default=False,
                       help="Do not import fastQ quality")

    quality_formats = (
        ('--quality-sanger', b'sanger',
         "Fastq quality is encoded following sanger format (standard fastq)"),
        ('--quality-solexa', b'solexa',
         "Fastq quality is encoded following solexa sequencer format"),
    )
    for flag, value, text in quality_formats:
        add_const_flag(flag, "obi:qualityformat", value, text)

    molecule_types = (
        ('--nuc', b'nuc', "Input file contains nucleic sequences"),
        ('--prot', b'pep', "Input file contains protein sequences"),
    )
    for flag, value, text in molecule_types:
        add_const_flag(flag, "obi:moltype", value, text)

    group.add_argument('--input-na-string',
                       action="store", dest="obi:inputnastring",
                       default="NA",
                       type=str,
                       help="String associated with Non Available (NA) values in the input")
def __addTabularOption(optionManager):
    """Register the options shared by tabular input and tabular output.

    Adds --no-header ('obi:header') and --sep ('obi:sep').

    This helper is called by both the tabular input and the tabular output
    registration functions. argparse refuses to register the same option
    string twice on one parser, so the registration is done only once per
    option manager; further calls on the same manager are no-ops.
    """
    if getattr(optionManager, '_obi_tabular_options_added', False):
        return

    group = optionManager.add_argument_group("Input and output format options for tabular files")

    group.add_argument('--no-header',
                       action="store_false", dest="obi:header",
                       default=True,
                       help="Don't print the header (first line with column names)")

    group.add_argument('--sep',
                       action="store", dest="obi:sep",
                       default="\t",
                       type=unescaped_str,
                       help="Column separator")

    # Marker read at the top of this function on the next call.
    optionManager._obi_tabular_options_added = True
def __addTabularInputOption(optionManager):
    """Register the options used to read tabular files.

    Adds the shared tabular options, then --dec, --strip-white,
    --blank-line-skip and --comment-char.
    """
    group = optionManager.add_argument_group("Input format options for tabular files")

    __addTabularOption(optionManager)

    # NOTE(review): --strip-white and --blank-line-skip are `store_false`
    # flags with a True default, i.e. giving the flag stores False although
    # the help texts describe the enabled behaviour -- confirm the intent.
    specifications = (
        ('--dec', dict(action="store", dest="obi:dec",
                       default=".",
                       type=str,
                       help="Decimal separator")),
        ('--strip-white', dict(action="store_false", dest="obi:stripwhite",
                               default=True,
                               help="Remove white chars at the beginning and the end of values")),
        ('--blank-line-skip', dict(action="store_false", dest="obi:blanklineskip",
                                   default=True,
                                   help="Skip empty lines")),
        ('--comment-char', dict(action="store", dest="obi:commentchar",
                                default="#",
                                type=str,
                                help="Lines starting by this char are considered as comment")),
    )
    for flag, settings in specifications:
        group.add_argument(flag, **settings)
def __addTabularOutputOption(optionManager):
    """Register the options used to write tabular files.

    Adds the shared tabular options and --na-int-stay-na, which stores False
    in 'obi:na_int_to_0'.
    """
    output_group = optionManager.add_argument_group("Output format options for tabular files")

    __addTabularOption(optionManager)

    na_help = ("NA (Non available) integer values should be exported as NA in tabular output "
               "(default: they are converted to 0 for tabular output).")
    # TODO (kept from the original code, which does not say what remains to be done)
    output_group.add_argument('--na-int-stay-na',
                              action="store_false", dest="obi:na_int_to_0",
                              help=na_help)
def __addTaxdumpInputOption(optionManager):  # TODO maybe not the best way to do it
    """Register the --taxdump flag ('obi:taxdump', False unless given)."""
    taxdump_group = optionManager.add_argument_group("Input format options for taxdump")
    settings = dict(action="store_true", dest="obi:taxdump",
                    default=False,
                    help="Whether the input is a taxdump")
    taxdump_group.add_argument('--taxdump', **settings)
def __addTaxonomyOption(optionManager):
    """Register the --taxonomy option ('obi:taxoURI', None unless given)."""
    taxonomy_group = optionManager.add_argument_group("Input format options for taxonomy")
    settings = dict(action="store", dest="obi:taxoURI",
                    default=None,
                    help="Taxonomy URI")
    taxonomy_group.add_argument('--taxonomy', **settings)
    # TODO option bool to download taxo if URI doesn't exist
# Public entry points used to register input options on an option manager.
# Each one only delegates to the private registration helpers of this module.

# Positional input URI and --skip / --only.
def addMinimalInputOption(optionManager):
__addInputOption(optionManager)
# Input URI options plus the options describing the format of imported files.
def addImportInputOption(optionManager):
__addInputOption(optionManager)
__addImportInputOption(optionManager)
# Options used to read tabular files.
def addTabularInputOption(optionManager):
__addTabularInputOption(optionManager)
# Options used to write tabular files.
# NOTE(review): a second `addTabularOutputOption` is defined further down in
# this file and replaces this one at import time.
def addTabularOutputOption(optionManager):
__addTabularOutputOption(optionManager)
# --taxonomy option.
def addTaxonomyOption(optionManager):
__addTaxonomyOption(optionManager)
# --taxdump flag.
def addTaxdumpInputOption(optionManager):
__addTaxdumpInputOption(optionManager)
# Every input related option at once.
# NOTE(review): the tabular input and tabular output helpers both call
# __addTabularOption on the same manager; argparse rejects an option string
# registered twice unless that helper guards against it -- verify.
def addAllInputOption(optionManager):
__addInputOption(optionManager)
__addImportInputOption(optionManager)
__addTabularInputOption(optionManager)
__addTabularOutputOption(optionManager)
__addTaxonomyOption(optionManager)
__addTaxdumpInputOption(optionManager)
def __addOutputOption(optionManager):
    """Register the positional output URI ('obi:outputURI')."""
    settings = dict(dest='obi:outputURI',
                    metavar='OUTPUT',
                    help='Data destination URI')
    optionManager.add_argument(**settings)
def __addDMSOutputOption(optionManager):
    """Register --no-create-dms ('obi:nocreatedms', False unless given)."""
    dms_group = optionManager.add_argument_group("Output options for DMS data")
    settings = dict(action="store_true", dest="obi:nocreatedms",
                    default=False,
                    help="Don't create an output DMS if it does not already exist")
    dms_group.add_argument('--no-create-dms', **settings)
def __addEltLimitOption(optionManager):
    """Register --max-elts ('obi:maxelts', an integer defaulting to 1000000)."""
    limit_group = optionManager.add_argument_group("Option to limit the number of elements per line in columns")

    description = ("Maximum number of elements per line in a column "
                   "(e.g. the number of different keys in a dictionary-type "
                   "key from sequence headers). If the number of different keys "
                   "is greater than N, the values are stored as character strings")

    limit_group.add_argument('--max-elts',
                             action="store", dest="obi:maxelts",
                             metavar='<N>',
                             default=1000000,
                             type=int,
                             help=description)
def __addExportOutputOption(optionManager):
    """Register the options describing the format of exported files.

    Covers -o, the output format flags (all stored in 'obi:outputformat'),
    the metabaR export options, --only-keys and the handling of NA values.
    """
    group = optionManager.add_argument_group("Output format options for exported files")

    group.add_argument('-o',
                       dest='obi:outputURI',
                       metavar='OUTPUT',
                       help='Data destination URI')

    output_formats = (
        ('--fasta-output', b'fasta', "Output file is in sanger fasta format"),
        ('--fastq-output', b'fastq', "Output file is in fastq format"),
        ('--tab-output', b'tabular', "Output file is in tabular format"),
        ('--metabaR-output', b'metabaR',
         "Export the files needed by the obifiles_to_metabarlist function of the metabaR package"),
    )
    for flag, value, text in output_formats:
        group.add_argument(flag,
                           action="store_const", dest="obi:outputformat",
                           default=None,
                           const=value,
                           help=text)

    group.add_argument('--metabaR-prefix',
                       action="store", dest="obi:metabarprefix",
                       type=str,
                       help="Prefix for the files when using --metabaR-output option")

    metabar_views = (
        ('--metabaR-ngsfilter', "obi:metabarngsfilter",
         "URI to the ngsfilter view when using --metabaR-output option (if not provided, it is not exported)"),
        ('--metabaR-samples', "obi:metabarsamples",
         "URI to the sample metadata view when using --metabaR-output option (if not provided, it is built as just a list of the sample names)"),
    )
    for flag, destination, text in metabar_views:
        group.add_argument(flag,
                           action="store", dest=destination,
                           type=str,
                           default=None,
                           help=text)

    group.add_argument('--only-keys',
                       action="append", dest="obi:only_keys",
                       type=str,
                       default=[],
                       help="Only export the given keys (columns).")

    group.add_argument('--print-na',
                       action="store_true", dest="obi:printna",
                       default=False,
                       help="Print Non Available (NA) values in the output")

    group.add_argument('--output-na-string',
                       action="store", dest="obi:outputnastring",
                       default="NA",
                       type=str,
                       help="String associated with Non Available (NA) values in the output")
def __addNoProgressBarOption(optionManager):
    """Register --no-progress-bar ('obi:noprogressbar', False unless given)."""
    progress_group = optionManager.add_argument_group("Option to deactivate the display of the progress bar")
    settings = dict(action="store_true", dest="obi:noprogressbar",
                    default=False,
                    help="Do not display progress bar")
    progress_group.add_argument('--no-progress-bar', **settings)
# Public entry points used to register output options on an option manager.
# Each one only delegates to the private registration helpers of this module.

# Positional output URI and --no-create-dms.
def addMinimalOutputOption(optionManager):
__addOutputOption(optionManager)
__addDMSOutputOption(optionManager)
# Shared tabular options only (--no-header, --sep).
# NOTE(review): this is the second definition of `addTabularOutputOption` in
# this file; it replaces the earlier one, which delegated to
# __addTabularOutputOption and therefore also registered --na-int-stay-na.
# Confirm which behaviour is intended.
def addTabularOutputOption(optionManager):
__addTabularOption(optionManager)
# Export format options plus the tabular output options.
def addExportOutputOption(optionManager):
__addExportOutputOption(optionManager)
__addTabularOutputOption(optionManager)
# Every output related option at once.
def addAllOutputOption(optionManager):
__addOutputOption(optionManager)
__addDMSOutputOption(optionManager)
__addExportOutputOption(optionManager)
__addTabularOutputOption(optionManager)
# --no-progress-bar flag.
def addNoProgressBarOption(optionManager):
__addNoProgressBarOption(optionManager)
# --max-elts option.
def addEltLimitOption(optionManager):
__addEltLimitOption(optionManager)

View File

@ -0,0 +1,65 @@
#cython: language_level=3
cdef extern from "stdio.h":
struct FILE
int fprintf(FILE *stream, char *format, ...)
int fputs(char *string, FILE *stream)
FILE* stderr
ctypedef unsigned int off_t "unsigned long long"
cdef extern from "unistd.h":
int fsync(int fd);
cdef extern from "time.h":
struct tm :
int tm_yday
int tm_hour
int tm_min
int tm_sec
enum: CLOCKS_PER_SEC
ctypedef int time_t
ctypedef int clock_t
ctypedef int suseconds_t
struct timeval:
time_t tv_sec # seconds */
suseconds_t tv_usec # microseconds */
struct timezone :
int tz_minuteswest; # minutes west of Greenwich
int tz_dsttime; # type of DST correction
int gettimeofday(timeval *tv, timezone *tz)
tm *gmtime_r(time_t *clock, tm *result)
time_t time(time_t *tloc)
clock_t clock()
# Declaration of the ProgressBar extension type: C-level attributes and the
# C-level `clock` method.
cdef class ProgressBar:
# Position value corresponding to 100 %.
cdef off_t maxi
# Clock values recorded at creation and at the last refresh.
cdef clock_t starttime
cdef clock_t lasttime
# Reference interval, in clock ticks, used to adapt the refresh frequency.
cdef clock_t tickcount
# Number of calls between two refreshes, and call counter.
cdef int freq
cdef int cycle
# Index of the current character of the spinning wheel.
cdef int arrow
# Last tenth of completion that was written to the logger.
cdef int lastlog
# True when stderr is a terminal.
cdef bint ontty
cdef int fd
# When true, progress is also reported through the logger.
cdef bint cut
# Header of the bar: bytes object and C pointer to its content.
cdef bytes _head
cdef char *chead
cdef object logger
# Character sets used to draw the bar.
cdef char *wheel
cdef char *spaces
cdef char* diese
cdef clock_t clock(self)

View File

@ -0,0 +1,157 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
from ..utils cimport str2bytes, bytes2str
from .config cimport getConfiguration
import sys
# Text progress bar written on stderr, with an estimate of the remaining time.
#
# An instance is called with the current position; it refreshes the display
# only every `freq` calls, `freq` being adapted from the time measured between
# two refreshes.
cdef class ProgressBar:
# Return the current time of day expressed in CLOCKS_PER_SEC units
# (seconds and microseconds from gettimeofday are both converted).
cdef clock_t clock(self):
cdef clock_t t
cdef timeval tp
cdef clock_t s
<void> gettimeofday(&tp,NULL)
s = <clock_t> (<double> tp.tv_usec * 1.e-6 * <double> CLOCKS_PER_SEC)
t = tp.tv_sec * CLOCKS_PER_SEC + s
return t
# @param maxi: position corresponding to 100 % (values <= 0 are replaced by 1)
# @param config: configuration holding the logger; when empty, the one
#                returned by getConfiguration() is used
# @param head: text displayed in front of the bar
# @param seconds: reference interval used to adapt the refresh frequency
# @param cut: when true, progress is also written to the logger
def __init__(self,
off_t maxi,
dict config={},
str head="",
double seconds=5,
cut=False):
self.starttime = self.clock()
self.lasttime = self.starttime
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
self.freq = 1
self.cycle = 0
self.arrow = 0
self.lastlog = 0
if not config:
config=getConfiguration()
self.ontty = sys.stderr.isatty()
if (maxi<=0):
maxi=1
self.maxi = maxi
# Goes through the `head` property, which stores the encoded header in
# `_head` and refreshes `chead`.
self.head = head
self.chead = self._head
self.cut = cut
self.logger=config[config["__root_config__"]]["logger"]
self.wheel = '|/-\\'
self.spaces=' ' \
' ' \
' ' \
' ' \
' '
self.diese ='##########' \
'##########' \
'##########' \
'##########' \
'##########'
# Report progress.
#
# @param pos: current position, or a callable returning it
# @param force: refresh the display whatever the call counter
def __call__(self, object pos, bint force=False):
cdef off_t ipos
cdef clock_t elapsed
cdef clock_t newtime
cdef clock_t delta
cdef clock_t more
cdef double percent
cdef tm remain
cdef int days,hour,minu,sec
cdef off_t fraction
cdef int twentyth
self.cycle+=1
if self.cycle % self.freq == 0 or force:
self.cycle=1
newtime = self.clock()
delta = newtime - self.lasttime
self.lasttime = newtime
elapsed = newtime - self.starttime
# print(" ",delta,elapsed,elapsed/CLOCKS_PER_SEC,self.tickcount)
# Adapt the number of calls between two refreshes: doubled when refreshes
# come faster than a fifth of the reference interval, halved when they are
# more than five intervals apart.
# NOTE(review): `/=` on a C int under language_level=3 -- confirm that an
# integer division is what gets generated here.
if delta < self.tickcount / 5 :
self.freq*=2
elif delta > self.tickcount * 5 and self.freq>1:
self.freq/=2
if callable(pos):
ipos=pos()
else:
ipos=pos
# `percent` is used as a divisor below, so a null position is replaced by 1.
if ipos==0:
ipos=1
percent = <double>ipos/<double>self.maxi
# Remaining time in seconds, extrapolated from the elapsed time, then
# split into days / hours / minutes / seconds with gmtime_r.
more = <time_t>((<double>elapsed / percent * (1. - percent))/CLOCKS_PER_SEC)
<void>gmtime_r(&more, &remain)
days = remain.tm_yday
hour = remain.tm_hour
minu = remain.tm_min
sec = remain.tm_sec
if self.ontty:
# The bar is 50 characters wide: `fraction` '#' characters, one wheel
# character, then spaces.
fraction=<int>(percent * 50.)
self.arrow=(self.arrow+1) % 4
if days:
<void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %d days %02d:%02d:%02d\033[K',
self.chead,
percent*100,
fraction,self.diese,
self.wheel[self.arrow],
50-fraction,self.spaces,
days,hour,minu,sec)
else:
<void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %02d:%02d:%02d\033[K',
self.chead,
percent*100.,
fraction,self.diese,
self.wheel[self.arrow],
50-fraction,self.spaces,
hour,minu,sec)
if self.cut:
# One log record each time the tenth of completion changes.
tenth = int(percent * 10)
if tenth != self.lastlog:
if self.ontty:
<void>fputs(b'\n',stderr)
self.logger.info('%s %5.1f %% remain : %02d:%02d:%02d\033[K' % (
bytes2str(self._head),
percent*100.,
hour,minu,sec))
self.lastlog=tenth
else:
self.cycle+=1
# Header displayed in front of the bar. It is read back as bytes; setting it
# takes a str, encodes it with str2bytes and refreshes the C pointer `chead`.
property head:
def __get__(self):
return self._head
def __set__(self,str value):
self._head=str2bytes(value)
self.chead=self._head

10
python/obitools3/apps/temp.pxd Executable file
View File

@ -0,0 +1,10 @@
#cython: language_level=3
'''
Created on 28 juillet 2017
@author: coissac
'''
from obitools3.dms.dms cimport DMS
from obitools3.utils cimport tobytes,tostr

96
python/obitools3/apps/temp.pyx Executable file
View File

@ -0,0 +1,96 @@
#cython: language_level=3
'''
Created on 28 juillet 2017
@author: coissac
'''
from os import environb,getpid
from os.path import join, isdir
from tempfile import TemporaryDirectory, _get_candidate_names
from shutil import rmtree
from atexit import register
from obitools3.dms.dms import DMS
from obitools3.apps.config import getConfiguration
from obitools3.apps.config import logger
cpdef get_temp_dir():
    """
    Returns the path of a temporary directory specific to this instance of
    obitools.

    This is an application function. It cannot be called out of an obi
    command. It requires a valid configuration.

    If the function is called several times from the same obi session, the
    same directory is returned: the TemporaryDirectory object is stored in
    the configuration under the "tempdir" key.

    If the OBITMP environment variable exists, the temporary directory is
    created inside this directory.

    The directory is removed when the stored TemporaryDirectory object is
    finalized.

    @return: the path of the temporary directory (the `name` attribute of
             the TemporaryDirectory object).
             NOTE(review): presumably bytes when OBITMP is set (its value is
             read from os.environb) and str otherwise - confirm.
    """
    cdef dict config = getConfiguration()

    root = config["__root_config__"]

    # Directory already created during this session
    try:
        return config[root]["tempdir"].name
    except KeyError:
        pass

    # None lets tempfile choose its default location
    basedir = environb.get(b'OBITMP')

    tmp = TemporaryDirectory(dir=basedir)

    # Keeping a reference in the configuration keeps the directory alive
    config[root]["tempdir"]=tmp

    return tmp.name
cpdef get_temp_dir_name():
    """
    Returns the name of the temporary directory
    specific to this instance of obitools.

    @return: the name of the temporary directory.

    @see get_temp_dir
    """
    # get_temp_dir() already returns the name of the directory.
    # (This function used to call itself, which recursed without end.)
    return get_temp_dir()
cpdef get_temp_dms():
    """
    Returns a temporary DMS specific to this instance of obitools.

    The DMS is created inside the directory given by get_temp_dir() and is
    stored in the configuration under the "tempdms" key, so that the same
    DMS is returned by every call made during the session.

    @return: the temporary DMS.

    @see get_temp_dir
    """
    cdef bytes tmpdirname   # @DuplicatedSignature
    cdef dict config = getConfiguration()   # @DuplicatedSignature
    cdef DMS tmpdms

    root = config["__root_config__"]

    # DMS already created during this session
    try:
        return config[root]["tempdms"]
    except KeyError:
        pass

    # The directory name is not guaranteed to be bytes (tempfile returns a
    # str when no bytes base directory is given), whereas it is stored in a
    # bytes variable and joined with a bytes file name.
    tmpdirname=tobytes(get_temp_dir())

    # Name built from the process id and a random component
    tempname = join(tmpdirname,
                    b"obi.%d.%s" % (getpid(),
                                    tobytes(next(_get_candidate_names())))
                    )

    tmpdms = DMS.new(tempname)

    config[root]["tempdms"]=tmpdms

    return tmpdms

1
python/obitools3/commands/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/.DS_Store

View File

@ -0,0 +1,231 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.dms.column.column cimport Column
from functools import reduce
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes, tostr
from io import BufferedWriter
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN, \
COUNT_COLUMN, \
TAXID_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_INT
from obitools3.dms.capi.obitaxonomy cimport MIN_LOCAL_TAXID
import time
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Annotate sequences with their corresponding NCBI taxid found from the taxon scientific name"


def addOptions(parser):
    """
    Declares the command line options of `obi addtaxids` on the parser:
    the shared input / taxonomy / output / progress bar options, then the
    options specific to the command.
    """
    # Option groups shared with the other obi commands
    for add_shared_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    group = parser.add_argument_group('obi addtaxids specific options')

    # (flags, keyword arguments) of each specific option, in display order
    specific_options = (
        (('-t', '--taxid-tag'),
         dict(action="store",
              dest="addtaxids:taxid_tag",
              metavar="<TAXID_TAG>",
              default=b"TAXID",
              help="Name of the tag to store the found taxid "
                   "(default: 'TAXID').")),

        (('-n', '--taxon-name-tag'),
         dict(action="store",
              dest="addtaxids:taxon_name_tag",
              metavar="<SCIENTIFIC_NAME_TAG>",
              default=b"SCIENTIFIC_NAME",
              help="Name of the tag giving the scientific name of the taxon "
                   "(default: 'SCIENTIFIC_NAME').")),

        (('-g', '--try-genus-match'),
         dict(action="store_true",
              dest="addtaxids:try_genus_match",
              default=False,
              help="Try matching the first word of <SCIENTIFIC_NAME_TAG> when can't find corresponding taxid for a taxon. "
                   "If there is a match it is added in the 'parent_taxid' tag. (Can be used by 'obi taxonomy' to add the taxon under that taxid).")),

        (('-a', '--restricting-ancestor'),
         dict(action="store",
              dest="addtaxids:restricting_ancestor",
              metavar="<RESTRICTING_ANCESTOR>",
              default=None,
              help="Enables to restrict the search of taxids under an ancestor specified by its taxid.")),

        (('-l', '--log-file'),
         dict(action="store",
              dest="addtaxids:log_file",
              metavar="<LOG_FILE>",
              default='',
              help="Path to a log file to write informations about not found taxids.")),
    )

    for flags, settings in specific_options:
        group.add_argument(*flags, **settings)
def _percentage(count, total):
    """Returns count as a percentage of total rounded to 2 decimals (0.0 for an empty total)."""
    if not total:
        return 0.0
    return round(count*100.0/total, 2)


def run(config):
    """
    Entry point of `obi addtaxids`.

    Clones the input view into the output view, then for each line looks up
    the taxon whose name is stored in the taxon name column and writes its
    taxid in the taxid column. With the try_genus_match option, when the
    full name is not found, the first word of the name is looked up and the
    taxid found is written in the PARENT_TAXID column.

    @param config: the configuration dictionary of the obi command
    @raise Exception: if the input, the output or the taxonomy can not be opened
    @raise RollbackException: if an error occurs while annotating the view
    """
    DMS.obi_atexit()

    logger("info", "obi addtaxids")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]
    i_view_name = input[1].name

    # Open the output: only the DMS, as the output view is going to be created by cloning the input view
    # (could eventually be done via an open_uri() argument)
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    o_view_name = output[1]

    to_stdout = isinstance(output_0, BufferedWriter)

    # stdout output: create temporary view
    if to_stdout:
        o_dms = i_dms
        i=0
        o_view_name = b"temp"
        while o_view_name in i_dms:   # Making sure view name is unique in output DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
        imported_view_name = o_view_name

    # If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
    # (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
    if i_dms != o_dms:
        imported_view_name = i_view_name
        i=0
        while imported_view_name in o_dms:  # Making sure view name is unique in output DMS
            imported_view_name = i_view_name+b"_"+str2bytes(str(i))
            i+=1
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
        i_view = o_dms[imported_view_name]

    # Clone output view from input view
    o_view = i_view.clone(o_view_name)
    if o_view is None:
        raise Exception("Couldn't create output view")
    i_view.close()

    # Open taxonomy
    taxo_uri = open_uri(config['obi']['taxoURI'])
    if taxo_uri is None or taxo_uri[2] == bytes:
        raise Exception("Couldn't open taxonomy")
    taxo = taxo_uri[1]

    total = len(o_view)

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(total, config)
    else:
        pb = None

    found_count = 0
    not_found_count = 0
    parent_found_count = 0

    # Defined before the loop so that the final progress bar call also works on an empty view
    i = 0

    logfile = None

    try:
        if config['addtaxids']['log_file']:
            logfile = open(config['addtaxids']['log_file'], 'w')

        try_genus = bool(config['addtaxids']['try_genus_match'])

        if 'restricting_ancestor' in config['addtaxids']:
            res_anc = int(config['addtaxids']['restricting_ancestor'])
        else:
            res_anc = None

        taxid_column_name = config['addtaxids']['taxid_tag']
        parent_taxid_column_name = "PARENT_TAXID" # TODO macro
        taxon_name_column_name = config['addtaxids']['taxon_name_tag']
        taxid_column = Column.new_column(o_view, taxid_column_name, OBI_INT)
        parent_taxid_column = Column.new_column(o_view, parent_taxid_column_name, OBI_INT)
        taxon_name_column = o_view[taxon_name_column_name]

        for i in range(total):
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            taxon_name = taxon_name_column[i]
            taxon = taxo.get_taxon_by_name(taxon_name, res_anc)
            if taxon is not None:
                taxid_column[i] = taxon.taxid
                found_count+=1
            elif try_genus:  # try finding genus or other parent taxon from the first word
                #print(i, o_view[i].id)
                taxon_name_sp = taxon_name.split(b" ")
                taxon = taxo.get_taxon_by_name(taxon_name_sp[0], res_anc)
                if taxon is not None:
                    parent_taxid_column[i] = taxon.taxid
                    parent_found_count+=1
                    if logfile:
                        print("Found parent taxon for", tostr(taxon_name), file=logfile)
                else:
                    not_found_count+=1
                    if logfile:
                        print("No taxid found for", tostr(taxon_name), file=logfile)
            else:
                not_found_count+=1
                if logfile:
                    print("No taxid found for", tostr(taxon_name), file=logfile)

    except Exception as e:
        raise RollbackException("obi addtaxids error, rollbacking view: "+str(e), o_view)

    finally:
        # The log file is closed whether the annotation succeeded or not
        if logfile is not None:
            logfile.close()

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    logger("info", "\nTaxids found: "+str(found_count)+"/"+str(total)+" ("+str(_percentage(found_count, total))+"%)")
    if config['addtaxids']['try_genus_match']:
        logger("info", "\nParent taxids found: "+str(parent_found_count)+"/"+str(total)+" ("+str(_percentage(parent_found_count, total))+"%)")
    logger("info", "\nTaxids not found: "+str(not_found_count)+"/"+str(total)+" ("+str(_percentage(not_found_count, total))+"%)")

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[input[0].name]
    input_view_name=[i_view_name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    o_view.write_config(config, "addtaxids", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # stdout output: write to buffer
    if to_stdout:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or to_stdout:
        View.delete_view(o_dms, imported_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,18 @@
#cython: language_level=3
# Declaration of the alignment function implemented in the matching .pyx
# file; `=*` marks the arguments that have a default value there.
cpdef align_columns(bytes dms_n,
                    bytes input_view_1_n,
                    bytes output_view_n,
                    bytes input_view_2_n=*,
                    bytes input_column_1_n=*,
                    bytes input_column_2_n=*,
                    bytes input_elt_1_n=*,
                    bytes input_elt_2_n=*,
                    bytes id_column_1_n=*,
                    bytes id_column_2_n=*,
                    double threshold=*, bint normalize=*,
                    int reference=*, bint similarity_mode=*,
                    bint print_seq=*, bint print_count=*,
                    bytes comments=*,
                    int thread_count=*)

View File

@ -0,0 +1,285 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
obi_lcs_align_two_columns
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
import time
import sys
__title__="Align one sequence column with itself or two sequence columns"


def addOptions(parser):
    """
    Declares the command line options of `obi align` on the parser: the
    shared input / output / progress bar options, then the options specific
    to the command.
    """
    # Option groups shared with the other obi commands
    for add_shared_options in (addMinimalInputOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    group = parser.add_argument_group('obi align specific options')

    # (flags, keyword arguments) of each specific option, in display order.
    # --longest-length and --shortest-length deliberately share the same
    # destination: they store different constants in it.
    specific_options = (
        (('--input-2', '-I'),
         dict(action="store", dest="align:inputuri2",
              metavar='<INPUT URI>',
              default="",
              type=str,
              help="Eventually, the URI of the second input to align with the first one.")),

        (('--threshold', '-t'),
         dict(action="store", dest="align:threshold",
              metavar='<THRESHOLD>',
              default=0.0,
              type=float,
              help="Score threshold. If the score is normalized and expressed in similarity (default),"
                   " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
                   " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
                   " If the score is not normalized and expressed in similarity, it is the length of the"
                   " Longest Common Subsequence. If the score is not normalized and expressed in distance,"
                   " it is (reference length - LCS length)."
                   " Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
                   " (no threshold).")),

        (('--longest-length', '-L'),
         dict(action="store_const", dest="align:reflength",
              default=0,
              const=1,
              help="The reference length is the length of the longest sequence."
                   " Default: the reference length is the length of the alignment.")),

        (('--shortest-length', '-l'),
         dict(action="store_const", dest="align:reflength",
              default=0,
              const=2,
              help="The reference length is the length of the shortest sequence."
                   " Default: the reference length is the length of the alignment.")),

        (('--raw', '-r'),
         dict(action="store_false", dest="align:normalize",
              default=True,
              help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")),

        (('--distance', '-D'),
         dict(action="store_false", dest="align:similarity",
              default=True,
              help="Score is expressed in distance. Default: score is expressed in similarity.")),

        (('--print-seq', '-s'),
         dict(action="store_true", dest="align:printseq",
              default=False,
              help="The nucleotide sequences are written in the output view. Default: they are not written.")),

        (('--print-count', '-n'),
         dict(action="store_true", dest="align:printcount",
              default=False,
              help="Sequence counts are written in the output view. Default: they are not written.")),

        # TODO should probably be in a specific option group
        (('--thread-count', '-p'),
         dict(action="store", dest="align:threadcount",
              metavar='<THREAD COUNT>',
              default=1,
              type=int,
              help="Number of threads to use for the computation. Default: one.")),
    )

    for flags, settings in specific_options:
        group.add_argument(*flags, **settings)
cpdef align_columns(bytes dms_n,
                    bytes input_view_1_n,
                    bytes output_view_n,
                    bytes input_view_2_n=b"",
                    bytes input_column_1_n=b"",
                    bytes input_column_2_n=b"",
                    bytes input_elt_1_n=b"",
                    bytes input_elt_2_n=b"",
                    bytes id_column_1_n=b"",
                    bytes id_column_2_n=b"",
                    double threshold=0.0, bint normalize=True,
                    int reference=0, bint similarity_mode=True,
                    bint print_seq=False, bint print_count=False,
                    bytes comments=b"{}",
                    int thread_count=1) :
    """
    Calls the C alignment function matching the arguments.

    When a second view name or a second column name is given,
    obi_lcs_align_two_columns() is called, otherwise
    obi_lcs_align_one_column() is called. thread_count is only forwarded to
    the one column function.

    @raise Exception: if the C function returns a negative value
    """
    if input_view_2_n != b"" or input_column_2_n != b"" :
        status = obi_lcs_align_two_columns(dms_n,
                                           input_view_1_n,
                                           input_view_2_n,
                                           input_column_1_n,
                                           input_column_2_n,
                                           input_elt_1_n,
                                           input_elt_2_n,
                                           id_column_1_n,
                                           id_column_2_n,
                                           output_view_n,
                                           comments,
                                           print_seq,
                                           print_count,
                                           threshold, normalize, reference, similarity_mode)
    else:
        status = obi_lcs_align_one_column(dms_n,
                                          input_view_1_n,
                                          input_column_1_n,
                                          input_elt_1_n,
                                          id_column_1_n,
                                          output_view_n,
                                          comments,
                                          print_seq,
                                          print_count,
                                          threshold, normalize, reference, similarity_mode,
                                          thread_count)

    # Both C functions report a failure with a negative value
    if status < 0 :
        raise Exception("Error aligning sequences")
def _split_view_uri(uri, shown_uri):
    """
    Splits a 'view[/column[/element]]' bytes path into its three names;
    the names that are not given are returned as b"".
    """
    parts = uri.split(b"/")
    if len(parts) > 3:
        raise Exception("Input URI contains too many elements:", shown_uri)
    view_name = parts[0]
    column_name = parts[1] if len(parts) >= 2 else b""
    element_name = parts[2] if len(parts) == 3 else b""
    return view_name, column_name, element_name


def run(config):
    """
    Entry point of `obi align`.

    Aligns the sequences of the input column, with themselves or with the
    sequences of a second input, and stores the result in the output view.
    The alignment is always computed inside the DMS of the first input: a
    second input coming from another DMS is temporarily imported into it,
    and a result going to another DMS (or to stdout) is first written in a
    temporary view which is deleted afterwards.

    @param config: the configuration dictionary of the obi command
    @raise Exception: if an input or the output can not be opened, if an
                      input URI has too many elements or if the alignment fails
    """
    DMS.obi_atexit()

    logger("info", "obi align")

    # Open the input: only the DMS
    input = open_uri(config['obi']['inputURI'],
                     dms_only=True)
    if input is None:
        raise Exception("Could not read input")
    i_dms = input[0]
    i_dms_name = input[0].name
    i_uri = input[1]
    i_view_name, i_column_name, i_element_name = _split_view_uri(i_uri, config['obi']['inputURI'])

    # Open the second input if there is one
    i_dms_2 = None
    i_dms_name_2 = b""
    original_i_view_name_2 = b""
    i_view_name_2 = b""
    i_column_name_2 = b""
    i_element_name_2 = b""
    if config['align']['inputuri2']:
        input_2 = open_uri(config['align']['inputuri2'],
                           dms_only=True)
        if input_2 is None:
            raise Exception("Could not read second input")
        i_dms_2 = input_2[0]
        i_dms_name_2 = i_dms_2.name
        i_uri_2 = input_2[1]
        original_i_view_name_2, i_column_name_2, i_element_name_2 = _split_view_uri(i_uri_2, config['align']['inputuri2'])

        # If the 2 input DMS are not the same, temporarily import 2nd input view in first input DMS
        # NOTE(review): when both inputs are in the same DMS, i_view_name_2 stays empty, so the
        # name of the second view is not passed to the alignment function - confirm this is intended.
        if i_dms != i_dms_2:
            temp_i_view_name_2 = original_i_view_name_2
            i=0
            while temp_i_view_name_2 in i_dms:  # Making sure view name is unique in input DMS
                temp_i_view_name_2 = original_i_view_name_2+b"_"+str2bytes(str(i))
                i+=1
            i_view_name_2 = temp_i_view_name_2
            View.import_view(i_dms_2.full_path[:-7], i_dms.full_path[:-7], original_i_view_name_2, i_view_name_2)

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output")
    o_dms = output[0]
    output_0 = output[0]
    final_o_view_name = output[1]
    o_view_name = final_o_view_name

    to_stdout = isinstance(output_0, BufferedWriter)

    # If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
    # the right DMS and deleted in the other afterwards.
    if i_dms != o_dms or to_stdout:
        if to_stdout:
            o_dms = i_dms
        o_view_name = b"temp"
        # The counter has to be initialised here: it only exists beforehand
        # when a second input from another DMS was imported above
        i=0
        while o_view_name in i_dms:   # Making sure view name is unique in input DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])

    i_dms_list = [i_dms_name]
    if i_dms_name_2:
        i_dms_list.append(i_dms_name_2)
    i_view_list = [i_view_name]
    if original_i_view_name_2:
        i_view_list.append(original_i_view_name_2)
    comments = View.print_config(config, "align", command_line, input_dms_name=i_dms_list, input_view_name=i_view_list)

    # Call cython alignment function
    # Using default ID columns of the view. TODO discuss adding option
    align_columns(i_dms.name_with_full_path,
                  i_view_name,
                  o_view_name,
                  input_view_2_n = i_view_name_2,
                  input_column_1_n = i_column_name,
                  input_column_2_n = i_column_name_2,
                  input_elt_1_n = i_element_name,
                  input_elt_2_n = i_element_name_2,
                  id_column_1_n = b"",
                  id_column_2_n = b"",
                  threshold = config['align']['threshold'],
                  normalize = config['align']['normalize'],
                  reference = config['align']['reflength'],
                  similarity_mode = config['align']['similarity'],
                  print_seq = config['align']['printseq'],
                  print_count = config['align']['printcount'],
                  comments = comments,
                  thread_count = config['align']['threadcount'])

    # If the input and output DMS are not the same, export result view to output DMS
    if i_dms != o_dms:
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # Save command config in output DMS comments
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # If the two input DMS are different, delete the temporary input view in the first input DMS
    if i_dms_2 and i_dms != i_dms_2:
        View.delete_view(i_dms, i_view_name_2)
        i_dms_2.close()

    # stdout output: write to buffer
    if to_stdout:
        logger("info", "Printing to output...")
        o_view = o_dms[o_view_name]
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms or to_stdout:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)

    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,4 @@
#cython: language_level=3
# Declaration of the C-level function implemented in the matching .pyx file:
# takes the direct and the reverse read and returns an alignment object
# (or None).
cdef object buildAlignment(object direct, object reverse)

View File

@ -0,0 +1,293 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_QUAL
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport str2bytes
from io import BufferedWriter
import sys
import os
from cpython.exc cimport PyErr_CheckSignals
__title__="Align paired-ended reads"


def addOptions(parser):
    """
    Declares the command line options of `obi alignpairedend` on the
    parser: the shared input / output / progress bar options, then the
    options specific to the command.
    """
    # Option groups shared with the other obi commands
    for add_shared_options in (addMinimalInputOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    group = parser.add_argument_group('obi alignpairedend specific options')

    # (flags, keyword arguments) of each specific option, in display order
    specific_options = (
        (('-R', '--reverse-reads'),
         dict(action="store", dest="alignpairedend:reverse",
              metavar="<URI>",
              default=None,
              type=str,
              help="URI to the reverse reads if they are in a different view than the forward reads")),

        # Options that are currently disabled:
        #
        # (('--score-min',),
        #  dict(action="store", dest="alignpairedend:smin",
        #       metavar="#.###",
        #       default=None,
        #       type=float,
        #       help="Minimum score for keeping alignments. "
        #            "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")),
        #
        # (('-A', '--true-ali'),
        #  dict(action="store_true", dest="alignpairedend:trueali",
        #       default=False,
        #       help="Performs gap free end alignment of sequences instead of using kmers to compute alignments (slower).")),

        (('-k', '--kmer-size'),
         dict(action="store", dest="alignpairedend:kmersize",
              metavar="#",
              default=3,
              type=int,
              help="K-mer size for kmer comparisons, between 1 and 4 (default: 3)")),
    )

    for flags, settings in specific_options:
        group.add_argument(*flags, **settings)
# Module level aligners, reused for every pair of reads
la = QSolexaReverseAssemble()
ra = QSolexaRightReverseAssemble()


cdef object buildAlignment(object direct, object reverse):
    """
    Aligns a pair of reads with both module level aligners and returns the
    alignment with the highest score, tagged with its direction ('left' for
    `la`, 'right' for `ra`). On equal scores the 'left' alignment is kept.

    @return: the alignment, or None if one of the reads is empty
    """
    if not len(direct) or not len(reverse):
        return None

    la.seqA = direct
    la.seqB = reverse
    left_ali = la()
    left_ali.direction = 'left'

    ra.seqA = direct
    ra.seqB = reverse
    right_ali = ra()
    right_ali.direction = 'right'

    return right_ali if left_ali.score < right_ali.score else left_ali
def alignmentIterator(entries, aligner):
    """
    Generator over the alignments of the read pairs.

    @param entries: either a list [forward view, reverse view], or a single
                    view whose lines hold the reverse read in the
                    REVERSE_SEQUENCE_COLUMN and REVERSE_QUALITY_COLUMN columns
    @param aligner: callable taking the forward and the reverse read and
                    returning an alignment or None

    Pairs for which the aligner returns None are skipped.
    """
    paired_views = type(entries) == list

    if paired_views:
        forward, reverse = entries[0], entries[1]
        pair_count = len(forward)
    else:
        pair_count = len(entries)

    for rank in range(pair_count):

        if paired_views:
            read_f = forward[rank]
            read_r = reverse[rank]
        else:
            # The reverse read is rebuilt from the columns of the forward one
            read_f = Nuc_Seq.new_from_stored(entries[rank])
            read_r = Nuc_Seq(read_f.id, read_f[REVERSE_SEQUENCE_COLUMN], quality=read_f[REVERSE_QUALITY_COLUMN])
            read_r.index = rank

        ali = aligner(read_f, read_r)

        if ali is not None:
            yield ali
def run(config):
    """
    Entry point of `obi alignpairedend`.

    Aligns each forward read with its reverse read, which is read either
    from a second view (reverse option) or from the reverse columns of the
    input view, and writes in the output view the consensus of the pair
    when the reads overlap, or the joined sequences otherwise.

    @param config: the configuration dictionary of the obi command
    @raise Exception: if a view can not be opened or created, or if the
                      forward and reverse views have different sizes
    @raise NotImplementedError: if an input is not a NUC_SEQS view
    """
    DMS.obi_atexit()

    logger("info", "obi alignpairedend")

    # Open the input

    two_views = False
    forward = None
    reverse = None

    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not open input reads")
    if input[2] != View_NUC_SEQS:
        raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')

    if "reverse" in config["alignpairedend"]:

        two_views = True

        forward = input[1]

        rinput = open_uri(config["alignpairedend"]["reverse"])
        if rinput is None:
            raise Exception("Could not open reverse reads")
        if rinput[2] != View_NUC_SEQS:
            raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')

        reverse = rinput[1]

        if len(forward) != len(reverse):
            raise Exception("Error: the number of forward and reverse reads are different")

        entries = [forward, reverse]
        input_dms_name = [forward.dms.name, reverse.dms.name]
        input_view_name = [forward.name, reverse.name]

    else:
        entries = input[1]
        input_dms_name = [entries.dms.name]
        input_view_name = [entries.name]

    if two_views:
        entries_len = len(forward)
    else:
        entries_len = len(entries)

    # Open the output
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=View_NUC_SEQS)
    if output is None:
        raise Exception("Could not create output view")

    output_0 = output[0]
    o_dms = output[0]

    to_stdout = isinstance(output_0, BufferedWriter)

    # stdout output: create temporary view
    if to_stdout:
        # Using the DMS of the input: `forward` is None when the reverse
        # reads are stored in the input view, so it can not be used here
        i_dms = input[0]
        o_dms = i_dms
        i=0
        o_view_name = b"temp"
        while o_view_name in i_dms:   # Making sure view name is unique in input DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
    else:
        o_view = output[1]
        Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(entries_len, config)
    else:
        pb = None

    #if config['alignpairedend']['trueali']:
    #    kmer_ali = False
    #    aligner = buildAlignment
    #else :
    kmer_ali = True
    if two_views:
        if len(forward) == 0 or len(reverse) == 0:
            aligner = None
        else:
            aligner = Kmer_similarity(forward,
                                      view2=reverse,
                                      kmer_size=config['alignpairedend']['kmersize'],
                                      reversed_column=None)
    else:
        if len(entries) == 0:
            aligner = None
        else:
            aligner = Kmer_similarity(entries,
                                      column2=entries[REVERSE_SEQUENCE_COLUMN],
                                      qual_column2=entries[REVERSE_QUALITY_COLUMN],
                                      kmer_size=config['alignpairedend']['kmersize'],
                                      reversed_column=entries[b'reversed'])  # column created by the ngsfilter tool

    ba = alignmentIterator(entries, aligner)

    # NOTE(review): i counts the alignments yielded by the iterator and is
    # also used as the index of the input reads; both only match as long as
    # the iterator never skips a pair - confirm the aligner never returns None.
    i = 0
    for ali in ba:

        if pb is not None:
            pb(i)

        PyErr_CheckSignals()

        consensus = o_view[i]

        if two_views:
            consensus[b"R1_parent"] = forward[i].id
            consensus[b"R2_parent"] = reverse[i].id

        if not two_views:
            seqF = entries[i]
        else:
            seqF = forward[i]

        if ali.overlap_len > 0 :
            buildConsensus(ali, consensus, seqF)
        else:
            # No overlap between the reads: they are joined
            if not two_views:
                seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality = seqF[REVERSE_QUALITY_COLUMN])
            else:
                seqR = reverse[i]
            buildJoinedSequence(ali, seqR, consensus, forward=seqF)

        if kmer_ali :
            ali.free()

        i+=1

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    if kmer_ali and aligner is not None:
        aligner.free()

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(view), file=sys.stderr)

    # stdout output: write to buffer
    if to_stdout:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # If stdout output, delete the temporary imported view used to create the final file
    if to_stdout:
        View_NUC_SEQS.delete_view(o_dms, o_view_name)
        output_0.close()

    # Close all DMS
    input[0].close(force=True)
    if two_views:
        rinput[0].close(force=True)
    o_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,405 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from functools import reduce
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from io import BufferedWriter
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN, \
COUNT_COLUMN, \
TAXID_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_STR
from obitools3.dms.column.column cimport Column
import time
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Annotate views with new tags and edit existing annotations"


# Columns holding the sequence itself and its main descriptors
SPECIAL_COLUMNS = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN]


def addOptions(parser):
    """
    Declares the command line options of `obi annotate` on the parser: the
    shared input / taxonomy / output / progress bar options, then the
    options specific to the command.
    """
    # Option groups shared with the other obi commands
    for add_shared_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    group = parser.add_argument_group('obi annotate specific options')

    # (flags, keyword arguments) of each specific option, in display order
    specific_options = (
        # TODO seq/elt/line???
        (('--seq-rank',),
         dict(action="store_true",
              dest="annotate:add_rank",
              default=False,
              help="Add a rank attribute to the sequence "
                   "indicating the sequence position in the data.")),

        (('-R', '--rename-tag'),
         dict(action="append",
              dest="annotate:rename_tags",
              metavar="<OLD_NAME:NEW_NAME>",
              type=str,
              default=[],
              help="Change tag name from OLD_NAME to NEW_NAME.")),

        (('-D', '--delete-tag'),
         dict(action="append",
              dest="annotate:delete_tags",
              metavar="<TAG_NAME>",
              type=str,
              default=[],
              help="Delete tag TAG_NAME.")),

        (('-S', '--set-tag'),
         dict(action="append",
              dest="annotate:set_tags",
              metavar="<TAG_NAME:PYTHON_EXPRESSION>",
              type=str,
              default=[],
              help="Add a new tag named TAG_NAME with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--set-identifier',),
         dict(action="store",
              dest="annotate:set_identifier",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Set sequence identifier with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--set-sequence',),
         dict(action="store",
              dest="annotate:set_sequence",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Change the sequence itself with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--set-definition',),
         dict(action="store",
              dest="annotate:set_definition",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Set sequence definition with "
                   "a value computed from PYTHON_EXPRESSION.")),

        (('--run',),
         dict(action="store",
              dest="annotate:run",
              metavar="<PYTHON_EXPRESSION>",
              type=str,
              default=None,
              help="Run a python expression on each element.")),

        (('-C', '--clear'),
         dict(action="store_true",
              dest="annotate:clear",
              default=False,
              help="Clear all tags except the obligatory ones.")),

        (('-k', '--keep'),
         dict(action='append',
              dest="annotate:keep",
              metavar="<TAG>",
              default=[],
              type=str,
              help="Only keep this tag. (Can be specified several times.)")),

        (('--length',),
         dict(action="store_true",
              dest="annotate:length",
              default=False,
              help="Add 'seq_length' tag with sequence length.")),

        (('--with-taxon-at-rank',),
         dict(action='append',
              dest="annotate:taxon_at_rank",
              metavar="<RANK_NAME>",
              default=[],
              type=str,
              help="Add taxonomy annotation at the specified rank level RANK_NAME.")),
    )

    for flags, settings in specific_options:
        group.add_argument(*flags, **settings)
def sequenceTaggerGenerator(config, taxo=None):
    """
    Build the per-sequence annotation function used by `obi annotate`.

    The options are read once from ``config['annotate']``; the returned
    closure applies them, in place, to every sequence it is called with.
    Options that are absent from the configuration are treated as unset.

    :param config: configuration mapping; only ``config['annotate']`` is read.
    :param taxo: taxonomy object. Required when ``taxon_at_rank`` is set; when
        given it is also exposed as ``taxonomy`` to the evaluated expressions.
    :returns: a function ``sequenceTagger(seq)`` that edits ``seq`` in place.
    :raises Exception: if taxon ranks are requested without a taxonomy.
    """
    annotate = config['annotate']

    def _option(name, default=None):
        # Only relies on `in` and item access, like the rest of the module.
        return annotate[name] if name in annotate else default

    # Keep only well-formed TAG_NAME:PYTHON_EXPRESSION items, converted to bytes.
    toSet = [[tobytes(part) for part in item.split(':', 1)]
             for item in _option('set_tags', [])
             if len(item.split(':', 1)) == 2]
    newId = _option('set_identifier')
    newDef = _option('set_definition')
    newSeq = _option('set_sequence')
    length = _option('length')
    add_rank = _option('add_rank')
    run = _option('run')

    # One-element list so that the closure can update the running count.
    counter = [0]

    annoteRank = []
    if _option('taxon_at_rank', []):
        if taxo is None:
            raise Exception("A taxonomy must be provided to annotate taxon ranks")
        annoteRank = annotate['taxon_at_rank']

    def _evaluate(expression, seq):
        # NOTE(review): eval() executes arbitrary code. The expressions come
        # from the command line of the user running the tool; never feed this
        # with untrusted input.
        environ = {'sequence': seq, 'counter': counter[0], 'math': math}
        if taxo is not None:
            environ['taxonomy'] = taxo
        # The sequence itself is the local namespace, so tags are usable as names.
        return eval(expression, environ, seq)

    def _evaluate_or_literal(expression, seq):
        try:
            return _evaluate(expression, seq)
        except Exception:  # set string if not a valid expression
            return expression

    def sequenceTagger(seq):
        counter[0] += 1

        for rank in annoteRank:
            if TAXID_COLUMN in seq:
                taxid = seq[TAXID_COLUMN]
                if taxid is not None:
                    rtaxid = taxo.get_taxon_at_rank(taxid, rank)
                    if rtaxid is not None:
                        scn = taxo.get_scientific_name(rtaxid)
                    else:
                        scn = None
                    seq[rank] = rtaxid
                    name_column = "%s_name" % rank
                    if name_column not in seq.view:
                        Column.new_column(seq.view, name_column, OBI_STR)
                    seq[name_column] = scn

        if add_rank:
            seq['seq_rank'] = counter[0]

        for tag, expression in toSet:
            seq[tag] = _evaluate_or_literal(expression, seq)

        if length:
            seq['seq_length'] = len(seq)

        if newId is not None:
            seq.id = _evaluate_or_literal(newId, seq)

        if newDef is not None:
            seq.definition = _evaluate_or_literal(newDef, seq)

        if newSeq is not None:
            seq.seq = _evaluate_or_literal(newSeq, seq)
            if 'seq_length' in seq:
                seq['seq_length'] = len(seq)
            # Delete quality since it must match the sequence.
            # TODO discuss deleting for each sequence separately
            if QUALITY_COLUMN in seq:
                seq.view.delete_column(QUALITY_COLUMN)

        if run is not None:
            # Unlike the setters above, errors in --run are not swallowed.
            _evaluate(run, seq)

    return sequenceTagger
def run(config):
    """
    Run `obi annotate`: clone the input view and edit the clone.

    View-level editions (clear/keep, delete and rename tags) are applied
    first, then every line of the output view is passed to the tagger built
    by sequenceTaggerGenerator(). Any error raised during the editions is
    converted into a RollbackException on the output view.

    :param config: configuration mapping; the 'obi' and 'annotate' sections are read.
    :raises Exception: if the input, the output or the taxonomy cannot be opened.
    :raises RollbackException: if an edition fails.
    """
    DMS.obi_atexit()

    logger("info", "obi annotate")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]
    i_view_name = input[1].name

    # Open the output: only the DMS, as the output view is going to be created by cloning the input view
    # (could eventually be done via an open_uri() argument)
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    o_view_name = output[1]
    to_stdout = type(output_0) == BufferedWriter

    # stdout output: create temporary view
    if to_stdout:
        o_dms = i_dms
        suffix = 0
        o_view_name = b"temp"
        while o_view_name in i_dms:  # Making sure view name is unique in output DMS
            o_view_name = o_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1
        imported_view_name = o_view_name

    # If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
    # (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
    if i_dms != o_dms:
        imported_view_name = i_view_name
        suffix = 0
        while imported_view_name in o_dms:  # Making sure view name is unique in output DMS
            imported_view_name = i_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
        i_view = o_dms[imported_view_name]

    # Clone output view from input view
    o_view = i_view.clone(o_view_name)
    if o_view is None:
        raise Exception("Couldn't create output view")
    i_view.close()

    # Open taxonomy if there is one
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config['obi']['taxoURI'])
        if taxo_uri is None or taxo_uri[2] == bytes:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else:
        taxo = None

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(len(o_view), config)
    else:
        pb = None

    # Index of the last processed line; stays 0 when the view is empty so
    # that the final progress bar update never reads an unset variable.
    i = 0

    try:
        # Apply editions

        # Editions at view level.
        # Every option gets a default, and new containers are built so that
        # the lists stored in `config` (saved in the view comments below) are
        # never converted to bytes in place.
        annotate = config['annotate']
        toDelete = []
        toRename = []
        clear = False
        keep = set()
        if 'delete_tags' in annotate:
            toDelete = [tobytes(tag) for tag in annotate['delete_tags']]
        if 'rename_tags' in annotate:
            toRename = [[tobytes(name) for name in x.split(':', 1)]
                        for x in annotate['rename_tags'] if len(x.split(':', 1)) == 2]
        if 'clear' in annotate:
            clear = annotate['clear']
        if 'keep' in annotate:
            keep = {tobytes(tag) for tag in annotate['keep']}

        if clear or keep:
            # Snapshot the keys: columns are deleted while going through them
            for k in list(o_view.keys()):
                if k not in keep and k not in SPECIAL_COLUMNS:
                    o_view.delete_column(k)
        else:
            for k in toDelete:
                o_view.delete_column(k)
            for old_name, new_name in toRename:
                if old_name in o_view:
                    o_view.rename_column(old_name, new_name)

        # Editions at line level
        sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
        for i in range(len(o_view)):
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            sequenceTagger(o_view[i])

    except Exception as e:
        raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name = [input[0].name]
    input_view_name = [i_view_name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # stdout output: write to buffer
    if to_stdout:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or to_stdout:
        View.delete_view(o_dms, imported_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,117 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.build_reference_db cimport build_reference_db
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from io import BufferedWriter
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Build a reference database for ecotag"
def addOptions(parser):
    """Declare the command-line options of `obi build_ref_db` on *parser*."""
    # Shared option groups: input, taxonomy, output and progress bar.
    for add_shared_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    specific = parser.add_argument_group('obi build_ref_db specific options')

    threshold_help = "Score threshold as a normalized identity, e.g. 0.95 for an identity of 95%%. Default: 0.99."
    specific.add_argument('--threshold', '-t',
                          dest="build_ref_db:threshold",
                          metavar='<THRESHOLD>',
                          type=float,
                          default=0.99,
                          action="store",
                          help=threshold_help)
def run(config):
    """
    Run `obi build_ref_db`.

    The reference database is built by build_reference_db() inside the input
    DMS. When the output is stdout or lives in another DMS, the result is
    first written to a temporary view of the input DMS, then printed or
    imported into the output DMS, and the temporary view is deleted.

    :param config: configuration mapping; reads the 'obi' and 'build_ref_db' sections.
    :raises Exception: if the input or output cannot be opened, or if the build fails.
    """
    DMS.obi_atexit()

    logger("info", "obi build_ref_db")

    # Input: only the DMS is opened, the view is referred to by name
    in_uri = open_uri(config['obi']['inputURI'], dms_only=True)
    if in_uri is None:
        raise Exception("Could not read input")
    i_dms = in_uri[0]
    i_dms_name = in_uri[0].name
    i_view_name = in_uri[1]

    # Output: only the DMS as well
    out_uri = open_uri(config['obi']['outputURI'], input=False, dms_only=True)
    if out_uri is None:
        raise Exception("Could not create output")
    o_dms = out_uri[0]
    output_0 = out_uri[0]
    final_o_view_name = out_uri[1]

    # Pick the name of the view the C function writes to: the final name when
    # everything happens in one DMS, a unique temporary name otherwise.
    if i_dms != o_dms or type(output_0) == BufferedWriter:
        o_view_name = b"temp"
        suffix = 0
        while o_view_name in i_dms:  # until the name is unused in the input DMS
            o_view_name = o_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1
        if type(output_0) == BufferedWriter:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    # The taxonomy name and its DMS name are taken from the taxonomy URI
    taxo_uri_parts = config['obi']['taxoURI'].split("/")
    taxonomy_name = taxo_uri_parts[-1]  # Robust in theory

    # Command configuration, stored in the comments of the new view
    command_line = " ".join(sys.argv[1:])
    input_dms_name = [i_dms_name, taxo_uri_parts[-3]]
    input_view_name = [i_view_name, "taxonomy/" + taxo_uri_parts[-1]]
    comments = View.print_config(config, "build_ref_db", command_line,
                                 input_dms_name=input_dms_name,
                                 input_view_name=input_view_name)

    status = build_reference_db(i_dms.name_with_full_path,
                                tobytes(i_view_name),
                                tobytes(taxonomy_name),
                                tobytes(o_view_name),
                                comments,
                                config['build_ref_db']['threshold'])
    if status < 0:
        raise Exception("Error building a reference database")

    # Different DMS: export the result view to the output DMS
    if i_dms != o_dms:
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # stdout output: write to buffer
    if type(output_0) == BufferedWriter:
        logger("info", "Printing to output...")
        o_view = o_dms[o_view_name]
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # Command line, stored in the DMS comments
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # Drop the temporary view when one was used
    if i_dms != o_dms or type(output_0) == BufferedWriter:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

168
python/obitools3/commands/cat.pyx Executable file
View File

@ -0,0 +1,168 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.view.view cimport View
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_COLUMN, \
QUALITY_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.column.column cimport Column
from io import BufferedWriter
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Concatenate views"
def addOptions(parser):
    """Declare the command-line options of `obi cat` on *parser*."""
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

    cat_options = parser.add_argument_group('obi cat specific options')

    views_help = ("URI of a view to concatenate. (e.g. 'my_dms/my_view'). "
                  "Several -c options can be used on the same "
                  "command line.")
    # Repeatable option: every -c appends one view URI to the list.
    cat_options.add_argument("-c",
                             dest="cat:views_to_cat",
                             metavar="<VIEW_NAME>",
                             type=str,
                             default=[],
                             action="append",
                             help=views_help)
def run(config):
    """
    Run `obi cat`: concatenate the views given with -c into one output.

    A first pass over the input views decides the output view type and
    whether the quality columns can be kept (only if all inputs have them).
    The entries are then copied view after view, either into the output view
    or, for stdout output, as their repr() written to the output buffer.

    :param config: configuration mapping; reads the 'obi' and 'cat' sections.
    :raises Exception: if an input view or the output cannot be opened.
    """
    DMS.obi_atexit()

    logger("info", "obi cat")

    # Check the views to concatenate
    idms_list = []
    iview_list = []
    total_len = 0
    remove_qual = False
    remove_rev_qual = False
    v_type = View_NUC_SEQS
    for v_uri in config["cat"]["views_to_cat"]:
        input = open_uri(v_uri)
        if input is None:
            raise Exception("Could not read input view")
        i_dms = input[0]
        i_view = input[1]
        if input[2] != View_NUC_SEQS:  # Check view type (output view is nuc_seqs view if all input view are nuc_seqs view)
            v_type = View
        if QUALITY_COLUMN not in i_view:  # Check if keep quality column in output view (if all input views have it)
            remove_qual = True
        if REVERSE_QUALITY_COLUMN not in i_view:  # same as above for reverse quality
            remove_rev_qual = True
        total_len += len(i_view)
        idms_list.append(i_dms)
        iview_list.append(i_view.name)
        i_view.close()

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=v_type)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    o_view = output[1]

    # stdout output
    if type(output_0)==BufferedWriter:
        o_dms = i_dms

    # Initialize quality columns and their associated sequence columns if needed
    if type(output_0) != BufferedWriter:
        if not remove_qual:
            if NUC_SEQUENCE_COLUMN not in o_view:
                Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
            Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
        if not remove_rev_qual:
            Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
            Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)

    # Initialize multiple elements columns
    if type(output_0)!=BufferedWriter:
        dict_cols = {}
        for v_uri in config["cat"]["views_to_cat"]:
            v = open_uri(v_uri)[1]
            for coln in v.keys():
                # Fetch the column once and reuse it for every attribute read
                col = v[coln]
                if col.nb_elements_per_line > 1:
                    if coln not in dict_cols:
                        dict_cols[coln] = {
                            'eltnames': set(col.elements_names),
                            'nbelts': col.nb_elements_per_line,
                            'obitype': col.data_type_int,
                        }
                    else:
                        # Same column in several views: merge the element names
                        info = dict_cols[coln]
                        info['eltnames'] = set(col.elements_names + list(info['eltnames']))
                        info['nbelts'] = len(info['eltnames'])
            v.close()
        for coln in dict_cols:
            Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
                              nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']), dict_column=True)

    # Initialize the progress bar
    if not config['obi']['noprogressbar']:
        pb = ProgressBar(total_len, config)
    else:
        pb = None

    i = 0
    for v_uri in config["cat"]["views_to_cat"]:
        v = open_uri(v_uri)[1]
        for entry in v:
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            if type(output_0)==BufferedWriter:
                rep = repr(entry)
                output_0.write(str2bytes(rep)+b"\n")
            else:
                try:
                    o_view[i] = entry
                except Exception:
                    # Best effort: report the faulty entry and go on with the
                    # next one. Only Exception is caught so that interrupts
                    # and exits are never swallowed here.
                    print("\nError with entry:", repr(entry))
                    print(repr(o_view))
            i+=1
        v.close()

    # Deletes quality columns if needed
    if type(output_0)!=BufferedWriter:
        if QUALITY_COLUMN in o_view and remove_qual:
            o_view.delete_column(QUALITY_COLUMN)
        if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual:
            o_view.delete_column(REVERSE_QUALITY_COLUMN)

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    # Save command config in DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(view), file=sys.stderr)

    for d in idms_list:
        d.close(force=True)
    o_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,146 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiclean cimport obi_clean
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from io import BufferedWriter
import sys
__title__="Tag a set of sequences for PCR and sequencing errors identification"
def addOptions(parser):
    """Declare the command-line options of `obi clean` on *parser*."""
    # Shared option groups: input, output and progress bar.
    for add_shared_options in (addMinimalInputOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    clean_options = parser.add_argument_group('obi clean specific options')

    clean_options.add_argument('--distance', '-d',
                               dest="clean:distance",
                               metavar='<DISTANCE>',
                               type=float,
                               default=1.0,
                               action="store",
                               help="Maximum numbers of errors between two variant sequences. Default: 1.")

    # No default here: the run() of this module fills in "" when the option is absent.
    clean_options.add_argument('--sample-tag', '-s',
                               dest="clean:sample-tag-name",
                               metavar="<SAMPLE TAG NAME>",
                               type=str,
                               action="store",
                               help="Name of the tag where merged sample count informations are kept (typically generated by obi uniq, usually MERGED_sample, default: None).")

    ratio_help = ("Maximum ratio between the counts of two sequences so that the less abundant one can be considered"
                  " a variant of the more abundant one. Default: 0.5.")
    clean_options.add_argument('--ratio', '-r',
                               dest="clean:ratio",
                               metavar='<RATIO>',
                               type=float,
                               default=0.5,
                               action="store",
                               help=ratio_help)

    clean_options.add_argument('--heads-only', '-H',
                               dest="clean:heads-only",
                               default=False,
                               action="store_true",
                               help="Only sequences labeled as heads are kept in the output. Default: False")

    # Disabled option, kept for reference:
    # clean_options.add_argument('--cluster-tags', '-C',
    #                            action="store_true",
    #                            dest="clean:cluster-tags",
    #                            default=False,
    #                            help="Adds tags for each sequence giving its cluster's head and weight for each sample.")

    # TODO should probably be in a specific option group
    clean_options.add_argument('--thread-count', '-p',
                               dest="clean:thread-count",
                               metavar='<THREAD COUNT>',
                               type=int,
                               default=-1,
                               action="store",
                               help="Number of threads to use for the computation. Default: the maximum available.")
def run(config):
    """
    Run `obi clean`.

    The work is done by obi_clean() inside the input DMS. When the output is
    stdout or lives in another DMS, the result is first written to a
    temporary view of the input DMS, then printed or imported into the output
    DMS, and the temporary view is deleted.

    :param config: configuration mapping; reads the 'obi' and 'clean' sections.
        ``config['clean']['sample-tag-name']`` is set to "" when missing.
    :raises Exception: if the input or output cannot be opened, or if obi_clean fails.
    """
    DMS.obi_atexit()

    logger("info", "obi clean")

    # Input: only the DMS is opened, the view is referred to by name
    in_uri = open_uri(config['obi']['inputURI'], dms_only=True)
    if in_uri is None:
        raise Exception("Could not read input")
    i_dms = in_uri[0]
    i_dms_name = in_uri[0].name
    i_view_name = in_uri[1]

    # Output: only the DMS as well
    out_uri = open_uri(config['obi']['outputURI'], input=False, dms_only=True)
    if out_uri is None:
        raise Exception("Could not create output")
    o_dms = out_uri[0]
    output_0 = out_uri[0]
    final_o_view_name = out_uri[1]

    # Pick the name of the view obi_clean writes to: the final name when
    # everything happens in one DMS, a unique temporary name otherwise.
    if i_dms != o_dms or type(output_0) == BufferedWriter:
        o_view_name = b"temp"
        suffix = 0
        while o_view_name in i_dms:  # until the name is unused in the input DMS
            o_view_name = o_view_name + b"_" + str2bytes(str(suffix))
            suffix += 1
        if type(output_0) == BufferedWriter:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    # Command configuration, stored in the comments of the new view
    command_line = " ".join(sys.argv[1:])
    comments = View.print_config(config, "clean", command_line,
                                 input_dms_name=[i_dms_name],
                                 input_view_name=[i_view_name])

    clean_config = config['clean']
    if 'sample-tag-name' not in clean_config:
        clean_config['sample-tag-name'] = ""

    status = obi_clean(i_dms.name_with_full_path,
                       tobytes(i_view_name),
                       tobytes(clean_config['sample-tag-name']),
                       tobytes(o_view_name),
                       comments,
                       clean_config['distance'],
                       clean_config['ratio'],
                       clean_config['heads-only'],
                       clean_config['thread-count'])
    if status < 0:
        raise Exception("Error running obiclean")

    # Different DMS: export the result view to the output DMS
    if i_dms != o_dms:
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # stdout output: write to buffer
    if type(output_0) == BufferedWriter:
        logger("info", "Printing to output...")
        o_view = o_dms[o_view_name]
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # Command line, stored in the DMS comments
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # Drop the temporary view when one was used
    if i_dms != o_dms or type(output_0) == BufferedWriter:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,30 @@
#cython: language_level=3
from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.uri.decode import open_uri
from obitools3.dms import DMS
from obitools3.dms.capi.obidms cimport obi_clean_dms
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes
__title__="Clean a DMS from unfinished views and columns"
def addOptions(parser):
    """Declare the command-line options of `obi clean_dms`: only the input option."""
    addMinimalInputOption(parser)
def run(config):
    """
    Run `obi clean_dms` on the DMS designated by the input URI.

    :param config: configuration mapping; reads ``config['obi']['inputURI']``.
    :raises Exception: if obi_clean_dms() reports an error.
    """
    DMS.obi_atexit()

    logger("info", "obi clean_dms")

    input_uri = config['obi']['inputURI']

    # Keep what precedes the first '.obidms'; partition() returns the whole
    # path unchanged when the extension is absent.
    dms_path = tobytes(input_uri).partition(b'.obidms')[0]

    status = obi_clean_dms(dms_path)
    if status < 0:
        raise Exception("Error cleaning DMS", config['obi']['inputURI'])

    logger("info", "Done.")

View File

@ -0,0 +1,68 @@
#cython: language_level=3
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.dms import DMS
from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.dms.capi.obiview cimport COUNT_COLUMN
from cpython.exc cimport PyErr_CheckSignals
__title__="Count sequence records"
def addOptions(parser):
    """Declare the command-line options of `obi count` on *parser*."""
    addMinimalInputOption(parser)

    count_options = parser.add_argument_group('obi count specific options')

    count_options.add_argument('-s', '--sequence',
                               dest="count:sequence",
                               default=False,
                               action="store_true",
                               help="Prints only the number of sequence records (much faster, default: False).")

    count_options.add_argument('-a', '--all',
                               dest="count:all",
                               default=False,
                               action="store_true",
                               help="Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False).")

    count_options.add_argument('-c', '--count-tag',
                               dest="count:countcol",
                               type=str,
                               default='COUNT',
                               action="store",
                               help="Name of the tag/column associated with the count information (default: COUNT).")
def run(config):
    """
    Run `obi count`: print the number of records and/or the sum of their counts.

    With a count column in the input: both numbers are printed when
    --sequence and --all are both set or both unset, only the sum with --all
    alone, only the number of records with --sequence alone. Without a count
    column, only the number of records is printed.

    :param config: configuration mapping; reads the 'obi' and 'count' sections.
    :raises Exception: if the input cannot be opened.
    """
    DMS.obi_atexit()

    logger("info", "obi count")

    # Open the input
    opened = open_uri(config['obi']['inputURI'])
    if opened is None:
        raise Exception("Could not read input")
    entries = opened[1]

    countcol = config['count']['countcol']
    record_count = len(entries)

    if countcol in entries:
        only_records = config['count']['sequence']
        only_total = config['count']['all']
        same_flags = only_records == only_total

        # The sum requires reading every entry, so it is only computed when
        # it is going to be printed.
        total_count = 0
        if same_flags or only_total:
            for entry in entries:
                PyErr_CheckSignals()
                total_count += entry[countcol]

        if same_flags:
            print(record_count, total_count)
        elif only_total:
            print(total_count)
        else:
            print(record_count)
    else:
        print(record_count)

    opened[0].close(force=True)

View File

@ -0,0 +1,242 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.capi.obidms cimport OBIDMS_p
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.view import View
from libc.stdlib cimport malloc, free
from libc.stdint cimport int32_t
import sys
from io import BufferedWriter
__title__="in silico PCR"
# TODO: add option to output unique ids
def addOptions(parser):
    """Declare the command-line options of `obi ecopcr` on *parser*."""
    # Shared option groups: input, taxonomy, output and progress bar.
    for add_shared_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    pcr = parser.add_argument_group('obi ecopcr specific options')

    # --- Primers (both mandatory) ---
    pcr.add_argument('--primer1', '-F',
                     dest="ecopcr:primer1",
                     metavar='<PRIMER>',
                     type=str,
                     required=True,
                     action="store",
                     help="Forward primer, length must be less than or equal to 32")

    pcr.add_argument('--primer2', '-R',
                     dest="ecopcr:primer2",
                     metavar='<PRIMER>',
                     type=str,
                     required=True,
                     action="store",
                     help="Reverse primer, length must be less than or equal to 32")

    # --- Matching and amplicon length ---
    pcr.add_argument('--error', '-e',
                     dest="ecopcr:error",
                     metavar='<ERROR>',
                     type=int,
                     default=0,
                     action="store",
                     help="Maximum number of errors (mismatches) allowed per primer. Default: 0.")

    pcr.add_argument('--min-length', '-l',
                     dest="ecopcr:min-length",
                     metavar="<MINIMUM LENGTH>",
                     type=int,
                     default=0,
                     action="store",
                     help="Minimum length of the in silico amplified DNA fragment, excluding primers.")

    pcr.add_argument('--max-length', '-L',
                     dest="ecopcr:max-length",
                     metavar="<MAXIMUM LENGTH>",
                     type=int,
                     default=0,
                     action="store",
                     help="Maximum length of the in silico amplified DNA fragment, excluding primers.")

    # --- Taxonomic filters (repeatable) ---
    restrict_help = ("Only the sequence records corresponding to the taxonomic group identified "
                     "by TAXID are considered for the in silico PCR. The TAXID is an integer "
                     "that can be found in the NCBI taxonomic database.")
    pcr.add_argument('--restrict-to-taxid', '-r',
                     dest="ecopcr:restrict-to-taxid",
                     metavar="<TAXID>",
                     type=int,
                     default=[],
                     action="append",
                     help=restrict_help)

    pcr.add_argument('--ignore-taxid', '-i',
                     dest="ecopcr:ignore-taxid",
                     metavar="<TAXID>",
                     type=int,
                     default=[],
                     action="append",
                     help="The sequences of the taxonomic group identified by TAXID are not considered for the in silico PCR.")

    # --- Sequence topology and Tm estimation ---
    pcr.add_argument('--circular', '-c',
                     dest="ecopcr:circular",
                     default=False,
                     action="store_true",
                     help="Considers that the input sequences are circular (e.g. mitochondrial or chloroplastic DNA).")

    pcr.add_argument('--salt-concentration', '-a',
                     dest="ecopcr:salt-concentration",
                     metavar="<FLOAT>",
                     type=float,
                     default=0.05,
                     action="store",
                     help="Salt concentration used for estimating the Tm. Default: 0.05.")

    salt_method_help = ("Defines the method used for estimating the Tm (melting temperature) between the primers and their corresponding "
                        "target sequences. SANTALUCIA: 1, or OWCZARZY: 2. Default: 1.")
    pcr.add_argument('--salt-correction-method', '-m',
                     dest="ecopcr:salt-correction-method",
                     metavar="<1|2>",
                     type=int,
                     default=1,
                     action="store",
                     help=salt_method_help)

    # --- Output content ---
    pcr.add_argument('--keep-primers', '-p',
                     dest="ecopcr:keep-primers",
                     default=False,
                     action="store_true",
                     help="Whether to keep the primers attached to the output sequences (default: the primers are cut out).")

    keep_nucs_help = ("Keeps N nucleotides on each side of the in silico amplified sequences, "
                      "not including the primers (implying that primers are automatically kept if N > 0).")
    pcr.add_argument('--keep-nucs', '-D',
                     dest="ecopcr:keep-nucs",
                     metavar="<N>",
                     type=int,
                     default=0,
                     action="store",
                     help=keep_nucs_help)

    pcr.add_argument('--kingdom-mode', '-k',
                     dest="ecopcr:kingdom-mode",
                     default=False,
                     action="store_true",
                     help="Print in the output the kingdom of the in silico amplified sequences (default: print the superkingdom).")
def run(config):
    """
    Run `obi ecopcr` (in silico PCR) through the obi_ecopcr() C function.

    The taxids to restrict to and to ignore are passed to C as two int32
    arrays, each terminated by -1. For stdout output, the result is written
    to a temporary view of the input DMS, printed, then deleted.

    :param config: configuration mapping; reads the 'obi' and 'ecopcr' sections.
    :raises MemoryError: if the taxid arrays cannot be allocated.
    :raises Exception: if the input, output or taxonomy DMS cannot be opened,
        or if obi_ecopcr fails.
    """
    cdef int32_t* restrict_to_taxids_p = NULL
    cdef int32_t* ignore_taxids_p = NULL

    restrict_to_taxids = config['ecopcr']['restrict-to-taxid']
    ignore_taxids = config['ecopcr']['ignore-taxid']
    restrict_to_taxids_len = len(restrict_to_taxids)
    ignore_taxids_len = len(ignore_taxids)

    try:
        # +1 for the -1 flagging the end of each array
        restrict_to_taxids_p = <int32_t*> malloc((restrict_to_taxids_len + 1) * sizeof(int32_t))
        ignore_taxids_p = <int32_t*> malloc((ignore_taxids_len + 1) * sizeof(int32_t))
        if restrict_to_taxids_p == NULL or ignore_taxids_p == NULL:
            raise MemoryError("Could not allocate memory for the taxid arrays")

        for i in range(restrict_to_taxids_len):
            restrict_to_taxids_p[i] = restrict_to_taxids[i]
        restrict_to_taxids_p[restrict_to_taxids_len] = -1

        for i in range(ignore_taxids_len):
            ignore_taxids_p[i] = ignore_taxids[i]
        ignore_taxids_p[ignore_taxids_len] = -1

        DMS.obi_atexit()

        logger("info", "obi ecopcr")

        # Open the input: only the DMS
        input = open_uri(config['obi']['inputURI'],
                         dms_only=True)
        if input is None:
            raise Exception("Could not read input")
        i_dms = input[0]
        i_dms_name = input[0].name
        i_view_name = input[1]

        # Open the output: only the DMS
        output = open_uri(config['obi']['outputURI'],
                          input=False,
                          dms_only=True)
        if output is None:
            raise Exception("Could not create output")
        o_dms = output[0]
        output_0 = output[0]
        o_view_name = output[1]

        # Open the taxonomy DMS
        taxdms = open_uri(config['obi']['taxoURI'],
                          dms_only=True)
        if taxdms is None:
            raise Exception("Could not open taxonomy DMS")
        tax_dms = taxdms[0]

        # Read taxonomy name
        taxonomy_name = config['obi']['taxoURI'].split("/")[-1]  # Robust in theory

        # If stdout output create a temporary view in the input dms that will be deleted afterwards.
        if type(output_0)==BufferedWriter:
            o_dms = i_dms
            o_view_name = b"temp"
            # The suffix counter starts at 0, as in the other commands; it
            # used to reuse whatever the taxid loops above had left in `i`.
            i = 0
            while o_view_name in i_dms:  # Making sure view name is unique in input DMS
                o_view_name = o_view_name+b"_"+str2bytes(str(i))
                i+=1

        # Save command config in View comments
        command_line = " ".join(sys.argv[1:])
        input_dms_name = [i_dms_name]
        input_view_name = [i_view_name]
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
        comments = View.print_config(config, "ecopcr", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

        # TODO: primers in comments?

        if obi_ecopcr(i_dms.name_with_full_path, tobytes(i_view_name),
                      tax_dms.name_with_full_path, tobytes(taxonomy_name), \
                      o_dms.name_with_full_path, tobytes(o_view_name), comments, \
                      tobytes(config['ecopcr']['primer1']), tobytes(config['ecopcr']['primer2']), \
                      config['ecopcr']['error'], \
                      config['ecopcr']['min-length'], config['ecopcr']['max-length'], \
                      restrict_to_taxids_p, ignore_taxids_p, \
                      config['ecopcr']['circular'], config['ecopcr']['salt-concentration'], config['ecopcr']['salt-correction-method'], \
                      config['ecopcr']['keep-nucs'], config['ecopcr']['keep-primers'], config['ecopcr']['kingdom-mode']) < 0:
            raise Exception("Error running ecopcr")
    finally:
        # The arrays are only needed by obi_ecopcr; release them on every
        # path, including errors. free(NULL) is a no-op.
        free(restrict_to_taxids_p)
        free(ignore_taxids_p)

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view = o_dms[o_view_name]
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[o_view_name]), file=sys.stderr)

    # If stdout output, delete the temporary result view in the input DMS
    if type(output_0)==BufferedWriter:
        View.delete_view(i_dms, o_view_name)

    i_dms.close(force=True)
    o_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,158 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecotag cimport obi_ecotag
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
import sys
from io import BufferedWriter
__title__="Taxonomic assignment of sequences"
def addOptions(parser):
    """Register the command-line options of `obi ecotag` on *parser*.

    The shared input, taxonomy, output and progress-bar option groups are
    added first, followed by the ecotag specific options (reference
    database view, minimum identity and assignment circle threshold).
    """

    # Option groups shared with the other obi commands
    for add_shared_options in (addMinimalInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    ecotag_group = parser.add_argument_group('obi ecotag specific options')

    ref_db_help = "URI of the view containing the reference database as built by the build_ref_db command."
    ecotag_group.add_argument('--ref-database', '-R',
                              dest="ecotag:ref_view",
                              action="store",
                              type=str,
                              metavar='<REF_VIEW>',
                              help=ref_db_help)

    # '%%' is an escaped percent sign: argparse %-formats help strings
    identity_help = ("Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
                     "Default: 0.00 (no threshold).")
    ecotag_group.add_argument('--minimum-identity', '-m',
                              dest="ecotag:threshold",
                              action="store",
                              type=float,
                              default=0.0,
                              metavar='<THRESHOLD>',
                              help=identity_help)

    circle_help = ("Minimum identity considered for the assignment circle "
                   "(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; "
                   "the threshold for this circle is the highest value between <CIRCLE_THRESHOLD> and the best assignment score found for the query sequence). "
                   "Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. "
                   "Default: 0.99.")
    ecotag_group.add_argument('--minimum-circle', '-c',
                              dest="ecotag:bubble_threshold",
                              action="store",
                              type=float,
                              default=0.99,
                              metavar='<CIRCLE_THRESHOLD>',
                              help=circle_help)
def run(config):
    """Run `obi ecotag`: taxonomic assignment of the sequences of a view.

    Reads from *config*:
      - config['obi']['inputURI']: the query view,
      - config['ecotag']['ref_view']: the reference database view,
      - config['obi']['taxoURI']: the taxonomy,
      - config['obi']['outputURI']: the output view (or stdout),
      - config['ecotag']['threshold'] and config['ecotag']['bubble_threshold'].

    The result view is created by obi_ecotag() in the input DMS. When the
    output is another DMS or stdout, that view is a temporary one which is
    imported into the output DMS / printed, and then deleted.

    Raises Exception if a URI can not be opened, if the circle threshold is
    lower than the one stored in the reference database comments, or if
    obi_ecotag() returns a negative value.
    """

    DMS.obi_atexit()

    logger("info", "obi ecotag")

    # Open the query view: only the DMS
    input = open_uri(config['obi']['inputURI'],
                     dms_only=True)
    if input is None:
        raise Exception("Could not read input")
    i_dms = input[0]
    i_dms_name = input[0].name
    i_view_name = input[1]

    # Open the reference view: only the DMS
    ref = open_uri(config['ecotag']['ref_view'],
                   dms_only=True)
    if ref is None:
        raise Exception("Could not read reference view URI")
    ref_dms = ref[0]
    ref_dms_name = ref[0].name
    ref_view_name = ref[1]

    # Check that the threshold demanded is greater than or equal to the threshold used to build the reference database.
    # The stored value is parsed with float() and not eval(): it is data read from the DMS,
    # it must never be executed as code.
    ref_db_threshold = float(ref_dms[ref_view_name].comments["ref_db_threshold"])
    if config['ecotag']['bubble_threshold'] < ref_db_threshold:
        raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({ref_db_threshold}).")

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output")
    o_dms = output[0]
    output_0 = output[0]
    final_o_view_name = output[1]

    # True when the result must be printed to a buffer (stdout) instead of being kept in a DMS
    stdout_output = type(output_0)==BufferedWriter

    # If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
    if i_dms != o_dms or stdout_output:
        temporary_view_name = b"temp"
        i=0
        while temporary_view_name in i_dms:  # Making sure view name is unique in input DMS
            temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view_name = temporary_view_name
        if stdout_output:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    # Read taxonomy DMS and name
    taxo = open_uri(config['obi']['taxoURI'],
                    dms_only=True)
    if taxo is None:
        raise Exception("Couldn't open taxonomy")
    taxo_dms = taxo[0]
    taxonomy_name = config['obi']['taxoURI'].split("/")[-1]  # Robust in theory

    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[i_dms_name]
    input_view_name= [i_view_name]
    input_dms_name.append(ref_dms_name)
    input_view_name.append(ref_view_name)
    input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
    input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    comments = View.print_config(config, "ecotag", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

    # A negative return value is treated as failure
    if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
                  ref_dms.name_with_full_path, tobytes(ref_view_name), \
                  taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
                  tobytes(o_view_name), comments, \
                  config['ecotag']['threshold'], \
                  config['ecotag']['bubble_threshold']) < 0:
        raise Exception("Error running ecotag")

    # If the input and output DMS are not the same, export result view to output DMS
    # NOTE(review): [:-7] presumably strips the DMS directory extension from the path -- confirm
    if i_dms != o_dms:
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    # stdout output: write to buffer
    if stdout_output:
        logger("info", "Printing to output...")
        o_view = o_dms[o_view_name]
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[final_o_view_name]), file=sys.stderr)

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or stdout_output:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)

    taxo_dms.close(force=True)
    ref_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,189 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.dms import DMS
from obitools3.dms.obiseq import Nuc_Seq
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
from obitools3.writers.tab import TabWriter
from obitools3.format.tab import TabFormat
from obitools3.utils cimport tobytes, tostr
from obitools3.apps.optiongroups import addMinimalInputOption, \
addExportOutputOption, \
addNoProgressBarOption
import sys
import io
from cpython.exc cimport PyErr_CheckSignals
__title__="Export a view to a different file format"
def addOptions(parser):
    """Register the command-line options of `obi export` on *parser*.

    Only shared option groups are used: input, export output and
    progress-bar options, added in that order.
    """
    shared_option_adders = (addMinimalInputOption,
                            addExportOutputOption,
                            addNoProgressBarOption)
    for add_shared_options in shared_option_adders:
        add_shared_options(parser)
def run(config):
    """Run `obi export`: write the lines of a view in another file format.

    Reads from *config*:
      - config['obi']['inputURI'] / config['obi']['outputURI'],
      - config['obi']['outputformat'] (guessed from the view when absent:
        fastq if the view is a NUC_SEQS view with a quality column, fasta for
        the other NUC_SEQS views, tabular otherwise),
      - config['obi']['only'], config['obi']['skip'], config['obi']['noprogressbar'],
      - for the metabaR format: config['obi']['metabarprefix'] (mandatory),
        config['obi']['metabarngsfilter'] and config['obi']['metabarsamples'] (optional).

    Raises Exception if a URI can not be opened, if a non NUC_SEQS view is
    exported to a sequence format, or if the metabaR prefix is missing.
    """

    DMS.obi_atexit()

    logger("info", "obi export : exports a view to a different file format")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input")
    iview = input[1]

    if 'outputformat' not in config['obi']:
        if iview.type == b"NUC_SEQS_VIEW":
            if QUALITY_COLUMN in iview:
                config['obi']['outputformat'] = b'fastq'
            else:
                config['obi']['outputformat'] = b'fasta'
        else:
            config['obi']['outputformat'] = b'tabular'

    # Open the output
    output = open_uri(config['obi']['outputURI'],
                      input=False)
    if output is None:
        raise Exception("Could not open output URI")
    output_object = output[0]
    writer = output[1]

    # Check that the input view has the type NUC_SEQS if needed     # TODO discuss, maybe bool property
    if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") :  # Nuc_Seq_Stored? TODO
        raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")

    if config['obi']['only'] is not None:
        withoutskip = min(input[4], config['obi']['only'])
    else:
        withoutskip = input[4]

    if config['obi']['skip'] is not None:
        skip = min(input[4], config['obi']['skip'])
    else:
        skip = 0

    # Initialize the progress bar
    if config['obi']['noprogressbar']:
        pb = None
    else:
        pb = ProgressBar(withoutskip - skip, config)

    metabaR_export = config['obi']['outputformat'] == b'metabaR'
    metabaRprefix = None
    if metabaR_export:
        # Check prefix
        if "metabarprefix" not in config["obi"]:
            raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
        metabaRprefix = config["obi"]["metabarprefix"]

    # Remembers whether the output refused a write (e.g. the reader end of a pipe was closed).
    # The exception classes themselves are always truthy, so they can not be used as a test afterwards.
    output_broken = False

    i=0
    for seq in iview :
        PyErr_CheckSignals()
        if pb is not None:
            pb(i)
        try:
            writer(seq)
        except StopIteration:
            break
        except (BrokenPipeError, IOError):
            output_broken = True
            break
        i+=1

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    if metabaR_export:

        # Export ngsfilter file if view provided
        if 'metabarngsfilter' in config['obi']:
            ngsfilter_input = open_uri(config['obi']['metabarngsfilter'])
            if ngsfilter_input is None:
                raise Exception("Could not read ngsfilter view for metabaR output")
            ngsfilter_view = ngsfilter_input[1]

            # 'with' guarantees that the file is closed even if a line can not be formatted
            with open(metabaRprefix+'.ngsfilter', 'w') as ngsfilter_output:
                for line in ngsfilter_view:
                    # experiment, sample, forward_tag:reverse_tag, forward_primer, reverse_primer, additional_info
                    line_to_print = b"\t".join((line[b'experiment'],
                                                line[b'sample'],
                                                line[b'forward_tag'] + b":" + line[b'reverse_tag'],
                                                line[b'forward_primer'],
                                                line[b'reverse_primer'],
                                                line[b'additional_info']))
                    print(tostr(line_to_print), file=ngsfilter_output)

            if ngsfilter_input[0] != input[0]:
                ngsfilter_input[0].close()

        # Export sample metadata
        with open(metabaRprefix+'_samples.csv', 'w') as samples_output:

            # Export sample metadata file if view provided
            if 'metabarsamples' in config['obi']:
                samples_input = open_uri(config['obi']['metabarsamples'])
                if samples_input is None:
                    raise Exception("Could not read sample view for metabaR output")
                samples_view = samples_input[1]

                # Export with tab formatter
                # NOTE(review): the writer is created but no line of samples_view is ever passed to it,
                # so the sample metadata itself does not seem to be written -- confirm intended behaviour
                TabWriter(TabFormat(header=True, sep='\t',),
                          samples_output,
                          header=True)

                if samples_input[0] != input[0]:
                    samples_input[0].close()

            # Else export just sample names from main view
            else:
                sample_list = []
                if 'MERGED_sample' in iview:
                    sample_list = iview['MERGED_sample'].keys()
                elif 'sample' in iview:
                    # Keep the first occurrence of each sample, in order of appearance
                    seen_samples = set()
                    for seq in iview:
                        sample = seq['sample']
                        if sample not in seen_samples:
                            seen_samples.add(sample)
                            sample_list.append(sample)
                else:
                    logger("warning", "Can not read sample list from main view for metabaR sample list export")

                print("sample_id", file=samples_output)
                for sample in sample_list:
                    print(tostr(sample + b"\t"), file=samples_output)

    # TODO save command in input dms?

    # Closing an output that already refused a write would only fail again
    if not output_broken:
        output_object.close()
    iview.close()
    input[0].close(force=True)

    logger("info", "Done.")

    # NOTE(review): presumably closes stderr to silence the interpreter's broken pipe message at exit -- confirm
    if output_broken:
        sys.stderr.close()

View File

@ -0,0 +1,416 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from functools import reduce
import time
import re
import sys
import ast
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
__title__="Grep view lines that match the given predicates"
# TODO should sequences that have a grepped attribute at None be grepped or not? (in obi1 they are but....)
def addOptions(parser):
    """Register the command-line options of `obi grep` on *parser*.

    Adds the shared input, taxonomy, output and progress-bar option groups,
    then the grep specific selection options and the taxonomy filtering
    options. Every grep option is stored under a "grep:<name>" destination.
    """

    addMinimalInputOption(parser)
    addTaxonomyOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

    group=parser.add_argument_group("obi grep specific options")

    group.add_argument("--predicate", "-p",
                       action="append", dest="grep:grep_predicates",
                       metavar="<PREDICATE>",
                       default=[],
                       type=str,
                       help="Python boolean expression to be evaluated in the "
                            "sequence/line context. The attribute name can be "
                            "used in the expression as a variable name. "
                            "An extra variable named 'sequence' or 'line' refers "
                            "to the sequence or line object itself. "
                            "Several -p options can be used on the same "
                            "command line.")

    group.add_argument("-S", "--sequence",
                       action="store", dest="grep:seq_pattern",
                       metavar="<REGULAR_PATTERN>",
                       type=str,
                       help="Regular expression pattern used to select "
                            "the sequence. The pattern is case insensitive.")

    # The definition and identifier patterns are compiled without re.I by Filter_generator:
    # they are case sensitive, unlike the sequence pattern.
    group.add_argument("-D", "--definition",
                       action="store", dest="grep:def_pattern",
                       metavar="<REGULAR_PATTERN>",
                       type=str,
                       help="Regular expression pattern used to select "
                            "the definition of the sequence. The pattern is case sensitive.")

    group.add_argument("-I", "--identifier",
                       action="store", dest="grep:id_pattern",
                       metavar="<REGULAR_PATTERN>",
                       type=str,
                       help="Regular expression pattern used to select "
                            "the identifier of the sequence. The pattern is case sensitive.")

    group.add_argument("--id-list",
                       action="store", dest="grep:id_list",
                       metavar="<FILE_NAME>",
                       type=str,
                       help="File containing the identifiers of the sequences to select.")

    group.add_argument("-a", "--attribute",
                       action="append", dest="grep:attribute_patterns",
                       type=str,
                       default=[],
                       metavar="<ATTRIBUTE_NAME>:<REGULAR_PATTERN>",
                       help="Regular expression pattern matched against "
                            "the attributes of the sequence. "
                            "The pattern is case sensitive. "
                            "Several -a options can be used on the same "
                            "command line.")

    group.add_argument("-A", "--has-attribute",
                       action="append", dest="grep:attributes",
                       type=str,
                       default=[],
                       metavar="<ATTRIBUTE_NAME>",
                       help="Select records with the attribute <ATTRIBUTE_NAME> "
                            "defined (not set to NA value). "
                            "Several -A options can be used on the same "
                            "command line.")

    group.add_argument("-L", "--lmax",
                       action="store", dest="grep:lmax",
                       metavar="<MAX_LENGTH>",
                       type=int,
                       help="Keep sequences shorter than MAX_LENGTH.")

    group.add_argument("-l", "--lmin",
                       action="store", dest="grep:lmin",
                       metavar="<MIN_LENGTH>",
                       type=int,
                       help="Keep sequences longer than MIN_LENGTH.")

    group.add_argument("-v", "--invert-selection",
                       action="store_true", dest="grep:invert_selection",
                       default=False,
                       help="Invert the selection.")

    group=parser.add_argument_group("Taxonomy filtering specific options")  #TODO put somewhere else? not in grep

    group.add_argument('--require-rank',
                       action="append", dest="grep:required_ranks",
                       metavar="<RANK_NAME>",
                       type=str,
                       default=[],
                       help="Select sequences with a taxid that is or has "
                            "a parent of rank <RANK_NAME>.")

    group.add_argument('-r', '--required',
                       action="append", dest="grep:required_taxids",
                       metavar="<TAXID>",
                       type=int,
                       default=[],
                       help="Select the sequences having the ancestor of taxid <TAXID>. "
                            "If several ancestors are specified (with \n'-r taxid1 -r taxid2'), "
                            "the sequences having at least one of them are selected.")

    # TODO useless option equivalent to -r -v?
    group.add_argument('-i','--ignore',
                       action="append", dest="grep:ignored_taxids",
                       metavar="<TAXID>",
                       type=int,
                       default=[],
                       help="Ignore the sequences having the ancestor of taxid <TAXID>. "
                            "If several ancestors are specified (with \n'-i taxid1 -i taxid2'), "
                            "the sequences having at least one of them are ignored.")
def obi_compile_eval(str expr):
    """Compile a predicate expression into a code object for obi_eval().

    The expression is turned into the assignment ``obi_eval_result=<expr>``
    and every str literal it contains is replaced by the equivalent UTF-8
    encoded bytes literal, so that literals written by the user can be
    compared with bytes values.

    Returns the code object (compiled in "exec" mode).
    Raises SyntaxError if *expr* is not a valid Python expression.
    """

    class StrToBytes(ast.NodeTransformer):

        def visit_Constant(self, node):
            # Python >= 3.8: string literals are parsed as ast.Constant nodes.
            # The legacy visit_Str() dispatch is deprecated and removed in Python 3.14.
            if isinstance(node.value, str):
                result = ast.Constant(value = node.value.encode('utf-8'))
                return ast.copy_location(result, node)
            return node

        def visit_Str(self, node):
            # Python < 3.8 only: string literals are parsed as ast.Str nodes.
            # Never called on newer versions because visit_Constant() is overridden.
            result = ast.Bytes(s = node.s.encode('utf-8'))
            return ast.copy_location(result, node)

    expr = "obi_eval_result="+expr
    tree = ast.parse(expr)
    tree = StrToBytes().visit(tree)
    return compile(tree, filename="<ast>", mode="exec")
def obi_eval(compiled_expr, loc_env, line):
    """Execute a compiled predicate and return the value it computed.

    *compiled_expr* is executed with an empty globals dictionary and with
    *loc_env* as locals; the value it stored under the "obi_eval_result"
    key of *loc_env* is returned. *line* is not used by this function.
    """
    empty_globals = {}
    exec(compiled_expr, empty_globals, loc_env)
    return loc_env["obi_eval_result"]
def Filter_generator(options, tax_filter, i_view):
    """Build the function deciding whether a line of *i_view* is selected.

    *options* is the grep configuration (keys read here: grep_predicates,
    attributes, lmax, lmin, invert_selection, id_list, seq_pattern,
    id_pattern, def_pattern, attribute_patterns). *tax_filter* is a callable
    taking a line and returning whether it passes the taxonomic criteria.

    Returns a function ``filter(line, loc_env)`` returning True for the
    lines to keep, or None when an attribute required by the options is not
    a column of *i_view* (no line can match in that case; the caller handles
    the inverted selection).
    """

    # Initialize conditions
    predicates = None
    if "grep_predicates" in options:
        predicates = [obi_compile_eval(p) for p in options["grep_predicates"]]
    attributes = None
    if "attributes" in options and len(options["attributes"]) > 0:
        attributes = options["attributes"]
        for attribute in attributes:
            if attribute not in i_view:
                return None
    lmax = None
    if "lmax" in options:
        lmax = options["lmax"]
    lmin = None
    if "lmin" in options:
        lmin = options["lmin"]
    invert_selection = options["invert_selection"]
    id_set = None
    if "id_list" in options:
        # Read in binary mode: the ids are compared with the bytes ids of the lines.
        # 'with' closes the file as soon as the set is built.
        with open(options["id_list"], 'rb') as id_file:
            id_set = set(x.strip() for x in id_file)

    # Initialize the regular expression patterns
    seq_pattern = None
    if "seq_pattern" in options:
        seq_pattern = re.compile(tobytes(options["seq_pattern"]), re.I)

    id_pattern = None
    if "id_pattern" in options:
        id_pattern = re.compile(tobytes(options["id_pattern"]))

    def_pattern = None
    if "def_pattern" in options:
        def_pattern = re.compile(tobytes(options["def_pattern"]))

    attribute_patterns={}
    if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0:
        for p in options["attribute_patterns"]:
            attribute, pattern = p.split(":", 1)
            if attribute not in i_view:
                return None
            attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))

    def filter(line, loc_env):
        cdef bint good = True

        if seq_pattern and hasattr(line, "seq"):
            good = <bint>(seq_pattern.search(line.seq))

        if good and id_pattern and hasattr(line, "id"):
            good = <bint>(id_pattern.search(line.id))

        if good and id_set is not None and hasattr(line, "id"):
            good = line.id in id_set

        if good and def_pattern and hasattr(line, "definition"):
            good = <bint>(def_pattern.search(line.definition))

        if good and attributes:  # TODO discuss that we test not None
            good = all(line[attribute] is not None for attribute in attributes)

        if good and attribute_patterns:
            # Every attribute must be defined and its string representation must match its pattern
            good = (all(line[attribute] is not None for attribute in attribute_patterns)
                    and
                    all(<bint>(attribute_patterns[attribute].search(tobytes(str(line[attribute]))))
                        for attribute in attribute_patterns)
                    )

        if good and predicates:
            # Stops at the first predicate evaluated as false
            good = all(bool(obi_eval(p, loc_env, line)) for p in predicates)

        if good and lmin:
            good = len(line) >= lmin

        if good and lmax:
            good = len(line) <= lmax

        if good:
            good = tax_filter(line)

        if invert_selection :
            good = not good

        return good

    return filter
def Taxonomy_filter_generator(taxo, options):
    """Build the function applying the taxonomic criteria to a sequence.

    *taxo* is the taxonomy (or None); *options* is the grep configuration
    (keys read here: required_ranks, required_taxids, ignored_taxids).

    Returns a function ``tax_filter(seq)``. Without taxonomy it always
    returns True. With a taxonomy, a sequence without a b'TAXID' value is
    accepted; otherwise its taxid must have a taxon at every required rank,
    have at least one of the required taxids as ancestor and none of the
    ignored taxids as ancestor.

    Raises RollbackException if a taxonomic criterion is given while *taxo*
    is None.
    """

    # The criteria are read once here instead of once per sequence
    required_ranks = options["required_ranks"] if "required_ranks" in options else None
    required_taxids = options["required_taxids"] if "required_taxids" in options else None
    ignored_taxids = options["ignored_taxids"] if "ignored_taxids" in options else None

    if (required_ranks or required_taxids or ignored_taxids) and (taxo is None):
        raise RollbackException("obi grep error: can't use taxonomy options without providing a taxonomy. Rollbacking view")

    if taxo is not None:
        def tax_filter(seq):
            if b'TAXID' not in seq or seq[b'TAXID'] is None:  # TODO use macro
                return True
            taxid = seq[b'TAXID']
            if required_ranks and not all(taxo.get_taxon_at_rank(taxid, rank) is not None
                                          for rank in required_ranks):
                return False
            # any() stops at the first matching ancestor
            if required_taxids and not any(taxo.is_ancestor(r, taxid)
                                           for r in required_taxids):
                return False
            if ignored_taxids and any(taxo.is_ancestor(r, taxid)
                                      for r in ignored_taxids):
                return False
            return True
    else:
        def tax_filter(seq):
            return True

    return tax_filter
def run(config):
    """Run `obi grep`: select the lines of a view matching the given criteria.

    Reads from *config*:
      - config['obi']['inputURI'] / config['obi']['outputURI'],
      - config['obi']['taxoURI'] (optional taxonomy),
      - config['obi']['noprogressbar'],
      - config['grep']: the selection criteria (see Filter_generator and
        Taxonomy_filter_generator).

    The selected lines are materialized as a new view in the input DMS. When
    the output is another DMS or stdout, that view is a temporary one which
    is imported into the output DMS / printed, and then deleted.

    Raises Exception if a URI can not be opened, and RollbackException if the
    output view can not be created.
    """

    DMS.obi_atexit()

    logger("info", "obi grep")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    final_o_view_name = output[1]

    # True when the result must be printed to a buffer (stdout) instead of being kept in a DMS
    stdout_output = type(output_0)==BufferedWriter

    # If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
    if i_dms != o_dms or stdout_output:
        temporary_view_name = b"temp"
        i=0
        while temporary_view_name in i_dms:  # Making sure view name is unique in input DMS
            temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view_name = temporary_view_name
        if stdout_output:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config["obi"]["taxoURI"])
        if taxo_uri is None or taxo_uri[2] == bytes:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else :
        taxo = None

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(len(i_view), config)
    else:
        pb = None

    # Apply filter
    tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
    filter = Filter_generator(config["grep"], tax_filter, i_view)
    selection = Line_selection(i_view)

    if filter is None and config["grep"]["invert_selection"]:  # all sequences are selected: filter is None if no line will be selected because some columns don't exist
        for i in range(len(i_view)):
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            selection.append(i)

    elif filter is not None :  # filter is None if no line will be selected because some columns don't exist
        for i in range(len(i_view)):
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            line = i_view[i]
            loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
            good = filter(line, loc_env)
            if good :
                selection.append(i)

    if pb is not None:
        pb(len(i_view), force=True)
        print("", file=sys.stderr)

    # Create output view with the line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # o_view is not bound when materialize() fails: passing it to the exception
        # would raise an UnboundLocalError hiding the actual error
        raise RollbackException("obi grep error, rollbacking view: "+str(e))

    logger("info", "Grepped %d entries" % len(o_view))

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[input[0].name]
    input_view_name=[input[1].name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    o_view.write_config(config, "grep", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    # NOTE(review): [:-7] presumably strips the DMS directory extension from the path -- confirm
    if i_dms != o_dms:
        o_view.close()
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
        o_view = o_dms[final_o_view_name]

    # stdout output: write to buffer
    if stdout_output:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or stdout_output:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,129 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
from obitools3.apps.optiongroups import addExportOutputOption
import time
import sys
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
__title__="Keep the N first lines of a view"
def addOptions(parser):
    """Register the command-line options of `obi head` on *parser*.

    Adds the shared input, output, export output and progress-bar option
    groups, then the head specific option giving the number of records to
    keep (10 by default).
    """

    # Option groups shared with the other obi commands
    for add_shared_options in (addMinimalInputOption,
                               addMinimalOutputOption,
                               addExportOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    head_group = parser.add_argument_group('obi head specific options')

    head_group.add_argument('-n', '--sequence-count',
                            dest="head:count",
                            action="store",
                            type=int,
                            default=10,
                            metavar='<N>',
                            help="Number of first records to keep.")
def run(config):
    """Run `obi head`: keep the first lines of a view.

    Reads from *config*:
      - config['obi']['inputURI'] / config['obi']['outputURI'],
      - config['obi']['noprogressbar'],
      - config['head']['count']: number of lines to keep (at most the number
        of lines of the input view; a negative value keeps no line).

    The selected lines are materialized as a new view in the input DMS. When
    the output is another DMS or stdout, that view is a temporary one which
    is imported into the output DMS / printed, and then deleted.

    Raises Exception if a URI can not be opened, and RollbackException if the
    output view can not be created.
    """

    DMS.obi_atexit()

    logger("info", "obi head")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    final_o_view_name = output[1]

    # True when the result must be printed to a buffer (stdout) instead of being kept in a DMS
    stdout_output = type(output_0)==BufferedWriter

    # If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
    if i_dms != o_dms or stdout_output:
        temporary_view_name = b"temp"
        i=0
        while temporary_view_name in i_dms:  # Making sure view name is unique in input DMS
            temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view_name = temporary_view_name
        if stdout_output:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    # Never more lines than the view contains, never a negative count
    n = max(0, min(config['head']['count'], len(i_view)))

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(n, config)
    else:
        pb = None

    selection = Line_selection(i_view)

    for i in range(n):
        PyErr_CheckSignals()
        if pb is not None:
            pb(i)
        selection.append(i)

    if pb is not None:
        # Report the number of selected lines: the loop variable stops at n-1
        # and is not even set by the loop when n is 0
        pb(n, force=True)
        print("", file=sys.stderr)

    # Create output view with the line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # o_view is not bound when materialize() fails: passing it to the exception
        # would raise an UnboundLocalError hiding the actual error
        raise RollbackException("obi head error, rollbacking view: "+str(e))

    # Save command config in DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "head", command_line, input_dms_name=[i_dms.name], input_view_name=[i_view.name])
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    # NOTE(review): [:-7] presumably strips the DMS directory extension from the path -- confirm
    if i_dms != o_dms:
        o_view.close()
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
        o_view = o_dms[final_o_view_name]

    # stdout output: write to buffer
    if stdout_output:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(view), file=sys.stderr)

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or stdout_output:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,58 @@
#cython: language_level=3
from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.uri.decode import open_uri
from obitools3.dms import DMS
from obitools3.dms.view import View
from obitools3.utils cimport bytes2str
__title__="Command line histories and view history graphs"
def addOptions(parser):
    """Register the command-line options of `obi history` on *parser*.

    Adds the shared input options, then one flag per output format. The
    three flags store their format name in the same "history:format"
    destination, whose default value is "bash".
    """

    addMinimalInputOption(parser)

    history_group = parser.add_argument_group('obi history specific options')

    # (long flag, short flag, stored format name, help text)
    format_flags = (
        ('--bash', '-b', "bash",
         "Print history in bash format"),
        ('--dot', '-d', "dot",
         "Print history in DOT format (default: bash format)"),
        ('--ascii', '-a', "ascii",
         "Print history in ASCII format (only for views; default: bash format)"),
    )

    for long_flag, short_flag, format_name, help_text in format_flags:
        history_group.add_argument(long_flag, short_flag,
                                   dest="history:format",
                                   action="store_const",
                                   const=format_name,
                                   default="bash",
                                   help=help_text)
def run(config):
    """Run `obi history`: print the history of a DMS or of a view.

    Reads config['obi']['inputURI'] and config['history']['format'] ("bash",
    "dot" or "ascii"), and prints the corresponding history on stdout.

    Raises Exception if the input URI can not be opened, or if the ASCII
    format is requested for something that is not a view.
    """

    cdef object entries

    DMS.obi_atexit()

    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input")

    entries = input[1]

    # The DMS is closed whether the history could be printed or not
    try:
        history_format = config['history']['format']
        if history_format == "bash" :
            print(bytes2str(entries.bash_history))
        elif history_format == "dot" :
            print(bytes2str(entries.dot_history_graph))
        elif history_format == "ascii" :
            if isinstance(entries, View):
                print(bytes2str(entries.ascii_history))
            else:
                raise Exception("ASCII history only available for views")
    finally:
        input[0].close(force=True)

View File

@ -0,0 +1,552 @@
#cython: language_level=3
import sys
import os
import re
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.view.view cimport View
from obitools3.dms.view import RollbackException
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms import DMS
from obitools3.dms.taxo.taxo cimport Taxonomy
from obitools3.files.uncompress cimport CompressedFile
from obitools3.utils cimport tobytes, \
tostr, \
get_obitype, \
update_obitype
from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN, \
COUNT_COLUMN, \
TAXID_COLUMN, \
MERGED_PREFIX, \
SCIENTIFIC_NAME_COLUMN
from obitools3.dms.capi.obidms cimport obi_import_view
from obitools3.dms.capi.obitypes cimport obitype_t, \
OBI_VOID, \
OBI_QUAL, \
OBI_STR, \
OBI_INT
from obitools3.dms.capi.obierrno cimport obi_errno
from obitools3.apps.optiongroups import addImportInputOption, \
addTabularInputOption, \
addTaxdumpInputOption, \
addMinimalOutputOption, \
addNoProgressBarOption, \
addTaxonomyOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
import ast
__title__="Import sequences from different formats into a DMS"
# Module-level dictionary of default values for the import command.
# NOTE(review): not referenced in the visible part of this module -- presumably
# read by the application configuration machinery; confirm where it is consumed.
default_config = { 'destview' : None,
'skip' : 0,
'only' : None,
'skiperror' : False,
'seqinformat' : None,
'moltype' : 'nuc',
'source' : None
}
def addOptions(parser):
    """Register the command-line options of `obi import` on *parser*.

    Adds the shared import input, tabular input, taxdump input, taxonomy,
    output and progress-bar option groups, then the two import specific
    flags (--preread and --space-priority), both off by default.
    """

    # Option groups shared with the other obi commands
    for add_shared_options in (addImportInputOption,
                               addTabularInputOption,
                               addTaxdumpInputOption,
                               addTaxonomyOption,
                               addMinimalOutputOption,
                               addNoProgressBarOption):
        add_shared_options(parser)

    import_group = parser.add_argument_group('obi import specific options')

    preread_help = ("Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
                    "a much faster import. This option is not recommended and will slow down the import in any other case.")
    import_group.add_argument('--preread',
                              dest="import:preread",
                              action="store_true",
                              default=False,
                              help=preread_help)

    space_priority_help = ("If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
                           "has a line selection associated.")
    import_group.add_argument('--space-priority',
                              dest="import:space_priority",
                              action="store_true",
                              default=False,
                              help=space_priority_help)

#    group.add_argument('--only-id',
#                       action="store", dest="import:onlyid",
#                       help="only id")
def run(config):
    """
    Import an object (sequence file(s), obiview, taxdump...) into a DMS.

    Depending on the configuration, one of three things happens:
      * config['obi']['taxdump'] is set: the input URI is read as a taxdump
        and written as a taxonomy in the output DMS;
      * the input is a view and --space-priority is not set: the view is
        copied with the C function obi_import_view();
      * otherwise every entry of the input is written line by line in a new
        view, creating and, when needed, rewriting columns on the fly.

    Raises Exception when an URI can not be opened or the taxonomy name is
    already taken, NotImplementedError when the output is not a view, and
    RollbackException when an entry could not be read and errors are not
    skipped.
    """

    cdef tuple       input
    cdef tuple       output
    cdef int         i
    cdef type        value_type
    cdef obitype_t   value_obitype
    cdef obitype_t   old_type
    cdef obitype_t   new_type
    cdef bint        get_quality
    cdef bint        NUC_SEQS_view
    cdef bint        silva
    cdef bint        rdp
    cdef bint        unite
    cdef bint        sintax
    cdef int         nb_elts
    cdef object      d
    cdef View        view
    cdef object      entries
    cdef object      entry
    cdef Column      id_col
    cdef Column      def_col
    cdef Column      seq_col
    cdef Column      qual_col
    cdef Column      old_column
    cdef Column      sci_name_col
    cdef bytes       sci_name
    cdef bint        rewrite
    cdef dict        dcols
    cdef int         skipping
    cdef bytes       tag
    cdef object      value
    cdef list        elt_names
    cdef int         old_nb_elements_per_line
    cdef int         new_nb_elements_per_line
    cdef list        old_elements_names
    cdef list        new_elements_names
    cdef ProgressBar pb
    global           obi_errno

    DMS.obi_atexit()

    logger("info", "obi import: imports an object (file(s), obiview, taxonomy...) into a DMS")

    entry_count = -1
    pb = None

    if not config['obi']['taxdump']:
        input = open_uri(config['obi']['inputURI'])
        if input is None: # TODO check for bytes instead now?
            raise Exception("Could not open input URI")

        if config['obi']['only'] is not None:
            entry_count = min(input[4], config['obi']['only'])
        else:
            entry_count = input[4]

        if entry_count > 0:
            logger("info", "Importing %d entries", entry_count)
        else:
            logger("info", "Importing an unknown number of entries")

        # TODO a bit dirty?
        if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
            v = View_NUC_SEQS
        else:
            v = View
    else:
        v = None

    # `input` is only evaluated when no taxdump is imported (short-circuit)
    if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']):
        dms_only=True
    else:
        dms_only=False

    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=v,
                      dms_only=dms_only)
    if output is None:
        raise Exception("Could not open output")

    o_dms = output[0]

    # Read taxdump
    if config['obi']['taxdump']:  # The input is a taxdump to import in a DMS
        # Check if taxonomy name isn't already taken
        taxo_name = output[1].split(b'/')[1]
        if Taxonomy.exists(o_dms, taxo_name):
            raise Exception("Taxonomy name already exists in this DMS")
        taxo = Taxonomy.open_taxdump(o_dms, config['obi']['inputURI'])
        taxo.write(taxo_name)
        taxo.close()
        o_dms.record_command_line(" ".join(sys.argv[1:]))
        o_dms.close(force=True)
        logger("info", "Done.")
        return

    # Open taxonomy if there is one
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config['obi']['taxoURI'])
        if taxo_uri is None or taxo_uri[2] == bytes:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else :
        taxo = None

    # If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
    if isinstance(input[1], View) and not config['import']['space_priority']:
        if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
            input[0].close(force=True)
            output[0].close(force=True)
            raise Exception("Error importing a view in a DMS")
        o_dms.record_command_line(" ".join(sys.argv[1:]))
        input[0].close(force=True)
        output[0].close(force=True)
        logger("info", "Done.")
        return

    # Reinitialize the progress bar
    if entry_count >= 0 and config['obi']['noprogressbar'] == False:
        pb = ProgressBar(entry_count, config)
    else:
        pb = None

    NUC_SEQS_view = False
    if isinstance(output[1], View) :
        view = output[1]
        if output[2] == View_NUC_SEQS :
            NUC_SEQS_view = True
    else:
        raise NotImplementedError()

    # Save basic columns in variables for optimization
    if NUC_SEQS_view :
        id_col = view[ID_COLUMN]
        def_col = view[DEFINITION_COLUMN]
        seq_col = view[NUC_SEQUENCE_COLUMN]

    # Prepare taxon scientific name and taxid refs if RDP/SILVA/UNITE/SINTAX formats
    silva = False
    rdp = False
    unite = False
    sintax = False
    if 'inputformat' in config['obi']:
        silva = config['obi']['inputformat'] == b"silva"
        rdp = config['obi']['inputformat'] == b"rdp"
        unite = config['obi']['inputformat'] == b"unite"
        sintax = config['obi']['inputformat'] == b"sintax"
    if silva or rdp or unite or sintax:
        #if taxo is None:
        #    raise Exception("A taxonomy (as built by 'obi import --taxdump') must be provided for SILVA and RDP files")
        sci_name_col = Column.new_column(view, SCIENTIFIC_NAME_COLUMN, OBI_STR)
        if taxo is not None:
            taxid_col = Column.new_column(view, TAXID_COLUMN, OBI_INT)

    # Maps a column name to a (Column, obitype) tuple
    dcols = {}

    # First read through the entries to prepare columns with dictionaries as they are very time-expensive to rewrite
    if config['import']['preread']:
        logger("info", "First readthrough...")
        entries = input[1]
        i = 0
        dict_dict = {}
        for entry in entries:
            PyErr_CheckSignals()

            if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
                if config['obi']['skiperror']:
                    # The counter is only incremented at the end of the loop
                    # body, which `continue` skips: nothing to undo here.
                    continue
                else:
                    raise Exception("obi import error in first readthrough")

            if pb is not None:
                pb(i)
            elif not i%50000:
                logger("info", "Read %d entries", i)

            for tag in entry :
                newtag = tag
                if tag[:7] == b"merged_":
                    newtag = MERGED_PREFIX+tag[7:]
                if type(entry[tag]) == dict :
                    # Look up the (possibly renamed) tag, which is the key
                    # actually stored, so that keys keep accumulating
                    if newtag in dict_dict:
                        dict_dict[newtag][0].update(entry[tag].keys())
                    else:
                        dict_dict[newtag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
            i+=1

        if pb is not None:
            pb(i, force=True)
            print("", file=sys.stderr)

        for tag in dict_dict:
            dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
                                            nb_elements_per_line=len(dict_dict[tag][0]), \
                                            elements_names=list(dict_dict[tag][0]), \
                                            dict_column=True), \
                          dict_dict[tag][1])

        # Reinitialize the input (always defined, also when the input is not a file)
        input_is_file = isinstance(input[0], CompressedFile)

        # Reinitialize the progress bar
        if entry_count >= 0 and config['obi']['noprogressbar'] == False:
            pb = ProgressBar(entry_count, config)
        else:
            pb = None

        try:
            input[0].close()
        except AttributeError:
            pass
        input = open_uri(config['obi']['inputURI'], force_file=input_is_file)
        if input is None:
            raise Exception("Could not open input URI")

#    if 'onlyid' in config['import']:
#        onlyid = tobytes(config['import']['onlyid'])
#    else:
#        onlyid = None

    entries = input[1]
    i = 0
    for entry in entries :

        PyErr_CheckSignals()

        if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
            if config['obi']['skiperror']:
                continue
            else:
                raise RollbackException("obi import error, rollbacking view", view)

        if pb is not None:
            pb(i)
        elif not i%50000:
            logger("info", "Imported %d entries", i)

#        if onlyid is not None and entry.id != onlyid:
#            continue

        try:

            if NUC_SEQS_view:
                id_col[i] = entry.id
                def_col[i] = entry.definition
                seq_col[i] = entry.seq
                # Check if there is a sequencing quality associated by checking the first entry    # TODO haven't found a more robust solution yet
                if i == 0:
                    get_quality = QUALITY_COLUMN in entry
                    if get_quality:
                        Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
                        qual_col = view[QUALITY_COLUMN]
                if get_quality:
                    qual_col[i] = entry.quality

                # Parse taxon scientific name if RDP or Silva or Unite file
                if (rdp or silva or unite or sintax):
                    if rdp or silva:
                        sci_names = entry.definition.split(b";")
                        sci_name_col[i] = sci_names[-1]
                    elif unite:
                        sci_names = entry.id.split(b'|')[-1].split(b';')
                        sci_name_col[i] = re.sub(b'[a-zA-Z]__', b'', sci_names[-1])
                    elif sintax:
                        reconstructed_line = entry.id+b' '+entry.definition[:-1]
                        splitted_reconstructed_line = reconstructed_line.split(b';')
                        # Drop the key before the first '=', then split the ranks
                        taxa = splitted_reconstructed_line[1].split(b'=', 1)[1].split(b',')
                        sci_names = []
                        for t in taxa:
                            tf = t.split(b':')[1]
                            sci_names.append(tf)
                        sci_name_col[i] = sci_names[-1]
                        id_col[i] = reconstructed_line.split(b';')[0]
                        def_col[i] = reconstructed_line

                    # Find taxid if taxonomy provided
                    if taxo is not None :
                        # Walk the lineage backwards and keep the first name known by the taxonomy
                        for sci_name in reversed(sci_names):
                            if unite:
                                sci_name = re.sub(b'[a-zA-Z]__', b'', sci_name)
                            first_word = sci_name.split()[0]
                            if first_word != b'unidentified' and first_word != b'uncultured' and first_word != b'metagenome':
                                taxon = taxo.get_taxon_by_name(sci_name)
                                if taxon is not None:
                                    sci_name_col[i] = taxon.name
                                    taxid_col[i] = taxon.taxid
                                    #print(taxid_col[i], sci_name_col[i])
                                    break

            for tag in entry :

                if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN :  # TODO dirty

                    value = entry[tag]
                    if tag == b"taxid":
                        tag = TAXID_COLUMN
                    if tag == b"count":
                        tag = COUNT_COLUMN
                    if tag == b"scientific_name":
                        tag = SCIENTIFIC_NAME_COLUMN
                    if tag[:7] == b"merged_":
                        tag = MERGED_PREFIX+tag[7:]

                    if type(value) == bytes and value[:1]==b"[" :
                        try:
                            # NOTE(security): eval() is run on content read from the
                            # imported file; consider ast.literal_eval if only
                            # literals are expected -- TODO confirm.
                            evaluated = eval(value)
                            if type(evaluated) == list:
                                value = evaluated
                        except Exception:
                            # Not a list literal: keep the raw bytes value
                            pass

                    if tag not in dcols :

                        value_type = type(value)
                        nb_elts = 1
                        value_obitype = OBI_VOID
                        dict_col = False

                        if value_type == dict :
                            nb_elts = len(value)
                            elt_names = list(value)
                            dict_col = True
                        else :
                            nb_elts = 1
                            elt_names = None

                        if value_type == list :
                            tuples = True
                        else:
                            tuples = False

                        value_obitype = get_obitype(value)

                        if value_obitype != OBI_VOID :
                            dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names, dict_column=dict_col, tuples=tuples), value_obitype)

                            # Fill value
                            dcols[tag][0][i] = value

                        # TODO else log error?

                    else :

                        rewrite = False

                        # Check type adequation
                        old_type = dcols[tag][1]
                        new_type = OBI_VOID
                        new_type = update_obitype(old_type, value)
                        if old_type != new_type :
                            rewrite = True

                        try:
                            # Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key
                            if type(value) == dict and \
                               dcols[tag][0].nb_elements_per_line == 1 \
                               and set(dcols[tag][0].elements_names) != set(value.keys()) :
                                raise IndexError  # trigger column rewrite

                            # Fill value
                            dcols[tag][0][i] = value

                        except (IndexError, OverflowError):

                            value_type = type(value)
                            old_column = dcols[tag][0]
                            old_nb_elements_per_line = old_column.nb_elements_per_line
                            new_nb_elements_per_line = 0
                            old_elements_names = old_column.elements_names
                            new_elements_names = None

                            #####################################################################

                            # Check the length and keys of column lines if needed
                            if value_type == dict :    # Check dictionary keys
                                for k in value :
                                    if k not in old_elements_names :
                                        new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
                                        rewrite = True
                                        break

                            elif value_type == list or value_type == tuple :  # Check vector length
                                if old_nb_elements_per_line < len(value) :
                                    new_nb_elements_per_line = len(value)
                                    rewrite = True

                            #####################################################################

                            if rewrite :
                                if new_nb_elements_per_line == 0 and new_elements_names is not None :
                                    new_nb_elements_per_line = len(new_elements_names)

                                # Reset obierrno
                                obi_errno = 0

                                dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
                                                                                       new_data_type=new_type,
                                                                                       new_nb_elements_per_line=new_nb_elements_per_line,
                                                                                       new_elements_names=new_elements_names,
                                                                                       rewrite_last_line=False),
                                              new_type)

                                # Update the dictionary:
                                for t in dcols :
                                    dcols[t] = (view[t], dcols[t][1])

                            # Fill value
                            dcols[tag][0][i] = value

        except Exception as e:
            print("\nCould not import sequence:\n", repr(entry), "\nError raised:", e, "\n/!\\ Check if '--input-na-string' option needs to be set")
            if 'skiperror' in config['obi'] and not config['obi']['skiperror']:
                # Bare raise keeps the original traceback
                raise
            i-=1    # overwrite problematic entry

        i+=1

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    logger("info", "Imported %d entries", len(view))

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    view.write_config(config, "import", command_line, input_str=[os.path.abspath(config['obi']['inputURI'])])
    output[0].record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(view), file=sys.stderr)

    try:
        input[0].close()
    except AttributeError:
        pass
    try:
        output[0].close(force=True)
    except AttributeError:
        pass

    logger("info", "Done.")

View File

@ -0,0 +1,50 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.uri.decode import open_uri
from obitools3.dms import DMS
from obitools3.utils cimport tobytes
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
from obitools3.apps.optiongroups import addMinimalInputOption
import sys
import io
from subprocess import Popen, PIPE
from cpython.exc cimport PyErr_CheckSignals
# One-line description of this command (pipes a view to the `less` pager).
__title__="Less equivalent"
def addOptions(parser):
    """Register the command-line options of `obi less` on *parser*."""
    # Only the minimal input options are needed by this command.
    addMinimalInputOption(parser)
def run(config):
    """
    Display the content of the input view in the `less` pager.

    Every element of the view is written (as its repr) on the standard input
    of a `less` subprocess. Writing stops as soon as the pipe is broken,
    e.g. when the user quits the pager before the end of the view.

    Raises Exception when the input URI can not be opened.
    """

    DMS.obi_atexit()

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input")

    iview = input[1]

    process = Popen(["less"], stdin=PIPE)

    for seq in iview :
        PyErr_CheckSignals()
        try:
            process.stdin.write(tobytes(repr(seq)))
            process.stdin.write(b"\n")
        except (StopIteration, BrokenPipeError, IOError):
            break

    sys.stderr.close()
    try:
        process.stdin.close()
    except (BrokenPipeError, IOError):
        # The pager is already gone: closing flushes the data still buffered
        # and fails again. Ignore it so the pager is waited for and the view
        # and DMS are closed below.
        pass
    process.wait()

    iview.close()
    input[0].close(force=True)

View File

@ -0,0 +1,41 @@
#cython: language_level=3
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.dms import DMS
from obitools3.dms.taxo.taxo cimport Taxonomy
from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.utils cimport tostr, bytes2str_object
# One-line description of this command.
__title__="Print a preview of a DMS, view, column...."
def addOptions(parser):
    """Register the command-line options of `obi ls` on *parser*."""

    addMinimalInputOption(parser)

    # Options that only make sense for this command.
    specific = parser.add_argument_group('obi ls specific options')

    specific.add_argument(
        '-l',
        dest="ls:longformat",
        action="store_true",
        default=False,
        help="Detailed list in long format with all metadata.",
    )
def run(config):
    """
    Print a representation of the object designated by the input URI.

    The long format is used when config['ls']['longformat'] is set, the
    plain repr otherwise. Raises Exception when the input can not be opened.
    """

    DMS.obi_atexit()

    logger("info", "obi ls")

    # Open the input
    opened = open_uri(config['obi']['inputURI'])
    if opened is None:
        raise Exception("Could not read input")

    # Print representation
    target = opened[1]
    print(target.repr_longformat() if config['ls']['longformat'] else repr(target))

    opened[0].close(force=True)

View File

@ -0,0 +1,720 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view import RollbackException, View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
from obitools3.libalign.apat_pattern import Primer_search
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.capi.apat cimport MAX_PATTERN
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport tobytes, str2bytes
from libc.stdint cimport INT32_MAX
from functools import reduce
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
# Maximum primer length, in bp, accepted by the Primer class below.
MAX_PAT_LEN = 31
# One-line description of this command.
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
def addOptions(parser):
    """Register the command-line options of `obi ngsfilter` on *parser*."""

    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

    group = parser.add_argument_group('obi ngsfilter specific options')

    # The documented limit matches MAX_PAT_LEN (31), the length enforced by
    # the Primer class
    group.add_argument('-t','--info-view',
                       action="store", dest="ngsfilter:info_view",
                       metavar="<URI>",
                       type=str,
                       default=None,
                       required=True,
                       help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
                            "\nWarning: primer lengths must be less than or equal to 31")

    group.add_argument('-R', '--reverse-reads',
                       action="store", dest="ngsfilter:reverse",
                       metavar="<URI>",
                       default=None,
                       type=str,
                       help="URI to the reverse reads if the paired-end reads haven't been aligned yet")

    group.add_argument('-u','--unidentified',
                       action="store", dest="ngsfilter:unidentified",
                       metavar="<URI>",
                       type=str,
                       default=None,
                       help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")

    group.add_argument('--no-tags',
                       action="store_true", dest="ngsfilter:notags",
                       default=False,
                       help="Use this option if your experiment does not use tags to identify samples")

    group.add_argument('-e','--error',
                       action="store", dest="ngsfilter:error",
                       metavar="###",
                       type=int,
                       default=2,
                       help="Number of errors allowed for matching primers [default = 2]")
class Primer:
    """
    A primer, with the length of the tag expected next to it.

    Instances are hashed and compared on their raw sequence only, so a
    primer and its reverse complement (see reverse_complement()) are equal
    and can replace each other as dictionary keys.

    The `aligner` attribute used by __call__() is not set here: it must be
    assigned on the instance before the primer is searched in a sequence.
    """

    # Maps each raw primer sequence to the tag length it is used with
    collection={}

    def __init__(self, sequence, taglength, forward=True, max_errors=2, verbose=False, primer_pair_idx=0, primer_idx=0):
        '''
            @param sequence: raw sequence of the primer
            @param taglength: length of the tag associated with the primer,
                              or None if there is no tag
            @param forward: whether this is the forward primer of its pair
            @param max_errors: maximum number of errors accepted for a match
            @param verbose: stored on the instance
            @param primer_pair_idx: index of the primer pair, passed to the aligner
            @param primer_idx: index of the primer in its pair, passed to the aligner
        '''

        assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence

        assert sequence not in Primer.collection        \
            or Primer.collection[sequence]==taglength,  \
            "Primer %s must always be used with tags of the same length" % sequence

        Primer.collection[sequence]=taglength

        self.primer_pair_idx = primer_pair_idx
        self.primer_idx = primer_idx
        self.is_revcomp = False
        self.revcomp = None
        self.raw=sequence
        self.sequence = Nuc_Seq(b"primer", sequence)
        self.lseq = len(self.sequence)
        self.max_errors=max_errors
        self.taglength=taglength
        self.forward = forward
        self.verbose=verbose

    def reverse_complement(self):
        """Return a new Primer holding the reverse complement of this one.

        The copy keeps the same raw sequence (hence the same hash and
        equality) but has the opposite `forward` flag.
        """
        p = Primer(self.raw,
                   self.taglength,
                   not self.forward,
                   verbose=self.verbose,
                   max_errors=self.max_errors,
                   primer_pair_idx=self.primer_pair_idx,
                   primer_idx=self.primer_idx)
        p.sequence=p.sequence.reverse_complement
        p.is_revcomp = True
        p.revcomp = None
        return p

    def __hash__(self):
        return hash(str(self.raw))

    def __eq__(self,primer):
        # Comparing with something that is not a primer (e.g. None, which is
        # looked up in the same dictionaries as primers) must not raise
        if not isinstance(primer, Primer):
            return NotImplemented
        return self.raw==primer.raw

    def __call__(self, sequence, same_sequence=False, pattern=0, begin=0):
        """Search the primer in *sequence*.

        Returns None when the sequence is not longer than the primer, when
        the aligner finds no match or when the match has more than
        `max_errors` errors. Otherwise returns (errors, start, end, tag),
        where tag is None when the primer has no tag length or when the
        sequence is too short around the match to contain the tag.
        """

        if len(sequence) <= self.lseq:
            return None

        ali = self.aligner.search_one_primer(sequence.seq,
                                             self.primer_pair_idx,
                                             self.primer_idx,
                                             reverse_comp=self.is_revcomp,
                                             same_sequence=same_sequence,
                                             pattern_ref=pattern,
                                             begin=begin)

        if ali is None:  # no match
            return None

        errors, start = ali.first_encountered()

        if errors <= self.max_errors:
            end = start + self.lseq
            if self.taglength is not None:
                if self.sequence.is_revcomp:
                    # The tag follows the match: read it on the reverse complement
                    if (len(sequence)-end) >= self.taglength:
                        tag_start = len(sequence) - end - self.taglength
                        tag = sequence.reverse_complement[tag_start:tag_start+self.taglength].seq
                    else:
                        tag=None
                else:
                    # The tag precedes the match
                    if start >= self.taglength:
                        tag = tobytes((sequence[start - self.taglength:start].seq).lower())  # turn back to lowercase because apat turned to uppercase
                    else:
                        tag=None
            else:
                tag=None

            return errors,start,end,tag

        return None

    def __str__(self):
        return "%s: %s" % ({True:'D',False:'R'}[self.forward],self.raw)

    __repr__=__str__
cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
    """
    Read the primers, tags and sample data of every line of *info_view*.

    Returns a tuple (infos, primer_list):
      * infos maps each primer to a dict mapping its partner primer to a
        dict mapping a (forward_tag, reverse_tag) tuple to the other keys
        and values of the line;
      * primer_list is the list of (forward_primer, reverse_primer)
        sequences, one tuple per line.

    When *not_aligned* is true, the partner primers are the primers
    themselves, with their reverse complement stored in their `revcomp`
    attribute. Otherwise the partner primers are reverse-complemented.

    Raises RollbackException when a primer is longer than MAX_PAT_LEN.
    """

    # Keys of a line that are not sample data
    special_keys = [b'forward_primer', b'reverse_primer', b'forward_tag', b'reverse_tag']

    infos = {}
    primer_list = []
    i=0
    for p in info_view:

        # Check primer length here, with the limit that the Primer class
        # asserts, so that a too long primer raises a RollbackException
        # instead of an AssertionError
        if len(p[b'forward_primer']) > MAX_PAT_LEN:
            raise RollbackException("Error: primers can not be longer than %dbp, rollbacking views" % MAX_PAT_LEN)
        if len(p[b'reverse_primer']) > MAX_PAT_LEN:
            raise RollbackException("Error: primers can not be longer than %dbp, rollbacking views" % MAX_PAT_LEN)

        forward_tag = p[b'forward_tag'] if (b'forward_tag' in p and p[b'forward_tag'] is not None) else None
        reverse_tag = p[b'reverse_tag'] if (b'reverse_tag' in p and p[b'reverse_tag'] is not None) else None

        forward=Primer(p[b'forward_primer'],
                       len(forward_tag) if forward_tag is not None else None,
                       True,
                       max_errors=max_errors,
                       verbose=verbose,
                       primer_pair_idx=i,
                       primer_idx=0)

        fp = infos.get(forward,{})
        infos[forward]=fp

        reverse=Primer(p[b'reverse_primer'],
                       len(reverse_tag) if reverse_tag is not None else None,
                       False,
                       max_errors=max_errors,
                       verbose=verbose,
                       primer_pair_idx=i,
                       primer_idx=1)

        primer_list.append((p[b'forward_primer'], p[b'reverse_primer']))

        rp = infos.get(reverse,{})
        infos[reverse]=rp

        if not_aligned:
            cf=forward
            cr=reverse
            cf.revcomp = forward.reverse_complement()
            cr.revcomp = reverse.reverse_complement()
        else:
            cf=forward.reverse_complement()
            cr=reverse.reverse_complement()

        dpp=fp.get(cr,{})
        fp[cr]=dpp

        rpp=rp.get(cf,{})
        rp[cf]=rpp

        tags = (forward_tag, reverse_tag)

        if tags != (None, None):
            assert tags not in dpp, \
                   "Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)

        # Save additional data
        data={}
        for key in p:
            if key not in special_keys:
                data[key] = p[key]

        # The same dict is shared by both directions
        dpp[tags] = data
        rpp[tags] = data

        i+=1

    return infos, primer_list
cdef tuple annotate(sequences, infos, no_tags, verbose=False):
# Look for the primers of `infos` in the read(s) of `sequences`, trim the
# matched primers, annotate the first read with the matches, tags and sample
# data, and return a tuple (success, sequences[0]).
#
# sequences: list of one read, or of two reads (the second one being then
#            treated as the reverse read; `not_aligned` is true in that case).
# infos:     dict whose keys are callable primers, as iterated and called below.
# no_tags:   guards the tag-based sample lookup at the end of the function.
# verbose:   not used in this body.
#
# When success is False, sequences[0][b'error'] holds the reason.
#
# NOTE(review): the indentation of this block was lost in this copy of the
# file; the nesting of the statements must be taken from the original file.
# Sort key for direct matches: match start position, unmatched primers last
def sortMatch(match):
if match[1] is None:
return INT32_MAX
else:
return match[1][1]
# Sort key for reverse matches: match start position, unmatched primers get -1
def sortReverseMatch(match):
if match[1] is None:
return -1
else:
return match[1][1]
not_aligned = len(sequences) > 1
# Work on clones of the reads
sequences[0] = sequences[0].clone()
if not_aligned:
sequences[0][b"R1_parent"] = sequences[0].id
sequences[0][b"R2_parent"] = sequences[1].id
if not_aligned:
sequences[1] = sequences[1].clone()
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
# Quality statistics computed from -log10 of the values of quality_array
# (whole read, first 10 positions, middle part, last 10 positions)
for seq in sequences:
if hasattr(seq, "quality_array") and seq.quality_array is not None:
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array),0)/len(seq.quality_array)*10
seq[b'avg_quality']=q
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[0:10]),0)
seq[b'head_quality']=q
if len(seq.quality_array[10:-10]) :
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[10:-10]),0)/len(seq.quality_array[10:-10])*10
seq[b'mid_quality']=q
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[-10:]),0)
seq[b'tail_quality']=q
# Try direct matching:
directmatch = []
for seq in sequences:
new_seq = True
pattern = 0
for p in infos:
# The pattern counter restarts once it reaches MAX_PATTERN
if pattern == MAX_PATTERN:
new_seq = True
pattern = 0
# Saving original primer as 4th member of the tuple to serve as correct key in infos dict even if it might be reversed complemented (not here)
directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p))
new_seq = False
pattern+=1
# Choose match closer to the start of (one of the) sequence(s)
directmatch = sorted(directmatch, key=sortMatch)
all_direct_matches = directmatch
directmatch = directmatch[0] if directmatch[0][1] is not None else None
if directmatch is None:
if not_aligned:
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
sequences[0][b'error']=b'No primer match'
return False, sequences[0]
# Identity test: was the best match found on the first read?
if id(directmatch[2]) == id(sequences[0]):
first_match_first_seq = True
else:
first_match_first_seq = False
# Part of the read covered by the match (directmatch[1] is (errors, start, end, tag))
match = directmatch[2][directmatch[1][1]:directmatch[1][2]]
if not not_aligned:
sequences[0][b'seq_length_ori']=len(sequences[0])
# Cut the matched primer, and what precedes it, out of the read it was found on
if not not_aligned or first_match_first_seq:
sequences[0] = sequences[0][directmatch[1][2]:]
else:
sequences[1] = sequences[1][directmatch[1][2]:]
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
if directmatch[0].forward:
sequences[0][b'direction']=b'forward'
sequences[0][b'forward_errors']=directmatch[1][0]
sequences[0][b'forward_primer']=directmatch[0].raw
sequences[0][b'forward_match']=match.seq
else:
sequences[0][b'direction']=b'reverse'
sequences[0][b'reverse_errors']=directmatch[1][0]
sequences[0][b'reverse_primer']=directmatch[0].raw
sequences[0][b'reverse_match']=match.seq
# Keep only paired reverse primer
infos = infos[directmatch[0]]
reverse_primer = list(infos.keys())[0]
direct_primer = directmatch[0]
# If not aligned, look for other match in already computed matches (choose the one that makes the biggest amplicon)
if not_aligned:
i=1
# TODO comment
# Skip the matches that are empty, in the same direction as the first
# match, on the same primer, or on a primer other than the paired one
while i<len(all_direct_matches) and \
(all_direct_matches[i][1] is None or \
all_direct_matches[i][0].forward == directmatch[0].forward or \
all_direct_matches[i][0] == directmatch[0] or \
reverse_primer != all_direct_matches[i][0]) :
i+=1
if i < len(all_direct_matches):
reversematch = all_direct_matches[i]
else:
reversematch = None
# Cut reverse primer out of 1st matched seq if it contains it, because if it's also in the other sequence, the next step will "choose" only the one on the other sequence
if not_aligned:
# do it on same seq
if first_match_first_seq:
r = reverse_primer.revcomp(sequences[0])
else:
r = reverse_primer.revcomp(sequences[1])
if r is not None: # found
if first_match_first_seq :
sequences[0] = sequences[0][:r[1]]
else:
sequences[1] = sequences[1][:r[1]]
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
# do the same on the other seq
if first_match_first_seq:
r = direct_primer.revcomp(sequences[1])
else:
r = direct_primer.revcomp(sequences[0])
if r is not None: # found
if first_match_first_seq:
sequences[1] = sequences[1][:r[1]]
else:
sequences[0] = sequences[0][:r[1]]
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality
# Look for other primer in the other direction on the sequence, or
# If sequences are not already aligned and reverse primer not found in most likely sequence (the one without the forward primer), try matching on the same sequence than the first match (primer in the other direction)
if not not_aligned or (not_aligned and (reversematch is None or reversematch[1] is None)):
if not_aligned and first_match_first_seq:
seq_to_match = sequences[1]
else:
seq_to_match = sequences[0]
reversematch = []
# Compute begin
#begin=directmatch[1][2]+1 # end of match + 1 on the same sequence -- No, already cut out forward primer
# Try reverse matching on the other sequence:
new_seq = True
pattern = 0
for p in infos:
if pattern == MAX_PATTERN:
new_seq = True
pattern = 0
if not_aligned:
primer=p.revcomp
else:
primer=p
# Saving original primer as 4th member of the tuple to serve as correct key in infos dict even if it might have been reversed complemented
# (3rd member already used by directmatch)
reversematch.append((primer, primer(seq_to_match, same_sequence=not new_seq, pattern=pattern, begin=0), None, p))
new_seq = False
pattern+=1
# Choose match closer to the end of the sequence
reversematch = sorted(reversematch, key=sortReverseMatch, reverse=True)
# NOTE(review): all_reverse_matches is not read again in this function
all_reverse_matches = reversematch
reversematch = reversematch[0] if reversematch[0][1] is not None else None
if reversematch is None and None not in infos:
if directmatch[0].forward:
message = b'No reverse primer match'
else:
message = b'No direct primer match'
sequences[0][b'error']=message
return False, sequences[0]
# Partial status: only one primer matched; the samples are then read from infos[None]
if reversematch is None:
sequences[0][b'status']=b'partial'
if directmatch[0].forward:
tags=(directmatch[1][3],None)
else:
tags=(None,directmatch[1][3])
samples = infos[None]
else:
sequences[0][b'status']=b'full'
if not not_aligned or first_match_first_seq:
match = sequences[0][reversematch[1][1]:reversematch[1][2]]
else:
match = sequences[1][reversematch[1][1]:reversematch[1][2]]
match = match.reverse_complement
if not not_aligned:
sequences[0] = sequences[0][0:reversematch[1][1]]
elif first_match_first_seq:
sequences[1] = sequences[1][reversematch[1][2]:]
if not directmatch[0].forward:
sequences[1] = sequences[1].reverse_complement
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
else:
sequences[0] = sequences[0][reversematch[1][2]:]
# tags is always ordered as (forward tag, reverse tag)
if directmatch[0].forward:
tags=(directmatch[1][3], reversematch[1][3])
sequences[0][b'reverse_errors'] = reversematch[1][0]
sequences[0][b'reverse_primer'] = reversematch[0].raw
sequences[0][b'reverse_match'] = match.seq
else:
tags=(reversematch[1][3], directmatch[1][3])
sequences[0][b'forward_errors'] = reversematch[1][0]
sequences[0][b'forward_primer'] = reversematch[0].raw
sequences[0][b'forward_match'] = match.seq
if tags[0] is not None:
sequences[0][b'forward_tag'] = tags[0]
if tags[1] is not None:
sequences[0][b'reverse_tag'] = tags[1]
samples = infos[reversematch[3]]
if not directmatch[0].forward:
sequences[0] = sequences[0].reverse_complement
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
else:
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
# Find the sample from the tags: `samples` maps (forward tag, reverse tag) tuples to sample data
sample=None
if not no_tags:
if tags[0] is not None: # Direct tag known
if tags[1] is not None: # Reverse tag known
sample = samples.get(tags, None)
else: # Only direct tag known
s=[samples[x] for x in samples if x[0]==tags[0]]
if len(s)==1:
sample=s[0]
elif len(s)>1:
sequences[0][b'error']=b'Did not found reverse tag'
return False, sequences[0]
else:
sample=None
else:
if tags[1] is not None: # Only reverse tag known
s=[samples[x] for x in samples if x[1]==tags[1]]
if len(s)==1:
sample=s[0]
elif len(s)>1:
sequences[0][b'error']=b'Did not found forward tag'
return False, sequences[0]
else:
sample=None
# NOTE(review): whether the two statements below belong to the `if not no_tags`
# block can not be told from this copy -- confirm against the original file
if sample is None:
sequences[0][b'error']=b"No sample with that tag combination"
return False, sequences[0]
sequences[0].update(sample)
if not not_aligned:
sequences[0][b'seq_length']=len(sequences[0])
return True, sequences[0]
def run(config):
    """
    Assign every read of the input view to a sample, using annotate().

    The reads assigned to a sample are written in the output view. The
    other ones are written, untrimmed, in the `unidentified` view if one
    was given. When the output is the standard output, the result is first
    stored in a temporary view of the input DMS, printed, then deleted.

    Raises Exception when a view can not be opened or when the forward and
    reverse views have different lengths, NotImplementedError when an input
    view is not a NUC_SEQS view, and RollbackException when reading the
    primers or annotating the reads fails.
    """

    DMS.obi_atexit()

    logger("info", "obi ngsfilter")

    assert config['ngsfilter']['info_view'] is not None, "Option -t must be specified"

    # Open the input
    forward = None
    reverse = None
    input = None
    not_aligned = False

    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not open input reads")
    if input[2] != View_NUC_SEQS:
        raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
    i_dms = input[0]

    # Same test as for the `unidentified` option: the key may be present
    # with its None default
    if "reverse" in config["ngsfilter"] and config["ngsfilter"]["reverse"] is not None:

        forward = input[1]

        rinput = open_uri(config["ngsfilter"]["reverse"])
        if rinput is None:
            raise Exception("Could not open reverse reads")
        if rinput[2] != View_NUC_SEQS:
            raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')

        reverse = rinput[1]

        if len(forward) != len(reverse):
            raise Exception("Error: the number of forward and reverse reads are different")

        entries = [forward, reverse]
        not_aligned = True

        input_dms_name = [forward.dms.name, reverse.dms.name]
        input_view_name = [forward.name, reverse.name]

    else:
        entries = input[1]
        input_dms_name = [entries.dms.name]
        input_view_name = [entries.name]

    if not_aligned:
        entries_len = len(forward)
    else:
        entries_len = len(entries)

    # Open the output
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=View_NUC_SEQS)

    if output is None:
        raise Exception("Could not create output view")

    o_dms = output[0]
    output_0 = output[0]
    o_view = output[1]

    # If stdout output, create a temporary view in the input dms that will be deleted afterwards.
    if type(output_0)==BufferedWriter:
        o_dms = i_dms
        o_view_name = b"temp"
        # The suffix counter must exist before its first use in the loop
        i = 0
        while o_view_name in i_dms:  # Making sure view name is unique in input DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view = View_NUC_SEQS.new(i_dms, o_view_name)

    # Open the view containing the informations about the tags and the primers
    info_input = open_uri(config['ngsfilter']['info_view'])
    if info_input is None:
        raise Exception("Could not read the view containing the informations about the tags and the primers")
    info_view = info_input[1]
    input_dms_name.append(info_input[0].name)
    input_view_name.append(info_input[1].name)

    # Open the unidentified view
    if 'unidentified' in config['ngsfilter'] and config['ngsfilter']['unidentified'] is not None: # TODO keyError if undefined problem
        unidentified_input = open_uri(config['ngsfilter']['unidentified'],
                                      input=False,
                                      newviewtype=View_NUC_SEQS)
        if unidentified_input is None:
            raise Exception("Could not open the view containing the unidentified reads")
        unidentified = unidentified_input[1]
    else:
        unidentified = None

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(entries_len, config)
    else:
        pb = None

    # Check and store primers and tags
    try:
        infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned)  # TODO obi verbose option
    except RollbackException as e:
        # Raise again with the views to rollback
        if unidentified is not None:
            raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
        else:
            raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)

    # Every primer, paired primer and reverse-complemented paired primer
    # shares the same aligner
    aligner = Primer_search(primer_list, config['ngsfilter']['error'])

    for p in infos:
        p.aligner = aligner
        for paired_p in infos[p]:
            paired_p.aligner = aligner
            if paired_p.revcomp is not None:
                paired_p.revcomp.aligner = aligner

    if not_aligned:  # create columns used by alignpairedend tool
        Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
        Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)

        if unidentified is not None:
            Column.new_column(unidentified, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
            Column.new_column(unidentified, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=unidentified[REVERSE_SEQUENCE_COLUMN].version)

    g = 0   # next line of the output view
    u = 0   # next line of the unidentified view
    i = 0
    no_tags = config['ngsfilter']['notags']
    try:
        for i in range(entries_len):
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            if not_aligned:
                modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
            else:
                modseq = [Nuc_Seq.new_from_stored(entries[i])]
            good, oseq = annotate(modseq, infos, no_tags)
            if good:
                o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
                g+=1
            elif unidentified is not None:
                # Untrim sequences (put original back)
                if len(modseq) > 1:
                    oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
                    oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
                    unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
                else:
                    unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
                u+=1
    except Exception as e:
        if unidentified is not None:
            raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
        else:
            raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)

    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    if unidentified is not None:
        unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
        # Add comment about unidentified seqs
        unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
    o_dms.record_command_line(command_line)

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
    o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # If stdout output, delete the temporary result view in the input DMS
    if type(output_0)==BufferedWriter:
        View.delete_view(i_dms, o_view_name)

    i_dms.close(force=True)
    o_dms.close(force=True)
    info_input[0].close(force=True)
    if unidentified is not None:
        unidentified_input[0].close(force=True)
    aligner.free()

    logger("info", "Done.")

View File

@ -0,0 +1,87 @@
#cython: language_level=3
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.dms import DMS
from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.dms.view.view cimport View
from obitools3.utils cimport tostr
import os
import shutil
# Short title of the `obi rm` command implemented by this module.
# NOTE(review): presumably displayed by the command-line front end as the
# command description -- confirm against the code that reads `__title__`.
__title__="Delete a view"
def addOptions(parser):
    """Declare the command-line options of `obi rm` on `parser`.

    The command takes no option of its own: only the minimal input option
    group is registered.
    """
    addMinimalInputOption(parser)
def run(config):
    """Delete a view and the column files that no other view references.

    The view designated by ``config['obi']['inputURI']`` is opened, and the
    path of its ``.obiview`` file is built together with, for each of its
    columns, the path of the ``.odc`` column file and (for OBI_CHAR,
    OBI_QUAL, OBI_STR and OBI_SEQ columns) the path of the blob indexer
    directory. Every column that is also used by another view of the DMS is
    kept. The view and the DMS are then closed and the remaining files are
    deleted from disk.

    Raises:
        Exception: if the input URI can not be opened.
        NotImplementedError: if the input URI does not designate a view.
    """

    DMS.obi_atexit()

    logger("info", "obi rm")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input")

    # Check that it's a view
    if not isinstance(input[1], View):
        raise NotImplementedError()
    view = input[1]

    dms = input[0]

    # Get the path to the view file to remove
    path = dms.full_path  # dms path
    view_path = path + b"/VIEWS/"
    view_path += view.name
    view_path += b".obiview"

    # Column types whose values are stored through a blob indexer
    indexed_types = [b'OBI_CHAR', b'OBI_QUAL', b'OBI_STR', b'OBI_SEQ']

    # (original name, version) -> [column file, indexer directory or None]
    to_remove = {}
    for col_alias in view.keys():
        col = view[col_alias]
        col_ref = (col.original_name, col.version)
        # build file name and AVL file names
        col_file_name = f"{tostr(path)}/{tostr(col.original_name)}.obicol/{tostr(col.original_name)}@{col.version}.odc"
        if col.data_type in indexed_types:
            avl_file_name = f"{tostr(path)}/OBIBLOB_INDEXERS/{tostr(col.original_name)}_{col.version}_indexer"
        else:
            avl_file_name = None
        to_remove[col_ref] = [col_file_name, avl_file_name]

    # Keep every column that is still referenced by another view.
    # pop() is given a default because the same column can be referenced by
    # several other views (or not be a candidate at all): popping it a second
    # time must not raise KeyError.
    for vn in dms:
        v = dms[vn]
        # ignore the one being deleted
        if v.name == view.name:
            continue
        for c in v.keys():
            to_remove.pop((v[c].original_name, v[c].version), None)

    # Close the view and the DMS
    view.close()
    input[0].close(force=True)

    # rm AFTER view and DMS close
    os.remove(view_path)
    for col_file_name, avl_file_name in to_remove.values():
        os.remove(col_file_name)
        if avl_file_name is not None:
            shutil.rmtree(avl_file_name)

View File

@ -0,0 +1,168 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
OBI_CHAR, \
OBI_FLOAT, \
OBI_INT, \
OBI_QUAL, \
OBI_SEQ, \
OBI_STR, \
OBIBool_NA, \
OBIChar_NA, \
OBIFloat_NA, \
OBIInt_NA
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
# Replacement values returned by line_cmp() when the sorted attribute of a
# line is None. Keys are the OBI data type constants, looked up through the
# `data_type_int` attribute of the column.
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
              OBI_CHAR: OBIChar_NA,
              OBI_FLOAT: OBIFloat_NA,
              OBI_INT: OBIInt_NA,
              OBI_QUAL: [],
              OBI_SEQ: b"",
              OBI_STR: b""}
# Short title of the `obi sort` command implemented by this module.
__title__="Sort view lines according to the value of a given attribute"
def addOptions(parser):
    """Declare the command-line options of `obi sort` on `parser`.

    Registers the minimal input, minimal output and no-progress-bar option
    groups, then the sort specific options: ``--key/-k`` (repeatable, stored
    in ``sort:keys``) and ``--reverse/-r`` (stored in ``sort:reverse``).
    """
    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

    sort_group = parser.add_argument_group('obi sort specific options')

    # Attribute(s) to sort on; the option can be given several times
    sort_group.add_argument(
        '--key', '-k',
        dest="sort:keys",
        action="append",
        default=[],
        type=str,
        metavar='<TAG NAME>',
        help="Attribute used to sort the sequence records.",
    )

    # Sort direction
    sort_group.add_argument(
        '--reverse', '-r',
        dest="sort:reverse",
        action="store_true",
        default=False,
        help="Sort in reverse order.",
    )
def line_cmp(line, key, pb):
    """Return the value of attribute `key` in `line`, to be used as a sort key.

    When the value is None, the NULL_VALUE entry matching the data type of
    the column (``line.view[key].data_type_int``) is returned instead.

    `pb` is not used here: run() passes the result of a progress bar update
    (or None) in that position.
    """
    value = line[key]
    if value is not None:
        return value
    return NULL_VALUE[line.view[key].data_type_int]
def run(config):
    """Sort the lines of the input view and write them to the output view.

    A line selection holding every line index of the input view is sorted
    once per key of ``config['sort']['keys']`` (in the order the keys were
    given, each pass re-sorting the whole selection), then materialized into
    a new view. When the output is another DMS or stdout, the result is first
    created as a temporary view in the input DMS, then imported or printed,
    and finally deleted.

    Raises:
        Exception: if the input or the output can not be opened, or if the
            output view can not be created.
    """

    DMS.obi_atexit()

    logger("info", "obi sort")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    final_o_view_name = output[1]

    # If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
    if i_dms != o_dms or type(output_0)==BufferedWriter:
        temporary_view_name = b"temp"
        i=0
        while temporary_view_name in i_dms:  # Making sure view name is unique in input DMS
            temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view_name = temporary_view_name
        if type(output_0)==BufferedWriter:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(len(i_view), config)
    else:
        pb = None

    keys = config['sort']['keys']
    reverse = config['sort']['reverse']

    selection = Line_selection(i_view)

    for i in range(len(i_view)):  # TODO special function?
        PyErr_CheckSignals()
        selection.append(i)

    for k in keys:  # TODO order?
        PyErr_CheckSignals()
        if pb is not None:
            # The progress bar is updated as a side effect of computing each key
            selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=reverse)
        else:
            selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=reverse)

    if pb is not None:
        pb(len(i_view), force=True)
        print("", file=sys.stderr)

    # Create output view with the sorted line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # When materialize() fails no view object has been bound, so there is
        # nothing to hand over to RollbackException: referencing `o_view`
        # here would only hide the real error behind a NameError.
        raise Exception("obi sort error, could not create output view: "+str(e)) from e

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[input[0].name]
    input_view_name=[input[1].name]
    o_view.write_config(config, "sort", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    if i_dms != o_dms:
        o_view.close()
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
        o_view = o_dms[final_o_view_name]

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or type(output_0)==BufferedWriter:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,105 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes
import sys
from cpython.exc cimport PyErr_CheckSignals
# Short title of the `obi split` command implemented by this module.
__title__="Split"
def addOptions(parser):
    """Declare the command-line options of `obi split` on `parser`.

    Registers the minimal input and no-progress-bar option groups, then the
    split specific options: ``-p/--prefix`` (``split:prefix``),
    ``-t/--tag-name`` (``split:tagname``) and ``-u/--undefined``
    (``split:undefined``, default ``b'UNDEFINED'``).
    """
    addMinimalInputOption(parser)
    addNoProgressBarOption(parser)

    split_group = parser.add_argument_group("obi split specific options")

    # Prefix of the names of the views created
    split_group.add_argument(
        '-p', '--prefix',
        dest="split:prefix",
        action="store",
        metavar="<PREFIX>",
        help="Prefix added to each subview name (included undefined)",
    )

    # Attribute whose values define the subviews
    split_group.add_argument(
        '-t', '--tag-name',
        dest="split:tagname",
        action="store",
        metavar="<TAG_NAME>",
        help="Attribute/tag used to split the input",
    )

    # Name used for the lines that have no usable value
    split_group.add_argument(
        '-u', '--undefined',
        dest="split:undefined",
        action="store",
        default=b'UNDEFINED',
        metavar="<VIEW_NAME>",
        help="Name of the view where undefined sequenced are stored (will be PREFIX_VIEW_NAME)",
    )
def run(config):
    """Split the input view into one view per value of an attribute.

    Every line of the input view is assigned to the line selection matching
    its value for ``config['split']['tagname']``; lines where the attribute
    is absent, None or empty go to the selection named by
    ``config['split']['undefined']``. Each selection is then materialized as
    a view named prefix + value in the input DMS. A missing prefix is treated
    as an empty prefix.

    Raises:
        Exception: if the input can not be opened, or if a failure happens
            before any output view was created.
        RollbackException: if creating or annotating an output view fails
            after at least one output view was created.
    """

    DMS.obi_atexit()

    logger("info", "obi split")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(len(i_view), config)
    else:
        pb = None

    tag_to_split = config["split"]["tagname"]
    undefined = tobytes(config["split"]["undefined"])

    # The prefix option has no default: fall back on an empty prefix rather
    # than failing on None.encode()
    prefix = None
    if "prefix" in config["split"]:
        prefix = config["split"]["prefix"]
    prefix = prefix.encode() if prefix is not None else b""

    selections = {}

    # Go through input view and split
    for i in range(len(i_view)):
        PyErr_CheckSignals()
        if pb is not None:
            pb(i)
        line = i_view[i]
        if tag_to_split not in line or line[tag_to_split] is None or len(line[tag_to_split])==0:
            value = undefined
        else:
            value = line[tag_to_split]
        if value not in selections:
            selections[value] = Line_selection(i_view)
        selections[value].append(i)

    if pb is not None:
        pb(len(i_view), force=True)
        print("", file=sys.stderr)

    # Command description saved in View and DMS comments. Built before the
    # loop so that it also exists when there is no selection at all (empty
    # input view) and record_command_line() is still called below.
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[input[0].name]
    input_view_name=[input[1].name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])

    # Create output views with the line selection
    o_view = None
    try:
        for cat in selections:
            o_view_name = prefix+cat
            o_view = selections[cat].materialize(o_view_name)
            o_view.write_config(config, "split", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
            o_view.close()
    except Exception as e:
        if o_view is None:
            # The very first view could not be created: there is no view
            # object to roll back
            raise Exception("obi split error, could not create output view: "+str(e)) from e
        raise RollbackException("obi split error, rollbacking view: "+str(e), o_view) from e

    i_dms.record_command_line(command_line)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,296 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.dms.capi.obiview cimport COUNT_COLUMN
from obitools3.utils cimport tostr
from functools import reduce
import math
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
# Short title of the `obi stats` command implemented by this module.
__title__="Compute basic statistics for attribute values"
# Description of the command. NOTE(review): this string follows the
# `__title__` assignment, so it is a bare expression statement rather than
# the module docstring -- confirm whether anything is expected to read it.
'''
`obi stats` computes basic statistics for attribute values of sequence records.
The sequence records can be categorized or not using one or several ``-c`` options.
By default, only the number of sequence records and the total count are computed for each category.
Additional statistics can be computed for attribute values in each category, such as:
- minimum value (``-m`` option)
- maximum value (``-M`` option)
- mean value (``-a`` option)
- variance (``-v`` option)
- standard deviation (``-s`` option)
The result is a contingency table with the different categories in rows, and the
computed statistics in columns.
'''
# TODO: when is the taxonomy possibly used?
def addOptions(parser):
    """Declare the command-line options of `obi stats` on `parser`.

    Registers the minimal input and taxonomy option groups, then the six
    repeatable options of the command. Each of them appends attribute names
    to a list that defaults to empty.
    """
    addMinimalInputOption(parser)
    addTaxonomyOption(parser)

    stats_group = parser.add_argument_group('obi stats specific options')

    # (short flag, long flag, destination, help text), in registration order
    option_table = (
        ('-c', '--category-attribute', "stats:categories",
         "Attribute used to categorize the records."),
        ('-m', '--min', "stats:minimum",
         "Compute the minimum value of attribute for each category."),
        ('-M', '--max', "stats:maximum",
         "Compute the maximum value of attribute for each category."),
        ('-a', '--mean', "stats:mean",
         "Compute the mean value of attribute for each category."),
        ('-v', '--variance', "stats:var",
         "Compute the variance of attribute for each category."),
        ('-s', '--std-dev', "stats:sd",
         "Compute the standard deviation of attribute for each category."),
    )

    for short_flag, long_flag, destination, help_text in option_table:
        stats_group.add_argument(short_flag, long_flag,
                                 action="append", dest=destination,
                                 metavar="<Attribute Name>",
                                 default=[],
                                 help=help_text)
def statistics(values, attributes, func):
    """Apply `func` to the values collected for each attribute and category.

    `values` maps an attribute name to a dictionary mapping a category to the
    list of values observed for it. Attributes of `attributes` that are not
    in `values` are skipped.

    Returns a pair of dictionaries keyed by attribute name: the first maps
    each category to ``func(list of values)``, the second gives the length of
    the longest ``str()`` form among the results of that attribute.
    """
    results = {}
    widths = {}
    for attribute in attributes:
        if attribute not in values:
            continue
        per_category = {}
        widest = 0
        for category, observed in values[attribute].items():
            computed = func(observed)
            per_category[category] = computed
            widest = max(widest, len(str(computed)))
        results[attribute] = per_category
        widths[attribute] = widest
    return results, widths
def minimum(values, options):
    """Compute, with statistics(), the minimum of the attributes listed in options['minimum']."""
    attributes = options['minimum']
    return statistics(values, attributes, min)
def maximum(values, options):
    """Compute, with statistics(), the maximum of the attributes listed in options['maximum']."""
    attributes = options['maximum']
    return statistics(values, attributes, max)
def mean(values, options):
    """Compute, with statistics(), the mean of the attributes listed in options['mean']."""

    def average(v):
        # Plain left-to-right accumulation starting from the integer 0
        total = 0
        for item in v:
            total = total + item
        return float(total) / len(v)

    attributes = options['mean']
    return statistics(values, attributes, average)
def variance(v):
    """Return the variance of the values of `v`, rounded to 5 decimals.

    The result is computed from the sum and the sum of squares of the values
    with an ``n - 1`` denominator. A single value gives 0.
    """
    n = len(v)
    if n == 1:
        return 0

    total = 0.
    total_of_squares = 0.
    for item in v:
        total = total + item
        total_of_squares = total_of_squares + item**2

    # Rounded so that float noise does not turn an actually null variance
    # into a tiny non-zero value
    result = round(total_of_squares/(n-1) - total**2/n/(n-1), 5)
    if result == -0.0:
        # The rounding may have produced -0.0: normalize it to +0.0
        result = 0.0
    return result
def varpop(values, options):
    """Compute, with statistics(), the variance() of the attributes listed in options['var']."""
    attributes = options['var']
    return statistics(values, attributes, variance)
def sd(values, options):
    """Compute, with statistics(), the standard deviation of the attributes listed in options['sd'].

    The standard deviation is the square root of variance().
    """
    attributes = options['sd']
    return statistics(values, attributes, lambda v: math.sqrt(variance(v)))
def run(config):
    """Print a table of statistics computed on the lines of the input view.

    Each line is assigned to a category: the tuple of the values of the
    ``config['stats']['categories']`` expressions evaluated on the line (None
    for an expression that can not be evaluated). For each category the
    number of lines and the sum of the COUNT column (1 per line when the line
    has no such column) are computed, plus the minimum, maximum, mean,
    variance and standard deviation of the requested attributes. The table is
    printed on stdout, categories sorted by decreasing total count. A
    statistic that can not be computed for a category because no line of the
    category holds the attribute is printed as ``NA``.

    Raises:
        Exception: if the input view or the taxonomy can not be opened.
    """

    DMS.obi_atexit()

    logger("info", "obi stats")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input view")
    i_view = input[1]

    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        taxo_uri = open_uri(config['obi']['taxoURI'])
        if taxo_uri is None or taxo_uri[2] == bytes:
            raise Exception("Couldn't open taxonomy")
        taxo = taxo_uri[1]
    else:
        taxo = None

    # Attributes for which at least one statistic is requested. Not named
    # `statistics`, so that the module-level function of that name is not
    # shadowed in this function.
    stat_attributes = set(config['stats']['minimum']) | set(config['stats']['maximum']) | set(config['stats']['mean']) | set(config['stats']['var']) | set(config['stats']['sd'])
    catcount={}
    totcount={}
    values={}
    lcat=0

    # Initialize the progress bar
    pb = ProgressBar(len(i_view), config)

    for i in range(len(i_view)):
        PyErr_CheckSignals()
        pb(i)
        line = i_view[i]

        category = []
        for c in config['stats']['categories']:
            try:
                if taxo is not None:
                    loc_env = {'sequence': line, 'line': line, 'taxonomy': taxo}
                else:
                    loc_env = {'sequence': line, 'line': line}
                # NOTE(review): the category option is evaluated as a Python
                # expression; it must only ever come from a trusted command
                # line.
                v = eval(c, loc_env, line)
                lv=len(str(v))
                if lv > lcat:
                    lcat=lv
                category.append(v)
            except Exception:
                # The expression can not be evaluated on this line. Only
                # Exception is caught, so that an interruption is not
                # swallowed.
                category.append(None)
                if 4 > lcat:  # 4 is the width of 'None'
                    lcat=4

        category=tuple(category)
        catcount[category]=catcount.get(category,0)+1
        try:
            totcount[category]=totcount.get(category,0)+line[COUNT_COLUMN]
        except KeyError:
            totcount[category]=totcount.get(category,0)+1
        for var in stat_attributes:
            if var in line and line[var] is not None:
                v = line[var]
                if var not in values:
                    values[var]={}
                if category not in values[var]:
                    values[var][category]=[]
                values[var][category].append(v)

    # The loop variable is not bound when the view is empty
    pb(len(i_view), force=True)
    print("", file=sys.stderr)

    mini, lmini = minimum(values, config['stats'])
    maxi, lmaxi = maximum(values, config['stats'])
    avg, lavg = mean(values, config['stats'])
    varp, lvarp = varpop(values, config['stats'])
    sigma, lsigma = sd(values, config['stats'])

    # Column header formats
    pcat = "%%-%ds" % lcat
    if config['stats']['minimum']:
        minvar= "min_%%-%ds" % max(len(x) for x in config['stats']['minimum'])
    else:
        minvar= "%s"

    if config['stats']['maximum']:
        maxvar= "max_%%-%ds" % max(len(x) for x in config['stats']['maximum'])
    else:
        maxvar= "%s"

    if config['stats']['mean']:
        meanvar= "mean_%%-%ds" % max(len(x) for x in config['stats']['mean'])
    else:
        meanvar= "%s"

    if config['stats']['var']:
        varvar= "var_%%-%ds" % max(len(x) for x in config['stats']['var'])
    else:
        varvar= "%s"

    if config['stats']['sd']:
        sdvar= "sd_%%-%ds" % max(len(x) for x in config['stats']['sd'])
    else:
        sdvar= "%s"

    # Header line
    hcat = ""

    for x in config['stats']['categories']:
        hcat += pcat % x
        hcat += "\t"

    for x in config['stats']['minimum']:
        hcat += minvar % x
        hcat += "\t"

    for x in config['stats']['maximum']:
        hcat += maxvar % x
        hcat += "\t"

    for x in config['stats']['mean']:
        hcat += meanvar % x
        hcat += "\t"

    for x in config['stats']['var']:
        hcat += varvar % x
        hcat += "\t"

    for x in config['stats']['sd']:
        hcat += sdvar % x
        hcat += "\t"

    hcat += "count\ttotal"

    print(hcat)

    def format_stat(stat, width, attribute, category, conversion):
        # An attribute is missing from `stat` when no line holds it, and a
        # category is missing when none of its lines does: print a
        # placeholder instead of failing with a KeyError.
        if attribute not in stat or category not in stat[attribute]:
            return "NA"
        return ("%%%d%s" % (width[attribute], conversion)) % stat[attribute][category]

    # One row per category, by decreasing total count
    sorted_stats = sorted(catcount.items(), key = lambda kv:(totcount[kv[0]]), reverse=True)
    for i in range(len(sorted_stats)):
        c = sorted_stats[i][0]
        for v in c:
            if type(v) == bytes:
                print(pcat % tostr(v)+"\t", end="")
            else:
                print(pcat % str(v)+"\t", end="")
        for m in config['stats']['minimum']:
            print(format_stat(mini, lmini, m, c, "d")+"\t", end="")
        for m in config['stats']['maximum']:
            print(format_stat(maxi, lmaxi, m, c, "d")+"\t", end="")
        for m in config['stats']['mean']:
            print(format_stat(avg, lavg, m, c, "f")+"\t", end="")
        for m in config['stats']['var']:
            print(format_stat(varp, lvarp, m, c, "f")+"\t", end="")
        for m in config['stats']['sd']:
            print(format_stat(sigma, lsigma, m, c, "f")+"\t", end="")
        print("%d" %catcount[c]+"\t", end="")
        print("%d" %totcount[c]+"\t")

    input[0].close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,130 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
# Short title of the `obi tail` command implemented by this module.
__title__="Keep the N last lines of a view"
def addOptions(parser):
    """Declare the command-line options of `obi tail` on `parser`.

    Registers the minimal input, minimal output and no-progress-bar option
    groups, then ``-n/--sequence-count`` (``tail:count``, an integer
    defaulting to 10).
    """
    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

    tail_group = parser.add_argument_group('obi tail specific options')

    # Number of lines kept at the end of the view
    tail_group.add_argument(
        '-n', '--sequence-count',
        dest="tail:count",
        action="store",
        type=int,
        default=10,
        metavar='<N>',
        help="Number of last records to keep.",
    )
def run(config):
    """Write the last ``config['tail']['count']`` lines of the input view to the output view.

    The indices of the last lines are gathered in a line selection which is
    materialized into a new view. When the output is another DMS or stdout,
    the result is first created as a temporary view in the input DMS, then
    imported or printed, and finally deleted.

    Raises:
        Exception: if the input or the output can not be opened, or if the
            output view can not be created.
    """

    DMS.obi_atexit()

    logger("info", "obi tail")

    # Open the input
    input = open_uri(config["obi"]["inputURI"])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    final_o_view_name = output[1]

    # If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
    if i_dms != o_dms or type(output_0)==BufferedWriter:
        temporary_view_name = b"temp"
        i=0
        while temporary_view_name in i_dms:  # Making sure view name is unique in input DMS
            temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view_name = temporary_view_name
        if type(output_0)==BufferedWriter:
            o_dms = i_dms
    else:
        o_view_name = final_o_view_name

    # Index of the first line to keep
    start = max(len(i_view) - config['tail']['count'], 0)
    kept_count = len(i_view) - start

    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(kept_count, config)
    else:
        pb = None

    selection = Line_selection(i_view)

    for i in range(start, len(i_view)):
        PyErr_CheckSignals()
        if pb is not None:
            # The bar is sized on the number of kept lines, so it is given
            # the position among them and not the index in the view
            pb(i - start)
        selection.append(i)

    if pb is not None:
        # Does not rely on the loop variable, which is not bound when the
        # loop above did not run
        pb(kept_count, force=True)
        print("", file=sys.stderr)

    # Create output view with the line selection
    try:
        o_view = selection.materialize(o_view_name)
    except Exception as e:
        # When materialize() fails no view object has been bound, so there is
        # nothing to hand over to RollbackException: referencing `o_view`
        # here would only hide the real error behind a NameError.
        raise Exception("obi tail error, could not create output view: "+str(e)) from e

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "tail", command_line, input_dms_name=[i_dms.name], input_view_name=[i_view.name])
    o_dms.record_command_line(command_line)

    # If input and output DMS are not the same, export the temporary view to the output DMS
    # and delete the temporary view in the input DMS
    if i_dms != o_dms:
        o_view.close()
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
        o_view = o_dms[final_o_view_name]

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or type(output_0)==BufferedWriter:
        View.delete_view(i_dms, o_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

View File

@ -0,0 +1,230 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.dms.column.column cimport Column
from functools import reduce
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes, tostr
from io import BufferedWriter
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN, \
COUNT_COLUMN, \
TAXID_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_INT
from obitools3.dms.capi.obitaxonomy cimport MIN_LOCAL_TAXID
import time
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
# Short title of the `obi taxonomy` command implemented by this module.
__title__="Add taxa with a new generated taxid to an NCBI taxonomy database"
def addOptions(parser):
    """Declare the command-line options of `obi taxonomy` on `parser`.

    Registers the minimal input, taxonomy, minimal output and
    no-progress-bar option groups, then the taxonomy specific options:
    ``-n/--taxon-name-tag``, ``-a/--restricting-ancestor``,
    ``-t/--taxid-tag`` and ``-l/--log-file``.
    """
    addMinimalInputOption(parser)
    addTaxonomyOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

    taxonomy_group = parser.add_argument_group('obi taxonomy specific options')

    # Tag holding the scientific name of the taxon
    taxonomy_group.add_argument(
        '-n', '--taxon-name-tag',
        dest="taxonomy:taxon_name_tag",
        action="store",
        default=b"SCIENTIFIC_NAME",
        metavar="<SCIENTIFIC_NAME_TAG>",
        help="Name of the tag giving the scientific name of the taxon "
             "(default: 'SCIENTIFIC_NAME').",
    )

#    group.add_argument('-g', '--try-genus-match',
#                       action="store_true", dest="taxonomy:try_genus_match",
#                       default=False,
#                       help="Try matching the first word of <SCIENTIFIC_NAME_TAG> when can't find corresponding taxid for a taxon. "
#                            "If there is a match it is added in the 'parent_taxid' tag. (Can be used by 'obi taxonomy' to add the taxon under that taxid).")

    # Ancestor under which the taxids are searched and added
    taxonomy_group.add_argument(
        '-a', '--restricting-ancestor',
        dest="taxonomy:restricting_ancestor",
        action="store",
        default=None,
        metavar="<RESTRICTING_ANCESTOR>",
        help="Enables to restrict the addition of taxids under an ancestor specified by its taxid.",
    )

    # Tag receiving the taxid
    taxonomy_group.add_argument(
        '-t', '--taxid-tag',
        dest="taxonomy:taxid_tag",
        action="store",
        default=b"TAXID",
        metavar="<TAXID_TAG>",
        help="Name of the tag to store the new taxid "
             "(default: 'TAXID').",
    )

    # Optional log file
    taxonomy_group.add_argument(
        '-l', '--log-file',
        dest="taxonomy:log_file",
        action="store",
        default='',
        metavar="<LOG_FILE>",
        help="Path to a log file to write informations about added taxids.",
    )
def run(config):
    """Assign a taxid to every line of a view, adding unknown taxa to the taxonomy.

    The output view is a clone of the input view with a new taxid column.
    For each line, the taxon name is searched in the taxonomy (optionally
    restricted to the descendants of ``restricting_ancestor``):

    - if it is found, its taxid is stored;
    - otherwise, if the line provides a value in the ``PARENT_TAXID``
      column, the taxon is added as a species under that parent;
    - otherwise, if the first word of the name matches a taxon, the taxon is
      added as a species under that match (and the match is stored in the
      ``PARENT_TAXID`` column when the view has one);
    - otherwise the taxon is added as a species under the restricting
      ancestor.

    A message is printed for each line, to the log file if one is given and
    to stdout otherwise. The taxonomy is then written with ``update=True``.

    Raises:
        Exception: if the input, the output or the taxonomy can not be
            opened, or if the output view can not be created.
        RollbackException: if anything fails while the taxids are assigned.
    """

    DMS.obi_atexit()

    logger("info", "obi taxonomy")

    # Open the input
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not read input view")
    i_dms = input[0]
    i_view = input[1]
    i_view_name = input[1].name

    # Open the output: only the DMS, as the output view is going to be created by cloning the input view
    # (could eventually be done via an open_uri() argument)
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    output_0 = output[0]
    o_view_name = output[1]

    # stdout output: create temporary view
    if type(output_0)==BufferedWriter:
        o_dms = i_dms
        i=0
        o_view_name = b"temp"
        while o_view_name in i_dms:  # Making sure view name is unique in output DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
        imported_view_name = o_view_name

    # If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
    # (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
    if i_dms != o_dms:
        imported_view_name = i_view_name
        i=0
        while imported_view_name in o_dms:  # Making sure view name is unique in output DMS
            imported_view_name = i_view_name+b"_"+str2bytes(str(i))
            i+=1
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
        i_view = o_dms[imported_view_name]

    # Clone output view from input view
    o_view = i_view.clone(o_view_name)
    if o_view is None:
        raise Exception("Couldn't create output view")
    i_view.close()

    # Open taxonomy
    taxo_uri = open_uri(config['obi']['taxoURI'])
    if taxo_uri is None or taxo_uri[2] == bytes:
        raise Exception("Couldn't open taxonomy")
    taxo = taxo_uri[1]

    # Initialize the progress bar
    # (created as before, but its updates are currently disabled in the loop)
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(len(o_view), config)
    else:
        pb = None

    logfile = None
    try:
        if config['taxonomy']['log_file']:
            logfile = open(config['taxonomy']['log_file'], 'w')
        else:
            logfile = sys.stdout

        # The option defaults to None, which int() can not convert
        res_anc = None
        if 'restricting_ancestor' in config['taxonomy'] and config['taxonomy']['restricting_ancestor'] is not None:
            res_anc = int(config['taxonomy']['restricting_ancestor'])

        taxid_column_name = config['taxonomy']['taxid_tag']
        parent_taxid_column_name = "PARENT_TAXID"  # TODO macro
        taxon_name_column_name = config['taxonomy']['taxon_name_tag']
        taxid_column = Column.new_column(o_view, taxid_column_name, OBI_INT)
        if parent_taxid_column_name in o_view:
            parent_taxid_column = o_view[parent_taxid_column_name]
        else:
            parent_taxid_column = None
            #parent_taxid_column = Column.new_column(o_view, parent_taxid_column_name, OBI_INT)
        taxon_name_column = o_view[taxon_name_column_name]

        for i in range(len(o_view)):
            PyErr_CheckSignals()
            #if pb is not None:
            #    pb(i)
            taxon_name = taxon_name_column[i]
            taxon = taxo.get_taxon_by_name(taxon_name, res_anc)
            if taxon is not None:
                taxid_column[i] = taxon.taxid
                print(f"Found taxon '{tostr(taxon_name)}' already existing with taxid {taxid_column[i]}", file=logfile)
            elif parent_taxid_column is not None and parent_taxid_column[i] is not None:
                # A parent taxid is provided for this line
                taxid_column[i] = taxo.add_taxon(taxon_name, 'species', parent_taxid_column[i])
                print(f"Adding taxon '{tostr(taxon_name)}' under provided parent {parent_taxid_column[i]} with taxid {taxid_column[i]}", file=logfile)
            else:
                # try finding genus or other parent taxon from the first word
                taxon_name_sp = taxon_name.split(b" ")
                taxon = taxo.get_taxon_by_name(taxon_name_sp[0], res_anc)
                if taxon is not None:
                    # The view may have no PARENT_TAXID column at all: only
                    # record the parent found when there is a column for it
                    if parent_taxid_column is not None:
                        parent_taxid_column[i] = taxon.taxid
                    taxid_column[i] = taxo.add_taxon(taxon_name, 'species', taxon.taxid)
                    print(f"Adding taxon '{tostr(taxon_name)}' under '{tostr(taxon.name)}' ({taxon.taxid}) with taxid {taxid_column[i]}", file=logfile)
                else:
                    taxid_column[i] = taxo.add_taxon(taxon_name, 'species', res_anc)
                    print(f"Adding taxon '{tostr(taxon_name)}' under provided restricting ancestor {res_anc} with taxid {taxid_column[i]}", file=logfile)

        taxo.write(taxo.name, update=True)

    except Exception as e:
        raise RollbackException("obi taxonomy error, rollbacking view: "+str(e), o_view) from e

    finally:
        # Close the log file if one was opened here (never sys.stdout)
        if logfile is not None and logfile is not sys.stdout:
            logfile.close()

    #if pb is not None:
    #    pb(i, force=True)
    #    print("", file=sys.stderr)

    #logger("info", "\nTaxa already in the taxonomy: "+str(found_count)+"/"+str(len(o_view))+" ("+str(round(found_count*100.0/len(o_view), 2))+"%)")
    #logger("info", "\nParent taxids found: "+str(parent_found_count)+"/"+str(len(o_view))+" ("+str(round(parent_found_count*100.0/len(o_view), 2))+"%)")
    #logger("info", "\nTaxids not found: "+str(not_found_count)+"/"+str(len(o_view))+" ("+str(round(not_found_count*100.0/len(o_view), 2))+"%)")

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[input[0].name]
    input_view_name=[i_view_name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    o_view.write_config(config, "taxonomy", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or type(output_0)==BufferedWriter:
        View.delete_view(o_dms, imported_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")

Some files were not shown because too many files have changed in this diff Show More