New command: obi taxonomy to add local taxa (closes #64)
This commit is contained in:
230
python/obitools3/commands/taxonomy.pyx
Normal file
230
python/obitools3/commands/taxonomy.pyx
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.column.column cimport Column
|
||||||
|
from functools import reduce
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.utils cimport tobytes, str2bytes, tostr
|
||||||
|
from io import BufferedWriter
|
||||||
|
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||||
|
ID_COLUMN, \
|
||||||
|
DEFINITION_COLUMN, \
|
||||||
|
QUALITY_COLUMN, \
|
||||||
|
COUNT_COLUMN, \
|
||||||
|
TAXID_COLUMN
|
||||||
|
from obitools3.dms.capi.obitypes cimport OBI_INT
|
||||||
|
from obitools3.dms.capi.obitaxonomy cimport MIN_LOCAL_TAXID
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Add taxa with a new generated taxid to an NCBI taxonomy database"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Declare the command line options of the `obi taxonomy` command.

    The generic input, taxonomy, output and progress bar option groups are
    registered first, then the options specific to this command are added
    in their own argument group.

    :param parser: the argument parser on which the options are declared
    """

    # Option groups shared with the other obi commands, registered in order
    for add_generic_options in (addMinimalInputOption,
                                addTaxonomyOption,
                                addMinimalOutputOption,
                                addNoProgressBarOption):
        add_generic_options(parser)

    taxonomy_options = parser.add_argument_group('obi taxonomy specific options')

    # Tag (column) holding the scientific name of each taxon
    taxonomy_options.add_argument(
        '-n', '--taxon-name-tag',
        action="store",
        dest="taxonomy:taxon_name_tag",
        metavar="<SCIENTIFIC_NAME_TAG>",
        default=b"SCIENTIFIC_NAME",
        help="Name of the tag giving the scientific name of the taxon "
             "(default: 'SCIENTIFIC_NAME').")

    # Disabled option, kept for reference:
    # taxonomy_options.add_argument('-g', '--try-genus-match',
    #                               action="store_true", dest="taxonomy:try_genus_match",
    #                               default=False,
    #                               help="Try matching the first word of <SCIENTIFIC_NAME_TAG> when can't find corresponding taxid for a taxon. "
    #                                    "If there is a match it is added in the 'parent_taxid' tag. (Can be used by 'obi taxonomy' to add the taxon under that taxid).")

    # Taxid of the ancestor restricting where taxa are looked up and added
    taxonomy_options.add_argument(
        '-a', '--restricting-ancestor',
        action="store",
        dest="taxonomy:restricting_ancestor",
        metavar="<RESTRICTING_ANCESTOR>",
        default=None,
        help="Enables to restrict the addition of taxids under an ancestor specified by its taxid.")

    # Tag (column) receiving the found or newly generated taxid
    taxonomy_options.add_argument(
        '-t', '--taxid-tag',
        action="store",
        dest="taxonomy:taxid_tag",
        metavar="<TAXID_TAG>",
        default=b"TAXID",
        help="Name of the tag to store the new taxid "
             "(default: 'TAXID').")

    # Optional log file (empty string: no dedicated log file)
    taxonomy_options.add_argument(
        '-l', '--log-file',
        action="store",
        dest="taxonomy:log_file",
        metavar="<LOG_FILE>",
        default='',
        help="Path to a log file to write informations about added taxids.")
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
    """Run the `obi taxonomy` command.

    The input view is cloned into an output view, in which a taxid column
    (named after the `taxid_tag` option) is filled line by line from the
    taxon name column (`taxon_name_tag` option):

    - if the taxon name is already known in the taxonomy, its taxid is stored;
    - else, if the view has a "PARENT_TAXID" column with a value for that line,
      the taxon is added to the taxonomy as a 'species' under that parent;
    - else, if the first word of the taxon name matches a known taxon, the
      new taxon is added under it and the parent taxid is recorded in the
      "PARENT_TAXID" column (created on first use if missing);
    - else the taxon is added under the restricting ancestor.

    The taxonomy is then rewritten in update mode, and the command
    configuration is saved in the output view and DMS.

    :param config: the configuration of the command; the 'obi' and 'taxonomy'
                   sections are read
    :raises RollbackException: if any error occurs while annotating the view
                               or writing the taxonomy
    :raises Exception: if the input, output or taxonomy can not be opened
    """

    DMS.obi_atexit()

    logger("info", "obi taxonomy")

    # Open the input
    input_uri = open_uri(config['obi']['inputURI'])
    if input_uri is None:
        raise Exception("Could not read input view")
    i_dms = input_uri[0]
    i_view = input_uri[1]
    i_view_name = i_view.name

    # Open the output: only the DMS, as the output view is going to be created by cloning the input view
    # (could eventually be done via an open_uri() argument)
    output_uri = open_uri(config['obi']['outputURI'],
                          input=False,
                          dms_only=True)
    if output_uri is None:
        raise Exception("Could not create output view")
    o_dms = output_uri[0]
    output_0 = output_uri[0]
    o_view_name = output_uri[1]

    # stdout output: create temporary view
    if type(output_0)==BufferedWriter:
        o_dms = i_dms
        i=0
        o_view_name = b"temp"
        while o_view_name in i_dms:  # Making sure view name is unique in output DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
        imported_view_name = o_view_name

    # If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
    # (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
    if i_dms != o_dms:
        imported_view_name = i_view_name
        i=0
        while imported_view_name in o_dms:  # Making sure view name is unique in output DMS
            imported_view_name = i_view_name+b"_"+str2bytes(str(i))
            i+=1
        View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
        i_view = o_dms[imported_view_name]

    # Clone output view from input view
    o_view = i_view.clone(o_view_name)
    if o_view is None:
        raise Exception("Couldn't create output view")
    i_view.close()

    # Open taxonomy
    taxo_uri = open_uri(config['obi']['taxoURI'])
    if taxo_uri is None or taxo_uri[2] == bytes:
        raise Exception("Couldn't open taxonomy")
    taxo = taxo_uri[1]

    # Initialize the progress bar
    # NOTE(review): the progress bar is created but never updated below
    # (its calls were commented out in the original code) - confirm whether it should be wired in.
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(len(o_view), config)
    else:
        pb = None

    logfile = None
    try:
        # Log to the given file if any, to the standard output otherwise
        if config['taxonomy']['log_file']:
            logfile = open(config['taxonomy']['log_file'], 'w')
        else:
            logfile = sys.stdout

        # The option defaults to None: only convert it when a value was actually given
        res_anc = None
        if 'restricting_ancestor' in config['taxonomy'] and config['taxonomy']['restricting_ancestor'] is not None:
            res_anc = int(config['taxonomy']['restricting_ancestor'])

        taxid_column_name = config['taxonomy']['taxid_tag']
        parent_taxid_column_name = "PARENT_TAXID" # TODO macro
        taxon_name_column_name = config['taxonomy']['taxon_name_tag']
        taxid_column = Column.new_column(o_view, taxid_column_name, OBI_INT)
        if parent_taxid_column_name in o_view:
            parent_taxid_column = o_view[parent_taxid_column_name]
        else:
            # Created later, only if a parent taxid actually needs to be stored
            parent_taxid_column = None
        taxon_name_column = o_view[taxon_name_column_name]

        for i in range(len(o_view)):
            PyErr_CheckSignals()
            taxon_name = taxon_name_column[i]
            taxon = taxo.get_taxon_by_name(taxon_name, res_anc)

            if taxon is not None:
                # Taxon already in the taxonomy
                taxid = taxon.taxid
                taxid_column[i] = taxid
                print(f"Found taxon '{tostr(taxon_name)}' already existing with taxid {taxid}", file=logfile)
                continue

            # Taxon unknown: add it under the best parent available
            if parent_taxid_column is not None and parent_taxid_column[i] is not None:
                parent_taxid = parent_taxid_column[i]
                taxid = taxo.add_taxon(taxon_name, 'species', parent_taxid)
                taxid_column[i] = taxid
                print(f"Adding taxon '{tostr(taxon_name)}' under provided parent {parent_taxid} with taxid {taxid}", file=logfile)
                continue

            # Try finding genus or other parent taxon from the first word
            taxon_name_sp = taxon_name.split(b" ")
            taxon = taxo.get_taxon_by_name(taxon_name_sp[0], res_anc)
            if taxon is not None:
                # The parent taxid column may not exist in the view yet:
                # create it before storing the parent found
                if parent_taxid_column is None:
                    parent_taxid_column = Column.new_column(o_view, parent_taxid_column_name, OBI_INT)
                parent_taxid_column[i] = taxon.taxid
                taxid = taxo.add_taxon(taxon_name, 'species', taxon.taxid)
                taxid_column[i] = taxid
                print(f"Adding taxon '{tostr(taxon_name)}' under '{tostr(taxon.name)}' ({taxon.taxid}) with taxid {taxid}", file=logfile)
            else:
                # Last resort: the restricting ancestor, which must then have been provided
                if res_anc is None:
                    raise Exception(f"Can't add taxon '{tostr(taxon_name)}': no parent taxon found and no restricting ancestor provided")
                taxid = taxo.add_taxon(taxon_name, 'species', res_anc)
                taxid_column[i] = taxid
                print(f"Adding taxon '{tostr(taxon_name)}' under provided restricting ancestor {res_anc} with taxid {taxid}", file=logfile)

        # Rewrite the taxonomy files with the added taxa
        taxo.write(taxo.name, update=True)

    except Exception as e:
        raise RollbackException("obi taxonomy error, rollbacking view: "+str(e), o_view)

    finally:
        # Only close the log file if it was opened here
        if logfile is not None and logfile is not sys.stdout:
            logfile.close()

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[i_dms.name]
    input_view_name=[i_view_name]
    if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
        input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
        input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
    o_view.write_config(config, "taxonomy", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    o_dms.record_command_line(command_line)

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    # If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
    if i_dms != o_dms or type(output_0)==BufferedWriter:
        View.delete_view(o_dms, imported_view_name)
        o_dms.close(force=True)
    i_dms.close(force=True)

    logger("info", "Done.")
|
@ -58,7 +58,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
|||||||
|
|
||||||
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)
|
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)
|
||||||
|
|
||||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p tax_name)
|
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p tax_name, bint update)
|
||||||
|
|
||||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
||||||
|
|
||||||
|
@ -19,8 +19,8 @@ cdef class Taxonomy(OBIWrapper) :
|
|||||||
cpdef Taxon get_taxon_by_idx(self, int idx)
|
cpdef Taxon get_taxon_by_idx(self, int idx)
|
||||||
cpdef Taxon get_taxon_by_taxid(self, int taxid)
|
cpdef Taxon get_taxon_by_taxid(self, int taxid)
|
||||||
cpdef Taxon get_taxon_by_name(self, object taxon_name, object restricting_taxid=*)
|
cpdef Taxon get_taxon_by_name(self, object taxon_name, object restricting_taxid=*)
|
||||||
cpdef write(self, object prefix)
|
cpdef write(self, object prefix, bint update=*)
|
||||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=*)
|
cpdef int add_taxon(self, object name, object rank_name, int parent_taxid, int min_taxid=*)
|
||||||
cpdef object get_species(self, int taxid)
|
cpdef object get_species(self, int taxid)
|
||||||
cpdef object get_genus(self, int taxid)
|
cpdef object get_genus(self, int taxid)
|
||||||
cpdef object get_family(self, int taxid)
|
cpdef object get_family(self, int taxid)
|
||||||
|
@ -174,6 +174,7 @@ cdef class Taxonomy(OBIWrapper) :
|
|||||||
|
|
||||||
|
|
||||||
cpdef Taxon get_taxon_by_name(self, object taxon_name, object restricting_taxid=None):
|
cpdef Taxon get_taxon_by_name(self, object taxon_name, object restricting_taxid=None):
|
||||||
|
#print(taxon_name)
|
||||||
taxon = self._name_dict.get(tobytes(taxon_name), None)
|
taxon = self._name_dict.get(tobytes(taxon_name), None)
|
||||||
if not taxon:
|
if not taxon:
|
||||||
return None
|
return None
|
||||||
@ -282,12 +283,12 @@ cdef class Taxonomy(OBIWrapper) :
|
|||||||
yield Taxon(taxon_capsule, self)
|
yield Taxon(taxon_capsule, self)
|
||||||
|
|
||||||
|
|
||||||
cpdef write(self, object prefix) :
|
cpdef write(self, object prefix, bint update=False) :
|
||||||
if obi_write_taxonomy(self._dms.pointer(), self.pointer(), tobytes(prefix)) < 0 :
|
if obi_write_taxonomy(self._dms.pointer(), self.pointer(), tobytes(prefix), update) < 0 :
|
||||||
raise Exception("Error writing the taxonomy to binary files")
|
raise Exception("Error writing the taxonomy to binary files")
|
||||||
|
|
||||||
|
|
||||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
cpdef int add_taxon(self, object name, object rank_name, int parent_taxid, int min_taxid=10000000) :
|
||||||
cdef int taxid
|
cdef int taxid
|
||||||
taxid = obi_taxo_add_local_taxon(self.pointer(), tobytes(name), tobytes(rank_name), parent_taxid, min_taxid)
|
taxid = obi_taxo_add_local_taxon(self.pointer(), tobytes(name), tobytes(rank_name), parent_taxid, min_taxid)
|
||||||
if taxid < 0 :
|
if taxid < 0 :
|
||||||
@ -329,6 +330,7 @@ cdef class Taxonomy(OBIWrapper) :
|
|||||||
if taxon is not None:
|
if taxon is not None:
|
||||||
while taxon.taxid != 1:
|
while taxon.taxid != 1:
|
||||||
yield taxon
|
yield taxon
|
||||||
|
#print(taxon.taxid)
|
||||||
taxon = taxon.parent
|
taxon = taxon.parent
|
||||||
yield taxon
|
yield taxon
|
||||||
else:
|
else:
|
||||||
|
@ -1092,7 +1092,7 @@ static int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxo
|
|||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
|
|
||||||
// Create file
|
// Create file
|
||||||
file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
|
||||||
if (file_descriptor < 0)
|
if (file_descriptor < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
@ -1196,7 +1196,7 @@ static int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* t
|
|||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
|
|
||||||
// Create file
|
// Create file
|
||||||
file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
|
||||||
if (file_descriptor < 0)
|
if (file_descriptor < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
@ -1472,7 +1472,7 @@ static int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxo
|
|||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
|
|
||||||
// Create file
|
// Create file
|
||||||
file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
|
||||||
if (file_descriptor < 0)
|
if (file_descriptor < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
@ -1760,7 +1760,7 @@ static int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax
|
|||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
|
|
||||||
// Create file
|
// Create file
|
||||||
file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
|
||||||
if (file_descriptor < 0)
|
if (file_descriptor < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
@ -3250,47 +3250,48 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name)
|
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name, bool update)
|
||||||
{
|
{
|
||||||
char* taxonomy_path;
|
char* taxonomy_path;
|
||||||
|
|
||||||
// Build the taxonomy directory path
|
if (!update) {
|
||||||
taxonomy_path = get_taxonomy_path(dms, tax_name);
|
// Build the taxonomy directory path
|
||||||
if (taxonomy_path == NULL)
|
taxonomy_path = get_taxonomy_path(dms, tax_name);
|
||||||
return -1;
|
if (taxonomy_path == NULL)
|
||||||
|
return -1;
|
||||||
// Try to create the directory
|
// Try to create the directory
|
||||||
if (mkdir(taxonomy_path, 00777) < 0)
|
if (mkdir(taxonomy_path, 00777) < 0)
|
||||||
{
|
{
|
||||||
if (errno == EEXIST)
|
if (errno == EEXIST)
|
||||||
obidebug(1, "\nA taxonomy already exists with this name.");
|
obidebug(1, "\nA taxonomy already exists with this name.");
|
||||||
obidebug(1, "\nProblem creating a new taxonomy directory");
|
obidebug(1, "\nProblem creating a new taxonomy directory");
|
||||||
|
free(taxonomy_path);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(taxonomy_path);
|
if (write_ranks_idx(dms, tax, tax_name) < 0)
|
||||||
|
return -1;
|
||||||
if (write_ranks_idx(dms, tax, tax_name) < 0)
|
if (write_taxonomy_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if (write_taxonomy_idx(dms, tax, tax_name) < 0)
|
if (write_names_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if (write_names_idx(dms, tax, tax_name) < 0)
|
if (write_merged_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if (write_merged_idx(dms, tax, tax_name) < 0)
|
// Write preferred names if there are some
|
||||||
return -1;
|
if (tax->preferred_names != NULL)
|
||||||
// Check if there are local taxa (if so last taxon is local)
|
{
|
||||||
|
if (write_preferred_names_idx(dms, tax, tax_name) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write local taxa if there are some
|
||||||
if ((tax->taxa)->local_count > 0)
|
if ((tax->taxa)->local_count > 0)
|
||||||
{
|
{
|
||||||
if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
|
if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// Write preferred names if there are some
|
|
||||||
if (tax->preferred_names != NULL)
|
|
||||||
{
|
|
||||||
if (write_preferred_names_idx(dms, tax, tax_name) < 0)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3302,16 +3303,17 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
|||||||
if (taxonomy)
|
if (taxonomy)
|
||||||
{
|
{
|
||||||
// Update local informations (local taxa and preferred names) if there are any
|
// Update local informations (local taxa and preferred names) if there are any
|
||||||
if ((taxonomy->taxa)->local_count > 0)
|
// Done with write_taxo, edits all needed files. Only ldx file was edited in OBI1 but it led to issues. Discussable
|
||||||
{
|
// if ((taxonomy->taxa)->local_count > 0)
|
||||||
if (taxonomy->dms == NULL)
|
// {
|
||||||
{
|
// if (taxonomy->dms == NULL)
|
||||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
// {
|
||||||
obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)"); // TODO discuss
|
// obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
}
|
// obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)"); // TODO discuss
|
||||||
if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
// }
|
||||||
return -1;
|
// if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
||||||
}
|
// return -1;
|
||||||
|
// }
|
||||||
|
|
||||||
// Write preferred names if there are some
|
// Write preferred names if there are some
|
||||||
if (taxonomy->preferred_names)
|
if (taxonomy->preferred_names)
|
||||||
@ -3377,9 +3379,10 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
|||||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||||
{
|
{
|
||||||
int32_t taxid;
|
int32_t taxid;
|
||||||
|
int32_t count;
|
||||||
ecotx_t* taxon;
|
ecotx_t* taxon;
|
||||||
int i;
|
int i;
|
||||||
// econame_t* name_struct;
|
econame_t* name_struct;
|
||||||
|
|
||||||
// Enlarge the structure memory for a new taxon
|
// Enlarge the structure memory for a new taxon
|
||||||
tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1));
|
tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1));
|
||||||
@ -3441,42 +3444,65 @@ int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char
|
|||||||
((tax->taxa)->local_count)++;
|
((tax->taxa)->local_count)++;
|
||||||
(tax->taxa)->buffer_size = (tax->taxa)->count;
|
(tax->taxa)->buffer_size = (tax->taxa)->count;
|
||||||
|
|
||||||
// // Add new name in names structure // Commented because the new name was not added in the .ndx file in the OBITools1
|
// Add new name in names structure // On the OBI1, the new name was not added in the .ndx file but it could create issues
|
||||||
// // Allocate memory for new name
|
// Allocate memory for new name
|
||||||
// tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
|
tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
|
||||||
// if (tax->names == NULL)
|
if (tax->names == NULL)
|
||||||
// {
|
{
|
||||||
// obi_set_errno(OBI_MALLOC_ERROR);
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
// obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
|
obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
|
||||||
// return -1;
|
return -1;
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// // Add new name
|
// Add new name
|
||||||
// name_struct = (tax->names)->names + ((tax->names)->count);
|
name_struct = (tax->names)->names + ((tax->names)->count);
|
||||||
// name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
|
name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
|
||||||
// if (name_struct->name == NULL)
|
if (name_struct->name == NULL)
|
||||||
// {
|
{
|
||||||
// obi_set_errno(OBI_MALLOC_ERROR);
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
// obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
|
obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
|
||||||
// return -1;
|
return -1;
|
||||||
// }
|
}
|
||||||
// strcpy(name_struct->name, name);
|
strcpy(name_struct->name, name);
|
||||||
// name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
|
name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
|
||||||
// if (name_struct->class_name == NULL)
|
if (name_struct->class_name == NULL)
|
||||||
// {
|
{
|
||||||
// obi_set_errno(OBI_MALLOC_ERROR);
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
// obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
|
obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
|
||||||
// return -1;
|
return -1;
|
||||||
// }
|
}
|
||||||
// strcpy(name_struct->class_name, "scientific name");
|
strcpy(name_struct->class_name, "scientific name");
|
||||||
// name_struct->is_scientific_name = true;
|
name_struct->is_scientific_name = true;
|
||||||
// name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
|
name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
|
||||||
//
|
|
||||||
// // Sort names in alphabetical order
|
// Update name count
|
||||||
// qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
|
((tax->names)->count)++;
|
||||||
//
|
|
||||||
// // Update name count
|
// Sort names in alphabetical order
|
||||||
// ((tax->names)->count)++;
|
qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
|
||||||
|
|
||||||
|
// Add to merged index
|
||||||
|
tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * ((tax->merged_idx)->count + 1));
|
||||||
|
if (tax->merged_idx == NULL)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
obidebug(1, "\nError reallocating memory for a taxonomy structure");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
count = (tax->merged_idx)->count;
|
||||||
|
(tax->merged_idx)->count = count + 1;
|
||||||
|
(tax->merged_idx)->merged[count].taxid = taxid;
|
||||||
|
(tax->merged_idx)->merged[count].idx = taxon->idx;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nEntered in merged taxon.idx=%d", (tax->merged_idx)->merged[(tax->merged_idx)->count -1].idx);
|
||||||
|
//fprintf(stderr, "\nEntered in merged taxon.taxid=%d", (tax->merged_idx)->merged[(tax->merged_idx)->count -1].taxid);
|
||||||
|
//fprintf(stderr, "\nEntered in merged at %d", (tax->merged_idx)->count -1);
|
||||||
|
//taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
|
||||||
|
//fprintf(stderr, "\ntaxon=%x", taxon);
|
||||||
|
//fprintf(stderr, "\ntaxon.taxid=%d", taxon->taxid);
|
||||||
|
//fprintf(stderr, "\ntaxon.name=%s", taxon->name);
|
||||||
|
//fprintf(stderr, "\ntaxon.idx=%d\n\n", ((tax->merged_idx)->count));
|
||||||
|
|
||||||
return taxid;
|
return taxid;
|
||||||
}
|
}
|
||||||
@ -3547,11 +3573,12 @@ int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon
|
|||||||
name_struct->is_scientific_name = false;
|
name_struct->is_scientific_name = false;
|
||||||
name_struct->taxon = taxon;
|
name_struct->taxon = taxon;
|
||||||
|
|
||||||
|
// Update preferred name count
|
||||||
|
((tax->preferred_names)->count)++;
|
||||||
|
|
||||||
// Sort preferred names in alphabetical order
|
// Sort preferred names in alphabetical order
|
||||||
qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);
|
qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);
|
||||||
|
|
||||||
// Update preferred name count
|
|
||||||
((tax->preferred_names)->count)++;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -3669,8 +3696,10 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
|
|||||||
else if (indexed_taxon->idx == -1)
|
else if (indexed_taxon->idx == -1)
|
||||||
current_taxon = NULL; // TODO discuss what to do when old deleted taxon
|
current_taxon = NULL; // TODO discuss what to do when old deleted taxon
|
||||||
else
|
else
|
||||||
|
{
|
||||||
current_taxon = (taxonomy->taxa->taxon)+(indexed_taxon->idx);
|
current_taxon = (taxonomy->taxa->taxon)+(indexed_taxon->idx);
|
||||||
|
//fprintf(stderr, "\n>>>idx %d, taxid %d<<<\n", indexed_taxon->idx, indexed_taxon->taxid);
|
||||||
|
}
|
||||||
return current_taxon;
|
return current_taxon;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,6 +239,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||||
* @param tax A pointer on the taxonomy structure.
|
* @param tax A pointer on the taxonomy structure.
|
||||||
* @param tax_name The name (prefix) of the taxonomy.
|
* @param tax_name The name (prefix) of the taxonomy.
|
||||||
|
* @param update Whether to rewrite the files of an already existing taxonomy (true, e.g. after adding local taxa) or to create the directory and files of a new taxonomy (false).
|
||||||
*
|
*
|
||||||
* @returns An integer value indicating the success of the operation.
|
* @returns An integer value indicating the success of the operation.
|
||||||
* @retval 0 on success.
|
* @retval 0 on success.
|
||||||
@ -247,7 +248,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
* @since 2016
|
* @since 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name, bool update);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Reference in New Issue
Block a user