Taxonomy: added functions to check if a taxonomy already exists in a

DMS, and added taxdump import from a compressed file
This commit is contained in:
Celine Mercier
2018-07-28 16:48:11 +02:00
parent 1de308a856
commit 7f6d1597fc
5 changed files with 120 additions and 12 deletions

View File

@ -165,9 +165,9 @@ def __addTaxdumpInputOption(optionManager):
group = optionManager.add_argument_group("Input format options for taxdump")
group.add_argument('--taxdump',
action="store", dest="obi:taxdump",
default=None,
help="Taxdump path")
action="store_true", dest="obi:taxdump",
default=False,
help="Whether the input is a taxdump")
def addMinimalInputOption(optionManager):
__addInputOption(optionManager)

View File

@ -37,7 +37,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name)
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)

View File

@ -2,7 +2,8 @@
from obitools3.utils cimport str2bytes, bytes2str, tobytes, tostr
from ..capi.obitaxonomy cimport obi_read_taxonomy, \
from ..capi.obitaxonomy cimport obi_taxonomy_exists, \
obi_read_taxonomy, \
obi_read_taxdump, \
obi_write_taxonomy, \
obi_close_taxonomy, \
@ -17,6 +18,8 @@ from ..capi.obitaxonomy cimport obi_read_taxonomy, \
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
import tarfile
cdef class Taxonomy(OBIWrapper) :
# TODO function to import taxonomy?
@ -25,6 +28,16 @@ cdef class Taxonomy(OBIWrapper) :
return <OBIDMS_taxonomy_p>(self._pointer)
@staticmethod
def exists(DMS dms, object name) :
    """
    Check whether a taxonomy called `name` exists in `dms`.

    Delegates to the C function obi_taxonomy_exists() and returns its
    result unchanged when it is not negative (1: the taxonomy exists,
    0: it does not). A negative result is reported as a RuntimeError.
    """
    status = obi_taxonomy_exists(dms.pointer(), tobytes(name))
    if status >= 0:
        return status
    raise RuntimeError("Error : Cannot check if taxonomy %s exists"
                       % tostr(name))
@staticmethod
def open(DMS dms, object name) :
@ -46,27 +59,45 @@ cdef class Taxonomy(OBIWrapper) :
taxo._ranks = []
for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :
taxo._ranks.append(obi_taxo_rank_index_to_label(r, (<OBIDMS_taxonomy_p>pointer).ranks))
return taxo
@staticmethod
def open_taxdump(DMS dms, object name) :
def open_taxdump(DMS dms, object path) :
cdef void* pointer
cdef Taxonomy taxo
pointer = <void*>obi_read_taxdump(tobytes(name))
cdef bytes path_b
cdef int idx
path_b = tobytes(path)
folder_path = path_b
if path_b.endswith(b"tar.gz") or path_b.endswith(b"tar"):
idx = path_b.index(b".tar")
folder_path = path_b[:idx]
if path_b.endswith(b"tar.gz"):
tar = tarfile.open(path_b, "r:gz")
tar.extractall(path=tostr(folder_path))
tar.close()
elif path_b.endswith(b"tar"):
tar = tarfile.open(path_b, "r:")
tar.extractall(path=tostr(folder_path))
tar.close()
pointer = <void*>obi_read_taxdump(folder_path)
if pointer == NULL :
raise RuntimeError("Error : Cannot read taxonomy %s"
% tostr(name))
% tostr(folder_path))
taxo = OBIWrapper.new_wrapper(Taxonomy, pointer)
dms.register(taxo)
taxo._dms = dms
taxo._name = tobytes(name)
taxo._name = folder_path
taxo._ranks = []
for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :

View File

@ -2740,6 +2740,35 @@ int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax)
**********************************************************************/
/*
 * Checks whether the directory of the taxonomy named `taxonomy_name` can be
 * opened in the given DMS.
 *
 * Returns 1 if the directory could be opened, 0 if opendir() failed with
 * ENOENT (the directory does not exist), and -1 if the path could not be
 * built or opendir() failed for any other reason.
 */
int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name)
{
	char* path;
	DIR*  handle;

	path = get_taxonomy_path(dms, taxonomy_name);
	if (path == NULL)
		return -1;

	/* NOTE(review): `path` is never released in this function; if
	 * get_taxonomy_path() hands back heap memory this leaks it -- confirm. */
	handle = opendir(path);
	if (handle != NULL)
	{
		/* The directory is there: only its existence matters, so close it right away. */
		closedir(handle);
		return 1;
	}

	/* errno is read immediately after the failed opendir(), before any other call can change it. */
	return (errno == ENOENT) ? 0 : -1;
}
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
{
OBIDMS_taxonomy_p tax;
@ -3705,7 +3734,7 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
static int32_t rankindex = -1;
static int32_t rankindex = -1;
if (taxonomy == NULL)
{
@ -3738,3 +3767,23 @@ const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
return (ranks->label)[rank_idx];
}
/*
 * Tells whether the taxon found for `taxid` is one of, or is located under one of,
 * the `count` taxids listed in `restrict_to_taxids`.
 *
 * Returns 1 as soon as one listed taxid matches, 0 if none matches or if no
 * taxon is found for `taxid` in the taxonomy.
 */
int obi_taxo_is_taxid_included(OBIDMS_taxonomy_p taxonomy,
                               int32_t* restrict_to_taxids,
                               int32_t count,
                               int32_t taxid)
{
	ecotx_t* found;
	int      k;

	found = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
	if (found == NULL)
		return 0;

	for (k = 0; k < count; k++)
	{
		/* Exact match first; the ancestry lookup is only done when it fails. */
		if (found->taxid == restrict_to_taxids[k])
			return 1;
		if (obi_taxo_is_taxon_under_taxid(found, restrict_to_taxids[k]))
			return 1;
	}

	return 0;
}

View File

@ -183,6 +183,25 @@ typedef struct OBIDMS_taxonomy_t {
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
/**
 * @brief Function checking whether a taxonomy is already registered in a DMS using its name.
 *
 * @param dms A pointer on the DMS.
 * @param taxonomy_name The name (prefix) of the taxonomy.
 *
 * @retval 1 if the taxonomy exists.
 * @retval 0 if the taxonomy does not exist.
 * @retval -1 if an error occurred.
 *
 * @since June 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name);
/**
* @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
*
@ -414,3 +433,10 @@ ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
* @see rank_label_to_index()
*/
const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks);
/**
 * @brief Function checking whether a taxid is equal to, or located under, one of the taxids of a list.
 *
 * The taxon corresponding to the taxid is first looked up in the taxonomy; if it is not found,
 * the function returns 0.
 *
 * @param taxonomy A pointer on the taxonomy structure in which the taxid is looked up.
 * @param restrict_to_taxids The array of taxids to check against.
 * @param count The number of taxids in restrict_to_taxids that are checked.
 * @param taxid The taxid to check.
 *
 * @retval 1 if the taxon found for the taxid has one of the listed taxids or is under one of them.
 * @retval 0 otherwise.
 */
int obi_taxo_is_taxid_included(OBIDMS_taxonomy_p taxonomy,
int32_t* restrict_to_taxids,
int32_t count,
int32_t taxid);