Taxonomy: added functions to check if a taxonomy already exists in a
DMS, and added taxdump import from a compressed file
This commit is contained in:
@ -165,9 +165,9 @@ def __addTaxdumpInputOption(optionManager):
|
||||
group = optionManager.add_argument_group("Input format options for taxdump")
|
||||
|
||||
group.add_argument('--taxdump',
|
||||
action="store", dest="obi:taxdump",
|
||||
default=None,
|
||||
help="Taxdump path")
|
||||
action="store_true", dest="obi:taxdump",
|
||||
default=False,
|
||||
help="Whether the input is a taxdump")
|
||||
|
||||
def addMinimalInputOption(optionManager):
|
||||
__addInputOption(optionManager)
|
||||
|
@ -38,6 +38,8 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
|
||||
|
||||
|
||||
int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name)
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)
|
||||
|
@ -2,7 +2,8 @@
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str, tobytes, tostr
|
||||
|
||||
from ..capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
from ..capi.obitaxonomy cimport obi_taxonomy_exists, \
|
||||
obi_read_taxonomy, \
|
||||
obi_read_taxdump, \
|
||||
obi_write_taxonomy, \
|
||||
obi_close_taxonomy, \
|
||||
@ -17,6 +18,8 @@ from ..capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
|
||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||
|
||||
import tarfile
|
||||
|
||||
|
||||
cdef class Taxonomy(OBIWrapper) :
|
||||
# TODO function to import taxonomy?
|
||||
@ -25,6 +28,16 @@ cdef class Taxonomy(OBIWrapper) :
|
||||
return <OBIDMS_taxonomy_p>(self._pointer)
|
||||
|
||||
|
||||
@staticmethod
def exists(DMS dms, object name) :
    """
    Ask the C layer whether a taxonomy called `name` is present in `dms`.

    The name is converted with tobytes() before being handed to
    obi_taxonomy_exists().

    Returns the non-negative status reported by obi_taxonomy_exists().
    Raises RuntimeError when that status is negative.
    """
    status = obi_taxonomy_exists(dms.pointer(), tobytes(name))

    # Non-negative statuses are passed straight back to the caller
    if status >= 0:
        return status

    raise RuntimeError("Error : Cannot check if taxonomy %s exists"
                       % tostr(name))
|
||||
|
||||
|
||||
@staticmethod
|
||||
def open(DMS dms, object name) :
|
||||
|
||||
@ -51,22 +64,40 @@ cdef class Taxonomy(OBIWrapper) :
|
||||
|
||||
|
||||
@staticmethod
|
||||
def open_taxdump(DMS dms, object name) :
|
||||
def open_taxdump(DMS dms, object path) :
|
||||
|
||||
cdef void* pointer
|
||||
cdef Taxonomy taxo
|
||||
cdef bytes path_b
|
||||
cdef int idx
|
||||
|
||||
pointer = <void*>obi_read_taxdump(tobytes(name))
|
||||
path_b = tobytes(path)
|
||||
folder_path = path_b
|
||||
|
||||
if path_b.endswith(b"tar.gz") or path_b.endswith(b"tar"):
|
||||
idx = path_b.index(b".tar")
|
||||
folder_path = path_b[:idx]
|
||||
|
||||
if path_b.endswith(b"tar.gz"):
|
||||
tar = tarfile.open(path_b, "r:gz")
|
||||
tar.extractall(path=tostr(folder_path))
|
||||
tar.close()
|
||||
elif path_b.endswith(b"tar"):
|
||||
tar = tarfile.open(path_b, "r:")
|
||||
tar.extractall(path=tostr(folder_path))
|
||||
tar.close()
|
||||
|
||||
pointer = <void*>obi_read_taxdump(folder_path)
|
||||
if pointer == NULL :
|
||||
raise RuntimeError("Error : Cannot read taxonomy %s"
|
||||
% tostr(name))
|
||||
% tostr(folder_path))
|
||||
|
||||
taxo = OBIWrapper.new_wrapper(Taxonomy, pointer)
|
||||
|
||||
dms.register(taxo)
|
||||
|
||||
taxo._dms = dms
|
||||
taxo._name = tobytes(name)
|
||||
taxo._name = folder_path
|
||||
|
||||
taxo._ranks = []
|
||||
for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :
|
||||
|
@ -2740,6 +2740,35 @@ int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax)
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
/*
 * Checks whether a directory exists at the path that get_taxonomy_path()
 * builds for the given DMS and taxonomy name.
 *
 * Returns 1 when the directory can be opened, 0 when opendir() fails with
 * ENOENT, and -1 when the path cannot be built or when opendir() fails for
 * any other reason.
 */
int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name)
{
	char* path = get_taxonomy_path(dms, taxonomy_name);
	DIR*  handle;

	if (path == NULL)
		return -1;

	/* NOTE(review): path is never released in this function; confirm whether
	 * get_taxonomy_path() hands ownership of the buffer to its caller. */
	handle = opendir(path);
	if (handle != NULL)
	{
		closedir(handle);
		return 1;
	}

	/* opendir() failed: a missing directory simply means "does not exist" */
	return (errno == ENOENT) ? 0 : -1;
}
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
||||
{
|
||||
OBIDMS_taxonomy_p tax;
|
||||
@ -3705,7 +3734,7 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
{
|
||||
static int32_t rankindex = -1;
|
||||
static int32_t rankindex = -1;
|
||||
|
||||
if (taxonomy == NULL)
|
||||
{
|
||||
@ -3738,3 +3767,23 @@ const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
|
||||
return (ranks->label)[rank_idx];
|
||||
}
|
||||
|
||||
|
||||
/*
 * Tells whether a taxid falls within a list of restricting taxids.
 *
 * The taxon is looked up with obi_taxo_get_taxon_with_taxid(). It counts as
 * included when its own taxid equals one of the first `count` entries of
 * `restrict_to_taxids`, or when obi_taxo_is_taxon_under_taxid() returns a
 * non-zero value for one of them.
 *
 * Returns 1 when included, 0 otherwise (also 0 when the lookup yields no
 * taxon).
 */
int obi_taxo_is_taxid_included(OBIDMS_taxonomy_p taxonomy,
                               int32_t* restrict_to_taxids,
                               int32_t count,
                               int32_t taxid)
{
	ecotx_t* found = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
	int      k;

	if (found == NULL)
		return 0;

	for (k = 0; k < count; k++)
	{
		int32_t allowed = restrict_to_taxids[k];

		/* Exact match first, then the ancestry test */
		if (found->taxid == allowed)
			return 1;
		if (obi_taxo_is_taxon_under_taxid(found, allowed))
			return 1;
	}

	return 0;
}
|
||||
|
||||
|
@ -183,6 +183,25 @@ typedef struct OBIDMS_taxonomy_t {
|
||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function checking whether a taxonomy is already registered in a DMS using its name.
|
||||
*
|
||||
* @param dms A pointer on the DMS.
|
||||
* @param taxonomy_name The name (prefix) of the taxonomy.
|
||||
*
|
||||
* @retval 1 if the taxonomy exists.
|
||||
* @retval 0 if the taxonomy does not exist.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since June 2018
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
|
||||
*
|
||||
@ -414,3 +433,10 @@ ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
* @see rank_label_to_index()
|
||||
*/
|
||||
const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks);
|
||||
|
||||
|
||||
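/**
* @brief Function checking whether a taxid is included in a list of restricting taxids.
*
* @param taxonomy A pointer on the taxonomy structure used to look up the taxon.
* @param restrict_to_taxids The array of taxids to test against.
* @param count The number of entries of restrict_to_taxids to test.
* @param taxid The taxid to check.
*
* @retval 1 if the taxon is found and its taxid is equal to, or under, one of the restricting taxids.
* @retval 0 otherwise.
*/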
|
||||
int obi_taxo_is_taxid_included(OBIDMS_taxonomy_p taxonomy,
|
||||
int32_t* restrict_to_taxids,
|
||||
int32_t count,
|
||||
int32_t taxid);
|
||||
|
Reference in New Issue
Block a user