export: now exports the header for tabular files by default, and adds an option
to export only specific columns
This commit is contained in:
@ -137,10 +137,10 @@ def __addImportInputOption(optionManager):
|
|||||||
def __addTabularOption(optionManager):
|
def __addTabularOption(optionManager):
|
||||||
group = optionManager.add_argument_group("Input and output format options for tabular files")
|
group = optionManager.add_argument_group("Input and output format options for tabular files")
|
||||||
|
|
||||||
group.add_argument('--header',
|
group.add_argument('--no-header',
|
||||||
action="store_true", dest="obi:header",
|
action="store_false", dest="obi:header",
|
||||||
default=False,
|
default=True,
|
||||||
help="First line of tabular file contains column names")
|
help="Don't print the header (first line with column names")
|
||||||
|
|
||||||
group.add_argument('--sep',
|
group.add_argument('--sep',
|
||||||
action="store", dest="obi:sep",
|
action="store", dest="obi:sep",
|
||||||
@ -297,6 +297,12 @@ def __addExportOutputOption(optionManager):
|
|||||||
const=b'tabular',
|
const=b'tabular',
|
||||||
help="Output file is in tabular format")
|
help="Output file is in tabular format")
|
||||||
|
|
||||||
|
group.add_argument('--only-keys',
|
||||||
|
action="append", dest="obi:only_keys",
|
||||||
|
type=str,
|
||||||
|
default=[],
|
||||||
|
help="Only export the given keys (columns).")
|
||||||
|
|
||||||
group.add_argument('--print-na',
|
group.add_argument('--print-na',
|
||||||
action="store_true", dest="obi:printna",
|
action="store_true", dest="obi:printna",
|
||||||
default=False,
|
default=False,
|
||||||
|
@ -4,6 +4,6 @@ cdef class TabFormat:
|
|||||||
cdef bint header
|
cdef bint header
|
||||||
cdef bint first_line
|
cdef bint first_line
|
||||||
cdef bytes NAString
|
cdef bytes NAString
|
||||||
cdef list tags
|
cdef set tags
|
||||||
cdef bytes sep
|
cdef bytes sep
|
||||||
cdef bint NAIntTo0
|
cdef bint NAIntTo0
|
@ -10,7 +10,8 @@ import sys
|
|||||||
|
|
||||||
cdef class TabFormat:
|
cdef class TabFormat:
|
||||||
|
|
||||||
def __init__(self, header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True):
|
def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True):
|
||||||
|
self.tags = set(tags)
|
||||||
self.header = header
|
self.header = header
|
||||||
self.first_line = True
|
self.first_line = True
|
||||||
self.NAString = NAString
|
self.NAString = NAString
|
||||||
@ -20,46 +21,53 @@ cdef class TabFormat:
|
|||||||
@cython.boundscheck(False)
|
@cython.boundscheck(False)
|
||||||
def __call__(self, object data):
|
def __call__(self, object data):
|
||||||
|
|
||||||
|
cdef set ktags
|
||||||
|
cdef list tags = [key for key in data]
|
||||||
|
|
||||||
line = []
|
line = []
|
||||||
|
|
||||||
if self.first_line:
|
if self.tags is not None and self.tags:
|
||||||
self.tags = [k for k in data.keys()]
|
ktags = self.tags
|
||||||
|
else:
|
||||||
|
ktags = set(tags)
|
||||||
|
|
||||||
if self.header and self.first_line:
|
if self.header and self.first_line:
|
||||||
for k in self.tags:
|
for k in ktags:
|
||||||
if isinstance(data.view[k], Column_multi_elts):
|
if k in tags:
|
||||||
keys = data.view[k].keys()
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
keys.sort()
|
keys = data.view[k].keys()
|
||||||
for k2 in keys:
|
keys.sort()
|
||||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
for k2 in keys:
|
||||||
else:
|
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||||
line.append(tobytes(k))
|
else:
|
||||||
|
line.append(tobytes(k))
|
||||||
r = self.sep.join(value for value in line)
|
r = self.sep.join(value for value in line)
|
||||||
r += b'\n'
|
r += b'\n'
|
||||||
line = []
|
line = []
|
||||||
|
|
||||||
for k in self.tags:
|
for k in ktags:
|
||||||
value = data[k]
|
if k in tags:
|
||||||
if isinstance(data.view[k], Column_multi_elts):
|
value = data[k]
|
||||||
keys = data.view[k].keys()
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
keys.sort()
|
keys = data.view[k].keys()
|
||||||
if value is None: # all keys at None
|
keys.sort()
|
||||||
for k2 in keys: # TODO could be much more efficient
|
if value is None: # all keys at None
|
||||||
line.append(self.NAString)
|
for k2 in keys: # TODO could be much more efficient
|
||||||
else:
|
line.append(self.NAString)
|
||||||
for k2 in keys: # TODO could be much more efficient
|
else:
|
||||||
if value[k2] is not None:
|
for k2 in keys: # TODO could be much more efficient
|
||||||
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
if value[k2] is not None:
|
||||||
else:
|
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||||
if self.NAIntTo0 and isinstance(data.view[k], Column_multi_elts_int):
|
|
||||||
line.append(b"0")
|
|
||||||
else:
|
else:
|
||||||
line.append(self.NAString)
|
if self.NAIntTo0 and isinstance(data.view[k], Column_multi_elts_int):
|
||||||
else:
|
line.append(b"0")
|
||||||
if value is not None or (self.NAIntTo0 and isinstance(data.view[k], Column_int)):
|
else:
|
||||||
line.append(str2bytes(str(bytes2str_object(value))))
|
line.append(self.NAString)
|
||||||
else:
|
else:
|
||||||
line.append(self.NAString)
|
if value is not None or (self.NAIntTo0 and isinstance(data.view[k], Column_int)):
|
||||||
|
line.append(str2bytes(str(bytes2str_object(value))))
|
||||||
|
else:
|
||||||
|
line.append(self.NAString)
|
||||||
|
|
||||||
if self.header and self.first_line:
|
if self.header and self.first_line:
|
||||||
r += self.sep.join(value for value in line)
|
r += self.sep.join(value for value in line)
|
||||||
|
@ -476,6 +476,18 @@ def open_uri(uri,
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
commentchar=b'#'
|
commentchar=b'#'
|
||||||
|
|
||||||
|
if b"only_keys" in qualifiers:
|
||||||
|
only_keys=qualifiers[b"only_keys"][0] # not sure that works but no one ever uses qualifiers
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
only_keys_str=config["obi"]["only_keys"]
|
||||||
|
only_keys=[]
|
||||||
|
for key in only_keys_str:
|
||||||
|
only_keys.append(tobytes(key))
|
||||||
|
except KeyError:
|
||||||
|
only_keys=[]
|
||||||
|
|
||||||
|
|
||||||
if format is not None:
|
if format is not None:
|
||||||
if seqtype==b"nuc":
|
if seqtype==b"nuc":
|
||||||
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
||||||
@ -486,7 +498,7 @@ def open_uri(uri,
|
|||||||
only=only,
|
only=only,
|
||||||
nastring=nastring)
|
nastring=nastring)
|
||||||
else:
|
else:
|
||||||
iseq = FastaNucWriter(FastaFormat(printNAKeys=printna, NAString=nastring),
|
iseq = FastaNucWriter(FastaFormat(tags=only_keys, printNAKeys=printna, NAString=nastring),
|
||||||
file,
|
file,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
only=only)
|
only=only)
|
||||||
@ -499,7 +511,7 @@ def open_uri(uri,
|
|||||||
noquality=noquality,
|
noquality=noquality,
|
||||||
nastring=nastring)
|
nastring=nastring)
|
||||||
else:
|
else:
|
||||||
iseq = FastqWriter(FastqFormat(printNAKeys=printna, NAString=nastring),
|
iseq = FastqWriter(FastqFormat(tags=only_keys, printNAKeys=printna, NAString=nastring),
|
||||||
file,
|
file,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
only=only)
|
only=only)
|
||||||
@ -535,7 +547,7 @@ def open_uri(uri,
|
|||||||
skip = skip,
|
skip = skip,
|
||||||
only = only)
|
only = only)
|
||||||
else:
|
else:
|
||||||
iseq = TabWriter(TabFormat(header=header, NAString=nastring, sep=sep, NAIntTo0=na_int_to_0),
|
iseq = TabWriter(TabFormat(tags=only_keys, header=header, NAString=nastring, sep=sep, NAIntTo0=na_int_to_0),
|
||||||
file,
|
file,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
only=only,
|
only=only,
|
||||||
@ -571,7 +583,7 @@ def open_uri(uri,
|
|||||||
commentchar)
|
commentchar)
|
||||||
else: # default export is in fasta? or tab? TODO
|
else: # default export is in fasta? or tab? TODO
|
||||||
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
||||||
iseq = FastaNucWriter(FastaFormat(printNAKeys=printna, NAString=nastring),
|
iseq = FastaNucWriter(FastaFormat(tags=only_keys, printNAKeys=printna, NAString=nastring),
|
||||||
file,
|
file,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
only=only)
|
only=only)
|
||||||
|
Reference in New Issue
Block a user