diff --git a/python/obitools3/format/fasta.pyx b/python/obitools3/format/fasta.pyx index b09e36a..54c13fa 100644 --- a/python/obitools3/format/fasta.pyx +++ b/python/obitools3/format/fasta.pyx @@ -7,10 +7,11 @@ from obitools3.utils cimport bytes2str cdef class FastaFormat: - def __init__(self, list tags=[], bint printNAKeys=False): + def __init__(self, list tags=[], bint printNAKeys=False, bytes NAString=b"NA"): self.headerFormatter = HeaderFormat("fasta", - tags, - printNAKeys) + tags=tags, + printNAKeys=printNAKeys, + NAString=NAString) @cython.boundscheck(False) def __call__(self, object data): @@ -24,5 +25,5 @@ cdef class FastaFormat: brawseq = b'\n'.join(lines) - return bytes2str(self.headerFormatter(data) + b"\n" + brawseq) + return self.headerFormatter(data) + b"\n" + brawseq diff --git a/python/obitools3/format/fastq.pyx b/python/obitools3/format/fastq.pyx index 4bda2dc..b149187 100644 --- a/python/obitools3/format/fastq.pyx +++ b/python/obitools3/format/fastq.pyx @@ -5,28 +5,25 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN from obitools3.utils cimport bytes2str, str2bytes, tobytes -# TODO quality offset option +# TODO quality offset option? cdef class FastqFormat: - def __init__(self, list tags=[], bint printNAKeys=False): + def __init__(self, list tags=[], bint printNAKeys=False, bytes NAString=b"NA"): self.headerFormatter = HeaderFormat("fastq", - tags, - printNAKeys) + tags=tags, + printNAKeys=printNAKeys, + NAString=NAString) @cython.boundscheck(False) def __call__(self, object data): - cdef bytes quality + cdef bytes quality - if hasattr(data, "quality_str"): - quality = str2bytes(data.quality_str) # TODO quality_bytes property + if hasattr(data, "quality_bytes"): + quality = data.quality_bytes elif hasattr(data, "quality"): quality = tobytes(data.quality) else: raise AttributeError("No quality when exporting to fastq") # TODO discuss - return bytes2str(self.headerFormatter(data) + - b"\n" + - data[NUC_SEQUENCE_COLUMN] + - b"\n+\n" + - quality) + return self.headerFormatter(data) + b"\n" + data[NUC_SEQUENCE_COLUMN] + b"\n+\n" + quality diff --git a/python/obitools3/format/header.pxd b/python/obitools3/format/header.pxd index e08ed9f..7a627bf 100644 --- a/python/obitools3/format/header.pxd +++ b/python/obitools3/format/header.pxd @@ -2,6 +2,7 @@ cdef class HeaderFormat: cdef bytes start cdef set tags - cdef bint printNaKeys + cdef bint printNAKeys + cdef bytes NAString cdef size_t headerBufferLength \ No newline at end of file diff --git a/python/obitools3/format/header.pyx b/python/obitools3/format/header.pyx index 1fcf237..9e24106 100644 --- a/python/obitools3/format/header.pyx +++ b/python/obitools3/format/header.pyx @@ -7,13 +7,14 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \ COUNT_COLUMN from obitools3.utils cimport str2bytes +from obitools3.dms.column.column cimport Column_line cdef class HeaderFormat: SPECIAL_KEYS = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN] - def __init__(self, str format="fasta", list tags=[], bint printNAKeys=False): + def __init__(self, str format="fasta", list tags=[], bint printNAKeys=False, bytes NAString=b"NA"): ''' @param format: @type format: `str` @@ -23,10 +24,14 @@ cdef class HeaderFormat: @param printNAKeys: @type printNAKeys: `bool` + + @param NAString: + @type NAString: `bytes` ''' self.tags = set(tags) - self.printNaKeys = printNAKeys + self.printNAKeys = printNAKeys + self.NAString = NAString if format=="fasta": self.start=b">" @@ -43,7 +48,6 @@ cdef class HeaderFormat: cdef list lines = [b""] cdef bytes tagline - if self.tags is not None and self.tags: ktags = self.tags else: @@ -52,9 +56,17 @@ cdef class HeaderFormat: for k in ktags: if k in tags: value = data[k] - if value is not None or self.printNaKeys: - lines.append(k + b"=" + str2bytes(str(data[k]))) #TODO bytes() method on values (str equivalent) - + if value is None: + if self.printNAKeys: + value = self.NAString + else: + if type(value) == Column_line: + value = value.bytes() + else: + value = str2bytes(str(value)) # TODO ugly but how else? + if value is not None: + lines.append(k + b"=" + value + b";") + if len(lines) > 1: tagline=b" ".join(lines) else: