165 lines
4.6 KiB
Python
Executable File
165 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import gzip
import struct
import sys
import time
|
|
|
|
#####
|
|
#
|
|
#
|
|
# Generic file functions
|
|
#
|
|
#
|
|
#####
|
|
|
|
def universalOpen(file):
    """Return a readable file object for *file*.

    A string is treated as a path: a name ending in ``.gz`` is opened with
    ``gzip.open``, any other name with the builtin ``open`` (both using their
    default modes).  A non-string argument is assumed to already be a file
    object and is returned unchanged.
    """
    if not isinstance(file, str):
        # Already an open stream (or at least not a path): hand it back as is.
        return file
    if file.endswith('.gz'):
        return gzip.open(file)
    return open(file)
|
|
|
|
def universalTell(file):
    """Return the current position of the file underlying *file*.

    For a ``gzip.GzipFile`` the position reported is the one of the wrapped
    (compressed) file object, not the offset in the decompressed data.  Any
    other object is simply asked for its own ``tell()``.
    """
    if isinstance(file, gzip.GzipFile):
        # ``fileobj`` always references the wrapped file, whereas
        # ``myfileobj`` is None when the GzipFile was built around a
        # caller-supplied file object.
        file = file.fileobj
    return file.tell()
|
|
|
|
def fileSize(file):
    """Return the size in bytes of the file underlying *file*.

    For a ``gzip.GzipFile`` the size is the one of the wrapped (compressed)
    file object.  The size is obtained by seeking to the end of the file; the
    position the file had on entry is restored before returning.
    """
    if isinstance(file, gzip.GzipFile):
        # ``fileobj`` always references the wrapped file, whereas
        # ``myfileobj`` is None when the GzipFile was built around a
        # caller-supplied file object.
        file = file.fileobj
    saved = file.tell()
    file.seek(0, 2)  # whence=2: relative to the end of the file
    length = file.tell()
    file.seek(saved, 0)  # whence=0: absolute, back to where we started
    return length
|
|
|
|
def progressBar(pos,max,reset=False,delta=[]):
    """Write a one-line text progress bar to ``sys.stderr``.

    pos   -- current position; must be an integer (it also selects the
             spinner character through ``pos % 5``).
    max   -- position corresponding to 100 %.  A zero value is reported
             as 100 % instead of raising ZeroDivisionError.
    reset -- when true, restart the clock used to estimate the remaining time.
    delta -- internal state ``[start_time, last_time]``.  The mutable default
             is deliberate: it keeps the start time between successive calls.

    The remaining time is extrapolated linearly from the time elapsed since
    the first call (or the last reset).  While no progress has been made
    (0 %) no estimate is possible and ``--:--:--`` is shown.
    """
    now = time.time()
    if reset:
        del delta[:]
    if not delta:
        delta.extend((now, now))
    delta[1] = now
    elapsed = delta[1] - delta[0]

    if max:
        percent = float(pos) / max * 100
    else:
        percent = 100.0

    if percent > 0:
        remaining_seconds = elapsed / percent * (100 - percent)
        remain = time.strftime('%H:%M:%S', time.gmtime(remaining_seconds))
    else:
        # Nothing done yet: a linear extrapolation would divide by zero.
        remain = '--:--:--'

    filled = int(percent / 2)  # the bar is 50 characters wide
    bar = '#' * filled
    bar += '|/-\\-'[pos % 5]
    bar += ' ' * (50 - filled)
    sys.stderr.write('\r%5.1f %% |%s] remain : %s' % (percent, bar, remain))
|
|
|
|
def endLessIterator(endedlist):
    """Yield every item of *endedlist* in order, then its last item forever.

    The generator never terminates by itself; the caller decides when to stop
    pulling values.  *endedlist* must support ``[-1]`` indexing.
    """
    for element in endedlist:
        yield element
    # The last element is looked up again on every pass.
    while True:
        yield endedlist[-1]
|
|
|
|
class ColumnFile(object):
    """Iterate over the lines of a delimited text stream as lists of fields.

    stream -- a file name (opened with the builtin ``open``) or any iterator
              yielding lines.
    sep    -- field delimiter handed to ``str.split``; ``None`` splits on
              runs of whitespace.
    strip  -- strip surrounding whitespace from every field.  Fields are
              always stripped when *types* is given.
    types  -- optional sequence of callables used to convert the fields,
              column by column.  When a line has more fields than there are
              converters, the last converter is used for the extra fields.
              ``bool`` entries are replaced by ``ColumnFile.str2bool``.
    skip   -- lines whose first character equals this value are ignored.

    Raises ValueError when *stream* is neither a string nor an iterator.
    """

    # First-letter codes understood by str2bool.  'V' presumably stands for
    # the French "vrai" -- TODO confirm.
    _BOOL_CODES = {'T': True, 'V': True, 'F': False}

    def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
        if isinstance(stream, str):
            self._stream = open(stream)
        elif hasattr(stream, 'next') or hasattr(stream, '__next__'):
            # Accept both the Python 2 and the Python 3 iterator protocols.
            self._stream = stream
        else:
            raise ValueError('stream must be string or an iterator')
        self._delimiter = sep
        self._strip = strip
        if types:
            # bool('F') would be True, so bool columns get a dedicated parser.
            self._types = [ColumnFile.str2bool if t is bool else t
                           for t in types]
        else:
            self._types = None
        self._skip = skip

    @staticmethod
    def str2bool(x):
        """Convert a textual flag to a boolean using its first character.

        After stripping, the first character is upper-cased: 'T' and 'V' give
        True, 'F' gives False and a digit is true unless it is '0'.  Any other
        value (including an empty string) raises ValueError.
        """
        code = x.strip()[:1].upper()
        if code in ColumnFile._BOOL_CODES:
            return ColumnFile._BOOL_CODES[code]
        if code.isdigit():
            return bool(int(code))
        raise ValueError('cannot interpret %r as a boolean' % (x,))

    def __iter__(self):
        return self

    def _convert(self, fields):
        """Apply the column converters to *fields*, reusing the last one."""
        types = self._types
        last = len(types) - 1
        return [types[min(index, last)](field)
                for index, field in enumerate(fields)]

    def next(self):
        """Return the fields of the next line that is not skipped.

        Raises StopIteration when the underlying stream is exhausted.
        """
        ligne = next(self._stream)
        # Slicing (rather than indexing) keeps empty lines from raising
        # IndexError.
        while ligne[:1] == self._skip:
            ligne = next(self._stream)
        data = ligne.split(self._delimiter)
        if self._strip or self._types:
            data = [x.strip() for x in data]
        if self._types:
            data = self._convert(data)
        return data

    # Python 3 spelling of the iterator protocol.
    __next__ = next
|
|
|
|
|
|
def ecoPCRResultIterator(file):
    """Iterate over the rows of a '|'-separated result file as dictionaries.

    *file* is a file name or an already opened stream (see universalOpen).
    Lines starting with '#' are skipped.  Every remaining line must hold
    exactly twenty fields, which are converted to ``str`` or ``int`` and
    yielded as a dict keyed by column name.
    """
    column_types = (str, int, int,
                    str, int, str,
                    int, str, int,
                    str, int, str,
                    str, str, int,
                    str, int, int,
                    str, str)
    rows = ColumnFile(universalOpen(file),
                      sep='|',
                      types=column_types,
                      skip='#')

    for row in rows:
        # Unpacking enforces the expected number of columns.
        (ac, sq_len, taxid,
         rank, sp_taxid, species,
         ge_taxid, genus, fa_taxid,
         family, sk_taxid, s_kgdom,
         strand, oligo_1, error_1,
         oligo_2, error_2, amp_len,
         sq_des, definition) = row

        yield dict(ac=ac, sq_len=sq_len, taxid=taxid,
                   rank=rank, sp_taxid=sp_taxid, species=species,
                   ge_taxid=ge_taxid, genus=genus, fa_taxid=fa_taxid,
                   family=family, sk_taxid=sk_taxid, s_kgdom=s_kgdom,
                   strand=strand, oligo_1=oligo_1, error_1=error_1,
                   oligo_2=oligo_2, error_2=error_2, amp_len=amp_len,
                   sq_des=sq_des, definition=definition)
|
|
|
|
|
|
def ecoRecordIterator(file):
    """Yield the raw records stored in a length-prefixed binary file.

    *file* is a file name or an already opened stream (see universalOpen).
    The stream starts with a big-endian unsigned 32-bit record count; each
    record is then stored as a big-endian unsigned 32-bit size followed by
    that many bytes, which are yielded undecoded.
    """
    stream = universalOpen(file)
    (remaining,) = struct.unpack('> I', stream.read(4))

    while remaining > 0:
        (size,) = struct.unpack('>I', stream.read(4))
        yield stream.read(size)
        remaining -= 1
|
|
|
|
|
|
def ecoNameIterator(file):
    """Yield ``(name, classname, indextaxid)`` for each record of *file*.

    Each record is four big-endian unsigned 32-bit integers followed by a
    text area.  The second integer is the length of the name; the remainder
    of the text area is the class name.  The first and third integers are
    unpacked but not returned.
    """
    for record in ecoRecordIterator(file):
        text_size = len(record) - 16  # 16 bytes = four 32-bit header fields
        fields = struct.unpack('> I I I I %ds' % text_size, record)
        name_size = fields[1]
        taxid_index = fields[3]
        text = fields[4]
        yield (text[:name_size], text[name_size:], taxid_index)
|
|
|
|
|
|
def ecoTaxonomicIterator(file):
    """Yield ``(taxid, rankid, parentidx, name)`` for each record of *file*.

    Each record is four big-endian unsigned 32-bit integers followed by the
    name, which fills the rest of the record.  The fourth integer is
    unpacked but not returned.
    """
    for record in ecoRecordIterator(file):
        name_size = len(record) - 16  # 16 bytes = four 32-bit header fields
        fields = struct.unpack('> I I I I %ds' % name_size, record)
        # Drop the fourth header field, keep the three ids and the name.
        yield fields[:3] + fields[4:]
|
|
|
|
|
|
def ecoRankIterator(file=None):
    """Yield each record of *file* unchanged, as read by ecoRecordIterator.

    *file* is passed straight to ecoRecordIterator.
    """
    records = ecoRecordIterator(file)
    for rank in records:
        yield rank
|