merging and fixed git conflict with obiavl.h

This commit is contained in:
Celine Mercier
2016-04-15 13:23:29 +02:00
34 changed files with 1068 additions and 288 deletions

View File

@ -7,107 +7,109 @@ Created on 13 fevr. 2014
from distutils import log
import os
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
from Cython.Compiler import Options as cython_options # @UnresolvedImport
from distutils.errors import DistutilsSetupError
class build_ext(ori_build_ext):
try:
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
from Cython.Compiler import Options as cython_options # @UnresolvedImport
class build_ext(ori_build_ext):
def modifyDocScripts(self):
    """
    Record the build directory for the Sphinx documentation scripts.

    Writes ``self.build_lib`` followed by a newline to
    ``doc/sphinx/build_dir.txt`` (path relative to the current directory).
    """
    # The context manager closes the file even when the write fails.
    with open("doc/sphinx/build_dir.txt","w") as build_dir_file:
        print(self.build_lib,file=build_dir_file)
def initialize_options(self):
ori_build_ext.initialize_options(self) # @UndefinedVariable
self.littlebigman = None
self.built_files = None
def finalize_options(self):
    """
    Finalize the command options and propagate the byte-order macro.

    After the parent finalization, the ``littlebigman`` value is fetched
    from the ``littlebigman`` command and the ``files`` value of the
    ``build_files`` command is stored as ``built_files``. When the
    detected flag is ``-DLITTLE_END`` the ``LITTLE_END`` macro is added
    to the macros defined for the build.
    """
    ori_build_ext.finalize_options(self)  # @UndefinedVariable
    self.set_undefined_options('littlebigman',
                               ('littlebigman', 'littlebigman'))
    self.set_undefined_options('build_files',
                               ('files', 'built_files'))
    self.cython_c_in_temp = 1
    if self.littlebigman =='-DLITTLE_END':
        if self.define is None:
            self.define=[('LITTLE_END',None)]
        else:
            # list.append() takes a single argument: the (name, value)
            # macro must be passed as one tuple.
            self.define.append(('LITTLE_END',None))
def substitute_sources(self,exe_name,sources):
"""
Substitutes source file name starting by an @ by the actual
name of the built file (see --> build_files)
"""
sources = list(sources)
for i in range(len(sources)):
message = "%s :-> %s" % (exe_name,sources[i])
if sources[i][0]=='@':
try:
filename = self.built_files[sources[i][1:]]
except KeyError:
tmpfilename = os.path.join(self.build_temp,sources[i][1:])
if os.path.isfile (tmpfilename):
filename = tmpfilename
else:
raise DistutilsSetupError(
'The %s filename declared in the source '
'files of the program %s have not been '
'built by the installation process' % (sources[i],
exe_name))
sources[i]=filename
log.info("%s changed to %s",message,filename)
else:
log.info("%s ok",message)
return sources
def build_extensions(self):
# First, sanity-check the 'extensions' list
for ext in self.extensions:
ext.sources = self.substitute_sources(ext.name,ext.sources)
def modifyDocScripts(self):
build_dir_file=open("doc/sphinx/build_dir.txt","w")
print(self.build_lib,file=build_dir_file)
build_dir_file.close()
self.check_extensions_list(self.extensions)
for ext in self.extensions:
log.info("%s :-> %s",ext.name,ext.sources)
ext.sources = self.cython_sources(ext.sources, ext)
self.build_extension(ext)
def run(self):
self.modifyDocScripts()
for cmd_name in self.get_sub_commands():
self.run_command(cmd_name)
cython_options.annotate = True
ori_build_ext.run(self) # @UndefinedVariable
def has_files(self):
return self.distribution.has_files()
def has_executables(self):
return self.distribution.has_executables()
def initialize_options(self):
ori_build_ext.initialize_options(self) # @UndefinedVariable
self.littlebigman = None
self.built_files = None
sub_commands = [('build_files',has_files),
('build_cexe', has_executables)
] + \
ori_build_ext.sub_commands
def finalize_options(self):
    """
    Finalize the command options and propagate the byte-order macro.

    After the parent finalization, the ``littlebigman`` value is fetched
    from the ``littlebigman`` command and the ``files`` value of the
    ``build_files`` command is stored as ``built_files``. When the
    detected flag is ``-DLITTLE_END`` the ``LITTLE_END`` macro is added
    to the macros defined for the build.
    """
    ori_build_ext.finalize_options(self)  # @UndefinedVariable
    self.set_undefined_options('littlebigman',
                               ('littlebigman', 'littlebigman'))
    self.set_undefined_options('build_files',
                               ('files', 'built_files'))
    self.cython_c_in_temp = 1
    if self.littlebigman =='-DLITTLE_END':
        if self.define is None:
            self.define=[('LITTLE_END',None)]
        else:
            # list.append() takes a single argument: the (name, value)
            # macro must be passed as one tuple.
            self.define.append(('LITTLE_END',None))
def substitute_sources(self,exe_name,sources):
    """
    Return a copy of `sources` where every name starting with ``@`` is
    replaced by the path of the corresponding built file.

    The name (without the ``@``) is looked up in ``self.built_files``;
    when absent, a file of that name in ``self.build_temp`` is used.
    A DistutilsSetupError is raised when neither exists.
    """
    resolved = []
    for source in sources:
        label = "%s :-> %s" % (exe_name,source)
        if source[0]!='@':
            log.info("%s ok",label)
            resolved.append(source)
            continue
        key = source[1:]
        try:
            target = self.built_files[key]
        except KeyError:
            # Not produced by build_files: fall back on the temp directory.
            target = os.path.join(self.build_temp,key)
            if not os.path.isfile(target):
                raise DistutilsSetupError(
                    'The %s filename declared in the source '
                    'files of the program %s have not been '
                    'built by the installation process' % (source,
                                                           exe_name))
        log.info("%s changed to %s",label,target)
        resolved.append(target)
    return resolved
def build_extensions(self):
    """
    Resolve the ``@`` sources of every extension, validate the extension
    list, then cythonize and build each extension in turn.
    """
    extensions = self.extensions
    # Built-file placeholders must be resolved before the sanity check.
    for extension in extensions:
        extension.sources = self.substitute_sources(extension.name,
                                                    extension.sources)

    self.check_extensions_list(self.extensions)

    for extension in self.extensions:
        log.info("%s :-> %s",extension.name,extension.sources)
        extension.sources = self.cython_sources(extension.sources, extension)
        self.build_extension(extension)
# Entry point of the command: writes the documentation build-dir file,
# runs every sub-command returned by get_sub_commands(), switches the
# Cython `annotate` option on, then delegates to the parent build_ext.run().
# NOTE(review): indentation was lost in this view; the run_command() call is
# presumably the only statement inside the loop - confirm against the file.
def run(self):
self.modifyDocScripts()
for cmd_name in self.get_sub_commands():
self.run_command(cmd_name)
# Set before the parent run so the option is in effect for cythonization.
cython_options.annotate = True
ori_build_ext.run(self) # @UndefinedVariable
def has_files(self):
return self.distribution.has_files()
def has_executables(self):
return self.distribution.has_executables()
sub_commands = [('build_files',has_files),
('build_cexe', has_executables)
] + \
ori_build_ext.sub_commands
except ImportError:
from distutils.command import build_ext # @UnusedImport

View File

@ -0,0 +1,36 @@
'''
Created on 22 janv. 2016
@author: coissac
'''
import sys
from urllib import request
import os.path
from obidistutils.serenity.util import get_serenity_dir
from obidistutils.serenity.rerun import rerun_with_anothe_python
from obidistutils.serenity.checkpython import is_a_virtualenv_python
getpipurl="https://bootstrap.pypa.io/get-pip.py"
def bootstrap():
    """
    Install pip for the running interpreter, then re-run the program.

    ``get-pip.py`` is downloaded from `getpipurl` into the serenity
    directory and executed with ``sys.executable``. Outside a virtualenv
    the ``--user`` flag is added. The exit status of the installer is
    ignored, as before.
    """
    # Local imports: only this function needs them.
    import shutil
    import subprocess

    getpipfile=os.path.join(get_serenity_dir(),"get-pip.py")

    with request.urlopen(getpipurl) as getpip:
        with open(getpipfile,"wb") as out:
            shutil.copyfileobj(getpip,out)

    python = sys.executable

    # An argument list (no shell) keeps working when the interpreter or
    # the serenity directory path contains spaces or shell metacharacters.
    command = [python,getpipfile]
    if not is_a_virtualenv_python():
        command.append("--user")

    subprocess.call(command)

    rerun_with_anothe_python(python)

View File

@ -7,8 +7,13 @@ Created on 2 oct. 2014
import re
import os
import pip # @UnresolvedImport
from pip.utils import get_installed_distributions # @UnresolvedImport
try:
import pip # @UnresolvedImport
from pip.utils import get_installed_distributions # @UnresolvedImport
except ImportError:
from .bootstrappip import bootstrap
bootstrap()
from distutils.version import StrictVersion # @UnusedImport
from distutils.errors import DistutilsError
from distutils import log

View File

@ -59,7 +59,7 @@ def serenity_virtualenv(envname,package,version,minversion='3.4',maxversion=None
clear=True,
symlinks=False,
with_pip=True)
# check the newly created virtualenv
return serenity_virtualenv(envname,package,version)

View File

@ -16,30 +16,21 @@ It defines classes_and_methods
@deffield updated: Updated
'''
import sys
import pkgutil
import argparse
import logging
import json
default_config = {
default_config = { 'software' : "The OBITools",
'log' : False,
'loglevel' : 'INFO',
'progress' : True
}
'obi' : { 'log' : True,
'loglevel' : 'INFO',
'version' : False,
'progress' : True
}
}
root_config_name='obi'
from obitools3 import command
from obitools3.apps.config import getConfiguration # @UnresolvedImport
from obitools3.version import version
__all__ = []
__all__ = []
__version__ = version
__date__ = '2014-09-28'
__date__ = '2014-09-28'
__updated__ = '2014-09-28'
DEBUG = 1
@ -47,182 +38,12 @@ TESTRUN = 0
PROFILE = 0
def loadCommand(name,loader):
'''
Load a command module from its name and an ImpLoader
This function is for internal use
@param name: name of the module
@type name: str
@param loader: the module loader
@type loader: ImpLoader
@return the loaded module
@rtype: module
'''
module = loader.find_module(name).load_module(name)
return module
def getCommandsList():
'''
Returns the list of sub-commands available to the main `obi` command
@return: a dict instance with key corresponding to each command and
value corresponding to the module
@rtype: dict
'''
cmds = dict((x[1],loadCommand(x[1],x[0]))
for x in pkgutil.iter_modules(command.__path__)
if not x[2])
return cmds
def getLogger(config):
    '''
    Configure and return the root logger as defined by the command line
    options or by the config file.

    A stderr handler is always added; when ``config['obi']['log']`` is
    true a file handler writing to ``<outputfilename>.log`` is added too.
    An unknown or invalid ``loglevel`` falls back to ``logging.INFO``.
    The logger is also stored in ``config['obi']['logger']``.

    :param config: the configuration dictionary
    :return: the root logger
    '''
    output = config['obi']['outputfilename']
    level = config['obi']['loglevel']
    logfile= config['obi']['log']

    rootlogger = logging.getLogger()
    logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)
    rootlogger.addHandler(stderrHandler)

    if logfile:
        fileHandler = logging.FileHandler("%s.log" % output)
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Only accept names that resolve to a numeric level of the logging
    # module; anything else (unknown name, non-string, a function such as
    # logging.info) silently falls back to INFO instead of a bare except.
    loglevel = getattr(logging, level, None) if isinstance(level, str) else None
    if not isinstance(loglevel, int):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    config['obi']['logger']=rootlogger

    return rootlogger
class ObiParser(argparse.ArgumentParser):
def error(self, message):
sys.stderr.write('error: %s\n' % message)
self.print_help()
sys.exit(2)
def buildArgumentParser():
parser = ObiParser()
parser.add_argument('--version', dest='obi:version',
action='store_true',
default=False,
help='Print the version of the OBITools')
parser.add_argument('--no-log', dest='obi:log',
action='store_false',
default=None,
help='Do not create a logfile for the data analyze')
parser.add_argument('--no-progress', dest='obi:progress',
action='store_false',
default=None,
help='Do not print the progress bar during analyzes')
subparsers = parser.add_subparsers(title='subcommands',
description='valid subcommands',
help='additional help')
commands = getCommandsList()
for c in commands:
module = commands[c]
if hasattr(module, "run"):
if hasattr(module, "__title__"):
sub = subparsers.add_parser(c,help=module.__title__)
else:
sub = subparsers.add_parser(c)
if hasattr(module, "addOptions"):
module.addOptions(sub)
sub.set_defaults(**{'obi:module' : module})
return parser
def buildDefaultConfiguration():
global default_config
commands = getCommandsList()
for c in commands:
module = commands[c]
assert hasattr(module, "run")
if hasattr(module, 'default_config'):
default_config[c]=module.default_config
else:
default_config[c]={}
return default_config
def getConfiguration():
global default_config
if '__done__' in default_config:
return default_config
parser = buildArgumentParser()
options = vars(parser.parse_args())
config = buildDefaultConfiguration()
for k in options:
section,key = k.split(':')
s = config[section]
if options[k] is not None:
s[key]=options[k]
if config['obi']['version']:
print("The OBITools - Version %s" % __version__)
sys.exit(0)
if not 'module' in config['obi']:
print('\nError: No obi command specified',file=sys.stderr)
parser.print_help()
sys.exit(2)
if config['obi']['outputfilename'] is None:
config['obi']['outputfilename']=config['obi']['indexfilename']
getLogger(config)
config['__done__']=True
return config
if __name__ =="__main__":
config = getConfiguration()
config = getConfiguration(root_config_name,
default_config)
config['obi']['module'].run(config)
config[root_config_name]['module'].run(config)

View File

View File

@ -0,0 +1,3 @@
#cython: language_level=3
cpdef buildArgumentParser(str configname, str softname)

View File

@ -0,0 +1,61 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import argparse
import sys
from .command import getCommandsList
class ObiParser(argparse.ArgumentParser):
    """Argument parser that prints the full help when a parsing error occurs."""

    def error(self, message):
        """Report `message` on stderr, print the help and exit with status 2."""
        sys.stderr.write('error: %s\n' % message)
        self.print_help()
        raise SystemExit(2)
cpdef buildArgumentParser(str configname,
                          str softname):
    """
    Build the command line parser of the application.

    Global options are stored under ``<configname>:<option>``
    destinations. One sub-parser is created for every command module
    exposing a ``run`` attribute; the module itself is recorded under
    ``<configname>:module``.

    @param configname: name of the root configuration section
    @param softname: software name shown in the --version help
    @return the configured parser
    """
    parser = ObiParser()

    parser.add_argument('--version', dest='%s:version' % configname,
                        action='store_true',
                        default=False,
                        help='Print the version of %s' % softname)

    parser.add_argument('--log', dest='%s:log' % configname,
                        action='store',
                        type=str,
                        default=None,
                        help='Create a logfile')

    parser.add_argument('--no-progress', dest='%s:progress' % configname,
                        action='store_false',
                        default=None,
                        help='Do not print the progress bar during analyzes')

    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands',
                                       help='additional help')

    for cmdname, module in getCommandsList().items():
        # Modules without an entry point are not exposed as commands.
        if not hasattr(module, "run"):
            continue

        if hasattr(module, "__title__"):
            sub = subparsers.add_parser(cmdname,help=module.__title__)
        else:
            sub = subparsers.add_parser(cmdname)

        if hasattr(module, "addOptions"):
            module.addOptions(sub)

        sub.set_defaults(**{'%s:module' % configname : module})

    return parser

View File

@ -0,0 +1,3 @@
#cython: language_level=3
cdef object loadCommand(str name,loader)

View File

@ -0,0 +1,44 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import pkgutil
from obitools3 import commands
cdef object loadCommand(str name,loader):
    '''
    Load a command module from its name and the finder that located it.

    This function is for internal use.

    The ``find_spec`` protocol is used when the finder provides it, since
    ``find_module`` was removed from the standard finders in Python 3.12.
    The legacy ``find_module``/``load_module`` path is kept as a fallback.

    @param name: name of the module
    @type name: str

    @param loader: the module finder, as yielded by pkgutil.iter_modules
    @type loader: ImpLoader or any finder

    @return the loaded module
    @rtype: module
    '''
    # Local imports: only this function needs them.
    import importlib.util
    import sys

    find_spec = getattr(loader, "find_spec", None)
    if find_spec is None:
        return loader.find_module(name).load_module(name)

    spec = find_spec(name)
    if spec is None or spec.loader is None:
        raise ImportError("Cannot locate the command module %r" % name)

    # Loaders without exec_module only support the legacy protocol.
    if not hasattr(spec.loader, "exec_module"):
        return spec.loader.load_module(name)

    module = importlib.util.module_from_spec(spec)
    # load_module() registered the module in sys.modules; keep doing so.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        sys.modules.pop(name, None)
        raise

    return module
def getCommandsList():
    '''
    Returns the sub-commands available to the main `obi` command.

    Every non-package module found in the `commands` package is loaded.

    @return: a dict instance with key corresponding to each command and
             value corresponding to the module
    @rtype: dict
    '''
    cdef dict cmds = {}

    for finder, modname, ispkg in pkgutil.iter_modules(commands.__path__):
        if not ispkg:
            cmds[modname] = loadCommand(modname, finder)

    return cmds

View File

@ -0,0 +1,10 @@
#cython: language_level=3
cpdef str setRootConfigName(str rootname)
cpdef str getRootConfigName()
cdef dict buildDefaultConfiguration(str root_config_name,
dict config)
cpdef dict getConfiguration(str root_config_name=?,
dict config=?)

View File

@ -0,0 +1,103 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import sys
from .command import getCommandsList
from .logging cimport getLogger
from .arguments cimport buildArgumentParser
from ..version import version
from _curses import version
cdef dict __default_config__ = {}
cpdef str setRootConfigName(str rootname):
    """
    Declare `rootname` as the name of the root configuration section.

    When a root section already exists under the previous name, it is
    moved under the new name.

    @param rootname: the new name of the root section
    @return the new root name
    """
    global __default_config__

    previous = __default_config__.get('__root_config__')
    if previous is not None and previous in __default_config__:
        # pop() runs before the assignment, so renaming the root to its
        # current name keeps the section instead of deleting it.
        __default_config__[rootname] = __default_config__.pop(previous)

    __default_config__['__root_config__'] = rootname
    return rootname
cpdef str getRootConfigName():
    """Return the name of the root configuration section, or None when unset."""
    global __default_config__
    rootname = __default_config__.get('__root_config__')
    return rootname
cdef dict buildDefaultConfiguration(str root_config_name,
                                    dict config):
    """
    Reset the module-wide configuration and fill it with the defaults.

    `config` becomes the root section (and receives a ``version`` key);
    one extra section is created per command module, holding the
    module's ``default_config`` when it defines one and an empty dict
    otherwise.

    @param root_config_name: name of the root section
    @param config: default values of the root section (modified in place)
    @return the module-wide configuration dictionary
    """
    global __default_config__

    # The module-level name `version` is overridden by a later
    # `from _curses import version`; import the package version
    # explicitly so the right value is stored.
    from ..version import version as software_version

    __default_config__.clear()
    setRootConfigName(root_config_name)

    __default_config__[root_config_name]=config

    config['version']=software_version

    commands = getCommandsList()

    for c in commands:
        module = commands[c]

        assert hasattr(module, "run")

        if hasattr(module, 'default_config'):
            __default_config__[c]=module.default_config
        else:
            __default_config__[c]={}

    return __default_config__
cpdef dict getConfiguration(str root_config_name="__default__",
                            dict config={}):
    """
    Build, once, the configuration from the defaults and the command line.

    Later calls return the configuration already built. On the first call
    both arguments are required. ``--version`` prints the software version
    and exits with status 0; a command line without sub-command prints the
    help and exits with status 2.

    @param root_config_name: name of the root configuration section
    @param config: default values of the root section
    @return the complete configuration dictionary
    """
    global __default_config__

    if '__done__' in __default_config__:
        return __default_config__

    if root_config_name=="__default__":
        raise RuntimeError("No root_config_name specified")

    if not config:
        raise RuntimeError("Base configuration is empty")

    config = buildDefaultConfiguration(root_config_name,
                                       config)
    rootsection = config[root_config_name]

    parser = buildArgumentParser(root_config_name,
                                 rootsection['software'])

    options = vars(parser.parse_args())

    if options['%s:version' % root_config_name]:
        print("%s - Version %s" % (config[root_config_name]['software'],
                                   config[root_config_name]['version']))
        sys.exit(0)

    # Option destinations are spelled "<section>:<key>".
    for dest, value in options.items():
        section,key = dest.split(':')
        target = config[section]
        if value is not None:
            target[key]=value

    if 'module' not in config[root_config_name]:
        print('\nError: No command specified',file=sys.stderr)
        parser.print_help()
        sys.exit(2)

    getLogger(config)

    config['__done__']=True

    return config

View File

@ -0,0 +1,3 @@
#cython: language_level=3
cpdef getLogger(dict config)

View File

@ -0,0 +1,46 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import logging
import sys
cpdef getLogger(dict config):
    '''
    Configure and return the root logger as defined by the command line
    options or by the config file.

    The root section is the one named by ``config["__root_config__"]``.
    A stderr handler is always added; when the ``log`` entry of the root
    section is set, a file handler writing to that path is added too.
    An unknown or invalid ``loglevel`` falls back to ``logging.INFO``.
    The logger is also stored under the ``logger`` key of the root section.

    :param config: the configuration dictionary
    :return: the root logger
    '''
    root = config["__root_config__"]

    level = config[root]['loglevel']
    logfile= config[root]['log']

    rootlogger = logging.getLogger()
    logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)
    rootlogger.addHandler(stderrHandler)

    if logfile:
        fileHandler = logging.FileHandler(logfile)
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Only accept names that resolve to a numeric level of the logging
    # module; anything else (unknown name, non-string, a function such as
    # logging.info) silently falls back to INFO instead of a bare except.
    loglevel = getattr(logging, level, None) if isinstance(level, str) else None
    if not isinstance(loglevel, int):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    config[root]['logger']=rootlogger

    return rootlogger

View File

@ -0,0 +1,65 @@
#cython: language_level=3
from ..utils cimport str2bytes
cdef extern from "stdio.h":
struct FILE
int fprintf(FILE *stream, char *format, ...)
FILE* stderr
ctypedef unsigned int off_t "unsigned long long"
cdef extern from "unistd.h":
int fsync(int fd);
cdef extern from "time.h":
struct tm :
int tm_yday
int tm_hour
int tm_min
int tm_sec
enum: CLOCKS_PER_SEC
ctypedef int time_t
ctypedef int clock_t
ctypedef int suseconds_t
struct timeval:
time_t tv_sec # seconds */
suseconds_t tv_usec # microseconds */
struct timezone :
int tz_minuteswest; # minutes west of Greenwich
int tz_dsttime; # type of DST correction
int gettimeofday(timeval *tv, timezone *tz)
tm *gmtime_r(time_t *clock, tm *result)
time_t time(time_t *tloc)
clock_t clock()
cdef class ProgressBar:
cdef off_t maxi
cdef clock_t starttime
cdef clock_t lasttime
cdef clock_t tickcount
cdef int freq
cdef int cycle
cdef int arrow
cdef int lastlog
cdef bint ontty
cdef int fd
cdef bytes head
cdef char *chead
cdef object logger
cdef char *wheel
cdef char *spaces
cdef char* diese
cdef clock_t clock(self)

View File

@ -0,0 +1,138 @@
#cython: language_level=3
'''
Created on 27 mars 2016
@author: coissac
'''
import sys
from ..utils cimport bytes2str
# Text progress bar written to stderr with fprintf, with periodic messages
# sent to the logger found in the configuration.
# NOTE(review): indentation was lost in this view, so the nesting described
# in the comments below is inferred from the statements - confirm against
# the original file before relying on it.
cdef class ProgressBar:
# Current time of day (gettimeofday) converted to CLOCKS_PER_SEC units:
# seconds * CLOCKS_PER_SEC plus the microsecond part scaled the same way.
cdef clock_t clock(self):
cdef clock_t t
cdef timeval tp
cdef clock_t s
<void> gettimeofday(&tp,NULL)
s = <clock_t> (<double> tp.tv_usec * 1.e-6 * <double> CLOCKS_PER_SEC)
t = tp.tv_sec * CLOCKS_PER_SEC + s
return t
# maxi    : position corresponding to 100 % (values <= 0 are replaced by 1)
# config  : configuration dict; the logger is read from its root section
# head    : label printed before the bar (converted with str2bytes)
# seconde : interval, in seconds, converted to ticks in self.tickcount
def __init__(self,
off_t maxi,
dict config,
str head="",
double seconde=0.1):
self.starttime = self.clock()
self.lasttime = self.starttime
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
self.freq = 1
self.cycle = 0
self.arrow = 0
self.lastlog = 0
self.ontty = sys.stderr.isatty()
if (maxi<=0):
maxi=1
self.maxi = maxi
self.head = str2bytes(head)
# chead is the C-string view of self.head; self.head keeps the bytes alive.
self.chead= self.head
self.logger=config[config["__root_config__"]]["logger"]
self.wheel = '|/-\\'
# NOTE(review): spaces and diese are later indexed up to position 50 and
# written in place by __call__; the literals shown here may have had their
# inner whitespace collapsed by this view - confirm their real length.
self.spaces=' ' \
' ' \
' ' \
' ' \
' '
self.diese ='##########' \
'##########' \
'##########' \
'##########' \
'##########'
# Report the progression `pos`, either a position or a callable returning
# one. The display is only refreshed when the call counter is a multiple of
# self.freq.
def __call__(self,object pos):
cdef off_t ipos
cdef clock_t elapsed
cdef clock_t newtime
cdef clock_t delta
cdef clock_t more
cdef double percent
cdef tm remain
cdef int days,hour,minu,sec
cdef off_t fraction
cdef int twentyth
self.cycle+=1
if self.cycle % self.freq == 0:
self.cycle=1
newtime = self.clock()
delta = newtime - self.lasttime
self.lasttime = newtime
elapsed = newtime - self.starttime
# print(" ",delta,elapsed,elapsed/CLOCKS_PER_SEC,self.tickcount)
# Adapt the refresh frequency: doubled when refreshes come faster than a
# fifth of tickcount, halved when slower than five times tickcount.
if delta < self.tickcount / 5 :
self.freq*=2
elif delta > self.tickcount * 5 and self.freq>1:
# NOTE(review): freq is a C int; verify that `/=` performs the intended
# integer division under language_level=3.
self.freq/=2
if callable(pos):
ipos=pos()
else:
ipos=pos
# A position of 0 is replaced by 1 so that percent is never zero below.
if ipos==0:
ipos=1
percent = <double>ipos/<double>self.maxi
# Remaining time in seconds, extrapolated from the elapsed time.
more = <time_t>((<double>elapsed / percent * (1. - percent))/CLOCKS_PER_SEC)
# gmtime_r is used only to split the duration into days/hours/min/sec.
<void>gmtime_r(&more, &remain)
days = remain.tm_yday
hour = remain.tm_hour
minu = remain.tm_min
sec = remain.tm_sec
if self.ontty:
# The bar is 50 characters wide.
fraction=<int>(percent * 50.)
self.arrow=(self.arrow+1) % 4
# Temporarily cut both C strings with a NUL byte so that fprintf prints
# `fraction` '#' characters and the complementary run of spaces; the
# original characters are restored after printing.
# NOTE(review): nothing here bounds percent to 1, so `50 - fraction` is
# presumably expected to stay non-negative - confirm with callers.
self.diese[fraction]=0
self.spaces[50 - fraction]=0
if days:
<void>fprintf(stderr,b'\r%s %5.1f %% |%s%c%s] remain : %d days %02d:%02d:%02d',
self.chead,
percent*100,
self.diese,self.wheel[self.arrow],self.spaces,
days,hour,minu,sec)
else:
<void>fprintf(stderr,b'\r%s %5.1f %% |%s%c%s] remain : %02d:%02d:%02d',
self.chead,
percent*100.,
self.diese,self.wheel[self.arrow],self.spaces,
hour,minu,sec)
self.diese[fraction]=b'#'
self.spaces[50 - fraction]=b' '
# A log message is emitted each time the progression enters a new
# twentieth (5 % step).
twentyth = int(percent * 20)
if twentyth != self.lastlog:
if self.ontty:
<void>fprintf(stderr,b'\n')
self.logger.info('%s %5.1f %% remain : %02d:%02d:%02d' % (
bytes2str(self.head),
percent*100.,
hour,minu,sec))
self.lastlog=twentyth
# NOTE(review): with indentation lost it is unclear which `if` this `else`
# belongs to - confirm against the original file.
else:
self.cycle+=1

View File

View File

@ -0,0 +1,44 @@
'''
Created on 8 mars 2016
@author: coissac
'''
from obitools3.apps.progress import ProgressBar # @UnresolvedImport
import time
__title__="Counts sequences in a sequence set"
default_config = { 'countmode' : None
}
def addOptions(parser):
    """
    Declare the command line options of the count command on `parser`.

    Adds the optional positional input data set (stored as ``obi:input``)
    and the two boolean flags of the 'Obicount specific options' group
    (stored as ``count:sequence`` and ``count:all``).
    """
    positional = dict(dest='obi:input', metavar='obi:input',
                      nargs='?',
                      default=None,
                      help='input data set')
    parser.add_argument(**positional)

    # (option strings, destination, help text) of each boolean flag.
    flags = (
        (('-s','--sequence'), "count:sequence",
         "Prints only the number of sequence records."),
        (('-a','--all'), "count:all",
         "Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."),
    )

    group=parser.add_argument_group('Obicount specific options')
    for names, dest, text in flags:
        group.add_argument(*names,
                           action="store_true", dest=dest,
                           default=False,
                           help=text)
def run(config):
    """
    Entry point of the command: advances a progress bar over 1000 steps,
    sleeping 0.01 second after each step.
    """
    # The code of my command
    progress = ProgressBar(1000,config,seconde=1)

    step = 0
    while step < 1000:
        step += 1
        progress(step)
        time.sleep(0.01)

View File

View File

@ -0,0 +1,6 @@
#cython: language_level=3
cdef class LineBuffer:
cdef object fileobj
cdef int size

View File

@ -0,0 +1,23 @@
#cython: language_level=3
'''
Created on 30 mars 2016
@author: coissac
'''
cdef class LineBuffer:
    """
    Iterate over the lines of a file object, reading them in batches
    through ``readlines(size)``.
    """

    def __init__(self,object fileobj,int size=100000000):
        """
        @param fileobj: object providing a ``readlines(hint)`` method
        @param size: hint passed to each ``readlines`` call
        """
        self.fileobj=fileobj
        self.size=size

    def __iter__(self):
        """Yield the lines one by one until ``readlines`` returns nothing."""
        cdef list chunk
        cdef str line

        while True:
            chunk = self.fileobj.readlines(self.size)
            if not chunk:
                break
            for line in chunk:
                yield line

View File

@ -0,0 +1,17 @@
#cython: language_level=3
cdef class MagicKeyFile:
cdef object stream
cdef str stream_mode
cdef object binary
cdef bytes key
cdef int keylength
cdef int pos
cpdef bytes read(self,int size=?)
cpdef int tell(self)
cdef class CompressedFile:
cdef object accessor

View File

@ -0,0 +1,114 @@
#cython: language_level=3
'''
Created on 28 mars 2016
@author: coissac
'''
import zipfile
import bz2
import gzip
import io
cdef class MagicKeyFile:
    """
    Wrap a stream, read its first bytes (the magic key) in advance and
    still serve them to later ``read`` calls, so that the format of the
    stream can be checked without losing data.

    Unknown attributes are delegated to the underlying binary stream.
    """

    def __init__(self,stream,length=2):
        """
        @param stream: a file-like object with a ``mode`` attribute, or a
                       response object whose ``headers`` contain a
                       'Content-type' entry
        @param length: number of bytes of the magic key
        @raise TypeError: when the stream offers neither interface
        """
        binary=stream
        self.stream = stream
        self.stream_mode = None
        if hasattr(stream, "mode"):
            self.stream_mode = stream.mode
            # A text stream is read through its underlying binary buffer.
            if (not 'b' in stream.mode and
                hasattr(stream, "buffer") and
                'b' in stream.buffer.mode):
                binary=stream.buffer

        if (self.stream_mode is None and
            not (hasattr(stream, 'headers') and
                 hasattr(stream.headers, "keys") and
                 'Content-type' in stream.headers)):
            raise TypeError("stream does not present the good interface")

        self.binary=binary
        self.key=binary.read(length)
        self.keylength=length
        self.pos=0

    cpdef bytes read(self,int size=-1):
        """
        Read up to `size` bytes (everything when negative), serving the
        bytes of the magic key first.
        """
        cdef bytes r

        if self.pos < self.keylength:
            if size > (self.keylength - self.pos):
                # The request spans the end of the key: complete it from
                # the underlying stream.
                size = size - self.keylength + self.pos
                r = self.key[self.pos:] + self.binary.read(size)
                self.pos=self.keylength + 1
            elif size >=0 :
                r = self.key[self.pos:(self.pos+size)]
                self.pos+=size
            else:
                r = self.key[self.pos:] + self.binary.read(size)
                self.pos=self.keylength + 1
        else:
            r = self.binary.read(size)

        return r

    cpdef int tell(self):
        """
        Return the current position: the offset inside the key while it
        is being served, the position of the underlying stream afterwards.
        """
        cdef int p

        if self.pos < self.keylength:
            p = self.pos
        else:
            # Ask the wrapped stream; calling self.tell() here would
            # recurse without end.
            p = self.binary.tell()

        return p

    def __getattr__(self,name):
        return getattr(self.binary, name)
cdef class CompressedFile:
    """
    Give transparent access to a stream that may be zip, bz2 or gzip
    compressed, the format being recognized from the first bytes.

    The content is exposed as text when the stream declares a ``text/``
    content type or was opened in text mode. Unknown attributes are
    delegated to the selected accessor.
    """

    def __init__(self,stream):
        """
        @param stream: a stream accepted by MagicKeyFile
        """
        cdef int keylength
        cdef MagicKeyFile magic
        cdef str compressor
        cdef bytes k
        cdef object c

        # format name -> (magic key, opener)
        cdef dict compress = { 'zip' : (b'\x50\x4b\x03\x04',zipfile.ZipFile),
                               'bz2' : (b'\x42\x5a\x68',bz2.BZ2File),
                               'gz'  : (b'\x1f\x8b\x08',gzip.open)
                             }

        keylength = max([len(x[0]) for x in compress.values()])
        magic=MagicKeyFile(stream,keylength)

        self.accessor = None

        for compressor in compress:
            k,c = compress[compressor]
            if magic.key.startswith(k):
                self.accessor = c(magic)

        if self.accessor is None:
            self.accessor = magic

        # stream_mode is None for streams without a `mode` attribute
        # (e.g. HTTP responses); testing `'b' not in None` would raise a
        # TypeError, so such streams are wrapped only on their content type.
        if ((hasattr(stream, 'headers') and
             hasattr(stream.headers, "keys") and
             'Content-type' in stream.headers and
             stream.headers['Content-type'].startswith('text/')) or
            (magic.stream_mode is not None and
             'b' not in magic.stream_mode)):
            self.accessor = io.TextIOWrapper(self.accessor)

    def __getattr__(self,name):
        return getattr(self.accessor, name)

    def __iter__(self):
        for x in self.accessor:
            yield x

View File

@ -0,0 +1,5 @@
#cython: language_level=3
from .uncompress cimport CompressedFile
cpdef CompressedFile uopen(str name, mode=?)

View File

@ -0,0 +1,23 @@
#cython: language_level=3
'''
Created on 25 mars 2016
@author: coissac
'''
from urllib.request import urlopen
cpdef CompressedFile uopen(str name, mode='r'):
    """
    Open `name` as a URL and, when urlopen rejects it with a ValueError,
    as a local file opened with `mode`. The stream is returned wrapped
    in a CompressedFile.
    """
    cdef CompressedFile wrapped

    try:
        stream = urlopen(name)
    except ValueError:
        stream = open(name,mode)

    wrapped = CompressedFile(stream)
    return wrapped

View File

View File

@ -0,0 +1,8 @@
#cython: language_level=3
from .header cimport parseHeader
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer

View File

@ -0,0 +1,46 @@
#cython: language_level=3
'''
Created on 30 mars 2016
@author: coissac
'''
def fastaIterator(lineiterator, int buffersize=100000000):
    """
    Iterate over the records of a fasta formatted source.

    @param lineiterator: a file name or URL (opened with uopen), a
                         LineBuffer, or any object accepted by LineBuffer
    @param buffersize: size hint used when a LineBuffer has to be built
    @return a generator of dicts with the keys ``id``, ``definition``,
            ``sequence``, ``quality`` (always None), ``tags`` and
            ``annotation``
    """
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef list s

    if isinstance(lineiterator,(str,bytes)):
        lineiterator=uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb=lineiterator
    else:
        lb=LineBuffer(lineiterator,buffersize)

    i = iter(lb)

    # next() with a default never raises StopIteration inside the
    # generator (a RuntimeError since PEP 479). Reaching the end of the
    # input ends the current record, so the last record is yielded too,
    # and an empty input simply yields nothing.
    line = next(i, None)

    while line is not None:
        ident,tags,definition = parseHeader(line)
        s = []
        line = next(i, None)

        while line is not None and line[0]!='>':
            # Only strip the line terminator: a last line without a
            # newline must keep its final residue.
            s.append(line.rstrip('\r\n'))
            line = next(i, None)

        sequence = "".join(s)
        quality = None

        yield { "id" : ident,
                "definition" : definition,
                "sequence" : sequence,
                "quality" : quality,
                "tags" : tags,
                "annotation" : {}
              }

View File

@ -0,0 +1,8 @@
#cython: language_level=3
from .header cimport parseHeader
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer

View File

@ -0,0 +1,41 @@
#cython: language_level=3
'''
Created on 30 mars 2016
@author: coissac
'''
def fastqIterator(lineiterator, int buffersize=100000000):
    """
    Iterate over the records of a fastq formatted source (four lines per
    record: header, sequence, separator, quality).

    @param lineiterator: a file name or URL (opened with uopen), a
                         LineBuffer, or any object accepted by LineBuffer
    @param buffersize: size hint used when a LineBuffer has to be built
    @return a generator of dicts with the keys ``id``, ``definition``,
            ``sequence``, ``quality``, ``tags`` and ``annotation``
    """
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags

    if isinstance(lineiterator,(str,bytes)):
        lineiterator=uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb=lineiterator
    else:
        lb=LineBuffer(lineiterator,buffersize)

    i = iter(lb)

    for line in i:
        ident,tags,definition = parseHeader(line)
        try:
            # Only strip the line terminator, so that a last line without
            # a newline keeps its final character.
            sequence = next(i).rstrip('\r\n')
            next(i)
            quality = next(i).rstrip('\r\n')
        except StopIteration:
            # Truncated last record: stop the iteration cleanly instead of
            # leaking StopIteration from the generator (RuntimeError since
            # PEP 479).
            return

        yield { "id" : ident,
                "definition" : definition,
                "sequence" : sequence,
                "quality" : quality,
                "tags" : tags,
                "annotation" : {}
              }

View File

@ -0,0 +1,5 @@
#cython: language_level=3
cdef object __etag__(str x)
cpdef tuple parseHeader(str header)

View File

@ -0,0 +1,78 @@
#cython: language_level=3
'''
Created on 25 mars 2016
@author: coissac
'''
import re
# One `key=value;` tag of a sequence header; the value may be quoted.
__ret__ = re.compile(r'''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')

# Patterns recognizing the type of a tag value. Raw strings are used so
# that the regex escapes are not read as (invalid) string escapes.
__re_int__ = re.compile(r"^[+-]?[0-9]+$")
__re_float__ = re.compile(r"^[+-]?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$")
# The group makes both anchors apply to both quoting styles; without it
# the pattern accepted `"abc"def` and `abc'def'`.
__re_str__ = re.compile(r"""^("[^"]*"|'[^']*')$""")

# A dict literal with quoted keys, e.g. {'a': 1, "b": 'x'}
__re_dict__ = re.compile(r"""^\{\ *
                             (
                                 ("[^"]*"|'[^']*')
                                 \ *:\ *
                                 ([^,}]+|
                                  "[^"]*"|
                                  '[^']*'
                                 )
                             )?
                             (\ *,\ *
                                 ("[^"]*"|'[^']*')
                                 \ *:\ *
                                 ([^,}]+|
                                  "[^"]*"|
                                  '[^']*'
                                 )
                             )*\ *\}$""", re.VERBOSE)
cdef object __etag__(str x):
    """
    Convert the textual value of a header tag to a Python value.

    Tested in this order: integer, float, quoted string (quotes removed),
    the literals ``False`` and ``True``, dict literal. Any other text is
    returned unchanged.
    """
    if __re_int__.match(x):
        return int(x)
    if __re_float__.match(x):
        return float(x)
    if __re_str__.match(x):
        return x[1:-1]
    if x=='False':
        return False
    if x=='True':
        return True
    if __re_dict__.match(x):
        # NOTE(review): eval() executes text coming from sequence file
        # headers; the dict pattern accepts arbitrary unquoted values, so
        # an untrusted file can run code here. Consider
        # ast.literal_eval after checking which values must be supported.
        return eval(x)
    return x
cpdef tuple parseHeader(str header):
    """
    Parse the header line of a sequence record.

    The first character (the record marker) is skipped; the first word is
    the identifier, the following ``key=value;`` items are the tags and
    the text after the last tag is the definition.

    @param header: the header line, with or without its line terminator
    @return the tuple (identifier, tags dict, definition); a header
            without identifier gives ('', {}, '')
    """
    cdef list m
    cdef dict tags
    cdef str definition
    cdef str ident
    cdef str second
    cdef Py_ssize_t tagsend

    # split() already discards the line terminator, so nothing is sliced
    # off the end: a header without trailing newline keeps its last
    # character.
    m=header[1:].split(maxsplit=1)

    if not m:
        return '',{},''

    ident=m[0]

    if len(m)==1:
        tags={}
        definition=''
    else:
        second=m[1]
        tags = {}
        tagsend = -1
        for hit in __ret__.finditer(second):
            tags[hit.group(2)] = __etag__(hit.group(3))
            tagsend = hit.end()

        if tagsend >= 0:
            # The definition starts where the last tag ends. Splitting on
            # the text of the last tag would cut at its first occurrence,
            # which is wrong when the same tag text appears twice.
            definition = second[tagsend:].strip()
        else:
            definition = second.strip()

    return ident,tags,definition

View File

@ -1,5 +1,6 @@
#cython: language_level=3
cdef bytes str2bytes(str string)
cdef str bytes2str(bytes string)

View File

@ -1,7 +1,28 @@
#cython: language_level=3
import sys
import io
cdef bytes str2bytes(str string):
    """
    Convert an ascii-only python string (str) to bytes, which can be
    easily converted to C-strings.

    @param string: the python string to be converted.
    @type string: str
    @return the ascii encoded string
    @rtype: bytes
    """
    return bytes(string, 'ascii')
cdef str bytes2str(bytes string):
    """
    Short cut to convert bytes (C-strings) to ascii encoded python string (str).

    @param string: the binary (C-string) string to be converted.
    @type string: bytes
    @return an ascii transcoded string
    @rtype: str
    """
    # The docstring must be the first statement of the function: placed
    # after a `return` it was dead code, as was the second `return`.
    return string.decode('ascii')