Add embl reader
This commit is contained in:
2
src/obitools/format/sequence/embl.py
Normal file
2
src/obitools/format/sequence/embl.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from obitools.seqdb.embl.parser import emblIterator,emblParser
|
||||||
|
|
55
src/obitools/obischemas/kb/__init__.py
Normal file
55
src/obitools/obischemas/kb/__init__.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
"""
The kb package manages access to a PostgreSQL database from Python
scripts.
"""
|
||||||
|
|
||||||
|
|
||||||
|
class Connection(object):
    """Abstract interface of a connection to the knowledge base (KB).

    Concrete connection classes derive from this one and override
    ``__init__``, ``cursor``, ``commit`` and ``rollback``.  Each placeholder
    below raises ``NotImplementedError``; since that is a subclass of
    ``RuntimeError``, handlers written as ``except RuntimeError`` still
    catch it.
    """

    def __init__(self):
        """Refuse direct instantiation of the abstract class."""
        raise NotImplementedError('pyROM.KB.Connection is an abstract class')

    def cursor(self):
        """Return a cursor on this connection (must be overridden)."""
        raise NotImplementedError(
            'pyROM.KB.Connection.cursor is an abstract function')

    def commit(self):
        """Commit the current transaction (must be overridden)."""
        raise NotImplementedError(
            'pyROM.KB.Connection.commit is an abstract function')

    def rollback(self):
        """Roll back the current transaction (must be overridden)."""
        raise NotImplementedError(
            'pyROM.KB.Connection.rollback is an abstract function')

    def __call__(self, query):
        """Execute *query* on a cursor obtained from ``self.cursor()``.

        Returns whatever that cursor's ``execute`` method returns.
        """
        return self.cursor().execute(query)
|
||||||
|
|
||||||
|
|
||||||
|
class Cursor(object):
    """Abstract interface of a cursor on a KB connection.

    Concrete cursor classes override ``__init__`` and ``execute``.  The
    placeholders raise ``NotImplementedError``, a subclass of
    ``RuntimeError``, so ``except RuntimeError`` handlers still catch it.
    """

    def __init__(self, db):
        """Refuse direct instantiation of the abstract class."""
        raise NotImplementedError('pyROM.KB.Cursor is an abstract class')

    def execute(self, query):
        """Run *query* (must be overridden)."""
        raise NotImplementedError(
            'pyROM.KB.Cursor.execute is an abstract function')

    def __call__(self, query):
        """Shorthand for ``self.execute(query)``.

        Looked up through ``self`` at call time so that a subclass
        overriding ``execute`` is honoured; a class-level alias
        (``__call__ = execute``) would stay bound to the placeholder above.
        """
        return self.execute(query)
|
||||||
|
|
||||||
|
|
||||||
|
_current_connection = None  # Module-level cache of the connection to the KB


def getConnection(*args, **kargs):
    """Return the shared connection to the database, creating it on demand.

    A connection is (re)created when none is cached yet or when any
    positional or keyword argument is supplied; otherwise the cached
    connection is returned unchanged.

    The ``backend`` module is tried first and its ``Connection`` is built
    without arguments.  If that raises ``ImportError``, ``extern.Connection``
    is built with ``*args`` and ``**kargs`` instead.

    NOTE(review): the imports name ``obischemas.kb`` while this file sits
    under ``obitools/obischemas/kb`` -- confirm the package is importable
    under that top-level name.
    """
    global _current_connection

    if _current_connection is None or args or kargs:
        try:
            from obischemas.kb import backend
            _current_connection = backend.Connection()
        except ImportError:
            # Not running inside the database backend: open an external
            # connection with the caller's parameters.
            from obischemas.kb import extern
            _current_connection = extern.Connection(*args, **kargs)
    return _current_connection
|
||||||
|
|
||||||
|
|
78
src/obitools/obischemas/kb/extern.py
Normal file
78
src/obitools/obischemas/kb/extern.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
"""
Module : KB.extern
Author : Eric Coissac
Date : 03/05/2004

Module wrapping the psycopg2 interface module so that a PostgreSQL
database can be reached through the same interface from both the
backend and external scripts.

This module defines the classes usable from external scripts.
"""
|
||||||
|
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import sys
|
||||||
|
from obischemas import kb
|
||||||
|
|
||||||
|
class Connection(kb.Connection):
    """Connection to a PostgreSQL database opened through psycopg2."""

    def __init__(self, *connectParam, **kconnectParam):
        """Open the database connection.

        Positional arguments are joined with single spaces into one
        connection string handed to ``psycopg2.connect`` as ``dsn``; in that
        case keyword arguments are ignored.  Without positional arguments
        the keyword arguments are forwarded to ``psycopg2.connect`` as is.
        The parameters are kept in ``self.connectParam`` so that
        ``restart`` can reuse them.
        """
        if connectParam:
            # Store (not compare) the parameters, and pass a string rather
            # than the raw argument tuple as the DSN.
            self.connectParam = {'dsn': ' '.join(connectParam)}
        else:
            self.connectParam = kconnectParam
        # The parameters are deliberately not printed: they may hold a
        # password.
        self.db = psycopg2.connect(**self.connectParam)

    def restart(self):
        """Try to reopen the connection with the stored parameters.

        Makes up to 999 back-to-back attempts.  On the first success
        ``self.db`` is replaced; if every attempt fails with a psycopg2
        error the method returns silently and ``self.db`` is left untouched.
        Any other exception (including ``KeyboardInterrupt``) propagates.
        """
        for _ in range(999):
            try:
                self.db = psycopg2.connect(**self.connectParam)
            except psycopg2.Error:
                continue
            return

    def cursor(self):
        """Return a new ``Cursor`` on this connection.

        If this connection has a truthy ``autocommit`` attribute, it is
        copied onto the cursor.
        """
        curs = Cursor(self.db)
        autocommit = getattr(self, 'autocommit', False)
        if autocommit:
            curs.autocommit = autocommit
        return curs

    def commit(self):
        """Commit the current transaction."""
        self.db.commit()

    def rollback(self):
        """Roll back the current transaction, if a connection was opened."""
        if hasattr(self, 'db'):
            self.db.rollback()

    def __del__(self):
        """Roll back on destruction when a connection was opened."""
        # ``db`` is missing when ``__init__`` failed before connecting.
        if hasattr(self, 'db'):
            self.rollback()
|
||||||
|
|
||||||
|
class Cursor(kb.Cursor):
    """Wrapper around a psycopg2 cursor returning rows as dictionaries."""

    def __init__(self, db):
        """Create a cursor on *db*, an open psycopg2 connection."""
        self.db = db
        self.curs = db.cursor()

    def execute(self, query):
        """Run *query* and return its result rows.

        Returns a list with one ``dict`` per row, mapping column name to
        value, or an empty list when the statement produced no result set.
        When this cursor has a truthy ``autocommit`` attribute the
        transaction is committed right after the statement.

        ``psycopg2.ProgrammingError`` and ``psycopg2.IntegrityError`` are
        re-raised after the offending query has been written to stderr.
        """
        try:
            self.curs.execute(query)
            if getattr(self, 'autocommit', False):
                self.db.commit()
        except psycopg2.ProgrammingError:
            sys.stderr.write("===> %s\n" % query)
            raise  # bare raise keeps the original traceback
        except psycopg2.IntegrityError:
            sys.stderr.write("---> %s\n" % query)
            raise

        description = self.curs.description
        if description is None:
            # Statement without a result set.
            return []
        label = [column[0] for column in description]
        return [dict(zip(label, row)) for row in self.curs.fetchall()]

    # Rebind so that calling the cursor runs this ``execute`` rather than
    # the abstract placeholder the base-class alias points to.
    __call__ = execute
|
13
src/obitools/seqdb/embl/__init__.py
Normal file
13
src/obitools/seqdb/embl/__init__.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
|
||||||
|
from obitools.location import locationGenerator,extractExternalRefs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EmblSequence(AnnotatedNucSequence):
    """
    Annotated nucleic sequence coming from an EMBL entry.
    """
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
50
src/obitools/seqdb/embl/parser.py
Normal file
50
src/obitools/seqdb/embl/parser.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from obitools.seqdb import embl
|
||||||
|
from obitools.seqdb import nucEntryIterator
|
||||||
|
|
||||||
|
# NOTE(review): the amount of white space inside these pattern literals
# should be checked against the EMBL flat-file layout -- TODO confirm.

# A run of consecutive lines that start with "FT " (the feature table).
_featureMatcher = re.compile('(^FT .*\n)+', re.M)
# The "FT" tag at the beginning of a line.
_cleanFT = re.compile('^FT',re.M)

# From "ID" at the very start of the text up to, but not including, the
# last newline that is followed by "FH " ('.+' is greedy and spans lines).
_headerMatcher = re.compile('^ID.+(?=\nFH )', re.DOTALL)
# From the first line starting with a space up to, but not including, the
# last "//" followed by a newline.  The two flags are distinct bits, so
# adding them is equivalent to or-ing them.
_seqMatcher = re.compile('(^ ).+(?=//\n)', re.DOTALL + re.M)
# Runs of spaces, newlines and digits, removed from the sequence block.
_cleanSeq = re.compile('[ \n0-9]+')
# The remainder of a line that starts with "AC ".
_acMatcher = re.compile('(?<=^AC ).+',re.M)
# A run of consecutive lines that start with "DE " (the description).
_deMatcher = re.compile('(^DE .+\n)+',re.M)
# The "DE" tag and the spaces after it, at string start or after a newline.
_cleanDe = re.compile('(^|\n)DE +')
|
||||||
|
|
||||||
|
def __emblparser(text):
    """Split the text of one EMBL entry into its components.

    Returns the tuple ``(ac, seq, de, header, ft, acs)`` where

    * ``ac`` is the first white-space separated token of the first AC line,
    * ``seq`` is the sequence block with spaces, newlines and digits
      removed, in upper case,
    * ``de`` is the description with the ``DE`` tags replaced by spaces,
      stripped of surrounding white space and dots,
    * ``header`` is the part of the entry matched by ``_headerMatcher``,
    * ``ft`` is the feature table with each leading ``FT`` tag replaced by
      a space,
    * ``acs`` is the list of the remaining tokens of the AC line.

    Raises ``AttributeError`` when one of the sections cannot be found; the
    whole entry is written to stderr between two separator lines first.

    NOTE(review): EMBL AC lines usually end each accession with ';' and the
    plain ``split()`` keeps that character -- confirm whether callers
    expect it.
    """
    try:
        header = _headerMatcher.search(text).group()

        ft = _featureMatcher.search(text).group()
        ft = _cleanFT.sub(' ', ft)

        seq = _seqMatcher.search(text).group()
        seq = _cleanSeq.sub('', seq).upper()

        acs = _acMatcher.search(text).group().split()
        ac = acs[0]
        acs = acs[1:]

        # The description is looked up in the header only.
        de = _deMatcher.search(header).group()
        de = _cleanDe.sub(' ', de).strip().strip('.')
    except AttributeError:
        # A search() found nothing and returned None: dump the entry to
        # help diagnosis, then re-raise with the original traceback.
        separator = '======================================================='
        sys.stderr.write('%s\n' % separator)
        sys.stderr.write('%s\n' % text)
        sys.stderr.write('%s\n' % separator)
        raise

    return (ac, seq, de, header, ft, acs)
|
||||||
|
|
||||||
|
def emblParser(text):
    """Build an ``embl.EmblSequence`` from the text of one EMBL entry.

    The fields extracted by ``__emblparser`` are passed, in order, as the
    positional arguments of the ``EmblSequence`` constructor.
    """
    fields = __emblparser(text)
    return embl.EmblSequence(*fields)
|
||||||
|
|
||||||
|
|
||||||
|
def emblIterator(file):
    """Yield one parsed sequence per entry produced by ``nucEntryIterator``.

    *file* is handed unchanged to ``nucEntryIterator``; each entry it
    produces is converted with ``emblParser``.
    """
    entries = nucEntryIterator(file)
    for entry in entries:
        sequence = emblParser(entry)
        yield sequence
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user