Add embl reader

This commit is contained in:
2008-06-30 10:24:02 +00:00
parent a22b9f8e0d
commit 83e9e31bf2
5 changed files with 198 additions and 0 deletions

View File

@ -0,0 +1,2 @@
from obitools.seqdb.embl.parser import emblIterator,emblParser

View File

@ -0,0 +1,55 @@
"""
The kb package manages access to a PostgreSQL database from Python
scripts.
"""
class Connection(object):
    """Abstract interface shared by the database connection classes.

    Concrete subclasses are expected to override ``__init__``, ``cursor``,
    ``commit`` and ``rollback``.  Calling an instance with a query is a
    shortcut for ``connection.cursor().execute(query)``.

    Every placeholder raises ``NotImplementedError``.  That exception is a
    subclass of ``RuntimeError``, so callers that catch ``RuntimeError``
    keep working.
    """

    def __init__(self):
        raise NotImplementedError('pyROM.KB.Connection is an abstract class')

    def cursor(self):
        """Return a cursor bound to this connection (abstract)."""
        raise NotImplementedError('pyROM.KB.Connection.cursor is an abstract function')

    def commit(self):
        """Commit the current transaction (abstract)."""
        raise NotImplementedError('pyROM.KB.Connection.commit is an abstract function')

    def rollback(self):
        """Roll back the current transaction (abstract)."""
        raise NotImplementedError('pyROM.KB.Connection.rollback is an abstract function')

    def __call__(self, query):
        """Execute ``query`` on a new cursor and return the cursor's result."""
        return self.cursor().execute(query)
class Cursor(object):
    """Abstract interface shared by the database cursor classes.

    Concrete subclasses are expected to override ``__init__`` and
    ``execute``.  Calling an instance is equivalent to calling its
    ``execute`` method.

    The placeholders raise ``NotImplementedError``, a subclass of
    ``RuntimeError``, so callers that catch ``RuntimeError`` keep working.
    """

    def __init__(self, db):
        raise NotImplementedError('pyROM.KB.Cursor is an abstract class')

    def execute(self, query):
        """Run ``query`` and return its result (abstract)."""
        raise NotImplementedError('pyROM.KB.Cursor.execute is an abstract function')

    def __call__(self, query):
        """Run ``query`` through ``self.execute``.

        The lookup goes through ``self`` on purpose: a class-level alias
        (``__call__ = execute``) would stay bound to this abstract
        ``execute`` even in subclasses that override it.
        """
        return self.execute(query)
_current_connection = None  # Static variable used to store connection to KB


def getConnection(*args, **kargs):
    """Return a connection to the database.

    When called from the database backend, no argument is needed.
    The connection is cached at module level: every call made without
    arguments returns the same object.  A call that passes any positional
    or keyword argument always builds a new connection, which replaces
    the cached one.

    The ``backend`` module is tried first; if importing or using it raises
    ``ImportError``, the ``extern`` module is used instead and receives
    ``*args`` and ``**kargs``.  Arguments are ignored when ``backend`` is
    available.
    """
    global _current_connection
    if _current_connection is None or args or kargs:
        try:
            from obischemas.kb import backend
            _current_connection = backend.Connection()
        except ImportError:
            from obischemas.kb import extern
            _current_connection = extern.Connection(*args, **kargs)
    return _current_connection

View File

@ -0,0 +1,78 @@
"""
Module : KB.extern
Author : Eric Coissac
Date : 03/05/2004
Module wrapping the psycopg2 interface module to allow connections
to a PostgreSQL database through the same interface from both
backend and external scripts.
This module defines the classes usable from external scripts.
"""
import psycopg2
import sys
from obischemas import kb
class Connection(kb.Connection):
    """Database connection opened through ``psycopg2.connect``.

    The parameters given at construction time are kept in
    ``self.connectParam`` so that ``restart`` can reopen the connection.
    If an ``autocommit`` attribute is set to a true value on the instance,
    it is copied onto every cursor created by ``cursor``.
    """

    # restart() gives up silently after this many failed attempts
    _MAX_RESTART_ATTEMPTS = 999

    def __init__(self, *connectParam, **kconnectParam):
        """Open the connection.

        Positional arguments are DSN fragments (``"dbname=x user=y"``);
        they are joined with spaces and passed as ``dsn``.  When at least
        one positional argument is given the keyword arguments are
        ignored; otherwise the keyword arguments are passed unchanged to
        ``psycopg2.connect``.
        """
        if connectParam:
            self.connectParam = {'dsn': ' '.join(connectParam)}
        else:
            self.connectParam = kconnectParam
        # The parameters are deliberately not printed: they may contain
        # the database password.
        self.db = psycopg2.connect(**self.connectParam)

    def restart(self):
        """Reopen the connection with the stored parameters.

        Failed attempts are retried immediately, up to
        ``_MAX_RESTART_ATTEMPTS`` times.  If every attempt fails the
        method returns without raising and ``self.db`` is left unchanged.
        """
        for _ in range(self._MAX_RESTART_ATTEMPTS):
            try:
                self.db = psycopg2.connect(**self.connectParam)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit can stop the retry loop.
                continue
            return

    def cursor(self):
        """Return a new ``Cursor`` on this connection."""
        curs = Cursor(self.db)
        autocommit = getattr(self, 'autocommit', False)
        if autocommit:
            curs.autocommit = autocommit
        return curs

    def commit(self):
        """Commit the current transaction."""
        self.db.commit()

    def rollback(self):
        """Roll back the current transaction, if a connection was opened."""
        # ``db`` is missing when __init__ failed before connecting
        if hasattr(self, 'db'):
            self.db.rollback()

    def __del__(self):
        """Roll back whatever is pending when the object is collected."""
        if hasattr(self, 'db'):
            self.rollback()
class Cursor(kb.Cursor):
    """Cursor wrapping a DB-API cursor obtained from ``db.cursor()``.

    If an ``autocommit`` attribute is set to a true value on the instance,
    ``self.db.commit()`` is called after each successful statement.
    """

    def __init__(self, db):
        self.db = db
        self.curs = db.cursor()

    def execute(self, query):
        """Run ``query`` and return its rows as a list of dictionaries.

        Each row is a dict mapping the first item of every entry of the
        cursor ``description`` to the corresponding value of the row.  An
        empty list is returned when the cursor has no ``description``
        after the statement.

        ``psycopg2.ProgrammingError`` and ``psycopg2.IntegrityError`` are
        re-raised after the offending query has been written to stderr.
        """
        try:
            self.curs.execute(query)
            if getattr(self, 'autocommit', False):
                self.db.commit()
        except psycopg2.ProgrammingError:
            sys.stderr.write("===> %s\n" % query)
            # bare raise keeps the original traceback
            raise
        except psycopg2.IntegrityError:
            sys.stderr.write("---> %s\n" % query)
            raise

        description = self.curs.description
        if description is None:
            # nothing to fetch for this statement
            return []
        labels = [column[0] for column in description]
        return [dict(zip(labels, row)) for row in self.curs.fetchall()]

    # The alias inherited from kb.Cursor is bound to the abstract
    # ``execute``; rebind it to the concrete method defined above.
    __call__ = execute

View File

@ -0,0 +1,13 @@
from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
from obitools.location import locationGenerator,extractExternalRefs
class EmblSequence(AnnotatedNucSequence):
    '''
    Class used to represent a nucleic sequence issued from EMBL.

    The class body is empty: it adds no attribute or method of its own,
    so all behaviour is inherited from AnnotatedNucSequence.
    '''

View File

@ -0,0 +1,50 @@
import re
import sys
from obitools.seqdb import embl
from obitools.seqdb import nucEntryIterator
# Regular expressions used to cut one EMBL flat-file entry into its parts.
# NOTE(review): the number of spaces inside these patterns is significant;
# confirm it against the real EMBL line layout.

# One or more consecutive lines starting with "FT " (presumably the
# feature table).
_featureMatcher = re.compile('(^FT .*\n)+', re.M)
# The "FT" line code at the beginning of any line.
_cleanFT = re.compile('^FT',re.M)
# From "ID" at the very start of the text up to, but not including, the
# newline that precedes "FH ".  Greedy with DOTALL, so it stops at the
# last such place.
_headerMatcher = re.compile('^ID.+(?=\nFH )', re.DOTALL)
# From the first line starting with a space up to, but not including,
# the last "//" followed by a newline.
_seqMatcher = re.compile('(^ ).+(?=//\n)', re.DOTALL + re.M)
# Runs of spaces, newlines and digits, removed from the sequence block.
_cleanSeq = re.compile('[ \n0-9]+')
# The remainder of the first line starting with "AC ".
_acMatcher = re.compile('(?<=^AC ).+',re.M)
# One or more consecutive non-empty lines starting with "DE ".
_deMatcher = re.compile('(^DE .+\n)+',re.M)
# "DE" followed by spaces, at the start of the string or after a newline.
_cleanDe = re.compile('(^|\n)DE +')
def __emblparser(text):
    """Split the text of one EMBL entry into its main parts.

    Returns the tuple ``(ac, seq, de, header, ft, acs)`` where:

    * ``ac`` is the first whitespace-separated token of the first "AC"
      line and ``acs`` the list of the remaining tokens of that line;
    * ``seq`` is the sequence block with spaces, newlines and digits
      removed, converted to upper case;
    * ``de`` is the text of the "DE" lines found in the header, with the
      line codes replaced by a space and surrounding whitespace and dots
      stripped;
    * ``header`` is the part of the entry matched by ``_headerMatcher``;
    * ``ft`` is the block of "FT" lines with each "FT" code replaced by a
      space.

    If one of the parts cannot be found, the whole entry is written to
    stderr between two separator lines and the ``AttributeError`` raised
    by calling ``.group()`` on a failed match is propagated with its
    original traceback.
    """
    try:
        header = _headerMatcher.search(text).group()

        ft = _featureMatcher.search(text).group()
        ft = _cleanFT.sub(' ', ft)

        seq = _seqMatcher.search(text).group()
        seq = _cleanSeq.sub('', seq).upper()

        # NOTE(review): tokens are split on whitespace only, so any
        # punctuation attached to an accession number is kept -- confirm.
        acs = _acMatcher.search(text).group().split()
        ac = acs[0]
        acs = acs[1:]

        # the description is looked up in the header only
        de = _deMatcher.search(header).group()
        de = _cleanDe.sub(' ', de).strip().strip('.')
    except AttributeError:
        sys.stderr.write('=======================================================\n')
        sys.stderr.write('%s\n' % (text,))
        sys.stderr.write('=======================================================\n')
        # bare raise keeps the traceback pointing at the failing match
        raise
    return (ac, seq, de, header, ft, acs)
def emblParser(text):
    """Build an ``embl.EmblSequence`` from the text of one EMBL entry.

    The fields extracted by ``__emblparser`` are passed, in order, as the
    positional arguments of the ``EmblSequence`` constructor.
    """
    fields = __emblparser(text)
    return embl.EmblSequence(*fields)
def emblIterator(file):
    """Yield one parsed sequence per entry found in ``file``.

    ``file`` is handed unchanged to ``nucEntryIterator``; every entry it
    produces is converted with ``emblParser`` before being yielded.
    """
    entries = nucEntryIterator(file)
    for entry in entries:
        yield emblParser(entry)