Add embl reader
This commit is contained in:
2
src/obitools/format/sequence/embl.py
Normal file
2
src/obitools/format/sequence/embl.py
Normal file
@ -0,0 +1,2 @@
|
||||
from obitools.seqdb.embl.parser import emblIterator,emblParser
|
||||
|
55
src/obitools/obischemas/kb/__init__.py
Normal file
55
src/obitools/obischemas/kb/__init__.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
kb package is devoted to manage access to postgresql database from python
|
||||
script
|
||||
"""
|
||||
|
||||
|
||||
class Connection(object):
    """Abstract interface of a connection to the KB database.

    Concrete connection classes subclass this one and override
    ``__init__``, ``cursor``, ``commit`` and ``rollback``.  Every method
    defined here except ``__call__`` only raises, so the base class
    cannot be instantiated directly.

    The methods raise ``NotImplementedError``; it derives from
    ``RuntimeError``, so code catching ``RuntimeError`` is unaffected.
    """

    def __init__(self):
        raise NotImplementedError('pyROM.KB.Connection is an abstract class')

    def cursor(self):
        """Return a cursor on this connection (must be overridden)."""
        raise NotImplementedError('pyROM.KB.Connection.cursor is an abstract function')

    def commit(self):
        """Commit the current transaction (must be overridden)."""
        raise NotImplementedError('pyROM.KB.Connection.commit is an abstract function')

    def rollback(self):
        """Roll back the current transaction (must be overridden)."""
        raise NotImplementedError('pyROM.KB.Connection.rollback is an abstract function')

    def __call__(self,query):
        """Execute *query* on a new cursor and return what ``execute`` returns."""
        return self.cursor().execute(query)
|
||||
|
||||
|
||||
class Cursor(object):
    """Abstract interface of a cursor on the KB database.

    Concrete cursor classes override ``__init__`` and ``execute``.
    Calling a cursor instance is a shortcut for ``execute``.
    """

    def __init__(self,db):
        raise NotImplementedError('pyROM.KB.Cursor is an abstract class')

    def execute(self,query):
        """Run *query* (must be overridden)."""
        raise NotImplementedError('pyROM.KB.Cursor.execute is an abstract function')

    def __call__(self,query):
        """Run *query* through ``execute`` and return its result."""
        # Dispatch through the instance: a class-level alias
        # (``__call__ = execute``) stays bound to the abstract function
        # above and ignores the ``execute`` overridden by subclasses.
        return self.execute(query)
|
||||
|
||||
|
||||
_current_connection = None  # Module-level cache of the connection to the KB


def getConnection(*args,**kargs):
    """
    Return a connection to the database.

    When called from the database backend no argument is needed.  If the
    ``backend`` module cannot be imported, the arguments are forwarded to
    ``extern.Connection`` instead.

    The connection is cached at module level: a call without arguments
    returns the cached connection once one exists, while passing any
    argument builds a new connection that replaces the cached one.
    """
    global _current_connection

    # Identity test: a connection object with a custom __eq__ must never
    # be mistaken for "no connection yet".
    if _current_connection is None or args or kargs:
        try:
            from obischemas.kb import backend
            _current_connection = backend.Connection()
        except ImportError:
            from obischemas.kb import extern
            _current_connection = extern.Connection(*args,**kargs)
    return _current_connection
|
||||
|
||||
|
78
src/obitools/obischemas/kb/extern.py
Normal file
78
src/obitools/obischemas/kb/extern.py
Normal file
@ -0,0 +1,78 @@
|
||||
"""
|
||||
Module : KB.extern
|
||||
Author : Eric Coissac
|
||||
Date : 03/05/2004
|
||||
|
||||
Module wrapping psycopg interface module to allow connection
|
||||
to a postgresql databases with the same interface from
|
||||
backend and external script.
|
||||
|
||||
This module define a class usable from external script
|
||||
"""
|
||||
|
||||
|
||||
import psycopg2
|
||||
import sys
|
||||
from obischemas import kb
|
||||
|
||||
class Connection(kb.Connection):
    """Implementation of ``kb.Connection`` on top of psycopg2.

    Attributes set by ``__init__``:
      connectParam -- keyword arguments given to ``psycopg2.connect``
                      (reused by ``restart``)
      db           -- the psycopg2 connection object

    ``cursor`` also looks at an ``autocommit`` attribute, which is not
    set anywhere in this class; when present and true it is copied onto
    the cursors created afterwards.
    """

    def __init__(self,*connectParam,**kconnectParam):
        """Open the connection.

        Give either one positional DSN string or psycopg2 keyword
        arguments.  When a DSN is given the keyword arguments are not
        used.

        Raises TypeError if more than one positional argument is given.
        """
        if connectParam:
            if len(connectParam) > 1:
                raise TypeError('Connection accepts at most one positional '
                                'argument (the DSN string)')
            # Assignment (the original compared with ``==``) of the DSN
            # string itself, not of the whole argument tuple.
            self.connectParam = {'dsn': connectParam[0]}
        else:
            self.connectParam = kconnectParam
        # The parameters are deliberately not printed: they may hold a
        # password.
        self.db = psycopg2.connect(**self.connectParam)

    def restart(self):
        """Try to re-open the connection with the original parameters.

        Up to 999 attempts are made.  If all of them fail the method
        returns silently and ``self.db`` keeps its previous value.
        """
        for _ in range(999):
            try:
                self.db = psycopg2.connect(**self.connectParam)
            except psycopg2.Error:
                # Only database errors are retried; anything else
                # (KeyboardInterrupt, bad arguments...) propagates.
                continue
            return

    def cursor(self):
        """Return a new ``Cursor`` on this connection."""
        curs = Cursor(self.db)
        if getattr(self, 'autocommit', False):
            curs.autocommit = self.autocommit
        return curs

    def commit(self):
        """Commit the current transaction."""
        self.db.commit()

    def rollback(self):
        """Roll back the current transaction, if a connection was opened."""
        if hasattr(self,'db'):
            self.db.rollback()

    def __del__(self):
        # Uncommitted work is discarded when the object is collected.
        # ``db`` is missing when __init__ failed before connecting.
        if hasattr(self,'db'):
            self.rollback()
|
||||
|
||||
class Cursor(kb.Cursor):
    """Implementation of ``kb.Cursor`` on top of a psycopg2 cursor.

    Attributes:
      db   -- the psycopg2 connection the cursor belongs to
      curs -- the underlying psycopg2 cursor

    ``execute`` also looks at an ``autocommit`` attribute, which is not
    set in this class (``Connection.cursor`` may set it).
    """

    def __init__(self,db):
        self.db = db
        self.curs = db.cursor()

    def execute(self,query):
        """Run *query* and return its rows.

        Each row is returned as a dict mapping column name to value.  An
        empty list is returned when the statement produced no result set.
        When the ``autocommit`` attribute is present and true, the
        connection is committed right after the statement.

        ``psycopg2.ProgrammingError`` and ``psycopg2.IntegrityError`` are
        re-raised after the offending query has been written to stderr.
        """
        try:
            self.curs.execute(query)
            if getattr(self, 'autocommit', False):
                self.db.commit()
        except psycopg2.ProgrammingError:
            sys.stderr.write("===> %s\n" % query)
            raise  # bare raise keeps the original traceback
        except psycopg2.IntegrityError:
            sys.stderr.write("---> %s\n" % query)
            raise

        description = self.curs.description
        if description is None:
            # No result set to fetch from.
            return []
        labels = [column[0] for column in description]
        return [dict(zip(labels, row)) for row in self.curs.fetchall()]

    def __call__(self,query):
        """Shortcut for ``execute``."""
        # Defined here so that calling the cursor reaches this class's
        # execute() whatever the base class aliases __call__ to.
        return self.execute(query)
|
13
src/obitools/seqdb/embl/__init__.py
Normal file
13
src/obitools/seqdb/embl/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
|
||||
from obitools.location import locationGenerator,extractExternalRefs
|
||||
|
||||
|
||||
|
||||
class EmblSequence(AnnotatedNucSequence):
    """
    Nucleic sequence coming from an EMBL entry.

    The class adds nothing of its own; everything is inherited from
    AnnotatedNucSequence.
    """
|
||||
|
||||
|
||||
|
||||
|
50
src/obitools/seqdb/embl/parser.py
Normal file
50
src/obitools/seqdb/embl/parser.py
Normal file
@ -0,0 +1,50 @@
|
||||
import re
|
||||
import sys
|
||||
|
||||
from obitools.seqdb import embl
|
||||
from obitools.seqdb import nucEntryIterator
|
||||
|
||||
# Patterns applied to the text of one EMBL entry; compiled once at import.
# NOTE(review): the whitespace inside these patterns is reproduced exactly
# as found -- confirm it against the column layout of the EMBL flat files
# actually parsed.

# Run of consecutive feature-table ("FT") lines.
_featureMatcher = re.compile('(^FT .*\n)+', re.MULTILINE)
# "FT" tag at the start of a line.
_cleanFT        = re.compile('^FT', re.MULTILINE)

# From the "ID" at the very start of the text up to the "FH" line.
_headerMatcher  = re.compile('^ID.+(?=\nFH )', re.DOTALL)
# From the first line starting with a space up to the "//" terminator.
_seqMatcher     = re.compile('(^ ).+(?=//\n)', re.DOTALL | re.MULTILINE)
# Spaces, newlines and digits to drop from the sequence lines.
_cleanSeq       = re.compile('[ \n0-9]+')
# Content of the first "AC" line.
_acMatcher      = re.compile('(?<=^AC ).+', re.MULTILINE)
# Run of consecutive "DE" lines.
_deMatcher      = re.compile('(^DE .+\n)+', re.MULTILINE)
# "DE" tag (and the newline before it) followed by its padding spaces.
_cleanDe        = re.compile('(^|\n)DE +')
|
||||
|
||||
def __emblparser(text):
    """Split the text of one EMBL entry into its components.

    Returns the tuple ``(ac, seq, de, header, ft, acs)``:
      ac     -- first word of the AC line
      seq    -- sequence in upper case, without spaces, newlines or digits
      de     -- text of the DE lines, stripped of surrounding whitespace
                and dots
      header -- what ``_headerMatcher`` matched in the entry
      ft     -- the FT lines, with their leading "FT" tag replaced
      acs    -- remaining words of the AC line

    Raises AttributeError when one of the patterns does not match; the
    whole entry is written to stderr before the exception propagates.
    """
    try:
        header = _headerMatcher.search(text).group()

        ft = _featureMatcher.search(text).group()
        ft = _cleanFT.sub(' ',ft)

        seq = _seqMatcher.search(text).group()
        seq = _cleanSeq.sub('',seq).upper()

        # NOTE(review): the words are kept exactly as split, including any
        # trailing punctuation present on the AC line -- confirm that is
        # what callers expect.
        acs = _acMatcher.search(text).group().split()
        ac = acs[0]
        acs = acs[1:]

        de = _deMatcher.search(header).group()
        de = _cleanDe.sub(' ',de).strip().strip('.')
    except AttributeError:
        # A search() returned None: dump the offending entry, then let
        # the error propagate with its original traceback.
        bar = '======================================================='
        sys.stderr.write('%s\n%s\n%s\n' % (bar, text, bar))
        raise

    return (ac,seq,de,header,ft,acs)
|
||||
|
||||
def emblParser(text):
    """Build an ``embl.EmblSequence`` from the text of one EMBL entry."""
    fields = __emblparser(text)
    return embl.EmblSequence(*fields)
|
||||
|
||||
|
||||
def emblIterator(file):
    """Yield the result of ``emblParser`` for every entry that
    ``nucEntryIterator`` produces from *file*."""
    entries = nucEntryIterator(file)
    for entry_text in entries:
        yield emblParser(entry_text)
|
||||
|
||||
|
Reference in New Issue
Block a user