obiimport function for testing purposes

This commit is contained in:
Celine Mercier
2016-03-23 13:00:02 +01:00
parent 6bd42132c4
commit d44117d625

View File

@ -0,0 +1,196 @@
import sys
import argparse
import time
from obitools3.obidms._obidms import OBIDMS
def bufferedRead(fileobj,size=100000000):
buffer = fileobj.readlines(size)
while buffer:
for l in buffer:
yield l
buffer = fileobj.readlines(size)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')
parser.add_argument('-i', '--input', dest='input_file', type=str,
help='Name of the file containing the sequences')
args = parser.parse_args()
d = OBIDMS('tdms')
view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")
# for i in range(35000000) :
# if (not (i%500000)) :
# print(str(time.time())+'\t'+str(i))
# id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
# view[i].set_id(id)
input_file = open(args.input_file, 'r')
input_file_buffered = bufferedRead(input_file)
#
# if args.input_file[-1:] == "a" :
#
# i = 0
# next = False
# first = True
#
# for line in input_file :
#
# if line[0] == ">" :
#
# if not first :
# # save seq
# #print(i, id, seq)
# view[i].set_sequence(seq)
# i+=1
#
# first = False
#
# #id = line.split(" ", 1)[0][1:]
# #rest = (line[:-1].split(" ", 1)[1]).split(";")
# #view[i].set_id(id)
#
# # description = ""
# # for j in range(len(rest)) :
# # if "=" in rest[j] :
# # rest[j] = rest[j].strip()
# # rest[j] = rest[j].split("=", 1)
# # column_name = rest[j][0]
# # v = rest[j][1]
# # if ((not v.isalpha()) and (v.isalnum())) :
# # conv_v = int(v)
# # elif (v == "True") or (v == "False") :
# # conv_v = bool(v)
# # else :
# # f = True
# # for letter in v :
# # if ((not letter.isalnum()) or (letter != ".")) :
# # f = False
# # if f :
# # conv_v = float(v)
# # else :
# # conv_v = v
# # view[i][column_name] = conv_v
# # else :
# # description+=rest[j]
# #
# # if description != "" :
# # description = description.strip()
# # view[i].set_description(description)
#
# #print(id)
# #print(rest)
# #print(description)
#
# next = True
#
# elif next == True :
#
# # if not (i % 1E5) :
# # print(i)
#
# seq = line[:-1]
# next = False
#
# elif not next :
#
# seq += line[:-1]
#
#
# elif args.input_file[-1:] == "q" :
#
# i = 0
# l = 0
# next = False
#
l=0
i=0
# while (True):
# l+=1
# line = input_file.readline()
# if line=="":
# break
for line in input_file_buffered :
#
# #if i > 1E7 :
# # print('hmm?')
#
# #if i == 10000000 :
# # break
#
if l%4 == 0 :
#
if (not (i%500000)) :
print(str(time.time())+'\t'+str(i))
# #
# # #print("header", line)
# #
id = line.split(" ", 1)[0][1:]
# # #print(id)
# # #rest = (line[:-1].split(" ", 1)[1]).split(";")
view[i].set_id(id)
#
i+=1
l+=1
#
# # description = ""
# # for j in range(len(rest)) :
# # if "=" in rest[j] :
# # rest[j] = rest[j].strip()
# # rest[j] = rest[j].split("=", 1)
# # column_name = rest[j][0]
# # #print("COLUMN", column_name)
# # v = rest[j][1]
# # if (v == "") and (column_name in view) and (view[column_name].get_data_type() == "OBI_SEQ") :
# # #print(">>>>>>YUP")
# # conv_v = "aa"
# # else :
# # if ((not v.isalpha()) and (v.isalnum())) :
# # conv_v = int(v)
# # elif (v == "True") or (v == "False") :
# # conv_v = bool(v)
# # else :
# # f = True
# # for letter in v :
# # if ((not letter.isalnum()) or (letter != ".")) :
# # f = False
# # if f :
# # conv_v = float(v)
# # else :
# # conv_v = v
# # view[i][column_name] = conv_v
# # else :
# # description+=rest[j]
# #
# # if description != "" :
# # description = description.strip()
# # view[i].set_description(description)
#
# # elif l%4 == 1 :
# #
# # seq = line[:-1]
# #print("seq", seq)
# # view[i].set_sequence(seq)
# # i+=1
#
# l+=1
#
#
input_file.close()
#print(view)
print(view.__repr__())
view.save_and_close()
d.close()
print("Done.")