# Origin: patch d44117d625852afd5931605cc410e8174ebaab90
# Author: Celine Mercier -- Wed, 23 Mar 2016 13:00:02 +0100
# Subject: obiimport function for testing purposes
# File: python/obitools3/obiimport.py
"""Import the record identifiers of a sequence file into an OBIDMS view.

Test script: it creates a view named 'uniq view' (type NUC_SEQS_VIEW) in the
DMS 'tdms' and stores, for every record of the input file, the identifier
found on the record's header line.

The input is read as fixed four-line records (header, sequence, separator,
quality), i.e. the FASTQ layout, even though the command-line description
still mentions fasta.

NOTE(review): the original revision also carried large commented-out
experiments (multi-line FASTA parsing, conversion of ``key=value;`` header
annotations into typed columns, and storing the sequences themselves).  None
of it was executed; it has been dropped here and remains available in the
project history.
"""

import sys
import argparse
import time


def bufferedRead(fileobj, size=100000000):
    """Yield the lines of *fileobj*, fetching them in large batches.

    Lines are pulled with ``fileobj.readlines(size)`` so that roughly *size*
    characters are loaded per batch instead of one line at a time.

    :param fileobj: any object exposing ``readlines(hint)``.
    :param size: size hint handed to ``readlines`` for each batch.
    :return: a generator over the lines, in file order, line endings kept.
    """
    buffer = fileobj.readlines(size)
    while buffer:
        yield from buffer
        buffer = fileobj.readlines(size)


def iterFastqIds(lines):
    """Yield the identifier of each four-line record found in *lines*.

    Only every fourth line (starting with the first one) is treated as a
    header.  The identifier is the header's first space-delimited token
    without its leading marker character (e.g. ``@``).

    :param lines: iterable of text lines, with or without line endings.
    :return: a generator over the identifiers, in input order.
    """
    for line_number, line in enumerate(lines):
        if line_number % 4:
            continue
        # Strip the line ending first: a header without a description would
        # otherwise keep its newline inside the identifier.
        header = line.rstrip("\r\n")
        if not header:
            # A blank line in a header slot (typically trailing blank lines
            # at the end of the file) is not a record.
            continue
        yield header.split(" ", 1)[0][1:]


def main():
    """Parse the command line and import the identifiers into the view."""
    parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')

    # The input is mandatory: without it there is nothing to open.
    parser.add_argument('-i', '--input', dest='input_file', type=str,
                        required=True,
                        help='Name of the file containing the sequences')

    args = parser.parse_args()

    # Imported here rather than at module level so that the pure-Python
    # helpers above stay importable without the compiled OBIDMS extension.
    from obitools3.obidms._obidms import OBIDMS

    # Open the input before touching the DMS so that an unreadable path fails
    # before anything is created; the context manager guarantees the handle
    # is released even if the import loop raises.
    with open(args.input_file, 'r') as input_file:

        d = OBIDMS('tdms')
        view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")

        records = iterFastqIds(bufferedRead(input_file))
        for i, seq_id in enumerate(records):
            # Progress trace: timestamp and record index every 500000 records.
            if not i % 500000:
                print(str(time.time()) + '\t' + str(i))
            view[i].set_id(seq_id)

    print(repr(view))

    view.save_and_close()
    d.close()

    print("Done.")


if __name__ == '__main__':
    main()