obiimport function for testing purposes

2016-03-23 13:00:02 +01:00
parent 6bd42132c4
commit d44117d625
1 changed files with 196 additions and 0 deletions
--- a/python/obitools3/obiimport.py
+++ b/python/obitools3/obiimport.py
@ -0,0 +1,196 @@
 import sys
 import argparse
 import time
 from obitools3.obidms._obidms import OBIDMS
 def bufferedRead(fileobj,size=100000000):
     """Yield the lines of *fileobj* one at a time, fetching them in batches.

     Each batch is obtained with ``fileobj.readlines(size)``; *size* is
     forwarded unchanged as the ``readlines`` hint. Iteration ends as soon
     as ``readlines`` returns an empty (falsy) batch.
     """
     while True:
         batch = fileobj.readlines(size)
         if not batch:
             # Nothing left to read: end the generator.
             return
         yield from batch
 if __name__ == '__main__':
     # Test driver: reads the input file as 4-line records and stores the
     # identifier found on the first line of each record in a new
     # NUC_SEQS_VIEW of the 'tdms' OBIDMS.
     #
     # NOTE(review): the description below says "fasta", but the active
     # code only handles 4-line records (every 4th line is a header) --
     # confirm the intended input format.
     # TODO: earlier drafts of this script also parsed FASTA input, parsed
     # "key=value;" header annotations into columns, set the description
     # and stored the sequence itself; none of that is enabled here.
     parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')
     parser.add_argument('-i', '--input', dest='input_file', type=str,
                         required=True,  # otherwise open(None) fails with a TypeError
                         help='Name of the file containing the sequences')
     args = parser.parse_args()

     d = OBIDMS('tdms')
     view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")

     i = 0  # index of the next record to write into the view
     # The context manager closes the input file even if the import fails.
     with open(args.input_file, 'r') as input_file:
         for l, line in enumerate(bufferedRead(input_file)):
             if l % 4 != 0:
                 # Only the first line of each 4-line record holds the id.
                 continue

             if i % 500000 == 0:
                 # Progress trace: timestamp and number of records so far.
                 print(str(time.time())+'\t'+str(i))

             # The id is the first space-delimited token without its leading
             # marker character. Strip the newline first so that a header
             # with no space does not keep a trailing '\n' in the id.
             seq_id = line.rstrip('\n').split(" ", 1)[0][1:]
             view[i].set_id(seq_id)
             i += 1

     print(repr(view))
     view.save_and_close()
     d.close()
     print("Done.")