Branch to refactor and debug (AVLs bugged)
This commit is contained in:
@ -12,8 +12,6 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/encode.h
|
||||
@ -28,3 +26,5 @@
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/crc64.c
|
||||
../../../src/crc64.h
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
|
@ -14,7 +14,7 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
|
@ -14,7 +14,7 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
|
@ -14,7 +14,7 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
|
@ -14,7 +14,7 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
|
@ -14,7 +14,7 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
|
@ -10,19 +10,23 @@ from .capi.obitypes cimport OBISeq_NA, const_char_p
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str
|
||||
|
||||
from libc.stdlib cimport free
|
||||
from libc.string cimport strcmp
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef bytes value
|
||||
cdef char* value
|
||||
cdef object result
|
||||
value = <bytes> obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBISeq_NA : # TODO
|
||||
if strcmp(value, OBISeq_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
free(value)
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
@ -38,33 +42,35 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef bytes value
|
||||
cdef char* value
|
||||
cdef object result
|
||||
value = <bytes> obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
value = obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBISeq_NA :
|
||||
if strcmp(value, OBISeq_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
free(value)
|
||||
return result
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef bytes value
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef char* value
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = <bytes> obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBISeq_NA :
|
||||
if strcmp(value, OBISeq_NA) == 0 :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = bytes2str(value)
|
||||
value_in_result = bytes2str(value)
|
||||
free(value)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
|
@ -14,7 +14,7 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
|
@ -10,19 +10,22 @@ from .capi.obitypes cimport OBIStr_NA, const_char_p
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str
|
||||
|
||||
from libc.string cimport strcmp
|
||||
|
||||
|
||||
cdef class OBIDMS_column_str(OBIDMS_column):
|
||||
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef bytes value
|
||||
cdef char* value
|
||||
cdef object result
|
||||
value = <bytes> obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIStr_NA : # TODO
|
||||
if strcmp(value, OBIStr_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
@ -38,33 +41,35 @@ cdef class OBIDMS_column_str(OBIDMS_column):
|
||||
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef bytes value
|
||||
cdef char* value
|
||||
cdef object result
|
||||
value = <bytes> obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
value = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBIStr_NA :
|
||||
if strcmp(value, OBIStr_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
return result
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef bytes value
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef char* value
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = <bytes> obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIStr_NA :
|
||||
if strcmp(value, OBIStr_NA) == 0 :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = bytes2str(value)
|
||||
value_in_result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
|
@ -12,8 +12,8 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/encode.h
|
||||
|
@ -26,7 +26,7 @@ cdef class OBI_Seq(dict) :
|
||||
self[bytes2str(DESCRIPTION_COLUMN)] = description
|
||||
|
||||
cpdef get_description(self) :
|
||||
return self.description
|
||||
return self.description # TODO no
|
||||
|
||||
cpdef get_sequence(self) :
|
||||
return self.sequence
|
||||
@ -48,28 +48,25 @@ cdef class OBI_Nuc_Seq(OBI_Seq) :
|
||||
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
|
||||
|
||||
cpdef set_id(self, str id) :
|
||||
self.id = id
|
||||
self[bytes2str(ID_COLUMN)] = id
|
||||
|
||||
cpdef get_id(self) :
|
||||
return self.id
|
||||
return self[bytes2str(ID_COLUMN)]
|
||||
|
||||
cpdef set_description(self, str description) :
|
||||
self.description = description
|
||||
self[bytes2str(DESCRIPTION_COLUMN)] = description
|
||||
|
||||
cpdef get_description(self) :
|
||||
return self.description
|
||||
return self[bytes2str(DESCRIPTION_COLUMN)]
|
||||
|
||||
cpdef set_sequence(self, str sequence) :
|
||||
self.sequence = sequence
|
||||
self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence
|
||||
|
||||
cpdef get_sequence(self) :
|
||||
return self.sequence
|
||||
return self[bytes2str(NUC_SEQUENCE_COLUMN)]
|
||||
|
||||
def __str__(self) :
|
||||
return self.sequence # or not
|
||||
return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not
|
||||
|
||||
# cpdef str reverse_complement(self) : TODO in C ?
|
||||
# pass
|
||||
|
@ -12,8 +12,8 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/utils.c
|
||||
../../../src/utils.h
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/encode.h
|
||||
|
@ -5,7 +5,7 @@ import time
|
||||
from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
|
||||
def bufferedRead(fileobj,size=100000000):
|
||||
def bufferedRead(fileobj,size=209715200): ## 200 MB
|
||||
buffer = fileobj.readlines(size)
|
||||
while buffer:
|
||||
for l in buffer:
|
||||
@ -26,14 +26,16 @@ if __name__ == '__main__':
|
||||
|
||||
view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")
|
||||
|
||||
# for i in range(35000000) :
|
||||
# if (not (i%500000)) :
|
||||
# print(str(time.time())+'\t'+str(i))
|
||||
# id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
|
||||
# view[i].set_id(id)
|
||||
for i in range(35000000) :
|
||||
if (not (i%500000)) :
|
||||
print(str(time.time())+'\t'+str(i))
|
||||
id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
|
||||
view[i].set_id(id)
|
||||
if id != view[i]["ID"] :
|
||||
print("nope", id, view[i]["ID"])
|
||||
|
||||
input_file = open(args.input_file, 'r')
|
||||
input_file_buffered = bufferedRead(input_file)
|
||||
# input_file = open(args.input_file, 'r')
|
||||
# input_file_buffered = bufferedRead(input_file)
|
||||
|
||||
#
|
||||
# if args.input_file[-1:] == "a" :
|
||||
@ -111,37 +113,37 @@ if __name__ == '__main__':
|
||||
# l = 0
|
||||
# next = False
|
||||
#
|
||||
l=0
|
||||
i=0
|
||||
# l=0
|
||||
# i=0
|
||||
# while (True):
|
||||
# l+=1
|
||||
# line = input_file.readline()
|
||||
# if line=="":
|
||||
# break
|
||||
for line in input_file_buffered :
|
||||
# for line in input_file_buffered :
|
||||
#
|
||||
# #if i > 1E7 :
|
||||
# # print('hmm?')
|
||||
#
|
||||
# #if i == 10000000 :
|
||||
# # break
|
||||
# if i == 6000000 :
|
||||
# break
|
||||
#
|
||||
if l%4 == 0 :
|
||||
# if l%4 == 0 :
|
||||
#
|
||||
if (not (i%500000)) :
|
||||
print(str(time.time())+'\t'+str(i))
|
||||
# if (not (i%500000)) :
|
||||
# print(str(time.time())+'\t'+str(i))
|
||||
# #
|
||||
# # #print("header", line)
|
||||
# #
|
||||
id = line.split(" ", 1)[0][1:]
|
||||
print(id)
|
||||
# id = line.split(" ", 1)[0][1:]
|
||||
# print(id)
|
||||
# # #rest = (line[:-1].split(" ", 1)[1]).split(";")
|
||||
view[i].set_id(id)
|
||||
#print(view[i]["ID"])
|
||||
# view[i].set_id(id)
|
||||
# print(view[i]["ID"])
|
||||
#
|
||||
i+=1
|
||||
# i+=1
|
||||
|
||||
l+=1
|
||||
# l+=1
|
||||
#
|
||||
# # description = ""
|
||||
# # for j in range(len(rest)) :
|
||||
@ -186,7 +188,7 @@ if __name__ == '__main__':
|
||||
# l+=1
|
||||
#
|
||||
#
|
||||
input_file.close()
|
||||
# input_file.close()
|
||||
|
||||
#print(view)
|
||||
print(view.__repr__())
|
||||
|
Reference in New Issue
Block a user