Branch for refactoring and debugging (AVLs are bugged)

This commit is contained in:
Celine Mercier
2016-04-08 15:38:57 +02:00
parent edc4fd7b3e
commit 019dfc01b4
32 changed files with 1553 additions and 812 deletions

View File

@ -12,8 +12,6 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/encode.h
@ -28,3 +26,5 @@
../../../src/murmurhash2.h
../../../src/crc64.c
../../../src/crc64.h
../../../src/utils.c
../../../src/utils.h

View File

@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c

View File

@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c

View File

@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c

View File

@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c

View File

@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c

View File

@ -10,19 +10,23 @@ from .capi.obitypes cimport OBISeq_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
from libc.stdlib cimport free
from libc.string cimport strcmp
cdef class OBIDMS_column_seq(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef bytes value
cdef char* value
cdef object result
value = <bytes> obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBISeq_NA : # TODO
if strcmp(value, OBISeq_NA) == 0 :
result = None
else :
result = bytes2str(value)
free(value)
return result
cpdef set_line(self, index_t line_nb, object value):
@ -38,33 +42,35 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef bytes value
cdef char* value
cdef object result
value = <bytes> obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
value = obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBISeq_NA :
if strcmp(value, OBISeq_NA) == 0 :
result = None
else :
result = bytes2str(value)
free(value)
return result
cpdef object get_line(self, index_t line_nb) :
cdef bytes value
cdef object value_in_result
cdef dict result
cdef char* value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = <bytes> obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBISeq_NA :
if strcmp(value, OBISeq_NA) == 0 :
value_in_result = None
else :
value_in_result = bytes2str(value)
value_in_result = bytes2str(value)
free(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False

View File

@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c

View File

@ -10,19 +10,22 @@ from .capi.obitypes cimport OBIStr_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
from libc.string cimport strcmp
cdef class OBIDMS_column_str(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef bytes value
cdef char* value
cdef object result
value = <bytes> obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIStr_NA : # TODO
if strcmp(value, OBIStr_NA) == 0 :
result = None
else :
result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
return result
cpdef set_line(self, index_t line_nb, object value):
@ -38,33 +41,35 @@ cdef class OBIDMS_column_str(OBIDMS_column):
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef bytes value
cdef char* value
cdef object result
value = <bytes> obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
value = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIStr_NA :
if strcmp(value, OBIStr_NA) == 0 :
result = None
else :
result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
return result
cpdef object get_line(self, index_t line_nb) :
cdef bytes value
cdef object value_in_result
cdef dict result
cdef char* value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = <bytes> obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIStr_NA :
if strcmp(value, OBIStr_NA) == 0 :
value_in_result = None
else :
value_in_result = bytes2str(value)
value_in_result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False

View File

@ -12,8 +12,8 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/encode.h

View File

@ -26,7 +26,7 @@ cdef class OBI_Seq(dict) :
self[bytes2str(DESCRIPTION_COLUMN)] = description
cpdef get_description(self) :
return self.description
return self.description # TODO no
cpdef get_sequence(self) :
return self.sequence
@ -48,28 +48,25 @@ cdef class OBI_Nuc_Seq(OBI_Seq) :
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
cpdef set_id(self, str id) :
self.id = id
self[bytes2str(ID_COLUMN)] = id
cpdef get_id(self) :
return self.id
return self[bytes2str(ID_COLUMN)]
cpdef set_description(self, str description) :
self.description = description
self[bytes2str(DESCRIPTION_COLUMN)] = description
cpdef get_description(self) :
return self.description
return self[bytes2str(DESCRIPTION_COLUMN)]
cpdef set_sequence(self, str sequence) :
self.sequence = sequence
self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence
cpdef get_sequence(self) :
return self.sequence
return self[bytes2str(NUC_SEQUENCE_COLUMN)]
def __str__(self) :
return self.sequence # or not
return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not
# cpdef str reverse_complement(self) : TODO in C ?
# pass

View File

@ -12,8 +12,8 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/utils.c
../../../src/utils.h
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/encode.h

View File

@ -5,7 +5,7 @@ import time
from obitools3.obidms._obidms import OBIDMS
def bufferedRead(fileobj,size=100000000):
def bufferedRead(fileobj,size=209715200): ## 200 MB
buffer = fileobj.readlines(size)
while buffer:
for l in buffer:
@ -26,14 +26,16 @@ if __name__ == '__main__':
view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")
# for i in range(35000000) :
# if (not (i%500000)) :
# print(str(time.time())+'\t'+str(i))
# id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
# view[i].set_id(id)
for i in range(35000000) :
if (not (i%500000)) :
print(str(time.time())+'\t'+str(i))
id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
view[i].set_id(id)
if id != view[i]["ID"] :
print("nope", id, view[i]["ID"])
input_file = open(args.input_file, 'r')
input_file_buffered = bufferedRead(input_file)
# input_file = open(args.input_file, 'r')
# input_file_buffered = bufferedRead(input_file)
#
# if args.input_file[-1:] == "a" :
@ -111,37 +113,37 @@ if __name__ == '__main__':
# l = 0
# next = False
#
l=0
i=0
# l=0
# i=0
# while (True):
# l+=1
# line = input_file.readline()
# if line=="":
# break
for line in input_file_buffered :
# for line in input_file_buffered :
#
# #if i > 1E7 :
# # print('hmm?')
#
# #if i == 10000000 :
# # break
# if i == 6000000 :
# break
#
if l%4 == 0 :
# if l%4 == 0 :
#
if (not (i%500000)) :
print(str(time.time())+'\t'+str(i))
# if (not (i%500000)) :
# print(str(time.time())+'\t'+str(i))
# #
# # #print("header", line)
# #
id = line.split(" ", 1)[0][1:]
print(id)
# id = line.split(" ", 1)[0][1:]
# print(id)
# # #rest = (line[:-1].split(" ", 1)[1]).split(";")
view[i].set_id(id)
#print(view[i]["ID"])
# view[i].set_id(id)
# print(view[i]["ID"])
#
i+=1
# i+=1
l+=1
# l+=1
#
# # description = ""
# # for j in range(len(rest)) :
@ -186,7 +188,7 @@ if __name__ == '__main__':
# l+=1
#
#
input_file.close()
# input_file.close()
#print(view)
print(view.__repr__())