Adds class for uncompressing transparently compressed files on line
This commit is contained in:
0
python/obitools3/files/__init__.py
Normal file
0
python/obitools3/files/__init__.py
Normal file
16
python/obitools3/files/uncompress.pxd
Normal file
16
python/obitools3/files/uncompress.pxd
Normal file
@ -0,0 +1,16 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef class MagicKeyFile:
|
||||
cdef str stream_mode
|
||||
cdef object binary
|
||||
cdef bytes key
|
||||
cdef int keylength
|
||||
cdef int pos
|
||||
|
||||
cpdef bytes read(self,int size=?)
|
||||
cpdef int tell(self)
|
||||
|
||||
|
||||
cdef class CompressedFile:
|
||||
cdef object accessor
|
||||
|
111
python/obitools3/files/uncompress.pyx
Normal file
111
python/obitools3/files/uncompress.pyx
Normal file
@ -0,0 +1,111 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 28 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import zipfile
|
||||
import bz2
|
||||
import gzip
|
||||
|
||||
import io
|
||||
|
||||
cdef class MagicKeyFile:
|
||||
def __init__(self,stream,length=2):
|
||||
|
||||
self.stream_mode = None
|
||||
if hasattr(stream, "mode"):
|
||||
self.stream_mode = stream.mode
|
||||
if 'b' in stream.mode:
|
||||
binary=stream
|
||||
elif hasattr(stream, "buffer") and 'b' in stream.buffer.mode:
|
||||
binary=stream.buffer
|
||||
else:
|
||||
self.stream_mode = None
|
||||
|
||||
if self.stream_mode is None:
|
||||
raise TypeError("stream does not present the good interface")
|
||||
|
||||
self.binary=binary
|
||||
self.key=binary.read(length)
|
||||
self.keylength=length
|
||||
self.pos=0
|
||||
|
||||
cpdef bytes read(self,int size=-1):
|
||||
cdef bytes r
|
||||
|
||||
if self.pos < self.keylength:
|
||||
if size > (self.keylength - self.pos):
|
||||
size = size - self.keylength + self.pos
|
||||
r = self.key[self.pos:] + self.binary.read(size)
|
||||
self.pos=self.keylength + 1
|
||||
elif size >=0 :
|
||||
r = self.key[self.pos:(self.pos+size)]
|
||||
self.pos+=size
|
||||
else:
|
||||
r = self.key[self.pos:] + self.binary.read(size)
|
||||
self.pos=self.keylength + 1
|
||||
else:
|
||||
r = self.binary.read(size)
|
||||
|
||||
return r
|
||||
|
||||
cpdef int tell(self):
|
||||
cdef int p
|
||||
|
||||
if self.pos < self.keylength:
|
||||
p = self.pos
|
||||
else:
|
||||
p = self.tell()
|
||||
|
||||
return p
|
||||
|
||||
def __getattr__(self,name):
|
||||
return getattr(self.binary, name)
|
||||
|
||||
|
||||
|
||||
cdef class CompressedFile:
|
||||
|
||||
|
||||
def __init__(self,stream):
|
||||
cdef int keylength
|
||||
cdef MagicKeyFile magic
|
||||
cdef str compressor
|
||||
cdef bytes k
|
||||
cdef object c
|
||||
|
||||
cdef dict compress = { 'zip' : (b'\x50\x4b\x03\x04',zipfile.ZipFile),
|
||||
'bz2' : (b'\x42\x5a\x68',bz2.BZ2File),
|
||||
'gz' : (b'\x1f\x8b\x08',gzip.open)
|
||||
}
|
||||
|
||||
keylength = max([len(x[0]) for x in compress.values()])
|
||||
magic=MagicKeyFile(stream,keylength)
|
||||
|
||||
self.accessor = None
|
||||
|
||||
for compressor in compress:
|
||||
k,c = compress[compressor]
|
||||
if magic.key.startswith(k):
|
||||
self.accessor = c(magic)
|
||||
|
||||
if self.accessor is None:
|
||||
self.accessor = magic
|
||||
|
||||
|
||||
if 'b' not in magic.stream_mode:
|
||||
self.accessor = io.TextIOWrapper(self.accessor)
|
||||
|
||||
|
||||
def __getattr__(self,name):
|
||||
return getattr(self.accessor, name)
|
||||
|
||||
def __iter__(self):
|
||||
for x in self.accessor:
|
||||
yield x
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user