First commit

This commit is contained in:
2022-01-13 23:27:39 +01:00
parent dab6549cad
commit f53bf1b804
93 changed files with 11042 additions and 0 deletions

View File

@@ -0,0 +1,238 @@
package obiformats
import (
"compress/gzip"
"encoding/csv"
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// __ecopcr_file__ bundles the parsing state of one ecoPCR result stream.
type __ecopcr_file__ struct {
// file is the stream the CSV reader was built on.
file io.Reader
// csv reads the '|'-separated records that follow the header lines.
csv *csv.Reader
// names counts how many times each sequence name has been seen so far;
// it is used to make duplicated names unique.
names map[string]int
// version is the ecoPCR format version (1 or 2); it selects the column layout.
version int
// mode is the word taken from the "# output in" header line; it prefixes
// the "<mode>_taxid" and "<mode>_name" annotation keys.
mode string
// forward_primer is the primer read from the "# direct strand oligo1" header line.
forward_primer string
// reverse_primer is the primer read from the "# reverse strand oligo2" header line.
reverse_primer string
}
// __readline__ reads stream one byte at a time and returns the bytes found
// before the next '\n'. The newline is consumed but not returned.
//
// Reading stops as well at the end of the stream or on any read error; the
// bytes accumulated so far are then returned, so an exhausted stream yields
// "". Because a single byte is requested per Read, nothing located after the
// newline is consumed from stream.
//
// The line buffer grows on demand: lines of any length are supported.
func __readline__(stream io.Reader) string {
	line := make([]byte, 0, 1024)
	char := make([]byte, 1)

	for {
		n, err := stream.Read(char)
		if err != nil || n != 1 || char[0] == '\n' {
			break
		}
		line = append(line, char[0])
	}

	return string(line)
}
// __read_ecopcr_bioseq__ reads the next record from file.csv and converts it
// into a BioSequence whose annotations hold the ecoPCR columns.
//
// Column layout (0-based). Columns 0 to 14 are shared by both versions:
// name, seq_length, taxid, rank, species taxid/name, genus taxid/name,
// family taxid/name, <mode> taxid/name, strand, forward_match,
// forward_mismatch. Then:
//
//	version 1: 15 reverse_match, 16 reverse_mismatch, 17 amplicon_length,
//	           18 sequence, 19 comment
//	version 2: 15 forward_tm, 16 reverse_match, 17 reverse_mismatch,
//	           18 reverse_tm, 19 amplicon_length, 20 sequence, 21 comment
//
// A sequence name already met in the same file gets a "_<count>" suffix so
// that identifiers stay unique.
//
// The error returned by the CSV reader (io.EOF included) is passed through
// unchanged. A record with too few fields is reported as an error instead of
// triggering an index-out-of-range panic.
func __read_ecopcr_bioseq__(file *__ecopcr_file__) (obiseq.BioSequence, error) {
	record, err := file.csv.Read()
	if err != nil {
		return obiseq.NilBioSequence, err
	}

	// Version 2 inserts two melting temperature columns.
	shift := 0
	if file.version == 2 {
		shift = 2
	}
	if expected := 20 + shift; len(record) < expected {
		return obiseq.NilBioSequence,
			fmt.Errorf("ecoPCR record has %d fields, at least %d are expected", len(record), expected)
	}

	text := func(i int) string {
		return strings.TrimSpace(record[i])
	}
	// Unparsable integers are stored as 0, as the conversion error is
	// deliberately not reported for these informative columns.
	integer := func(i int) int {
		value, _ := strconv.Atoi(text(i))
		return value
	}
	// A melting temperature that cannot be parsed is stored as -1.
	temperature := func(i int) interface{} {
		value, err := strconv.ParseFloat(text(i), 64)
		if err == nil {
			return value
		}
		return -1
	}

	name := text(0)

	// Ensure that sequence name is unique across a file.
	if val, ok := file.names[name]; ok {
		file.names[name]++
		name = fmt.Sprintf("%s_%d", name, val)
	} else {
		file.names[name] = 1
	}

	sequence := []byte(text(18 + shift))
	comment := text(19 + shift)

	bseq := obiseq.MakeBioSequence(name, sequence, comment)
	annotation := bseq.Annotations()

	annotation["ac"] = name
	annotation["seq_length"] = integer(1)
	annotation["taxid"] = integer(2)
	annotation["rank"] = text(3)
	annotation["species_taxid"] = integer(4)
	annotation["species_name"] = text(5)
	annotation["genus_taxid"] = integer(6)
	annotation["genus_name"] = text(7)
	annotation["family_taxid"] = integer(8)
	annotation["family_name"] = text(9)

	k_m_taxid := file.mode + "_taxid"
	k_m_name := file.mode + "_name"
	annotation[k_m_taxid] = integer(10)
	annotation[k_m_name] = text(11)

	annotation["strand"] = text(12)
	annotation["forward_primer"] = file.forward_primer
	annotation["forward_match"] = text(13)
	annotation["forward_mismatch"] = integer(14)

	// delta counts the version 2 extra columns already passed.
	delta := 0
	if file.version == 2 {
		annotation["forward_tm"] = temperature(15)
		delta++
	}

	annotation["reverse_primer"] = file.reverse_primer
	annotation["reverse_match"] = text(15 + delta)
	annotation["reverse_mismatch"] = integer(16 + delta)

	if file.version == 2 {
		annotation["reverse_tm"] = temperature(17 + delta)
		delta++
	}

	annotation["amplicon_length"] = integer(17 + delta)

	return bseq, nil
}
// ReadEcoPCRBatch parses an ecoPCR result stream and returns an iterator over
// batches of sequences.
//
// The first 11 bytes are compared with "#@ecopcr-v2" to select the format
// version (1 when they differ or when the stream is shorter). The header is
// then scanned for the "# direct strand oligo1", "# reverse strand oligo2"
// and "# output in" lines, which provide the two primers and the mode. The
// remaining lines are read as '|'-separated records, '#' lines being
// comments.
//
// Records are read by a goroutine that groups them in batches of
// opt.BatchSize() sequences; the returned iterator's channel is closed by a
// second goroutine once the reader is done. A read error other than io.EOF
// makes the reading goroutine panic.
//
// NOTE(review): __readline__ returns "" both for an empty line and for an
// exhausted stream, so a stream lacking one of the three header lines makes
// the corresponding loop spin forever.
func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch {
	tag := make([]byte, 11)
	// io.ReadFull keeps reading until the tag is complete: a single Read is
	// allowed to return fewer bytes than requested. The error is ignored on
	// purpose, a short stream is simply not a version 2 file.
	n, _ := io.ReadFull(reader, tag)

	version := 1
	if n == 11 && string(tag) == "#@ecopcr-v2" {
		version = 2
	}

	line := __readline__(reader)
	for !strings.HasPrefix(line, "# direct strand oligo1") {
		line = __readline__(reader)
	}
	forward_primer := (strings.Split(line, " "))[6]

	line = __readline__(reader)
	for !strings.HasPrefix(line, "# reverse strand oligo2") {
		line = __readline__(reader)
	}
	reverse_primer := (strings.Split(line, " "))[5]

	line = __readline__(reader)
	for !strings.HasPrefix(line, "# output in") {
		line = __readline__(reader)
	}
	mode := (strings.Split(line, " "))[3]

	file := csv.NewReader(reader)
	file.Comma = '|'
	file.Comment = '#'
	file.TrimLeadingSpace = true
	file.ReuseRecord = true

	log.Printf("EcoPCR file version : %d Mode : %s\n", version, mode)

	ecopcr := __ecopcr_file__{
		file:           reader,
		csv:            file,
		names:          make(map[string]int),
		version:        version,
		mode:           mode,
		forward_primer: forward_primer,
		reverse_primer: reverse_primer}

	opt := MakeOptions(options)
	new_iter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
	new_iter.Add(1)

	// Closes the output channel once the reading goroutine is done.
	go func() {
		new_iter.Wait()
		close(new_iter.Channel())
	}()

	go func() {
		seq, err := __read_ecopcr_bioseq__(&ecopcr)
		slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
		i := 0  // order of the current batch
		ii := 0 // number of sequences in the current batch

		for err == nil {
			slice = append(slice, seq)
			ii++
			if ii >= opt.BatchSize() {
				new_iter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
				slice = make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
				i++
				ii = 0
			}
			seq, err = __read_ecopcr_bioseq__(&ecopcr)
		}

		// Last, possibly incomplete, batch.
		if len(slice) > 0 {
			new_iter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
		}

		new_iter.Done()

		if err != nil && err != io.EOF {
			log.Panicf("%+v", err)
		}
	}()

	return new_iter
}
// ReadEcoPCR parses an ecoPCR result stream like ReadEcoPCRBatch, then
// reorders the batches and exposes them as an iterator over single sequences.
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiseq.IBioSequence {
	batches := ReadEcoPCRBatch(reader, options...)
	ordered := batches.SortBatches()
	return ordered.IBioSequence()
}
// ReadEcoPCRBatchFromFile opens filename, transparently decompresses it when
// it is gzip compressed, and parses it with ReadEcoPCRBatch.
//
// An error is returned when the file cannot be opened, or when it is not
// gzip data and cannot be rewound after the gzip detection.
//
// NOTE(review): the file stays open after the call because it is read by the
// goroutines started by ReadEcoPCRBatch; nothing closes it afterwards.
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	file, err := os.Open(filename)
	if err != nil {
		log.Printf("open file error: %+v", err)
		return obiseq.NilIBioSequenceBatch, err
	}

	var reader io.Reader = file

	// Test if the flux is compressed by gzip
	greader, err := gzip.NewReader(file)
	if err == nil {
		reader = greader
	} else {
		// The failed detection has already consumed the beginning of the
		// file: go back to the start before parsing it as plain text.
		if _, err = file.Seek(0, io.SeekStart); err != nil {
			file.Close()
			log.Printf("open file error: %+v", err)
			return obiseq.NilIBioSequenceBatch, err
		}
	}

	return ReadEcoPCRBatch(reader, options...), nil
}
// ReadEcoPCRFromFile is the single sequence flavour of
// ReadEcoPCRBatchFromFile: batches are reordered and flattened.
//
// When the file cannot be read, the error is returned together with the zero
// value of the iterator; the failed batch iterator is not touched.
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
	ib, err := ReadEcoPCRBatchFromFile(filename, options...)
	if err != nil {
		var none obiseq.IBioSequence
		return none, err
	}
	return ib.SortBatches().IBioSequence(), nil
}

246
pkg/obiformats/embl_read.go Normal file
View File

@@ -0,0 +1,246 @@
package obiformats
import (
"bufio"
"bytes"
"compress/gzip"
"io"
"log"
"os"
"strconv"
"strings"
"time"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// __FILE_CHUNK_SIZE__ is the size in bytes (1 << 20, i.e. 1 MiB) of the
// buffers allocated by __read_flat_file_chunk__ each time it has emitted a chunk.
var __FILE_CHUNK_SIZE__ = 1 << 20
// __slice_grow__ currently returns slice unchanged; despite its name it does
// not grow anything.
func __slice_grow__(slice []string) []string {
return slice
}
// __embl_chunk__ pairs a set of entries, each one a slice of strings, with
// an order number.
// NOTE(review): not referenced by the code visible in this file.
type __embl_chunk__ struct {
entries [][]string
order int
}
// __file_chunk__ is a piece of the input file handed by
// __read_flat_file_chunk__ to the parsing goroutines.
type __file_chunk__ struct {
// raw gives access to the bytes of the chunk.
raw io.Reader
// order is the rank of the chunk in the file, starting at 0; it becomes
// the order of the sequence batch built from the chunk.
order int
}
// __end_of_last_entry__ locates the end of the last complete entry stored in
// buff. An entry ends with a "//" line, that is with the byte pattern
//
//	<LF>//<CR>?<LF>
//
// The function returns the index of the byte following the final <LF> of the
// last occurrence of that pattern, so that buff[:index] contains only
// complete entries. It returns -1 when buff contains no such pattern.
//
// The buffer is scanned backward with a small state machine:
//
//	state 0: outside of the pattern
//	state 1: the final <LF> has been matched
//	state 2: the optional <CR> has been matched
//	state 3: the last '/' has been matched
//	state 4: the first '/' has been matched
//	state 5: the <LF> preceding "//" has been matched, pattern complete
func __end_of_last_entry__(buff []byte) int {
	state := 0
	start := 0

	for i := len(buff) - 1; i >= 0 && state < 5; i-- {
		switch state {
		case 0: // outside of the pattern
			if buff[i] == '\n' {
				state = 1
			}
		case 1: // a \n has been matched
			// The matched \n is at i+1: the entry ends just after it.
			start = i + 2
			switch buff[i] {
			case '\r':
				state = 2
			case '/':
				state = 3
			case '\n':
				state = 1
			default:
				state = 0
			}
		case 2: // a \r has been matched
			switch buff[i] {
			case '/':
				state = 3
			case '\n':
				state = 1
			default:
				state = 0
			}
		case 3: // the last / has been matched
			switch buff[i] {
			case '/':
				state = 4
			case '\n':
				state = 1
			default:
				state = 0
			}
		case 4: // the first / has been matched
			switch buff[i] {
			case '\n':
				state = 5
			default:
				state = 0
			}
		}
	}

	// Success is defined by the final state, not by the position reached:
	// the pattern may start on the very first bytes of the buffer.
	if state == 5 {
		return start
	}
	return -1
}
// __parse_embl_file__ consumes file chunks from input until that channel is
// closed, parses every chunk line by line and sends, for each chunk, one
// batch of sequences (carrying the chunk order) on the channel of out.
// out.Done() is called once input is exhausted.
//
// Lines are dispatched on their prefix: ID gives the identifier, OS the
// scientific name, DE lines are joined with a space into the definition,
// FH/FT lines are accumulated as the feature table, a taxon db_xref
// qualifier gives the taxid, sequence lines are concatenated and a "//"
// line closes the entry.
//
// NOTE(review): the slice offsets used below (5 and 37) are larger than the
// prefix literals tested just before them as they appear here; a line
// shorter than the offset would panic — confirm the literals against the
// upstream file.
// NOTE(review): id, scientific_name and taxid are not reset after an entry,
// so an entry lacking one of them inherits the value of the previous one.
// NOTE(review): scanner.Err() is never checked.
func __parse_embl_file__(input <-chan __file_chunk__, out obiseq.IBioSequenceBatch) {
for chunks := range input {
scanner := bufio.NewScanner(chunks.raw)
order := chunks.order
sequences := make(obiseq.BioSequenceSlice, 0, 100)
id := ""
scientific_name := ""
def_bytes := new(bytes.Buffer)
feat_bytes := new(bytes.Buffer)
seq_bytes := new(bytes.Buffer)
// Default taxid used when no taxon qualifier is met.
taxid := 1
for scanner.Scan() {
line := scanner.Text()
switch {
case strings.HasPrefix(line, "ID "):
// The identifier is what precedes the first ';'.
id = strings.SplitN(line[5:], ";", 2)[0]
case strings.HasPrefix(line, "OS "):
scientific_name = strings.TrimSpace(line[5:])
case strings.HasPrefix(line, "DE "):
// Multi-line definitions are joined with a single space.
if def_bytes.Len() > 0 {
def_bytes.WriteByte(' ')
}
def_bytes.WriteString(strings.TrimSpace(line[5:]))
case strings.HasPrefix(line, "FH "):
feat_bytes.WriteString(line)
case line == "FH":
feat_bytes.WriteByte('\n')
feat_bytes.WriteString(line)
case strings.HasPrefix(line, "FT "):
feat_bytes.WriteByte('\n')
feat_bytes.WriteString(line)
if strings.HasPrefix(line, `FT /db_xref="taxon:`) {
// The taxid is the text located between offset 37 and the closing quote.
taxid, _ = strconv.Atoi(strings.SplitN(line[37:], `"`, 2)[0])
}
case strings.HasPrefix(line, " "):
// Sequence line: the first six space-separated parts are appended,
// the seventh part is ignored.
parts := strings.SplitN(line[5:], " ", 7)
for i := 0; i < 6; i++ {
seq_bytes.WriteString(parts[i])
}
case line == "//":
// End of entry: build the sequence and start new buffers.
sequence := obiseq.MakeBioSequence(id,
seq_bytes.Bytes(),
def_bytes.String())
sequence.SetFeatures(feat_bytes.String())
annot := sequence.Annotations()
annot["scientific_name"] = scientific_name
annot["taxid"] = taxid
// log.Println(FormatFasta(sequence, FormatFastSeqJsonHeader))
sequences = append(sequences, sequence)
def_bytes = new(bytes.Buffer)
feat_bytes = new(bytes.Buffer)
seq_bytes = new(bytes.Buffer)
}
}
out.Channel() <- obiseq.MakeBioSequenceBatch(order, sequences...)
}
out.Done()
}
// __read_flat_file_chunk__ splits the content of reader into chunks made of
// complete entries only and sends them, numbered from 0, on readers. The
// channel is closed when the input is exhausted.
//
// A buffer is filled from reader, cut after the last entry terminator found
// by __end_of_last_entry__, and the bytes following that position are
// carried over to the next buffer.
//
// Two situations are handled explicitly:
//   - no terminator in a full buffer (entry larger than the buffer): the
//     buffer is enlarged and reading goes on;
//   - end of input: everything left is sent as the last chunk, so a final
//     entry whose "//" line lacks a trailing newline is not lost. Nothing is
//     sent when nothing is left.
//
// A read error other than io.EOF stops the reading in the same way as the
// end of input and is not reported.
func __read_flat_file_chunk__(reader io.Reader, readers chan __file_chunk__) {
	var err error

	buff := make([]byte, __FILE_CHUNK_SIZE__)
	filled := 0 // number of valid bytes in buff
	order := 0  // rank of the next chunk

	for err == nil {
		// Fill the buffer.
		for err == nil && filled < len(buff) {
			var size int
			size, err = reader.Read(buff[filled:])
			filled += size
		}
		data := buff[:filled]

		end := __end_of_last_entry__(data)
		switch {
		case err != nil:
			// Input exhausted: flush everything that remains.
			end = filled
		case end < 0:
			// No complete entry yet: enlarge the buffer and keep reading.
			larger := make([]byte, 2*len(buff))
			copy(larger, data)
			buff = larger
			continue
		}

		if end > 0 {
			readers <- __file_chunk__{bytes.NewBuffer(data[:end]), order}
			order++
		}

		// Start a new buffer with the beginning of the incomplete entry.
		remains := data[end:]
		size := __FILE_CHUNK_SIZE__
		if len(remains) >= size {
			size = 2 * len(remains)
		}
		buff = make([]byte, size)
		copy(buff, remains)
		filled = len(remains)
	}

	close(readers)
}
// ReadEMBLBatch parses an EMBL flat file stream and returns an iterator over
// batches of sequences.
//
// One goroutine cuts the stream into chunks of complete entries
// (__read_flat_file_chunk__) and two goroutines parse those chunks
// (__parse_embl_file__). A last goroutine waits for the parsers, then for the
// output channel to be empty, and closes it. The number of parsers is fixed
// to two; opt.ParallelWorkers() is not used.
func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch {
	const nparsers = 2

	opt := MakeOptions(options)
	chunks := make(chan __file_chunk__, opt.BufferSize())
	batches := obiseq.MakeIBioSequenceBatch(opt.BufferSize())

	batches.Add(nparsers)

	go func() {
		batches.Wait()
		for len(batches.Channel()) > 0 {
			time.Sleep(time.Millisecond)
		}
		close(batches.Channel())
	}()

	for worker := 0; worker < nparsers; worker++ {
		go __parse_embl_file__(chunks, batches)
	}

	go __read_flat_file_chunk__(reader, chunks)

	return batches
}
// ReadEMBL parses an EMBL flat file stream like ReadEMBLBatch, then reorders
// the batches and exposes them as an iterator over single sequences.
func ReadEMBL(reader io.Reader, options ...WithOption) obiseq.IBioSequence {
	batches := ReadEMBLBatch(reader, options...)
	ordered := batches.SortBatches()
	return ordered.IBioSequence()
}
// ReadEMBLBatchFromFile opens filename, transparently decompresses it when it
// is gzip compressed, and parses it with ReadEMBLBatch.
//
// An error is returned when the file cannot be opened, or when it is not
// gzip data and cannot be rewound after the gzip detection.
//
// NOTE(review): the file stays open after the call because it is read by the
// goroutines started by ReadEMBLBatch; nothing closes it afterwards.
func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	file, err := os.Open(filename)
	if err != nil {
		log.Printf("open file error: %+v", err)
		return obiseq.NilIBioSequenceBatch, err
	}

	var reader io.Reader = file

	// Test if the flux is compressed by gzip
	greader, err := gzip.NewReader(file)
	if err == nil {
		reader = greader
	} else {
		// The failed detection has already consumed the beginning of the
		// file: go back to the start before parsing it as plain text.
		if _, err = file.Seek(0, io.SeekStart); err != nil {
			file.Close()
			log.Printf("open file error: %+v", err)
			return obiseq.NilIBioSequenceBatch, err
		}
	}

	return ReadEMBLBatch(reader, options...), nil
}
// ReadEMBLFromFile is the single sequence flavour of ReadEMBLBatchFromFile:
// batches are reordered and flattened.
//
// When the file cannot be read, the error is returned together with the zero
// value of the iterator; the failed batch iterator is not touched.
func ReadEMBLFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
	ib, err := ReadEMBLBatchFromFile(filename, options...)
	if err != nil {
		var none obiseq.IBioSequence
		return none, err
	}
	return ib.SortBatches().IBioSequence(), nil
}

View File

@@ -0,0 +1,30 @@
package obiformats
import (
"strings"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// ParseGuessedFastSeqHeader parses the annotations stored in the definition
// of sequence, selecting the parser from the first character: a definition
// starting with "{" is handed to ParseFastSeqJsonHeader, any other one to
// ParseFastSeqOBIHeader.
func ParseGuessedFastSeqHeader(sequence obiseq.BioSequence) {
	parse := ParseFastSeqOBIHeader
	if strings.HasPrefix(sequence.Definition(), "{") {
		parse = ParseFastSeqJsonHeader
	}
	parse(sequence)
}
// IParseFastSeqHeaderBatch returns an iterator applying the header parser
// selected by the options to every sequence of iterator. The work is
// distributed over opt.ParallelWorkers() workers and the resulting iterator
// uses a buffer of opt.BufferSize().
func IParseFastSeqHeaderBatch(iterator obiseq.IBioSequenceBatch, options ...WithOption) obiseq.IBioSequenceBatch {
	opt := MakeOptions(options)
	worker := obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader())
	return iterator.MakeIWorker(worker, opt.ParallelWorkers(), opt.BufferSize())
}
// IParseFastSeqHeader is the single sequence flavour of
// IParseFastSeqHeaderBatch: the sequences are grouped in batches of
// opt.BatchSize(), parsed, then the batches are reordered and flattened.
func IParseFastSeqHeader(iterator obiseq.IBioSequence, options ...WithOption) obiseq.IBioSequence {
	opt := MakeOptions(options)
	batched := iterator.IBioSequenceBatch(opt.BatchSize(), opt.BufferSize())
	parsed := IParseFastSeqHeaderBatch(batched, options...)
	return parsed.SortBatches().IBioSequence()
}

View File

@@ -0,0 +1,5 @@
package obiformats
import "git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
// FormatHeader is the signature of the functions converting the annotations
// of a sequence into the text written on its FASTA/FASTQ title line.
type FormatHeader func(sequence obiseq.BioSequence) string

View File

@@ -0,0 +1,66 @@
package obiformats
import (
"strings"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
"github.com/goccy/go-json"
)
// _parse_json_header_ extracts the first top-level {...} block of header,
// decodes it as JSON into annotations and returns what follows the block,
// trimmed of surrounding spaces.
//
// header is returned unchanged when it contains no complete {...} block, or
// when that block is not valid JSON (nothing is stripped from a header that
// could not be decoded).
//
// Braces are matched by simple counting: a brace located inside a JSON
// string is counted like any other one.
func _parse_json_header_(header string, annotations obiseq.Annotation) string {
	start := -1
	stop := -1
	level := 0
	lh := len(header)

	for i := 0; (i < lh) && (stop < 0); i++ {
		if level == 0 && header[i] == '{' {
			start = i
		}
		if header[i] == '{' {
			level++
		}
		if header[i] == '}' {
			level--
		}
		// Back to level 0 after an opening brace: the block is complete.
		if start >= 0 && level == 0 {
			stop = i
		}
	}

	if start < 0 || stop < 0 {
		return header
	}

	stop++

	// Unmarshal requires a pointer. The map pointed to is reused when it is
	// not nil, so the caller's annotations receive the decoded entries.
	if err := json.Unmarshal([]byte(header[start:stop]), &annotations); err != nil {
		return header
	}

	return strings.TrimSpace(header[stop:])
}
// ParseFastSeqJsonHeader decodes the JSON block of the definition of sequence
// into its annotations and replaces the definition by the text returned by
// _parse_json_header_.
func ParseFastSeqJsonHeader(sequence obiseq.BioSequence) {
	remaining := _parse_json_header_(sequence.Definition(), sequence.Annotations())
	sequence.SetDefinition(remaining)
}
// FormatFastSeqJsonHeader returns the annotations of sequence encoded as a
// JSON object, or "" when the sequence has no annotation map. It panics if
// the annotations cannot be encoded.
func FormatFastSeqJsonHeader(sequence obiseq.BioSequence) string {
	if sequence.Annotations() == nil {
		return ""
	}

	encoded, err := json.Marshal(sequence.Annotations())
	if err != nil {
		panic(err)
	}
	return string(encoded)
}

View File

@@ -0,0 +1,288 @@
package obiformats
import (
"bytes"
"fmt"
"regexp"
"strconv"
"strings"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
"github.com/goccy/go-json"
)
// __obi_header_value_string_pattern__ matches, at the beginning of the text,
// a value enclosed in single or double quotes, optionally surrounded by
// spaces and followed by the ';' terminator.
var __obi_header_value_string_pattern__ = regexp.MustCompile(`^\s*('[^']*'|"[^"]*")\s*;`)

// __obi_header_value_numeric_pattern__ matches, at the beginning of the text,
// an integer or floating point value (optional sign, optional exponent),
// optionally surrounded by spaces and followed by the ';' terminator.
var __obi_header_value_numeric_pattern__ = regexp.MustCompile(`^\s*([+-]?\.\d+|[+-]?\d+(\.\d*)?([eE][+-]?\d+)?)\s*;`)
// __match__dict__ checks whether text starts, after optional spaces or tabs,
// with a {...} block followed by optional spaces or tabs and a ';'.
//
// On success it returns {start, stop}: start is the index of the opening
// brace and stop the index following the ';'. Otherwise an empty slice is
// returned. Nested braces are supported and braces located inside a single
// or double quoted string are ignored.
func __match__dict__(text []byte) []int {
	const (
		beforeDict = iota // only blanks met so far
		insideDict        // between the outermost braces
		afterDict         // outermost closing brace met, waiting for ';'
	)

	phase := beforeDict
	depth := 0
	begin := 0
	var quote byte // quote character of the string being read, 0 outside strings

	for pos, c := range text {
		blank := c == ' ' || c == '\t'

		if phase == afterDict {
			if c == ';' {
				return []int{begin, pos + 1}
			}
			if !blank {
				// Only blanks are allowed between '}' and ';'.
				return []int{}
			}
		}

		if c == '{' && quote == 0 {
			depth++
			if phase == beforeDict {
				phase = insideDict
				begin = pos
			}
			continue
		}

		if phase == beforeDict && !blank {
			// The first significant character is not '{'.
			return []int{}
		}

		if phase == insideDict && (c == '"' || c == '\'') {
			switch quote {
			case 0:
				quote = c
			case c:
				quote = 0
			}
			continue
		}

		if c == '}' && quote == 0 {
			depth--
			if depth == 0 {
				phase++
			}
		}
	}

	return []int{}
}
// __match__key__ checks whether text starts, after optional spaces or tabs,
// with a key followed by optional spaces or tabs and a '='.
//
// A key starts with an ASCII letter and goes on with ASCII letters, digits,
// '_', '-' or '.'. On success the function returns {start, stop}: start is
// the index of the first character of the key and stop the index following
// the '='. Otherwise an empty slice is returned.
func __match__key__(text []byte) []int {
	const (
		seeking  = iota // skipping the leading blanks
		inKey           // reading the key
		afterKey        // key finished, waiting for '='
	)

	letter := func(c byte) bool {
		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
	}
	blank := func(c byte) bool {
		return c == ' ' || c == '\t'
	}

	phase := seeking
	begin := 0

	for pos, c := range text {
		switch phase {
		case seeking:
			switch {
			case letter(c):
				phase = inKey
				begin = pos
			case !blank(c):
				// It's not a key
				return []int{}
			}
		case inKey:
			switch {
			case c == '=':
				return []int{begin, pos + 1}
			case blank(c):
				phase = afterKey
			case letter(c), c >= '0' && c <= '9', c == '_', c == '-', c == '.':
				// the key goes on
			default:
				// Not allowed character in a key
				return []int{}
			}
		case afterKey:
			if c == '=' {
				return []int{begin, pos + 1}
			}
			if !blank(c) {
				// Not allowed character after a key
				return []int{}
			}
		}
	}

	return []int{} // Not a key
}
// __match__general__ matches any value terminated by a ';'. It returns
// {0, stop}, stop being the index following the first ';' of text, or an
// empty slice when text contains no ';'.
func __match__general__(text []byte) []int {
	semicolon := bytes.IndexByte(text, ';')
	if semicolon < 0 {
		return []int{} // Not generic value
	}
	return []int{0, semicolon + 1}
}
// Accepted spellings of the boolean values in OBI headers.
var (
	__false__ = []byte("false")
	__False__ = []byte("False")
	__FALSE__ = []byte("FALSE")
	__true__  = []byte("true")
	__True__  = []byte("True")
	__TRUE__  = []byte("TRUE")
)

// __is_true__ reports whether text is one of the spellings of true:
// "t", "T", "true", "True" or "TRUE".
func __is_true__(text []byte) bool {
	if len(text) == 1 {
		return text[0] == 't' || text[0] == 'T'
	}
	for _, spelling := range [][]byte{__true__, __True__, __TRUE__} {
		if bytes.Equal(text, spelling) {
			return true
		}
	}
	return false
}

// __is_false__ reports whether text is one of the spellings of false:
// "f", "F", "false", "False" or "FALSE".
func __is_false__(text []byte) bool {
	if len(text) == 1 {
		return text[0] == 'f' || text[0] == 'F'
	}
	for _, spelling := range [][]byte{__false__, __False__, __FALSE__} {
		if bytes.Equal(text, spelling) {
			return true
		}
	}
	return false
}
// __parse_obi_value__ decodes the value located at the beginning of part,
// the text following the '=' of key. It returns the decoded value and the
// index of the byte following the ';' terminating the value, or (nil, -1)
// when part contains no terminated value.
//
// The value types are tried in this order:
//   - numeric: stored as float64;
//   - quoted string: stored without its quotes;
//   - dictionary: single quotes are replaced by double quotes and the text is
//     decoded as JSON, into a map[string]int when key starts with "merged_"
//     or ends with "_count", into a map[string]interface{} otherwise; the
//     raw text is stored when the decoding fails;
//   - anything else: stored as a boolean when it is a spelling of true or
//     false, as a string otherwise.
func __parse_obi_value__(key string, part []byte) (interface{}, int) {
	// numeric value
	if m := __obi_header_value_numeric_pattern__.FindIndex(part); len(m) > 0 {
		bvalue := bytes.TrimSpace(part[m[0]:(m[1] - 1)])
		value, _ := strconv.ParseFloat(string(bvalue), 64)
		return value, m[1]
	}

	// string value
	if m := __obi_header_value_string_pattern__.FindIndex(part); len(m) > 0 {
		bvalue := bytes.TrimSpace(part[m[0]:(m[1] - 1)])
		return string(bvalue[1:(len(bvalue) - 1)]), m[1]
	}

	// dict value
	if m := __match__dict__(part); len(m) > 0 {
		bvalue := bytes.TrimSpace(part[m[0]:(m[1] - 1)])
		j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))

		var value interface{}
		var err error
		if strings.HasPrefix(key, "merged_") ||
			strings.HasSuffix(key, "_count") {
			dict := make(map[string]int)
			err = json.Unmarshal(j, &dict)
			value = dict
		} else {
			dict := make(map[string]interface{})
			err = json.Unmarshal(j, &dict)
			value = dict
		}
		if err != nil {
			value = string(bvalue)
		}
		return value, m[1]
	}

	// Generic value
	if m := __match__general__(part); len(m) > 0 {
		bvalue := bytes.TrimSpace(part[m[0]:(m[1] - 1)])
		switch {
		case __is_false__(bvalue):
			return false, m[1]
		case __is_true__(bvalue):
			return true, m[1]
		}
		return string(bvalue), m[1]
	}

	// no value
	return nil, -1
}

// ParseFastSeqOBIHeader parses the "key=value;" pairs located at the
// beginning of the definition of sequence, stores them in its annotations and
// replaces the definition by the remaining text, trimmed of surrounding
// spaces.
//
// The parsing stops at the first text that is not a key followed by '=', or
// at the first key whose value is not terminated by a ';'.
//
// The text following a value is resumed right after its ';': a pair ending
// the definition no longer slices past the end of the text, and no character
// is skipped when pairs are not separated by a space.
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
	definition := []byte(sequence.Definition())
	annotations := sequence.Annotations()

	d := definition

	for m := __match__key__(d); len(m) > 0; m = __match__key__(d) {
		key := string(bytes.TrimSpace(d[m[0]:(m[1] - 1)]))
		part := d[m[1]:]

		value, stop := __parse_obi_value__(key, part)
		if stop < 0 {
			break
		}

		annotations[key] = value
		d = part[stop:]
	}

	sequence.SetDefinition(string(bytes.TrimSpace(d)))
}
// FormatFastSeqOBIHeader returns the annotations of sequence formatted as a
// succession of "key=value; " pairs, or "" when the sequence has no
// annotation map. The pairs follow the iteration order of the map.
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
	annotations := sequence.Annotations()
	if annotations == nil {
		return ""
	}

	var text strings.Builder
	for key, value := range annotations {
		if s, isString := value.(string); isString {
			text.WriteString(fmt.Sprintf("%s=%s; ", key, s))
		} else {
			text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
		}
	}
	return text.String()
}

View File

@@ -0,0 +1,104 @@
#include "fastseq_read.h"
/**
 * @brief build a sequence iterator on an already opened gzFile
 *
 * On success the iterator takes the ownership of fp. On failure fp is
 * closed, as the callers have no other way to release it.
 *
 * @param fp     the stream to read, may be Z_NULL
 * @param shift  the value stored in the shift field of the iterator
 * @return fast_kseq_t* the new iterator, or NULL when fp is Z_NULL or
 *         when the allocation of the iterator or of its kseq_t fails
 */
static fast_kseq_t* _open_fast_sek(gzFile fp, int shift) {
    fast_kseq_t* iterator;

    if (fp == Z_NULL)
        return NULL;

    iterator = (fast_kseq_t*)malloc(sizeof(fast_kseq_t));
    if (iterator == NULL) {
        gzclose(fp);
        return NULL;
    }

    iterator->filez = fp;
    iterator->finished = false;
    iterator->shift = shift;
    iterator->seq = kseq_init(fp);

    if (iterator->seq == NULL) {
        free(iterator);
        gzclose(fp);
        return NULL;
    }

    return iterator;
}
/**
 * @brief open a FastA or FastQ file, gzipped or not
 *
 * @param filename a const char* indicating the path of the
 *                 fast* file
 * @param shift    the value stored in the shift field of the iterator
 * @return fast_kseq_t* a pointer to a fast_kseq_t structure or NULL on
 *         failing
 */
fast_kseq_t* open_fast_sek_file(const char* filename, int shift) {
    return _open_fast_sek(gzopen(filename, "r"), shift);
}
/**
 * @brief open a FastA or FastQ stream from a file descriptor
 *
 * @param fd        the file descriptor to read
 * @param keep_open when true the stream is built on a duplicate of fd
 *                  obtained with dup(), fd itself is not handed to gzdopen
 * @param shift     the value stored in the shift field of the iterator
 * @return fast_kseq_p the iterator returned by _open_fast_sek
 */
fast_kseq_p open_fast_sek_fd(int fd, bool keep_open, int shift) {
gzFile fp;
if (keep_open)
fd = dup(fd);
fp = gzdopen(fd, "r");
return _open_fast_sek(fp, shift);
}
/**
 * @brief open a FastA or FastQ stream on the standard input
 *
 * The stream is built on a duplicate of the stdin file descriptor
 * (keep_open is true).
 *
 * @param shift the value stored in the shift field of the iterator
 */
fast_kseq_p open_fast_sek_stdin(int shift) {
return open_fast_sek_fd(fileno(stdin), true, shift);
}
/**
 * @brief read the next sequence of the stream into iterator->seq
 *
 * @return -3 when iterator or iterator->seq is NULL. Otherwise the value
 *         returned by kseq_read(), except that a positive value is
 *         replaced by gzoffset() of the underlying file.
 */
int64_t next_fast_sek(fast_kseq_t* iterator) {
int64_t l;
if (iterator == NULL || iterator->seq == NULL)
return -3;
l = kseq_read(iterator->seq);
/* NOTE(review): finished is set only when kseq_read returns exactly 0 —
 * confirm this is the intended end of stream test. */
iterator->finished = l==0;
/* A positive result is replaced by the current offset in the file. */
if (l>0) l = gzoffset(iterator->filez);
return l;
}
/**
 * @brief reset the kseq reader of the iterator with kseq_rewind()
 *
 * NOTE(review): no call rewinds iterator->filez here — confirm whether
 * the caller is expected to reposition the file itself.
 *
 * @return 0, or -3 when iterator or iterator->seq is NULL
 */
int rewind_fast_sek(fast_kseq_t* iterator) {
if (iterator == NULL || iterator->seq == NULL)
return -3;
kseq_rewind(iterator->seq);
return 0;
}
/**
 * @brief release an iterator: its kseq reader, its file and itself
 *
 * @param iterator the iterator to release, may be NULL
 * @return the value returned by gzclose(), or -3 when iterator is NULL
 *         or when it has no file to close
 */
int close_fast_sek(fast_kseq_t* iterator) {
    gzFile fp;
    kseq_t *seq;
    int rep = -3;

    if (iterator == NULL)
        return rep;

    /* Save the fields before freeing the structure holding them. */
    fp = iterator->filez;
    seq = iterator->seq;

    free(iterator);

    /* Use the saved pointer: iterator must not be read after free(). */
    if (seq != NULL)
        kseq_destroy(seq);

    if (fp != Z_NULL)
        rep = gzclose(fp);

    return rep;
}

View File

@@ -0,0 +1,153 @@
package obiformats
// #cgo CFLAGS: -g -Wall
// #cgo LDFLAGS: -lz
// #include <stdlib.h>
// #include "fastseq_read.h"
import "C"
import (
"errors"
"fmt"
"log"
"os"
"time"
"unsafe"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/cutils"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// __fastseq_reader__ reads the sequences of seqfile one by one with the C
// function next_fast_sek, converts them into BioSequence values and sends
// them on the channel of iterator, grouped in batches of batch_size
// sequences numbered from 0. iterator.Done() is called when next_fast_sek
// returns a value that is not positive.
//
// NOTE(review): close_fast_sek is never called on seqfile here — confirm
// where the C iterator and its file are released.
func __fastseq_reader__(seqfile C.fast_kseq_p,
iterator obiseq.IBioSequenceBatch,
batch_size int) {
var comment string
// i is the order of the current batch, ii the number of sequences it holds.
i := 0
ii := 0
slice := make(obiseq.BioSequenceSlice, 0, batch_size)
for l := int64(C.next_fast_sek(seqfile)); l > 0; l = int64(C.next_fast_sek(seqfile)) {
s := seqfile.seq
// The C buffers are copied into Go memory.
sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
C.int(s.seq.l))
name := C.GoString(s.name.s)
if s.comment.l > C.ulong(0) {
comment = C.GoString(s.comment.s)
} else {
comment = ""
}
rep := obiseq.MakeBioSequence(name, sequence, comment)
// A quality string is present: decode it by subtracting the shift
// stored in the C iterator from every character.
if s.qual.l > C.ulong(0) {
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
quality := make(obiseq.Quality, s.qual.l)
l := int(s.qual.l)
shift := uint8(seqfile.shift)
for j := 0; j < l; j++ {
quality[j] = uint8(cquality[j]) - shift
}
rep.SetQualities(quality)
}
slice = append(slice, rep)
ii++
if ii >= batch_size {
// log.Printf("\n==> Pushing sequence batch\n")
// start := time.Now()
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
// elapsed := time.Since(start)
// log.Printf("\n==>sequences pushed after %s\n", elapsed)
slice = make(obiseq.BioSequenceSlice, 0, batch_size)
i++
ii = 0
}
}
// Last, possibly incomplete, batch.
if len(slice) > 0 {
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
}
iterator.Done()
}
// ReadFastSeqBatchFromFile opens the FASTA/FASTQ file filename with the C
// reader and returns an iterator over batches of opt.BatchSize() sequences.
//
// When the options define a header parser, the returned iterator is the one
// produced by IParseFastSeqHeaderBatch.
//
// An error is returned only when the file cannot be opened. A failure of
// os.Stat, used solely to log the file size, is not reported: the iterator
// returned beside a nil error is always usable.
func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	opt := MakeOptions(options)

	name := C.CString(filename)
	defer C.free(unsafe.Pointer(name))

	pointer := C.open_fast_sek_file(name, C.int32_t(opt.QualityShift()))
	if pointer == nil {
		err := errors.New(fmt.Sprintf("Cannot open file %s", filename))
		return obiseq.NilIBioSequenceBatch, err
	}

	// The size is informative only.
	if fi, err := os.Stat(filename); err == nil {
		log.Printf("File size of %s is %d bytes\n", filename, fi.Size())
	}

	new_iter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
	new_iter.Add(1)

	// Closes the output channel once the reader is done and the channel
	// has been emptied.
	go func() {
		new_iter.Wait()
		for len(new_iter.Channel()) > 0 {
			time.Sleep(time.Millisecond)
		}
		close(new_iter.Channel())
		log.Println("End of the fastq file reading")
	}()

	log.Println("Start of the fastq file reading")
	go __fastseq_reader__(pointer, new_iter, opt.BatchSize())

	parser := opt.ParseFastSeqHeader()
	if parser != nil {
		return IParseFastSeqHeaderBatch(new_iter, options...), nil
	}

	return new_iter, nil
}
// ReadFastSeqFromFile is the single sequence flavour of
// ReadFastSeqBatchFromFile: batches are reordered and flattened.
//
// When the file cannot be read, the error is returned together with the zero
// value of the iterator; the failed batch iterator is not touched.
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
	ib, err := ReadFastSeqBatchFromFile(filename, options...)
	if err != nil {
		var none obiseq.IBioSequence
		return none, err
	}
	return ib.SortBatches().IBioSequence(), nil
}
// ReadFastSeqBatchFromStdin reads FASTA/FASTQ sequences from the standard
// input with the C reader and returns an iterator over batches of
// opt.BatchSize() sequences. The channel of the iterator is closed once the
// reading goroutine is done. No header parser is applied here.
func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
	opt := MakeOptions(options)
	batches := obiseq.MakeIBioSequenceBatch(opt.BufferSize())

	batches.Add(1)

	go func() {
		batches.Wait()
		close(batches.Channel())
	}()

	source := C.open_fast_sek_stdin(C.int32_t(opt.QualityShift()))
	batch_size := opt.BatchSize()
	go __fastseq_reader__(source, batches, batch_size)

	return batches
}
// ReadFastSeqFromStdin is the single sequence flavour of
// ReadFastSeqBatchFromStdin: batches are reordered and flattened.
func ReadFastSeqFromStdin(options ...WithOption) obiseq.IBioSequence {
	batches := ReadFastSeqBatchFromStdin(options...)
	ordered := batches.SortBatches()
	return ordered.IBioSequence()
}

View File

@@ -0,0 +1,41 @@
#ifndef _READ_H
#define _READ_H
#include <zlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include "kseq/kseq.h"
KSEQ_INIT(gzFile, gzread)
/**
 * @brief iterator over the sequences of a FastA or FastQ stream
 */
typedef struct {
kseq_t *seq;    /* kseq reader, holds the last sequence read */
bool finished;  /* updated by next_fast_sek after each read */
int16_t shift;  /* shift given when the stream was opened */
gzFile filez;   /* the underlying zlib stream */
} fast_kseq_t, *fast_kseq_p;
/* Open a fast* stream from a path, a file descriptor or the standard
 * input; NULL is returned on failure. */
fast_kseq_t* open_fast_sek_file(const char* filename, int shift);
fast_kseq_t* open_fast_sek_fd(int fd, bool keep_open, int shift);
fast_kseq_t* open_fast_sek_stdin(int shift);
/**
 * @brief read the next sequence on the fast* stream
 *
 * @param iterator a fast_kseq_t* created using one of the
 * open_fast_sek_* functions
 * @return int64_t if greater than 0, the offset in the file reported
 * by gzoffset() after the read; otherwise the value
 * returned by kseq_read() or an error
 * - -1 : no more sequence in the stream
 * - -2 : too short quality sequence
 * - -3 : called with NULL pointer
 */
int64_t next_fast_sek(fast_kseq_t* iterator);
/* Release the iterator and close its stream; returns the gzclose()
 * status, or -3 for a NULL iterator. */
int close_fast_sek(fast_kseq_t* iterator);
/* Reset the kseq reader of the iterator; returns 0, or -3 for a NULL
 * iterator. */
int rewind_fast_sek(fast_kseq_t* iterator);
#endif

View File

@@ -0,0 +1,164 @@
package obiformats
import (
"bytes"
"fmt"
"io"
"log"
"os"
"strings"
"sync"
"time"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// min returns the smaller of its two arguments.
func min(x, y int) int {
	if y <= x {
		return y
	}
	return x
}
// FormatFasta returns the FASTA representation of seq, without a final
// newline. The title line is ">id info definition", info being the text
// produced by formater (empty when formater is nil), and the sequence is
// folded in lines of 60 characters.
//
// An empty sequence produces the title line followed by an empty sequence
// line.
func FormatFasta(seq obiseq.BioSequence, formater FormatHeader) string {
	s := seq.Sequence()
	l := len(s)

	var folded strings.Builder
	folded.Grow(l + int(l/60) + 10)

	// Lines are separated, not terminated, by a newline: nothing has to be
	// removed afterwards, even for an empty sequence.
	for i := 0; i < l; i += 60 {
		if i > 0 {
			folded.WriteByte('\n')
		}
		to := min(i+60, l)
		folded.WriteString(string(s[i:to]))
	}

	info := ""
	if formater != nil {
		info = formater(seq)
	}

	return fmt.Sprintf(">%s %s %s\n%s",
		seq.Id(), info,
		seq.Definition(),
		folded.String())
}
// FormatFastaBatch returns the FASTA representation of every sequence of
// batch, each one followed by a newline.
func FormatFastaBatch(batch obiseq.BioSequenceBatch, formater FormatHeader) []byte {
	var out bytes.Buffer
	sequences := batch.Slice()
	for idx := range sequences {
		out.WriteString(FormatFasta(sequences[idx], formater))
		out.WriteByte('\n')
	}
	return out.Bytes()
}
// WriteFasta writes every sequence of iterator on file in FASTA format, the
// title lines being built with the header formatter defined by the options.
//
// The first write error is returned. After an error nothing more is written,
// but the iterator is still consumed to its end so that the goroutines
// feeding it are not left blocked.
func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error {
	opt := MakeOptions(options)
	header_format := opt.FormatFastSeqHeader()

	var werr error
	for iterator.Next() {
		seq := iterator.Get()
		if werr != nil {
			continue
		}
		_, werr = fmt.Fprintln(file, FormatFasta(seq, header_format))
	}

	return werr
}
// WriteFastaToFile creates (or truncates) filename and writes every sequence
// of iterator in it in FASTA format. The file is closed before returning.
//
// The program is terminated by log.Fatalf when the file cannot be created.
// Otherwise the error of WriteFasta is returned, or the error met when
// closing the file if the writing succeeded.
func WriteFastaToFile(iterator obiseq.IBioSequence,
	filename string,
	options ...WithOption) error {

	file, err := os.Create(filename)
	if err != nil {
		log.Fatalf("open file error: %v", err)
		return err
	}

	// WriteFasta returns once everything is written: closing is safe.
	err = WriteFasta(iterator, file, options...)
	if cerr := file.Close(); err == nil {
		err = cerr
	}

	return err
}
// WriteFastaToStdout writes every sequence of iterator on the standard
// output in FASTA format, see WriteFasta.
func WriteFastaToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
return WriteFasta(iterator, os.Stdout, options...)
}
// WriteFastaBatch writes every batch of iterator on file in FASTA format,
// following the order of the batches.
//
// Four goroutines, each one reading its own Split() of iterator, format the
// batches. The formatted chunks are written by the calling goroutine, which
// keeps aside the chunks received ahead of their turn. The function returns
// when every batch has been written.
//
// The first write error is returned. After an error nothing more is written,
// but the batches are still consumed so that no goroutine is left blocked.
func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) error {
	opt := MakeOptions(options)
	header_format := opt.FormatFastSeqHeader()

	nwriters := 4

	chunkchan := make(chan FileChunck)
	chunkwait := sync.WaitGroup{}
	chunkwait.Add(nwriters)

	// Closes the chunk channel once every formatter is done.
	go func() {
		chunkwait.Wait()
		for len(chunkchan) > 0 {
			time.Sleep(time.Millisecond)
		}
		close(chunkchan)
	}()

	ff := func(iterator obiseq.IBioSequenceBatch) {
		defer chunkwait.Done()
		for iterator.Next() {
			batch := iterator.Get()
			chunkchan <- FileChunck{
				FormatFastaBatch(batch, header_format),
				batch.Order(),
			}
		}
	}

	for i := 0; i < nwriters; i++ {
		go ff(iterator.Split())
	}

	var werr error
	write := func(text []byte) {
		if werr == nil {
			_, werr = file.Write(text)
		}
	}

	next_to_send := 0
	received := make(map[int]FileChunck, 100)

	for chunk := range chunkchan {
		if chunk.order != next_to_send {
			// Received ahead of its turn: keep it for later.
			received[chunk.order] = chunk
			continue
		}

		write(chunk.text)
		next_to_send++

		// Flush the chunks that were waiting for this one.
		for pending, ok := received[next_to_send]; ok; pending, ok = received[next_to_send] {
			write(pending.text)
			delete(received, next_to_send)
			next_to_send++
		}
	}

	return werr
}
// WriteFastaBatchToStdout writes the batches of iterator on the standard
// output in FASTA format, see WriteFastaBatch.
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) error {
return WriteFastaBatch(iterator, os.Stdout, options...)
}
// WriteFastaBatchToFile creates (or truncates) filename and hands it to
// WriteFastaBatch to write the batches of iterator in FASTA format.
//
// The program is terminated by log.Fatalf when the file cannot be created,
// the return statement following that call is therefore never reached.
// NOTE(review): the created file is never closed by this function.
func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch,
filename string,
options ...WithOption) error {
file, err := os.Create(filename)
if err != nil {
log.Fatalf("open file error: %v", err)
return err
}
return WriteFastaBatch(iterator, file, options...)
}

View File

@@ -0,0 +1,168 @@
package obiformats
import (
"bytes"
"fmt"
"io"
"log"
"os"
"time"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// FormatFastq returns the FASTQ representation of seq, without a final
// newline. The title line is "@id info definition", info being the text
// produced by formater (empty when formater is nil). The quality line is
// built by adding quality_shift to every quality score of the sequence.
func FormatFastq(seq obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
	n := seq.Length()
	scores := seq.Qualities()
	encoded := make([]byte, seq.Length())
	for pos := 0; pos < n; pos++ {
		encoded[pos] = uint8(scores[pos]) + uint8(quality_shift)
	}

	var info string
	if formater != nil {
		info = formater(seq)
	}

	return fmt.Sprintf("@%s %s %s\n%s\n+\n%s",
		seq.Id(), info,
		seq.Definition(),
		string(seq.Sequence()),
		string(encoded),
	)
}
// FormatFastqBatch returns the FASTQ representation of every sequence of
// batch, each one followed by a newline.
func FormatFastqBatch(batch obiseq.BioSequenceBatch, quality_shift int,
	formater FormatHeader) []byte {
	var out bytes.Buffer
	sequences := batch.Slice()
	for idx := range sequences {
		out.WriteString(FormatFastq(sequences[idx], quality_shift, formater))
		out.WriteByte('\n')
	}
	return out.Bytes()
}
// WriteFastq writes every sequence of iterator on file in FASTQ format, using
// the header formatter and the quality shift defined by the options.
//
// The first write error is returned. After an error nothing more is written,
// but the iterator is still consumed to its end so that the goroutines
// feeding it are not left blocked.
func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error {
	opt := MakeOptions(options)
	header_format := opt.FormatFastSeqHeader()
	quality := opt.QualityShift()

	var werr error
	for iterator.Next() {
		seq := iterator.Get()
		if werr != nil {
			continue
		}
		_, werr = fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
	}

	return werr
}
// WriteFastqToFile creates (or truncates) filename and writes every sequence
// of iterator in it in FASTQ format. The file is closed before returning.
//
// The program is terminated by log.Fatalf when the file cannot be created.
// Otherwise the error of WriteFastq is returned, or the error met when
// closing the file if the writing succeeded.
func WriteFastqToFile(iterator obiseq.IBioSequence,
	filename string,
	options ...WithOption) error {

	file, err := os.Create(filename)
	if err != nil {
		log.Fatalf("open file error: %v", err)
		return err
	}

	// WriteFastq returns once everything is written: closing is safe.
	err = WriteFastq(iterator, file, options...)
	if cerr := file.Close(); err == nil {
		err = cerr
	}

	return err
}
// WriteFastqToStdout writes every sequence of iterator on the standard
// output in FASTQ format, see WriteFastq.
func WriteFastqToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
return WriteFastq(iterator, os.Stdout, options...)
}
// FileChunck is the formatted text of one batch of sequences together with
// the order of that batch; the batch writers use the order to write the
// chunks in sequence.
type FileChunck struct {
text []byte
order int
}
// WriteFastqBatch writes the batches of iterator on file in FASTQ format,
// following the order of the batches, and returns a new iterator on which
// every batch is forwarded once its text has been handed to the writer.
//
// Four goroutines, each one reading its own Split() of iterator, format the
// batches. A writing goroutine receives the formatted chunks and keeps aside
// those received ahead of their turn. The returned error is always nil.
//
// NOTE(review): the function returns before anything is written and the
// errors of file.Write are ignored; the end of the returned iterator does
// not by itself show that the last chunk has been written — confirm how
// callers synchronise with the writing goroutine.
func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
buffsize := iterator.BufferSize()
new_iter := obiseq.MakeIBioSequenceBatch(buffsize)
opt := MakeOptions(options)
nwriters := 4
chunkchan := make(chan FileChunck)
header_format := opt.FormatFastSeqHeader()
quality := opt.QualityShift()
new_iter.Add(nwriters)
// Once the formatters are done, closes the chunk channel then the channel
// of the returned iterator, each one after it has been emptied.
go func() {
new_iter.Wait()
for len(chunkchan) > 0 {
time.Sleep(time.Millisecond)
}
close(chunkchan)
for len(new_iter.Channel()) > 0 {
time.Sleep(time.Millisecond)
}
close(new_iter.Channel())
}()
// Formatter: formats every batch, sends the text to the writer and
// forwards the batch on the returned iterator.
ff := func(iterator obiseq.IBioSequenceBatch) {
for iterator.Next() {
batch := iterator.Get()
chunkchan <- FileChunck{
FormatFastqBatch(batch, quality, header_format),
batch.Order(),
}
new_iter.Channel() <- batch
}
new_iter.Done()
}
log.Println("Start of the fastq file reading")
for i := 0; i < nwriters; i++ {
go ff(iterator.Split())
}
// Order of the next chunk to write, and chunks received ahead of their turn.
next_to_send := 0
received := make(map[int]FileChunck, 100)
go func() {
for chunk := range chunkchan {
if chunk.order == next_to_send {
file.Write(chunk.text)
next_to_send++
// Flush the chunks that were waiting for this one.
chunk, ok := received[next_to_send]
for ok {
file.Write(chunk.text)
delete(received, next_to_send)
next_to_send++
chunk, ok = received[next_to_send]
}
} else {
received[chunk.order] = chunk
}
}
}()
return new_iter, nil
}
// WriteFastqBatchToStdout writes the batches of iterator on the standard
// output in FASTQ format, see WriteFastqBatch.
func WriteFastqBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
return WriteFastqBatch(iterator, os.Stdout, options...)
}
// WriteFastqBatchToFile creates (or truncates) filename and hands it to
// WriteFastqBatch to write the batches of iterator in FASTQ format.
//
// The program is terminated by log.Fatalf when the file cannot be created,
// the return statement following that call is therefore never reached.
// NOTE(review): the created file is never closed by this function.
func WriteFastqBatchToFile(iterator obiseq.IBioSequenceBatch,
filename string,
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
file, err := os.Create(filename)
if err != nil {
log.Fatalf("open file error: %v", err)
return obiseq.NilIBioSequenceBatch, err
}
return WriteFastqBatch(iterator, file, options...)
}

View File

@@ -0,0 +1,5 @@
# Build the kseq_test program from kseq_test.c; it is linked with zlib (-lz).
all:kseq.h kseq_test.c
$(CC) -g -O2 kseq_test.c -o kseq_test -lz
# Remove the object files and the kseq_test program.
clean:
rm -f *.o kseq_test

223
pkg/obiformats/kseq/kseq.h Normal file
View File

@@ -0,0 +1,223 @@
/* The MIT License
Copyright (c) 2008 Genome Research Ltd (GRL).
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/* Contact: Heng Li <lh3@sanger.ac.uk> */
/* Last Modified: 12APR2009 */
#ifndef AC_KSEQ_H
#define AC_KSEQ_H
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* Delimiter selectors for ks_getuntil(); a delimiter value greater than
 * KS_SEP_MAX is treated there as a literal character to stop at. */
#define KS_SEP_SPACE 0 /* stop at any isspace() character: ' ', \t, \n, \v, \f, \r */
#define KS_SEP_TAB 1 /* stop at any isspace() character except ' ' */
#define KS_SEP_MAX 1 /* largest selector value */
/*
 * __KS_TYPE(type_t) declares kstream_t, the buffered-stream state:
 *   buf    - read buffer
 *   begin  - index of the next unread byte in buf
 *   end    - value last returned by the read function (bytes held in buf)
 *   is_eof - set once a read returned fewer bytes than requested
 *   f      - underlying file handle of type type_t
 */
#define __KS_TYPE(type_t) \
	typedef struct __kstream_t { \
		char *buf; \
		int begin; \
		int end; \
		int is_eof; \
		type_t f; \
	} kstream_t;
/* ks_eof(ks): non-zero once the buffer is drained and the EOF flag is set. */
#define ks_eof(ks) ((ks)->begin >= (ks)->end && (ks)->is_eof)
/* ks_rewind(ks): clears the buffer indices and the EOF flag; evaluates to 0.
 * It does not reposition the underlying file handle. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
/*
 * __KS_BASIC(type_t, __bufsize) defines the stream constructor/destructor.
 *
 * ks_init(f)    - allocates a zeroed kstream_t wrapping handle f, plus a
 *                 read buffer of __bufsize bytes. Returns NULL (0) when
 *                 either allocation fails; nothing is leaked in that case.
 * ks_destroy(ks) - frees the buffer and the stream; a NULL ks is ignored.
 *                 The handle f itself is not closed.
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == 0) return 0; \
		ks->f = f; \
		ks->buf = (char*)malloc(__bufsize); \
		if (ks->buf == 0) { /* do not hand out a stream without a buffer */ \
			free(ks); \
			return 0; \
		} \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
/*
 * __KS_GETC(__read, __bufsize) defines ks_getc(ks).
 *
 * ks_getc returns the next byte of the stream as a value in 0..255, or -1
 * when no more data is available. The buffer is refilled through
 * __read(ks->f, ks->buf, __bufsize) when it is drained; a short read marks
 * the stream as EOF, and a zero or negative read result (end of input or
 * read error) yields -1.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->is_eof && ks->begin >= ks->end) return -1; \
		if (ks->begin >= ks->end) { \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end < __bufsize) ks->is_eof = 1; \
			if (ks->end <= 0) { /* end of input or read error */ \
				ks->end = 0; /* keep begin/end consistent for later calls */ \
				return -1; \
			} \
		} \
		/* cast through unsigned char so byte 0xFF is not mistaken for -1 */ \
		return (int)(unsigned char)ks->buf[ks->begin++]; \
	}
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/*
 * Growable string used by the readers in this header:
 *   l - number of bytes currently stored in s (terminator not counted)
 *   m - number of bytes allocated for s
 *   s - heap buffer, or NULL before the first allocation
 */
typedef struct __kstring_t {
	size_t l;
	size_t m;
	char *s;
} kstring_t;
#endif
/*
 * kroundup32(x): rounds x up, in place, to the next power of two by smearing
 * the highest set bit of (x - 1) into all lower bits and adding one. A value
 * that is already a power of two is left unchanged.
 * x must be a modifiable lvalue and is evaluated several times, so it must
 * not have side effects. Only shifts up to 16 are applied, so the smear
 * covers 32 bits.
 * NOTE(review): callers in this header pass size_t fields; values above
 * 2^32 would not be rounded correctly on 64-bit targets - confirm whether
 * that matters for expected input sizes.
 */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
/*
 * __KS_GETUNTIL(__read, __bufsize) defines ks_getuntil(ks, delimiter, str, dret).
 *
 * ks_getuntil reads bytes from ks into str (replacing its previous content)
 * until a delimiter is met or the input ends.
 *
 *   delimiter - KS_SEP_SPACE (any isspace() character), KS_SEP_TAB (any
 *               isspace() character except ' '), or a literal character
 *               value greater than KS_SEP_MAX.
 *   str       - destination; its buffer is grown with realloc() as needed
 *               and is always NUL-terminated on a non-negative return.
 *   dret      - if non-NULL, receives the delimiter that stopped the read,
 *               or 0 when the input ended first.
 *
 * Returns the number of bytes stored in str (the delimiter is consumed but
 * not stored), or -1 if the stream was already exhausted on entry.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ \
		if (dret) *dret = 0; \
		str->l = 0; \
		if (ks->begin >= ks->end && ks->is_eof) return -1; \
		for (;;) { \
			int i; \
			if (ks->begin >= ks->end) { \
				if (!ks->is_eof) { \
					ks->begin = 0; \
					ks->end = __read(ks->f, ks->buf, __bufsize); \
					if (ks->end < __bufsize) ks->is_eof = 1; \
					if (ks->end <= 0) { /* end of input or read error */ \
						ks->end = 0; /* keep begin/end consistent */ \
						break; \
					} \
				} else break; \
			} \
			/* isspace() requires a value representable as unsigned char */ \
			if (delimiter > KS_SEP_MAX) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} else if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace((unsigned char)ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace((unsigned char)ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else i = 0; /* not reached for the delimiter values above */ \
			if (str->m - str->l < i - ks->begin + 1) { /* room for chunk + NUL */ \
				str->m = str->l + (i - ks->begin) + 1; \
				kroundup32(str->m); \
				str->s = (char*)realloc(str->s, str->m); \
			} \
			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
			str->l = str->l + (i - ks->begin); \
			ks->begin = i + 1; /* skip the delimiter */ \
			if (i < ks->end) { /* delimiter found inside the buffer */ \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		/* Allocate only when there is no buffer yet: allocating on every */ \
		/* empty token would leak the buffer kept from previous calls.    */ \
		if (str->s == 0) { \
			str->m = 1; \
			str->s = (char*)calloc(1, 1); \
		} \
		str->s[str->l] = '\0'; \
		return str->l; \
	}
/*
 * KSTREAM_INIT(type_t, __read, __bufsize) instantiates the whole buffered
 * stream layer for one handle type: the kstream_t type, ks_init/ks_destroy,
 * ks_getc and ks_getuntil. __read is called as
 * __read(handle, buffer, __bufsize) and its result is used as the number of
 * bytes placed in the buffer.
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
__KS_TYPE(type_t) \
__KS_BASIC(type_t, __bufsize) \
__KS_GETC(__read, __bufsize) \
__KS_GETUNTIL(__read, __bufsize)
/*
 * __KSEQ_BASIC(type_t) defines the kseq_t lifecycle helpers.
 *
 * kseq_init(fd)    - allocates a zeroed kseq_t reading from handle fd.
 *                    Returns NULL (0) if the record or its stream cannot be
 *                    allocated.
 * kseq_rewind(ks)  - forgets the pending header character and resets the
 *                    stream buffer state; the underlying handle is not
 *                    repositioned here.
 * kseq_destroy(ks) - frees the four string buffers, the stream and the
 *                    record itself; a NULL ks is ignored. The handle is not
 *                    closed.
 */
#define __KSEQ_BASIC(type_t) \
	static inline kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		if (s == 0) return 0; \
		s->f = ks_init(fd); \
		if (s->f == 0) { /* stream allocation failed: do not leak s */ \
			free(s); \
			return 0; \
		} \
		return s; \
	} \
	static inline void kseq_rewind(kseq_t *ks) \
	{ \
		ks->last_char = 0; \
		ks_rewind(ks->f); \
	} \
	static inline void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); \
		free(ks->comment.s); \
		free(ks->seq.s); \
		free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
/*
 * __KSEQ_READ defines kseq_read(seq), which parses the next FASTA or FASTQ
 * record from seq->f into seq->name, seq->comment, seq->seq and seq->qual.
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *
 * A record starts at '>' or '@'. The name is the first whitespace-delimited
 * word of the header, the comment is the rest of that line. Sequence
 * characters are collected (non-graphic characters are dropped) until the
 * next '>', '@' or '+'. After a '+' line, quality characters in the range
 * 33..127 are collected until as many as the sequence length have been
 * read. seq->seq.s is always NUL-terminated on a non-negative return.
 */
#define __KSEQ_READ \
	static int kseq_read(kseq_t *seq) \
	{ \
		int c; \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
			if (c == -1) return -1; /* end of file */ \
			seq->last_char = c; \
		} /* the first header char has been read */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; \
		if (ks_getuntil(ks, KS_SEP_SPACE, &seq->name, &c) < 0) return -1; \
		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
			if (isgraph(c)) { /* printable non-space character */ \
				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
					seq->seq.m = seq->seq.l + 2; \
					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
				} \
				seq->seq.s[seq->seq.l++] = (char)c; \
			} \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		/* A record without any sequence character may reach this point with */ \
		/* no buffer at all; make sure there is room for the terminator.     */ \
		if (seq->seq.l + 1 >= seq->seq.m) { \
			seq->seq.m = seq->seq.l + 2; \
			kroundup32(seq->seq.m); \
			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
		} \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return seq->seq.l; /* FASTA */ \
		if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
			seq->qual.m = seq->seq.m; \
			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
		} \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* we should not stop here */ \
		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
		seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
		return seq->seq.l; \
	}
/*
 * __KSEQ_TYPE(type_t) declares kseq_t, one parsed sequence record:
 *   name, comment, seq, qual - the four text fields filled by kseq_read()
 *   last_char - header character ('>' or '@') already consumed for the
 *               next record, or 0 when none is pending
 *   f         - the buffered input stream
 */
#define __KSEQ_TYPE(type_t) \
	typedef struct { \
		kstring_t name; \
		kstring_t comment; \
		kstring_t seq; \
		kstring_t qual; \
		int last_char; \
		kstream_t *f; \
	} kseq_t;
/*
 * KSEQ_INIT(type_t, __read) instantiates the complete reader for one handle
 * type: the stream layer with a 4096-byte buffer, the kseq_t type,
 * kseq_init/kseq_rewind/kseq_destroy and kseq_read. Use it once at file
 * scope, e.g. KSEQ_INIT(gzFile, gzread).
 */
#define KSEQ_INIT(type_t, __read) \
KSTREAM_INIT(type_t, __read, 4096) \
__KSEQ_TYPE(type_t) \
__KSEQ_BASIC(type_t) \
__KSEQ_READ
#endif

BIN
pkg/obiformats/kseq/kseq_test Executable file

Binary file not shown.

View File

@@ -0,0 +1,27 @@
#include <zlib.h>
#include <stdio.h>
#include "kseq.h"
/* Instantiate the kseq reader types and functions for zlib handles, with
 * gzread as the read function. */
KSEQ_INIT(gzFile, gzread)
/*
 * Reads every record of the (optionally gzip-compressed) sequence file named
 * by argv[1] and prints its name, comment, sequence and quality, followed by
 * the final kseq_read() return value.
 *
 * Exit status: 0 after the file has been processed, 1 on a usage error or
 * when the input file cannot be opened.
 */
int main(int argc, char *argv[])
{
	gzFile fp;
	kseq_t *seq;
	int l;
	if (argc < 2) {
		/* argv[0] may be missing when argc is 0 */
		fprintf(stderr, "Usage: %s <in.seq>\n", argc > 0 ? argv[0] : "kseq_test");
		return 1;
	}
	fp = gzopen(argv[1], "r");
	if (fp == NULL) { /* gzopen returns NULL when the file cannot be opened */
		fprintf(stderr, "Cannot open %s\n", argv[1]);
		return 1;
	}
	seq = kseq_init(fp);
	while ((l = kseq_read(seq)) >= 0) {
		printf("name: %s\n", seq->name.s);
		if (seq->comment.l) printf("comment: %s\n", seq->comment.s);
		printf("seq: %s\n", seq->seq.s);
		if (seq->qual.l) printf("qual: %s\n", seq->qual.s);
	}
	printf("return value: %d\n", l);
	kseq_destroy(seq);
	gzclose(fp);
	return 0;
}

View File

@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleIdentifier</key>
<string>com.apple.xcode.dsym.kseq_test</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundlePackageType</key>
<string>dSYM</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
</dict>
</plist>

View File

@@ -0,0 +1,291 @@
>HWI-D00393:103:C6KCUANXX:2:2309:18209:70743_CONS_SUB_SUB reverse_score=72.0; count=2; direction=forward; experiment=australie; seq_a_mismatch=0; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; status=full; seq_a_deletion=0; seq_length=98; start=aaaac; merged_sample={'AN5-30_b': 1, 'AML-33_b': 1}; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
aaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagagtt
ggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2209:18639:37342_CONS_SUB_SUB_CMP ali_length=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1, 'AN2-30_a': 1}; forward_score=72.0; seq_b_mismatch=0; start=ataaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ataaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2115:3400:66119_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_a': 1}; forward_score=72.0; score=367.38470594; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=ccaaa; experiment=australie; mid_quality=62.0797101449; avg_quality=58.9050632911; seq_a_single=33; score_norm=3.99331202109; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtgtacat; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_03A; seq_b_single=33;
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2310:20070:75862_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=23.1; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-12_b': 1}; forward_score=72.0; score=360.189837214; seq_a_mismatch=0; forward_tag=catcagtc; seq_b_mismatch=0; start=ccaaa; experiment=australie; mid_quality=53.0507246377; avg_quality=49.6265822785; seq_a_single=33; score_norm=3.91510692624; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=28.9; position=11_11D; seq_b_single=33;
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggaataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2114:5633:21903_CONS_SUB_SUB merged_sample={'ABR-15_a': 1, 'AN5-12_b': 1, 'AW2-35_b': 1}; forward_score=72.0; seq_b_insertion=0; seq_a_insertion=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=3; seq_length=100; start=ccaaa; experiment=australie; reverse_score=72.0; mode=alignment; status=full; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1112:5602:81492_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-12_b': 1}; forward_score=72.0; score=367.699098517; seq_a_mismatch=0; forward_tag=catcagtc; seq_b_mismatch=0; start=ccaaa; experiment=australie; mid_quality=62.2463768116; avg_quality=59.0506329114; seq_a_single=33; score_norm=3.9967293317; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_11D; seq_b_single=33;
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtcttgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1101:4074:21538_CONS_SUB_SUB merged_sample={'AN4-36_b': 51, 'AW2-04_b': 11, 'AW2-04_a': 10, 'AN4-38_b': 158, 'AN5-43_b': 2, 'ADR-14_b': 2, 'AW2-34_b': 10, 'AW2-34_a': 1, 'AN3-38_b': 1, 'ABR-30_b': 1, 'AW2-36_b': 17, 'AN2-15_a': 11, 'AN4-34_b': 44, 'AN5-45_b': 5, 'AW2-27_b': 1, 'AW2-27_a': 3, 'ADR-25_a': 1, 'AN4-19_b': 30, 'ABR-44_a': 2, 'ABR-31_b': 1, 'AN2-20_b': 1, 'AN1-43_a': 2, 'AN2-13_b': 21, 'AN5-11_b': 2, 'AN4-22_b': 16, 'AN3-14_b': 1, 'AN3-14_a': 3, 'AW2-35_b': 101, 'AN2-19_b': 1, 'ABR-33_a': 1, 'ABR-29_a': 11, 'AW1-40_b': 9, 'ADR-13_a': 5, 'ADR-06_b': 1, 'AN4-26_a': 1, 'AN5-15_b': 20, 'AN2-01_a': 5, 'ADR-21_b': 1, 'AW2-23_b': 29, 'AW2-23_a': 3, 'ADR-13_b': 1, 'AW2-21_a': 1, 'AWO-27_b': 1, 'AW2-21_b': 20, 'AN2-10_a': 9, 'AN5-13_b': 19, 'AN4-20_b': 1, 'AN2-30_a': 1, 'AN3-22_b': 4, 'AN3-25_b': 3, 'AN3-25_a': 5, 'AN4-24_b': 26, 'ABR-15_a': 34, 'AW2-36_a': 5, 'AW2-12_b': 7, 'AW2-12_a': 5, 'AN5-48_a': 1, 'AW1-06_a': 6, 'AN5-21_a': 1, 'AN5-21_b': 20, 'AN2-09_b': 11, 'ABR-13_b': 2, 'ABR-13_a': 23, 'AN2-09_a': 7, 'AN3-20_b': 2, 'AN2-07_a': 4, 'AN2-07_b': 8, 'AN4-44_b': 26, 'AN4-21_b': 19, 'AW2-03_a': 28, 'AN4-42_b': 30, 'AW2-03_b': 47, 'ADR-26_b': 1, 'AN1-03_a': 6, 'AML-47_a': 1, 'AWO-30_b': 1, 'AN1-03_b': 3, 'ADR-30_b': 1, 'AN4-37_b': 40, 'AW2-07_b': 43, 'AN4-40_a': 1, 'AN3-44_b': 1, 'AN4-40_b': 36, 'AW2-01_b': 32, 'AW2-01_a': 13, 'AN5-10_b': 64, 'AW1-11_a': 29, 'AN2-43_a': 1, 'AN2-43_b': 2, 'AN1-17_a': 6, 'AN5-37_b': 1, 'blk-12_b': 1, 'ABR-35_b': 1, 'blk-02_b': 1, 'AN2-45_b': 8, 'AN3-40_a': 1, 'AW1-17_a': 5, 'AN2-45_a': 7, 'AW2-17_b': 4, 'ABR-25_a': 1, 'ABR-08_b': 1, 'AN5-14_b': 6, 'AWO-10_b': 1, 'AN2-38_a': 3, 'AN5-33_b': 15, 'AN2-12_a': 7, 'blk-04_b': 1, 'AN3-08_b': 4, 'AN3-01_b': 2, 'AN5-12_b': 106, 'ABR-23_a': 2, 'AN3-42_b': 19, 'AN4-23_b': 96, 'AN2-35_b': 1, 'ABR-02_a': 7, 'AN5-35_a': 1, 'ABR-17_a': 5, 'AML-42_a': 1, 'ADR-10_b': 1, 'ABR-39_a': 14, 'AN3-20_a': 3, 'pos-01_a': 1, 'AN5-03_b': 13, 'AN3-05_a': 1, 'AWO-42_b': 1, 
'ABR-14_b': 1, 'AW2-13_b': 2, 'AW2-13_a': 5, 'blk-06_b': 2, 'AML-24_a': 1, 'ADR-12_b': 1, 'ABR-09_a': 3, 'ABR-12_b': 1, 'AN3-03_b': 3, 'AN3-03_a': 4, 'ABR-12_a': 34, 'ABR-27_b': 1, 'AN4-11_b': 15, 'AN5-22_b': 11, 'AN3-47_b': 2, 'AN3-47_a': 1, 'AN4-13_b': 35, 'AN2-41_b': 1, 'AN4-41_b': 14, 'AN4-22_a': 1, 'AN2-02_b': 5, 'ADR-37_b': 1, 'AN4-15_b': 2, 'AN5-20_a': 1, 'AN5-20_b': 18, 'AN3-45_b': 4, 'AW2-40_b': 20, 'AN5-09_b': 2, 'AN5-09_a': 1, 'AW2-40_a': 5, 'AN4-47_b': 11, 'AWO-19_b': 1, 'ABR-41_a': 12, 'AW1-32_b': 1, 'AW1-30_a': 15, 'AN4-05_a': 1, 'AN3-41_a': 1, 'AN3-46_b': 2, 'AN5-24_b': 35, 'ABR-43_a': 1, 'AW1-34_b': 1, 'AN5-30_b': 20, 'AN5-30_a': 1, 'AW2-28_a': 10, 'AW2-28_b': 5, 'AN5-25_b': 16, 'AN4-01_a': 1, 'AN2-04_b': 2, 'AW2-30_a': 22, 'AML-19_a': 1, 'AN2-34_a': 2, 'AN2-38_b': 3, 'pos-06_a': 1, 'AN5-19_b': 4, 'blk-07_b': 1, 'ABR-47_a': 1, 'AN2-02_a': 7, 'AN3-43_b': 12, 'AW1-20_a': 2, 'AW2-39_a': 3, 'AW2-43_a': 1, 'AWO-41_a': 10, 'AN5-38_a': 1, 'AW2-43_b': 3, 'AN4-17_b': 12, 'AW2-07_a': 48, 'AN3-04_b': 2, 'AN4-35_b': 86, 'AW1-26_a': 8, 'AWO-34_b': 1, 'ABR-14_a': 13, 'AN2-13_a': 4, 'AN4-39_a': 1, 'ABR-01_a': 78, 'AN5-44_b': 1, 'AN4-39_b': 85, 'AW1-30_b': 4, 'AN2-31_b': 7, 'AN3-37_a': 1, 'AN4-12_b': 142, 'AN3-35_b': 12, 'ABR-42_a': 9, 'ABR-03_b': 1, 'AN3-17_b': 19, 'AML-08_a': 2, 'AW1-29_a': 4, 'AN2-05_b': 3, 'AN4-46_b': 39, 'AN2-05_a': 5, 'AN4-14_b': 4, 'AN5-23_b': 24, 'AN4-25_a': 1, 'AML-12_a': 1, 'AN3-34_a': 1, 'AN5-28_b': 31, 'AN3-34_b': 2, 'AN5-27_b': 3, 'ABR-32_b': 1, 'AWO-15_b': 1, 'ABR-46_a': 8, 'AW1-18_a': 76, 'AN3-13_b': 1, 'AN4-18_b': 2, 'AN4-24_a': 1, 'AWO-06_b': 1, 'AN5-42_b': 14, 'ABR-28_a': 2, 'AN2-40_a': 2, 'AW1-40_a': 9, 'AW2-35_a': 70, 'ABR-40_b': 2, 'AN2-10_b': 3, 'AN3-27_b': 3, 'ABR-44_b': 1, 'ADR-38_b': 1, 'AN3-19_b': 2, 'ABR-40_a': 22, 'AN4-06_b': 17, 'ADR-05_b': 1, 'AN2-12_b': 1, 'ABR-08_a': 6, 'AN5-41_b': 10, 'AWO-37_b': 1, 'AN4-29_b': 40, 'AW2-42_b': 3, 'AW1-27_a': 6, 'ADR-34_b': 1, 'AN4-45_b': 115, 'AWO-13_b': 1, 'AN5-34_b': 73, 
'ABR-22_b': 2, 'AN5-39_b': 2, 'AW2-42_a': 1, 'AN2-04_a': 4, 'AN4-43_b': 19, 'AW1-08_b': 1, 'AW1-08_a': 1, 'AW1-11_b': 1, 'AN4-48_a': 1, 'AML-04_a': 1, 'AML-41_a': 1, 'AW1-35_a': 2}; count=3190; seq_b_insertion=0; status=full; seq_a_deletion=0; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1102:20365:63690_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-13_a': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=tagtcgca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=01_09E; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgctcgttga
>HWI-D00393:103:C6KCUANXX:2:1109:5898:71477_CONS_SUB_SUB_CMP merged_sample={'AW2-04_a': 1, 'AN4-38_b': 1, 'AW1-07_b': 1, 'AN4-34_b': 5, 'AN4-19_b': 1, 'AN2-20_b': 1, 'AN2-13_b': 3, 'AN5-11_b': 1, 'AN3-14_a': 1, 'ABR-29_a': 1, 'AW2-23_b': 3, 'AW2-23_a': 1, 'AN4-28_b': 1, 'AW2-21_b': 3, 'AN4-24_b': 1, 'AN5-21_b': 1, 'ABR-13_a': 2, 'AN3-20_b': 1, 'ADR-11_b': 1, 'AN4-44_b': 1, 'AN3-46_b': 2, 'AN4-42_b': 1, 'AW2-03_b': 2, 'AN1-03_a': 1, 'AN4-37_b': 2, 'AW2-07_b': 3, 'AN4-40_b': 3, 'AW2-01_a': 1, 'AW1-11_a': 5, 'AN2-43_b': 2, 'pos-09_a': 1, 'ABR-25_a': 1, 'AN2-38_b': 1, 'AN3-08_a': 1, 'AN3-01_b': 1, 'AN5-12_b': 2, 'AN3-42_b': 3, 'AN4-23_b': 1, 'ABR-02_a': 1, 'ABR-17_a': 1, 'ABR-39_a': 1, 'ABR-14_a': 3, 'AW2-13_b': 1, 'ABR-12_a': 4, 'AN4-13_b': 2, 'AN2-41_b': 1, 'AN4-15_b': 1, 'AN5-20_b': 1, 'AW2-40_b': 3, 'ABR-41_a': 1, 'AN3-35_a': 1, 'AN3-43_b': 1, 'AN5-24_b': 2, 'AN5-30_b': 1, 'AN5-34_b': 3, 'AN5-19_b': 1, 'AWO-41_a': 1, 'AN5-38_a': 1, 'AW2-07_a': 1, 'AN4-35_b': 5, 'ABR-01_a': 4, 'AN4-39_b': 2, 'AN2-31_b': 1, 'AW1-30_a': 1, 'AN4-12_b': 2, 'ABR-42_a': 2, 'AN3-17_a': 1, 'AN3-17_b': 1, 'AN2-05_b': 1, 'AN4-46_b': 1, 'AN4-14_b': 4, 'AN5-23_b': 4, 'ABR-32_a': 1, 'ABR-46_a': 2, 'AW1-18_a': 4, 'AN4-18_b': 1, 'AW1-35_a': 1, 'AW2-35_b': 6, 'AW1-40_b': 3, 'AW2-35_a': 2, 'AN2-10_a': 2, 'AN2-02_a': 1, 'AN4-29_b': 1, 'AN4-45_b': 3, 'AW2-44_b': 1, 'ABR-22_b': 1, 'AW1-28_b': 1, 'AW2-42_b': 1, 'AN4-43_b': 2, 'AW1-27_a': 1}; count=159; status=full; seq_a_deletion=0; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1116:11515:15328_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-44_b': 1, 'AN4-39_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttactaggttaaggtctcgtttgttaa
>HWI-D00393:103:C6KCUANXX:2:1206:17870:33853_CONS_SUB_SUB ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-30_a': 7}; forward_score=72.0; seq_a_mismatch=0; forward_tag=cgctctcg; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gactgatg; goodAli=Alignement; count=7; seq_length=100; status=full; mode=alignment; position=02_12A; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcttaactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2106:7652:12042_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-40_b': 1, 'AW2-35_b': 1, 'ABR-01_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcgtaactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2115:4325:51738_CONS_SUB_SUB_CMP status=full; merged_sample={'AW1-29_a': 1, 'ABR-41_a': 1, 'AN2-02_a': 1, 'AN4-39_b': 1, 'AN2-07_a': 1}; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; forward_score=72.0; tail_quality=37.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=5; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgctaa
>HWI-D00393:103:C6KCUANXX:2:2211:17027:90962_CONS_SUB_SUB status=full; merged_sample={'AML-46_a': 1, 'AN4-46_b': 7}; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=8; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcattcgttaa
>HWI-D00393:103:C6KCUANXX:2:1113:14380:43631_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-34_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08B; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagtatgtaaaggtctcgatcgttat
>HWI-D00393:103:C6KCUANXX:2:1116:10928:36920_CONS_SUB_SUB_CMP ali_length=92; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1, 'AN5-25_b': 1, 'AW2-01_a': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgtttgttaa
>HWI-D00393:103:C6KCUANXX:2:1204:11153:34132_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1, 'AN4-39_b': 1, 'AN2-31_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; status=full; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; mode=alignment; head_quality=37.0; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactattcagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2106:2429:2922_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=371.700731814; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.7299270073; avg_quality=59.4522292994; seq_a_single=32; score_norm=3.99678206251; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=37.0; position=11_08F; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2201:17654:97503_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=34.6; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-34_b': 1}; forward_score=72.0; score=307.231253197; seq_a_mismatch=0; forward_tag=tcagtgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=53.8115942029; avg_quality=51.4113924051; seq_a_single=33; score_norm=3.33947014344; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=12_02B; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgtgaa
>HWI-D00393:103:C6KCUANXX:2:2206:15790:49247_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-24_b': 1, 'AWO-41_a': 1, 'AW2-35_a': 1, 'AN4-35_b': 1, 'AW2-07_a': 1, 'AN3-04_b': 1, 'AN2-13_a': 1, 'AN4-22_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=8; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttagggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2301:12930:40102_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-15_a': 1, 'AN4-39_b': 1}; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttaacaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2315:17692:90977_CONS_SUB_SUB_CMP ali_length=93; seq_a_deletion=0; reverse_score=72.0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1, 'ABR-41_a': 1, 'AN3-17_b': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=32; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; goodAli=Alignement; count=3; seq_length=99; mode=alignment; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactattagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1207:9047:41800_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-13_a': 2, 'AN5-19_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgctcgttaa
>HWI-D00393:103:C6KCUANXX:2:2210:14421:16016_CONS_SUB_SUB_CMP reverse_score=66.0; count=7; direction=reverse; seq_b_insertion=0; experiment=australie; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; seq_length=99; start=ctaaa; merged_sample={'AW2-12_b': 1, 'AN4-41_b': 1, 'AW2-07_a': 1, 'AN5-21_b': 1, 'AN2-15_a': 1, 'AN4-23_b': 1, 'AN4-37_b': 1}; seq_a_insertion=0; mode=alignment; status=full; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgtta
>HWI-D00393:103:C6KCUANXX:2:2211:13367:14752_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=32.6; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-18_a': 1}; forward_score=72.0; score=367.610399661; seq_a_mismatch=0; forward_tag=gtcacgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.0434782609; avg_quality=57.6329113924; seq_a_single=33; score_norm=3.9957652137; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtcgtaga; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.6; position=02_10C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaagatctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2307:6880:68904_CONS_SUB_SUB_CMP ali_length=81; seq_ab_match=56; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1}; forward_score=72.0; score=59.9384545346; seq_a_mismatch=1; forward_tag=atcagtca; seq_b_mismatch=22; start=ctaaa; experiment=australie; mid_quality=52.6666666667; avg_quality=50.6835443038; seq_a_single=33; score_norm=0.73998092018; reverse_score=72.0; direction=reverse; seq_b_insertion=2; seq_b_deletion=13; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08E; seq_b_single=44;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaagtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1312:11682:89893_CONS_SUB_SUB_CMP ali_length=92; seq_a_deletion=0; reverse_score=72.0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-19_b': 1, 'AN5-10_b': 1, 'AN5-15_b': 1, 'AW2-34_b': 1, 'AN5-45_b': 1, 'AN4-40_b': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; status=full; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=6; seq_length=100; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcattaa
>HWI-D00393:103:C6KCUANXX:2:2109:18223:83128_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-15_a': 1, 'ABR-13_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttggcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2204:19781:26262_CONS_SUB_SUB_CMP ali_length=95; seq_ab_match=95; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-01_a': 1}; forward_score=72.0; score=379.691935178; seq_a_mismatch=0; forward_tag=gatcgcga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.5703703704; avg_quality=60.1419354839; seq_a_single=30; score_norm=3.9967572124; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=155; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=01_07H; seq_b_single=30;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaagatctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2215:4598:28592_CONS_SUB_SUB_CMP status=full; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-40_b': 1, 'AW2-35_b': 1, 'AN5-03_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; goodAli=Alignement; count=3; seq_length=100; mode=alignment; head_quality=37.0;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggcctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2304:13327:3228_CONS_SUB_SUB merged_sample={'AN4-36_b': 1, 'AN4-29_b': 1, 'AW2-01_b': 1, 'AW2-35_a': 1, 'ABR-46_a': 1, 'AW1-18_a': 1, 'AW1-40_b': 1, 'AN4-24_b': 1, 'AW2-07_b': 2}; forward_score=72.0; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=10; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; status=full; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtcccgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1208:20375:7918_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1, 'AW2-30_a': 1, 'AN4-35_b': 1}; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcatagctatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1215:20045:28654_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-12_b': 1, 'AN5-42_b': 1, 'AN4-23_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcctaactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1302:19280:14816_CONS_SUB_SUB_CMP status=full; merged_sample={'ABR-01_a': 1, 'AWO-46_b': 1, 'AN4-22_b': 1}; seq_b_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=3; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgtacgttaa
>HWI-D00393:103:C6KCUANXX:2:2214:8779:29491_CONS_SUB_SUB_CMP status=full; merged_sample={'AN5-24_b': 1, 'ADR-13_a': 2, 'AW2-35_a': 1}; seq_b_insertion=0; seq_a_insertion=0; forward_score=72.0; reverse_match=tttgtctgcttaattgcg; seq_b_mismatch=0; seq_a_deletion=0; count=4; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; seq_b_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; head_quality=37.0; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggctaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2104:3449:54357_CONS_SUB_SUB_CMP status=full; merged_sample={'AN4-13_b': 1, 'AN4-42_b': 1, 'AN4-38_b': 1, 'AN5-21_b': 1, 'AN4-46_b': 1, 'AN4-12_b': 6}; seq_b_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=11; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttag
>HWI-D00393:103:C6KCUANXX:2:1102:14455:71108_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-23_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=ctatgcta; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_05G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgtcag
>HWI-D00393:103:C6KCUANXX:2:1110:14996:73808_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-43_b': 1, 'AW2-40_b': 1, 'AW2-07_b': 2}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=4; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagtaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1204:14078:1983_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-13_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=tagtcgca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=atatagcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_04E; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttactaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1212:8502:98185_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1, 'AW2-35_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcatgactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1305:7874:81774_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-07_a': 1, 'AN4-34_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtcttgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1103:10874:79989_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=gtgtacat; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_07C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttcaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1207:7817:59077_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=366.948065081; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=60.9420289855; avg_quality=57.9113924051; seq_a_single=33; score_norm=3.9885659248; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08F; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgatcgttat
>HWI-D00393:103:C6KCUANXX:2:1308:2120:40513_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN3-42_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actctgct; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_03B; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcattcgttat
>HWI-D00393:103:C6KCUANXX:2:2206:6280:74968_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=acacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-40_a': 1}; forward_score=66.0; score=367.706336455; seq_a_mismatch=0; forward_tag=gatcgcga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=catcagtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=05_08H; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttgaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1215:13979:31122_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgttgcttaattgcga; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-34_b': 1}; forward_score=72.0; score=371.70314446; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.6861313869; avg_quality=59.4140127389; seq_a_single=32; score_norm=3.99680800494; reverse_score=66.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=37.0; position=11_08B; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtaagggtctcgttcgtta
>HWI-D00393:103:C6KCUANXX:2:2208:15903:75212_CONS_SUB_SUB ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-01_b': 1}; forward_score=72.0; score=359.811368897; seq_a_mismatch=1; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.0072463768; avg_quality=58.8417721519; seq_a_single=33; score_norm=3.91099314019; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_02G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagtaggttaaggtctcgtttgttaa
>HWI-D00393:103:C6KCUANXX:2:1203:16969:99847_CONS_SUB_SUB ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-40_b': 1}; forward_score=72.0; score=359.526419012; seq_a_mismatch=0; forward_tag=ctgcgtac; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=60.9782608696; avg_quality=57.9430379747; seq_a_single=33; score_norm=3.90789585883; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08H; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataacaatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2213:12449:50581_CONS_SUB_SUB_CMP status=full; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1, 'AN5-25_b': 1, 'AN4-22_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; goodAli=Alignement; count=3; seq_length=100; mode=alignment; head_quality=37.0;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggtataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2310:15600:75905_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_a': 1}; forward_score=72.0; score=367.701511163; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2753623188; avg_quality=59.0759493671; seq_a_single=33; score_norm=3.99675555612; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtgtacat; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_03A; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtaagggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1214:16466:5047_CONS_SUB_SUB status=full; merged_sample={'AN2-07_b': 1, 'AN1-17_a': 1, 'AN3-25_b': 1, 'AN4-23_b': 1, 'AW2-35_a': 1}; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_a_deletion=0; count=5; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcgggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1306:6166:18686_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=24.8; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-39_b': 1}; forward_score=72.0; score=200.442729055; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=40.5289855072; avg_quality=38.6898734177; seq_a_single=33; score_norm=2.17872531582; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=27.2; position=11_08G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggctagagtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2312:19984:25051_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=acacacac; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actctgct; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_03A; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttagagtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1209:10424:5048_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-43_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=acgacgag; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_09C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggataaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1306:10017:29340_CONS_SUB_SUB_CMP merged_sample={'AN4-36_b': 1, 'AN4-12_b': 1, 'AW2-07_a': 1}; forward_score=72.0; direction=reverse; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; seq_b_deletion=0; status=full; seq_a_deletion=0; count=3; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttagcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1116:1609:19254_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=34.4; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1}; forward_score=72.0; score=365.502727331; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=55.8905109489; avg_quality=52.9936305732; seq_a_single=32; score_norm=3.93013685302; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=31.9; position=11_09E; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgtttt
>HWI-D00393:103:C6KCUANXX:2:1116:18814:29744_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_a': 1}; forward_score=72.0; score=367.048675141; seq_a_mismatch=0; forward_tag=acgacgag; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.7101449275; avg_quality=58.582278481; seq_a_single=33; score_norm=3.9896595124; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_07C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcagattaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1301:18017:5670_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-29_b': 1}; forward_score=72.0; score=307.503017136; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=48.0362318841; avg_quality=46.0632911392; seq_a_single=33; score_norm=3.3424240993; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=27.9; position=11_07E; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagccggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2304:14057:23271_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ADR-47_b': 1, 'AN3-35_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataattatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1105:1482:86390_CONS_SUB_SUB_CMP ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-13_b': 1}; forward_score=72.0; score=375.690301881; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.0735294118; avg_quality=59.7307692308; seq_a_single=31; score_norm=3.99670533916; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_11E; seq_b_single=31;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgtaaa
>HWI-D00393:103:C6KCUANXX:2:1214:1498:16792_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=36.4; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-01_a': 1}; forward_score=72.0; score=367.691974388; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2608695652; avg_quality=59.0253164557; seq_a_single=33; score_norm=3.99665189552; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=acagcaca; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_02G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggctgaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2112:6296:18402_CONS_SUB_SUB_CMP reverse_score=72.0; count=2; direction=reverse; seq_b_insertion=0; experiment=australie; seq_b_deletion=0; status=full; seq_a_deletion=0; seq_length=100; start=ctaaa; merged_sample={'AN4-11_b': 1, 'AN4-17_b': 1}; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggaataactatttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2202:15115:92220_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=gtgtacat; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_07C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtccgggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2308:15218:31319_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=35.1; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-30_a': 1}; forward_score=72.0; score=367.379880648; seq_a_mismatch=0; forward_tag=gcgtcagc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.7463768116; avg_quality=58.4936708861; seq_a_single=33; score_norm=3.99325957226; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agactatg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=06_01F; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactagttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1303:8962:10788_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=35.6; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-40_b': 1}; forward_score=72.0; score=328.577396734; seq_a_mismatch=0; forward_tag=ctgcgtac; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=51.2898550725; avg_quality=49.2721518987; seq_a_single=33; score_norm=3.57149344276; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=11_08H; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactttttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1309:18593:53694_CONS_SUB_SUB_CMP ali_length=92; seq_a_deletion=0; reverse_score=72.0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-43_b': 1, 'ABR-41_a': 1, 'AW2-35_a': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; status=full; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; mode=alignment; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactacttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1208:4517:87099_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_a': 1}; forward_score=72.0; score=367.701511163; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2753623188; avg_quality=59.0759493671; seq_a_single=33; score_norm=3.99675555612; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtgtacat; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_03A; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagaaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1115:14275:7184_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_09E; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttagagtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1209:11513:82442_CONS_SUB_SUB_CMP ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-39_a': 1}; forward_score=72.0; score=375.685590398; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.0294117647; avg_quality=59.6923076923; seq_a_single=31; score_norm=3.996655217; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=98; status=full; mode=alignment; head_quality=37.0; position=03_07G; seq_b_single=31;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1214:6399:72570_CONS_SUB_SUB ali_length=92; seq_ab_match=88; tail_quality=24.5; reverse_match=tttgtctgcttaattacg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-30_a': 1}; forward_score=72.0; score=255.892896628; seq_a_mismatch=3; forward_tag=tctactga; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=40.4057971014; avg_quality=38.5253164557; seq_a_single=33; score_norm=2.78144452857; reverse_score=66.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tactatac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=26.6; position=03_06F; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgtgcgttaa
>HWI-D00393:103:C6KCUANXX:2:1305:3473:19356_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-07_b': 1}; forward_score=72.0; score=367.635869076; seq_a_mismatch=0; forward_tag=tactatac; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.9782608696; avg_quality=58.8164556962; seq_a_single=33; score_norm=3.99604205518; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actctgct; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_03F; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcgtaactatttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2302:5940:68325_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08E; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtcgaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2309:13769:62059_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_a': 1}; forward_score=72.0; score=359.470664833; seq_a_mismatch=0; forward_tag=acgacgag; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=61.7536231884; avg_quality=58.6202531646; seq_a_single=33; score_norm=3.90728983514; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_07C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaagttctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2315:8947:22476_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN3-25_a': 1}; forward_score=72.0; score=368.159719936; seq_a_mismatch=0; forward_tag=acacacac; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=58.4744525547; avg_quality=55.7388535032; seq_a_single=32; score_norm=3.95870666598; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=agactatg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=05_01A; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactgtttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1301:19819:95569_CONS_SUB_SUB_CMP ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctttaattccgat; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-34_b': 1}; forward_score=72.0; score=375.699952465; seq_a_mismatch=0; forward_tag=tcagtgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.1911764706; avg_quality=59.8333333333; seq_a_single=31; score_norm=3.99680800494; reverse_score=60.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=98; status=full; mode=alignment; head_quality=37.0; position=12_02B; seq_b_single=31;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgtt
>HWI-D00393:103:C6KCUANXX:2:1313:4613:34271_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=32.3; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-11_a': 1}; forward_score=72.0; score=313.551003229; seq_a_mismatch=0; forward_tag=gtcacgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=56.6594202899; avg_quality=53.1518987342; seq_a_single=33; score_norm=3.40816307858; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=25.6; position=02_09C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtcaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2113:10988:9799_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=30.2; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-13_b': 1}; forward_score=72.0; score=361.020650934; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=55.6086956522; avg_quality=52.4936708861; seq_a_single=33; score_norm=3.92413751015; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=31.8; position=11_11E; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaatgtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2113:5686:82397_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=36.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'blk-12_b': 1}; forward_score=72.0; score=371.343139227; seq_a_mismatch=0; forward_tag=ctatgcta; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.1532846715; avg_quality=58.7643312102; seq_a_single=32; score_norm=3.99293698094; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=ctgcgtac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=08_12E; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggatagggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2311:15976:57660_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=30.1; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-39_b': 1}; forward_score=72.0; score=346.55597612; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=56.8550724638; avg_quality=53.8037974684; seq_a_single=33; score_norm=3.76691278391; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.4; position=11_08G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtgaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2205:20527:78743_CONS_SUB_SUB ali_length=95; seq_ab_match=93; tail_quality=29.1; reverse_match=tttgtctgcttaatggcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-12_b': 1}; forward_score=72.0; score=351.353534518; seq_a_mismatch=1; forward_tag=catcagtc; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=50.2074074074; avg_quality=47.8709677419; seq_a_single=30; score_norm=3.69845825808; reverse_score=66.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=155; reverse_tag=ctatgcta; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=11_05D; seq_b_single=30;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtcgcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1104:5688:6100_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-46_b': 1}; forward_score=72.0; score=367.370471331; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.0362318841; avg_quality=58.8670886076; seq_a_single=33; score_norm=3.99315729707; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_09F; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtcccattcgttaa
>HWI-D00393:103:C6KCUANXX:2:1111:18652:56309_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN2-12_a': 1}; forward_score=72.0; score=371.700731814; seq_a_mismatch=0; forward_tag=atatagcg; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.7299270073; avg_quality=59.4522292994; seq_a_single=32; score_norm=3.99678206251; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tatgtcag; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=37.0; position=04_04D; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttgttaa
>HWI-D00393:103:C6KCUANXX:2:1211:20612:10566_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08F; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataaccatttagctggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2113:19027:63568_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=371.700731814; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.7299270073; avg_quality=59.4267515924; seq_a_single=32; score_norm=3.99678206251; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=36.6; position=11_08F; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagtaggctagagtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2201:15565:85738_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=32.1; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-23_b': 1}; forward_score=72.0; score=365.670520501; seq_a_mismatch=0; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=59.5289855072; avg_quality=56.3670886076; seq_a_single=33; score_norm=3.97467957066; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=ctatgcta; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_05G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcatatctatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2215:13270:16769_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-01_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_02G; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgtccgttaa
>HWI-D00393:103:C6KCUANXX:2:1102:18567:11128_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN2-10_b': 1}; forward_score=72.0; score=370.995083415; seq_a_mismatch=0; forward_tag=gtcgtaga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.8248175182; avg_quality=58.5732484076; seq_a_single=32; score_norm=3.98919444533; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=atatagcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.6; position=10_04B; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttgacataactatttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:1106:7644:40758_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=90; tail_quality=34.2; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-35_b': 1}; forward_score=72.0; score=296.115079326; seq_a_mismatch=2; forward_tag=acgacgag; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=54.1449275362; avg_quality=51.6075949367; seq_a_single=33; score_norm=3.21864216659; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=34.0; position=11_08C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttac
>HWI-D00393:103:C6KCUANXX:2:1111:17524:16890_CONS_SUB_SUB ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN2-12_b': 1}; forward_score=72.0; score=375.680651297; seq_a_mismatch=0; forward_tag=gactgatg; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.9264705882; avg_quality=59.6025641026; seq_a_single=31; score_norm=3.99660267337; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=atatagcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=10_04D; seq_b_single=31;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttgtcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2109:4640:21588_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-10_b': 1}; forward_score=72.0; score=367.701511163; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2173913043; avg_quality=59.0253164557; seq_a_single=33; score_norm=3.99675555612; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_11B; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcggtcgttaa
>HWI-D00393:103:C6KCUANXX:2:2212:14824:56282_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-11_a': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=gtcacgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=02_09C; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggtcaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2306:10336:10418_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_b': 1, 'AN4-23_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataaccatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2216:11206:55822_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=92; tail_quality=29.8; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-30_a': 1}; forward_score=72.0; score=357.173125277; seq_a_mismatch=0; forward_tag=cgctctcg; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=54.6642335766; avg_quality=51.4777070064; seq_a_single=32; score_norm=3.84057123954; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=gactgatg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=29.5; position=02_12A; seq_b_single=32;
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcttaactatttagcaggttaaggtctcgttcgttat
>HWI-D00393:103:C6KCUANXX:2:2303:4215:99753_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgtattgcc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-41_b': 1}; forward_score=66.0; score=371.70314446; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=gaaaa; experiment=australie; mid_quality=62.7591240876; avg_quality=59.4522292994; seq_a_single=32; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=36.6; position=11_09A; seq_b_single=32;
gaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagagt
tggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2209:18822:35652_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-29_b': 1}; forward_score=72.0; score=366.357546614; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=gtaaa; experiment=australie; mid_quality=60.7898550725; avg_quality=57.7784810127; seq_a_single=33; score_norm=3.98214724581; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_07E; seq_b_single=33;
gtaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:2212:12163:84900_CONS_SUB_SUB merged_sample={'AN4-36_b': 1, 'AN4-39_b': 1, 'AN4-12_b': 1, 'AW2-03_b': 1, 'ABR-42_a': 1, 'AN4-46_b': 1, 'AN4-23_b': 1, 'AN4-37_b': 1, 'AN5-23_b': 1, 'AN5-41_b': 1, 'AW1-18_a': 1, 'AN4-35_b': 1, 'AW2-35_b': 1, 'AW2-35_a': 1}; reverse_score=72.0; seq_b_insertion=0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=14; seq_length=99; start=taaaa; experiment=australie; seq_a_insertion=0; mode=alignment; status=full; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
taaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagagt
tggcataactatttagcaggttaaggtctcgttcgttaa
>HWI-D00393:103:C6KCUANXX:2:1210:8971:79507_CONS_SUB_SUB_CMP merged_sample={'AN5-41_b': 1, 'AW2-01_a': 3, 'AN5-25_b': 2, 'AW2-07_b': 4, 'AN5-23_b': 3, 'AW2-01_b': 1}; ali_length=92; seq_b_insertion=0; seq_b_deletion=0; status=full; seq_a_deletion=0; count=14; seq_length=100; start=ttaaa; experiment=australie; seq_a_insertion=0; mode=alignment; seq_length_ori=158; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; seq_a_single=33; seq_b_single=33; goodAli=Alignement;
ttaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
ttagcataactattcagtaggttaaggtctcgttcgttaa

View File

@@ -0,0 +1,12 @@
>1 {"taxid" : 1234, "specie_name" : "Lupus lupus"}
acgtacgtacgtagc
>2 {"taxid" : 3243, "specie_name" : "Gallus gallus"} test
acgatcgatc
@3 {"taxid" : 3243, "specie_name" : "Gallus gallus"} test2
cgctagcatagc
cgatatgactta
+
78wo82usd980
d88fau
238ud8

View File

@@ -0,0 +1,141 @@
package ncbitaxdump
import (
"bufio"
"encoding/csv"
"errors"
"fmt"
"io"
"log"
"os"
"path"
"strconv"
"strings"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitax"
)
// loadNodeTable reads a '|'-separated node table from reader and registers
// one taxon per record in taxonomy.
//
// Only the first three columns of each record are used: the taxid, the
// parent taxid and the rank. Lines starting with '#' are treated as comments
// by the CSV reader. Records with fewer than three columns are skipped
// instead of causing an index-out-of-range panic, and a read error other
// than io.EOF is logged rather than being silently mistaken for the end of
// the input. Once reading stops, taxonomy.ReindexParent is called.
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
	file := csv.NewReader(reader)
	file.Comma = '|'
	file.Comment = '#'
	file.TrimLeadingSpace = true
	file.ReuseRecord = true

	for {
		record, err := file.Read()
		if err != nil {
			// io.EOF is the normal termination; anything else means the
			// table was only partially loaded and must be reported.
			if err != io.EOF {
				log.Printf("Node table reading stopped on error: %v", err)
			}
			break
		}

		if len(record) < 3 {
			continue
		}

		// Conversion errors are ignored as in the original code: a non
		// numeric field yields the identifier 0.
		taxid, _ := strconv.Atoi(strings.TrimSpace(record[0]))
		parent, _ := strconv.Atoi(strings.TrimSpace(record[1]))
		rank := strings.TrimSpace(record[2])

		taxonomy.AddNewTaxa(taxid, parent, rank, true, true)
	}

	taxonomy.ReindexParent()
}
// loadNameTable reads a '|'-separated name table from reader, one record
// per line, and registers the names in taxonomy.
//
// For each line, column 0 is the taxid, column 1 the name and column 3 the
// name class. When onlysn is true, only names whose class is
// "scientific name" are registered.
//
// The lines are split manually on '|' instead of going through
// encoding/csv (NOTE(review): presumably because names may contain quote
// characters that the CSV reader rejects; confirm).
//
// It returns the number of names registered, or -1 as soon as a line does
// not fit in the bufio.Reader buffer. Lines with fewer than four columns
// (blank lines, for instance) are skipped instead of causing an
// index-out-of-range panic, and a read error other than io.EOF is logged.
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
	file := bufio.NewReader(reader)
	n := 0

	for {
		line, prefix, err := file.ReadLine()
		if err != nil {
			if err != io.EOF {
				log.Printf("Name table reading stopped on error: %v", err)
			}
			break
		}

		// prefix is set when the line was too long for the reader buffer.
		if prefix {
			return -1
		}

		record := strings.Split(string(line), "|")
		if len(record) < 4 {
			continue
		}

		taxid, _ := strconv.Atoi(strings.TrimSpace(record[0]))
		// name and classname are declared inside the loop so that the
		// pointers handed to AddNewName refer to fresh variables each time.
		name := strings.TrimSpace(record[1])
		classname := strings.TrimSpace(record[3])

		if !onlysn || classname == "scientific name" {
			n++
			taxonomy.AddNewName(taxid, &name, &classname)
		}
	}

	return n
}
// loadMergedTable reads a '|'-separated table of merged taxa from reader
// and registers each record as an alias in taxonomy.
//
// Column 0 is the old taxid and column 1 the taxid that replaces it. Lines
// starting with '#' are treated as comments by the CSV reader. Records with
// fewer than two columns are skipped instead of causing an
// index-out-of-range panic, and a read error other than io.EOF is logged
// rather than being silently mistaken for the end of the input.
//
// It returns the number of aliases registered.
func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
	file := csv.NewReader(reader)
	file.Comma = '|'
	file.Comment = '#'
	file.TrimLeadingSpace = true
	file.ReuseRecord = true

	n := 0
	for {
		record, err := file.Read()
		if err != nil {
			if err != io.EOF {
				log.Printf("Merged table reading stopped on error: %v", err)
			}
			break
		}

		if len(record) < 2 {
			continue
		}

		// Conversion errors are ignored as in the original code: a non
		// numeric field yields the identifier 0.
		oldtaxid, _ := strconv.Atoi(strings.TrimSpace(record[0]))
		newtaxid, _ := strconv.Atoi(strings.TrimSpace(record[1]))

		n++
		taxonomy.AddNewAlias(newtaxid, oldtaxid)
	}

	return n
}
// LoadNCBITaxDump builds a taxonomy from the files nodes.dmp, names.dmp and
// merged.dmp found in directory.
//
// The three files are loaded in that order, with progress reported through
// the standard logger. onlysn is forwarded to loadNameTable, where it
// restricts the loaded names to those of class "scientific name".
//
// An error is returned, with a nil taxonomy, as soon as one of the three
// files cannot be opened.
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
	taxonomy := obitax.NewTaxonomy()

	// Step 1: the taxonomy nodes.
	log.Printf("Loading Taxonomy nodes\n")

	nodes, err := os.Open(path.Join(directory, "nodes.dmp"))
	if err != nil {
		message := fmt.Sprintf("Cannot open nodes file from '%s'", directory)
		return nil, errors.New(message)
	}
	defer nodes.Close()

	loadNodeTable(bufio.NewReader(nodes), taxonomy)
	log.Printf("%d Taxonomy nodes read\n", taxonomy.Length())

	// Step 2: the taxon names. loadNameTable does its own buffering, so the
	// file is handed over as is.
	log.Printf("Loading Taxon names\n")

	names, err := os.Open(path.Join(directory, "names.dmp"))
	if err != nil {
		message := fmt.Sprintf("Cannot open names file from '%s'", directory)
		return nil, errors.New(message)
	}
	defer names.Close()

	count := loadNameTable(names, taxonomy, onlysn)
	log.Printf("%d taxon names read\n", count)

	// Step 3: the merged taxa, registered as aliases.
	log.Printf("Loading Merged taxa\n")

	merged, err := os.Open(path.Join(directory, "merged.dmp"))
	if err != nil {
		message := fmt.Sprintf("Cannot open merged file from '%s'", directory)
		return nil, errors.New(message)
	}
	defer merged.Close()

	count = loadMergedTable(bufio.NewReader(merged), taxonomy)
	log.Printf("%d merged taxa read\n", count)

	return taxonomy, nil
}

158
pkg/obiformats/options.go Normal file
View File

@@ -0,0 +1,158 @@
package obiformats
import "git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
// __options__ stores the settings configurable through WithOption setters.
// It is only reached through the pointer held by an Options value, and its
// default values are assigned in MakeOptions.
type __options__ struct {
// Annotator returned by Options.ParseFastSeqHeader.
fastseq_header_parser obiseq.SeqAnnotator
// Function returned by Options.FormatFastSeqHeader, producing the header
// string of a sequence.
fastseq_header_writer func(obiseq.BioSequence) string
// Flag returned by Options.ProgressBar.
with_progress_bar bool
// Value returned by Options.BufferSize.
// NOTE(review): unit and consumer are not visible here.
buffer_size int
// Value returned by Options.BatchSize.
batch_size int
// ASCII code corresponding to a quality of 0 in FASTQ quality strings.
quality_shift int
// Value returned by Options.ParallelWorkers.
parallel_workers int
}
// Options is a handle on a set of settings. It only holds a pointer, so
// copies of an Options value (including those received by WithOption
// setters) all read and modify the same underlying settings.
type Options struct {
pointer *__options__
}
// WithOption is a setter that modifies the settings referenced by the
// Options value it receives. MakeOptions applies such setters in order.
type WithOption func(Options)
// MakeOptions builds an Options value initialised with the default
// settings, then applies every setter of setters to it, in order.
//
// Defaults: headers parsed with ParseGuessedFastSeqHeader and written with
// FormatFastSeqJsonHeader, no progress bar, buffer size 2, quality shift 33,
// 4 parallel workers and a batch size of 5000.
func MakeOptions(setters []WithOption) Options {
	opt := Options{
		pointer: &__options__{
			fastseq_header_parser: ParseGuessedFastSeqHeader,
			fastseq_header_writer: FormatFastSeqJsonHeader,
			with_progress_bar:     false,
			buffer_size:           2,
			quality_shift:         33,
			parallel_workers:      4,
			batch_size:            5000,
		},
	}

	for i := range setters {
		setters[i](opt)
	}

	return opt
}
// QualityShift returns the ASCII code corresponding to a quality of 0 in
// FASTQ encoded quality scores.
func (opt Options) QualityShift() int {
	shift := opt.pointer.quality_shift
	return shift
}
// BufferSize returns the buffer size stored in the options.
func (opt Options) BufferSize() int {
	size := opt.pointer.buffer_size
	return size
}
// BatchSize returns the batch size stored in the options.
func (opt Options) BatchSize() int {
	size := opt.pointer.batch_size
	return size
}
// ParallelWorkers returns the number of parallel workers stored in the
// options.
func (opt Options) ParallelWorkers() int {
	workers := opt.pointer.parallel_workers
	return workers
}
// ParseFastSeqHeader returns the header parser stored in the options. The
// value is nil when header parsing was disabled with
// OptionFastSeqDoNotParseHeader.
func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
	parser := opt.pointer.fastseq_header_parser
	return parser
}
// FormatFastSeqHeader returns the function stored in the options to format
// the header of a sequence.
func (opt Options) FormatFastSeqHeader() func(obiseq.BioSequence) string {
	formatter := opt.pointer.fastseq_header_writer
	return formatter
}
// ProgressBar reports whether the progress bar flag is set in the options.
func (opt Options) ProgressBar() bool {
	enabled := opt.pointer.with_progress_bar
	return enabled
}
// OptionsBufferSize returns a setter that stores size as the buffer size.
func OptionsBufferSize(size int) WithOption {
	return func(opt Options) {
		opt.pointer.buffer_size = size
	}
}
// OptionsQualityShift returns a setter that stores shift as the ASCII code
// corresponding to a quality of 0 in FASTQ encoded quality scores.
func OptionsQualityShift(shift int) WithOption {
	return func(opt Options) {
		opt.pointer.quality_shift = shift
	}
}
// OptionsQualitySanger returns a setter selecting a quality shift of 33,
// that is FASTQ qualities encoded following the Sanger convention. This
// corresponds to FASTQ files produced by Illumina.
func OptionsQualitySanger() WithOption {
	const sangerShift = 33
	return OptionsQualityShift(sangerShift)
}
// OptionsQualitySolexa returns a setter selecting a quality shift of 64,
// that is FASTQ qualities encoded following the Solexa convention.
func OptionsQualitySolexa() WithOption {
	const solexaShift = 64
	return OptionsQualityShift(solexaShift)
}
// OptionsFastSeqHeaderParser returns a setter that stores parser as the
// header parser.
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
	return func(opt Options) {
		opt.pointer.fastseq_header_parser = parser
	}
}
// OptionFastSeqDoNotParseHeader returns a setter that stores a nil header
// parser in the options.
func OptionFastSeqDoNotParseHeader() WithOption {
	var noParser obiseq.SeqAnnotator
	return OptionsFastSeqHeaderParser(noParser)
}
// OptionsFastSeqDefaultHeaderParser returns a setter that restores
// ParseGuessedFastSeqHeader, the parser installed by default, as the header
// parser.
func OptionsFastSeqDefaultHeaderParser() WithOption {
	var defaultParser obiseq.SeqAnnotator = ParseGuessedFastSeqHeader
	return OptionsFastSeqHeaderParser(defaultParser)
}
// OptionsFastSeqHeaderFormat returns a setter that stores format as the
// function used to write the header of FASTA and FASTQ sequences.
func OptionsFastSeqHeaderFormat(format func(obiseq.BioSequence) string) WithOption {
	return func(opt Options) {
		opt.pointer.fastseq_header_writer = format
	}
}
// OptionsParallelWorkers returns a setter that stores nworkers as the
// number of parallel workers.
func OptionsParallelWorkers(nworkers int) WithOption {
	return func(opt Options) {
		opt.pointer.parallel_workers = nworkers
	}
}
// OptionsBatchSize returns a setter that stores size as the batch size.
func OptionsBatchSize(size int) WithOption {
	return func(opt Options) {
		opt.pointer.batch_size = size
	}
}
// OptionsWithProgressBar returns a setter that turns the progress bar flag
// on.
func OptionsWithProgressBar() WithOption {
	return func(opt Options) {
		opt.pointer.with_progress_bar = true
	}
}
// OptionsWithoutProgressBar returns a setter that turns the progress bar
// flag off.
func OptionsWithoutProgressBar() WithOption {
	return func(opt Options) {
		opt.pointer.with_progress_bar = false
	}
}

View File

@@ -0,0 +1,93 @@
package obiformats
import (
"bufio"
"compress/gzip"
"io"
"log"
"os"
"strings"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// GuessSeqFileType guesses the format of a sequence file from the beginning
// of its first line.
//
// The prefixes are tried in order and the first match wins. The returned
// value is one of "ecopcr", "fasta", "fastq", "embl", "genebank", or
// "unknown" when no prefix matches.
func GuessSeqFileType(firstline string) string {
	signatures := [...]struct {
		prefix string
		format string
	}{
		{"#@ecopcr-v2", "ecopcr"},
		{"#", "ecopcr"},
		{">", "fasta"},
		{"@", "fastq"},
		{"ID ", "embl"},
		{"LOCUS ", "genebank"},
	}

	for _, signature := range signatures {
		if strings.HasPrefix(firstline, signature.prefix) {
			return signature.format
		}
	}

	return "unknown"
}
// ReadSequencesBatchFromFile opens filename, guesses its format from its
// first bytes and returns a batch iterator over the sequences it contains.
//
// The file is first probed for gzip compression; when the probe fails the
// file is rewound and read as plain data. The format is then guessed with
// GuessSeqFileType from up to 30 bytes of the (possibly decompressed)
// content:
//
//   - "fasta" and "fastq" files are closed and handed over by name to
//     ReadFastSeqBatchFromFile, whose result and error are returned
//     unchanged (the error used to be discarded);
//   - "ecopcr" and "embl" files are read from the already opened stream;
//   - any other format terminates the program through log.Fatalf.
//
// A file that cannot be opened also terminates the program through
// log.Fatalf; the return statement that follows it only satisfies the
// compiler.
func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	var file *os.File
	var reader io.Reader
	var greader io.Reader
	var err error

	file, err = os.Open(filename)
	if err != nil {
		log.Fatalf("open file error: %v", err)
		return obiseq.NilIBioSequenceBatch, err
	}

	reader = file

	// Test whether the stream is gzip compressed. A failed probe has
	// already consumed bytes, hence the rewind to the start of the file.
	// NOTE(review): the result of Seek is not checked; a non-seekable
	// input would be read from the wrong position.
	greader, err = gzip.NewReader(reader)
	if err != nil {
		file.Seek(0, 0)
	} else {
		log.Printf("File %s is gz compressed ", filename)
		reader = greader
	}

	breader := bufio.NewReader(reader)

	// A short or empty file yields a shorter tag, and therefore most likely
	// the "unknown" format; the Peek error itself carries no extra
	// information here.
	tag, _ := breader.Peek(30)

	filetype := GuessSeqFileType(string(tag))
	log.Printf("File guessed format : %s (tag: %s)",
		filetype, (strings.Split(string(tag), "\n"))[0])
	reader = breader

	switch filetype {
	case "fastq", "fasta":
		file.Close()
		// Propagate the reader error instead of silently reporting success.
		return ReadFastSeqBatchFromFile(filename, options...)
	case "ecopcr":
		return ReadEcoPCRBatch(reader, options...), nil
	case "embl":
		return ReadEMBLBatch(reader, options...), nil
	default:
		log.Fatalf("File %s has guessed format %s which is not yet implemented",
			filename, filetype)
	}

	return obiseq.NilIBioSequenceBatch, nil
}
// ReadSequencesFromFile reads filename with ReadSequencesBatchFromFile and
// returns a sequence iterator built from the sorted batches.
//
// When the batch reader reports an error, that error is returned together
// with the zero value of obiseq.IBioSequence; the batch iterator is no
// longer used before the error has been checked.
func ReadSequencesFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
	ib, err := ReadSequencesBatchFromFile(filename, options...)
	if err != nil {
		var none obiseq.IBioSequence
		return none, err
	}

	return ib.SortBatches().IBioSequence(), nil
}

View File

@@ -0,0 +1,79 @@
package obiformats
import (
"fmt"
"io"
"log"
"os"
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
)
// WriteSequences writes the sequences of iterator to file.
//
// The output format is chosen from the first sequence: FASTQ when it has
// qualities, FASTA otherwise. That first sequence is written here and the
// rest of the iterator is handed over to WriteFastq or WriteFasta. Nothing
// is written when the iterator is empty.
//
// The header format and the quality shift are taken from options.
//
// The error returned is the one met while writing the first sequence, if
// any; it used to be discarded. The remaining sequences are still passed to
// the format writer in that case, so the iterator is consumed exactly as
// before.
// NOTE(review): the results of WriteFastq and WriteFasta are not inspected
// because their signatures are not visible from here.
func WriteSequences(iterator obiseq.IBioSequence,
	file io.Writer,
	options ...WithOption) error {

	opts := MakeOptions(options)

	header_format := opts.FormatFastSeqHeader()
	quality := opts.QualityShift()

	if !iterator.Next() {
		return nil
	}

	var err error
	seq := iterator.Get()
	if seq.HasQualities() {
		_, err = fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
		WriteFastq(iterator, file, options...)
	} else {
		_, err = fmt.Fprintln(file, FormatFasta(seq, header_format))
		WriteFasta(iterator, file, options...)
	}

	return err
}
// WriteSequencesToFile creates (or truncates) filename and writes the
// sequences of iterator to it through WriteSequences.
//
// A file that cannot be created terminates the program through log.Fatalf;
// the return statement that follows it only satisfies the compiler.
//
// NOTE(review): the created file is never closed in this function; confirm
// whether the writers finish synchronously before adding a Close.
func WriteSequencesToFile(iterator obiseq.IBioSequence,
	filename string,
	options ...WithOption) error {

	file, err := os.Create(filename)
	if err == nil {
		return WriteSequences(iterator, file, options...)
	}

	log.Fatalf("open file error: %v", err)
	return err
}
// WriteSequencesToStdout writes the sequences of iterator to the standard
// output through WriteSequences.
func WriteSequencesToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
	var stdout io.Writer = os.Stdout
	return WriteSequences(iterator, stdout, options...)
}
// func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
// file io.Writer,
// options ...WithOption) error {
// opts := MakeOptions(options)
// header_format := opts.FormatFastSeqHeader()
// quality := opts.QualityShift()
// ok := iterator.Next()
// if ok {
// batch := iterator.Get()
// if batch.Slice()[0].HasQualities() {
// file.Write()
// fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
// WriteFastq(iterator, file, options...)
// } else {
// fmt.Fprintln(file, FormatFasta(seq, header_format))
// WriteFasta(iterator, file, options...)
// }
// }
// return nil
// }