mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
First commit
This commit is contained in:
238
pkg/obiformats/ecopcr_read.go
Normal file
238
pkg/obiformats/ecopcr_read.go
Normal file
@@ -0,0 +1,238 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
// __ecopcr_file__ gathers the state used while decoding one ecoPCR result
// stream: the stream itself, the CSV reader built on it and the values
// extracted from the file header.
type __ecopcr_file__ struct {
	file    io.Reader      // stream the file content is read from
	csv     *csv.Reader    // record reader used to fetch the result lines
	names   map[string]int // occurrence counter per sequence name, used to keep names unique
	version int            // format version of the file
	mode    string         // prefix of the "<mode>_taxid" and "<mode>_name" annotation keys

	forward_primer string // value stored in the "forward_primer" annotation
	reverse_primer string // value stored in the "reverse_primer" annotation
}
|
||||
|
||||
// __readline__ reads stream one byte at a time and returns the bytes found
// before the next '\n'. The newline is consumed but not returned.
//
// Reading also stops at the first read error (io.EOF included) or when a
// read yields no byte; what has been accumulated so far is returned. An
// empty string is therefore returned both for an empty line and at the end
// of the stream.
//
// Because reads are byte-wise, nothing located after the newline is
// consumed from stream.
func __readline__(stream io.Reader) string {
	// The buffer grows on demand: lines longer than the initial capacity
	// must not overflow it.
	line := make([]byte, 0, 1024)
	char := make([]byte, 1)

	for {
		n, err := stream.Read(char)
		if err != nil || n != 1 || char[0] == '\n' {
			break
		}
		line = append(line, char[0])
	}

	return string(line)
}
|
||||
|
||||
func __read_ecopcr_bioseq__(file *__ecopcr_file__) (obiseq.BioSequence, error) {
|
||||
|
||||
record, err := file.csv.Read()
|
||||
|
||||
if err != nil {
|
||||
return obiseq.NilBioSequence, err
|
||||
}
|
||||
|
||||
name := strings.TrimSpace(record[0])
|
||||
|
||||
// Ensure that sequence name is unique accross a file.
|
||||
if val, ok := file.names[name]; ok {
|
||||
file.names[name]++
|
||||
name = fmt.Sprintf("%s_%d", name, val)
|
||||
} else {
|
||||
file.names[name] = 1
|
||||
}
|
||||
|
||||
var sequence []byte
|
||||
var comment string
|
||||
|
||||
if file.version == 2 {
|
||||
sequence = []byte(strings.TrimSpace(record[20]))
|
||||
comment = strings.TrimSpace(record[21])
|
||||
|
||||
} else {
|
||||
sequence = []byte(strings.TrimSpace(record[18]))
|
||||
comment = strings.TrimSpace(record[19])
|
||||
}
|
||||
|
||||
bseq := obiseq.MakeBioSequence(name, sequence, comment)
|
||||
annotation := bseq.Annotations()
|
||||
|
||||
annotation["ac"] = name
|
||||
annotation["seq_length"], _ = strconv.Atoi(strings.TrimSpace(record[1]))
|
||||
annotation["taxid"], _ = strconv.Atoi(strings.TrimSpace(record[2]))
|
||||
annotation["rank"] = strings.TrimSpace(record[3])
|
||||
annotation["species_taxid"], _ = strconv.Atoi(strings.TrimSpace(record[4]))
|
||||
annotation["species_name"] = strings.TrimSpace(record[5])
|
||||
annotation["genus_taxid"], _ = strconv.Atoi(strings.TrimSpace(record[6]))
|
||||
annotation["genus_name"] = strings.TrimSpace(record[7])
|
||||
annotation["family_taxid"], _ = strconv.Atoi(strings.TrimSpace(record[8]))
|
||||
annotation["family_name"] = strings.TrimSpace(record[9])
|
||||
k_m_taxid := file.mode + "_taxid"
|
||||
k_m_name := file.mode + "_name"
|
||||
annotation[k_m_taxid], _ = strconv.Atoi(strings.TrimSpace(record[10]))
|
||||
annotation[k_m_name] = strings.TrimSpace(record[11])
|
||||
annotation["strand"] = strings.TrimSpace(record[12])
|
||||
annotation["forward_primer"] = file.forward_primer
|
||||
annotation["forward_match"] = strings.TrimSpace(record[13])
|
||||
annotation["forward_mismatch"], _ = strconv.Atoi(strings.TrimSpace(record[14]))
|
||||
|
||||
delta := 0
|
||||
if file.version == 2 {
|
||||
value, err := strconv.ParseFloat(strings.TrimSpace(record[15]), 64)
|
||||
if err != nil {
|
||||
annotation["forward_tm"] = value
|
||||
} else {
|
||||
annotation["forward_tm"] = -1
|
||||
}
|
||||
delta++
|
||||
}
|
||||
|
||||
annotation["reverse_primer"] = file.reverse_primer
|
||||
annotation["reverse_match"] = strings.TrimSpace(record[15+delta])
|
||||
annotation["reverse_mismatch"], _ = strconv.Atoi(strings.TrimSpace(record[16+delta]))
|
||||
|
||||
if file.version == 2 {
|
||||
value, err := strconv.ParseFloat(strings.TrimSpace(record[17+delta]), 64)
|
||||
if err != nil {
|
||||
annotation["reverse_tm"] = value
|
||||
} else {
|
||||
annotation["reverse_tm"] = -1
|
||||
}
|
||||
delta++
|
||||
}
|
||||
|
||||
annotation["amplicon_length"], _ = strconv.Atoi(strings.TrimSpace(record[17+delta]))
|
||||
|
||||
return bseq, nil
|
||||
}
|
||||
|
||||
func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch {
|
||||
tag := make([]byte, 11)
|
||||
n, _ := reader.Read(tag)
|
||||
|
||||
version := 1
|
||||
if n == 11 && string(tag) == "#@ecopcr-v2" {
|
||||
version = 2
|
||||
}
|
||||
|
||||
line := __readline__(reader)
|
||||
for !strings.HasPrefix(line, "# direct strand oligo1") {
|
||||
line = __readline__(reader)
|
||||
}
|
||||
forward_primer := (strings.Split(line, " "))[6]
|
||||
|
||||
line = __readline__(reader)
|
||||
for !strings.HasPrefix(line, "# reverse strand oligo2") {
|
||||
line = __readline__(reader)
|
||||
}
|
||||
reverse_primer := (strings.Split(line, " "))[5]
|
||||
|
||||
line = __readline__(reader)
|
||||
for !strings.HasPrefix(line, "# output in") {
|
||||
line = __readline__(reader)
|
||||
}
|
||||
mode := (strings.Split(line, " "))[3]
|
||||
|
||||
file := csv.NewReader(reader)
|
||||
file.Comma = '|'
|
||||
file.Comment = '#'
|
||||
file.TrimLeadingSpace = true
|
||||
file.ReuseRecord = true
|
||||
|
||||
log.Printf("EcoPCR file version : %d Mode : %s\n", version, mode)
|
||||
|
||||
ecopcr := __ecopcr_file__{
|
||||
file: reader,
|
||||
csv: file,
|
||||
names: make(map[string]int),
|
||||
version: version,
|
||||
mode: mode,
|
||||
forward_primer: forward_primer,
|
||||
reverse_primer: reverse_primer}
|
||||
|
||||
opt := MakeOptions(options)
|
||||
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
new_iter.Add(1)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
close(new_iter.Channel())
|
||||
}()
|
||||
|
||||
go func() {
|
||||
|
||||
seq, err := __read_ecopcr_bioseq__(&ecopcr)
|
||||
slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
||||
i := 0
|
||||
ii := 0
|
||||
for err == nil {
|
||||
slice = append(slice, seq)
|
||||
ii++
|
||||
if ii >= opt.BatchSize() {
|
||||
new_iter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
slice = make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
||||
|
||||
i++
|
||||
ii = 0
|
||||
}
|
||||
|
||||
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
||||
}
|
||||
|
||||
if len(slice) > 0 {
|
||||
new_iter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
}
|
||||
|
||||
new_iter.Done()
|
||||
|
||||
if err != nil && err != io.EOF {
|
||||
log.Panicf("%+v", err)
|
||||
}
|
||||
|
||||
}()
|
||||
|
||||
return new_iter
|
||||
}
|
||||
|
||||
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiseq.IBioSequence {
|
||||
ib := ReadEcoPCRBatch(reader, options...)
|
||||
return ib.SortBatches().IBioSequence()
|
||||
}
|
||||
|
||||
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
greader, err = gzip.NewReader(reader)
|
||||
if err == nil {
|
||||
reader = greader
|
||||
}
|
||||
|
||||
return ReadEcoPCRBatch(reader, options...), nil
|
||||
}
|
||||
|
||||
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
||||
ib, err := ReadEcoPCRBatchFromFile(filename, options...)
|
||||
return ib.SortBatches().IBioSequence(), err
|
||||
|
||||
}
|
||||
246
pkg/obiformats/embl_read.go
Normal file
246
pkg/obiformats/embl_read.go
Normal file
@@ -0,0 +1,246 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
// __FILE_CHUNK_SIZE__ is the size in bytes (1 MiB) of the buffers allocated
// by __read_flat_file_chunk__ when cutting a flat file into chunks.
var __FILE_CHUNK_SIZE__ = 1 << 20
|
||||
|
||||
// __slice_grow__ returns slice unchanged: no growth is performed by the
// current implementation.
func __slice_grow__(slice []string) []string {
	return slice
}
|
||||
|
||||
// __embl_chunk__ associates a set of entries, each stored as a slice of
// strings, with the order of the chunk they come from.
type __embl_chunk__ struct {
	entries [][]string // entries of the chunk
	order   int        // rank of the chunk
}
|
||||
|
||||
// __file_chunk__ is a piece of a flat file handed over to a parser: the raw
// data of the piece and its rank in the file.
type __file_chunk__ struct {
	raw   io.Reader // content of the chunk
	order int       // rank of the chunk, reused as the order of the batch built from it
}
|
||||
|
||||
// __end_of_last_entry__ looks, from the end of buff, for the last entry
// terminator, i.e. a line made of "//" only:
//
//	<CR>?<LF>//<CR>?<LF>
//
// It returns the index of the byte following the terminator (len(buff) when
// the terminator ends the buffer), or -1 when buff holds no terminator.
func __end_of_last_entry__(buff []byte) int {
	// The buffer is scanned backward, the state tells which part of the
	// pattern has already been matched:
	//   0: nothing, 1: final <LF>, 2: <CR> before the final <LF>,
	//   3: second '/', 4: first '/', 5: <LF> preceding "//" (full match).
	state := 0
	start := 0

	for i := len(buff) - 1; i >= 0 && state < 5; i-- {
		c := buff[i]

		switch state {
		case 0: // outside of the pattern
			if c == '\n' {
				state = 1
			}
		case 1: // a \n has been matched at position i+1
			start = i + 2
			switch c {
			case '\r':
				state = 2
			case '/':
				state = 3
			case '\n':
				state = 1
			default:
				state = 0
			}
		case 2: // a \r has been matched
			switch c {
			case '/':
				state = 3
			case '\n':
				state = 1
			default:
				state = 0
			}
		case 3: // the first / (from the end) has been matched
			switch c {
			case '/':
				state = 4
			case '\n':
				state = 1
			default:
				state = 0
			}
		case 4: // the second / has been matched
			if c == '\n' {
				state = 5
			} else {
				state = 0
			}
		}
	}

	// Success is decided on the final state, not on the loop index: the
	// pattern may legitimately start on the very first bytes of the buffer.
	if state == 5 {
		return start
	}

	return -1
}
|
||||
|
||||
func __parse_embl_file__(input <-chan __file_chunk__, out obiseq.IBioSequenceBatch) {
|
||||
|
||||
for chunks := range input {
|
||||
scanner := bufio.NewScanner(chunks.raw)
|
||||
order := chunks.order
|
||||
sequences := make(obiseq.BioSequenceSlice, 0, 100)
|
||||
id := ""
|
||||
scientific_name := ""
|
||||
def_bytes := new(bytes.Buffer)
|
||||
feat_bytes := new(bytes.Buffer)
|
||||
seq_bytes := new(bytes.Buffer)
|
||||
taxid := 1
|
||||
for scanner.Scan() {
|
||||
|
||||
line := scanner.Text()
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(line, "ID "):
|
||||
id = strings.SplitN(line[5:], ";", 2)[0]
|
||||
case strings.HasPrefix(line, "OS "):
|
||||
scientific_name = strings.TrimSpace(line[5:])
|
||||
case strings.HasPrefix(line, "DE "):
|
||||
if def_bytes.Len() > 0 {
|
||||
def_bytes.WriteByte(' ')
|
||||
}
|
||||
def_bytes.WriteString(strings.TrimSpace(line[5:]))
|
||||
case strings.HasPrefix(line, "FH "):
|
||||
feat_bytes.WriteString(line)
|
||||
case line == "FH":
|
||||
feat_bytes.WriteByte('\n')
|
||||
feat_bytes.WriteString(line)
|
||||
case strings.HasPrefix(line, "FT "):
|
||||
feat_bytes.WriteByte('\n')
|
||||
feat_bytes.WriteString(line)
|
||||
if strings.HasPrefix(line, `FT /db_xref="taxon:`) {
|
||||
taxid, _ = strconv.Atoi(strings.SplitN(line[37:], `"`, 2)[0])
|
||||
}
|
||||
case strings.HasPrefix(line, " "):
|
||||
parts := strings.SplitN(line[5:], " ", 7)
|
||||
for i := 0; i < 6; i++ {
|
||||
seq_bytes.WriteString(parts[i])
|
||||
}
|
||||
case line == "//":
|
||||
sequence := obiseq.MakeBioSequence(id,
|
||||
seq_bytes.Bytes(),
|
||||
def_bytes.String())
|
||||
|
||||
sequence.SetFeatures(feat_bytes.String())
|
||||
|
||||
annot := sequence.Annotations()
|
||||
annot["scientific_name"] = scientific_name
|
||||
annot["taxid"] = taxid
|
||||
// log.Println(FormatFasta(sequence, FormatFastSeqJsonHeader))
|
||||
sequences = append(sequences, sequence)
|
||||
def_bytes = new(bytes.Buffer)
|
||||
feat_bytes = new(bytes.Buffer)
|
||||
seq_bytes = new(bytes.Buffer)
|
||||
}
|
||||
}
|
||||
out.Channel() <- obiseq.MakeBioSequenceBatch(order, sequences...)
|
||||
|
||||
}
|
||||
|
||||
out.Done()
|
||||
|
||||
}
|
||||
|
||||
func __read_flat_file_chunk__(reader io.Reader, readers chan __file_chunk__) {
|
||||
var err error
|
||||
var buff []byte
|
||||
|
||||
size := 0
|
||||
l := 0
|
||||
i := 0
|
||||
|
||||
buff = make([]byte, 1<<20)
|
||||
for err == nil {
|
||||
for ; err == nil && l < len(buff); l += size {
|
||||
size, err = reader.Read(buff[l:])
|
||||
}
|
||||
buff = buff[:l]
|
||||
end := __end_of_last_entry__(buff)
|
||||
remains := buff[end:]
|
||||
buff = buff[:end]
|
||||
io := bytes.NewBuffer(buff)
|
||||
readers <- __file_chunk__{io, i}
|
||||
i++
|
||||
buff = make([]byte, __FILE_CHUNK_SIZE__)
|
||||
copy(buff, remains)
|
||||
l = len(remains)
|
||||
}
|
||||
|
||||
close(readers)
|
||||
}
|
||||
|
||||
// 6 5 43 2 1
|
||||
// <CR>?<LF>//<CR>?<LF>
|
||||
func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch {
|
||||
opt := MakeOptions(options)
|
||||
entry_channel := make(chan __file_chunk__, opt.BufferSize())
|
||||
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
|
||||
// new_iter.Add(opt.ParallelWorkers())
|
||||
new_iter.Add(2)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
for len(new_iter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(new_iter.Channel())
|
||||
}()
|
||||
|
||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||
for j := 0; j < 2; j++ {
|
||||
go __parse_embl_file__(entry_channel, new_iter)
|
||||
}
|
||||
|
||||
go __read_flat_file_chunk__(reader, entry_channel)
|
||||
|
||||
return new_iter
|
||||
}
|
||||
|
||||
func ReadEMBL(reader io.Reader, options ...WithOption) obiseq.IBioSequence {
|
||||
ib := ReadEMBLBatch(reader, options...)
|
||||
return ib.SortBatches().IBioSequence()
|
||||
}
|
||||
|
||||
func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
greader, err = gzip.NewReader(reader)
|
||||
if err == nil {
|
||||
reader = greader
|
||||
}
|
||||
|
||||
return ReadEMBLBatch(reader, options...), nil
|
||||
}
|
||||
|
||||
func ReadEMBLFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
||||
ib, err := ReadEMBLBatchFromFile(filename, options...)
|
||||
return ib.SortBatches().IBioSequence(), err
|
||||
|
||||
}
|
||||
30
pkg/obiformats/fastseq_header.go
Normal file
30
pkg/obiformats/fastseq_header.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
func ParseGuessedFastSeqHeader(sequence obiseq.BioSequence) {
|
||||
if strings.HasPrefix(sequence.Definition(), "{") {
|
||||
ParseFastSeqJsonHeader(sequence)
|
||||
} else {
|
||||
ParseFastSeqOBIHeader(sequence)
|
||||
}
|
||||
}
|
||||
|
||||
func IParseFastSeqHeaderBatch(iterator obiseq.IBioSequenceBatch, options ...WithOption) obiseq.IBioSequenceBatch {
|
||||
opt := MakeOptions(options)
|
||||
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||
opt.ParallelWorkers(),
|
||||
opt.BufferSize())
|
||||
}
|
||||
|
||||
func IParseFastSeqHeader(iterator obiseq.IBioSequence, options ...WithOption) obiseq.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
return IParseFastSeqHeaderBatch(iterator.IBioSequenceBatch(opt.BatchSize(),
|
||||
opt.BufferSize()),
|
||||
options...).SortBatches().IBioSequence()
|
||||
}
|
||||
5
pkg/obiformats/fastseq_interface.go
Normal file
5
pkg/obiformats/fastseq_interface.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package obiformats
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
|
||||
// FormatHeader is the signature of the functions converting a sequence
// into the text of its header, such as FormatFastSeqJsonHeader and
// FormatFastSeqOBIHeader.
type FormatHeader func(sequence obiseq.BioSequence) string
|
||||
66
pkg/obiformats/fastseq_json_header.go
Normal file
66
pkg/obiformats/fastseq_json_header.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
"github.com/goccy/go-json"
|
||||
)
|
||||
|
||||
func _parse_json_header_(header string, annotations obiseq.Annotation) string {
|
||||
|
||||
start := -1
|
||||
stop := -1
|
||||
level := 0
|
||||
lh := len(header)
|
||||
|
||||
for i := 0; (i < lh) && (stop < 0); i++ {
|
||||
// fmt.Printf("[%d,%d-%d] : %d (%c) (%d,%c)\n", i, start, stop, header[i], header[i], '{', '{')
|
||||
if level == 0 && header[i] == '{' {
|
||||
start = i
|
||||
}
|
||||
|
||||
if header[i] == '{' {
|
||||
level++
|
||||
}
|
||||
|
||||
if header[i] == '}' {
|
||||
level--
|
||||
}
|
||||
|
||||
if start >= 0 && level == 0 {
|
||||
stop = i
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if start < 0 || stop < 0 {
|
||||
return header
|
||||
}
|
||||
|
||||
stop++
|
||||
|
||||
json.Unmarshal([]byte(header)[start:stop], annotations)
|
||||
return strings.TrimSpace(header[stop:])
|
||||
}
|
||||
|
||||
func ParseFastSeqJsonHeader(sequence obiseq.BioSequence) {
|
||||
sequence.SetDefinition(_parse_json_header_(sequence.Definition(),
|
||||
sequence.Annotations()))
|
||||
}
|
||||
|
||||
func FormatFastSeqJsonHeader(sequence obiseq.BioSequence) string {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if annotations != nil {
|
||||
text, err := json.Marshal(sequence.Annotations())
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return string(text)
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
288
pkg/obiformats/fastseq_obi_header.go
Normal file
288
pkg/obiformats/fastseq_obi_header.go
Normal file
@@ -0,0 +1,288 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
"github.com/goccy/go-json"
|
||||
)
|
||||
|
||||
// __obi_header_value_string_pattern__ matches, at the beginning of a text,
// a value quoted with single or double quotes, optionally surrounded by
// white spaces and followed by the ';' closing the value.
var __obi_header_value_string_pattern__ = regexp.MustCompile(`^\s*('[^']*'|"[^"]*")\s*;`)

// __obi_header_value_numeric_pattern__ matches, at the beginning of a text,
// an integer or floating point value, optionally surrounded by white spaces
// and followed by the ';' closing the value.
var __obi_header_value_numeric_pattern__ = regexp.MustCompile(`^\s*([+-]?\.\d+|[+-]?\d+(\.\d*)?([eE][+-]?\d+)?)\s*;`)
|
||||
|
||||
// __match__dict__ checks that text starts, after optional blanks, with a
// {...} block followed by optional blanks and a ';'. Nested blocks are
// allowed and braces located between quotes (single or double) are ignored.
//
// It returns the positions {start, end} where start is the index of the
// opening brace and end the index following the ';', or an empty slice when
// text does not match.
func __match__dict__(text []byte) []int {
	const (
		before = iota // the opening brace has not been met yet
		inside        // within the main dict
		after         // main dict closed, waiting for the ';'
	)

	state := before
	depth := 0
	start := 0
	quote := byte(0) // quote character of the string being read, 0 outside of strings

	for i, c := range text {
		blank := c == ' ' || c == '\t'

		switch state {
		case before:
			if c == '{' {
				depth++
				state = inside
				start = i
			} else if !blank {
				// It's not a dict
				return []int{}
			}
		case inside:
			switch {
			case c == '"' || c == '\'':
				if quote == 0 {
					quote = c
				} else if quote == c {
					quote = 0
				}
			case quote != 0:
				// within a string: nothing to interpret
			case c == '{':
				depth++
			case c == '}':
				depth--
				if depth == 0 {
					state = after
				}
			}
		case after:
			if c == ';' {
				return []int{start, i + 1}
			}
			if !blank {
				// Bad character at the end of the pattern
				return []int{}
			}
		}
	}

	return []int{}
}
|
||||
|
||||
// __match__key__ checks that text starts, after optional blanks, with a key
// followed by optional blanks and a '='. A key starts with an ASCII letter
// and continues with ASCII letters, digits, '_', '-' or '.'.
//
// It returns the positions {start, end} where start is the index of the
// first character of the key and end the index following the '=', or an
// empty slice when text does not match.
func __match__key__(text []byte) []int {
	isLetter := func(c byte) bool {
		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
	}

	const (
		leading  = iota // skipping the blanks preceding the key
		inKey           // reading the key
		trailing        // skipping the blanks following the key
	)

	state := leading
	start := 0

	for i, c := range text {
		blank := c == ' ' || c == '\t'

		switch state {
		case leading:
			switch {
			case isLetter(c):
				state = inKey
				start = i
			case !blank:
				// It's not a key
				return []int{}
			}
		case inKey:
			switch {
			case c == '=':
				return []int{start, i + 1}
			case blank:
				state = trailing
			case isLetter(c), c >= '0' && c <= '9', c == '_', c == '-', c == '.':
				// the key goes on
			default:
				// Not allowed character in a key
				return []int{}
			}
		case trailing:
			if c == '=' {
				return []int{start, i + 1}
			}
			if !blank {
				// Not allowed character after a key
				return []int{}
			}
		}
	}

	return []int{} // Not a key
}
|
||||
|
||||
// __match__general__ matches any text up to its first ';'. It returns the
// positions {0, end} where end is the index following that ';', or an empty
// slice when text holds no ';'.
func __match__general__(text []byte) []int {
	if semicolon := bytes.IndexByte(text, ';'); semicolon >= 0 {
		return []int{0, semicolon + 1}
	}

	return []int{} // Not generic value
}
|
||||
|
||||
// Spellings recognized as the boolean value false.
var (
	__false__ = []byte("false")
	__False__ = []byte("False")
	__FALSE__ = []byte("FALSE")
)

// Spellings recognized as the boolean value true.
var (
	__true__ = []byte("true")
	__True__ = []byte("True")
	__TRUE__ = []byte("TRUE")
)
|
||||
|
||||
// __is_true__ tells if text is one of the spellings of the boolean value
// true: "t", "T", "true", "True" or "TRUE".
func __is_true__(text []byte) bool {
	switch string(text) {
	case "t", "T", "true", "True", "TRUE":
		return true
	}

	return false
}
|
||||
|
||||
// __is_false__ tells if text is one of the spellings of the boolean value
// false: "f", "F", "false", "False" or "FALSE".
func __is_false__(text []byte) bool {
	switch string(text) {
	case "f", "F", "false", "False", "FALSE":
		return true
	}

	return false
}
|
||||
|
||||
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||
definition := []byte(sequence.Definition())
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
// all_matches := __obi_header_pattern__.FindAllSubmatchIndex(definition, -1)
|
||||
|
||||
d := definition
|
||||
|
||||
//for m := __obi_header_key_pattern__.FindIndex(definition); len(m) > 0; {
|
||||
//fmt.Println(string(definition[0:20]), __match__key__(definition))
|
||||
for m := __match__key__(definition); len(m) > 0; {
|
||||
var bvalue []byte
|
||||
var value interface{}
|
||||
start := m[0]
|
||||
stop := -1
|
||||
key := string(bytes.TrimSpace(d[start:(m[1] - 1)]))
|
||||
part := d[m[1]:]
|
||||
|
||||
// numeric value
|
||||
m = __obi_header_value_numeric_pattern__.FindIndex(part)
|
||||
if len(m) > 0 {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
value, _ = strconv.ParseFloat(string(bvalue), 64)
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
// string value
|
||||
|
||||
m = __obi_header_value_string_pattern__.FindIndex(part)
|
||||
if len(m) > 0 {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
value = string(bvalue[1:(len(bvalue) - 1)])
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
|
||||
// dict value
|
||||
m = __match__dict__(part)
|
||||
if len(m) > 0 {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
|
||||
var err error
|
||||
if strings.HasPrefix(key, "merged_") ||
|
||||
strings.HasSuffix(key, "_count") {
|
||||
dict := make(map[string]int)
|
||||
err = json.Unmarshal(j, &dict)
|
||||
value = dict
|
||||
} else {
|
||||
dict := make(map[string]interface{})
|
||||
err = json.Unmarshal(j, &dict)
|
||||
value = dict
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
value = string(bvalue)
|
||||
}
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
|
||||
// Generic value
|
||||
|
||||
// m = __obi_header_value_general_pattern__.FindIndex(part)
|
||||
m = __match__general__(part)
|
||||
if len(m) > 0 {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
|
||||
if __is_false__(bvalue) {
|
||||
value = false
|
||||
} else {
|
||||
if __is_true__(bvalue) {
|
||||
value = true
|
||||
} else {
|
||||
value = string(bvalue)
|
||||
}
|
||||
}
|
||||
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
// no value
|
||||
break
|
||||
} // End of No value
|
||||
} // End of not dict
|
||||
} // End of not string
|
||||
} // End of not numeric
|
||||
|
||||
annotations[key] = value
|
||||
|
||||
d = part[stop:]
|
||||
//m = __obi_header_key_pattern__.FindIndex(d)
|
||||
m = __match__key__(d)
|
||||
}
|
||||
|
||||
sequence.SetDefinition(string(bytes.TrimSpace(d)))
|
||||
}
|
||||
|
||||
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if annotations != nil {
|
||||
var text strings.Builder
|
||||
|
||||
for key, value := range annotations {
|
||||
switch t := value.(type) {
|
||||
case string:
|
||||
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
|
||||
default:
|
||||
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
}
|
||||
}
|
||||
|
||||
return text.String()
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
104
pkg/obiformats/fastseq_read.c
Normal file
104
pkg/obiformats/fastseq_read.c
Normal file
@@ -0,0 +1,104 @@
|
||||
#include "fastseq_read.h"
|
||||
|
||||
|
||||
/**
 * @brief build a sequence iterator on an already opened zlib stream
 *
 * @param fp    the zlib stream to read from, may be Z_NULL
 * @param shift the quality shift stored in the iterator
 * @return fast_kseq_t* a newly allocated iterator, or NULL when
 *                      fp is Z_NULL or when an allocation fails
 */
static fast_kseq_t* _open_fast_sek(gzFile fp, int shift) {
    fast_kseq_t* iterator = (fast_kseq_t*)malloc(sizeof(fast_kseq_t));

    if (iterator == NULL)
        return NULL;

    if (fp == Z_NULL) {
        free(iterator);
        return NULL;
    }

    iterator->filez    = fp;
    iterator->finished = false;
    iterator->shift    = shift;
    iterator->seq      = kseq_init(fp);

    if (iterator->seq == NULL) {
        free(iterator);
        return NULL;
    }

    return iterator;
}
|
||||
|
||||
/**
|
||||
* @brief open a FastA or FastQ file gizzed or not
|
||||
*
|
||||
* @param filename a const char* indicating the path of the
|
||||
* fast* file
|
||||
* @return kseq_t* a pointer to a kseq_t structure or NULL on
|
||||
* failing
|
||||
*/
|
||||
fast_kseq_t* open_fast_sek_file(const char* filename, int shift) {
|
||||
gzFile fp;
|
||||
|
||||
fp = gzopen(filename, "r");
|
||||
return _open_fast_sek(fp, shift);
|
||||
}
|
||||
|
||||
/**
 * @brief build a sequence iterator on a file descriptor
 *
 * @param fd        the file descriptor to read from
 * @param keep_open when true the stream is opened on a duplicate
 *                  of fd instead of fd itself
 * @param shift     the quality shift stored in the iterator
 * @return fast_kseq_p a pointer to a fast_kseq_t structure or
 *                     NULL on failing
 */
fast_kseq_p open_fast_sek_fd(int fd, bool keep_open, int shift) {
    int source = keep_open ? dup(fd) : fd;

    return _open_fast_sek(gzdopen(source, "r"), shift);
}
|
||||
|
||||
/**
 * @brief build a sequence iterator on a duplicate of the standard input
 *
 * @param shift the quality shift stored in the iterator
 * @return fast_kseq_p a pointer to a fast_kseq_t structure or
 *                     NULL on failing
 */
fast_kseq_p open_fast_sek_stdin(int shift) {
    int fd = fileno(stdin);

    return open_fast_sek_fd(fd, true, shift);
}
|
||||
|
||||
|
||||
/**
 * @brief read the next sequence on the fast* stream
 *
 * @param iterator the iterator to read from
 * @return int64_t a strictly positive value when a sequence has
 *                 been read (the offset reported by gzoffset when
 *                 it is available, 1 otherwise), a negative value
 *                 otherwise: the code returned by kseq_read, or
 *                 -3 when called with a NULL iterator
 */
int64_t next_fast_sek(fast_kseq_t* iterator) {
    int64_t l;

    if (iterator == NULL || iterator->seq == NULL)
        return -3;

    l = kseq_read(iterator->seq);

    /* the end of the stream and errors are reported by negative
       values, 0 is the length of an empty sequence */
    iterator->finished = l < 0;

    if (l >= 0) {
        l = gzoffset(iterator->filez);

        /* gzoffset can fail although a sequence has been read:
           a successful read must stay strictly positive */
        if (l < 1)
            l = 1;
    }

    return l;
}
|
||||
|
||||
/**
 * @brief reset the kseq parser state of the iterator
 *
 * @param iterator the iterator to rewind
 * @return int 0 on success, -3 when called with a NULL iterator
 *             or an iterator without parser
 */
int rewind_fast_sek(fast_kseq_t* iterator) {
    if (iterator != NULL && iterator->seq != NULL) {
        kseq_rewind(iterator->seq);
        return 0;
    }

    return -3;
}
|
||||
|
||||
/**
 * @brief release an iterator, its parser and its zlib stream
 *
 * @param iterator the iterator to close, may be NULL
 * @return int the value returned by gzclose, or -3 when the
 *             iterator is NULL or has no stream
 */
int close_fast_sek(fast_kseq_t* iterator) {
    gzFile fp;
    kseq_t *seq;
    int rep = -3;

    if (iterator == NULL)
        return rep;

    /* both members are saved first: the iterator must not be
       dereferenced once it has been freed */
    fp = iterator->filez;
    seq = iterator->seq;

    free(iterator);

    if (seq != NULL)
        kseq_destroy(seq);

    if (fp != Z_NULL)
        rep = gzclose(fp);

    return rep;
}
|
||||
|
||||
153
pkg/obiformats/fastseq_read.go
Normal file
153
pkg/obiformats/fastseq_read.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package obiformats
|
||||
|
||||
// #cgo CFLAGS: -g -Wall
|
||||
// #cgo LDFLAGS: -lz
|
||||
// #include <stdlib.h>
|
||||
// #include "fastseq_read.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/cutils"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
func __fastseq_reader__(seqfile C.fast_kseq_p,
|
||||
iterator obiseq.IBioSequenceBatch,
|
||||
batch_size int) {
|
||||
var comment string
|
||||
i := 0
|
||||
ii := 0
|
||||
|
||||
slice := make(obiseq.BioSequenceSlice, 0, batch_size)
|
||||
|
||||
for l := int64(C.next_fast_sek(seqfile)); l > 0; l = int64(C.next_fast_sek(seqfile)) {
|
||||
|
||||
s := seqfile.seq
|
||||
|
||||
sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
|
||||
C.int(s.seq.l))
|
||||
|
||||
name := C.GoString(s.name.s)
|
||||
|
||||
if s.comment.l > C.ulong(0) {
|
||||
comment = C.GoString(s.comment.s)
|
||||
} else {
|
||||
comment = ""
|
||||
}
|
||||
|
||||
rep := obiseq.MakeBioSequence(name, sequence, comment)
|
||||
|
||||
if s.qual.l > C.ulong(0) {
|
||||
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
|
||||
quality := make(obiseq.Quality, s.qual.l)
|
||||
l := int(s.qual.l)
|
||||
shift := uint8(seqfile.shift)
|
||||
for j := 0; j < l; j++ {
|
||||
quality[j] = uint8(cquality[j]) - shift
|
||||
}
|
||||
|
||||
rep.SetQualities(quality)
|
||||
}
|
||||
slice = append(slice, rep)
|
||||
ii++
|
||||
if ii >= batch_size {
|
||||
// log.Printf("\n==> Pushing sequence batch\n")
|
||||
// start := time.Now()
|
||||
|
||||
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
// elapsed := time.Since(start)
|
||||
// log.Printf("\n==>sequences pushed after %s\n", elapsed)
|
||||
|
||||
slice = make(obiseq.BioSequenceSlice, 0, batch_size)
|
||||
i++
|
||||
ii = 0
|
||||
}
|
||||
}
|
||||
if len(slice) > 0 {
|
||||
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
}
|
||||
iterator.Done()
|
||||
|
||||
}
|
||||
|
||||
func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
name := C.CString(filename)
|
||||
defer C.free(unsafe.Pointer(name))
|
||||
|
||||
pointer := C.open_fast_sek_file(name, C.int32_t(opt.QualityShift()))
|
||||
|
||||
var err error
|
||||
err = nil
|
||||
|
||||
if pointer == nil {
|
||||
err = errors.New(fmt.Sprintf("Cannot open file %s", filename))
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
size := int64(-1)
|
||||
fi, err := os.Stat(filename)
|
||||
if err == nil {
|
||||
size = fi.Size()
|
||||
log.Printf("File size of %s is %d bytes\n", filename, size)
|
||||
} else {
|
||||
size = -1
|
||||
}
|
||||
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
new_iter.Add(1)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
for len(new_iter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(new_iter.Channel())
|
||||
|
||||
log.Println("End of the fastq file reading")
|
||||
}()
|
||||
|
||||
log.Println("Start of the fastq file reading")
|
||||
|
||||
go __fastseq_reader__(pointer, new_iter, opt.BatchSize())
|
||||
parser := opt.ParseFastSeqHeader()
|
||||
if parser != nil {
|
||||
return IParseFastSeqHeaderBatch(new_iter, options...), err
|
||||
}
|
||||
|
||||
return new_iter, err
|
||||
}
|
||||
|
||||
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
||||
ib, err := ReadFastSeqBatchFromFile(filename, options...)
|
||||
return ib.SortBatches().IBioSequence(), err
|
||||
}
|
||||
|
||||
func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
|
||||
opt := MakeOptions(options)
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
|
||||
new_iter.Add(1)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
close(new_iter.Channel())
|
||||
}()
|
||||
|
||||
go __fastseq_reader__(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), new_iter, opt.BatchSize())
|
||||
|
||||
return new_iter
|
||||
}
|
||||
|
||||
func ReadFastSeqFromStdin(options ...WithOption) obiseq.IBioSequence {
|
||||
ib := ReadFastSeqBatchFromStdin(options...)
|
||||
return ib.SortBatches().IBioSequence()
|
||||
}
|
||||
41
pkg/obiformats/fastseq_read.h
Normal file
41
pkg/obiformats/fastseq_read.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef _READ_H
#define _READ_H

#include <zlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#include "kseq/kseq.h"

/* Instantiate the kseq parser for gzFile handles read through gzread(). */
KSEQ_INIT(gzFile, gzread)

/**
 * @brief State of one fasta/fastq stream being iterated over.
 */
typedef struct {
    kseq_t *seq;    /* kseq parser state */
    bool finished;  /* NOTE(review): presumably set once the stream is exhausted - confirm in the implementation */
    int16_t shift;  /* NOTE(review): presumably the `shift` passed to open_fast_sek_* - confirm in the implementation */
    gzFile filez;   /* zlib file handle */
} fast_kseq_t, *fast_kseq_p;

/*
 * Constructors of a fast_kseq_t, from a file name, from a file descriptor
 * or from the standard input. Each of them takes a `shift` value.
 */
fast_kseq_t* open_fast_sek_file(const char* filename, int shift);
fast_kseq_t* open_fast_sek_fd(int fd, bool keep_open, int shift);
fast_kseq_t* open_fast_sek_stdin(int shift);

/**
 * @brief read the next sequence on the fast* stream
 *
 * @param iterator a fast_kseq_t* created using one of the
 *                 open_fast_sek_* functions
 * @return int64_t if greater than 0 represents the length of the
 *             sequence, otherwise indicates an error
 *             - -1 : no more sequence in the stream
 *             - -2 : too short quality sequence
 *             - -3 : called with NULL pointer
 */
int64_t next_fast_sek(fast_kseq_t* iterator);

int close_fast_sek(fast_kseq_t* iterator);
int rewind_fast_sek(fast_kseq_t* iterator);

#endif
|
||||
164
pkg/obiformats/fastseq_write_fasta.go
Normal file
164
pkg/obiformats/fastseq_write_fasta.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
// min returns the smaller of the two integers x and y.
func min(x, y int) int {
	if y <= x {
		return y
	}
	return x
}
|
||||
|
||||
func FormatFasta(seq obiseq.BioSequence, formater FormatHeader) string {
|
||||
var fragments strings.Builder
|
||||
|
||||
s := seq.Sequence()
|
||||
l := len(s)
|
||||
|
||||
fragments.Grow(l + int(l/60) + 10)
|
||||
|
||||
for i := 0; i < l; i += 60 {
|
||||
to := min(i+60, l)
|
||||
fmt.Fprintf(&fragments, "%s\n", string(s[i:to]))
|
||||
}
|
||||
|
||||
folded := fragments.String()
|
||||
folded = folded[:fragments.Len()-1]
|
||||
info := formater(seq)
|
||||
return fmt.Sprintf(">%s %s %s\n%s",
|
||||
seq.Id(), info,
|
||||
seq.Definition(),
|
||||
folded)
|
||||
}
|
||||
|
||||
func FormatFastaBatch(batch obiseq.BioSequenceBatch, formater FormatHeader) []byte {
|
||||
var bs bytes.Buffer
|
||||
for _, seq := range batch.Slice() {
|
||||
bs.WriteString(FormatFasta(seq, formater))
|
||||
bs.WriteString("\n")
|
||||
}
|
||||
return bs.Bytes()
|
||||
}
|
||||
|
||||
func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
header_format := opt.FormatFastSeqHeader()
|
||||
|
||||
for iterator.Next() {
|
||||
seq := iterator.Get()
|
||||
fmt.Fprintln(file, FormatFasta(seq, header_format))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func WriteFastaToFile(iterator obiseq.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) error {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return WriteFasta(iterator, file, options...)
|
||||
}
|
||||
|
||||
func WriteFastaToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
|
||||
return WriteFasta(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) error {
|
||||
buffsize := iterator.BufferSize()
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
opt := MakeOptions(options)
|
||||
nwriters := 4
|
||||
|
||||
chunkchan := make(chan FileChunck)
|
||||
chunkwait := sync.WaitGroup{}
|
||||
|
||||
header_format := opt.FormatFastSeqHeader()
|
||||
|
||||
chunkwait.Add(nwriters)
|
||||
|
||||
go func() {
|
||||
chunkwait.Wait()
|
||||
for len(chunkchan) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(chunkchan)
|
||||
}()
|
||||
|
||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
chunkchan <- FileChunck{
|
||||
FormatFastaBatch(batch, header_format),
|
||||
batch.Order(),
|
||||
}
|
||||
new_iter.Channel() <- batch
|
||||
}
|
||||
new_iter.Done()
|
||||
}
|
||||
|
||||
for i := 0; i < nwriters; i++ {
|
||||
go ff(iterator.Split())
|
||||
}
|
||||
|
||||
next_to_send := 0
|
||||
received := make(map[int]FileChunck, 100)
|
||||
|
||||
go func() {
|
||||
for chunk := range chunkchan {
|
||||
if chunk.order == next_to_send {
|
||||
file.Write(chunk.text)
|
||||
next_to_send++
|
||||
chunk, ok := received[next_to_send]
|
||||
for ok {
|
||||
file.Write(chunk.text)
|
||||
delete(received, next_to_send)
|
||||
next_to_send++
|
||||
chunk, ok = received[next_to_send]
|
||||
}
|
||||
} else {
|
||||
received[chunk.order] = chunk
|
||||
}
|
||||
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) error {
|
||||
return WriteFastaBatch(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch,
|
||||
filename string,
|
||||
options ...WithOption) error {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return WriteFastaBatch(iterator, file, options...)
|
||||
}
|
||||
168
pkg/obiformats/fastseq_write_fastq.go
Normal file
168
pkg/obiformats/fastseq_write_fastq.go
Normal file
@@ -0,0 +1,168 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
func FormatFastq(seq obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
|
||||
|
||||
l := seq.Length()
|
||||
q := seq.Qualities()
|
||||
ascii := make([]byte, seq.Length())
|
||||
|
||||
for j := 0; j < l; j++ {
|
||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||
}
|
||||
|
||||
info := ""
|
||||
if formater != nil {
|
||||
info = formater(seq)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("@%s %s %s\n%s\n+\n%s",
|
||||
seq.Id(), info,
|
||||
seq.Definition(),
|
||||
string(seq.Sequence()),
|
||||
string(ascii),
|
||||
)
|
||||
}
|
||||
|
||||
func FormatFastqBatch(batch obiseq.BioSequenceBatch, quality_shift int,
|
||||
formater FormatHeader) []byte {
|
||||
var bs bytes.Buffer
|
||||
for _, seq := range batch.Slice() {
|
||||
bs.WriteString(FormatFastq(seq, quality_shift, formater))
|
||||
bs.WriteString("\n")
|
||||
}
|
||||
return bs.Bytes()
|
||||
}
|
||||
|
||||
func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
header_format := opt.FormatFastSeqHeader()
|
||||
quality := opt.QualityShift()
|
||||
|
||||
for iterator.Next() {
|
||||
seq := iterator.Get()
|
||||
fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func WriteFastqToFile(iterator obiseq.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) error {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return WriteFastq(iterator, file, options...)
|
||||
}
|
||||
|
||||
func WriteFastqToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
|
||||
return WriteFastq(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
// FileChunck is a block of formatted output together with the rank it must
// have in the output file. The batch writers fill it with the formatted
// records of one batch and with that batch's Order().
type FileChunck struct {
	text  []byte // formatted records, ready to be written
	order int    // rank of the chunk in the output
}
|
||||
|
||||
func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
buffsize := iterator.BufferSize()
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
opt := MakeOptions(options)
|
||||
nwriters := 4
|
||||
|
||||
chunkchan := make(chan FileChunck)
|
||||
|
||||
header_format := opt.FormatFastSeqHeader()
|
||||
quality := opt.QualityShift()
|
||||
|
||||
new_iter.Add(nwriters)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
for len(chunkchan) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(chunkchan)
|
||||
for len(new_iter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(new_iter.Channel())
|
||||
}()
|
||||
|
||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
chunkchan <- FileChunck{
|
||||
FormatFastqBatch(batch, quality, header_format),
|
||||
batch.Order(),
|
||||
}
|
||||
new_iter.Channel() <- batch
|
||||
}
|
||||
new_iter.Done()
|
||||
}
|
||||
|
||||
log.Println("Start of the fastq file reading")
|
||||
for i := 0; i < nwriters; i++ {
|
||||
go ff(iterator.Split())
|
||||
}
|
||||
|
||||
next_to_send := 0
|
||||
received := make(map[int]FileChunck, 100)
|
||||
|
||||
go func() {
|
||||
for chunk := range chunkchan {
|
||||
if chunk.order == next_to_send {
|
||||
file.Write(chunk.text)
|
||||
next_to_send++
|
||||
chunk, ok := received[next_to_send]
|
||||
for ok {
|
||||
file.Write(chunk.text)
|
||||
delete(received, next_to_send)
|
||||
next_to_send++
|
||||
chunk, ok = received[next_to_send]
|
||||
}
|
||||
} else {
|
||||
received[chunk.order] = chunk
|
||||
}
|
||||
|
||||
}
|
||||
}()
|
||||
|
||||
return new_iter, nil
|
||||
}
|
||||
|
||||
func WriteFastqBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
return WriteFastqBatch(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteFastqBatchToFile(iterator obiseq.IBioSequenceBatch,
|
||||
filename string,
|
||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
return WriteFastqBatch(iterator, file, options...)
|
||||
}
|
||||
5
pkg/obiformats/kseq/Makefile
Normal file
5
pkg/obiformats/kseq/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
# Builds kseq_test, the small test program of kseq.h (linked against zlib).
# `all` and `clean` are not files, so they are declared phony; the binary is
# rebuilt only when kseq.h or kseq_test.c changes.
.PHONY: all clean

all: kseq_test

kseq_test: kseq.h kseq_test.c
	$(CC) -g -O2 kseq_test.c -o kseq_test -lz

clean:
	rm -f *.o kseq_test
|
||||
223
pkg/obiformats/kseq/kseq.h
Normal file
223
pkg/obiformats/kseq/kseq.h
Normal file
@@ -0,0 +1,223 @@
|
||||
/* The MIT License
|
||||
|
||||
Copyright (c) 2008 Genome Research Ltd (GRL).
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
||||
|
||||
/* Last Modified: 12APR2009 */
|
||||
|
||||
#ifndef AC_KSEQ_H
|
||||
#define AC_KSEQ_H
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
|
||||
#define KS_SEP_TAB 1 // isspace() && !' '
|
||||
#define KS_SEP_MAX 1
|
||||
|
||||
/* Declares kstream_t, a buffered reader over a handle `f` of type type_t.
 * `buf` is the read buffer, `begin` and `end` delimit its unread part and
 * `is_eof` is raised by the reading functions once a read returned fewer
 * bytes than requested. */
#define __KS_TYPE(type_t) \
	typedef struct __kstream_t { \
		char *buf; \
		int begin, end, is_eof; \
		type_t f; \
	} kstream_t;
|
||||
|
||||
/* True when the end of the stream was reached and the buffer is exhausted. */
#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
/* Resets the buffer state of the stream; the handle `f` is not touched. */
#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
|
||||
|
||||
/* Defines ks_init() and ks_destroy().
 * ks_init() allocates a zeroed kstream_t around the handle f together with
 * a buffer of __bufsize bytes; the results of the allocations are not
 * checked.
 * ks_destroy() frees the buffer and the structure (a NULL stream is
 * accepted); the handle f is not closed. */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		ks->f = f; \
		ks->buf = (char*)malloc(__bufsize); \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
|
||||
|
||||
/* Defines ks_getc(): returns the next byte of the stream as a value in the
 * range 0..255, or -1 when the end of the stream is reached or when __read
 * reports an error (zero or negative result).
 *
 * The byte is converted through unsigned char so that bytes >= 0x80 are
 * never returned as negative values (0xFF would otherwise be taken for the
 * -1 end-of-stream marker) and can safely be given to the <ctype.h>
 * functions. */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->is_eof && ks->begin >= ks->end) return -1; \
		if (ks->begin >= ks->end) { \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end < __bufsize) ks->is_eof = 1; \
			if (ks->end <= 0) { /* 0: nothing left, < 0: read error */ \
				ks->end = 0; \
				return -1; \
			} \
		} \
		return (int)(unsigned char)ks->buf[ks->begin++]; \
	}
|
||||
|
||||
/* kstring_t is a growable character buffer: `s` points to the storage,
 * `l` is the number of characters in use (the functions of this file store
 * a terminating NUL at s[l]) and `m` is the allocated size of `s`. */
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
	size_t l, m;
	char *s;
} kstring_t;
#endif
|
||||
|
||||
/* Rounds x up, in place, to a power of two by smearing its highest set bit
 * over the lower 32 bits. x must be a modifiable lvalue; it is evaluated
 * several times. */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
|
||||
|
||||
/* Defines ks_getuntil(): reads the stream up to the next delimiter and
 * stores the characters read, NUL terminated, into str (the delimiter is
 * consumed but not stored).
 *
 * delimiter is either KS_SEP_SPACE (any isspace() character), KS_SEP_TAB
 * (any isspace() character but ' ') or a character code greater than
 * KS_SEP_MAX. When dret is not NULL it receives the delimiter actually
 * met, or 0 when the end of the stream stopped the reading.
 *
 * Returns the length of the string, or -1 when the stream was already
 * exhausted on entry.
 *
 * Compared with the historical version:
 *  - characters are converted to unsigned char before being given to
 *    isspace(), as required by <ctype.h>;
 *  - a zero or negative result of __read ends the reading;
 *  - the buffer of str is only allocated when str has none, instead of
 *    replacing (and leaking) it each time an empty string is read. */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ \
		if (dret) *dret = 0; \
		str->l = 0; \
		if (ks->begin >= ks->end && ks->is_eof) return -1; \
		for (;;) { \
			int i; \
			if (ks->begin >= ks->end) { \
				if (!ks->is_eof) { \
					ks->begin = 0; \
					ks->end = __read(ks->f, ks->buf, __bufsize); \
					if (ks->end < __bufsize) ks->is_eof = 1; \
					if (ks->end <= 0) { /* 0: nothing left, < 0: read error */ \
						ks->end = 0; \
						break; \
					} \
				} else break; \
			} \
			if (delimiter > KS_SEP_MAX) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} else if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace((unsigned char)ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace((unsigned char)ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else i = 0; /* never come to here! */ \
			if (str->m - str->l < i - ks->begin + 1) { \
				str->m = str->l + (i - ks->begin) + 1; \
				kroundup32(str->m); \
				str->s = (char*)realloc(str->s, str->m); \
			} \
			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
			str->l = str->l + (i - ks->begin); \
			ks->begin = i + 1; \
			if (i < ks->end) { \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (str->s == 0) { /* nothing was ever stored: room for the NUL */ \
			str->m = 1; \
			str->s = (char*)calloc(1, 1); \
		} \
		str->s[str->l] = '\0'; \
		return str->l; \
	}
|
||||
|
||||
/* Emits the kstream_t type and its functions (ks_init, ks_destroy, ks_getc
 * and ks_getuntil) for handles of type type_t read through __read with a
 * buffer of __bufsize bytes. */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
	__KS_TYPE(type_t) \
	__KS_BASIC(type_t, __bufsize) \
	__KS_GETC(__read, __bufsize) \
	__KS_GETUNTIL(__read, __bufsize)
|
||||
|
||||
/* Defines kseq_init(), kseq_rewind() and kseq_destroy().
 * kseq_init() allocates a zeroed kseq_t and its stream around the handle
 * fd; the result of the allocation is not checked.
 * kseq_rewind() resets the parser and buffer state only; the handle itself
 * is not repositioned here.
 * kseq_destroy() frees the four strings, the stream and the structure (a
 * NULL pointer is accepted); the handle is not closed. */
#define __KSEQ_BASIC(type_t) \
	static inline kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		s->f = ks_init(fd); \
		return s; \
	} \
	static inline void kseq_rewind(kseq_t *ks) \
	{ \
		ks->last_char = 0; \
		ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
	} \
	static inline void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
|
||||
|
||||
/* Defines kseq_read(): reads the next FASTA or FASTQ record of the stream
 * into seq (name, comment, seq and, for FASTQ, qual).
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *
 * The sequence buffer is grown, when needed, before the terminating NUL is
 * stored: a record without any sequence character met before any buffer
 * was allocated would otherwise write through a NULL pointer.
 */
#define __KSEQ_READ \
	static int kseq_read(kseq_t *seq) \
	{ \
		int c; \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
			if (c == -1) return -1; /* end of file */ \
			seq->last_char = c; \
		} /* the first header char has been read */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; \
		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
			if (isgraph(c)) { /* printable non-space character */ \
				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
					seq->seq.m = seq->seq.l + 2; \
					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
				} \
				seq->seq.s[seq->seq.l++] = (char)c; \
			} \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		if (seq->seq.l + 1 >= seq->seq.m) { /* room for the NUL, even for an empty sequence */ \
			seq->seq.m = seq->seq.l + 2; \
			kroundup32(seq->seq.m); \
			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
		} \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return seq->seq.l; /* FASTA */ \
		if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
			seq->qual.m = seq->seq.m; \
			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
		} \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* we should not stop here */ \
		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
		seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
		return seq->seq.l; \
	}
|
||||
|
||||
/* Declares kseq_t, the state of a FASTA/FASTQ parser: the four strings of
 * the current record, the header character ('>' or '@') that kseq_read()
 * already consumed for the next record (0 when none) and the underlying
 * stream. */
#define __KSEQ_TYPE(type_t) \
	typedef struct { \
		kstring_t name, comment, seq, qual; \
		int last_char; \
		kstream_t *f; \
	} kseq_t;
|
||||
|
||||
/* Emits the whole parser (kstream_t with a 4096-byte buffer, kseq_t,
 * kseq_init, kseq_rewind, kseq_destroy and kseq_read) for handles of type
 * type_t read through __read. To be expanded once, at file scope. */
#define KSEQ_INIT(type_t, __read) \
	KSTREAM_INIT(type_t, __read, 4096) \
	__KSEQ_TYPE(type_t) \
	__KSEQ_BASIC(type_t) \
	__KSEQ_READ
|
||||
|
||||
#endif
|
||||
BIN
pkg/obiformats/kseq/kseq_test
Executable file
BIN
pkg/obiformats/kseq/kseq_test
Executable file
Binary file not shown.
27
pkg/obiformats/kseq/kseq_test.c
Normal file
27
pkg/obiformats/kseq/kseq_test.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <zlib.h>
|
||||
#include <stdio.h>
|
||||
#include "kseq.h"
|
||||
KSEQ_INIT(gzFile, gzread)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
gzFile fp;
|
||||
kseq_t *seq;
|
||||
int l;
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "Usage: %s <in.seq>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
fp = gzopen(argv[1], "r");
|
||||
seq = kseq_init(fp);
|
||||
while ((l = kseq_read(seq)) >= 0) {
|
||||
printf("name: %s\n", seq->name.s);
|
||||
if (seq->comment.l) printf("comment: %s\n", seq->comment.s);
|
||||
printf("seq: %s\n", seq->seq.s);
|
||||
if (seq->qual.l) printf("qual: %s\n", seq->qual.s);
|
||||
}
|
||||
printf("return value: %d\n", l);
|
||||
kseq_destroy(seq);
|
||||
gzclose(fp);
|
||||
return 0;
|
||||
}
|
||||
20
pkg/obiformats/kseq/kseq_test.dSYM/Contents/Info.plist
Normal file
20
pkg/obiformats/kseq/kseq_test.dSYM/Contents/Info.plist
Normal file
@@ -0,0 +1,20 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>English</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>com.apple.xcode.dsym.kseq_test</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>dSYM</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.0</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>1</string>
|
||||
</dict>
|
||||
</plist>
|
||||
Binary file not shown.
291
pkg/obiformats/kseq/s1.fasta
Normal file
291
pkg/obiformats/kseq/s1.fasta
Normal file
@@ -0,0 +1,291 @@
|
||||
>HWI-D00393:103:C6KCUANXX:2:2309:18209:70743_CONS_SUB_SUB reverse_score=72.0; count=2; direction=forward; experiment=australie; seq_a_mismatch=0; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; status=full; seq_a_deletion=0; seq_length=98; start=aaaac; merged_sample={'AN5-30_b': 1, 'AML-33_b': 1}; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
aaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagagtt
|
||||
ggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2209:18639:37342_CONS_SUB_SUB_CMP ali_length=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1, 'AN2-30_a': 1}; forward_score=72.0; seq_b_mismatch=0; start=ataaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ataaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2115:3400:66119_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_a': 1}; forward_score=72.0; score=367.38470594; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=ccaaa; experiment=australie; mid_quality=62.0797101449; avg_quality=58.9050632911; seq_a_single=33; score_norm=3.99331202109; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtgtacat; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_03A; seq_b_single=33;
|
||||
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2310:20070:75862_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=23.1; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-12_b': 1}; forward_score=72.0; score=360.189837214; seq_a_mismatch=0; forward_tag=catcagtc; seq_b_mismatch=0; start=ccaaa; experiment=australie; mid_quality=53.0507246377; avg_quality=49.6265822785; seq_a_single=33; score_norm=3.91510692624; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=28.9; position=11_11D; seq_b_single=33;
|
||||
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggaataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2114:5633:21903_CONS_SUB_SUB merged_sample={'ABR-15_a': 1, 'AN5-12_b': 1, 'AW2-35_b': 1}; forward_score=72.0; seq_b_insertion=0; seq_a_insertion=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=3; seq_length=100; start=ccaaa; experiment=australie; reverse_score=72.0; mode=alignment; status=full; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1112:5602:81492_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-12_b': 1}; forward_score=72.0; score=367.699098517; seq_a_mismatch=0; forward_tag=catcagtc; seq_b_mismatch=0; start=ccaaa; experiment=australie; mid_quality=62.2463768116; avg_quality=59.0506329114; seq_a_single=33; score_norm=3.9967293317; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_11D; seq_b_single=33;
|
||||
ccaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtcttgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1101:4074:21538_CONS_SUB_SUB merged_sample={'AN4-36_b': 51, 'AW2-04_b': 11, 'AW2-04_a': 10, 'AN4-38_b': 158, 'AN5-43_b': 2, 'ADR-14_b': 2, 'AW2-34_b': 10, 'AW2-34_a': 1, 'AN3-38_b': 1, 'ABR-30_b': 1, 'AW2-36_b': 17, 'AN2-15_a': 11, 'AN4-34_b': 44, 'AN5-45_b': 5, 'AW2-27_b': 1, 'AW2-27_a': 3, 'ADR-25_a': 1, 'AN4-19_b': 30, 'ABR-44_a': 2, 'ABR-31_b': 1, 'AN2-20_b': 1, 'AN1-43_a': 2, 'AN2-13_b': 21, 'AN5-11_b': 2, 'AN4-22_b': 16, 'AN3-14_b': 1, 'AN3-14_a': 3, 'AW2-35_b': 101, 'AN2-19_b': 1, 'ABR-33_a': 1, 'ABR-29_a': 11, 'AW1-40_b': 9, 'ADR-13_a': 5, 'ADR-06_b': 1, 'AN4-26_a': 1, 'AN5-15_b': 20, 'AN2-01_a': 5, 'ADR-21_b': 1, 'AW2-23_b': 29, 'AW2-23_a': 3, 'ADR-13_b': 1, 'AW2-21_a': 1, 'AWO-27_b': 1, 'AW2-21_b': 20, 'AN2-10_a': 9, 'AN5-13_b': 19, 'AN4-20_b': 1, 'AN2-30_a': 1, 'AN3-22_b': 4, 'AN3-25_b': 3, 'AN3-25_a': 5, 'AN4-24_b': 26, 'ABR-15_a': 34, 'AW2-36_a': 5, 'AW2-12_b': 7, 'AW2-12_a': 5, 'AN5-48_a': 1, 'AW1-06_a': 6, 'AN5-21_a': 1, 'AN5-21_b': 20, 'AN2-09_b': 11, 'ABR-13_b': 2, 'ABR-13_a': 23, 'AN2-09_a': 7, 'AN3-20_b': 2, 'AN2-07_a': 4, 'AN2-07_b': 8, 'AN4-44_b': 26, 'AN4-21_b': 19, 'AW2-03_a': 28, 'AN4-42_b': 30, 'AW2-03_b': 47, 'ADR-26_b': 1, 'AN1-03_a': 6, 'AML-47_a': 1, 'AWO-30_b': 1, 'AN1-03_b': 3, 'ADR-30_b': 1, 'AN4-37_b': 40, 'AW2-07_b': 43, 'AN4-40_a': 1, 'AN3-44_b': 1, 'AN4-40_b': 36, 'AW2-01_b': 32, 'AW2-01_a': 13, 'AN5-10_b': 64, 'AW1-11_a': 29, 'AN2-43_a': 1, 'AN2-43_b': 2, 'AN1-17_a': 6, 'AN5-37_b': 1, 'blk-12_b': 1, 'ABR-35_b': 1, 'blk-02_b': 1, 'AN2-45_b': 8, 'AN3-40_a': 1, 'AW1-17_a': 5, 'AN2-45_a': 7, 'AW2-17_b': 4, 'ABR-25_a': 1, 'ABR-08_b': 1, 'AN5-14_b': 6, 'AWO-10_b': 1, 'AN2-38_a': 3, 'AN5-33_b': 15, 'AN2-12_a': 7, 'blk-04_b': 1, 'AN3-08_b': 4, 'AN3-01_b': 2, 'AN5-12_b': 106, 'ABR-23_a': 2, 'AN3-42_b': 19, 'AN4-23_b': 96, 'AN2-35_b': 1, 'ABR-02_a': 7, 'AN5-35_a': 1, 'ABR-17_a': 5, 'AML-42_a': 1, 'ADR-10_b': 1, 'ABR-39_a': 14, 'AN3-20_a': 3, 'pos-01_a': 1, 'AN5-03_b': 13, 'AN3-05_a': 1, 'AWO-42_b': 1, 
'ABR-14_b': 1, 'AW2-13_b': 2, 'AW2-13_a': 5, 'blk-06_b': 2, 'AML-24_a': 1, 'ADR-12_b': 1, 'ABR-09_a': 3, 'ABR-12_b': 1, 'AN3-03_b': 3, 'AN3-03_a': 4, 'ABR-12_a': 34, 'ABR-27_b': 1, 'AN4-11_b': 15, 'AN5-22_b': 11, 'AN3-47_b': 2, 'AN3-47_a': 1, 'AN4-13_b': 35, 'AN2-41_b': 1, 'AN4-41_b': 14, 'AN4-22_a': 1, 'AN2-02_b': 5, 'ADR-37_b': 1, 'AN4-15_b': 2, 'AN5-20_a': 1, 'AN5-20_b': 18, 'AN3-45_b': 4, 'AW2-40_b': 20, 'AN5-09_b': 2, 'AN5-09_a': 1, 'AW2-40_a': 5, 'AN4-47_b': 11, 'AWO-19_b': 1, 'ABR-41_a': 12, 'AW1-32_b': 1, 'AW1-30_a': 15, 'AN4-05_a': 1, 'AN3-41_a': 1, 'AN3-46_b': 2, 'AN5-24_b': 35, 'ABR-43_a': 1, 'AW1-34_b': 1, 'AN5-30_b': 20, 'AN5-30_a': 1, 'AW2-28_a': 10, 'AW2-28_b': 5, 'AN5-25_b': 16, 'AN4-01_a': 1, 'AN2-04_b': 2, 'AW2-30_a': 22, 'AML-19_a': 1, 'AN2-34_a': 2, 'AN2-38_b': 3, 'pos-06_a': 1, 'AN5-19_b': 4, 'blk-07_b': 1, 'ABR-47_a': 1, 'AN2-02_a': 7, 'AN3-43_b': 12, 'AW1-20_a': 2, 'AW2-39_a': 3, 'AW2-43_a': 1, 'AWO-41_a': 10, 'AN5-38_a': 1, 'AW2-43_b': 3, 'AN4-17_b': 12, 'AW2-07_a': 48, 'AN3-04_b': 2, 'AN4-35_b': 86, 'AW1-26_a': 8, 'AWO-34_b': 1, 'ABR-14_a': 13, 'AN2-13_a': 4, 'AN4-39_a': 1, 'ABR-01_a': 78, 'AN5-44_b': 1, 'AN4-39_b': 85, 'AW1-30_b': 4, 'AN2-31_b': 7, 'AN3-37_a': 1, 'AN4-12_b': 142, 'AN3-35_b': 12, 'ABR-42_a': 9, 'ABR-03_b': 1, 'AN3-17_b': 19, 'AML-08_a': 2, 'AW1-29_a': 4, 'AN2-05_b': 3, 'AN4-46_b': 39, 'AN2-05_a': 5, 'AN4-14_b': 4, 'AN5-23_b': 24, 'AN4-25_a': 1, 'AML-12_a': 1, 'AN3-34_a': 1, 'AN5-28_b': 31, 'AN3-34_b': 2, 'AN5-27_b': 3, 'ABR-32_b': 1, 'AWO-15_b': 1, 'ABR-46_a': 8, 'AW1-18_a': 76, 'AN3-13_b': 1, 'AN4-18_b': 2, 'AN4-24_a': 1, 'AWO-06_b': 1, 'AN5-42_b': 14, 'ABR-28_a': 2, 'AN2-40_a': 2, 'AW1-40_a': 9, 'AW2-35_a': 70, 'ABR-40_b': 2, 'AN2-10_b': 3, 'AN3-27_b': 3, 'ABR-44_b': 1, 'ADR-38_b': 1, 'AN3-19_b': 2, 'ABR-40_a': 22, 'AN4-06_b': 17, 'ADR-05_b': 1, 'AN2-12_b': 1, 'ABR-08_a': 6, 'AN5-41_b': 10, 'AWO-37_b': 1, 'AN4-29_b': 40, 'AW2-42_b': 3, 'AW1-27_a': 6, 'ADR-34_b': 1, 'AN4-45_b': 115, 'AWO-13_b': 1, 'AN5-34_b': 73, 
'ABR-22_b': 2, 'AN5-39_b': 2, 'AW2-42_a': 1, 'AN2-04_a': 4, 'AN4-43_b': 19, 'AW1-08_b': 1, 'AW1-08_a': 1, 'AW1-11_b': 1, 'AN4-48_a': 1, 'AML-04_a': 1, 'AML-41_a': 1, 'AW1-35_a': 2}; count=3190; seq_b_insertion=0; status=full; seq_a_deletion=0; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1102:20365:63690_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-13_a': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=tagtcgca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=01_09E; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgctcgttga
|
||||
>HWI-D00393:103:C6KCUANXX:2:1109:5898:71477_CONS_SUB_SUB_CMP merged_sample={'AW2-04_a': 1, 'AN4-38_b': 1, 'AW1-07_b': 1, 'AN4-34_b': 5, 'AN4-19_b': 1, 'AN2-20_b': 1, 'AN2-13_b': 3, 'AN5-11_b': 1, 'AN3-14_a': 1, 'ABR-29_a': 1, 'AW2-23_b': 3, 'AW2-23_a': 1, 'AN4-28_b': 1, 'AW2-21_b': 3, 'AN4-24_b': 1, 'AN5-21_b': 1, 'ABR-13_a': 2, 'AN3-20_b': 1, 'ADR-11_b': 1, 'AN4-44_b': 1, 'AN3-46_b': 2, 'AN4-42_b': 1, 'AW2-03_b': 2, 'AN1-03_a': 1, 'AN4-37_b': 2, 'AW2-07_b': 3, 'AN4-40_b': 3, 'AW2-01_a': 1, 'AW1-11_a': 5, 'AN2-43_b': 2, 'pos-09_a': 1, 'ABR-25_a': 1, 'AN2-38_b': 1, 'AN3-08_a': 1, 'AN3-01_b': 1, 'AN5-12_b': 2, 'AN3-42_b': 3, 'AN4-23_b': 1, 'ABR-02_a': 1, 'ABR-17_a': 1, 'ABR-39_a': 1, 'ABR-14_a': 3, 'AW2-13_b': 1, 'ABR-12_a': 4, 'AN4-13_b': 2, 'AN2-41_b': 1, 'AN4-15_b': 1, 'AN5-20_b': 1, 'AW2-40_b': 3, 'ABR-41_a': 1, 'AN3-35_a': 1, 'AN3-43_b': 1, 'AN5-24_b': 2, 'AN5-30_b': 1, 'AN5-34_b': 3, 'AN5-19_b': 1, 'AWO-41_a': 1, 'AN5-38_a': 1, 'AW2-07_a': 1, 'AN4-35_b': 5, 'ABR-01_a': 4, 'AN4-39_b': 2, 'AN2-31_b': 1, 'AW1-30_a': 1, 'AN4-12_b': 2, 'ABR-42_a': 2, 'AN3-17_a': 1, 'AN3-17_b': 1, 'AN2-05_b': 1, 'AN4-46_b': 1, 'AN4-14_b': 4, 'AN5-23_b': 4, 'ABR-32_a': 1, 'ABR-46_a': 2, 'AW1-18_a': 4, 'AN4-18_b': 1, 'AW1-35_a': 1, 'AW2-35_b': 6, 'AW1-40_b': 3, 'AW2-35_a': 2, 'AN2-10_a': 2, 'AN2-02_a': 1, 'AN4-29_b': 1, 'AN4-45_b': 3, 'AW2-44_b': 1, 'ABR-22_b': 1, 'AW1-28_b': 1, 'AW2-42_b': 1, 'AN4-43_b': 2, 'AW1-27_a': 1}; count=159; status=full; seq_a_deletion=0; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1116:11515:15328_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-44_b': 1, 'AN4-39_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttactaggttaaggtctcgtttgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1206:17870:33853_CONS_SUB_SUB ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-30_a': 7}; forward_score=72.0; seq_a_mismatch=0; forward_tag=cgctctcg; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gactgatg; goodAli=Alignement; count=7; seq_length=100; status=full; mode=alignment; position=02_12A; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcttaactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2106:7652:12042_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-40_b': 1, 'AW2-35_b': 1, 'ABR-01_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcgtaactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2115:4325:51738_CONS_SUB_SUB_CMP status=full; merged_sample={'AW1-29_a': 1, 'ABR-41_a': 1, 'AN2-02_a': 1, 'AN4-39_b': 1, 'AN2-07_a': 1}; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; forward_score=72.0; tail_quality=37.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=5; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgctaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2211:17027:90962_CONS_SUB_SUB status=full; merged_sample={'AML-46_a': 1, 'AN4-46_b': 7}; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=8; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcattcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1113:14380:43631_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-34_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08B; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagtatgtaaaggtctcgatcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1116:10928:36920_CONS_SUB_SUB_CMP ali_length=92; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1, 'AN5-25_b': 1, 'AW2-01_a': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgtttgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1204:11153:34132_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1, 'AN4-39_b': 1, 'AN2-31_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; status=full; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; mode=alignment; head_quality=37.0; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactattcagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2106:2429:2922_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=371.700731814; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.7299270073; avg_quality=59.4522292994; seq_a_single=32; score_norm=3.99678206251; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=37.0; position=11_08F; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2201:17654:97503_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=34.6; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-34_b': 1}; forward_score=72.0; score=307.231253197; seq_a_mismatch=0; forward_tag=tcagtgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=53.8115942029; avg_quality=51.4113924051; seq_a_single=33; score_norm=3.33947014344; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=12_02B; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgtgaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2206:15790:49247_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-24_b': 1, 'AWO-41_a': 1, 'AW2-35_a': 1, 'AN4-35_b': 1, 'AW2-07_a': 1, 'AN3-04_b': 1, 'AN2-13_a': 1, 'AN4-22_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=8; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttagggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2301:12930:40102_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-15_a': 1, 'AN4-39_b': 1}; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttaacaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2315:17692:90977_CONS_SUB_SUB_CMP ali_length=93; seq_a_deletion=0; reverse_score=72.0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1, 'ABR-41_a': 1, 'AN3-17_b': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=32; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; goodAli=Alignement; count=3; seq_length=99; mode=alignment; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactattagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1207:9047:41800_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-13_a': 2, 'AN5-19_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgctcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2210:14421:16016_CONS_SUB_SUB_CMP reverse_score=66.0; count=7; direction=reverse; seq_b_insertion=0; experiment=australie; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; seq_length=99; start=ctaaa; merged_sample={'AW2-12_b': 1, 'AN4-41_b': 1, 'AW2-07_a': 1, 'AN5-21_b': 1, 'AN2-15_a': 1, 'AN4-23_b': 1, 'AN4-37_b': 1}; seq_a_insertion=0; mode=alignment; status=full; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgtta
|
||||
>HWI-D00393:103:C6KCUANXX:2:2211:13367:14752_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=32.6; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-18_a': 1}; forward_score=72.0; score=367.610399661; seq_a_mismatch=0; forward_tag=gtcacgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.0434782609; avg_quality=57.6329113924; seq_a_single=33; score_norm=3.9957652137; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtcgtaga; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.6; position=02_10C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaagatctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2307:6880:68904_CONS_SUB_SUB_CMP ali_length=81; seq_ab_match=56; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1}; forward_score=72.0; score=59.9384545346; seq_a_mismatch=1; forward_tag=atcagtca; seq_b_mismatch=22; start=ctaaa; experiment=australie; mid_quality=52.6666666667; avg_quality=50.6835443038; seq_a_single=33; score_norm=0.73998092018; reverse_score=72.0; direction=reverse; seq_b_insertion=2; seq_b_deletion=13; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08E; seq_b_single=44;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaagtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1312:11682:89893_CONS_SUB_SUB_CMP ali_length=92; seq_a_deletion=0; reverse_score=72.0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-19_b': 1, 'AN5-10_b': 1, 'AN5-15_b': 1, 'AW2-34_b': 1, 'AN5-45_b': 1, 'AN4-40_b': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; status=full; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=6; seq_length=100; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcattaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2109:18223:83128_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-15_a': 1, 'ABR-13_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttggcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2204:19781:26262_CONS_SUB_SUB_CMP ali_length=95; seq_ab_match=95; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ABR-01_a': 1}; forward_score=72.0; score=379.691935178; seq_a_mismatch=0; forward_tag=gatcgcga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.5703703704; avg_quality=60.1419354839; seq_a_single=30; score_norm=3.9967572124; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=155; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=01_07H; seq_b_single=30;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaagatctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2215:4598:28592_CONS_SUB_SUB_CMP status=full; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-40_b': 1, 'AW2-35_b': 1, 'AN5-03_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; goodAli=Alignement; count=3; seq_length=100; mode=alignment; head_quality=37.0;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggcctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2304:13327:3228_CONS_SUB_SUB merged_sample={'AN4-36_b': 1, 'AN4-29_b': 1, 'AW2-01_b': 1, 'AW2-35_a': 1, 'ABR-46_a': 1, 'AW1-18_a': 1, 'AW1-40_b': 1, 'AN4-24_b': 1, 'AW2-07_b': 2}; forward_score=72.0; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=10; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; status=full; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtcccgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1208:20375:7918_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1, 'AW2-30_a': 1, 'AN4-35_b': 1}; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcatagctatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1215:20045:28654_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-12_b': 1, 'AN5-42_b': 1, 'AN4-23_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcctaactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1302:19280:14816_CONS_SUB_SUB_CMP status=full; merged_sample={'ABR-01_a': 1, 'AWO-46_b': 1, 'AN4-22_b': 1}; seq_b_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=3; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgtacgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2214:8779:29491_CONS_SUB_SUB_CMP status=full; merged_sample={'AN5-24_b': 1, 'ADR-13_a': 2, 'AW2-35_a': 1}; seq_b_insertion=0; seq_a_insertion=0; forward_score=72.0; reverse_match=tttgtctgcttaattgcg; seq_b_mismatch=0; seq_a_deletion=0; count=4; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; seq_b_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; head_quality=37.0; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggctaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2104:3449:54357_CONS_SUB_SUB_CMP status=full; merged_sample={'AN4-13_b': 1, 'AN4-42_b': 1, 'AN4-38_b': 1, 'AN5-21_b': 1, 'AN4-46_b': 1, 'AN4-12_b': 6}; seq_b_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=11; seq_length=100; start=ctaaa; experiment=australie; seq_a_insertion=0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttag
|
||||
>HWI-D00393:103:C6KCUANXX:2:1102:14455:71108_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-23_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=ctatgcta; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_05G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgtcag
|
||||
>HWI-D00393:103:C6KCUANXX:2:1110:14996:73808_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-43_b': 1, 'AW2-40_b': 1, 'AW2-07_b': 2}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=4; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagtaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1204:14078:1983_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-13_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=tagtcgca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=atatagcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_04E; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttactaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1212:8502:98185_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1, 'AW2-35_a': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcatgactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1305:7874:81774_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-07_a': 1, 'AN4-34_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtcttgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1103:10874:79989_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=gtgtacat; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_07C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttcaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1207:7817:59077_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=366.948065081; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=60.9420289855; avg_quality=57.9113924051; seq_a_single=33; score_norm=3.9885659248; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08F; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgatcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1308:2120:40513_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN3-42_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actctgct; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_03B; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcattcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2206:6280:74968_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=acacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-40_a': 1}; forward_score=66.0; score=367.706336455; seq_a_mismatch=0; forward_tag=gatcgcga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=catcagtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=05_08H; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttgaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1215:13979:31122_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgttgcttaattgcga; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-34_b': 1}; forward_score=72.0; score=371.70314446; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.6861313869; avg_quality=59.4140127389; seq_a_single=32; score_norm=3.99680800494; reverse_score=66.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=37.0; position=11_08B; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtaagggtctcgttcgtta
|
||||
>HWI-D00393:103:C6KCUANXX:2:2208:15903:75212_CONS_SUB_SUB ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-01_b': 1}; forward_score=72.0; score=359.811368897; seq_a_mismatch=1; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.0072463768; avg_quality=58.8417721519; seq_a_single=33; score_norm=3.91099314019; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_02G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagtaggttaaggtctcgtttgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1203:16969:99847_CONS_SUB_SUB ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-40_b': 1}; forward_score=72.0; score=359.526419012; seq_a_mismatch=0; forward_tag=ctgcgtac; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=60.9782608696; avg_quality=57.9430379747; seq_a_single=33; score_norm=3.90789585883; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08H; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataacaatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2213:12449:50581_CONS_SUB_SUB_CMP status=full; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1, 'AN5-25_b': 1, 'AN4-22_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; goodAli=Alignement; count=3; seq_length=100; mode=alignment; head_quality=37.0;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggtataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2310:15600:75905_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_a': 1}; forward_score=72.0; score=367.701511163; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2753623188; avg_quality=59.0759493671; seq_a_single=33; score_norm=3.99675555612; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtgtacat; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_03A; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtaagggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1214:16466:5047_CONS_SUB_SUB status=full; merged_sample={'AN2-07_b': 1, 'AN1-17_a': 1, 'AN3-25_b': 1, 'AN4-23_b': 1, 'AW2-35_a': 1}; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; forward_score=72.0; seq_b_deletion=0; seq_a_deletion=0; count=5; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcgggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1306:6166:18686_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=24.8; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-39_b': 1}; forward_score=72.0; score=200.442729055; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=40.5289855072; avg_quality=38.6898734177; seq_a_single=33; score_norm=2.17872531582; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=27.2; position=11_08G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggctagagtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2312:19984:25051_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=acacacac; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actctgct; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_03A; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttagagtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1209:10424:5048_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-43_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=acgacgag; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_09C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggataaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1306:10017:29340_CONS_SUB_SUB_CMP merged_sample={'AN4-36_b': 1, 'AN4-12_b': 1, 'AW2-07_a': 1}; forward_score=72.0; direction=reverse; seq_b_insertion=0; seq_a_insertion=0; seq_a_mismatch=0; seq_b_deletion=0; status=full; seq_a_deletion=0; count=3; seq_length=100; start=ctaaa; experiment=australie; reverse_score=72.0; mode=alignment; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttagcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1116:1609:19254_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=34.4; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1}; forward_score=72.0; score=365.502727331; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=55.8905109489; avg_quality=52.9936305732; seq_a_single=32; score_norm=3.93013685302; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=31.9; position=11_09E; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgtttt
|
||||
>HWI-D00393:103:C6KCUANXX:2:1116:18814:29744_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_a': 1}; forward_score=72.0; score=367.048675141; seq_a_mismatch=0; forward_tag=acgacgag; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.7101449275; avg_quality=58.582278481; seq_a_single=33; score_norm=3.9896595124; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_07C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcagattaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1301:18017:5670_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-29_b': 1}; forward_score=72.0; score=307.503017136; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=48.0362318841; avg_quality=46.0632911392; seq_a_single=33; score_norm=3.3424240993; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=27.9; position=11_07E; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagccggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2304:14057:23271_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'ADR-47_b': 1, 'AN3-35_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataattatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1105:1482:86390_CONS_SUB_SUB_CMP ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-13_b': 1}; forward_score=72.0; score=375.690301881; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.0735294118; avg_quality=59.7307692308; seq_a_single=31; score_norm=3.99670533916; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_11E; seq_b_single=31;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgtaaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1214:1498:16792_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=36.4; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-01_a': 1}; forward_score=72.0; score=367.691974388; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2608695652; avg_quality=59.0253164557; seq_a_single=33; score_norm=3.99665189552; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=acagcaca; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_02G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggctgaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2112:6296:18402_CONS_SUB_SUB_CMP reverse_score=72.0; count=2; direction=reverse; seq_b_insertion=0; experiment=australie; seq_b_deletion=0; status=full; seq_a_deletion=0; seq_length=100; start=ctaaa; merged_sample={'AN4-11_b': 1, 'AN4-17_b': 1}; seq_a_insertion=0; mode=alignment; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggaataactatttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2202:15115:92220_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=gtgtacat; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_07C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtccgggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2308:15218:31319_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=35.1; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-30_a': 1}; forward_score=72.0; score=367.379880648; seq_a_mismatch=0; forward_tag=gcgtcagc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.7463768116; avg_quality=58.4936708861; seq_a_single=33; score_norm=3.99325957226; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agactatg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=06_01F; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactagttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1303:8962:10788_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=35.6; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-40_b': 1}; forward_score=72.0; score=328.577396734; seq_a_mismatch=0; forward_tag=ctgcgtac; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=51.2898550725; avg_quality=49.2721518987; seq_a_single=33; score_norm=3.57149344276; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=11_08H; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactttttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1309:18593:53694_CONS_SUB_SUB_CMP ali_length=92; seq_a_deletion=0; reverse_score=72.0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-43_b': 1, 'ABR-41_a': 1, 'AW2-35_a': 1}; forward_score=72.0; seq_a_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; status=full; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=3; seq_length=100; mode=alignment; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactacttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1208:4517:87099_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-03_a': 1}; forward_score=72.0; score=367.701511163; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2753623188; avg_quality=59.0759493671; seq_a_single=33; score_norm=3.99675555612; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=gtgtacat; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_03A; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagaaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1115:14275:7184_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-45_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_09E; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttagagtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1209:11513:82442_CONS_SUB_SUB_CMP ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-39_a': 1}; forward_score=72.0; score=375.685590398; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.0294117647; avg_quality=59.6923076923; seq_a_single=31; score_norm=3.996655217; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=98; status=full; mode=alignment; head_quality=37.0; position=03_07G; seq_b_single=31;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1214:6399:72570_CONS_SUB_SUB ali_length=92; seq_ab_match=88; tail_quality=24.5; reverse_match=tttgtctgcttaattacg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-30_a': 1}; forward_score=72.0; score=255.892896628; seq_a_mismatch=3; forward_tag=tctactga; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=40.4057971014; avg_quality=38.5253164557; seq_a_single=33; score_norm=2.78144452857; reverse_score=66.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tactatac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=26.6; position=03_06F; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgtgcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1305:3473:19356_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-07_b': 1}; forward_score=72.0; score=367.635869076; seq_a_mismatch=0; forward_tag=tactatac; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.9782608696; avg_quality=58.8164556962; seq_a_single=33; score_norm=3.99604205518; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actctgct; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_03F; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcgtaactatttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2302:5940:68325_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-37_b': 1}; forward_score=72.0; score=367.706336455; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3333333333; avg_quality=59.1265822785; seq_a_single=33; score_norm=3.99680800494; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08E; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtcgaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2309:13769:62059_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_a': 1}; forward_score=72.0; score=359.470664833; seq_a_mismatch=0; forward_tag=acgacgag; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=61.7536231884; avg_quality=58.6202531646; seq_a_single=33; score_norm=3.90728983514; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=actagatc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=03_07C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaagttctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2315:8947:22476_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN3-25_a': 1}; forward_score=72.0; score=368.159719936; seq_a_mismatch=0; forward_tag=acacacac; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=58.4744525547; avg_quality=55.7388535032; seq_a_single=32; score_norm=3.95870666598; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=agactatg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=05_01A; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactgtttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1301:19819:95569_CONS_SUB_SUB_CMP ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctttaattccgat; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-34_b': 1}; forward_score=72.0; score=375.699952465; seq_a_mismatch=0; forward_tag=tcagtgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=63.1911764706; avg_quality=59.8333333333; seq_a_single=31; score_norm=3.99680800494; reverse_score=60.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=98; status=full; mode=alignment; head_quality=37.0; position=12_02B; seq_b_single=31;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgtt
|
||||
>HWI-D00393:103:C6KCUANXX:2:1313:4613:34271_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=32.3; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-11_a': 1}; forward_score=72.0; score=313.551003229; seq_a_mismatch=0; forward_tag=gtcacgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=56.6594202899; avg_quality=53.1518987342; seq_a_single=33; score_norm=3.40816307858; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=25.6; position=02_09C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtcaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2113:10988:9799_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=30.2; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-13_b': 1}; forward_score=72.0; score=361.020650934; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=55.6086956522; avg_quality=52.4936708861; seq_a_single=33; score_norm=3.92413751015; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=31.8; position=11_11E; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaatgtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2113:5686:82397_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=36.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'blk-12_b': 1}; forward_score=72.0; score=371.343139227; seq_a_mismatch=0; forward_tag=ctatgcta; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.1532846715; avg_quality=58.7643312102; seq_a_single=32; score_norm=3.99293698094; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=ctgcgtac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=08_12E; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggatagggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2311:15976:57660_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=91; tail_quality=30.1; reverse_match=tttgtctggttaattccg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-39_b': 1}; forward_score=72.0; score=346.55597612; seq_a_mismatch=0; forward_tag=gatgatct; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=56.8550724638; avg_quality=53.8037974684; seq_a_single=33; score_norm=3.76691278391; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.4; position=11_08G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtgaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2205:20527:78743_CONS_SUB_SUB ali_length=95; seq_ab_match=93; tail_quality=29.1; reverse_match=tttgtctgcttaatggcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-12_b': 1}; forward_score=72.0; score=351.353534518; seq_a_mismatch=1; forward_tag=catcagtc; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=50.2074074074; avg_quality=47.8709677419; seq_a_single=30; score_norm=3.69845825808; reverse_score=66.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=155; reverse_tag=ctatgcta; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.1; position=11_05D; seq_b_single=30;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtcgcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1104:5688:6100_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-46_b': 1}; forward_score=72.0; score=367.370471331; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.0362318841; avg_quality=58.8670886076; seq_a_single=33; score_norm=3.99315729707; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_09F; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtcccattcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1111:18652:56309_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN2-12_a': 1}; forward_score=72.0; score=371.700731814; seq_a_mismatch=0; forward_tag=atatagcg; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.7299270073; avg_quality=59.4522292994; seq_a_single=32; score_norm=3.99678206251; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tatgtcag; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=37.0; position=04_04D; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1211:20612:10566_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_08F; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataaccatttagctggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2113:19027:63568_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-38_b': 1}; forward_score=72.0; score=371.700731814; seq_a_mismatch=0; forward_tag=tctactga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.7299270073; avg_quality=59.4267515924; seq_a_single=32; score_norm=3.99678206251; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=36.6; position=11_08F; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagtaggctagagtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2201:15565:85738_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=32.1; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-23_b': 1}; forward_score=72.0; score=365.670520501; seq_a_mismatch=0; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=59.5289855072; avg_quality=56.3670886076; seq_a_single=33; score_norm=3.97467957066; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=ctatgcta; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_05G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcatatctatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2215:13270:16769_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-01_b': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=actagatc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tcagtgtc; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=09_02G; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgtccgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1102:18567:11128_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN2-10_b': 1}; forward_score=72.0; score=370.995083415; seq_a_mismatch=0; forward_tag=gtcgtaga; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=61.8248175182; avg_quality=58.5732484076; seq_a_single=32; score_norm=3.98919444533; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=atatagcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=35.6; position=10_04B; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttgacataactatttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:1106:7644:40758_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=90; tail_quality=34.2; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-35_b': 1}; forward_score=72.0; score=296.115079326; seq_a_mismatch=2; forward_tag=acgacgag; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=54.1449275362; avg_quality=51.6075949367; seq_a_single=33; score_norm=3.21864216659; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=tagctagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=34.0; position=11_08C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttac
|
||||
>HWI-D00393:103:C6KCUANXX:2:1111:17524:16890_CONS_SUB_SUB ali_length=94; seq_ab_match=94; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN2-12_b': 1}; forward_score=72.0; score=375.680651297; seq_a_mismatch=0; forward_tag=gactgatg; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.9264705882; avg_quality=59.6025641026; seq_a_single=31; score_norm=3.99660267337; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=156; reverse_tag=atatagcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=10_04D; seq_b_single=31;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttgtcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2109:4640:21588_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN5-10_b': 1}; forward_score=72.0; score=367.701511163; seq_a_mismatch=0; forward_tag=atgatcgc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.2173913043; avg_quality=59.0253164557; seq_a_single=33; score_norm=3.99675555612; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgagtcgt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_11B; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcggtcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2212:14824:56282_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-11_a': 1}; forward_score=72.0; score=367.703923809; seq_a_mismatch=0; forward_tag=gtcacgtc; seq_b_mismatch=0; start=ctaaa; experiment=australie; mid_quality=62.3043478261; avg_quality=59.1012658228; seq_a_single=33; score_norm=3.99678178053; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=cgctctcg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=02_09C; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggtcaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2306:10336:10418_CONS_SUB_SUB ali_length=92; seq_ab_match=92; tail_quality=37.0; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW2-35_b': 1, 'AN4-23_b': 1}; forward_score=72.0; seq_a_mismatch=0; seq_b_mismatch=0; start=ctaaa; experiment=australie; seq_a_single=33; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; goodAli=Alignement; count=2; seq_length=100; status=full; mode=alignment; head_quality=37.0; seq_b_single=33;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataaccatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2216:11206:55822_CONS_SUB_SUB_CMP ali_length=93; seq_ab_match=92; tail_quality=29.8; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AW1-30_a': 1}; forward_score=72.0; score=357.173125277; seq_a_mismatch=0; forward_tag=cgctctcg; seq_b_mismatch=1; start=ctaaa; experiment=australie; mid_quality=54.6642335766; avg_quality=51.4777070064; seq_a_single=32; score_norm=3.84057123954; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=gactgatg; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=29.5; position=02_12A; seq_b_single=32;
|
||||
ctaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcttaactatttagcaggttaaggtctcgttcgttat
|
||||
>HWI-D00393:103:C6KCUANXX:2:2303:4215:99753_CONS_SUB_SUB ali_length=93; seq_ab_match=93; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgtattgcc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-41_b': 1}; forward_score=66.0; score=371.70314446; seq_a_mismatch=0; forward_tag=gtacgact; seq_b_mismatch=0; start=gaaaa; experiment=australie; mid_quality=62.7591240876; avg_quality=59.4522292994; seq_a_single=32; score_norm=3.99680800494; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=157; reverse_tag=agtgctac; goodAli=Alignement; count=1; seq_length=99; status=full; mode=alignment; head_quality=36.6; position=11_09A; seq_b_single=32;
|
||||
gaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagagt
|
||||
tggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2209:18822:35652_CONS_SUB_SUB_CMP ali_length=92; seq_ab_match=92; tail_quality=37.0; reverse_match=tttgtctgcttaattgcg; seq_a_deletion=0; forward_match=tcacagacctgttattgc; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; merged_sample={'AN4-29_b': 1}; forward_score=72.0; score=366.357546614; seq_a_mismatch=0; forward_tag=atcagtca; seq_b_mismatch=0; start=gtaaa; experiment=australie; mid_quality=60.7898550725; avg_quality=57.7784810127; seq_a_single=33; score_norm=3.98214724581; reverse_score=72.0; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=158; reverse_tag=agcacagt; goodAli=Alignement; count=1; seq_length=100; status=full; mode=alignment; head_quality=37.0; position=11_07E; seq_b_single=33;
|
||||
gtaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:2212:12163:84900_CONS_SUB_SUB merged_sample={'AN4-36_b': 1, 'AN4-39_b': 1, 'AN4-12_b': 1, 'AW2-03_b': 1, 'ABR-42_a': 1, 'AN4-46_b': 1, 'AN4-23_b': 1, 'AN4-37_b': 1, 'AN5-23_b': 1, 'AN5-41_b': 1, 'AW1-18_a': 1, 'AN4-35_b': 1, 'AW2-35_b': 1, 'AW2-35_a': 1}; reverse_score=72.0; seq_b_insertion=0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; count=14; seq_length=99; start=taaaa; experiment=australie; seq_a_insertion=0; mode=alignment; status=full; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; goodAli=Alignement;
|
||||
taaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagagt
|
||||
tggcataactatttagcaggttaaggtctcgttcgttaa
|
||||
>HWI-D00393:103:C6KCUANXX:2:1210:8971:79507_CONS_SUB_SUB_CMP merged_sample={'AN5-41_b': 1, 'AW2-01_a': 3, 'AN5-25_b': 2, 'AW2-07_b': 4, 'AN5-23_b': 3, 'AW2-01_b': 1}; ali_length=92; seq_b_insertion=0; seq_b_deletion=0; status=full; seq_a_deletion=0; count=14; seq_length=100; start=ttaaa; experiment=australie; seq_a_insertion=0; mode=alignment; seq_length_ori=158; forward_primer=tcacagacctgttattgc; reverse_primer=tttgtctgsttaattscg; seq_a_single=33; seq_b_single=33; goodAli=Alignement;
|
||||
ttaaaacttccgtcggctaatcgccgacagtccctctaagaagttgactaccaacgagag
|
||||
ttagcataactattcagtaggttaaggtctcgttcgttaa
|
||||
12
pkg/obiformats/kseq/test.seq
Normal file
12
pkg/obiformats/kseq/test.seq
Normal file
@@ -0,0 +1,12 @@
|
||||
>1 {"taxid" : 1234, "specie_name" : "Lupus lupus"}
|
||||
acgtacgtacgtagc
|
||||
>2 {"taxid" : 3243, "specie_name" : "Gallus gallus"} test
|
||||
acgatcgatc
|
||||
@3 {"taxid" : 3243, "specie_name" : "Gallus gallus"} test2
|
||||
cgctagcatagc
|
||||
cgatatgactta
|
||||
+
|
||||
78wo82usd980
|
||||
d88fau
|
||||
|
||||
238ud8
|
||||
141
pkg/obiformats/ncbitaxdump/read.go
Normal file
141
pkg/obiformats/ncbitaxdump/read.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package ncbitaxdump
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitax"
|
||||
)
|
||||
|
||||
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
|
||||
file := csv.NewReader(reader)
|
||||
file.Comma = '|'
|
||||
file.Comment = '#'
|
||||
file.TrimLeadingSpace = true
|
||||
file.ReuseRecord = true
|
||||
|
||||
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
||||
taxid, _ := strconv.Atoi(strings.TrimSpace(record[0]))
|
||||
parent, _ := strconv.Atoi(strings.TrimSpace(record[1]))
|
||||
rank := strings.TrimSpace(record[2])
|
||||
|
||||
taxonomy.AddNewTaxa(taxid, parent, rank, true, true)
|
||||
}
|
||||
|
||||
taxonomy.ReindexParent()
|
||||
}
|
||||
|
||||
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
|
||||
// file := csv.NewReader(reader)
|
||||
// file.Comma = '|'
|
||||
// file.Comment = '#'
|
||||
// file.TrimLeadingSpace = true
|
||||
// file.ReuseRecord = true
|
||||
// file.LazyQuotes = true
|
||||
file := bufio.NewReader(reader)
|
||||
|
||||
n := 0
|
||||
|
||||
for line, prefix, err := file.ReadLine(); err == nil; line, prefix, err = file.ReadLine() {
|
||||
|
||||
if prefix {
|
||||
return -1
|
||||
}
|
||||
|
||||
record := strings.Split(string(line), "|")
|
||||
taxid, _ := strconv.Atoi(strings.TrimSpace(record[0]))
|
||||
name := strings.TrimSpace(record[1])
|
||||
classname := strings.TrimSpace(record[3])
|
||||
|
||||
if !onlysn || classname == "scientific name" {
|
||||
n++
|
||||
taxonomy.AddNewName(taxid, &name, &classname)
|
||||
}
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
||||
file := csv.NewReader(reader)
|
||||
file.Comma = '|'
|
||||
file.Comment = '#'
|
||||
file.TrimLeadingSpace = true
|
||||
file.ReuseRecord = true
|
||||
|
||||
n := 0
|
||||
|
||||
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
||||
oldtaxid, _ := strconv.Atoi(strings.TrimSpace(record[0]))
|
||||
newtaxid, _ := strconv.Atoi(strings.TrimSpace(record[1]))
|
||||
n++
|
||||
taxonomy.AddNewAlias(newtaxid, oldtaxid)
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
|
||||
taxonomy := obitax.NewTaxonomy()
|
||||
|
||||
//
|
||||
// Load the Taxonomy nodes
|
||||
//
|
||||
|
||||
log.Printf("Loading Taxonomy nodes\n")
|
||||
|
||||
nodefile, err := os.Open(path.Join(directory, "nodes.dmp"))
|
||||
if err != nil {
|
||||
return nil, errors.New(fmt.Sprintf("Cannot open nodes file from '%s'",
|
||||
directory))
|
||||
}
|
||||
defer nodefile.Close()
|
||||
|
||||
buffered := bufio.NewReader(nodefile)
|
||||
loadNodeTable(buffered, taxonomy)
|
||||
log.Printf("%d Taxonomy nodes read\n", taxonomy.Length())
|
||||
|
||||
//
|
||||
// Load the Taxonomy nodes
|
||||
//
|
||||
|
||||
log.Printf("Loading Taxon names\n")
|
||||
|
||||
namefile, nerr := os.Open(path.Join(directory, "names.dmp"))
|
||||
if nerr != nil {
|
||||
return nil, errors.New(fmt.Sprintf("Cannot open names file from '%s'",
|
||||
directory))
|
||||
}
|
||||
defer namefile.Close()
|
||||
|
||||
n := loadNameTable(namefile, taxonomy, onlysn)
|
||||
log.Printf("%d taxon names read\n", n)
|
||||
|
||||
//
|
||||
// Load the merged taxa
|
||||
//
|
||||
|
||||
log.Printf("Loading Merged taxa\n")
|
||||
|
||||
aliasfile, aerr := os.Open(path.Join(directory, "merged.dmp"))
|
||||
if aerr != nil {
|
||||
return nil, errors.New(fmt.Sprintf("Cannot open merged file from '%s'",
|
||||
directory))
|
||||
}
|
||||
defer aliasfile.Close()
|
||||
|
||||
buffered = bufio.NewReader(aliasfile)
|
||||
n = loadMergedTable(buffered, taxonomy)
|
||||
log.Printf("%d merged taxa read\n", n)
|
||||
|
||||
return taxonomy, nil
|
||||
}
|
||||
158
pkg/obiformats/options.go
Normal file
158
pkg/obiformats/options.go
Normal file
@@ -0,0 +1,158 @@
|
||||
package obiformats
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
|
||||
type __options__ struct {
|
||||
fastseq_header_parser obiseq.SeqAnnotator
|
||||
fastseq_header_writer func(obiseq.BioSequence) string
|
||||
with_progress_bar bool
|
||||
buffer_size int
|
||||
batch_size int
|
||||
quality_shift int
|
||||
parallel_workers int
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
pointer *__options__
|
||||
}
|
||||
|
||||
// WithOption is a setter that modifies the settings referenced by the
// Options value it receives.
type WithOption func(Options)
|
||||
|
||||
func MakeOptions(setters []WithOption) Options {
|
||||
o := __options__{
|
||||
fastseq_header_parser: ParseGuessedFastSeqHeader,
|
||||
fastseq_header_writer: FormatFastSeqJsonHeader,
|
||||
with_progress_bar: false,
|
||||
buffer_size: 2,
|
||||
quality_shift: 33,
|
||||
parallel_workers: 4,
|
||||
batch_size: 5000,
|
||||
}
|
||||
|
||||
opt := Options{&o}
|
||||
|
||||
for _, set := range setters {
|
||||
set(opt)
|
||||
}
|
||||
|
||||
return opt
|
||||
}
|
||||
|
||||
func (opt Options) QualityShift() int {
|
||||
return opt.pointer.quality_shift
|
||||
}
|
||||
|
||||
func (opt Options) BufferSize() int {
|
||||
return opt.pointer.buffer_size
|
||||
}
|
||||
|
||||
func (opt Options) BatchSize() int {
|
||||
return opt.pointer.batch_size
|
||||
}
|
||||
|
||||
func (opt Options) ParallelWorkers() int {
|
||||
return opt.pointer.parallel_workers
|
||||
}
|
||||
|
||||
func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
|
||||
return opt.pointer.fastseq_header_parser
|
||||
}
|
||||
|
||||
func (opt Options) FormatFastSeqHeader() func(obiseq.BioSequence) string {
|
||||
return opt.pointer.fastseq_header_writer
|
||||
}
|
||||
|
||||
func (opt Options) ProgressBar() bool {
|
||||
return opt.pointer.with_progress_bar
|
||||
}
|
||||
|
||||
func OptionsBufferSize(size int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.buffer_size = size
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// Allows to specify the ascii code corresponding to
|
||||
// a quality of 0 in fastq encoded quality scores.
|
||||
func OptionsQualityShift(shift int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.quality_shift = shift
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// Allows to specify a quality shift of 33, corresponding
|
||||
// to a FastQ file qualities encoded following Sanger
|
||||
// convention. This corresponds to Illumina produced FastQ
|
||||
// files.
|
||||
func OptionsQualitySanger() WithOption {
|
||||
return OptionsQualityShift(33)
|
||||
}
|
||||
|
||||
// Allows to specify a quality shift of 64, corresponding
|
||||
// to a FastQ file qualities encoded following the Solexa
|
||||
// convention.
|
||||
func OptionsQualitySolexa() WithOption {
|
||||
return OptionsQualityShift(64)
|
||||
}
|
||||
|
||||
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.fastseq_header_parser = parser
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionFastSeqDoNotParseHeader() WithOption {
|
||||
return OptionsFastSeqHeaderParser(nil)
|
||||
}
|
||||
|
||||
func OptionsFastSeqDefaultHeaderParser() WithOption {
|
||||
return OptionsFastSeqHeaderParser(ParseGuessedFastSeqHeader)
|
||||
}
|
||||
|
||||
// OptionsFastSeqHeaderFormat allows foor specifying the format
|
||||
// used to write FASTA and FASTQ sequence.
|
||||
func OptionsFastSeqHeaderFormat(format func(obiseq.BioSequence) string) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.fastseq_header_writer = format
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsParallelWorkers(nworkers int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.parallel_workers = nworkers
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsBatchSize(size int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.batch_size = size
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithProgressBar() WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_progress_bar = true
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithoutProgressBar() WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_progress_bar = false
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
93
pkg/obiformats/universal_read.go
Normal file
93
pkg/obiformats/universal_read.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
// GuessSeqFileType guesses the format of a sequence file from the beginning
// of its content.
//
// firstline is compared against a list of known prefixes, in order, and the
// format name associated with the first matching prefix is returned:
// "ecopcr", "fasta", "fastq", "embl" or "genebank". When no prefix matches,
// "unknown" is returned.
func GuessSeqFileType(firstline string) string {
	// Order matters: the first matching prefix wins.
	signatures := []struct {
		prefix string
		format string
	}{
		{"#@ecopcr-v2", "ecopcr"},
		{"#", "ecopcr"},
		{">", "fasta"},
		{"@", "fastq"},
		{"ID ", "embl"},
		{"LOCUS ", "genebank"},
	}

	for _, sig := range signatures {
		if strings.HasPrefix(firstline, sig.prefix) {
			return sig.format
		}
	}

	return "unknown"
}
|
||||
|
||||
func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
var file *os.File
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
file, err = os.Open(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
reader = file
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
greader, err = gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
file.Seek(0, 0)
|
||||
} else {
|
||||
log.Printf("File %s is gz compressed ", filename)
|
||||
reader = greader
|
||||
}
|
||||
|
||||
breader := bufio.NewReader(reader)
|
||||
|
||||
tag, _ := breader.Peek(30)
|
||||
|
||||
filetype := GuessSeqFileType(string(tag))
|
||||
log.Printf("File guessed format : %s (tag: %s)",
|
||||
filetype, (strings.Split(string(tag), "\n"))[0])
|
||||
reader = breader
|
||||
|
||||
switch filetype {
|
||||
case "fastq", "fasta":
|
||||
file.Close()
|
||||
is, _ := ReadFastSeqBatchFromFile(filename, options...)
|
||||
return is, nil
|
||||
case "ecopcr":
|
||||
return ReadEcoPCRBatch(reader, options...), nil
|
||||
case "embl":
|
||||
return ReadEMBLBatch(reader, options...), nil
|
||||
default:
|
||||
log.Fatalf("File %s has guessed format %s which is not yet implemented",
|
||||
filename, filetype)
|
||||
}
|
||||
|
||||
return obiseq.NilIBioSequenceBatch, nil
|
||||
}
|
||||
|
||||
func ReadSequencesFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
||||
ib, err := ReadSequencesBatchFromFile(filename, options...)
|
||||
return ib.SortBatches().IBioSequence(), err
|
||||
|
||||
}
|
||||
79
pkg/obiformats/universal_write.go
Normal file
79
pkg/obiformats/universal_write.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
func WriteSequences(iterator obiseq.IBioSequence,
|
||||
file io.Writer,
|
||||
options ...WithOption) error {
|
||||
|
||||
opts := MakeOptions(options)
|
||||
|
||||
header_format := opts.FormatFastSeqHeader()
|
||||
quality := opts.QualityShift()
|
||||
|
||||
ok := iterator.Next()
|
||||
|
||||
if ok {
|
||||
seq := iterator.Get()
|
||||
if seq.HasQualities() {
|
||||
fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
|
||||
WriteFastq(iterator, file, options...)
|
||||
} else {
|
||||
fmt.Fprintln(file, FormatFasta(seq, header_format))
|
||||
WriteFasta(iterator, file, options...)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func WriteSequencesToFile(iterator obiseq.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) error {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return WriteSequences(iterator, file, options...)
|
||||
}
|
||||
|
||||
func WriteSequencesToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
|
||||
return WriteSequences(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
// func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
||||
// file io.Writer,
|
||||
// options ...WithOption) error {
|
||||
|
||||
// opts := MakeOptions(options)
|
||||
|
||||
// header_format := opts.FormatFastSeqHeader()
|
||||
// quality := opts.QualityShift()
|
||||
|
||||
// ok := iterator.Next()
|
||||
|
||||
// if ok {
|
||||
// batch := iterator.Get()
|
||||
// if batch.Slice()[0].HasQualities() {
|
||||
// file.Write()
|
||||
// fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
|
||||
// WriteFastq(iterator, file, options...)
|
||||
// } else {
|
||||
// fmt.Fprintln(file, FormatFasta(seq, header_format))
|
||||
// WriteFasta(iterator, file, options...)
|
||||
// }
|
||||
// }
|
||||
|
||||
// return nil
|
||||
// }
|
||||
Reference in New Issue
Block a user