diff --git a/.gitignore b/.gitignore index b7622a8..c5825d2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ **/*.tgz **/*.yaml **/*.csv +xx .rhistory /.vscode @@ -27,3 +28,6 @@ !/obitests/** !/sample/** LLM/** +*_files + +entropy.html \ No newline at end of file diff --git a/cmd/obitools/obilowmask/main.go b/cmd/obitools/obilowmask/main.go new file mode 100644 index 0000000..ec43a54 --- /dev/null +++ b/cmd/obitools/obilowmask/main.go @@ -0,0 +1,50 @@ +package main + +import ( + "os" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obilowmask" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" +) + +// main is the entry point of the obilowmask command. It parses the command +// line, reads the input sequences, passes them through the entropy masker, +// writes the resulting sequences and waits for the last pipe to finish. +func main() { + + defer obiseq.LogBioSeqStatus() + + // go tool pprof -http=":8000" ./obilowmask ./cpu.pprof + // f, err := os.Create("cpu.pprof") + // if err != nil { + // log.Fatal(err) + // } + // pprof.StartCPUProfile(f) + // defer pprof.StopCPUProfile() + + // go tool trace cpu.trace + // ftrace, err := os.Create("cpu.trace") + // if err != nil { + // log.Fatal(err) + // } + // trace.Start(ftrace) + // defer trace.Stop() + + optionParser := obioptions.GenerateOptionParser( + "obilowmask", + "masks low complexity regions in a sequence file", + obilowmask.OptionSet) + + _, args := optionParser(os.Args) + + sequences, err := obiconvert.CLIReadBioSequences(args...) 
+ obiconvert.OpenSequenceDataErrorMessage(args, err) + + selected := obilowmask.CLISequenceEntropyMasker(sequences) + obiconvert.CLIWriteBioSequences(selected, true) + obiutils.WaitForLastPipe() + +} diff --git a/pkg/obikmer/encodefourmer.go b/pkg/obikmer/encodefourmer.go index ba1a5a1..42d9326 100644 --- a/pkg/obikmer/encodefourmer.go +++ b/pkg/obikmer/encodefourmer.go @@ -23,6 +23,13 @@ var __single_base_code__ = []byte{0, 0, 0, 0, } +// EncodeNucleotide returns the code stored in __single_base_code__ for the +// nucleotide b. Only the five low-order bits of b index the table, so the +// upper and lower case ASCII forms of a letter yield the same code. +func EncodeNucleotide(b byte) byte { + return __single_base_code__[b&31] +} + // Encode4mer transforms an obiseq.BioSequence into a sequence // of kmer of length 4. Each letter of the sequence not belonging // A, C, G, T, U are considered as a A. The kmer is encoded as a byte @@ -54,7 +61,7 @@ func Encode4mer(seq *obiseq.BioSequence, buffer *[]byte) []byte { code = 0 for ; i < 4; i++ { code <<= 2 - code += __single_base_code__[rawseq[i]&31] + code += EncodeNucleotide(rawseq[i]) } *buffer = append((*buffer), code) diff --git a/pkg/obikmer/kmernorm.go b/pkg/obikmer/kmernorm.go new file mode 100644 index 0000000..4688f87 --- /dev/null +++ b/pkg/obikmer/kmernorm.go @@ -0,0 +1,5503 @@ +package obikmer + +// LexicographicNormalization is a table that maps each k-mer (k=1 to 6) +// to its canonical lexicographic representative obtained by circular permutation. 
+var LexicographicNormalization = map[string]string{ + "a": "a", + "c": "c", + "g": "g", + "t": "t", + "aa": "aa", + "ac": "ac", + "ag": "ag", + "at": "at", + "ca": "ac", + "cc": "cc", + "cg": "cg", + "ct": "ct", + "ga": "ag", + "gc": "cg", + "gg": "gg", + "gt": "gt", + "ta": "at", + "tc": "ct", + "tg": "gt", + "tt": "tt", + "aaa": "aaa", + "aac": "aac", + "aag": "aag", + "aat": "aat", + "aca": "aac", + "acc": "acc", + "acg": "acg", + "act": "act", + "aga": "aag", + "agc": "agc", + "agg": "agg", + "agt": "agt", + "ata": "aat", + "atc": "atc", + "atg": "atg", + "att": "att", + "caa": "aac", + "cac": "acc", + "cag": "agc", + "cat": "atc", + "cca": "acc", + "ccc": "ccc", + "ccg": "ccg", + "cct": "cct", + "cga": "acg", + "cgc": "ccg", + "cgg": "cgg", + "cgt": "cgt", + "cta": "act", + "ctc": "cct", + "ctg": "ctg", + "ctt": "ctt", + "gaa": "aag", + "gac": "acg", + "gag": "agg", + "gat": "atg", + "gca": "agc", + "gcc": "ccg", + "gcg": "cgg", + "gct": "ctg", + "gga": "agg", + "ggc": "cgg", + "ggg": "ggg", + "ggt": "ggt", + "gta": "agt", + "gtc": "cgt", + "gtg": "ggt", + "gtt": "gtt", + "taa": "aat", + "tac": "act", + "tag": "agt", + "tat": "att", + "tca": "atc", + "tcc": "cct", + "tcg": "cgt", + "tct": "ctt", + "tga": "atg", + "tgc": "ctg", + "tgg": "ggt", + "tgt": "gtt", + "tta": "att", + "ttc": "ctt", + "ttg": "gtt", + "ttt": "ttt", + "aaaa": "aaaa", + "aaac": "aaac", + "aaag": "aaag", + "aaat": "aaat", + "aaca": "aaac", + "aacc": "aacc", + "aacg": "aacg", + "aact": "aact", + "aaga": "aaag", + "aagc": "aagc", + "aagg": "aagg", + "aagt": "aagt", + "aata": "aaat", + "aatc": "aatc", + "aatg": "aatg", + "aatt": "aatt", + "acaa": "aaac", + "acac": "acac", + "acag": "acag", + "acat": "acat", + "acca": "aacc", + "accc": "accc", + "accg": "accg", + "acct": "acct", + "acga": "aacg", + "acgc": "acgc", + "acgg": "acgg", + "acgt": "acgt", + "acta": "aact", + "actc": "actc", + "actg": "actg", + "actt": "actt", + "agaa": "aaag", + "agac": "acag", + "agag": "agag", + "agat": "agat", + 
"agca": "aagc", + "agcc": "agcc", + "agcg": "agcg", + "agct": "agct", + "agga": "aagg", + "aggc": "aggc", + "aggg": "aggg", + "aggt": "aggt", + "agta": "aagt", + "agtc": "agtc", + "agtg": "agtg", + "agtt": "agtt", + "ataa": "aaat", + "atac": "acat", + "atag": "agat", + "atat": "atat", + "atca": "aatc", + "atcc": "atcc", + "atcg": "atcg", + "atct": "atct", + "atga": "aatg", + "atgc": "atgc", + "atgg": "atgg", + "atgt": "atgt", + "atta": "aatt", + "attc": "attc", + "attg": "attg", + "attt": "attt", + "caaa": "aaac", + "caac": "aacc", + "caag": "aagc", + "caat": "aatc", + "caca": "acac", + "cacc": "accc", + "cacg": "acgc", + "cact": "actc", + "caga": "acag", + "cagc": "agcc", + "cagg": "aggc", + "cagt": "agtc", + "cata": "acat", + "catc": "atcc", + "catg": "atgc", + "catt": "attc", + "ccaa": "aacc", + "ccac": "accc", + "ccag": "agcc", + "ccat": "atcc", + "ccca": "accc", + "cccc": "cccc", + "cccg": "cccg", + "ccct": "ccct", + "ccga": "accg", + "ccgc": "cccg", + "ccgg": "ccgg", + "ccgt": "ccgt", + "ccta": "acct", + "cctc": "ccct", + "cctg": "cctg", + "cctt": "cctt", + "cgaa": "aacg", + "cgac": "accg", + "cgag": "agcg", + "cgat": "atcg", + "cgca": "acgc", + "cgcc": "cccg", + "cgcg": "cgcg", + "cgct": "cgct", + "cgga": "acgg", + "cggc": "ccgg", + "cggg": "cggg", + "cggt": "cggt", + "cgta": "acgt", + "cgtc": "ccgt", + "cgtg": "cgtg", + "cgtt": "cgtt", + "ctaa": "aact", + "ctac": "acct", + "ctag": "agct", + "ctat": "atct", + "ctca": "actc", + "ctcc": "ccct", + "ctcg": "cgct", + "ctct": "ctct", + "ctga": "actg", + "ctgc": "cctg", + "ctgg": "ctgg", + "ctgt": "ctgt", + "ctta": "actt", + "cttc": "cctt", + "cttg": "cttg", + "cttt": "cttt", + "gaaa": "aaag", + "gaac": "aacg", + "gaag": "aagg", + "gaat": "aatg", + "gaca": "acag", + "gacc": "accg", + "gacg": "acgg", + "gact": "actg", + "gaga": "agag", + "gagc": "agcg", + "gagg": "aggg", + "gagt": "agtg", + "gata": "agat", + "gatc": "atcg", + "gatg": "atgg", + "gatt": "attg", + "gcaa": "aagc", + "gcac": "acgc", + "gcag": "aggc", + 
"gcat": "atgc", + "gcca": "agcc", + "gccc": "cccg", + "gccg": "ccgg", + "gcct": "cctg", + "gcga": "agcg", + "gcgc": "cgcg", + "gcgg": "cggg", + "gcgt": "cgtg", + "gcta": "agct", + "gctc": "cgct", + "gctg": "ctgg", + "gctt": "cttg", + "ggaa": "aagg", + "ggac": "acgg", + "ggag": "aggg", + "ggat": "atgg", + "ggca": "aggc", + "ggcc": "ccgg", + "ggcg": "cggg", + "ggct": "ctgg", + "ggga": "aggg", + "gggc": "cggg", + "gggg": "gggg", + "gggt": "gggt", + "ggta": "aggt", + "ggtc": "cggt", + "ggtg": "gggt", + "ggtt": "ggtt", + "gtaa": "aagt", + "gtac": "acgt", + "gtag": "aggt", + "gtat": "atgt", + "gtca": "agtc", + "gtcc": "ccgt", + "gtcg": "cggt", + "gtct": "ctgt", + "gtga": "agtg", + "gtgc": "cgtg", + "gtgg": "gggt", + "gtgt": "gtgt", + "gtta": "agtt", + "gttc": "cgtt", + "gttg": "ggtt", + "gttt": "gttt", + "taaa": "aaat", + "taac": "aact", + "taag": "aagt", + "taat": "aatt", + "taca": "acat", + "tacc": "acct", + "tacg": "acgt", + "tact": "actt", + "taga": "agat", + "tagc": "agct", + "tagg": "aggt", + "tagt": "agtt", + "tata": "atat", + "tatc": "atct", + "tatg": "atgt", + "tatt": "attt", + "tcaa": "aatc", + "tcac": "actc", + "tcag": "agtc", + "tcat": "attc", + "tcca": "atcc", + "tccc": "ccct", + "tccg": "ccgt", + "tcct": "cctt", + "tcga": "atcg", + "tcgc": "cgct", + "tcgg": "cggt", + "tcgt": "cgtt", + "tcta": "atct", + "tctc": "ctct", + "tctg": "ctgt", + "tctt": "cttt", + "tgaa": "aatg", + "tgac": "actg", + "tgag": "agtg", + "tgat": "attg", + "tgca": "atgc", + "tgcc": "cctg", + "tgcg": "cgtg", + "tgct": "cttg", + "tgga": "atgg", + "tggc": "ctgg", + "tggg": "gggt", + "tggt": "ggtt", + "tgta": "atgt", + "tgtc": "ctgt", + "tgtg": "gtgt", + "tgtt": "gttt", + "ttaa": "aatt", + "ttac": "actt", + "ttag": "agtt", + "ttat": "attt", + "ttca": "attc", + "ttcc": "cctt", + "ttcg": "cgtt", + "ttct": "cttt", + "ttga": "attg", + "ttgc": "cttg", + "ttgg": "ggtt", + "ttgt": "gttt", + "ttta": "attt", + "tttc": "cttt", + "tttg": "gttt", + "tttt": "tttt", + "aaaaa": "aaaaa", + "aaaac": "aaaac", 
+ "aaaag": "aaaag", + "aaaat": "aaaat", + "aaaca": "aaaac", + "aaacc": "aaacc", + "aaacg": "aaacg", + "aaact": "aaact", + "aaaga": "aaaag", + "aaagc": "aaagc", + "aaagg": "aaagg", + "aaagt": "aaagt", + "aaata": "aaaat", + "aaatc": "aaatc", + "aaatg": "aaatg", + "aaatt": "aaatt", + "aacaa": "aaaac", + "aacac": "aacac", + "aacag": "aacag", + "aacat": "aacat", + "aacca": "aaacc", + "aaccc": "aaccc", + "aaccg": "aaccg", + "aacct": "aacct", + "aacga": "aaacg", + "aacgc": "aacgc", + "aacgg": "aacgg", + "aacgt": "aacgt", + "aacta": "aaact", + "aactc": "aactc", + "aactg": "aactg", + "aactt": "aactt", + "aagaa": "aaaag", + "aagac": "aagac", + "aagag": "aagag", + "aagat": "aagat", + "aagca": "aaagc", + "aagcc": "aagcc", + "aagcg": "aagcg", + "aagct": "aagct", + "aagga": "aaagg", + "aaggc": "aaggc", + "aaggg": "aaggg", + "aaggt": "aaggt", + "aagta": "aaagt", + "aagtc": "aagtc", + "aagtg": "aagtg", + "aagtt": "aagtt", + "aataa": "aaaat", + "aatac": "aatac", + "aatag": "aatag", + "aatat": "aatat", + "aatca": "aaatc", + "aatcc": "aatcc", + "aatcg": "aatcg", + "aatct": "aatct", + "aatga": "aaatg", + "aatgc": "aatgc", + "aatgg": "aatgg", + "aatgt": "aatgt", + "aatta": "aaatt", + "aattc": "aattc", + "aattg": "aattg", + "aattt": "aattt", + "acaaa": "aaaac", + "acaac": "aacac", + "acaag": "aagac", + "acaat": "aatac", + "acaca": "aacac", + "acacc": "acacc", + "acacg": "acacg", + "acact": "acact", + "acaga": "aacag", + "acagc": "acagc", + "acagg": "acagg", + "acagt": "acagt", + "acata": "aacat", + "acatc": "acatc", + "acatg": "acatg", + "acatt": "acatt", + "accaa": "aaacc", + "accac": "acacc", + "accag": "accag", + "accat": "accat", + "accca": "aaccc", + "acccc": "acccc", + "acccg": "acccg", + "accct": "accct", + "accga": "aaccg", + "accgc": "accgc", + "accgg": "accgg", + "accgt": "accgt", + "accta": "aacct", + "acctc": "acctc", + "acctg": "acctg", + "acctt": "acctt", + "acgaa": "aaacg", + "acgac": "acacg", + "acgag": "acgag", + "acgat": "acgat", + "acgca": "aacgc", + "acgcc": "acgcc", 
+ "acgcg": "acgcg", + "acgct": "acgct", + "acgga": "aacgg", + "acggc": "acggc", + "acggg": "acggg", + "acggt": "acggt", + "acgta": "aacgt", + "acgtc": "acgtc", + "acgtg": "acgtg", + "acgtt": "acgtt", + "actaa": "aaact", + "actac": "acact", + "actag": "actag", + "actat": "actat", + "actca": "aactc", + "actcc": "actcc", + "actcg": "actcg", + "actct": "actct", + "actga": "aactg", + "actgc": "actgc", + "actgg": "actgg", + "actgt": "actgt", + "actta": "aactt", + "acttc": "acttc", + "acttg": "acttg", + "acttt": "acttt", + "agaaa": "aaaag", + "agaac": "aacag", + "agaag": "aagag", + "agaat": "aatag", + "agaca": "aagac", + "agacc": "accag", + "agacg": "acgag", + "agact": "actag", + "agaga": "aagag", + "agagc": "agagc", + "agagg": "agagg", + "agagt": "agagt", + "agata": "aagat", + "agatc": "agatc", + "agatg": "agatg", + "agatt": "agatt", + "agcaa": "aaagc", + "agcac": "acagc", + "agcag": "agagc", + "agcat": "agcat", + "agcca": "aagcc", + "agccc": "agccc", + "agccg": "agccg", + "agcct": "agcct", + "agcga": "aagcg", + "agcgc": "agcgc", + "agcgg": "agcgg", + "agcgt": "agcgt", + "agcta": "aagct", + "agctc": "agctc", + "agctg": "agctg", + "agctt": "agctt", + "aggaa": "aaagg", + "aggac": "acagg", + "aggag": "agagg", + "aggat": "aggat", + "aggca": "aaggc", + "aggcc": "aggcc", + "aggcg": "aggcg", + "aggct": "aggct", + "aggga": "aaggg", + "agggc": "agggc", + "agggg": "agggg", + "agggt": "agggt", + "aggta": "aaggt", + "aggtc": "aggtc", + "aggtg": "aggtg", + "aggtt": "aggtt", + "agtaa": "aaagt", + "agtac": "acagt", + "agtag": "agagt", + "agtat": "agtat", + "agtca": "aagtc", + "agtcc": "agtcc", + "agtcg": "agtcg", + "agtct": "agtct", + "agtga": "aagtg", + "agtgc": "agtgc", + "agtgg": "agtgg", + "agtgt": "agtgt", + "agtta": "aagtt", + "agttc": "agttc", + "agttg": "agttg", + "agttt": "agttt", + "ataaa": "aaaat", + "ataac": "aacat", + "ataag": "aagat", + "ataat": "aatat", + "ataca": "aatac", + "atacc": "accat", + "atacg": "acgat", + "atact": "actat", + "ataga": "aatag", + "atagc": "agcat", 
+ "atagg": "aggat", + "atagt": "agtat", + "atata": "aatat", + "atatc": "atatc", + "atatg": "atatg", + "atatt": "atatt", + "atcaa": "aaatc", + "atcac": "acatc", + "atcag": "agatc", + "atcat": "atatc", + "atcca": "aatcc", + "atccc": "atccc", + "atccg": "atccg", + "atcct": "atcct", + "atcga": "aatcg", + "atcgc": "atcgc", + "atcgg": "atcgg", + "atcgt": "atcgt", + "atcta": "aatct", + "atctc": "atctc", + "atctg": "atctg", + "atctt": "atctt", + "atgaa": "aaatg", + "atgac": "acatg", + "atgag": "agatg", + "atgat": "atatg", + "atgca": "aatgc", + "atgcc": "atgcc", + "atgcg": "atgcg", + "atgct": "atgct", + "atgga": "aatgg", + "atggc": "atggc", + "atggg": "atggg", + "atggt": "atggt", + "atgta": "aatgt", + "atgtc": "atgtc", + "atgtg": "atgtg", + "atgtt": "atgtt", + "attaa": "aaatt", + "attac": "acatt", + "attag": "agatt", + "attat": "atatt", + "attca": "aattc", + "attcc": "attcc", + "attcg": "attcg", + "attct": "attct", + "attga": "aattg", + "attgc": "attgc", + "attgg": "attgg", + "attgt": "attgt", + "attta": "aattt", + "atttc": "atttc", + "atttg": "atttg", + "atttt": "atttt", + "caaaa": "aaaac", + "caaac": "aaacc", + "caaag": "aaagc", + "caaat": "aaatc", + "caaca": "aacac", + "caacc": "aaccc", + "caacg": "aacgc", + "caact": "aactc", + "caaga": "aagac", + "caagc": "aagcc", + "caagg": "aaggc", + "caagt": "aagtc", + "caata": "aatac", + "caatc": "aatcc", + "caatg": "aatgc", + "caatt": "aattc", + "cacaa": "aacac", + "cacac": "acacc", + "cacag": "acagc", + "cacat": "acatc", + "cacca": "acacc", + "caccc": "acccc", + "caccg": "accgc", + "cacct": "acctc", + "cacga": "acacg", + "cacgc": "acgcc", + "cacgg": "acggc", + "cacgt": "acgtc", + "cacta": "acact", + "cactc": "actcc", + "cactg": "actgc", + "cactt": "acttc", + "cagaa": "aacag", + "cagac": "accag", + "cagag": "agagc", + "cagat": "agatc", + "cagca": "acagc", + "cagcc": "agccc", + "cagcg": "agcgc", + "cagct": "agctc", + "cagga": "acagg", + "caggc": "aggcc", + "caggg": "agggc", + "caggt": "aggtc", + "cagta": "acagt", + "cagtc": "agtcc", 
+ "cagtg": "agtgc", + "cagtt": "agttc", + "cataa": "aacat", + "catac": "accat", + "catag": "agcat", + "catat": "atatc", + "catca": "acatc", + "catcc": "atccc", + "catcg": "atcgc", + "catct": "atctc", + "catga": "acatg", + "catgc": "atgcc", + "catgg": "atggc", + "catgt": "atgtc", + "catta": "acatt", + "cattc": "attcc", + "cattg": "attgc", + "cattt": "atttc", + "ccaaa": "aaacc", + "ccaac": "aaccc", + "ccaag": "aagcc", + "ccaat": "aatcc", + "ccaca": "acacc", + "ccacc": "acccc", + "ccacg": "acgcc", + "ccact": "actcc", + "ccaga": "accag", + "ccagc": "agccc", + "ccagg": "aggcc", + "ccagt": "agtcc", + "ccata": "accat", + "ccatc": "atccc", + "ccatg": "atgcc", + "ccatt": "attcc", + "cccaa": "aaccc", + "cccac": "acccc", + "cccag": "agccc", + "cccat": "atccc", + "cccca": "acccc", + "ccccc": "ccccc", + "ccccg": "ccccg", + "cccct": "cccct", + "cccga": "acccg", + "cccgc": "ccccg", + "cccgg": "cccgg", + "cccgt": "cccgt", + "cccta": "accct", + "ccctc": "cccct", + "ccctg": "ccctg", + "ccctt": "ccctt", + "ccgaa": "aaccg", + "ccgac": "acccg", + "ccgag": "agccg", + "ccgat": "atccg", + "ccgca": "accgc", + "ccgcc": "ccccg", + "ccgcg": "ccgcg", + "ccgct": "ccgct", + "ccgga": "accgg", + "ccggc": "cccgg", + "ccggg": "ccggg", + "ccggt": "ccggt", + "ccgta": "accgt", + "ccgtc": "cccgt", + "ccgtg": "ccgtg", + "ccgtt": "ccgtt", + "cctaa": "aacct", + "cctac": "accct", + "cctag": "agcct", + "cctat": "atcct", + "cctca": "acctc", + "cctcc": "cccct", + "cctcg": "cctcg", + "cctct": "cctct", + "cctga": "acctg", + "cctgc": "ccctg", + "cctgg": "cctgg", + "cctgt": "cctgt", + "cctta": "acctt", + "ccttc": "ccctt", + "ccttg": "ccttg", + "ccttt": "ccttt", + "cgaaa": "aaacg", + "cgaac": "aaccg", + "cgaag": "aagcg", + "cgaat": "aatcg", + "cgaca": "acacg", + "cgacc": "acccg", + "cgacg": "acgcg", + "cgact": "actcg", + "cgaga": "acgag", + "cgagc": "agccg", + "cgagg": "aggcg", + "cgagt": "agtcg", + "cgata": "acgat", + "cgatc": "atccg", + "cgatg": "atgcg", + "cgatt": "attcg", + "cgcaa": "aacgc", + "cgcac": "accgc", 
+ "cgcag": "agcgc", + "cgcat": "atcgc", + "cgcca": "acgcc", + "cgccc": "ccccg", + "cgccg": "ccgcg", + "cgcct": "cctcg", + "cgcga": "acgcg", + "cgcgc": "ccgcg", + "cgcgg": "cgcgg", + "cgcgt": "cgcgt", + "cgcta": "acgct", + "cgctc": "ccgct", + "cgctg": "cgctg", + "cgctt": "cgctt", + "cggaa": "aacgg", + "cggac": "accgg", + "cggag": "agcgg", + "cggat": "atcgg", + "cggca": "acggc", + "cggcc": "cccgg", + "cggcg": "cgcgg", + "cggct": "cggct", + "cggga": "acggg", + "cgggc": "ccggg", + "cgggg": "cgggg", + "cgggt": "cgggt", + "cggta": "acggt", + "cggtc": "ccggt", + "cggtg": "cggtg", + "cggtt": "cggtt", + "cgtaa": "aacgt", + "cgtac": "accgt", + "cgtag": "agcgt", + "cgtat": "atcgt", + "cgtca": "acgtc", + "cgtcc": "cccgt", + "cgtcg": "cgcgt", + "cgtct": "cgtct", + "cgtga": "acgtg", + "cgtgc": "ccgtg", + "cgtgg": "cgtgg", + "cgtgt": "cgtgt", + "cgtta": "acgtt", + "cgttc": "ccgtt", + "cgttg": "cgttg", + "cgttt": "cgttt", + "ctaaa": "aaact", + "ctaac": "aacct", + "ctaag": "aagct", + "ctaat": "aatct", + "ctaca": "acact", + "ctacc": "accct", + "ctacg": "acgct", + "ctact": "actct", + "ctaga": "actag", + "ctagc": "agcct", + "ctagg": "aggct", + "ctagt": "agtct", + "ctata": "actat", + "ctatc": "atcct", + "ctatg": "atgct", + "ctatt": "attct", + "ctcaa": "aactc", + "ctcac": "acctc", + "ctcag": "agctc", + "ctcat": "atctc", + "ctcca": "actcc", + "ctccc": "cccct", + "ctccg": "ccgct", + "ctcct": "cctct", + "ctcga": "actcg", + "ctcgc": "cctcg", + "ctcgg": "cggct", + "ctcgt": "cgtct", + "ctcta": "actct", + "ctctc": "cctct", + "ctctg": "ctctg", + "ctctt": "ctctt", + "ctgaa": "aactg", + "ctgac": "acctg", + "ctgag": "agctg", + "ctgat": "atctg", + "ctgca": "actgc", + "ctgcc": "ccctg", + "ctgcg": "cgctg", + "ctgct": "ctctg", + "ctgga": "actgg", + "ctggc": "cctgg", + "ctggg": "ctggg", + "ctggt": "ctggt", + "ctgta": "actgt", + "ctgtc": "cctgt", + "ctgtg": "ctgtg", + "ctgtt": "ctgtt", + "cttaa": "aactt", + "cttac": "acctt", + "cttag": "agctt", + "cttat": "atctt", + "cttca": "acttc", + "cttcc": "ccctt", 
+ "cttcg": "cgctt", + "cttct": "ctctt", + "cttga": "acttg", + "cttgc": "ccttg", + "cttgg": "cttgg", + "cttgt": "cttgt", + "cttta": "acttt", + "ctttc": "ccttt", + "ctttg": "ctttg", + "ctttt": "ctttt", + "gaaaa": "aaaag", + "gaaac": "aaacg", + "gaaag": "aaagg", + "gaaat": "aaatg", + "gaaca": "aacag", + "gaacc": "aaccg", + "gaacg": "aacgg", + "gaact": "aactg", + "gaaga": "aagag", + "gaagc": "aagcg", + "gaagg": "aaggg", + "gaagt": "aagtg", + "gaata": "aatag", + "gaatc": "aatcg", + "gaatg": "aatgg", + "gaatt": "aattg", + "gacaa": "aagac", + "gacac": "acacg", + "gacag": "acagg", + "gacat": "acatg", + "gacca": "accag", + "gaccc": "acccg", + "gaccg": "accgg", + "gacct": "acctg", + "gacga": "acgag", + "gacgc": "acgcg", + "gacgg": "acggg", + "gacgt": "acgtg", + "gacta": "actag", + "gactc": "actcg", + "gactg": "actgg", + "gactt": "acttg", + "gagaa": "aagag", + "gagac": "acgag", + "gagag": "agagg", + "gagat": "agatg", + "gagca": "agagc", + "gagcc": "agccg", + "gagcg": "agcgg", + "gagct": "agctg", + "gagga": "agagg", + "gaggc": "aggcg", + "gaggg": "agggg", + "gaggt": "aggtg", + "gagta": "agagt", + "gagtc": "agtcg", + "gagtg": "agtgg", + "gagtt": "agttg", + "gataa": "aagat", + "gatac": "acgat", + "gatag": "aggat", + "gatat": "atatg", + "gatca": "agatc", + "gatcc": "atccg", + "gatcg": "atcgg", + "gatct": "atctg", + "gatga": "agatg", + "gatgc": "atgcg", + "gatgg": "atggg", + "gatgt": "atgtg", + "gatta": "agatt", + "gattc": "attcg", + "gattg": "attgg", + "gattt": "atttg", + "gcaaa": "aaagc", + "gcaac": "aacgc", + "gcaag": "aaggc", + "gcaat": "aatgc", + "gcaca": "acagc", + "gcacc": "accgc", + "gcacg": "acggc", + "gcact": "actgc", + "gcaga": "agagc", + "gcagc": "agcgc", + "gcagg": "agggc", + "gcagt": "agtgc", + "gcata": "agcat", + "gcatc": "atcgc", + "gcatg": "atggc", + "gcatt": "attgc", + "gccaa": "aagcc", + "gccac": "acgcc", + "gccag": "aggcc", + "gccat": "atgcc", + "gccca": "agccc", + "gcccc": "ccccg", + "gcccg": "cccgg", + "gccct": "ccctg", + "gccga": "agccg", + "gccgc": "ccgcg", 
+ "gccgg": "ccggg", + "gccgt": "ccgtg", + "gccta": "agcct", + "gcctc": "cctcg", + "gcctg": "cctgg", + "gcctt": "ccttg", + "gcgaa": "aagcg", + "gcgac": "acgcg", + "gcgag": "aggcg", + "gcgat": "atgcg", + "gcgca": "agcgc", + "gcgcc": "ccgcg", + "gcgcg": "cgcgg", + "gcgct": "cgctg", + "gcgga": "agcgg", + "gcggc": "cgcgg", + "gcggg": "cgggg", + "gcggt": "cggtg", + "gcgta": "agcgt", + "gcgtc": "cgcgt", + "gcgtg": "cgtgg", + "gcgtt": "cgttg", + "gctaa": "aagct", + "gctac": "acgct", + "gctag": "aggct", + "gctat": "atgct", + "gctca": "agctc", + "gctcc": "ccgct", + "gctcg": "cggct", + "gctct": "ctctg", + "gctga": "agctg", + "gctgc": "cgctg", + "gctgg": "ctggg", + "gctgt": "ctgtg", + "gctta": "agctt", + "gcttc": "cgctt", + "gcttg": "cttgg", + "gcttt": "ctttg", + "ggaaa": "aaagg", + "ggaac": "aacgg", + "ggaag": "aaggg", + "ggaat": "aatgg", + "ggaca": "acagg", + "ggacc": "accgg", + "ggacg": "acggg", + "ggact": "actgg", + "ggaga": "agagg", + "ggagc": "agcgg", + "ggagg": "agggg", + "ggagt": "agtgg", + "ggata": "aggat", + "ggatc": "atcgg", + "ggatg": "atggg", + "ggatt": "attgg", + "ggcaa": "aaggc", + "ggcac": "acggc", + "ggcag": "agggc", + "ggcat": "atggc", + "ggcca": "aggcc", + "ggccc": "cccgg", + "ggccg": "ccggg", + "ggcct": "cctgg", + "ggcga": "aggcg", + "ggcgc": "cgcgg", + "ggcgg": "cgggg", + "ggcgt": "cgtgg", + "ggcta": "aggct", + "ggctc": "cggct", + "ggctg": "ctggg", + "ggctt": "cttgg", + "gggaa": "aaggg", + "gggac": "acggg", + "gggag": "agggg", + "gggat": "atggg", + "gggca": "agggc", + "gggcc": "ccggg", + "gggcg": "cgggg", + "gggct": "ctggg", + "gggga": "agggg", + "ggggc": "cgggg", + "ggggg": "ggggg", + "ggggt": "ggggt", + "gggta": "agggt", + "gggtc": "cgggt", + "gggtg": "ggggt", + "gggtt": "gggtt", + "ggtaa": "aaggt", + "ggtac": "acggt", + "ggtag": "agggt", + "ggtat": "atggt", + "ggtca": "aggtc", + "ggtcc": "ccggt", + "ggtcg": "cgggt", + "ggtct": "ctggt", + "ggtga": "aggtg", + "ggtgc": "cggtg", + "ggtgg": "ggggt", + "ggtgt": "ggtgt", + "ggtta": "aggtt", + "ggttc": "cggtt", 
+ "ggttg": "gggtt", + "ggttt": "ggttt", + "gtaaa": "aaagt", + "gtaac": "aacgt", + "gtaag": "aaggt", + "gtaat": "aatgt", + "gtaca": "acagt", + "gtacc": "accgt", + "gtacg": "acggt", + "gtact": "actgt", + "gtaga": "agagt", + "gtagc": "agcgt", + "gtagg": "agggt", + "gtagt": "agtgt", + "gtata": "agtat", + "gtatc": "atcgt", + "gtatg": "atggt", + "gtatt": "attgt", + "gtcaa": "aagtc", + "gtcac": "acgtc", + "gtcag": "aggtc", + "gtcat": "atgtc", + "gtcca": "agtcc", + "gtccc": "cccgt", + "gtccg": "ccggt", + "gtcct": "cctgt", + "gtcga": "agtcg", + "gtcgc": "cgcgt", + "gtcgg": "cgggt", + "gtcgt": "cgtgt", + "gtcta": "agtct", + "gtctc": "cgtct", + "gtctg": "ctggt", + "gtctt": "cttgt", + "gtgaa": "aagtg", + "gtgac": "acgtg", + "gtgag": "aggtg", + "gtgat": "atgtg", + "gtgca": "agtgc", + "gtgcc": "ccgtg", + "gtgcg": "cggtg", + "gtgct": "ctgtg", + "gtgga": "agtgg", + "gtggc": "cgtgg", + "gtggg": "ggggt", + "gtggt": "ggtgt", + "gtgta": "agtgt", + "gtgtc": "cgtgt", + "gtgtg": "ggtgt", + "gtgtt": "gtgtt", + "gttaa": "aagtt", + "gttac": "acgtt", + "gttag": "aggtt", + "gttat": "atgtt", + "gttca": "agttc", + "gttcc": "ccgtt", + "gttcg": "cggtt", + "gttct": "ctgtt", + "gttga": "agttg", + "gttgc": "cgttg", + "gttgg": "gggtt", + "gttgt": "gtgtt", + "gttta": "agttt", + "gtttc": "cgttt", + "gtttg": "ggttt", + "gtttt": "gtttt", + "taaaa": "aaaat", + "taaac": "aaact", + "taaag": "aaagt", + "taaat": "aaatt", + "taaca": "aacat", + "taacc": "aacct", + "taacg": "aacgt", + "taact": "aactt", + "taaga": "aagat", + "taagc": "aagct", + "taagg": "aaggt", + "taagt": "aagtt", + "taata": "aatat", + "taatc": "aatct", + "taatg": "aatgt", + "taatt": "aattt", + "tacaa": "aatac", + "tacac": "acact", + "tacag": "acagt", + "tacat": "acatt", + "tacca": "accat", + "taccc": "accct", + "taccg": "accgt", + "tacct": "acctt", + "tacga": "acgat", + "tacgc": "acgct", + "tacgg": "acggt", + "tacgt": "acgtt", + "tacta": "actat", + "tactc": "actct", + "tactg": "actgt", + "tactt": "acttt", + "tagaa": "aatag", + "tagac": "actag", 
+ "tagag": "agagt", + "tagat": "agatt", + "tagca": "agcat", + "tagcc": "agcct", + "tagcg": "agcgt", + "tagct": "agctt", + "tagga": "aggat", + "taggc": "aggct", + "taggg": "agggt", + "taggt": "aggtt", + "tagta": "agtat", + "tagtc": "agtct", + "tagtg": "agtgt", + "tagtt": "agttt", + "tataa": "aatat", + "tatac": "actat", + "tatag": "agtat", + "tatat": "atatt", + "tatca": "atatc", + "tatcc": "atcct", + "tatcg": "atcgt", + "tatct": "atctt", + "tatga": "atatg", + "tatgc": "atgct", + "tatgg": "atggt", + "tatgt": "atgtt", + "tatta": "atatt", + "tattc": "attct", + "tattg": "attgt", + "tattt": "atttt", + "tcaaa": "aaatc", + "tcaac": "aactc", + "tcaag": "aagtc", + "tcaat": "aattc", + "tcaca": "acatc", + "tcacc": "acctc", + "tcacg": "acgtc", + "tcact": "acttc", + "tcaga": "agatc", + "tcagc": "agctc", + "tcagg": "aggtc", + "tcagt": "agttc", + "tcata": "atatc", + "tcatc": "atctc", + "tcatg": "atgtc", + "tcatt": "atttc", + "tccaa": "aatcc", + "tccac": "actcc", + "tccag": "agtcc", + "tccat": "attcc", + "tccca": "atccc", + "tcccc": "cccct", + "tcccg": "cccgt", + "tccct": "ccctt", + "tccga": "atccg", + "tccgc": "ccgct", + "tccgg": "ccggt", + "tccgt": "ccgtt", + "tccta": "atcct", + "tcctc": "cctct", + "tcctg": "cctgt", + "tcctt": "ccttt", + "tcgaa": "aatcg", + "tcgac": "actcg", + "tcgag": "agtcg", + "tcgat": "attcg", + "tcgca": "atcgc", + "tcgcc": "cctcg", + "tcgcg": "cgcgt", + "tcgct": "cgctt", + "tcgga": "atcgg", + "tcggc": "cggct", + "tcggg": "cgggt", + "tcggt": "cggtt", + "tcgta": "atcgt", + "tcgtc": "cgtct", + "tcgtg": "cgtgt", + "tcgtt": "cgttt", + "tctaa": "aatct", + "tctac": "actct", + "tctag": "agtct", + "tctat": "attct", + "tctca": "atctc", + "tctcc": "cctct", + "tctcg": "cgtct", + "tctct": "ctctt", + "tctga": "atctg", + "tctgc": "ctctg", + "tctgg": "ctggt", + "tctgt": "ctgtt", + "tctta": "atctt", + "tcttc": "ctctt", + "tcttg": "cttgt", + "tcttt": "ctttt", + "tgaaa": "aaatg", + "tgaac": "aactg", + "tgaag": "aagtg", + "tgaat": "aattg", + "tgaca": "acatg", + "tgacc": "acctg", 
+ "tgacg": "acgtg", + "tgact": "acttg", + "tgaga": "agatg", + "tgagc": "agctg", + "tgagg": "aggtg", + "tgagt": "agttg", + "tgata": "atatg", + "tgatc": "atctg", + "tgatg": "atgtg", + "tgatt": "atttg", + "tgcaa": "aatgc", + "tgcac": "actgc", + "tgcag": "agtgc", + "tgcat": "attgc", + "tgcca": "atgcc", + "tgccc": "ccctg", + "tgccg": "ccgtg", + "tgcct": "ccttg", + "tgcga": "atgcg", + "tgcgc": "cgctg", + "tgcgg": "cggtg", + "tgcgt": "cgttg", + "tgcta": "atgct", + "tgctc": "ctctg", + "tgctg": "ctgtg", + "tgctt": "ctttg", + "tggaa": "aatgg", + "tggac": "actgg", + "tggag": "agtgg", + "tggat": "attgg", + "tggca": "atggc", + "tggcc": "cctgg", + "tggcg": "cgtgg", + "tggct": "cttgg", + "tggga": "atggg", + "tgggc": "ctggg", + "tgggg": "ggggt", + "tgggt": "gggtt", + "tggta": "atggt", + "tggtc": "ctggt", + "tggtg": "ggtgt", + "tggtt": "ggttt", + "tgtaa": "aatgt", + "tgtac": "actgt", + "tgtag": "agtgt", + "tgtat": "attgt", + "tgtca": "atgtc", + "tgtcc": "cctgt", + "tgtcg": "cgtgt", + "tgtct": "cttgt", + "tgtga": "atgtg", + "tgtgc": "ctgtg", + "tgtgg": "ggtgt", + "tgtgt": "gtgtt", + "tgtta": "atgtt", + "tgttc": "ctgtt", + "tgttg": "gtgtt", + "tgttt": "gtttt", + "ttaaa": "aaatt", + "ttaac": "aactt", + "ttaag": "aagtt", + "ttaat": "aattt", + "ttaca": "acatt", + "ttacc": "acctt", + "ttacg": "acgtt", + "ttact": "acttt", + "ttaga": "agatt", + "ttagc": "agctt", + "ttagg": "aggtt", + "ttagt": "agttt", + "ttata": "atatt", + "ttatc": "atctt", + "ttatg": "atgtt", + "ttatt": "atttt", + "ttcaa": "aattc", + "ttcac": "acttc", + "ttcag": "agttc", + "ttcat": "atttc", + "ttcca": "attcc", + "ttccc": "ccctt", + "ttccg": "ccgtt", + "ttcct": "ccttt", + "ttcga": "attcg", + "ttcgc": "cgctt", + "ttcgg": "cggtt", + "ttcgt": "cgttt", + "ttcta": "attct", + "ttctc": "ctctt", + "ttctg": "ctgtt", + "ttctt": "ctttt", + "ttgaa": "aattg", + "ttgac": "acttg", + "ttgag": "agttg", + "ttgat": "atttg", + "ttgca": "attgc", + "ttgcc": "ccttg", + "ttgcg": "cgttg", + "ttgct": "ctttg", + "ttgga": "attgg", + "ttggc": "cttgg", 
+ "ttggg": "gggtt", + "ttggt": "ggttt", + "ttgta": "attgt", + "ttgtc": "cttgt", + "ttgtg": "gtgtt", + "ttgtt": "gtttt", + "tttaa": "aattt", + "tttac": "acttt", + "tttag": "agttt", + "tttat": "atttt", + "tttca": "atttc", + "tttcc": "ccttt", + "tttcg": "cgttt", + "tttct": "ctttt", + "tttga": "atttg", + "tttgc": "ctttg", + "tttgg": "ggttt", + "tttgt": "gtttt", + "tttta": "atttt", + "ttttc": "ctttt", + "ttttg": "gtttt", + "ttttt": "ttttt", + "aaaaaa": "aaaaaa", + "aaaaac": "aaaaac", + "aaaaag": "aaaaag", + "aaaaat": "aaaaat", + "aaaaca": "aaaaac", + "aaaacc": "aaaacc", + "aaaacg": "aaaacg", + "aaaact": "aaaact", + "aaaaga": "aaaaag", + "aaaagc": "aaaagc", + "aaaagg": "aaaagg", + "aaaagt": "aaaagt", + "aaaata": "aaaaat", + "aaaatc": "aaaatc", + "aaaatg": "aaaatg", + "aaaatt": "aaaatt", + "aaacaa": "aaaaac", + "aaacac": "aaacac", + "aaacag": "aaacag", + "aaacat": "aaacat", + "aaacca": "aaaacc", + "aaaccc": "aaaccc", + "aaaccg": "aaaccg", + "aaacct": "aaacct", + "aaacga": "aaaacg", + "aaacgc": "aaacgc", + "aaacgg": "aaacgg", + "aaacgt": "aaacgt", + "aaacta": "aaaact", + "aaactc": "aaactc", + "aaactg": "aaactg", + "aaactt": "aaactt", + "aaagaa": "aaaaag", + "aaagac": "aaagac", + "aaagag": "aaagag", + "aaagat": "aaagat", + "aaagca": "aaaagc", + "aaagcc": "aaagcc", + "aaagcg": "aaagcg", + "aaagct": "aaagct", + "aaagga": "aaaagg", + "aaaggc": "aaaggc", + "aaaggg": "aaaggg", + "aaaggt": "aaaggt", + "aaagta": "aaaagt", + "aaagtc": "aaagtc", + "aaagtg": "aaagtg", + "aaagtt": "aaagtt", + "aaataa": "aaaaat", + "aaatac": "aaatac", + "aaatag": "aaatag", + "aaatat": "aaatat", + "aaatca": "aaaatc", + "aaatcc": "aaatcc", + "aaatcg": "aaatcg", + "aaatct": "aaatct", + "aaatga": "aaaatg", + "aaatgc": "aaatgc", + "aaatgg": "aaatgg", + "aaatgt": "aaatgt", + "aaatta": "aaaatt", + "aaattc": "aaattc", + "aaattg": "aaattg", + "aaattt": "aaattt", + "aacaaa": "aaaaac", + "aacaac": "aacaac", + "aacaag": "aacaag", + "aacaat": "aacaat", + "aacaca": "aaacac", + "aacacc": "aacacc", + "aacacg": 
"aacacg", + "aacact": "aacact", + "aacaga": "aaacag", + "aacagc": "aacagc", + "aacagg": "aacagg", + "aacagt": "aacagt", + "aacata": "aaacat", + "aacatc": "aacatc", + "aacatg": "aacatg", + "aacatt": "aacatt", + "aaccaa": "aaaacc", + "aaccac": "aaccac", + "aaccag": "aaccag", + "aaccat": "aaccat", + "aaccca": "aaaccc", + "aacccc": "aacccc", + "aacccg": "aacccg", + "aaccct": "aaccct", + "aaccga": "aaaccg", + "aaccgc": "aaccgc", + "aaccgg": "aaccgg", + "aaccgt": "aaccgt", + "aaccta": "aaacct", + "aacctc": "aacctc", + "aacctg": "aacctg", + "aacctt": "aacctt", + "aacgaa": "aaaacg", + "aacgac": "aacgac", + "aacgag": "aacgag", + "aacgat": "aacgat", + "aacgca": "aaacgc", + "aacgcc": "aacgcc", + "aacgcg": "aacgcg", + "aacgct": "aacgct", + "aacgga": "aaacgg", + "aacggc": "aacggc", + "aacggg": "aacggg", + "aacggt": "aacggt", + "aacgta": "aaacgt", + "aacgtc": "aacgtc", + "aacgtg": "aacgtg", + "aacgtt": "aacgtt", + "aactaa": "aaaact", + "aactac": "aactac", + "aactag": "aactag", + "aactat": "aactat", + "aactca": "aaactc", + "aactcc": "aactcc", + "aactcg": "aactcg", + "aactct": "aactct", + "aactga": "aaactg", + "aactgc": "aactgc", + "aactgg": "aactgg", + "aactgt": "aactgt", + "aactta": "aaactt", + "aacttc": "aacttc", + "aacttg": "aacttg", + "aacttt": "aacttt", + "aagaaa": "aaaaag", + "aagaac": "aacaag", + "aagaag": "aagaag", + "aagaat": "aagaat", + "aagaca": "aaagac", + "aagacc": "aagacc", + "aagacg": "aagacg", + "aagact": "aagact", + "aagaga": "aaagag", + "aagagc": "aagagc", + "aagagg": "aagagg", + "aagagt": "aagagt", + "aagata": "aaagat", + "aagatc": "aagatc", + "aagatg": "aagatg", + "aagatt": "aagatt", + "aagcaa": "aaaagc", + "aagcac": "aagcac", + "aagcag": "aagcag", + "aagcat": "aagcat", + "aagcca": "aaagcc", + "aagccc": "aagccc", + "aagccg": "aagccg", + "aagcct": "aagcct", + "aagcga": "aaagcg", + "aagcgc": "aagcgc", + "aagcgg": "aagcgg", + "aagcgt": "aagcgt", + "aagcta": "aaagct", + "aagctc": "aagctc", + "aagctg": "aagctg", + "aagctt": "aagctt", + "aaggaa": "aaaagg", + 
"aaggac": "aaggac", + "aaggag": "aaggag", + "aaggat": "aaggat", + "aaggca": "aaaggc", + "aaggcc": "aaggcc", + "aaggcg": "aaggcg", + "aaggct": "aaggct", + "aaggga": "aaaggg", + "aagggc": "aagggc", + "aagggg": "aagggg", + "aagggt": "aagggt", + "aaggta": "aaaggt", + "aaggtc": "aaggtc", + "aaggtg": "aaggtg", + "aaggtt": "aaggtt", + "aagtaa": "aaaagt", + "aagtac": "aagtac", + "aagtag": "aagtag", + "aagtat": "aagtat", + "aagtca": "aaagtc", + "aagtcc": "aagtcc", + "aagtcg": "aagtcg", + "aagtct": "aagtct", + "aagtga": "aaagtg", + "aagtgc": "aagtgc", + "aagtgg": "aagtgg", + "aagtgt": "aagtgt", + "aagtta": "aaagtt", + "aagttc": "aagttc", + "aagttg": "aagttg", + "aagttt": "aagttt", + "aataaa": "aaaaat", + "aataac": "aacaat", + "aataag": "aagaat", + "aataat": "aataat", + "aataca": "aaatac", + "aatacc": "aatacc", + "aatacg": "aatacg", + "aatact": "aatact", + "aataga": "aaatag", + "aatagc": "aatagc", + "aatagg": "aatagg", + "aatagt": "aatagt", + "aatata": "aaatat", + "aatatc": "aatatc", + "aatatg": "aatatg", + "aatatt": "aatatt", + "aatcaa": "aaaatc", + "aatcac": "aatcac", + "aatcag": "aatcag", + "aatcat": "aatcat", + "aatcca": "aaatcc", + "aatccc": "aatccc", + "aatccg": "aatccg", + "aatcct": "aatcct", + "aatcga": "aaatcg", + "aatcgc": "aatcgc", + "aatcgg": "aatcgg", + "aatcgt": "aatcgt", + "aatcta": "aaatct", + "aatctc": "aatctc", + "aatctg": "aatctg", + "aatctt": "aatctt", + "aatgaa": "aaaatg", + "aatgac": "aatgac", + "aatgag": "aatgag", + "aatgat": "aatgat", + "aatgca": "aaatgc", + "aatgcc": "aatgcc", + "aatgcg": "aatgcg", + "aatgct": "aatgct", + "aatgga": "aaatgg", + "aatggc": "aatggc", + "aatggg": "aatggg", + "aatggt": "aatggt", + "aatgta": "aaatgt", + "aatgtc": "aatgtc", + "aatgtg": "aatgtg", + "aatgtt": "aatgtt", + "aattaa": "aaaatt", + "aattac": "aattac", + "aattag": "aattag", + "aattat": "aattat", + "aattca": "aaattc", + "aattcc": "aattcc", + "aattcg": "aattcg", + "aattct": "aattct", + "aattga": "aaattg", + "aattgc": "aattgc", + "aattgg": "aattgg", + "aattgt": "aattgt", 
+ "aattta": "aaattt", + "aatttc": "aatttc", + "aatttg": "aatttg", + "aatttt": "aatttt", + "acaaaa": "aaaaac", + "acaaac": "aaacac", + "acaaag": "aaagac", + "acaaat": "aaatac", + "acaaca": "aacaac", + "acaacc": "aaccac", + "acaacg": "aacgac", + "acaact": "aactac", + "acaaga": "aacaag", + "acaagc": "aagcac", + "acaagg": "aaggac", + "acaagt": "aagtac", + "acaata": "aacaat", + "acaatc": "aatcac", + "acaatg": "aatgac", + "acaatt": "aattac", + "acacaa": "aaacac", + "acacac": "acacac", + "acacag": "acacag", + "acacat": "acacat", + "acacca": "aacacc", + "acaccc": "acaccc", + "acaccg": "acaccg", + "acacct": "acacct", + "acacga": "aacacg", + "acacgc": "acacgc", + "acacgg": "acacgg", + "acacgt": "acacgt", + "acacta": "aacact", + "acactc": "acactc", + "acactg": "acactg", + "acactt": "acactt", + "acagaa": "aaacag", + "acagac": "acacag", + "acagag": "acagag", + "acagat": "acagat", + "acagca": "aacagc", + "acagcc": "acagcc", + "acagcg": "acagcg", + "acagct": "acagct", + "acagga": "aacagg", + "acaggc": "acaggc", + "acaggg": "acaggg", + "acaggt": "acaggt", + "acagta": "aacagt", + "acagtc": "acagtc", + "acagtg": "acagtg", + "acagtt": "acagtt", + "acataa": "aaacat", + "acatac": "acacat", + "acatag": "acatag", + "acatat": "acatat", + "acatca": "aacatc", + "acatcc": "acatcc", + "acatcg": "acatcg", + "acatct": "acatct", + "acatga": "aacatg", + "acatgc": "acatgc", + "acatgg": "acatgg", + "acatgt": "acatgt", + "acatta": "aacatt", + "acattc": "acattc", + "acattg": "acattg", + "acattt": "acattt", + "accaaa": "aaaacc", + "accaac": "aacacc", + "accaag": "aagacc", + "accaat": "aatacc", + "accaca": "aaccac", + "accacc": "accacc", + "accacg": "accacg", + "accact": "accact", + "accaga": "aaccag", + "accagc": "accagc", + "accagg": "accagg", + "accagt": "accagt", + "accata": "aaccat", + "accatc": "accatc", + "accatg": "accatg", + "accatt": "accatt", + "acccaa": "aaaccc", + "acccac": "acaccc", + "acccag": "acccag", + "acccat": "acccat", + "acccca": "aacccc", + "accccc": "accccc", + "accccg": 
"accccg", + "acccct": "acccct", + "acccga": "aacccg", + "acccgc": "acccgc", + "acccgg": "acccgg", + "acccgt": "acccgt", + "acccta": "aaccct", + "accctc": "accctc", + "accctg": "accctg", + "accctt": "accctt", + "accgaa": "aaaccg", + "accgac": "acaccg", + "accgag": "accgag", + "accgat": "accgat", + "accgca": "aaccgc", + "accgcc": "accgcc", + "accgcg": "accgcg", + "accgct": "accgct", + "accgga": "aaccgg", + "accggc": "accggc", + "accggg": "accggg", + "accggt": "accggt", + "accgta": "aaccgt", + "accgtc": "accgtc", + "accgtg": "accgtg", + "accgtt": "accgtt", + "acctaa": "aaacct", + "acctac": "acacct", + "acctag": "acctag", + "acctat": "acctat", + "acctca": "aacctc", + "acctcc": "acctcc", + "acctcg": "acctcg", + "acctct": "acctct", + "acctga": "aacctg", + "acctgc": "acctgc", + "acctgg": "acctgg", + "acctgt": "acctgt", + "acctta": "aacctt", + "accttc": "accttc", + "accttg": "accttg", + "accttt": "accttt", + "acgaaa": "aaaacg", + "acgaac": "aacacg", + "acgaag": "aagacg", + "acgaat": "aatacg", + "acgaca": "aacgac", + "acgacc": "accacg", + "acgacg": "acgacg", + "acgact": "acgact", + "acgaga": "aacgag", + "acgagc": "acgagc", + "acgagg": "acgagg", + "acgagt": "acgagt", + "acgata": "aacgat", + "acgatc": "acgatc", + "acgatg": "acgatg", + "acgatt": "acgatt", + "acgcaa": "aaacgc", + "acgcac": "acacgc", + "acgcag": "acgcag", + "acgcat": "acgcat", + "acgcca": "aacgcc", + "acgccc": "acgccc", + "acgccg": "acgccg", + "acgcct": "acgcct", + "acgcga": "aacgcg", + "acgcgc": "acgcgc", + "acgcgg": "acgcgg", + "acgcgt": "acgcgt", + "acgcta": "aacgct", + "acgctc": "acgctc", + "acgctg": "acgctg", + "acgctt": "acgctt", + "acggaa": "aaacgg", + "acggac": "acacgg", + "acggag": "acggag", + "acggat": "acggat", + "acggca": "aacggc", + "acggcc": "acggcc", + "acggcg": "acggcg", + "acggct": "acggct", + "acggga": "aacggg", + "acgggc": "acgggc", + "acgggg": "acgggg", + "acgggt": "acgggt", + "acggta": "aacggt", + "acggtc": "acggtc", + "acggtg": "acggtg", + "acggtt": "acggtt", + "acgtaa": "aaacgt", + 
"acgtac": "acacgt", + "acgtag": "acgtag", + "acgtat": "acgtat", + "acgtca": "aacgtc", + "acgtcc": "acgtcc", + "acgtcg": "acgtcg", + "acgtct": "acgtct", + "acgtga": "aacgtg", + "acgtgc": "acgtgc", + "acgtgg": "acgtgg", + "acgtgt": "acgtgt", + "acgtta": "aacgtt", + "acgttc": "acgttc", + "acgttg": "acgttg", + "acgttt": "acgttt", + "actaaa": "aaaact", + "actaac": "aacact", + "actaag": "aagact", + "actaat": "aatact", + "actaca": "aactac", + "actacc": "accact", + "actacg": "acgact", + "actact": "actact", + "actaga": "aactag", + "actagc": "actagc", + "actagg": "actagg", + "actagt": "actagt", + "actata": "aactat", + "actatc": "actatc", + "actatg": "actatg", + "actatt": "actatt", + "actcaa": "aaactc", + "actcac": "acactc", + "actcag": "actcag", + "actcat": "actcat", + "actcca": "aactcc", + "actccc": "actccc", + "actccg": "actccg", + "actcct": "actcct", + "actcga": "aactcg", + "actcgc": "actcgc", + "actcgg": "actcgg", + "actcgt": "actcgt", + "actcta": "aactct", + "actctc": "actctc", + "actctg": "actctg", + "actctt": "actctt", + "actgaa": "aaactg", + "actgac": "acactg", + "actgag": "actgag", + "actgat": "actgat", + "actgca": "aactgc", + "actgcc": "actgcc", + "actgcg": "actgcg", + "actgct": "actgct", + "actgga": "aactgg", + "actggc": "actggc", + "actggg": "actggg", + "actggt": "actggt", + "actgta": "aactgt", + "actgtc": "actgtc", + "actgtg": "actgtg", + "actgtt": "actgtt", + "acttaa": "aaactt", + "acttac": "acactt", + "acttag": "acttag", + "acttat": "acttat", + "acttca": "aacttc", + "acttcc": "acttcc", + "acttcg": "acttcg", + "acttct": "acttct", + "acttga": "aacttg", + "acttgc": "acttgc", + "acttgg": "acttgg", + "acttgt": "acttgt", + "acttta": "aacttt", + "actttc": "actttc", + "actttg": "actttg", + "actttt": "actttt", + "agaaaa": "aaaaag", + "agaaac": "aaacag", + "agaaag": "aaagag", + "agaaat": "aaatag", + "agaaca": "aacaag", + "agaacc": "aaccag", + "agaacg": "aacgag", + "agaact": "aactag", + "agaaga": "aagaag", + "agaagc": "aagcag", + "agaagg": "aaggag", + "agaagt": "aagtag", 
+ "agaata": "aagaat", + "agaatc": "aatcag", + "agaatg": "aatgag", + "agaatt": "aattag", + "agacaa": "aaagac", + "agacac": "acacag", + "agacag": "acagag", + "agacat": "acatag", + "agacca": "aagacc", + "agaccc": "acccag", + "agaccg": "accgag", + "agacct": "acctag", + "agacga": "aagacg", + "agacgc": "acgcag", + "agacgg": "acggag", + "agacgt": "acgtag", + "agacta": "aagact", + "agactc": "actcag", + "agactg": "actgag", + "agactt": "acttag", + "agagaa": "aaagag", + "agagac": "acagag", + "agagag": "agagag", + "agagat": "agagat", + "agagca": "aagagc", + "agagcc": "agagcc", + "agagcg": "agagcg", + "agagct": "agagct", + "agagga": "aagagg", + "agaggc": "agaggc", + "agaggg": "agaggg", + "agaggt": "agaggt", + "agagta": "aagagt", + "agagtc": "agagtc", + "agagtg": "agagtg", + "agagtt": "agagtt", + "agataa": "aaagat", + "agatac": "acagat", + "agatag": "agagat", + "agatat": "agatat", + "agatca": "aagatc", + "agatcc": "agatcc", + "agatcg": "agatcg", + "agatct": "agatct", + "agatga": "aagatg", + "agatgc": "agatgc", + "agatgg": "agatgg", + "agatgt": "agatgt", + "agatta": "aagatt", + "agattc": "agattc", + "agattg": "agattg", + "agattt": "agattt", + "agcaaa": "aaaagc", + "agcaac": "aacagc", + "agcaag": "aagagc", + "agcaat": "aatagc", + "agcaca": "aagcac", + "agcacc": "accagc", + "agcacg": "acgagc", + "agcact": "actagc", + "agcaga": "aagcag", + "agcagc": "agcagc", + "agcagg": "agcagg", + "agcagt": "agcagt", + "agcata": "aagcat", + "agcatc": "agcatc", + "agcatg": "agcatg", + "agcatt": "agcatt", + "agccaa": "aaagcc", + "agccac": "acagcc", + "agccag": "agagcc", + "agccat": "agccat", + "agccca": "aagccc", + "agcccc": "agcccc", + "agcccg": "agcccg", + "agccct": "agccct", + "agccga": "aagccg", + "agccgc": "agccgc", + "agccgg": "agccgg", + "agccgt": "agccgt", + "agccta": "aagcct", + "agcctc": "agcctc", + "agcctg": "agcctg", + "agcctt": "agcctt", + "agcgaa": "aaagcg", + "agcgac": "acagcg", + "agcgag": "agagcg", + "agcgat": "agcgat", + "agcgca": "aagcgc", + "agcgcc": "agcgcc", + "agcgcg": 
"agcgcg", + "agcgct": "agcgct", + "agcgga": "aagcgg", + "agcggc": "agcggc", + "agcggg": "agcggg", + "agcggt": "agcggt", + "agcgta": "aagcgt", + "agcgtc": "agcgtc", + "agcgtg": "agcgtg", + "agcgtt": "agcgtt", + "agctaa": "aaagct", + "agctac": "acagct", + "agctag": "agagct", + "agctat": "agctat", + "agctca": "aagctc", + "agctcc": "agctcc", + "agctcg": "agctcg", + "agctct": "agctct", + "agctga": "aagctg", + "agctgc": "agctgc", + "agctgg": "agctgg", + "agctgt": "agctgt", + "agctta": "aagctt", + "agcttc": "agcttc", + "agcttg": "agcttg", + "agcttt": "agcttt", + "aggaaa": "aaaagg", + "aggaac": "aacagg", + "aggaag": "aagagg", + "aggaat": "aatagg", + "aggaca": "aaggac", + "aggacc": "accagg", + "aggacg": "acgagg", + "aggact": "actagg", + "aggaga": "aaggag", + "aggagc": "agcagg", + "aggagg": "aggagg", + "aggagt": "aggagt", + "aggata": "aaggat", + "aggatc": "aggatc", + "aggatg": "aggatg", + "aggatt": "aggatt", + "aggcaa": "aaaggc", + "aggcac": "acaggc", + "aggcag": "agaggc", + "aggcat": "aggcat", + "aggcca": "aaggcc", + "aggccc": "aggccc", + "aggccg": "aggccg", + "aggcct": "aggcct", + "aggcga": "aaggcg", + "aggcgc": "aggcgc", + "aggcgg": "aggcgg", + "aggcgt": "aggcgt", + "aggcta": "aaggct", + "aggctc": "aggctc", + "aggctg": "aggctg", + "aggctt": "aggctt", + "agggaa": "aaaggg", + "agggac": "acaggg", + "agggag": "agaggg", + "agggat": "agggat", + "agggca": "aagggc", + "agggcc": "agggcc", + "agggcg": "agggcg", + "agggct": "agggct", + "agggga": "aagggg", + "aggggc": "aggggc", + "aggggg": "aggggg", + "aggggt": "aggggt", + "agggta": "aagggt", + "agggtc": "agggtc", + "agggtg": "agggtg", + "agggtt": "agggtt", + "aggtaa": "aaaggt", + "aggtac": "acaggt", + "aggtag": "agaggt", + "aggtat": "aggtat", + "aggtca": "aaggtc", + "aggtcc": "aggtcc", + "aggtcg": "aggtcg", + "aggtct": "aggtct", + "aggtga": "aaggtg", + "aggtgc": "aggtgc", + "aggtgg": "aggtgg", + "aggtgt": "aggtgt", + "aggtta": "aaggtt", + "aggttc": "aggttc", + "aggttg": "aggttg", + "aggttt": "aggttt", + "agtaaa": "aaaagt", + 
"agtaac": "aacagt", + "agtaag": "aagagt", + "agtaat": "aatagt", + "agtaca": "aagtac", + "agtacc": "accagt", + "agtacg": "acgagt", + "agtact": "actagt", + "agtaga": "aagtag", + "agtagc": "agcagt", + "agtagg": "aggagt", + "agtagt": "agtagt", + "agtata": "aagtat", + "agtatc": "agtatc", + "agtatg": "agtatg", + "agtatt": "agtatt", + "agtcaa": "aaagtc", + "agtcac": "acagtc", + "agtcag": "agagtc", + "agtcat": "agtcat", + "agtcca": "aagtcc", + "agtccc": "agtccc", + "agtccg": "agtccg", + "agtcct": "agtcct", + "agtcga": "aagtcg", + "agtcgc": "agtcgc", + "agtcgg": "agtcgg", + "agtcgt": "agtcgt", + "agtcta": "aagtct", + "agtctc": "agtctc", + "agtctg": "agtctg", + "agtctt": "agtctt", + "agtgaa": "aaagtg", + "agtgac": "acagtg", + "agtgag": "agagtg", + "agtgat": "agtgat", + "agtgca": "aagtgc", + "agtgcc": "agtgcc", + "agtgcg": "agtgcg", + "agtgct": "agtgct", + "agtgga": "aagtgg", + "agtggc": "agtggc", + "agtggg": "agtggg", + "agtggt": "agtggt", + "agtgta": "aagtgt", + "agtgtc": "agtgtc", + "agtgtg": "agtgtg", + "agtgtt": "agtgtt", + "agttaa": "aaagtt", + "agttac": "acagtt", + "agttag": "agagtt", + "agttat": "agttat", + "agttca": "aagttc", + "agttcc": "agttcc", + "agttcg": "agttcg", + "agttct": "agttct", + "agttga": "aagttg", + "agttgc": "agttgc", + "agttgg": "agttgg", + "agttgt": "agttgt", + "agttta": "aagttt", + "agtttc": "agtttc", + "agtttg": "agtttg", + "agtttt": "agtttt", + "ataaaa": "aaaaat", + "ataaac": "aaacat", + "ataaag": "aaagat", + "ataaat": "aaatat", + "ataaca": "aacaat", + "ataacc": "aaccat", + "ataacg": "aacgat", + "ataact": "aactat", + "ataaga": "aagaat", + "ataagc": "aagcat", + "ataagg": "aaggat", + "ataagt": "aagtat", + "ataata": "aataat", + "ataatc": "aatcat", + "ataatg": "aatgat", + "ataatt": "aattat", + "atacaa": "aaatac", + "atacac": "acacat", + "atacag": "acagat", + "atacat": "acatat", + "atacca": "aatacc", + "ataccc": "acccat", + "ataccg": "accgat", + "atacct": "acctat", + "atacga": "aatacg", + "atacgc": "acgcat", + "atacgg": "acggat", + "atacgt": "acgtat", 
+ "atacta": "aatact", + "atactc": "actcat", + "atactg": "actgat", + "atactt": "acttat", + "atagaa": "aaatag", + "atagac": "acatag", + "atagag": "agagat", + "atagat": "agatat", + "atagca": "aatagc", + "atagcc": "agccat", + "atagcg": "agcgat", + "atagct": "agctat", + "atagga": "aatagg", + "ataggc": "aggcat", + "ataggg": "agggat", + "ataggt": "aggtat", + "atagta": "aatagt", + "atagtc": "agtcat", + "atagtg": "agtgat", + "atagtt": "agttat", + "atataa": "aaatat", + "atatac": "acatat", + "atatag": "agatat", + "atatat": "atatat", + "atatca": "aatatc", + "atatcc": "atatcc", + "atatcg": "atatcg", + "atatct": "atatct", + "atatga": "aatatg", + "atatgc": "atatgc", + "atatgg": "atatgg", + "atatgt": "atatgt", + "atatta": "aatatt", + "atattc": "atattc", + "atattg": "atattg", + "atattt": "atattt", + "atcaaa": "aaaatc", + "atcaac": "aacatc", + "atcaag": "aagatc", + "atcaat": "aatatc", + "atcaca": "aatcac", + "atcacc": "accatc", + "atcacg": "acgatc", + "atcact": "actatc", + "atcaga": "aatcag", + "atcagc": "agcatc", + "atcagg": "aggatc", + "atcagt": "agtatc", + "atcata": "aatcat", + "atcatc": "atcatc", + "atcatg": "atcatg", + "atcatt": "atcatt", + "atccaa": "aaatcc", + "atccac": "acatcc", + "atccag": "agatcc", + "atccat": "atatcc", + "atccca": "aatccc", + "atcccc": "atcccc", + "atcccg": "atcccg", + "atccct": "atccct", + "atccga": "aatccg", + "atccgc": "atccgc", + "atccgg": "atccgg", + "atccgt": "atccgt", + "atccta": "aatcct", + "atcctc": "atcctc", + "atcctg": "atcctg", + "atcctt": "atcctt", + "atcgaa": "aaatcg", + "atcgac": "acatcg", + "atcgag": "agatcg", + "atcgat": "atatcg", + "atcgca": "aatcgc", + "atcgcc": "atcgcc", + "atcgcg": "atcgcg", + "atcgct": "atcgct", + "atcgga": "aatcgg", + "atcggc": "atcggc", + "atcggg": "atcggg", + "atcggt": "atcggt", + "atcgta": "aatcgt", + "atcgtc": "atcgtc", + "atcgtg": "atcgtg", + "atcgtt": "atcgtt", + "atctaa": "aaatct", + "atctac": "acatct", + "atctag": "agatct", + "atctat": "atatct", + "atctca": "aatctc", + "atctcc": "atctcc", + "atctcg": 
"atctcg", + "atctct": "atctct", + "atctga": "aatctg", + "atctgc": "atctgc", + "atctgg": "atctgg", + "atctgt": "atctgt", + "atctta": "aatctt", + "atcttc": "atcttc", + "atcttg": "atcttg", + "atcttt": "atcttt", + "atgaaa": "aaaatg", + "atgaac": "aacatg", + "atgaag": "aagatg", + "atgaat": "aatatg", + "atgaca": "aatgac", + "atgacc": "accatg", + "atgacg": "acgatg", + "atgact": "actatg", + "atgaga": "aatgag", + "atgagc": "agcatg", + "atgagg": "aggatg", + "atgagt": "agtatg", + "atgata": "aatgat", + "atgatc": "atcatg", + "atgatg": "atgatg", + "atgatt": "atgatt", + "atgcaa": "aaatgc", + "atgcac": "acatgc", + "atgcag": "agatgc", + "atgcat": "atatgc", + "atgcca": "aatgcc", + "atgccc": "atgccc", + "atgccg": "atgccg", + "atgcct": "atgcct", + "atgcga": "aatgcg", + "atgcgc": "atgcgc", + "atgcgg": "atgcgg", + "atgcgt": "atgcgt", + "atgcta": "aatgct", + "atgctc": "atgctc", + "atgctg": "atgctg", + "atgctt": "atgctt", + "atggaa": "aaatgg", + "atggac": "acatgg", + "atggag": "agatgg", + "atggat": "atatgg", + "atggca": "aatggc", + "atggcc": "atggcc", + "atggcg": "atggcg", + "atggct": "atggct", + "atggga": "aatggg", + "atgggc": "atgggc", + "atgggg": "atgggg", + "atgggt": "atgggt", + "atggta": "aatggt", + "atggtc": "atggtc", + "atggtg": "atggtg", + "atggtt": "atggtt", + "atgtaa": "aaatgt", + "atgtac": "acatgt", + "atgtag": "agatgt", + "atgtat": "atatgt", + "atgtca": "aatgtc", + "atgtcc": "atgtcc", + "atgtcg": "atgtcg", + "atgtct": "atgtct", + "atgtga": "aatgtg", + "atgtgc": "atgtgc", + "atgtgg": "atgtgg", + "atgtgt": "atgtgt", + "atgtta": "aatgtt", + "atgttc": "atgttc", + "atgttg": "atgttg", + "atgttt": "atgttt", + "attaaa": "aaaatt", + "attaac": "aacatt", + "attaag": "aagatt", + "attaat": "aatatt", + "attaca": "aattac", + "attacc": "accatt", + "attacg": "acgatt", + "attact": "actatt", + "attaga": "aattag", + "attagc": "agcatt", + "attagg": "aggatt", + "attagt": "agtatt", + "attata": "aattat", + "attatc": "atcatt", + "attatg": "atgatt", + "attatt": "attatt", + "attcaa": "aaattc", + 
"attcac": "acattc", + "attcag": "agattc", + "attcat": "atattc", + "attcca": "aattcc", + "attccc": "attccc", + "attccg": "attccg", + "attcct": "attcct", + "attcga": "aattcg", + "attcgc": "attcgc", + "attcgg": "attcgg", + "attcgt": "attcgt", + "attcta": "aattct", + "attctc": "attctc", + "attctg": "attctg", + "attctt": "attctt", + "attgaa": "aaattg", + "attgac": "acattg", + "attgag": "agattg", + "attgat": "atattg", + "attgca": "aattgc", + "attgcc": "attgcc", + "attgcg": "attgcg", + "attgct": "attgct", + "attgga": "aattgg", + "attggc": "attggc", + "attggg": "attggg", + "attggt": "attggt", + "attgta": "aattgt", + "attgtc": "attgtc", + "attgtg": "attgtg", + "attgtt": "attgtt", + "atttaa": "aaattt", + "atttac": "acattt", + "atttag": "agattt", + "atttat": "atattt", + "atttca": "aatttc", + "atttcc": "atttcc", + "atttcg": "atttcg", + "atttct": "atttct", + "atttga": "aatttg", + "atttgc": "atttgc", + "atttgg": "atttgg", + "atttgt": "atttgt", + "atttta": "aatttt", + "attttc": "attttc", + "attttg": "attttg", + "attttt": "attttt", + "caaaaa": "aaaaac", + "caaaac": "aaaacc", + "caaaag": "aaaagc", + "caaaat": "aaaatc", + "caaaca": "aaacac", + "caaacc": "aaaccc", + "caaacg": "aaacgc", + "caaact": "aaactc", + "caaaga": "aaagac", + "caaagc": "aaagcc", + "caaagg": "aaaggc", + "caaagt": "aaagtc", + "caaata": "aaatac", + "caaatc": "aaatcc", + "caaatg": "aaatgc", + "caaatt": "aaattc", + "caacaa": "aacaac", + "caacac": "aacacc", + "caacag": "aacagc", + "caacat": "aacatc", + "caacca": "aaccac", + "caaccc": "aacccc", + "caaccg": "aaccgc", + "caacct": "aacctc", + "caacga": "aacgac", + "caacgc": "aacgcc", + "caacgg": "aacggc", + "caacgt": "aacgtc", + "caacta": "aactac", + "caactc": "aactcc", + "caactg": "aactgc", + "caactt": "aacttc", + "caagaa": "aacaag", + "caagac": "aagacc", + "caagag": "aagagc", + "caagat": "aagatc", + "caagca": "aagcac", + "caagcc": "aagccc", + "caagcg": "aagcgc", + "caagct": "aagctc", + "caagga": "aaggac", + "caaggc": "aaggcc", + "caaggg": "aagggc", + "caaggt": "aaggtc", 
+ "caagta": "aagtac", + "caagtc": "aagtcc", + "caagtg": "aagtgc", + "caagtt": "aagttc", + "caataa": "aacaat", + "caatac": "aatacc", + "caatag": "aatagc", + "caatat": "aatatc", + "caatca": "aatcac", + "caatcc": "aatccc", + "caatcg": "aatcgc", + "caatct": "aatctc", + "caatga": "aatgac", + "caatgc": "aatgcc", + "caatgg": "aatggc", + "caatgt": "aatgtc", + "caatta": "aattac", + "caattc": "aattcc", + "caattg": "aattgc", + "caattt": "aatttc", + "cacaaa": "aaacac", + "cacaac": "aaccac", + "cacaag": "aagcac", + "cacaat": "aatcac", + "cacaca": "acacac", + "cacacc": "acaccc", + "cacacg": "acacgc", + "cacact": "acactc", + "cacaga": "acacag", + "cacagc": "acagcc", + "cacagg": "acaggc", + "cacagt": "acagtc", + "cacata": "acacat", + "cacatc": "acatcc", + "cacatg": "acatgc", + "cacatt": "acattc", + "caccaa": "aacacc", + "caccac": "accacc", + "caccag": "accagc", + "caccat": "accatc", + "caccca": "acaccc", + "cacccc": "accccc", + "cacccg": "acccgc", + "caccct": "accctc", + "caccga": "acaccg", + "caccgc": "accgcc", + "caccgg": "accggc", + "caccgt": "accgtc", + "caccta": "acacct", + "cacctc": "acctcc", + "cacctg": "acctgc", + "cacctt": "accttc", + "cacgaa": "aacacg", + "cacgac": "accacg", + "cacgag": "acgagc", + "cacgat": "acgatc", + "cacgca": "acacgc", + "cacgcc": "acgccc", + "cacgcg": "acgcgc", + "cacgct": "acgctc", + "cacgga": "acacgg", + "cacggc": "acggcc", + "cacggg": "acgggc", + "cacggt": "acggtc", + "cacgta": "acacgt", + "cacgtc": "acgtcc", + "cacgtg": "acgtgc", + "cacgtt": "acgttc", + "cactaa": "aacact", + "cactac": "accact", + "cactag": "actagc", + "cactat": "actatc", + "cactca": "acactc", + "cactcc": "actccc", + "cactcg": "actcgc", + "cactct": "actctc", + "cactga": "acactg", + "cactgc": "actgcc", + "cactgg": "actggc", + "cactgt": "actgtc", + "cactta": "acactt", + "cacttc": "acttcc", + "cacttg": "acttgc", + "cacttt": "actttc", + "cagaaa": "aaacag", + "cagaac": "aaccag", + "cagaag": "aagcag", + "cagaat": "aatcag", + "cagaca": "acacag", + "cagacc": "acccag", + "cagacg": 
"acgcag", + "cagact": "actcag", + "cagaga": "acagag", + "cagagc": "agagcc", + "cagagg": "agaggc", + "cagagt": "agagtc", + "cagata": "acagat", + "cagatc": "agatcc", + "cagatg": "agatgc", + "cagatt": "agattc", + "cagcaa": "aacagc", + "cagcac": "accagc", + "cagcag": "agcagc", + "cagcat": "agcatc", + "cagcca": "acagcc", + "cagccc": "agcccc", + "cagccg": "agccgc", + "cagcct": "agcctc", + "cagcga": "acagcg", + "cagcgc": "agcgcc", + "cagcgg": "agcggc", + "cagcgt": "agcgtc", + "cagcta": "acagct", + "cagctc": "agctcc", + "cagctg": "agctgc", + "cagctt": "agcttc", + "caggaa": "aacagg", + "caggac": "accagg", + "caggag": "agcagg", + "caggat": "aggatc", + "caggca": "acaggc", + "caggcc": "aggccc", + "caggcg": "aggcgc", + "caggct": "aggctc", + "caggga": "acaggg", + "cagggc": "agggcc", + "cagggg": "aggggc", + "cagggt": "agggtc", + "caggta": "acaggt", + "caggtc": "aggtcc", + "caggtg": "aggtgc", + "caggtt": "aggttc", + "cagtaa": "aacagt", + "cagtac": "accagt", + "cagtag": "agcagt", + "cagtat": "agtatc", + "cagtca": "acagtc", + "cagtcc": "agtccc", + "cagtcg": "agtcgc", + "cagtct": "agtctc", + "cagtga": "acagtg", + "cagtgc": "agtgcc", + "cagtgg": "agtggc", + "cagtgt": "agtgtc", + "cagtta": "acagtt", + "cagttc": "agttcc", + "cagttg": "agttgc", + "cagttt": "agtttc", + "cataaa": "aaacat", + "cataac": "aaccat", + "cataag": "aagcat", + "cataat": "aatcat", + "cataca": "acacat", + "catacc": "acccat", + "catacg": "acgcat", + "catact": "actcat", + "cataga": "acatag", + "catagc": "agccat", + "catagg": "aggcat", + "catagt": "agtcat", + "catata": "acatat", + "catatc": "atatcc", + "catatg": "atatgc", + "catatt": "atattc", + "catcaa": "aacatc", + "catcac": "accatc", + "catcag": "agcatc", + "catcat": "atcatc", + "catcca": "acatcc", + "catccc": "atcccc", + "catccg": "atccgc", + "catcct": "atcctc", + "catcga": "acatcg", + "catcgc": "atcgcc", + "catcgg": "atcggc", + "catcgt": "atcgtc", + "catcta": "acatct", + "catctc": "atctcc", + "catctg": "atctgc", + "catctt": "atcttc", + "catgaa": "aacatg", + 
"catgac": "accatg", + "catgag": "agcatg", + "catgat": "atcatg", + "catgca": "acatgc", + "catgcc": "atgccc", + "catgcg": "atgcgc", + "catgct": "atgctc", + "catgga": "acatgg", + "catggc": "atggcc", + "catggg": "atgggc", + "catggt": "atggtc", + "catgta": "acatgt", + "catgtc": "atgtcc", + "catgtg": "atgtgc", + "catgtt": "atgttc", + "cattaa": "aacatt", + "cattac": "accatt", + "cattag": "agcatt", + "cattat": "atcatt", + "cattca": "acattc", + "cattcc": "attccc", + "cattcg": "attcgc", + "cattct": "attctc", + "cattga": "acattg", + "cattgc": "attgcc", + "cattgg": "attggc", + "cattgt": "attgtc", + "cattta": "acattt", + "catttc": "atttcc", + "catttg": "atttgc", + "catttt": "attttc", + "ccaaaa": "aaaacc", + "ccaaac": "aaaccc", + "ccaaag": "aaagcc", + "ccaaat": "aaatcc", + "ccaaca": "aacacc", + "ccaacc": "aacccc", + "ccaacg": "aacgcc", + "ccaact": "aactcc", + "ccaaga": "aagacc", + "ccaagc": "aagccc", + "ccaagg": "aaggcc", + "ccaagt": "aagtcc", + "ccaata": "aatacc", + "ccaatc": "aatccc", + "ccaatg": "aatgcc", + "ccaatt": "aattcc", + "ccacaa": "aaccac", + "ccacac": "acaccc", + "ccacag": "acagcc", + "ccacat": "acatcc", + "ccacca": "accacc", + "ccaccc": "accccc", + "ccaccg": "accgcc", + "ccacct": "acctcc", + "ccacga": "accacg", + "ccacgc": "acgccc", + "ccacgg": "acggcc", + "ccacgt": "acgtcc", + "ccacta": "accact", + "ccactc": "actccc", + "ccactg": "actgcc", + "ccactt": "acttcc", + "ccagaa": "aaccag", + "ccagac": "acccag", + "ccagag": "agagcc", + "ccagat": "agatcc", + "ccagca": "accagc", + "ccagcc": "agcccc", + "ccagcg": "agcgcc", + "ccagct": "agctcc", + "ccagga": "accagg", + "ccaggc": "aggccc", + "ccaggg": "agggcc", + "ccaggt": "aggtcc", + "ccagta": "accagt", + "ccagtc": "agtccc", + "ccagtg": "agtgcc", + "ccagtt": "agttcc", + "ccataa": "aaccat", + "ccatac": "acccat", + "ccatag": "agccat", + "ccatat": "atatcc", + "ccatca": "accatc", + "ccatcc": "atcccc", + "ccatcg": "atcgcc", + "ccatct": "atctcc", + "ccatga": "accatg", + "ccatgc": "atgccc", + "ccatgg": "atggcc", + "ccatgt": "atgtcc", 
+ "ccatta": "accatt", + "ccattc": "attccc", + "ccattg": "attgcc", + "ccattt": "atttcc", + "cccaaa": "aaaccc", + "cccaac": "aacccc", + "cccaag": "aagccc", + "cccaat": "aatccc", + "cccaca": "acaccc", + "cccacc": "accccc", + "cccacg": "acgccc", + "cccact": "actccc", + "cccaga": "acccag", + "cccagc": "agcccc", + "cccagg": "aggccc", + "cccagt": "agtccc", + "cccata": "acccat", + "cccatc": "atcccc", + "cccatg": "atgccc", + "cccatt": "attccc", + "ccccaa": "aacccc", + "ccccac": "accccc", + "ccccag": "agcccc", + "ccccat": "atcccc", + "ccccca": "accccc", + "cccccc": "cccccc", + "cccccg": "cccccg", + "ccccct": "ccccct", + "ccccga": "accccg", + "ccccgc": "cccccg", + "ccccgg": "ccccgg", + "ccccgt": "ccccgt", + "ccccta": "acccct", + "cccctc": "ccccct", + "cccctg": "cccctg", + "cccctt": "cccctt", + "cccgaa": "aacccg", + "cccgac": "accccg", + "cccgag": "agcccg", + "cccgat": "atcccg", + "cccgca": "acccgc", + "cccgcc": "cccccg", + "cccgcg": "cccgcg", + "cccgct": "cccgct", + "cccgga": "acccgg", + "cccggc": "ccccgg", + "cccggg": "cccggg", + "cccggt": "cccggt", + "cccgta": "acccgt", + "cccgtc": "ccccgt", + "cccgtg": "cccgtg", + "cccgtt": "cccgtt", + "ccctaa": "aaccct", + "ccctac": "acccct", + "ccctag": "agccct", + "ccctat": "atccct", + "ccctca": "accctc", + "ccctcc": "ccccct", + "ccctcg": "ccctcg", + "ccctct": "ccctct", + "ccctga": "accctg", + "ccctgc": "cccctg", + "ccctgg": "ccctgg", + "ccctgt": "ccctgt", + "ccctta": "accctt", + "cccttc": "cccctt", + "cccttg": "cccttg", + "cccttt": "cccttt", + "ccgaaa": "aaaccg", + "ccgaac": "aacccg", + "ccgaag": "aagccg", + "ccgaat": "aatccg", + "ccgaca": "acaccg", + "ccgacc": "accccg", + "ccgacg": "acgccg", + "ccgact": "actccg", + "ccgaga": "accgag", + "ccgagc": "agcccg", + "ccgagg": "aggccg", + "ccgagt": "agtccg", + "ccgata": "accgat", + "ccgatc": "atcccg", + "ccgatg": "atgccg", + "ccgatt": "attccg", + "ccgcaa": "aaccgc", + "ccgcac": "acccgc", + "ccgcag": "agccgc", + "ccgcat": "atccgc", + "ccgcca": "accgcc", + "ccgccc": "cccccg", + "ccgccg": 
"ccgccg", + "ccgcct": "ccgcct", + "ccgcga": "accgcg", + "ccgcgc": "cccgcg", + "ccgcgg": "ccgcgg", + "ccgcgt": "ccgcgt", + "ccgcta": "accgct", + "ccgctc": "cccgct", + "ccgctg": "ccgctg", + "ccgctt": "ccgctt", + "ccggaa": "aaccgg", + "ccggac": "acccgg", + "ccggag": "agccgg", + "ccggat": "atccgg", + "ccggca": "accggc", + "ccggcc": "ccccgg", + "ccggcg": "ccggcg", + "ccggct": "ccggct", + "ccggga": "accggg", + "ccgggc": "cccggg", + "ccgggg": "ccgggg", + "ccgggt": "ccgggt", + "ccggta": "accggt", + "ccggtc": "cccggt", + "ccggtg": "ccggtg", + "ccggtt": "ccggtt", + "ccgtaa": "aaccgt", + "ccgtac": "acccgt", + "ccgtag": "agccgt", + "ccgtat": "atccgt", + "ccgtca": "accgtc", + "ccgtcc": "ccccgt", + "ccgtcg": "ccgtcg", + "ccgtct": "ccgtct", + "ccgtga": "accgtg", + "ccgtgc": "cccgtg", + "ccgtgg": "ccgtgg", + "ccgtgt": "ccgtgt", + "ccgtta": "accgtt", + "ccgttc": "cccgtt", + "ccgttg": "ccgttg", + "ccgttt": "ccgttt", + "cctaaa": "aaacct", + "cctaac": "aaccct", + "cctaag": "aagcct", + "cctaat": "aatcct", + "cctaca": "acacct", + "cctacc": "acccct", + "cctacg": "acgcct", + "cctact": "actcct", + "cctaga": "acctag", + "cctagc": "agccct", + "cctagg": "aggcct", + "cctagt": "agtcct", + "cctata": "acctat", + "cctatc": "atccct", + "cctatg": "atgcct", + "cctatt": "attcct", + "cctcaa": "aacctc", + "cctcac": "accctc", + "cctcag": "agcctc", + "cctcat": "atcctc", + "cctcca": "acctcc", + "cctccc": "ccccct", + "cctccg": "ccgcct", + "cctcct": "cctcct", + "cctcga": "acctcg", + "cctcgc": "ccctcg", + "cctcgg": "cctcgg", + "cctcgt": "cctcgt", + "cctcta": "acctct", + "cctctc": "ccctct", + "cctctg": "cctctg", + "cctctt": "cctctt", + "cctgaa": "aacctg", + "cctgac": "accctg", + "cctgag": "agcctg", + "cctgat": "atcctg", + "cctgca": "acctgc", + "cctgcc": "cccctg", + "cctgcg": "cctgcg", + "cctgct": "cctgct", + "cctgga": "acctgg", + "cctggc": "ccctgg", + "cctggg": "cctggg", + "cctggt": "cctggt", + "cctgta": "acctgt", + "cctgtc": "ccctgt", + "cctgtg": "cctgtg", + "cctgtt": "cctgtt", + "ccttaa": "aacctt", + 
"ccttac": "accctt", + "ccttag": "agcctt", + "ccttat": "atcctt", + "ccttca": "accttc", + "ccttcc": "cccctt", + "ccttcg": "ccttcg", + "ccttct": "ccttct", + "ccttga": "accttg", + "ccttgc": "cccttg", + "ccttgg": "ccttgg", + "ccttgt": "ccttgt", + "ccttta": "accttt", + "cctttc": "cccttt", + "cctttg": "cctttg", + "cctttt": "cctttt", + "cgaaaa": "aaaacg", + "cgaaac": "aaaccg", + "cgaaag": "aaagcg", + "cgaaat": "aaatcg", + "cgaaca": "aacacg", + "cgaacc": "aacccg", + "cgaacg": "aacgcg", + "cgaact": "aactcg", + "cgaaga": "aagacg", + "cgaagc": "aagccg", + "cgaagg": "aaggcg", + "cgaagt": "aagtcg", + "cgaata": "aatacg", + "cgaatc": "aatccg", + "cgaatg": "aatgcg", + "cgaatt": "aattcg", + "cgacaa": "aacgac", + "cgacac": "acaccg", + "cgacag": "acagcg", + "cgacat": "acatcg", + "cgacca": "accacg", + "cgaccc": "accccg", + "cgaccg": "accgcg", + "cgacct": "acctcg", + "cgacga": "acgacg", + "cgacgc": "acgccg", + "cgacgg": "acggcg", + "cgacgt": "acgtcg", + "cgacta": "acgact", + "cgactc": "actccg", + "cgactg": "actgcg", + "cgactt": "acttcg", + "cgagaa": "aacgag", + "cgagac": "accgag", + "cgagag": "agagcg", + "cgagat": "agatcg", + "cgagca": "acgagc", + "cgagcc": "agcccg", + "cgagcg": "agcgcg", + "cgagct": "agctcg", + "cgagga": "acgagg", + "cgaggc": "aggccg", + "cgaggg": "agggcg", + "cgaggt": "aggtcg", + "cgagta": "acgagt", + "cgagtc": "agtccg", + "cgagtg": "agtgcg", + "cgagtt": "agttcg", + "cgataa": "aacgat", + "cgatac": "accgat", + "cgatag": "agcgat", + "cgatat": "atatcg", + "cgatca": "acgatc", + "cgatcc": "atcccg", + "cgatcg": "atcgcg", + "cgatct": "atctcg", + "cgatga": "acgatg", + "cgatgc": "atgccg", + "cgatgg": "atggcg", + "cgatgt": "atgtcg", + "cgatta": "acgatt", + "cgattc": "attccg", + "cgattg": "attgcg", + "cgattt": "atttcg", + "cgcaaa": "aaacgc", + "cgcaac": "aaccgc", + "cgcaag": "aagcgc", + "cgcaat": "aatcgc", + "cgcaca": "acacgc", + "cgcacc": "acccgc", + "cgcacg": "acgcgc", + "cgcact": "actcgc", + "cgcaga": "acgcag", + "cgcagc": "agccgc", + "cgcagg": "aggcgc", + "cgcagt": "agtcgc", 
+ "cgcata": "acgcat", + "cgcatc": "atccgc", + "cgcatg": "atgcgc", + "cgcatt": "attcgc", + "cgccaa": "aacgcc", + "cgccac": "accgcc", + "cgccag": "agcgcc", + "cgccat": "atcgcc", + "cgccca": "acgccc", + "cgcccc": "cccccg", + "cgcccg": "cccgcg", + "cgccct": "ccctcg", + "cgccga": "acgccg", + "cgccgc": "ccgccg", + "cgccgg": "ccggcg", + "cgccgt": "ccgtcg", + "cgccta": "acgcct", + "cgcctc": "ccgcct", + "cgcctg": "cctgcg", + "cgcctt": "ccttcg", + "cgcgaa": "aacgcg", + "cgcgac": "accgcg", + "cgcgag": "agcgcg", + "cgcgat": "atcgcg", + "cgcgca": "acgcgc", + "cgcgcc": "cccgcg", + "cgcgcg": "cgcgcg", + "cgcgct": "cgcgct", + "cgcgga": "acgcgg", + "cgcggc": "ccgcgg", + "cgcggg": "cgcggg", + "cgcggt": "cgcggt", + "cgcgta": "acgcgt", + "cgcgtc": "ccgcgt", + "cgcgtg": "cgcgtg", + "cgcgtt": "cgcgtt", + "cgctaa": "aacgct", + "cgctac": "accgct", + "cgctag": "agcgct", + "cgctat": "atcgct", + "cgctca": "acgctc", + "cgctcc": "cccgct", + "cgctcg": "cgcgct", + "cgctct": "cgctct", + "cgctga": "acgctg", + "cgctgc": "ccgctg", + "cgctgg": "cgctgg", + "cgctgt": "cgctgt", + "cgctta": "acgctt", + "cgcttc": "ccgctt", + "cgcttg": "cgcttg", + "cgcttt": "cgcttt", + "cggaaa": "aaacgg", + "cggaac": "aaccgg", + "cggaag": "aagcgg", + "cggaat": "aatcgg", + "cggaca": "acacgg", + "cggacc": "acccgg", + "cggacg": "acgcgg", + "cggact": "actcgg", + "cggaga": "acggag", + "cggagc": "agccgg", + "cggagg": "aggcgg", + "cggagt": "agtcgg", + "cggata": "acggat", + "cggatc": "atccgg", + "cggatg": "atgcgg", + "cggatt": "attcgg", + "cggcaa": "aacggc", + "cggcac": "accggc", + "cggcag": "agcggc", + "cggcat": "atcggc", + "cggcca": "acggcc", + "cggccc": "ccccgg", + "cggccg": "ccgcgg", + "cggcct": "cctcgg", + "cggcga": "acggcg", + "cggcgc": "ccggcg", + "cggcgg": "cggcgg", + "cggcgt": "cggcgt", + "cggcta": "acggct", + "cggctc": "ccggct", + "cggctg": "cggctg", + "cggctt": "cggctt", + "cgggaa": "aacggg", + "cgggac": "accggg", + "cgggag": "agcggg", + "cgggat": "atcggg", + "cgggca": "acgggc", + "cgggcc": "cccggg", + "cgggcg": 
"cgcggg", + "cgggct": "cgggct", + "cgggga": "acgggg", + "cggggc": "ccgggg", + "cggggg": "cggggg", + "cggggt": "cggggt", + "cgggta": "acgggt", + "cgggtc": "ccgggt", + "cgggtg": "cgggtg", + "cgggtt": "cgggtt", + "cggtaa": "aacggt", + "cggtac": "accggt", + "cggtag": "agcggt", + "cggtat": "atcggt", + "cggtca": "acggtc", + "cggtcc": "cccggt", + "cggtcg": "cgcggt", + "cggtct": "cggtct", + "cggtga": "acggtg", + "cggtgc": "ccggtg", + "cggtgg": "cggtgg", + "cggtgt": "cggtgt", + "cggtta": "acggtt", + "cggttc": "ccggtt", + "cggttg": "cggttg", + "cggttt": "cggttt", + "cgtaaa": "aaacgt", + "cgtaac": "aaccgt", + "cgtaag": "aagcgt", + "cgtaat": "aatcgt", + "cgtaca": "acacgt", + "cgtacc": "acccgt", + "cgtacg": "acgcgt", + "cgtact": "actcgt", + "cgtaga": "acgtag", + "cgtagc": "agccgt", + "cgtagg": "aggcgt", + "cgtagt": "agtcgt", + "cgtata": "acgtat", + "cgtatc": "atccgt", + "cgtatg": "atgcgt", + "cgtatt": "attcgt", + "cgtcaa": "aacgtc", + "cgtcac": "accgtc", + "cgtcag": "agcgtc", + "cgtcat": "atcgtc", + "cgtcca": "acgtcc", + "cgtccc": "ccccgt", + "cgtccg": "ccgcgt", + "cgtcct": "cctcgt", + "cgtcga": "acgtcg", + "cgtcgc": "ccgtcg", + "cgtcgg": "cggcgt", + "cgtcgt": "cgtcgt", + "cgtcta": "acgtct", + "cgtctc": "ccgtct", + "cgtctg": "cgtctg", + "cgtctt": "cgtctt", + "cgtgaa": "aacgtg", + "cgtgac": "accgtg", + "cgtgag": "agcgtg", + "cgtgat": "atcgtg", + "cgtgca": "acgtgc", + "cgtgcc": "cccgtg", + "cgtgcg": "cgcgtg", + "cgtgct": "cgtgct", + "cgtgga": "acgtgg", + "cgtggc": "ccgtgg", + "cgtggg": "cgtggg", + "cgtggt": "cgtggt", + "cgtgta": "acgtgt", + "cgtgtc": "ccgtgt", + "cgtgtg": "cgtgtg", + "cgtgtt": "cgtgtt", + "cgttaa": "aacgtt", + "cgttac": "accgtt", + "cgttag": "agcgtt", + "cgttat": "atcgtt", + "cgttca": "acgttc", + "cgttcc": "cccgtt", + "cgttcg": "cgcgtt", + "cgttct": "cgttct", + "cgttga": "acgttg", + "cgttgc": "ccgttg", + "cgttgg": "cgttgg", + "cgttgt": "cgttgt", + "cgttta": "acgttt", + "cgtttc": "ccgttt", + "cgtttg": "cgtttg", + "cgtttt": "cgtttt", + "ctaaaa": "aaaact", + 
"ctaaac": "aaacct", + "ctaaag": "aaagct", + "ctaaat": "aaatct", + "ctaaca": "aacact", + "ctaacc": "aaccct", + "ctaacg": "aacgct", + "ctaact": "aactct", + "ctaaga": "aagact", + "ctaagc": "aagcct", + "ctaagg": "aaggct", + "ctaagt": "aagtct", + "ctaata": "aatact", + "ctaatc": "aatcct", + "ctaatg": "aatgct", + "ctaatt": "aattct", + "ctacaa": "aactac", + "ctacac": "acacct", + "ctacag": "acagct", + "ctacat": "acatct", + "ctacca": "accact", + "ctaccc": "acccct", + "ctaccg": "accgct", + "ctacct": "acctct", + "ctacga": "acgact", + "ctacgc": "acgcct", + "ctacgg": "acggct", + "ctacgt": "acgtct", + "ctacta": "actact", + "ctactc": "actcct", + "ctactg": "actgct", + "ctactt": "acttct", + "ctagaa": "aactag", + "ctagac": "acctag", + "ctagag": "agagct", + "ctagat": "agatct", + "ctagca": "actagc", + "ctagcc": "agccct", + "ctagcg": "agcgct", + "ctagct": "agctct", + "ctagga": "actagg", + "ctaggc": "aggcct", + "ctaggg": "agggct", + "ctaggt": "aggtct", + "ctagta": "actagt", + "ctagtc": "agtcct", + "ctagtg": "agtgct", + "ctagtt": "agttct", + "ctataa": "aactat", + "ctatac": "acctat", + "ctatag": "agctat", + "ctatat": "atatct", + "ctatca": "actatc", + "ctatcc": "atccct", + "ctatcg": "atcgct", + "ctatct": "atctct", + "ctatga": "actatg", + "ctatgc": "atgcct", + "ctatgg": "atggct", + "ctatgt": "atgtct", + "ctatta": "actatt", + "ctattc": "attcct", + "ctattg": "attgct", + "ctattt": "atttct", + "ctcaaa": "aaactc", + "ctcaac": "aacctc", + "ctcaag": "aagctc", + "ctcaat": "aatctc", + "ctcaca": "acactc", + "ctcacc": "accctc", + "ctcacg": "acgctc", + "ctcact": "actctc", + "ctcaga": "actcag", + "ctcagc": "agcctc", + "ctcagg": "aggctc", + "ctcagt": "agtctc", + "ctcata": "actcat", + "ctcatc": "atcctc", + "ctcatg": "atgctc", + "ctcatt": "attctc", + "ctccaa": "aactcc", + "ctccac": "acctcc", + "ctccag": "agctcc", + "ctccat": "atctcc", + "ctccca": "actccc", + "ctcccc": "ccccct", + "ctcccg": "cccgct", + "ctccct": "ccctct", + "ctccga": "actccg", + "ctccgc": "ccgcct", + "ctccgg": "ccggct", + "ctccgt": "ccgtct", 
+ "ctccta": "actcct", + "ctcctc": "cctcct", + "ctcctg": "cctgct", + "ctcctt": "ccttct", + "ctcgaa": "aactcg", + "ctcgac": "acctcg", + "ctcgag": "agctcg", + "ctcgat": "atctcg", + "ctcgca": "actcgc", + "ctcgcc": "ccctcg", + "ctcgcg": "cgcgct", + "ctcgct": "cgctct", + "ctcgga": "actcgg", + "ctcggc": "cctcgg", + "ctcggg": "cgggct", + "ctcggt": "cggtct", + "ctcgta": "actcgt", + "ctcgtc": "cctcgt", + "ctcgtg": "cgtgct", + "ctcgtt": "cgttct", + "ctctaa": "aactct", + "ctctac": "acctct", + "ctctag": "agctct", + "ctctat": "atctct", + "ctctca": "actctc", + "ctctcc": "ccctct", + "ctctcg": "cgctct", + "ctctct": "ctctct", + "ctctga": "actctg", + "ctctgc": "cctctg", + "ctctgg": "ctctgg", + "ctctgt": "ctctgt", + "ctctta": "actctt", + "ctcttc": "cctctt", + "ctcttg": "ctcttg", + "ctcttt": "ctcttt", + "ctgaaa": "aaactg", + "ctgaac": "aacctg", + "ctgaag": "aagctg", + "ctgaat": "aatctg", + "ctgaca": "acactg", + "ctgacc": "accctg", + "ctgacg": "acgctg", + "ctgact": "actctg", + "ctgaga": "actgag", + "ctgagc": "agcctg", + "ctgagg": "aggctg", + "ctgagt": "agtctg", + "ctgata": "actgat", + "ctgatc": "atcctg", + "ctgatg": "atgctg", + "ctgatt": "attctg", + "ctgcaa": "aactgc", + "ctgcac": "acctgc", + "ctgcag": "agctgc", + "ctgcat": "atctgc", + "ctgcca": "actgcc", + "ctgccc": "cccctg", + "ctgccg": "ccgctg", + "ctgcct": "cctctg", + "ctgcga": "actgcg", + "ctgcgc": "cctgcg", + "ctgcgg": "cggctg", + "ctgcgt": "cgtctg", + "ctgcta": "actgct", + "ctgctc": "cctgct", + "ctgctg": "ctgctg", + "ctgctt": "ctgctt", + "ctggaa": "aactgg", + "ctggac": "acctgg", + "ctggag": "agctgg", + "ctggat": "atctgg", + "ctggca": "actggc", + "ctggcc": "ccctgg", + "ctggcg": "cgctgg", + "ctggct": "ctctgg", + "ctggga": "actggg", + "ctgggc": "cctggg", + "ctgggg": "ctgggg", + "ctgggt": "ctgggt", + "ctggta": "actggt", + "ctggtc": "cctggt", + "ctggtg": "ctggtg", + "ctggtt": "ctggtt", + "ctgtaa": "aactgt", + "ctgtac": "acctgt", + "ctgtag": "agctgt", + "ctgtat": "atctgt", + "ctgtca": "actgtc", + "ctgtcc": "ccctgt", + "ctgtcg": 
"cgctgt", + "ctgtct": "ctctgt", + "ctgtga": "actgtg", + "ctgtgc": "cctgtg", + "ctgtgg": "ctgtgg", + "ctgtgt": "ctgtgt", + "ctgtta": "actgtt", + "ctgttc": "cctgtt", + "ctgttg": "ctgttg", + "ctgttt": "ctgttt", + "cttaaa": "aaactt", + "cttaac": "aacctt", + "cttaag": "aagctt", + "cttaat": "aatctt", + "cttaca": "acactt", + "cttacc": "accctt", + "cttacg": "acgctt", + "cttact": "actctt", + "cttaga": "acttag", + "cttagc": "agcctt", + "cttagg": "aggctt", + "cttagt": "agtctt", + "cttata": "acttat", + "cttatc": "atcctt", + "cttatg": "atgctt", + "cttatt": "attctt", + "cttcaa": "aacttc", + "cttcac": "accttc", + "cttcag": "agcttc", + "cttcat": "atcttc", + "cttcca": "acttcc", + "cttccc": "cccctt", + "cttccg": "ccgctt", + "cttcct": "cctctt", + "cttcga": "acttcg", + "cttcgc": "ccttcg", + "cttcgg": "cggctt", + "cttcgt": "cgtctt", + "cttcta": "acttct", + "cttctc": "ccttct", + "cttctg": "ctgctt", + "cttctt": "cttctt", + "cttgaa": "aacttg", + "cttgac": "accttg", + "cttgag": "agcttg", + "cttgat": "atcttg", + "cttgca": "acttgc", + "cttgcc": "cccttg", + "cttgcg": "cgcttg", + "cttgct": "ctcttg", + "cttgga": "acttgg", + "cttggc": "ccttgg", + "cttggg": "cttggg", + "cttggt": "cttggt", + "cttgta": "acttgt", + "cttgtc": "ccttgt", + "cttgtg": "cttgtg", + "cttgtt": "cttgtt", + "ctttaa": "aacttt", + "ctttac": "accttt", + "ctttag": "agcttt", + "ctttat": "atcttt", + "ctttca": "actttc", + "ctttcc": "cccttt", + "ctttcg": "cgcttt", + "ctttct": "ctcttt", + "ctttga": "actttg", + "ctttgc": "cctttg", + "ctttgg": "ctttgg", + "ctttgt": "ctttgt", + "ctttta": "actttt", + "cttttc": "cctttt", + "cttttg": "cttttg", + "cttttt": "cttttt", + "gaaaaa": "aaaaag", + "gaaaac": "aaaacg", + "gaaaag": "aaaagg", + "gaaaat": "aaaatg", + "gaaaca": "aaacag", + "gaaacc": "aaaccg", + "gaaacg": "aaacgg", + "gaaact": "aaactg", + "gaaaga": "aaagag", + "gaaagc": "aaagcg", + "gaaagg": "aaaggg", + "gaaagt": "aaagtg", + "gaaata": "aaatag", + "gaaatc": "aaatcg", + "gaaatg": "aaatgg", + "gaaatt": "aaattg", + "gaacaa": "aacaag", + 
"gaacac": "aacacg", + "gaacag": "aacagg", + "gaacat": "aacatg", + "gaacca": "aaccag", + "gaaccc": "aacccg", + "gaaccg": "aaccgg", + "gaacct": "aacctg", + "gaacga": "aacgag", + "gaacgc": "aacgcg", + "gaacgg": "aacggg", + "gaacgt": "aacgtg", + "gaacta": "aactag", + "gaactc": "aactcg", + "gaactg": "aactgg", + "gaactt": "aacttg", + "gaagaa": "aagaag", + "gaagac": "aagacg", + "gaagag": "aagagg", + "gaagat": "aagatg", + "gaagca": "aagcag", + "gaagcc": "aagccg", + "gaagcg": "aagcgg", + "gaagct": "aagctg", + "gaagga": "aaggag", + "gaaggc": "aaggcg", + "gaaggg": "aagggg", + "gaaggt": "aaggtg", + "gaagta": "aagtag", + "gaagtc": "aagtcg", + "gaagtg": "aagtgg", + "gaagtt": "aagttg", + "gaataa": "aagaat", + "gaatac": "aatacg", + "gaatag": "aatagg", + "gaatat": "aatatg", + "gaatca": "aatcag", + "gaatcc": "aatccg", + "gaatcg": "aatcgg", + "gaatct": "aatctg", + "gaatga": "aatgag", + "gaatgc": "aatgcg", + "gaatgg": "aatggg", + "gaatgt": "aatgtg", + "gaatta": "aattag", + "gaattc": "aattcg", + "gaattg": "aattgg", + "gaattt": "aatttg", + "gacaaa": "aaagac", + "gacaac": "aacgac", + "gacaag": "aaggac", + "gacaat": "aatgac", + "gacaca": "acacag", + "gacacc": "acaccg", + "gacacg": "acacgg", + "gacact": "acactg", + "gacaga": "acagag", + "gacagc": "acagcg", + "gacagg": "acaggg", + "gacagt": "acagtg", + "gacata": "acatag", + "gacatc": "acatcg", + "gacatg": "acatgg", + "gacatt": "acattg", + "gaccaa": "aagacc", + "gaccac": "accacg", + "gaccag": "accagg", + "gaccat": "accatg", + "gaccca": "acccag", + "gacccc": "accccg", + "gacccg": "acccgg", + "gaccct": "accctg", + "gaccga": "accgag", + "gaccgc": "accgcg", + "gaccgg": "accggg", + "gaccgt": "accgtg", + "gaccta": "acctag", + "gacctc": "acctcg", + "gacctg": "acctgg", + "gacctt": "accttg", + "gacgaa": "aagacg", + "gacgac": "acgacg", + "gacgag": "acgagg", + "gacgat": "acgatg", + "gacgca": "acgcag", + "gacgcc": "acgccg", + "gacgcg": "acgcgg", + "gacgct": "acgctg", + "gacgga": "acggag", + "gacggc": "acggcg", + "gacggg": "acgggg", + "gacggt": "acggtg", 
+ "gacgta": "acgtag", + "gacgtc": "acgtcg", + "gacgtg": "acgtgg", + "gacgtt": "acgttg", + "gactaa": "aagact", + "gactac": "acgact", + "gactag": "actagg", + "gactat": "actatg", + "gactca": "actcag", + "gactcc": "actccg", + "gactcg": "actcgg", + "gactct": "actctg", + "gactga": "actgag", + "gactgc": "actgcg", + "gactgg": "actggg", + "gactgt": "actgtg", + "gactta": "acttag", + "gacttc": "acttcg", + "gacttg": "acttgg", + "gacttt": "actttg", + "gagaaa": "aaagag", + "gagaac": "aacgag", + "gagaag": "aaggag", + "gagaat": "aatgag", + "gagaca": "acagag", + "gagacc": "accgag", + "gagacg": "acggag", + "gagact": "actgag", + "gagaga": "agagag", + "gagagc": "agagcg", + "gagagg": "agaggg", + "gagagt": "agagtg", + "gagata": "agagat", + "gagatc": "agatcg", + "gagatg": "agatgg", + "gagatt": "agattg", + "gagcaa": "aagagc", + "gagcac": "acgagc", + "gagcag": "agcagg", + "gagcat": "agcatg", + "gagcca": "agagcc", + "gagccc": "agcccg", + "gagccg": "agccgg", + "gagcct": "agcctg", + "gagcga": "agagcg", + "gagcgc": "agcgcg", + "gagcgg": "agcggg", + "gagcgt": "agcgtg", + "gagcta": "agagct", + "gagctc": "agctcg", + "gagctg": "agctgg", + "gagctt": "agcttg", + "gaggaa": "aagagg", + "gaggac": "acgagg", + "gaggag": "aggagg", + "gaggat": "aggatg", + "gaggca": "agaggc", + "gaggcc": "aggccg", + "gaggcg": "aggcgg", + "gaggct": "aggctg", + "gaggga": "agaggg", + "gagggc": "agggcg", + "gagggg": "aggggg", + "gagggt": "agggtg", + "gaggta": "agaggt", + "gaggtc": "aggtcg", + "gaggtg": "aggtgg", + "gaggtt": "aggttg", + "gagtaa": "aagagt", + "gagtac": "acgagt", + "gagtag": "aggagt", + "gagtat": "agtatg", + "gagtca": "agagtc", + "gagtcc": "agtccg", + "gagtcg": "agtcgg", + "gagtct": "agtctg", + "gagtga": "agagtg", + "gagtgc": "agtgcg", + "gagtgg": "agtggg", + "gagtgt": "agtgtg", + "gagtta": "agagtt", + "gagttc": "agttcg", + "gagttg": "agttgg", + "gagttt": "agtttg", + "gataaa": "aaagat", + "gataac": "aacgat", + "gataag": "aaggat", + "gataat": "aatgat", + "gataca": "acagat", + "gatacc": "accgat", + "gatacg": 
"acggat", + "gatact": "actgat", + "gataga": "agagat", + "gatagc": "agcgat", + "gatagg": "agggat", + "gatagt": "agtgat", + "gatata": "agatat", + "gatatc": "atatcg", + "gatatg": "atatgg", + "gatatt": "atattg", + "gatcaa": "aagatc", + "gatcac": "acgatc", + "gatcag": "aggatc", + "gatcat": "atcatg", + "gatcca": "agatcc", + "gatccc": "atcccg", + "gatccg": "atccgg", + "gatcct": "atcctg", + "gatcga": "agatcg", + "gatcgc": "atcgcg", + "gatcgg": "atcggg", + "gatcgt": "atcgtg", + "gatcta": "agatct", + "gatctc": "atctcg", + "gatctg": "atctgg", + "gatctt": "atcttg", + "gatgaa": "aagatg", + "gatgac": "acgatg", + "gatgag": "aggatg", + "gatgat": "atgatg", + "gatgca": "agatgc", + "gatgcc": "atgccg", + "gatgcg": "atgcgg", + "gatgct": "atgctg", + "gatgga": "agatgg", + "gatggc": "atggcg", + "gatggg": "atgggg", + "gatggt": "atggtg", + "gatgta": "agatgt", + "gatgtc": "atgtcg", + "gatgtg": "atgtgg", + "gatgtt": "atgttg", + "gattaa": "aagatt", + "gattac": "acgatt", + "gattag": "aggatt", + "gattat": "atgatt", + "gattca": "agattc", + "gattcc": "attccg", + "gattcg": "attcgg", + "gattct": "attctg", + "gattga": "agattg", + "gattgc": "attgcg", + "gattgg": "attggg", + "gattgt": "attgtg", + "gattta": "agattt", + "gatttc": "atttcg", + "gatttg": "atttgg", + "gatttt": "attttg", + "gcaaaa": "aaaagc", + "gcaaac": "aaacgc", + "gcaaag": "aaaggc", + "gcaaat": "aaatgc", + "gcaaca": "aacagc", + "gcaacc": "aaccgc", + "gcaacg": "aacggc", + "gcaact": "aactgc", + "gcaaga": "aagagc", + "gcaagc": "aagcgc", + "gcaagg": "aagggc", + "gcaagt": "aagtgc", + "gcaata": "aatagc", + "gcaatc": "aatcgc", + "gcaatg": "aatggc", + "gcaatt": "aattgc", + "gcacaa": "aagcac", + "gcacac": "acacgc", + "gcacag": "acaggc", + "gcacat": "acatgc", + "gcacca": "accagc", + "gcaccc": "acccgc", + "gcaccg": "accggc", + "gcacct": "acctgc", + "gcacga": "acgagc", + "gcacgc": "acgcgc", + "gcacgg": "acgggc", + "gcacgt": "acgtgc", + "gcacta": "actagc", + "gcactc": "actcgc", + "gcactg": "actggc", + "gcactt": "acttgc", + "gcagaa": "aagcag", + 
"gcagac": "acgcag", + "gcagag": "agaggc", + "gcagat": "agatgc", + "gcagca": "agcagc", + "gcagcc": "agccgc", + "gcagcg": "agcggc", + "gcagct": "agctgc", + "gcagga": "agcagg", + "gcaggc": "aggcgc", + "gcaggg": "aggggc", + "gcaggt": "aggtgc", + "gcagta": "agcagt", + "gcagtc": "agtcgc", + "gcagtg": "agtggc", + "gcagtt": "agttgc", + "gcataa": "aagcat", + "gcatac": "acgcat", + "gcatag": "aggcat", + "gcatat": "atatgc", + "gcatca": "agcatc", + "gcatcc": "atccgc", + "gcatcg": "atcggc", + "gcatct": "atctgc", + "gcatga": "agcatg", + "gcatgc": "atgcgc", + "gcatgg": "atgggc", + "gcatgt": "atgtgc", + "gcatta": "agcatt", + "gcattc": "attcgc", + "gcattg": "attggc", + "gcattt": "atttgc", + "gccaaa": "aaagcc", + "gccaac": "aacgcc", + "gccaag": "aaggcc", + "gccaat": "aatgcc", + "gccaca": "acagcc", + "gccacc": "accgcc", + "gccacg": "acggcc", + "gccact": "actgcc", + "gccaga": "agagcc", + "gccagc": "agcgcc", + "gccagg": "agggcc", + "gccagt": "agtgcc", + "gccata": "agccat", + "gccatc": "atcgcc", + "gccatg": "atggcc", + "gccatt": "attgcc", + "gcccaa": "aagccc", + "gcccac": "acgccc", + "gcccag": "aggccc", + "gcccat": "atgccc", + "gcccca": "agcccc", + "gccccc": "cccccg", + "gccccg": "ccccgg", + "gcccct": "cccctg", + "gcccga": "agcccg", + "gcccgc": "cccgcg", + "gcccgg": "cccggg", + "gcccgt": "cccgtg", + "gcccta": "agccct", + "gccctc": "ccctcg", + "gccctg": "ccctgg", + "gccctt": "cccttg", + "gccgaa": "aagccg", + "gccgac": "acgccg", + "gccgag": "aggccg", + "gccgat": "atgccg", + "gccgca": "agccgc", + "gccgcc": "ccgccg", + "gccgcg": "ccgcgg", + "gccgct": "ccgctg", + "gccgga": "agccgg", + "gccggc": "ccggcg", + "gccggg": "ccgggg", + "gccggt": "ccggtg", + "gccgta": "agccgt", + "gccgtc": "ccgtcg", + "gccgtg": "ccgtgg", + "gccgtt": "ccgttg", + "gcctaa": "aagcct", + "gcctac": "acgcct", + "gcctag": "aggcct", + "gcctat": "atgcct", + "gcctca": "agcctc", + "gcctcc": "ccgcct", + "gcctcg": "cctcgg", + "gcctct": "cctctg", + "gcctga": "agcctg", + "gcctgc": "cctgcg", + "gcctgg": "cctggg", + "gcctgt": "cctgtg", 
+ "gcctta": "agcctt", + "gccttc": "ccttcg", + "gccttg": "ccttgg", + "gccttt": "cctttg", + "gcgaaa": "aaagcg", + "gcgaac": "aacgcg", + "gcgaag": "aaggcg", + "gcgaat": "aatgcg", + "gcgaca": "acagcg", + "gcgacc": "accgcg", + "gcgacg": "acggcg", + "gcgact": "actgcg", + "gcgaga": "agagcg", + "gcgagc": "agcgcg", + "gcgagg": "agggcg", + "gcgagt": "agtgcg", + "gcgata": "agcgat", + "gcgatc": "atcgcg", + "gcgatg": "atggcg", + "gcgatt": "attgcg", + "gcgcaa": "aagcgc", + "gcgcac": "acgcgc", + "gcgcag": "aggcgc", + "gcgcat": "atgcgc", + "gcgcca": "agcgcc", + "gcgccc": "cccgcg", + "gcgccg": "ccggcg", + "gcgcct": "cctgcg", + "gcgcga": "agcgcg", + "gcgcgc": "cgcgcg", + "gcgcgg": "cgcggg", + "gcgcgt": "cgcgtg", + "gcgcta": "agcgct", + "gcgctc": "cgcgct", + "gcgctg": "cgctgg", + "gcgctt": "cgcttg", + "gcggaa": "aagcgg", + "gcggac": "acgcgg", + "gcggag": "aggcgg", + "gcggat": "atgcgg", + "gcggca": "agcggc", + "gcggcc": "ccgcgg", + "gcggcg": "cggcgg", + "gcggct": "cggctg", + "gcggga": "agcggg", + "gcgggc": "cgcggg", + "gcgggg": "cggggg", + "gcgggt": "cgggtg", + "gcggta": "agcggt", + "gcggtc": "cgcggt", + "gcggtg": "cggtgg", + "gcggtt": "cggttg", + "gcgtaa": "aagcgt", + "gcgtac": "acgcgt", + "gcgtag": "aggcgt", + "gcgtat": "atgcgt", + "gcgtca": "agcgtc", + "gcgtcc": "ccgcgt", + "gcgtcg": "cggcgt", + "gcgtct": "cgtctg", + "gcgtga": "agcgtg", + "gcgtgc": "cgcgtg", + "gcgtgg": "cgtggg", + "gcgtgt": "cgtgtg", + "gcgtta": "agcgtt", + "gcgttc": "cgcgtt", + "gcgttg": "cgttgg", + "gcgttt": "cgtttg", + "gctaaa": "aaagct", + "gctaac": "aacgct", + "gctaag": "aaggct", + "gctaat": "aatgct", + "gctaca": "acagct", + "gctacc": "accgct", + "gctacg": "acggct", + "gctact": "actgct", + "gctaga": "agagct", + "gctagc": "agcgct", + "gctagg": "agggct", + "gctagt": "agtgct", + "gctata": "agctat", + "gctatc": "atcgct", + "gctatg": "atggct", + "gctatt": "attgct", + "gctcaa": "aagctc", + "gctcac": "acgctc", + "gctcag": "aggctc", + "gctcat": "atgctc", + "gctcca": "agctcc", + "gctccc": "cccgct", + "gctccg": 
"ccggct", + "gctcct": "cctgct", + "gctcga": "agctcg", + "gctcgc": "cgcgct", + "gctcgg": "cgggct", + "gctcgt": "cgtgct", + "gctcta": "agctct", + "gctctc": "cgctct", + "gctctg": "ctctgg", + "gctctt": "ctcttg", + "gctgaa": "aagctg", + "gctgac": "acgctg", + "gctgag": "aggctg", + "gctgat": "atgctg", + "gctgca": "agctgc", + "gctgcc": "ccgctg", + "gctgcg": "cggctg", + "gctgct": "ctgctg", + "gctgga": "agctgg", + "gctggc": "cgctgg", + "gctggg": "ctgggg", + "gctggt": "ctggtg", + "gctgta": "agctgt", + "gctgtc": "cgctgt", + "gctgtg": "ctgtgg", + "gctgtt": "ctgttg", + "gcttaa": "aagctt", + "gcttac": "acgctt", + "gcttag": "aggctt", + "gcttat": "atgctt", + "gcttca": "agcttc", + "gcttcc": "ccgctt", + "gcttcg": "cggctt", + "gcttct": "ctgctt", + "gcttga": "agcttg", + "gcttgc": "cgcttg", + "gcttgg": "cttggg", + "gcttgt": "cttgtg", + "gcttta": "agcttt", + "gctttc": "cgcttt", + "gctttg": "ctttgg", + "gctttt": "cttttg", + "ggaaaa": "aaaagg", + "ggaaac": "aaacgg", + "ggaaag": "aaaggg", + "ggaaat": "aaatgg", + "ggaaca": "aacagg", + "ggaacc": "aaccgg", + "ggaacg": "aacggg", + "ggaact": "aactgg", + "ggaaga": "aagagg", + "ggaagc": "aagcgg", + "ggaagg": "aagggg", + "ggaagt": "aagtgg", + "ggaata": "aatagg", + "ggaatc": "aatcgg", + "ggaatg": "aatggg", + "ggaatt": "aattgg", + "ggacaa": "aaggac", + "ggacac": "acacgg", + "ggacag": "acaggg", + "ggacat": "acatgg", + "ggacca": "accagg", + "ggaccc": "acccgg", + "ggaccg": "accggg", + "ggacct": "acctgg", + "ggacga": "acgagg", + "ggacgc": "acgcgg", + "ggacgg": "acgggg", + "ggacgt": "acgtgg", + "ggacta": "actagg", + "ggactc": "actcgg", + "ggactg": "actggg", + "ggactt": "acttgg", + "ggagaa": "aaggag", + "ggagac": "acggag", + "ggagag": "agaggg", + "ggagat": "agatgg", + "ggagca": "agcagg", + "ggagcc": "agccgg", + "ggagcg": "agcggg", + "ggagct": "agctgg", + "ggagga": "aggagg", + "ggaggc": "aggcgg", + "ggaggg": "aggggg", + "ggaggt": "aggtgg", + "ggagta": "aggagt", + "ggagtc": "agtcgg", + "ggagtg": "agtggg", + "ggagtt": "agttgg", + "ggataa": "aaggat", + 
"ggatac": "acggat", + "ggatag": "agggat", + "ggatat": "atatgg", + "ggatca": "aggatc", + "ggatcc": "atccgg", + "ggatcg": "atcggg", + "ggatct": "atctgg", + "ggatga": "aggatg", + "ggatgc": "atgcgg", + "ggatgg": "atgggg", + "ggatgt": "atgtgg", + "ggatta": "aggatt", + "ggattc": "attcgg", + "ggattg": "attggg", + "ggattt": "atttgg", + "ggcaaa": "aaaggc", + "ggcaac": "aacggc", + "ggcaag": "aagggc", + "ggcaat": "aatggc", + "ggcaca": "acaggc", + "ggcacc": "accggc", + "ggcacg": "acgggc", + "ggcact": "actggc", + "ggcaga": "agaggc", + "ggcagc": "agcggc", + "ggcagg": "aggggc", + "ggcagt": "agtggc", + "ggcata": "aggcat", + "ggcatc": "atcggc", + "ggcatg": "atgggc", + "ggcatt": "attggc", + "ggccaa": "aaggcc", + "ggccac": "acggcc", + "ggccag": "agggcc", + "ggccat": "atggcc", + "ggccca": "aggccc", + "ggcccc": "ccccgg", + "ggcccg": "cccggg", + "ggccct": "ccctgg", + "ggccga": "aggccg", + "ggccgc": "ccgcgg", + "ggccgg": "ccgggg", + "ggccgt": "ccgtgg", + "ggccta": "aggcct", + "ggcctc": "cctcgg", + "ggcctg": "cctggg", + "ggcctt": "ccttgg", + "ggcgaa": "aaggcg", + "ggcgac": "acggcg", + "ggcgag": "agggcg", + "ggcgat": "atggcg", + "ggcgca": "aggcgc", + "ggcgcc": "ccggcg", + "ggcgcg": "cgcggg", + "ggcgct": "cgctgg", + "ggcgga": "aggcgg", + "ggcggc": "cggcgg", + "ggcggg": "cggggg", + "ggcggt": "cggtgg", + "ggcgta": "aggcgt", + "ggcgtc": "cggcgt", + "ggcgtg": "cgtggg", + "ggcgtt": "cgttgg", + "ggctaa": "aaggct", + "ggctac": "acggct", + "ggctag": "agggct", + "ggctat": "atggct", + "ggctca": "aggctc", + "ggctcc": "ccggct", + "ggctcg": "cgggct", + "ggctct": "ctctgg", + "ggctga": "aggctg", + "ggctgc": "cggctg", + "ggctgg": "ctgggg", + "ggctgt": "ctgtgg", + "ggctta": "aggctt", + "ggcttc": "cggctt", + "ggcttg": "cttggg", + "ggcttt": "ctttgg", + "gggaaa": "aaaggg", + "gggaac": "aacggg", + "gggaag": "aagggg", + "gggaat": "aatggg", + "gggaca": "acaggg", + "gggacc": "accggg", + "gggacg": "acgggg", + "gggact": "actggg", + "gggaga": "agaggg", + "gggagc": "agcggg", + "gggagg": "aggggg", + "gggagt": "agtggg", 
+ "gggata": "agggat", + "gggatc": "atcggg", + "gggatg": "atgggg", + "gggatt": "attggg", + "gggcaa": "aagggc", + "gggcac": "acgggc", + "gggcag": "aggggc", + "gggcat": "atgggc", + "gggcca": "agggcc", + "gggccc": "cccggg", + "gggccg": "ccgggg", + "gggcct": "cctggg", + "gggcga": "agggcg", + "gggcgc": "cgcggg", + "gggcgg": "cggggg", + "gggcgt": "cgtggg", + "gggcta": "agggct", + "gggctc": "cgggct", + "gggctg": "ctgggg", + "gggctt": "cttggg", + "ggggaa": "aagggg", + "ggggac": "acgggg", + "ggggag": "aggggg", + "ggggat": "atgggg", + "ggggca": "aggggc", + "ggggcc": "ccgggg", + "ggggcg": "cggggg", + "ggggct": "ctgggg", + "ggggga": "aggggg", + "gggggc": "cggggg", + "gggggg": "gggggg", + "gggggt": "gggggt", + "ggggta": "aggggt", + "ggggtc": "cggggt", + "ggggtg": "gggggt", + "ggggtt": "ggggtt", + "gggtaa": "aagggt", + "gggtac": "acgggt", + "gggtag": "aggggt", + "gggtat": "atgggt", + "gggtca": "agggtc", + "gggtcc": "ccgggt", + "gggtcg": "cggggt", + "gggtct": "ctgggt", + "gggtga": "agggtg", + "gggtgc": "cgggtg", + "gggtgg": "gggggt", + "gggtgt": "gggtgt", + "gggtta": "agggtt", + "gggttc": "cgggtt", + "gggttg": "ggggtt", + "gggttt": "gggttt", + "ggtaaa": "aaaggt", + "ggtaac": "aacggt", + "ggtaag": "aagggt", + "ggtaat": "aatggt", + "ggtaca": "acaggt", + "ggtacc": "accggt", + "ggtacg": "acgggt", + "ggtact": "actggt", + "ggtaga": "agaggt", + "ggtagc": "agcggt", + "ggtagg": "aggggt", + "ggtagt": "agtggt", + "ggtata": "aggtat", + "ggtatc": "atcggt", + "ggtatg": "atgggt", + "ggtatt": "attggt", + "ggtcaa": "aaggtc", + "ggtcac": "acggtc", + "ggtcag": "agggtc", + "ggtcat": "atggtc", + "ggtcca": "aggtcc", + "ggtccc": "cccggt", + "ggtccg": "ccgggt", + "ggtcct": "cctggt", + "ggtcga": "aggtcg", + "ggtcgc": "cgcggt", + "ggtcgg": "cggggt", + "ggtcgt": "cgtggt", + "ggtcta": "aggtct", + "ggtctc": "cggtct", + "ggtctg": "ctgggt", + "ggtctt": "cttggt", + "ggtgaa": "aaggtg", + "ggtgac": "acggtg", + "ggtgag": "agggtg", + "ggtgat": "atggtg", + "ggtgca": "aggtgc", + "ggtgcc": "ccggtg", + "ggtgcg": 
"cgggtg", + "ggtgct": "ctggtg", + "ggtgga": "aggtgg", + "ggtggc": "cggtgg", + "ggtggg": "gggggt", + "ggtggt": "ggtggt", + "ggtgta": "aggtgt", + "ggtgtc": "cggtgt", + "ggtgtg": "gggtgt", + "ggtgtt": "ggtgtt", + "ggttaa": "aaggtt", + "ggttac": "acggtt", + "ggttag": "agggtt", + "ggttat": "atggtt", + "ggttca": "aggttc", + "ggttcc": "ccggtt", + "ggttcg": "cgggtt", + "ggttct": "ctggtt", + "ggttga": "aggttg", + "ggttgc": "cggttg", + "ggttgg": "ggggtt", + "ggttgt": "ggttgt", + "ggttta": "aggttt", + "ggtttc": "cggttt", + "ggtttg": "gggttt", + "ggtttt": "ggtttt", + "gtaaaa": "aaaagt", + "gtaaac": "aaacgt", + "gtaaag": "aaaggt", + "gtaaat": "aaatgt", + "gtaaca": "aacagt", + "gtaacc": "aaccgt", + "gtaacg": "aacggt", + "gtaact": "aactgt", + "gtaaga": "aagagt", + "gtaagc": "aagcgt", + "gtaagg": "aagggt", + "gtaagt": "aagtgt", + "gtaata": "aatagt", + "gtaatc": "aatcgt", + "gtaatg": "aatggt", + "gtaatt": "aattgt", + "gtacaa": "aagtac", + "gtacac": "acacgt", + "gtacag": "acaggt", + "gtacat": "acatgt", + "gtacca": "accagt", + "gtaccc": "acccgt", + "gtaccg": "accggt", + "gtacct": "acctgt", + "gtacga": "acgagt", + "gtacgc": "acgcgt", + "gtacgg": "acgggt", + "gtacgt": "acgtgt", + "gtacta": "actagt", + "gtactc": "actcgt", + "gtactg": "actggt", + "gtactt": "acttgt", + "gtagaa": "aagtag", + "gtagac": "acgtag", + "gtagag": "agaggt", + "gtagat": "agatgt", + "gtagca": "agcagt", + "gtagcc": "agccgt", + "gtagcg": "agcggt", + "gtagct": "agctgt", + "gtagga": "aggagt", + "gtaggc": "aggcgt", + "gtaggg": "aggggt", + "gtaggt": "aggtgt", + "gtagta": "agtagt", + "gtagtc": "agtcgt", + "gtagtg": "agtggt", + "gtagtt": "agttgt", + "gtataa": "aagtat", + "gtatac": "acgtat", + "gtatag": "aggtat", + "gtatat": "atatgt", + "gtatca": "agtatc", + "gtatcc": "atccgt", + "gtatcg": "atcggt", + "gtatct": "atctgt", + "gtatga": "agtatg", + "gtatgc": "atgcgt", + "gtatgg": "atgggt", + "gtatgt": "atgtgt", + "gtatta": "agtatt", + "gtattc": "attcgt", + "gtattg": "attggt", + "gtattt": "atttgt", + "gtcaaa": "aaagtc", + 
"gtcaac": "aacgtc", + "gtcaag": "aaggtc", + "gtcaat": "aatgtc", + "gtcaca": "acagtc", + "gtcacc": "accgtc", + "gtcacg": "acggtc", + "gtcact": "actgtc", + "gtcaga": "agagtc", + "gtcagc": "agcgtc", + "gtcagg": "agggtc", + "gtcagt": "agtgtc", + "gtcata": "agtcat", + "gtcatc": "atcgtc", + "gtcatg": "atggtc", + "gtcatt": "attgtc", + "gtccaa": "aagtcc", + "gtccac": "acgtcc", + "gtccag": "aggtcc", + "gtccat": "atgtcc", + "gtccca": "agtccc", + "gtcccc": "ccccgt", + "gtcccg": "cccggt", + "gtccct": "ccctgt", + "gtccga": "agtccg", + "gtccgc": "ccgcgt", + "gtccgg": "ccgggt", + "gtccgt": "ccgtgt", + "gtccta": "agtcct", + "gtcctc": "cctcgt", + "gtcctg": "cctggt", + "gtcctt": "ccttgt", + "gtcgaa": "aagtcg", + "gtcgac": "acgtcg", + "gtcgag": "aggtcg", + "gtcgat": "atgtcg", + "gtcgca": "agtcgc", + "gtcgcc": "ccgtcg", + "gtcgcg": "cgcggt", + "gtcgct": "cgctgt", + "gtcgga": "agtcgg", + "gtcggc": "cggcgt", + "gtcggg": "cggggt", + "gtcggt": "cggtgt", + "gtcgta": "agtcgt", + "gtcgtc": "cgtcgt", + "gtcgtg": "cgtggt", + "gtcgtt": "cgttgt", + "gtctaa": "aagtct", + "gtctac": "acgtct", + "gtctag": "aggtct", + "gtctat": "atgtct", + "gtctca": "agtctc", + "gtctcc": "ccgtct", + "gtctcg": "cggtct", + "gtctct": "ctctgt", + "gtctga": "agtctg", + "gtctgc": "cgtctg", + "gtctgg": "ctgggt", + "gtctgt": "ctgtgt", + "gtctta": "agtctt", + "gtcttc": "cgtctt", + "gtcttg": "cttggt", + "gtcttt": "ctttgt", + "gtgaaa": "aaagtg", + "gtgaac": "aacgtg", + "gtgaag": "aaggtg", + "gtgaat": "aatgtg", + "gtgaca": "acagtg", + "gtgacc": "accgtg", + "gtgacg": "acggtg", + "gtgact": "actgtg", + "gtgaga": "agagtg", + "gtgagc": "agcgtg", + "gtgagg": "agggtg", + "gtgagt": "agtgtg", + "gtgata": "agtgat", + "gtgatc": "atcgtg", + "gtgatg": "atggtg", + "gtgatt": "attgtg", + "gtgcaa": "aagtgc", + "gtgcac": "acgtgc", + "gtgcag": "aggtgc", + "gtgcat": "atgtgc", + "gtgcca": "agtgcc", + "gtgccc": "cccgtg", + "gtgccg": "ccggtg", + "gtgcct": "cctgtg", + "gtgcga": "agtgcg", + "gtgcgc": "cgcgtg", + "gtgcgg": "cgggtg", + "gtgcgt": "cgtgtg", 
+ "gtgcta": "agtgct", + "gtgctc": "cgtgct", + "gtgctg": "ctggtg", + "gtgctt": "cttgtg", + "gtggaa": "aagtgg", + "gtggac": "acgtgg", + "gtggag": "aggtgg", + "gtggat": "atgtgg", + "gtggca": "agtggc", + "gtggcc": "ccgtgg", + "gtggcg": "cggtgg", + "gtggct": "ctgtgg", + "gtggga": "agtggg", + "gtgggc": "cgtggg", + "gtgggg": "gggggt", + "gtgggt": "gggtgt", + "gtggta": "agtggt", + "gtggtc": "cgtggt", + "gtggtg": "ggtggt", + "gtggtt": "ggttgt", + "gtgtaa": "aagtgt", + "gtgtac": "acgtgt", + "gtgtag": "aggtgt", + "gtgtat": "atgtgt", + "gtgtca": "agtgtc", + "gtgtcc": "ccgtgt", + "gtgtcg": "cggtgt", + "gtgtct": "ctgtgt", + "gtgtga": "agtgtg", + "gtgtgc": "cgtgtg", + "gtgtgg": "gggtgt", + "gtgtgt": "gtgtgt", + "gtgtta": "agtgtt", + "gtgttc": "cgtgtt", + "gtgttg": "ggtgtt", + "gtgttt": "gtgttt", + "gttaaa": "aaagtt", + "gttaac": "aacgtt", + "gttaag": "aaggtt", + "gttaat": "aatgtt", + "gttaca": "acagtt", + "gttacc": "accgtt", + "gttacg": "acggtt", + "gttact": "actgtt", + "gttaga": "agagtt", + "gttagc": "agcgtt", + "gttagg": "agggtt", + "gttagt": "agtgtt", + "gttata": "agttat", + "gttatc": "atcgtt", + "gttatg": "atggtt", + "gttatt": "attgtt", + "gttcaa": "aagttc", + "gttcac": "acgttc", + "gttcag": "aggttc", + "gttcat": "atgttc", + "gttcca": "agttcc", + "gttccc": "cccgtt", + "gttccg": "ccggtt", + "gttcct": "cctgtt", + "gttcga": "agttcg", + "gttcgc": "cgcgtt", + "gttcgg": "cgggtt", + "gttcgt": "cgtgtt", + "gttcta": "agttct", + "gttctc": "cgttct", + "gttctg": "ctggtt", + "gttctt": "cttgtt", + "gttgaa": "aagttg", + "gttgac": "acgttg", + "gttgag": "aggttg", + "gttgat": "atgttg", + "gttgca": "agttgc", + "gttgcc": "ccgttg", + "gttgcg": "cggttg", + "gttgct": "ctgttg", + "gttgga": "agttgg", + "gttggc": "cgttgg", + "gttggg": "ggggtt", + "gttggt": "ggtgtt", + "gttgta": "agttgt", + "gttgtc": "cgttgt", + "gttgtg": "ggttgt", + "gttgtt": "gttgtt", + "gtttaa": "aagttt", + "gtttac": "acgttt", + "gtttag": "aggttt", + "gtttat": "atgttt", + "gtttca": "agtttc", + "gtttcc": "ccgttt", + "gtttcg": 
"cggttt", + "gtttct": "ctgttt", + "gtttga": "agtttg", + "gtttgc": "cgtttg", + "gtttgg": "gggttt", + "gtttgt": "gtgttt", + "gtttta": "agtttt", + "gttttc": "cgtttt", + "gttttg": "ggtttt", + "gttttt": "gttttt", + "taaaaa": "aaaaat", + "taaaac": "aaaact", + "taaaag": "aaaagt", + "taaaat": "aaaatt", + "taaaca": "aaacat", + "taaacc": "aaacct", + "taaacg": "aaacgt", + "taaact": "aaactt", + "taaaga": "aaagat", + "taaagc": "aaagct", + "taaagg": "aaaggt", + "taaagt": "aaagtt", + "taaata": "aaatat", + "taaatc": "aaatct", + "taaatg": "aaatgt", + "taaatt": "aaattt", + "taacaa": "aacaat", + "taacac": "aacact", + "taacag": "aacagt", + "taacat": "aacatt", + "taacca": "aaccat", + "taaccc": "aaccct", + "taaccg": "aaccgt", + "taacct": "aacctt", + "taacga": "aacgat", + "taacgc": "aacgct", + "taacgg": "aacggt", + "taacgt": "aacgtt", + "taacta": "aactat", + "taactc": "aactct", + "taactg": "aactgt", + "taactt": "aacttt", + "taagaa": "aagaat", + "taagac": "aagact", + "taagag": "aagagt", + "taagat": "aagatt", + "taagca": "aagcat", + "taagcc": "aagcct", + "taagcg": "aagcgt", + "taagct": "aagctt", + "taagga": "aaggat", + "taaggc": "aaggct", + "taaggg": "aagggt", + "taaggt": "aaggtt", + "taagta": "aagtat", + "taagtc": "aagtct", + "taagtg": "aagtgt", + "taagtt": "aagttt", + "taataa": "aataat", + "taatac": "aatact", + "taatag": "aatagt", + "taatat": "aatatt", + "taatca": "aatcat", + "taatcc": "aatcct", + "taatcg": "aatcgt", + "taatct": "aatctt", + "taatga": "aatgat", + "taatgc": "aatgct", + "taatgg": "aatggt", + "taatgt": "aatgtt", + "taatta": "aattat", + "taattc": "aattct", + "taattg": "aattgt", + "taattt": "aatttt", + "tacaaa": "aaatac", + "tacaac": "aactac", + "tacaag": "aagtac", + "tacaat": "aattac", + "tacaca": "acacat", + "tacacc": "acacct", + "tacacg": "acacgt", + "tacact": "acactt", + "tacaga": "acagat", + "tacagc": "acagct", + "tacagg": "acaggt", + "tacagt": "acagtt", + "tacata": "acatat", + "tacatc": "acatct", + "tacatg": "acatgt", + "tacatt": "acattt", + "taccaa": "aatacc", + 
"taccac": "accact", + "taccag": "accagt", + "taccat": "accatt", + "taccca": "acccat", + "tacccc": "acccct", + "tacccg": "acccgt", + "taccct": "accctt", + "taccga": "accgat", + "taccgc": "accgct", + "taccgg": "accggt", + "taccgt": "accgtt", + "taccta": "acctat", + "tacctc": "acctct", + "tacctg": "acctgt", + "tacctt": "accttt", + "tacgaa": "aatacg", + "tacgac": "acgact", + "tacgag": "acgagt", + "tacgat": "acgatt", + "tacgca": "acgcat", + "tacgcc": "acgcct", + "tacgcg": "acgcgt", + "tacgct": "acgctt", + "tacgga": "acggat", + "tacggc": "acggct", + "tacggg": "acgggt", + "tacggt": "acggtt", + "tacgta": "acgtat", + "tacgtc": "acgtct", + "tacgtg": "acgtgt", + "tacgtt": "acgttt", + "tactaa": "aatact", + "tactac": "actact", + "tactag": "actagt", + "tactat": "actatt", + "tactca": "actcat", + "tactcc": "actcct", + "tactcg": "actcgt", + "tactct": "actctt", + "tactga": "actgat", + "tactgc": "actgct", + "tactgg": "actggt", + "tactgt": "actgtt", + "tactta": "acttat", + "tacttc": "acttct", + "tacttg": "acttgt", + "tacttt": "actttt", + "tagaaa": "aaatag", + "tagaac": "aactag", + "tagaag": "aagtag", + "tagaat": "aattag", + "tagaca": "acatag", + "tagacc": "acctag", + "tagacg": "acgtag", + "tagact": "acttag", + "tagaga": "agagat", + "tagagc": "agagct", + "tagagg": "agaggt", + "tagagt": "agagtt", + "tagata": "agatat", + "tagatc": "agatct", + "tagatg": "agatgt", + "tagatt": "agattt", + "tagcaa": "aatagc", + "tagcac": "actagc", + "tagcag": "agcagt", + "tagcat": "agcatt", + "tagcca": "agccat", + "tagccc": "agccct", + "tagccg": "agccgt", + "tagcct": "agcctt", + "tagcga": "agcgat", + "tagcgc": "agcgct", + "tagcgg": "agcggt", + "tagcgt": "agcgtt", + "tagcta": "agctat", + "tagctc": "agctct", + "tagctg": "agctgt", + "tagctt": "agcttt", + "taggaa": "aatagg", + "taggac": "actagg", + "taggag": "aggagt", + "taggat": "aggatt", + "taggca": "aggcat", + "taggcc": "aggcct", + "taggcg": "aggcgt", + "taggct": "aggctt", + "taggga": "agggat", + "tagggc": "agggct", + "tagggg": "aggggt", + "tagggt": "agggtt", 
+ "taggta": "aggtat", + "taggtc": "aggtct", + "taggtg": "aggtgt", + "taggtt": "aggttt", + "tagtaa": "aatagt", + "tagtac": "actagt", + "tagtag": "agtagt", + "tagtat": "agtatt", + "tagtca": "agtcat", + "tagtcc": "agtcct", + "tagtcg": "agtcgt", + "tagtct": "agtctt", + "tagtga": "agtgat", + "tagtgc": "agtgct", + "tagtgg": "agtggt", + "tagtgt": "agtgtt", + "tagtta": "agttat", + "tagttc": "agttct", + "tagttg": "agttgt", + "tagttt": "agtttt", + "tataaa": "aaatat", + "tataac": "aactat", + "tataag": "aagtat", + "tataat": "aattat", + "tataca": "acatat", + "tatacc": "acctat", + "tatacg": "acgtat", + "tatact": "acttat", + "tataga": "agatat", + "tatagc": "agctat", + "tatagg": "aggtat", + "tatagt": "agttat", + "tatata": "atatat", + "tatatc": "atatct", + "tatatg": "atatgt", + "tatatt": "atattt", + "tatcaa": "aatatc", + "tatcac": "actatc", + "tatcag": "agtatc", + "tatcat": "atcatt", + "tatcca": "atatcc", + "tatccc": "atccct", + "tatccg": "atccgt", + "tatcct": "atcctt", + "tatcga": "atatcg", + "tatcgc": "atcgct", + "tatcgg": "atcggt", + "tatcgt": "atcgtt", + "tatcta": "atatct", + "tatctc": "atctct", + "tatctg": "atctgt", + "tatctt": "atcttt", + "tatgaa": "aatatg", + "tatgac": "actatg", + "tatgag": "agtatg", + "tatgat": "atgatt", + "tatgca": "atatgc", + "tatgcc": "atgcct", + "tatgcg": "atgcgt", + "tatgct": "atgctt", + "tatgga": "atatgg", + "tatggc": "atggct", + "tatggg": "atgggt", + "tatggt": "atggtt", + "tatgta": "atatgt", + "tatgtc": "atgtct", + "tatgtg": "atgtgt", + "tatgtt": "atgttt", + "tattaa": "aatatt", + "tattac": "actatt", + "tattag": "agtatt", + "tattat": "attatt", + "tattca": "atattc", + "tattcc": "attcct", + "tattcg": "attcgt", + "tattct": "attctt", + "tattga": "atattg", + "tattgc": "attgct", + "tattgg": "attggt", + "tattgt": "attgtt", + "tattta": "atattt", + "tatttc": "atttct", + "tatttg": "atttgt", + "tatttt": "attttt", + "tcaaaa": "aaaatc", + "tcaaac": "aaactc", + "tcaaag": "aaagtc", + "tcaaat": "aaattc", + "tcaaca": "aacatc", + "tcaacc": "aacctc", + "tcaacg": 
"aacgtc", + "tcaact": "aacttc", + "tcaaga": "aagatc", + "tcaagc": "aagctc", + "tcaagg": "aaggtc", + "tcaagt": "aagttc", + "tcaata": "aatatc", + "tcaatc": "aatctc", + "tcaatg": "aatgtc", + "tcaatt": "aatttc", + "tcacaa": "aatcac", + "tcacac": "acactc", + "tcacag": "acagtc", + "tcacat": "acattc", + "tcacca": "accatc", + "tcaccc": "accctc", + "tcaccg": "accgtc", + "tcacct": "accttc", + "tcacga": "acgatc", + "tcacgc": "acgctc", + "tcacgg": "acggtc", + "tcacgt": "acgttc", + "tcacta": "actatc", + "tcactc": "actctc", + "tcactg": "actgtc", + "tcactt": "actttc", + "tcagaa": "aatcag", + "tcagac": "actcag", + "tcagag": "agagtc", + "tcagat": "agattc", + "tcagca": "agcatc", + "tcagcc": "agcctc", + "tcagcg": "agcgtc", + "tcagct": "agcttc", + "tcagga": "aggatc", + "tcaggc": "aggctc", + "tcaggg": "agggtc", + "tcaggt": "aggttc", + "tcagta": "agtatc", + "tcagtc": "agtctc", + "tcagtg": "agtgtc", + "tcagtt": "agtttc", + "tcataa": "aatcat", + "tcatac": "actcat", + "tcatag": "agtcat", + "tcatat": "atattc", + "tcatca": "atcatc", + "tcatcc": "atcctc", + "tcatcg": "atcgtc", + "tcatct": "atcttc", + "tcatga": "atcatg", + "tcatgc": "atgctc", + "tcatgg": "atggtc", + "tcatgt": "atgttc", + "tcatta": "atcatt", + "tcattc": "attctc", + "tcattg": "attgtc", + "tcattt": "attttc", + "tccaaa": "aaatcc", + "tccaac": "aactcc", + "tccaag": "aagtcc", + "tccaat": "aattcc", + "tccaca": "acatcc", + "tccacc": "acctcc", + "tccacg": "acgtcc", + "tccact": "acttcc", + "tccaga": "agatcc", + "tccagc": "agctcc", + "tccagg": "aggtcc", + "tccagt": "agttcc", + "tccata": "atatcc", + "tccatc": "atctcc", + "tccatg": "atgtcc", + "tccatt": "atttcc", + "tcccaa": "aatccc", + "tcccac": "actccc", + "tcccag": "agtccc", + "tcccat": "attccc", + "tcccca": "atcccc", + "tccccc": "ccccct", + "tccccg": "ccccgt", + "tcccct": "cccctt", + "tcccga": "atcccg", + "tcccgc": "cccgct", + "tcccgg": "cccggt", + "tcccgt": "cccgtt", + "tcccta": "atccct", + "tccctc": "ccctct", + "tccctg": "ccctgt", + "tccctt": "cccttt", + "tccgaa": "aatccg", + 
"tccgac": "actccg", + "tccgag": "agtccg", + "tccgat": "attccg", + "tccgca": "atccgc", + "tccgcc": "ccgcct", + "tccgcg": "ccgcgt", + "tccgct": "ccgctt", + "tccgga": "atccgg", + "tccggc": "ccggct", + "tccggg": "ccgggt", + "tccggt": "ccggtt", + "tccgta": "atccgt", + "tccgtc": "ccgtct", + "tccgtg": "ccgtgt", + "tccgtt": "ccgttt", + "tcctaa": "aatcct", + "tcctac": "actcct", + "tcctag": "agtcct", + "tcctat": "attcct", + "tcctca": "atcctc", + "tcctcc": "cctcct", + "tcctcg": "cctcgt", + "tcctct": "cctctt", + "tcctga": "atcctg", + "tcctgc": "cctgct", + "tcctgg": "cctggt", + "tcctgt": "cctgtt", + "tcctta": "atcctt", + "tccttc": "ccttct", + "tccttg": "ccttgt", + "tccttt": "cctttt", + "tcgaaa": "aaatcg", + "tcgaac": "aactcg", + "tcgaag": "aagtcg", + "tcgaat": "aattcg", + "tcgaca": "acatcg", + "tcgacc": "acctcg", + "tcgacg": "acgtcg", + "tcgact": "acttcg", + "tcgaga": "agatcg", + "tcgagc": "agctcg", + "tcgagg": "aggtcg", + "tcgagt": "agttcg", + "tcgata": "atatcg", + "tcgatc": "atctcg", + "tcgatg": "atgtcg", + "tcgatt": "atttcg", + "tcgcaa": "aatcgc", + "tcgcac": "actcgc", + "tcgcag": "agtcgc", + "tcgcat": "attcgc", + "tcgcca": "atcgcc", + "tcgccc": "ccctcg", + "tcgccg": "ccgtcg", + "tcgcct": "ccttcg", + "tcgcga": "atcgcg", + "tcgcgc": "cgcgct", + "tcgcgg": "cgcggt", + "tcgcgt": "cgcgtt", + "tcgcta": "atcgct", + "tcgctc": "cgctct", + "tcgctg": "cgctgt", + "tcgctt": "cgcttt", + "tcggaa": "aatcgg", + "tcggac": "actcgg", + "tcggag": "agtcgg", + "tcggat": "attcgg", + "tcggca": "atcggc", + "tcggcc": "cctcgg", + "tcggcg": "cggcgt", + "tcggct": "cggctt", + "tcggga": "atcggg", + "tcgggc": "cgggct", + "tcgggg": "cggggt", + "tcgggt": "cgggtt", + "tcggta": "atcggt", + "tcggtc": "cggtct", + "tcggtg": "cggtgt", + "tcggtt": "cggttt", + "tcgtaa": "aatcgt", + "tcgtac": "actcgt", + "tcgtag": "agtcgt", + "tcgtat": "attcgt", + "tcgtca": "atcgtc", + "tcgtcc": "cctcgt", + "tcgtcg": "cgtcgt", + "tcgtct": "cgtctt", + "tcgtga": "atcgtg", + "tcgtgc": "cgtgct", + "tcgtgg": "cgtggt", + "tcgtgt": "cgtgtt", 
+ "tcgtta": "atcgtt", + "tcgttc": "cgttct", + "tcgttg": "cgttgt", + "tcgttt": "cgtttt", + "tctaaa": "aaatct", + "tctaac": "aactct", + "tctaag": "aagtct", + "tctaat": "aattct", + "tctaca": "acatct", + "tctacc": "acctct", + "tctacg": "acgtct", + "tctact": "acttct", + "tctaga": "agatct", + "tctagc": "agctct", + "tctagg": "aggtct", + "tctagt": "agttct", + "tctata": "atatct", + "tctatc": "atctct", + "tctatg": "atgtct", + "tctatt": "atttct", + "tctcaa": "aatctc", + "tctcac": "actctc", + "tctcag": "agtctc", + "tctcat": "attctc", + "tctcca": "atctcc", + "tctccc": "ccctct", + "tctccg": "ccgtct", + "tctcct": "ccttct", + "tctcga": "atctcg", + "tctcgc": "cgctct", + "tctcgg": "cggtct", + "tctcgt": "cgttct", + "tctcta": "atctct", + "tctctc": "ctctct", + "tctctg": "ctctgt", + "tctctt": "ctcttt", + "tctgaa": "aatctg", + "tctgac": "actctg", + "tctgag": "agtctg", + "tctgat": "attctg", + "tctgca": "atctgc", + "tctgcc": "cctctg", + "tctgcg": "cgtctg", + "tctgct": "ctgctt", + "tctgga": "atctgg", + "tctggc": "ctctgg", + "tctggg": "ctgggt", + "tctggt": "ctggtt", + "tctgta": "atctgt", + "tctgtc": "ctctgt", + "tctgtg": "ctgtgt", + "tctgtt": "ctgttt", + "tcttaa": "aatctt", + "tcttac": "actctt", + "tcttag": "agtctt", + "tcttat": "attctt", + "tcttca": "atcttc", + "tcttcc": "cctctt", + "tcttcg": "cgtctt", + "tcttct": "cttctt", + "tcttga": "atcttg", + "tcttgc": "ctcttg", + "tcttgg": "cttggt", + "tcttgt": "cttgtt", + "tcttta": "atcttt", + "tctttc": "ctcttt", + "tctttg": "ctttgt", + "tctttt": "cttttt", + "tgaaaa": "aaaatg", + "tgaaac": "aaactg", + "tgaaag": "aaagtg", + "tgaaat": "aaattg", + "tgaaca": "aacatg", + "tgaacc": "aacctg", + "tgaacg": "aacgtg", + "tgaact": "aacttg", + "tgaaga": "aagatg", + "tgaagc": "aagctg", + "tgaagg": "aaggtg", + "tgaagt": "aagttg", + "tgaata": "aatatg", + "tgaatc": "aatctg", + "tgaatg": "aatgtg", + "tgaatt": "aatttg", + "tgacaa": "aatgac", + "tgacac": "acactg", + "tgacag": "acagtg", + "tgacat": "acattg", + "tgacca": "accatg", + "tgaccc": "accctg", + "tgaccg": 
"accgtg", + "tgacct": "accttg", + "tgacga": "acgatg", + "tgacgc": "acgctg", + "tgacgg": "acggtg", + "tgacgt": "acgttg", + "tgacta": "actatg", + "tgactc": "actctg", + "tgactg": "actgtg", + "tgactt": "actttg", + "tgagaa": "aatgag", + "tgagac": "actgag", + "tgagag": "agagtg", + "tgagat": "agattg", + "tgagca": "agcatg", + "tgagcc": "agcctg", + "tgagcg": "agcgtg", + "tgagct": "agcttg", + "tgagga": "aggatg", + "tgaggc": "aggctg", + "tgaggg": "agggtg", + "tgaggt": "aggttg", + "tgagta": "agtatg", + "tgagtc": "agtctg", + "tgagtg": "agtgtg", + "tgagtt": "agtttg", + "tgataa": "aatgat", + "tgatac": "actgat", + "tgatag": "agtgat", + "tgatat": "atattg", + "tgatca": "atcatg", + "tgatcc": "atcctg", + "tgatcg": "atcgtg", + "tgatct": "atcttg", + "tgatga": "atgatg", + "tgatgc": "atgctg", + "tgatgg": "atggtg", + "tgatgt": "atgttg", + "tgatta": "atgatt", + "tgattc": "attctg", + "tgattg": "attgtg", + "tgattt": "attttg", + "tgcaaa": "aaatgc", + "tgcaac": "aactgc", + "tgcaag": "aagtgc", + "tgcaat": "aattgc", + "tgcaca": "acatgc", + "tgcacc": "acctgc", + "tgcacg": "acgtgc", + "tgcact": "acttgc", + "tgcaga": "agatgc", + "tgcagc": "agctgc", + "tgcagg": "aggtgc", + "tgcagt": "agttgc", + "tgcata": "atatgc", + "tgcatc": "atctgc", + "tgcatg": "atgtgc", + "tgcatt": "atttgc", + "tgccaa": "aatgcc", + "tgccac": "actgcc", + "tgccag": "agtgcc", + "tgccat": "attgcc", + "tgccca": "atgccc", + "tgcccc": "cccctg", + "tgcccg": "cccgtg", + "tgccct": "cccttg", + "tgccga": "atgccg", + "tgccgc": "ccgctg", + "tgccgg": "ccggtg", + "tgccgt": "ccgttg", + "tgccta": "atgcct", + "tgcctc": "cctctg", + "tgcctg": "cctgtg", + "tgcctt": "cctttg", + "tgcgaa": "aatgcg", + "tgcgac": "actgcg", + "tgcgag": "agtgcg", + "tgcgat": "attgcg", + "tgcgca": "atgcgc", + "tgcgcc": "cctgcg", + "tgcgcg": "cgcgtg", + "tgcgct": "cgcttg", + "tgcgga": "atgcgg", + "tgcggc": "cggctg", + "tgcggg": "cgggtg", + "tgcggt": "cggttg", + "tgcgta": "atgcgt", + "tgcgtc": "cgtctg", + "tgcgtg": "cgtgtg", + "tgcgtt": "cgtttg", + "tgctaa": "aatgct", + 
"tgctac": "actgct", + "tgctag": "agtgct", + "tgctat": "attgct", + "tgctca": "atgctc", + "tgctcc": "cctgct", + "tgctcg": "cgtgct", + "tgctct": "ctcttg", + "tgctga": "atgctg", + "tgctgc": "ctgctg", + "tgctgg": "ctggtg", + "tgctgt": "ctgttg", + "tgctta": "atgctt", + "tgcttc": "ctgctt", + "tgcttg": "cttgtg", + "tgcttt": "cttttg", + "tggaaa": "aaatgg", + "tggaac": "aactgg", + "tggaag": "aagtgg", + "tggaat": "aattgg", + "tggaca": "acatgg", + "tggacc": "acctgg", + "tggacg": "acgtgg", + "tggact": "acttgg", + "tggaga": "agatgg", + "tggagc": "agctgg", + "tggagg": "aggtgg", + "tggagt": "agttgg", + "tggata": "atatgg", + "tggatc": "atctgg", + "tggatg": "atgtgg", + "tggatt": "atttgg", + "tggcaa": "aatggc", + "tggcac": "actggc", + "tggcag": "agtggc", + "tggcat": "attggc", + "tggcca": "atggcc", + "tggccc": "ccctgg", + "tggccg": "ccgtgg", + "tggcct": "ccttgg", + "tggcga": "atggcg", + "tggcgc": "cgctgg", + "tggcgg": "cggtgg", + "tggcgt": "cgttgg", + "tggcta": "atggct", + "tggctc": "ctctgg", + "tggctg": "ctgtgg", + "tggctt": "ctttgg", + "tgggaa": "aatggg", + "tgggac": "actggg", + "tgggag": "agtggg", + "tgggat": "attggg", + "tgggca": "atgggc", + "tgggcc": "cctggg", + "tgggcg": "cgtggg", + "tgggct": "cttggg", + "tgggga": "atgggg", + "tggggc": "ctgggg", + "tggggg": "gggggt", + "tggggt": "ggggtt", + "tgggta": "atgggt", + "tgggtc": "ctgggt", + "tgggtg": "gggtgt", + "tgggtt": "gggttt", + "tggtaa": "aatggt", + "tggtac": "actggt", + "tggtag": "agtggt", + "tggtat": "attggt", + "tggtca": "atggtc", + "tggtcc": "cctggt", + "tggtcg": "cgtggt", + "tggtct": "cttggt", + "tggtga": "atggtg", + "tggtgc": "ctggtg", + "tggtgg": "ggtggt", + "tggtgt": "ggtgtt", + "tggtta": "atggtt", + "tggttc": "ctggtt", + "tggttg": "ggttgt", + "tggttt": "ggtttt", + "tgtaaa": "aaatgt", + "tgtaac": "aactgt", + "tgtaag": "aagtgt", + "tgtaat": "aattgt", + "tgtaca": "acatgt", + "tgtacc": "acctgt", + "tgtacg": "acgtgt", + "tgtact": "acttgt", + "tgtaga": "agatgt", + "tgtagc": "agctgt", + "tgtagg": "aggtgt", + "tgtagt": "agttgt", 
+ "tgtata": "atatgt", + "tgtatc": "atctgt", + "tgtatg": "atgtgt", + "tgtatt": "atttgt", + "tgtcaa": "aatgtc", + "tgtcac": "actgtc", + "tgtcag": "agtgtc", + "tgtcat": "attgtc", + "tgtcca": "atgtcc", + "tgtccc": "ccctgt", + "tgtccg": "ccgtgt", + "tgtcct": "ccttgt", + "tgtcga": "atgtcg", + "tgtcgc": "cgctgt", + "tgtcgg": "cggtgt", + "tgtcgt": "cgttgt", + "tgtcta": "atgtct", + "tgtctc": "ctctgt", + "tgtctg": "ctgtgt", + "tgtctt": "ctttgt", + "tgtgaa": "aatgtg", + "tgtgac": "actgtg", + "tgtgag": "agtgtg", + "tgtgat": "attgtg", + "tgtgca": "atgtgc", + "tgtgcc": "cctgtg", + "tgtgcg": "cgtgtg", + "tgtgct": "cttgtg", + "tgtgga": "atgtgg", + "tgtggc": "ctgtgg", + "tgtggg": "gggtgt", + "tgtggt": "ggttgt", + "tgtgta": "atgtgt", + "tgtgtc": "ctgtgt", + "tgtgtg": "gtgtgt", + "tgtgtt": "gtgttt", + "tgttaa": "aatgtt", + "tgttac": "actgtt", + "tgttag": "agtgtt", + "tgttat": "attgtt", + "tgttca": "atgttc", + "tgttcc": "cctgtt", + "tgttcg": "cgtgtt", + "tgttct": "cttgtt", + "tgttga": "atgttg", + "tgttgc": "ctgttg", + "tgttgg": "ggtgtt", + "tgttgt": "gttgtt", + "tgttta": "atgttt", + "tgtttc": "ctgttt", + "tgtttg": "gtgttt", + "tgtttt": "gttttt", + "ttaaaa": "aaaatt", + "ttaaac": "aaactt", + "ttaaag": "aaagtt", + "ttaaat": "aaattt", + "ttaaca": "aacatt", + "ttaacc": "aacctt", + "ttaacg": "aacgtt", + "ttaact": "aacttt", + "ttaaga": "aagatt", + "ttaagc": "aagctt", + "ttaagg": "aaggtt", + "ttaagt": "aagttt", + "ttaata": "aatatt", + "ttaatc": "aatctt", + "ttaatg": "aatgtt", + "ttaatt": "aatttt", + "ttacaa": "aattac", + "ttacac": "acactt", + "ttacag": "acagtt", + "ttacat": "acattt", + "ttacca": "accatt", + "ttaccc": "accctt", + "ttaccg": "accgtt", + "ttacct": "accttt", + "ttacga": "acgatt", + "ttacgc": "acgctt", + "ttacgg": "acggtt", + "ttacgt": "acgttt", + "ttacta": "actatt", + "ttactc": "actctt", + "ttactg": "actgtt", + "ttactt": "actttt", + "ttagaa": "aattag", + "ttagac": "acttag", + "ttagag": "agagtt", + "ttagat": "agattt", + "ttagca": "agcatt", + "ttagcc": "agcctt", + "ttagcg": 
"agcgtt", + "ttagct": "agcttt", + "ttagga": "aggatt", + "ttaggc": "aggctt", + "ttaggg": "agggtt", + "ttaggt": "aggttt", + "ttagta": "agtatt", + "ttagtc": "agtctt", + "ttagtg": "agtgtt", + "ttagtt": "agtttt", + "ttataa": "aattat", + "ttatac": "acttat", + "ttatag": "agttat", + "ttatat": "atattt", + "ttatca": "atcatt", + "ttatcc": "atcctt", + "ttatcg": "atcgtt", + "ttatct": "atcttt", + "ttatga": "atgatt", + "ttatgc": "atgctt", + "ttatgg": "atggtt", + "ttatgt": "atgttt", + "ttatta": "attatt", + "ttattc": "attctt", + "ttattg": "attgtt", + "ttattt": "attttt", + "ttcaaa": "aaattc", + "ttcaac": "aacttc", + "ttcaag": "aagttc", + "ttcaat": "aatttc", + "ttcaca": "acattc", + "ttcacc": "accttc", + "ttcacg": "acgttc", + "ttcact": "actttc", + "ttcaga": "agattc", + "ttcagc": "agcttc", + "ttcagg": "aggttc", + "ttcagt": "agtttc", + "ttcata": "atattc", + "ttcatc": "atcttc", + "ttcatg": "atgttc", + "ttcatt": "attttc", + "ttccaa": "aattcc", + "ttccac": "acttcc", + "ttccag": "agttcc", + "ttccat": "atttcc", + "ttccca": "attccc", + "ttcccc": "cccctt", + "ttcccg": "cccgtt", + "ttccct": "cccttt", + "ttccga": "attccg", + "ttccgc": "ccgctt", + "ttccgg": "ccggtt", + "ttccgt": "ccgttt", + "ttccta": "attcct", + "ttcctc": "cctctt", + "ttcctg": "cctgtt", + "ttcctt": "cctttt", + "ttcgaa": "aattcg", + "ttcgac": "acttcg", + "ttcgag": "agttcg", + "ttcgat": "atttcg", + "ttcgca": "attcgc", + "ttcgcc": "ccttcg", + "ttcgcg": "cgcgtt", + "ttcgct": "cgcttt", + "ttcgga": "attcgg", + "ttcggc": "cggctt", + "ttcggg": "cgggtt", + "ttcggt": "cggttt", + "ttcgta": "attcgt", + "ttcgtc": "cgtctt", + "ttcgtg": "cgtgtt", + "ttcgtt": "cgtttt", + "ttctaa": "aattct", + "ttctac": "acttct", + "ttctag": "agttct", + "ttctat": "atttct", + "ttctca": "attctc", + "ttctcc": "ccttct", + "ttctcg": "cgttct", + "ttctct": "ctcttt", + "ttctga": "attctg", + "ttctgc": "ctgctt", + "ttctgg": "ctggtt", + "ttctgt": "ctgttt", + "ttctta": "attctt", + "ttcttc": "cttctt", + "ttcttg": "cttgtt", + "ttcttt": "cttttt", + "ttgaaa": "aaattg", + 
"ttgaac": "aacttg", + "ttgaag": "aagttg", + "ttgaat": "aatttg", + "ttgaca": "acattg", + "ttgacc": "accttg", + "ttgacg": "acgttg", + "ttgact": "actttg", + "ttgaga": "agattg", + "ttgagc": "agcttg", + "ttgagg": "aggttg", + "ttgagt": "agtttg", + "ttgata": "atattg", + "ttgatc": "atcttg", + "ttgatg": "atgttg", + "ttgatt": "attttg", + "ttgcaa": "aattgc", + "ttgcac": "acttgc", + "ttgcag": "agttgc", + "ttgcat": "atttgc", + "ttgcca": "attgcc", + "ttgccc": "cccttg", + "ttgccg": "ccgttg", + "ttgcct": "cctttg", + "ttgcga": "attgcg", + "ttgcgc": "cgcttg", + "ttgcgg": "cggttg", + "ttgcgt": "cgtttg", + "ttgcta": "attgct", + "ttgctc": "ctcttg", + "ttgctg": "ctgttg", + "ttgctt": "cttttg", + "ttggaa": "aattgg", + "ttggac": "acttgg", + "ttggag": "agttgg", + "ttggat": "atttgg", + "ttggca": "attggc", + "ttggcc": "ccttgg", + "ttggcg": "cgttgg", + "ttggct": "ctttgg", + "ttggga": "attggg", + "ttgggc": "cttggg", + "ttgggg": "ggggtt", + "ttgggt": "gggttt", + "ttggta": "attggt", + "ttggtc": "cttggt", + "ttggtg": "ggtgtt", + "ttggtt": "ggtttt", + "ttgtaa": "aattgt", + "ttgtac": "acttgt", + "ttgtag": "agttgt", + "ttgtat": "atttgt", + "ttgtca": "attgtc", + "ttgtcc": "ccttgt", + "ttgtcg": "cgttgt", + "ttgtct": "ctttgt", + "ttgtga": "attgtg", + "ttgtgc": "cttgtg", + "ttgtgg": "ggttgt", + "ttgtgt": "gtgttt", + "ttgtta": "attgtt", + "ttgttc": "cttgtt", + "ttgttg": "gttgtt", + "ttgttt": "gttttt", + "tttaaa": "aaattt", + "tttaac": "aacttt", + "tttaag": "aagttt", + "tttaat": "aatttt", + "tttaca": "acattt", + "tttacc": "accttt", + "tttacg": "acgttt", + "tttact": "actttt", + "tttaga": "agattt", + "tttagc": "agcttt", + "tttagg": "aggttt", + "tttagt": "agtttt", + "tttata": "atattt", + "tttatc": "atcttt", + "tttatg": "atgttt", + "tttatt": "attttt", + "tttcaa": "aatttc", + "tttcac": "actttc", + "tttcag": "agtttc", + "tttcat": "attttc", + "tttcca": "atttcc", + "tttccc": "cccttt", + "tttccg": "ccgttt", + "tttcct": "cctttt", + "tttcga": "atttcg", + "tttcgc": "cgcttt", + "tttcgg": "cggttt", + "tttcgt": "cgtttt", 
+ "tttcta": "atttct", + "tttctc": "ctcttt", + "tttctg": "ctgttt", + "tttctt": "cttttt", + "tttgaa": "aatttg", + "tttgac": "actttg", + "tttgag": "agtttg", + "tttgat": "attttg", + "tttgca": "atttgc", + "tttgcc": "cctttg", + "tttgcg": "cgtttg", + "tttgct": "cttttg", + "tttgga": "atttgg", + "tttggc": "ctttgg", + "tttggg": "gggttt", + "tttggt": "ggtttt", + "tttgta": "atttgt", + "tttgtc": "ctttgt", + "tttgtg": "gtgttt", + "tttgtt": "gttttt", + "ttttaa": "aatttt", + "ttttac": "actttt", + "ttttag": "agtttt", + "ttttat": "attttt", + "ttttca": "attttc", + "ttttcc": "cctttt", + "ttttcg": "cgtttt", + "ttttct": "cttttt", + "ttttga": "attttg", + "ttttgc": "cttttg", + "ttttgg": "ggtttt", + "ttttgt": "gttttt", + "ttttta": "attttt", + "tttttc": "cttttt", + "tttttg": "gttttt", + "tttttt": "tttttt", +} + +// Normalize retourne le k-mer canonique (le plus petit lexicographiquement +// parmi toutes les permutations circulaires). +// Pour les k-mers de taille 1 à 6, utilise la table pré-calculée. +// Pour les k-mers plus grands, calcule à la volée. 
+func Normalize(kmer string) string { + // Pour les k-mers de taille <= 6, utiliser la table + if len(kmer) <= 6 { + if canonical, ok := LexicographicNormalization[kmer]; ok { + return canonical + } + // Si non trouvé dans la table, calculer (cas où le kmer contient des caractères non-acgt) + } + + // Pour les k-mers > 6 ou non trouvés, calculer les rotations circulaires + return getCanonicalCircular(kmer) +} + +// getCanonicalCircular retourne le plus petit k-mer lexicographiquement +// parmi toutes les permutations circulaires du k-mer donné +func getCanonicalCircular(kmer string) string { + if len(kmer) == 0 { + return kmer + } + + canonical := kmer + + // Générer toutes les permutations circulaires + for i := 1; i < len(kmer); i++ { + rotated := kmer[i:] + kmer[:i] + if rotated < canonical { + canonical = rotated + } + } + + return canonical +} diff --git a/pkg/obikmer/kmernorm_test.go b/pkg/obikmer/kmernorm_test.go new file mode 100644 index 0000000..15c5c69 --- /dev/null +++ b/pkg/obikmer/kmernorm_test.go @@ -0,0 +1,77 @@ +package obikmer + +import "testing" + +func TestNormalize(t *testing.T) { + tests := []struct { + name string + kmer string + expected string + }{ + // Test avec k=1 + {"k=1 a", "a", "a"}, + {"k=1 c", "c", "c"}, + + // Test avec k=2 + {"k=2 ca", "ca", "ac"}, + {"k=2 ac", "ac", "ac"}, + + // Test avec k=4 + {"k=4 acgt", "acgt", "acgt"}, + {"k=4 cgta", "cgta", "acgt"}, + {"k=4 gtac", "gtac", "acgt"}, + {"k=4 tacg", "tacg", "acgt"}, + {"k=4 tgca", "tgca", "atgc"}, + + // Test avec k=6 + {"k=6 aaaaaa", "aaaaaa", "aaaaaa"}, + {"k=6 tttttt", "tttttt", "tttttt"}, + + // Test avec k>6 (calcul à la volée) + {"k=7 aaaaaaa", "aaaaaaa", "aaaaaaa"}, + {"k=7 tgcatgc", "tgcatgc", "atgctgc"}, + {"k=7 gcatgct", "gcatgct", "atgctgc"}, + {"k=8 acgtacgt", "acgtacgt", "acgtacgt"}, + {"k=8 gtacgtac", "gtacgtac", "acgtacgt"}, + {"k=10 acgtacgtac", "acgtacgtac", "acacgtacgt"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := 
Normalize(tt.kmer) + if result != tt.expected { + t.Errorf("Normalize(%q) = %q, want %q", tt.kmer, result, tt.expected) + } + }) + } +} + +func TestNormalizeTableConsistency(t *testing.T) { + // Vérifier que tous les kmers de la table donnent le bon résultat + // en comparant avec le calcul à la volée + for kmer, expected := range LexicographicNormalization { + calculated := getCanonicalCircular(kmer) + if calculated != expected { + t.Errorf("Table inconsistency for %q: table=%q, calculated=%q", + kmer, expected, calculated) + } + } +} + +func BenchmarkNormalizeSmall(b *testing.B) { + // Benchmark pour k<=6 (utilise la table) + kmer := "acgtac" + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = Normalize(kmer) + } +} + +func BenchmarkNormalizeLarge(b *testing.B) { + // Benchmark pour k>6 (calcul à la volée) + kmer := "acgtacgtac" + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = Normalize(kmer) + } +} diff --git a/pkg/obikmer/kmernormint.go b/pkg/obikmer/kmernormint.go new file mode 100644 index 0000000..f82e882 --- /dev/null +++ b/pkg/obikmer/kmernormint.go @@ -0,0 +1,5670 @@ +package obikmer + +// LexicographicNormalizationInt contient les tables de normalisation +// pour les k-mers encodés en entier (k=1 à 6). +// L'index du tableau correspond à la taille du k-mer. +// Chaque table associe le code entier d'un k-mer à son représentant +// lexicographique canonique obtenu par permutation circulaire. 
+var LexicographicNormalizationInt = [7]map[int]int{ + 0: nil, // k=0 n'existe pas + 1: { + 0: 0, // a -> a + 1: 1, // c -> c + 2: 2, // g -> g + 3: 3, // t -> t + }, + 2: { + 0: 0, // aa -> aa + 1: 1, // ac -> ac + 2: 2, // ag -> ag + 3: 3, // at -> at + 4: 1, // ca -> ac + 5: 5, // cc -> cc + 6: 6, // cg -> cg + 7: 7, // ct -> ct + 8: 2, // ga -> ag + 9: 6, // gc -> cg + 10: 10, // gg -> gg + 11: 11, // gt -> gt + 12: 3, // ta -> at + 13: 7, // tc -> ct + 14: 11, // tg -> gt + 15: 15, // tt -> tt + }, + 3: { + 0: 0, // aaa -> aaa + 1: 1, // aac -> aac + 2: 2, // aag -> aag + 3: 3, // aat -> aat + 4: 1, // aca -> aac + 5: 5, // acc -> acc + 6: 6, // acg -> acg + 7: 7, // act -> act + 8: 2, // aga -> aag + 9: 9, // agc -> agc + 10: 10, // agg -> agg + 11: 11, // agt -> agt + 12: 3, // ata -> aat + 13: 13, // atc -> atc + 14: 14, // atg -> atg + 15: 15, // att -> att + 16: 1, // caa -> aac + 17: 5, // cac -> acc + 18: 9, // cag -> agc + 19: 13, // cat -> atc + 20: 5, // cca -> acc + 21: 21, // ccc -> ccc + 22: 22, // ccg -> ccg + 23: 23, // cct -> cct + 24: 6, // cga -> acg + 25: 22, // cgc -> ccg + 26: 26, // cgg -> cgg + 27: 27, // cgt -> cgt + 28: 7, // cta -> act + 29: 23, // ctc -> cct + 30: 30, // ctg -> ctg + 31: 31, // ctt -> ctt + 32: 2, // gaa -> aag + 33: 6, // gac -> acg + 34: 10, // gag -> agg + 35: 14, // gat -> atg + 36: 9, // gca -> agc + 37: 22, // gcc -> ccg + 38: 26, // gcg -> cgg + 39: 30, // gct -> ctg + 40: 10, // gga -> agg + 41: 26, // ggc -> cgg + 42: 42, // ggg -> ggg + 43: 43, // ggt -> ggt + 44: 11, // gta -> agt + 45: 27, // gtc -> cgt + 46: 43, // gtg -> ggt + 47: 47, // gtt -> gtt + 48: 3, // taa -> aat + 49: 7, // tac -> act + 50: 11, // tag -> agt + 51: 15, // tat -> att + 52: 13, // tca -> atc + 53: 23, // tcc -> cct + 54: 27, // tcg -> cgt + 55: 31, // tct -> ctt + 56: 14, // tga -> atg + 57: 30, // tgc -> ctg + 58: 43, // tgg -> ggt + 59: 47, // tgt -> gtt + 60: 15, // tta -> att + 61: 31, // ttc -> ctt + 62: 47, // ttg -> gtt + 
63: 63, // ttt -> ttt + }, + 4: { + 0: 0, // aaaa -> aaaa + 1: 1, // aaac -> aaac + 2: 2, // aaag -> aaag + 3: 3, // aaat -> aaat + 4: 1, // aaca -> aaac + 5: 5, // aacc -> aacc + 6: 6, // aacg -> aacg + 7: 7, // aact -> aact + 8: 2, // aaga -> aaag + 9: 9, // aagc -> aagc + 10: 10, // aagg -> aagg + 11: 11, // aagt -> aagt + 12: 3, // aata -> aaat + 13: 13, // aatc -> aatc + 14: 14, // aatg -> aatg + 15: 15, // aatt -> aatt + 16: 1, // acaa -> aaac + 17: 17, // acac -> acac + 18: 18, // acag -> acag + 19: 19, // acat -> acat + 20: 5, // acca -> aacc + 21: 21, // accc -> accc + 22: 22, // accg -> accg + 23: 23, // acct -> acct + 24: 6, // acga -> aacg + 25: 25, // acgc -> acgc + 26: 26, // acgg -> acgg + 27: 27, // acgt -> acgt + 28: 7, // acta -> aact + 29: 29, // actc -> actc + 30: 30, // actg -> actg + 31: 31, // actt -> actt + 32: 2, // agaa -> aaag + 33: 18, // agac -> acag + 34: 34, // agag -> agag + 35: 35, // agat -> agat + 36: 9, // agca -> aagc + 37: 37, // agcc -> agcc + 38: 38, // agcg -> agcg + 39: 39, // agct -> agct + 40: 10, // agga -> aagg + 41: 41, // aggc -> aggc + 42: 42, // aggg -> aggg + 43: 43, // aggt -> aggt + 44: 11, // agta -> aagt + 45: 45, // agtc -> agtc + 46: 46, // agtg -> agtg + 47: 47, // agtt -> agtt + 48: 3, // ataa -> aaat + 49: 19, // atac -> acat + 50: 35, // atag -> agat + 51: 51, // atat -> atat + 52: 13, // atca -> aatc + 53: 53, // atcc -> atcc + 54: 54, // atcg -> atcg + 55: 55, // atct -> atct + 56: 14, // atga -> aatg + 57: 57, // atgc -> atgc + 58: 58, // atgg -> atgg + 59: 59, // atgt -> atgt + 60: 15, // atta -> aatt + 61: 61, // attc -> attc + 62: 62, // attg -> attg + 63: 63, // attt -> attt + 64: 1, // caaa -> aaac + 65: 5, // caac -> aacc + 66: 9, // caag -> aagc + 67: 13, // caat -> aatc + 68: 17, // caca -> acac + 69: 21, // cacc -> accc + 70: 25, // cacg -> acgc + 71: 29, // cact -> actc + 72: 18, // caga -> acag + 73: 37, // cagc -> agcc + 74: 41, // cagg -> aggc + 75: 45, // cagt -> agtc + 76: 19, // cata -> 
acat + 77: 53, // catc -> atcc + 78: 57, // catg -> atgc + 79: 61, // catt -> attc + 80: 5, // ccaa -> aacc + 81: 21, // ccac -> accc + 82: 37, // ccag -> agcc + 83: 53, // ccat -> atcc + 84: 21, // ccca -> accc + 85: 85, // cccc -> cccc + 86: 86, // cccg -> cccg + 87: 87, // ccct -> ccct + 88: 22, // ccga -> accg + 89: 86, // ccgc -> cccg + 90: 90, // ccgg -> ccgg + 91: 91, // ccgt -> ccgt + 92: 23, // ccta -> acct + 93: 87, // cctc -> ccct + 94: 94, // cctg -> cctg + 95: 95, // cctt -> cctt + 96: 6, // cgaa -> aacg + 97: 22, // cgac -> accg + 98: 38, // cgag -> agcg + 99: 54, // cgat -> atcg + 100: 25, // cgca -> acgc + 101: 86, // cgcc -> cccg + 102: 102, // cgcg -> cgcg + 103: 103, // cgct -> cgct + 104: 26, // cgga -> acgg + 105: 90, // cggc -> ccgg + 106: 106, // cggg -> cggg + 107: 107, // cggt -> cggt + 108: 27, // cgta -> acgt + 109: 91, // cgtc -> ccgt + 110: 110, // cgtg -> cgtg + 111: 111, // cgtt -> cgtt + 112: 7, // ctaa -> aact + 113: 23, // ctac -> acct + 114: 39, // ctag -> agct + 115: 55, // ctat -> atct + 116: 29, // ctca -> actc + 117: 87, // ctcc -> ccct + 118: 103, // ctcg -> cgct + 119: 119, // ctct -> ctct + 120: 30, // ctga -> actg + 121: 94, // ctgc -> cctg + 122: 122, // ctgg -> ctgg + 123: 123, // ctgt -> ctgt + 124: 31, // ctta -> actt + 125: 95, // cttc -> cctt + 126: 126, // cttg -> cttg + 127: 127, // cttt -> cttt + 128: 2, // gaaa -> aaag + 129: 6, // gaac -> aacg + 130: 10, // gaag -> aagg + 131: 14, // gaat -> aatg + 132: 18, // gaca -> acag + 133: 22, // gacc -> accg + 134: 26, // gacg -> acgg + 135: 30, // gact -> actg + 136: 34, // gaga -> agag + 137: 38, // gagc -> agcg + 138: 42, // gagg -> aggg + 139: 46, // gagt -> agtg + 140: 35, // gata -> agat + 141: 54, // gatc -> atcg + 142: 58, // gatg -> atgg + 143: 62, // gatt -> attg + 144: 9, // gcaa -> aagc + 145: 25, // gcac -> acgc + 146: 41, // gcag -> aggc + 147: 57, // gcat -> atgc + 148: 37, // gcca -> agcc + 149: 86, // gccc -> cccg + 150: 90, // gccg -> ccgg + 151: 94, // 
gcct -> cctg + 152: 38, // gcga -> agcg + 153: 102, // gcgc -> cgcg + 154: 106, // gcgg -> cggg + 155: 110, // gcgt -> cgtg + 156: 39, // gcta -> agct + 157: 103, // gctc -> cgct + 158: 122, // gctg -> ctgg + 159: 126, // gctt -> cttg + 160: 10, // ggaa -> aagg + 161: 26, // ggac -> acgg + 162: 42, // ggag -> aggg + 163: 58, // ggat -> atgg + 164: 41, // ggca -> aggc + 165: 90, // ggcc -> ccgg + 166: 106, // ggcg -> cggg + 167: 122, // ggct -> ctgg + 168: 42, // ggga -> aggg + 169: 106, // gggc -> cggg + 170: 170, // gggg -> gggg + 171: 171, // gggt -> gggt + 172: 43, // ggta -> aggt + 173: 107, // ggtc -> cggt + 174: 171, // ggtg -> gggt + 175: 175, // ggtt -> ggtt + 176: 11, // gtaa -> aagt + 177: 27, // gtac -> acgt + 178: 43, // gtag -> aggt + 179: 59, // gtat -> atgt + 180: 45, // gtca -> agtc + 181: 91, // gtcc -> ccgt + 182: 107, // gtcg -> cggt + 183: 123, // gtct -> ctgt + 184: 46, // gtga -> agtg + 185: 110, // gtgc -> cgtg + 186: 171, // gtgg -> gggt + 187: 187, // gtgt -> gtgt + 188: 47, // gtta -> agtt + 189: 111, // gttc -> cgtt + 190: 175, // gttg -> ggtt + 191: 191, // gttt -> gttt + 192: 3, // taaa -> aaat + 193: 7, // taac -> aact + 194: 11, // taag -> aagt + 195: 15, // taat -> aatt + 196: 19, // taca -> acat + 197: 23, // tacc -> acct + 198: 27, // tacg -> acgt + 199: 31, // tact -> actt + 200: 35, // taga -> agat + 201: 39, // tagc -> agct + 202: 43, // tagg -> aggt + 203: 47, // tagt -> agtt + 204: 51, // tata -> atat + 205: 55, // tatc -> atct + 206: 59, // tatg -> atgt + 207: 63, // tatt -> attt + 208: 13, // tcaa -> aatc + 209: 29, // tcac -> actc + 210: 45, // tcag -> agtc + 211: 61, // tcat -> attc + 212: 53, // tcca -> atcc + 213: 87, // tccc -> ccct + 214: 91, // tccg -> ccgt + 215: 95, // tcct -> cctt + 216: 54, // tcga -> atcg + 217: 103, // tcgc -> cgct + 218: 107, // tcgg -> cggt + 219: 111, // tcgt -> cgtt + 220: 55, // tcta -> atct + 221: 119, // tctc -> ctct + 222: 123, // tctg -> ctgt + 223: 127, // tctt -> cttt + 224: 14, // 
tgaa -> aatg + 225: 30, // tgac -> actg + 226: 46, // tgag -> agtg + 227: 62, // tgat -> attg + 228: 57, // tgca -> atgc + 229: 94, // tgcc -> cctg + 230: 110, // tgcg -> cgtg + 231: 126, // tgct -> cttg + 232: 58, // tgga -> atgg + 233: 122, // tggc -> ctgg + 234: 171, // tggg -> gggt + 235: 175, // tggt -> ggtt + 236: 59, // tgta -> atgt + 237: 123, // tgtc -> ctgt + 238: 187, // tgtg -> gtgt + 239: 191, // tgtt -> gttt + 240: 15, // ttaa -> aatt + 241: 31, // ttac -> actt + 242: 47, // ttag -> agtt + 243: 63, // ttat -> attt + 244: 61, // ttca -> attc + 245: 95, // ttcc -> cctt + 246: 111, // ttcg -> cgtt + 247: 127, // ttct -> cttt + 248: 62, // ttga -> attg + 249: 126, // ttgc -> cttg + 250: 175, // ttgg -> ggtt + 251: 191, // ttgt -> gttt + 252: 63, // ttta -> attt + 253: 127, // tttc -> cttt + 254: 191, // tttg -> gttt + 255: 255, // tttt -> tttt + }, + 5: { + 0: 0, // aaaaa -> aaaaa + 1: 1, // aaaac -> aaaac + 2: 2, // aaaag -> aaaag + 3: 3, // aaaat -> aaaat + 4: 1, // aaaca -> aaaac + 5: 5, // aaacc -> aaacc + 6: 6, // aaacg -> aaacg + 7: 7, // aaact -> aaact + 8: 2, // aaaga -> aaaag + 9: 9, // aaagc -> aaagc + 10: 10, // aaagg -> aaagg + 11: 11, // aaagt -> aaagt + 12: 3, // aaata -> aaaat + 13: 13, // aaatc -> aaatc + 14: 14, // aaatg -> aaatg + 15: 15, // aaatt -> aaatt + 16: 1, // aacaa -> aaaac + 17: 17, // aacac -> aacac + 18: 18, // aacag -> aacag + 19: 19, // aacat -> aacat + 20: 5, // aacca -> aaacc + 21: 21, // aaccc -> aaccc + 22: 22, // aaccg -> aaccg + 23: 23, // aacct -> aacct + 24: 6, // aacga -> aaacg + 25: 25, // aacgc -> aacgc + 26: 26, // aacgg -> aacgg + 27: 27, // aacgt -> aacgt + 28: 7, // aacta -> aaact + 29: 29, // aactc -> aactc + 30: 30, // aactg -> aactg + 31: 31, // aactt -> aactt + 32: 2, // aagaa -> aaaag + 33: 33, // aagac -> aagac + 34: 34, // aagag -> aagag + 35: 35, // aagat -> aagat + 36: 9, // aagca -> aaagc + 37: 37, // aagcc -> aagcc + 38: 38, // aagcg -> aagcg + 39: 39, // aagct -> aagct + 40: 10, // aagga -> aaagg 
+ 41: 41, // aaggc -> aaggc + 42: 42, // aaggg -> aaggg + 43: 43, // aaggt -> aaggt + 44: 11, // aagta -> aaagt + 45: 45, // aagtc -> aagtc + 46: 46, // aagtg -> aagtg + 47: 47, // aagtt -> aagtt + 48: 3, // aataa -> aaaat + 49: 49, // aatac -> aatac + 50: 50, // aatag -> aatag + 51: 51, // aatat -> aatat + 52: 13, // aatca -> aaatc + 53: 53, // aatcc -> aatcc + 54: 54, // aatcg -> aatcg + 55: 55, // aatct -> aatct + 56: 14, // aatga -> aaatg + 57: 57, // aatgc -> aatgc + 58: 58, // aatgg -> aatgg + 59: 59, // aatgt -> aatgt + 60: 15, // aatta -> aaatt + 61: 61, // aattc -> aattc + 62: 62, // aattg -> aattg + 63: 63, // aattt -> aattt + 64: 1, // acaaa -> aaaac + 65: 17, // acaac -> aacac + 66: 33, // acaag -> aagac + 67: 49, // acaat -> aatac + 68: 17, // acaca -> aacac + 69: 69, // acacc -> acacc + 70: 70, // acacg -> acacg + 71: 71, // acact -> acact + 72: 18, // acaga -> aacag + 73: 73, // acagc -> acagc + 74: 74, // acagg -> acagg + 75: 75, // acagt -> acagt + 76: 19, // acata -> aacat + 77: 77, // acatc -> acatc + 78: 78, // acatg -> acatg + 79: 79, // acatt -> acatt + 80: 5, // accaa -> aaacc + 81: 69, // accac -> acacc + 82: 82, // accag -> accag + 83: 83, // accat -> accat + 84: 21, // accca -> aaccc + 85: 85, // acccc -> acccc + 86: 86, // acccg -> acccg + 87: 87, // accct -> accct + 88: 22, // accga -> aaccg + 89: 89, // accgc -> accgc + 90: 90, // accgg -> accgg + 91: 91, // accgt -> accgt + 92: 23, // accta -> aacct + 93: 93, // acctc -> acctc + 94: 94, // acctg -> acctg + 95: 95, // acctt -> acctt + 96: 6, // acgaa -> aaacg + 97: 70, // acgac -> acacg + 98: 98, // acgag -> acgag + 99: 99, // acgat -> acgat + 100: 25, // acgca -> aacgc + 101: 101, // acgcc -> acgcc + 102: 102, // acgcg -> acgcg + 103: 103, // acgct -> acgct + 104: 26, // acgga -> aacgg + 105: 105, // acggc -> acggc + 106: 106, // acggg -> acggg + 107: 107, // acggt -> acggt + 108: 27, // acgta -> aacgt + 109: 109, // acgtc -> acgtc + 110: 110, // acgtg -> acgtg + 111: 111, // acgtt -> 
acgtt + 112: 7, // actaa -> aaact + 113: 71, // actac -> acact + 114: 114, // actag -> actag + 115: 115, // actat -> actat + 116: 29, // actca -> aactc + 117: 117, // actcc -> actcc + 118: 118, // actcg -> actcg + 119: 119, // actct -> actct + 120: 30, // actga -> aactg + 121: 121, // actgc -> actgc + 122: 122, // actgg -> actgg + 123: 123, // actgt -> actgt + 124: 31, // actta -> aactt + 125: 125, // acttc -> acttc + 126: 126, // acttg -> acttg + 127: 127, // acttt -> acttt + 128: 2, // agaaa -> aaaag + 129: 18, // agaac -> aacag + 130: 34, // agaag -> aagag + 131: 50, // agaat -> aatag + 132: 33, // agaca -> aagac + 133: 82, // agacc -> accag + 134: 98, // agacg -> acgag + 135: 114, // agact -> actag + 136: 34, // agaga -> aagag + 137: 137, // agagc -> agagc + 138: 138, // agagg -> agagg + 139: 139, // agagt -> agagt + 140: 35, // agata -> aagat + 141: 141, // agatc -> agatc + 142: 142, // agatg -> agatg + 143: 143, // agatt -> agatt + 144: 9, // agcaa -> aaagc + 145: 73, // agcac -> acagc + 146: 137, // agcag -> agagc + 147: 147, // agcat -> agcat + 148: 37, // agcca -> aagcc + 149: 149, // agccc -> agccc + 150: 150, // agccg -> agccg + 151: 151, // agcct -> agcct + 152: 38, // agcga -> aagcg + 153: 153, // agcgc -> agcgc + 154: 154, // agcgg -> agcgg + 155: 155, // agcgt -> agcgt + 156: 39, // agcta -> aagct + 157: 157, // agctc -> agctc + 158: 158, // agctg -> agctg + 159: 159, // agctt -> agctt + 160: 10, // aggaa -> aaagg + 161: 74, // aggac -> acagg + 162: 138, // aggag -> agagg + 163: 163, // aggat -> aggat + 164: 41, // aggca -> aaggc + 165: 165, // aggcc -> aggcc + 166: 166, // aggcg -> aggcg + 167: 167, // aggct -> aggct + 168: 42, // aggga -> aaggg + 169: 169, // agggc -> agggc + 170: 170, // agggg -> agggg + 171: 171, // agggt -> agggt + 172: 43, // aggta -> aaggt + 173: 173, // aggtc -> aggtc + 174: 174, // aggtg -> aggtg + 175: 175, // aggtt -> aggtt + 176: 11, // agtaa -> aaagt + 177: 75, // agtac -> acagt + 178: 139, // agtag -> agagt + 179: 179, 
// agtat -> agtat + 180: 45, // agtca -> aagtc + 181: 181, // agtcc -> agtcc + 182: 182, // agtcg -> agtcg + 183: 183, // agtct -> agtct + 184: 46, // agtga -> aagtg + 185: 185, // agtgc -> agtgc + 186: 186, // agtgg -> agtgg + 187: 187, // agtgt -> agtgt + 188: 47, // agtta -> aagtt + 189: 189, // agttc -> agttc + 190: 190, // agttg -> agttg + 191: 191, // agttt -> agttt + 192: 3, // ataaa -> aaaat + 193: 19, // ataac -> aacat + 194: 35, // ataag -> aagat + 195: 51, // ataat -> aatat + 196: 49, // ataca -> aatac + 197: 83, // atacc -> accat + 198: 99, // atacg -> acgat + 199: 115, // atact -> actat + 200: 50, // ataga -> aatag + 201: 147, // atagc -> agcat + 202: 163, // atagg -> aggat + 203: 179, // atagt -> agtat + 204: 51, // atata -> aatat + 205: 205, // atatc -> atatc + 206: 206, // atatg -> atatg + 207: 207, // atatt -> atatt + 208: 13, // atcaa -> aaatc + 209: 77, // atcac -> acatc + 210: 141, // atcag -> agatc + 211: 205, // atcat -> atatc + 212: 53, // atcca -> aatcc + 213: 213, // atccc -> atccc + 214: 214, // atccg -> atccg + 215: 215, // atcct -> atcct + 216: 54, // atcga -> aatcg + 217: 217, // atcgc -> atcgc + 218: 218, // atcgg -> atcgg + 219: 219, // atcgt -> atcgt + 220: 55, // atcta -> aatct + 221: 221, // atctc -> atctc + 222: 222, // atctg -> atctg + 223: 223, // atctt -> atctt + 224: 14, // atgaa -> aaatg + 225: 78, // atgac -> acatg + 226: 142, // atgag -> agatg + 227: 206, // atgat -> atatg + 228: 57, // atgca -> aatgc + 229: 229, // atgcc -> atgcc + 230: 230, // atgcg -> atgcg + 231: 231, // atgct -> atgct + 232: 58, // atgga -> aatgg + 233: 233, // atggc -> atggc + 234: 234, // atggg -> atggg + 235: 235, // atggt -> atggt + 236: 59, // atgta -> aatgt + 237: 237, // atgtc -> atgtc + 238: 238, // atgtg -> atgtg + 239: 239, // atgtt -> atgtt + 240: 15, // attaa -> aaatt + 241: 79, // attac -> acatt + 242: 143, // attag -> agatt + 243: 207, // attat -> atatt + 244: 61, // attca -> aattc + 245: 245, // attcc -> attcc + 246: 246, // attcg -> 
attcg + 247: 247, // attct -> attct + 248: 62, // attga -> aattg + 249: 249, // attgc -> attgc + 250: 250, // attgg -> attgg + 251: 251, // attgt -> attgt + 252: 63, // attta -> aattt + 253: 253, // atttc -> atttc + 254: 254, // atttg -> atttg + 255: 255, // atttt -> atttt + 256: 1, // caaaa -> aaaac + 257: 5, // caaac -> aaacc + 258: 9, // caaag -> aaagc + 259: 13, // caaat -> aaatc + 260: 17, // caaca -> aacac + 261: 21, // caacc -> aaccc + 262: 25, // caacg -> aacgc + 263: 29, // caact -> aactc + 264: 33, // caaga -> aagac + 265: 37, // caagc -> aagcc + 266: 41, // caagg -> aaggc + 267: 45, // caagt -> aagtc + 268: 49, // caata -> aatac + 269: 53, // caatc -> aatcc + 270: 57, // caatg -> aatgc + 271: 61, // caatt -> aattc + 272: 17, // cacaa -> aacac + 273: 69, // cacac -> acacc + 274: 73, // cacag -> acagc + 275: 77, // cacat -> acatc + 276: 69, // cacca -> acacc + 277: 85, // caccc -> acccc + 278: 89, // caccg -> accgc + 279: 93, // cacct -> acctc + 280: 70, // cacga -> acacg + 281: 101, // cacgc -> acgcc + 282: 105, // cacgg -> acggc + 283: 109, // cacgt -> acgtc + 284: 71, // cacta -> acact + 285: 117, // cactc -> actcc + 286: 121, // cactg -> actgc + 287: 125, // cactt -> acttc + 288: 18, // cagaa -> aacag + 289: 82, // cagac -> accag + 290: 137, // cagag -> agagc + 291: 141, // cagat -> agatc + 292: 73, // cagca -> acagc + 293: 149, // cagcc -> agccc + 294: 153, // cagcg -> agcgc + 295: 157, // cagct -> agctc + 296: 74, // cagga -> acagg + 297: 165, // caggc -> aggcc + 298: 169, // caggg -> agggc + 299: 173, // caggt -> aggtc + 300: 75, // cagta -> acagt + 301: 181, // cagtc -> agtcc + 302: 185, // cagtg -> agtgc + 303: 189, // cagtt -> agttc + 304: 19, // cataa -> aacat + 305: 83, // catac -> accat + 306: 147, // catag -> agcat + 307: 205, // catat -> atatc + 308: 77, // catca -> acatc + 309: 213, // catcc -> atccc + 310: 217, // catcg -> atcgc + 311: 221, // catct -> atctc + 312: 78, // catga -> acatg + 313: 229, // catgc -> atgcc + 314: 233, // catgg -> 
atggc + 315: 237, // catgt -> atgtc + 316: 79, // catta -> acatt + 317: 245, // cattc -> attcc + 318: 249, // cattg -> attgc + 319: 253, // cattt -> atttc + 320: 5, // ccaaa -> aaacc + 321: 21, // ccaac -> aaccc + 322: 37, // ccaag -> aagcc + 323: 53, // ccaat -> aatcc + 324: 69, // ccaca -> acacc + 325: 85, // ccacc -> acccc + 326: 101, // ccacg -> acgcc + 327: 117, // ccact -> actcc + 328: 82, // ccaga -> accag + 329: 149, // ccagc -> agccc + 330: 165, // ccagg -> aggcc + 331: 181, // ccagt -> agtcc + 332: 83, // ccata -> accat + 333: 213, // ccatc -> atccc + 334: 229, // ccatg -> atgcc + 335: 245, // ccatt -> attcc + 336: 21, // cccaa -> aaccc + 337: 85, // cccac -> acccc + 338: 149, // cccag -> agccc + 339: 213, // cccat -> atccc + 340: 85, // cccca -> acccc + 341: 341, // ccccc -> ccccc + 342: 342, // ccccg -> ccccg + 343: 343, // cccct -> cccct + 344: 86, // cccga -> acccg + 345: 342, // cccgc -> ccccg + 346: 346, // cccgg -> cccgg + 347: 347, // cccgt -> cccgt + 348: 87, // cccta -> accct + 349: 343, // ccctc -> cccct + 350: 350, // ccctg -> ccctg + 351: 351, // ccctt -> ccctt + 352: 22, // ccgaa -> aaccg + 353: 86, // ccgac -> acccg + 354: 150, // ccgag -> agccg + 355: 214, // ccgat -> atccg + 356: 89, // ccgca -> accgc + 357: 342, // ccgcc -> ccccg + 358: 358, // ccgcg -> ccgcg + 359: 359, // ccgct -> ccgct + 360: 90, // ccgga -> accgg + 361: 346, // ccggc -> cccgg + 362: 362, // ccggg -> ccggg + 363: 363, // ccggt -> ccggt + 364: 91, // ccgta -> accgt + 365: 347, // ccgtc -> cccgt + 366: 366, // ccgtg -> ccgtg + 367: 367, // ccgtt -> ccgtt + 368: 23, // cctaa -> aacct + 369: 87, // cctac -> accct + 370: 151, // cctag -> agcct + 371: 215, // cctat -> atcct + 372: 93, // cctca -> acctc + 373: 343, // cctcc -> cccct + 374: 374, // cctcg -> cctcg + 375: 375, // cctct -> cctct + 376: 94, // cctga -> acctg + 377: 350, // cctgc -> ccctg + 378: 378, // cctgg -> cctgg + 379: 379, // cctgt -> cctgt + 380: 95, // cctta -> acctt + 381: 351, // ccttc -> ccctt + 382: 
382, // ccttg -> ccttg + 383: 383, // ccttt -> ccttt + 384: 6, // cgaaa -> aaacg + 385: 22, // cgaac -> aaccg + 386: 38, // cgaag -> aagcg + 387: 54, // cgaat -> aatcg + 388: 70, // cgaca -> acacg + 389: 86, // cgacc -> acccg + 390: 102, // cgacg -> acgcg + 391: 118, // cgact -> actcg + 392: 98, // cgaga -> acgag + 393: 150, // cgagc -> agccg + 394: 166, // cgagg -> aggcg + 395: 182, // cgagt -> agtcg + 396: 99, // cgata -> acgat + 397: 214, // cgatc -> atccg + 398: 230, // cgatg -> atgcg + 399: 246, // cgatt -> attcg + 400: 25, // cgcaa -> aacgc + 401: 89, // cgcac -> accgc + 402: 153, // cgcag -> agcgc + 403: 217, // cgcat -> atcgc + 404: 101, // cgcca -> acgcc + 405: 342, // cgccc -> ccccg + 406: 358, // cgccg -> ccgcg + 407: 374, // cgcct -> cctcg + 408: 102, // cgcga -> acgcg + 409: 358, // cgcgc -> ccgcg + 410: 410, // cgcgg -> cgcgg + 411: 411, // cgcgt -> cgcgt + 412: 103, // cgcta -> acgct + 413: 359, // cgctc -> ccgct + 414: 414, // cgctg -> cgctg + 415: 415, // cgctt -> cgctt + 416: 26, // cggaa -> aacgg + 417: 90, // cggac -> accgg + 418: 154, // cggag -> agcgg + 419: 218, // cggat -> atcgg + 420: 105, // cggca -> acggc + 421: 346, // cggcc -> cccgg + 422: 410, // cggcg -> cgcgg + 423: 423, // cggct -> cggct + 424: 106, // cggga -> acggg + 425: 362, // cgggc -> ccggg + 426: 426, // cgggg -> cgggg + 427: 427, // cgggt -> cgggt + 428: 107, // cggta -> acggt + 429: 363, // cggtc -> ccggt + 430: 430, // cggtg -> cggtg + 431: 431, // cggtt -> cggtt + 432: 27, // cgtaa -> aacgt + 433: 91, // cgtac -> accgt + 434: 155, // cgtag -> agcgt + 435: 219, // cgtat -> atcgt + 436: 109, // cgtca -> acgtc + 437: 347, // cgtcc -> cccgt + 438: 411, // cgtcg -> cgcgt + 439: 439, // cgtct -> cgtct + 440: 110, // cgtga -> acgtg + 441: 366, // cgtgc -> ccgtg + 442: 442, // cgtgg -> cgtgg + 443: 443, // cgtgt -> cgtgt + 444: 111, // cgtta -> acgtt + 445: 367, // cgttc -> ccgtt + 446: 446, // cgttg -> cgttg + 447: 447, // cgttt -> cgttt + 448: 7, // ctaaa -> aaact + 449: 23, // 
ctaac -> aacct + 450: 39, // ctaag -> aagct + 451: 55, // ctaat -> aatct + 452: 71, // ctaca -> acact + 453: 87, // ctacc -> accct + 454: 103, // ctacg -> acgct + 455: 119, // ctact -> actct + 456: 114, // ctaga -> actag + 457: 151, // ctagc -> agcct + 458: 167, // ctagg -> aggct + 459: 183, // ctagt -> agtct + 460: 115, // ctata -> actat + 461: 215, // ctatc -> atcct + 462: 231, // ctatg -> atgct + 463: 247, // ctatt -> attct + 464: 29, // ctcaa -> aactc + 465: 93, // ctcac -> acctc + 466: 157, // ctcag -> agctc + 467: 221, // ctcat -> atctc + 468: 117, // ctcca -> actcc + 469: 343, // ctccc -> cccct + 470: 359, // ctccg -> ccgct + 471: 375, // ctcct -> cctct + 472: 118, // ctcga -> actcg + 473: 374, // ctcgc -> cctcg + 474: 423, // ctcgg -> cggct + 475: 439, // ctcgt -> cgtct + 476: 119, // ctcta -> actct + 477: 375, // ctctc -> cctct + 478: 478, // ctctg -> ctctg + 479: 479, // ctctt -> ctctt + 480: 30, // ctgaa -> aactg + 481: 94, // ctgac -> acctg + 482: 158, // ctgag -> agctg + 483: 222, // ctgat -> atctg + 484: 121, // ctgca -> actgc + 485: 350, // ctgcc -> ccctg + 486: 414, // ctgcg -> cgctg + 487: 478, // ctgct -> ctctg + 488: 122, // ctgga -> actgg + 489: 378, // ctggc -> cctgg + 490: 490, // ctggg -> ctggg + 491: 491, // ctggt -> ctggt + 492: 123, // ctgta -> actgt + 493: 379, // ctgtc -> cctgt + 494: 494, // ctgtg -> ctgtg + 495: 495, // ctgtt -> ctgtt + 496: 31, // cttaa -> aactt + 497: 95, // cttac -> acctt + 498: 159, // cttag -> agctt + 499: 223, // cttat -> atctt + 500: 125, // cttca -> acttc + 501: 351, // cttcc -> ccctt + 502: 415, // cttcg -> cgctt + 503: 479, // cttct -> ctctt + 504: 126, // cttga -> acttg + 505: 382, // cttgc -> ccttg + 506: 506, // cttgg -> cttgg + 507: 507, // cttgt -> cttgt + 508: 127, // cttta -> acttt + 509: 383, // ctttc -> ccttt + 510: 510, // ctttg -> ctttg + 511: 511, // ctttt -> ctttt + 512: 2, // gaaaa -> aaaag + 513: 6, // gaaac -> aaacg + 514: 10, // gaaag -> aaagg + 515: 14, // gaaat -> aaatg + 516: 18, // gaaca 
-> aacag + 517: 22, // gaacc -> aaccg + 518: 26, // gaacg -> aacgg + 519: 30, // gaact -> aactg + 520: 34, // gaaga -> aagag + 521: 38, // gaagc -> aagcg + 522: 42, // gaagg -> aaggg + 523: 46, // gaagt -> aagtg + 524: 50, // gaata -> aatag + 525: 54, // gaatc -> aatcg + 526: 58, // gaatg -> aatgg + 527: 62, // gaatt -> aattg + 528: 33, // gacaa -> aagac + 529: 70, // gacac -> acacg + 530: 74, // gacag -> acagg + 531: 78, // gacat -> acatg + 532: 82, // gacca -> accag + 533: 86, // gaccc -> acccg + 534: 90, // gaccg -> accgg + 535: 94, // gacct -> acctg + 536: 98, // gacga -> acgag + 537: 102, // gacgc -> acgcg + 538: 106, // gacgg -> acggg + 539: 110, // gacgt -> acgtg + 540: 114, // gacta -> actag + 541: 118, // gactc -> actcg + 542: 122, // gactg -> actgg + 543: 126, // gactt -> acttg + 544: 34, // gagaa -> aagag + 545: 98, // gagac -> acgag + 546: 138, // gagag -> agagg + 547: 142, // gagat -> agatg + 548: 137, // gagca -> agagc + 549: 150, // gagcc -> agccg + 550: 154, // gagcg -> agcgg + 551: 158, // gagct -> agctg + 552: 138, // gagga -> agagg + 553: 166, // gaggc -> aggcg + 554: 170, // gaggg -> agggg + 555: 174, // gaggt -> aggtg + 556: 139, // gagta -> agagt + 557: 182, // gagtc -> agtcg + 558: 186, // gagtg -> agtgg + 559: 190, // gagtt -> agttg + 560: 35, // gataa -> aagat + 561: 99, // gatac -> acgat + 562: 163, // gatag -> aggat + 563: 206, // gatat -> atatg + 564: 141, // gatca -> agatc + 565: 214, // gatcc -> atccg + 566: 218, // gatcg -> atcgg + 567: 222, // gatct -> atctg + 568: 142, // gatga -> agatg + 569: 230, // gatgc -> atgcg + 570: 234, // gatgg -> atggg + 571: 238, // gatgt -> atgtg + 572: 143, // gatta -> agatt + 573: 246, // gattc -> attcg + 574: 250, // gattg -> attgg + 575: 254, // gattt -> atttg + 576: 9, // gcaaa -> aaagc + 577: 25, // gcaac -> aacgc + 578: 41, // gcaag -> aaggc + 579: 57, // gcaat -> aatgc + 580: 73, // gcaca -> acagc + 581: 89, // gcacc -> accgc + 582: 105, // gcacg -> acggc + 583: 121, // gcact -> actgc + 584: 137, 
// gcaga -> agagc + 585: 153, // gcagc -> agcgc + 586: 169, // gcagg -> agggc + 587: 185, // gcagt -> agtgc + 588: 147, // gcata -> agcat + 589: 217, // gcatc -> atcgc + 590: 233, // gcatg -> atggc + 591: 249, // gcatt -> attgc + 592: 37, // gccaa -> aagcc + 593: 101, // gccac -> acgcc + 594: 165, // gccag -> aggcc + 595: 229, // gccat -> atgcc + 596: 149, // gccca -> agccc + 597: 342, // gcccc -> ccccg + 598: 346, // gcccg -> cccgg + 599: 350, // gccct -> ccctg + 600: 150, // gccga -> agccg + 601: 358, // gccgc -> ccgcg + 602: 362, // gccgg -> ccggg + 603: 366, // gccgt -> ccgtg + 604: 151, // gccta -> agcct + 605: 374, // gcctc -> cctcg + 606: 378, // gcctg -> cctgg + 607: 382, // gcctt -> ccttg + 608: 38, // gcgaa -> aagcg + 609: 102, // gcgac -> acgcg + 610: 166, // gcgag -> aggcg + 611: 230, // gcgat -> atgcg + 612: 153, // gcgca -> agcgc + 613: 358, // gcgcc -> ccgcg + 614: 410, // gcgcg -> cgcgg + 615: 414, // gcgct -> cgctg + 616: 154, // gcgga -> agcgg + 617: 410, // gcggc -> cgcgg + 618: 426, // gcggg -> cgggg + 619: 430, // gcggt -> cggtg + 620: 155, // gcgta -> agcgt + 621: 411, // gcgtc -> cgcgt + 622: 442, // gcgtg -> cgtgg + 623: 446, // gcgtt -> cgttg + 624: 39, // gctaa -> aagct + 625: 103, // gctac -> acgct + 626: 167, // gctag -> aggct + 627: 231, // gctat -> atgct + 628: 157, // gctca -> agctc + 629: 359, // gctcc -> ccgct + 630: 423, // gctcg -> cggct + 631: 478, // gctct -> ctctg + 632: 158, // gctga -> agctg + 633: 414, // gctgc -> cgctg + 634: 490, // gctgg -> ctggg + 635: 494, // gctgt -> ctgtg + 636: 159, // gctta -> agctt + 637: 415, // gcttc -> cgctt + 638: 506, // gcttg -> cttgg + 639: 510, // gcttt -> ctttg + 640: 10, // ggaaa -> aaagg + 641: 26, // ggaac -> aacgg + 642: 42, // ggaag -> aaggg + 643: 58, // ggaat -> aatgg + 644: 74, // ggaca -> acagg + 645: 90, // ggacc -> accgg + 646: 106, // ggacg -> acggg + 647: 122, // ggact -> actgg + 648: 138, // ggaga -> agagg + 649: 154, // ggagc -> agcgg + 650: 170, // ggagg -> agggg + 651: 
186, // ggagt -> agtgg + 652: 163, // ggata -> aggat + 653: 218, // ggatc -> atcgg + 654: 234, // ggatg -> atggg + 655: 250, // ggatt -> attgg + 656: 41, // ggcaa -> aaggc + 657: 105, // ggcac -> acggc + 658: 169, // ggcag -> agggc + 659: 233, // ggcat -> atggc + 660: 165, // ggcca -> aggcc + 661: 346, // ggccc -> cccgg + 662: 362, // ggccg -> ccggg + 663: 378, // ggcct -> cctgg + 664: 166, // ggcga -> aggcg + 665: 410, // ggcgc -> cgcgg + 666: 426, // ggcgg -> cgggg + 667: 442, // ggcgt -> cgtgg + 668: 167, // ggcta -> aggct + 669: 423, // ggctc -> cggct + 670: 490, // ggctg -> ctggg + 671: 506, // ggctt -> cttgg + 672: 42, // gggaa -> aaggg + 673: 106, // gggac -> acggg + 674: 170, // gggag -> agggg + 675: 234, // gggat -> atggg + 676: 169, // gggca -> agggc + 677: 362, // gggcc -> ccggg + 678: 426, // gggcg -> cgggg + 679: 490, // gggct -> ctggg + 680: 170, // gggga -> agggg + 681: 426, // ggggc -> cgggg + 682: 682, // ggggg -> ggggg + 683: 683, // ggggt -> ggggt + 684: 171, // gggta -> agggt + 685: 427, // gggtc -> cgggt + 686: 683, // gggtg -> ggggt + 687: 687, // gggtt -> gggtt + 688: 43, // ggtaa -> aaggt + 689: 107, // ggtac -> acggt + 690: 171, // ggtag -> agggt + 691: 235, // ggtat -> atggt + 692: 173, // ggtca -> aggtc + 693: 363, // ggtcc -> ccggt + 694: 427, // ggtcg -> cgggt + 695: 491, // ggtct -> ctggt + 696: 174, // ggtga -> aggtg + 697: 430, // ggtgc -> cggtg + 698: 683, // ggtgg -> ggggt + 699: 699, // ggtgt -> ggtgt + 700: 175, // ggtta -> aggtt + 701: 431, // ggttc -> cggtt + 702: 687, // ggttg -> gggtt + 703: 703, // ggttt -> ggttt + 704: 11, // gtaaa -> aaagt + 705: 27, // gtaac -> aacgt + 706: 43, // gtaag -> aaggt + 707: 59, // gtaat -> aatgt + 708: 75, // gtaca -> acagt + 709: 91, // gtacc -> accgt + 710: 107, // gtacg -> acggt + 711: 123, // gtact -> actgt + 712: 139, // gtaga -> agagt + 713: 155, // gtagc -> agcgt + 714: 171, // gtagg -> agggt + 715: 187, // gtagt -> agtgt + 716: 179, // gtata -> agtat + 717: 219, // gtatc -> atcgt + 
718: 235, // gtatg -> atggt + 719: 251, // gtatt -> attgt + 720: 45, // gtcaa -> aagtc + 721: 109, // gtcac -> acgtc + 722: 173, // gtcag -> aggtc + 723: 237, // gtcat -> atgtc + 724: 181, // gtcca -> agtcc + 725: 347, // gtccc -> cccgt + 726: 363, // gtccg -> ccggt + 727: 379, // gtcct -> cctgt + 728: 182, // gtcga -> agtcg + 729: 411, // gtcgc -> cgcgt + 730: 427, // gtcgg -> cgggt + 731: 443, // gtcgt -> cgtgt + 732: 183, // gtcta -> agtct + 733: 439, // gtctc -> cgtct + 734: 491, // gtctg -> ctggt + 735: 507, // gtctt -> cttgt + 736: 46, // gtgaa -> aagtg + 737: 110, // gtgac -> acgtg + 738: 174, // gtgag -> aggtg + 739: 238, // gtgat -> atgtg + 740: 185, // gtgca -> agtgc + 741: 366, // gtgcc -> ccgtg + 742: 430, // gtgcg -> cggtg + 743: 494, // gtgct -> ctgtg + 744: 186, // gtgga -> agtgg + 745: 442, // gtggc -> cgtgg + 746: 683, // gtggg -> ggggt + 747: 699, // gtggt -> ggtgt + 748: 187, // gtgta -> agtgt + 749: 443, // gtgtc -> cgtgt + 750: 699, // gtgtg -> ggtgt + 751: 751, // gtgtt -> gtgtt + 752: 47, // gttaa -> aagtt + 753: 111, // gttac -> acgtt + 754: 175, // gttag -> aggtt + 755: 239, // gttat -> atgtt + 756: 189, // gttca -> agttc + 757: 367, // gttcc -> ccgtt + 758: 431, // gttcg -> cggtt + 759: 495, // gttct -> ctgtt + 760: 190, // gttga -> agttg + 761: 446, // gttgc -> cgttg + 762: 687, // gttgg -> gggtt + 763: 751, // gttgt -> gtgtt + 764: 191, // gttta -> agttt + 765: 447, // gtttc -> cgttt + 766: 703, // gtttg -> ggttt + 767: 767, // gtttt -> gtttt + 768: 3, // taaaa -> aaaat + 769: 7, // taaac -> aaact + 770: 11, // taaag -> aaagt + 771: 15, // taaat -> aaatt + 772: 19, // taaca -> aacat + 773: 23, // taacc -> aacct + 774: 27, // taacg -> aacgt + 775: 31, // taact -> aactt + 776: 35, // taaga -> aagat + 777: 39, // taagc -> aagct + 778: 43, // taagg -> aaggt + 779: 47, // taagt -> aagtt + 780: 51, // taata -> aatat + 781: 55, // taatc -> aatct + 782: 59, // taatg -> aatgt + 783: 63, // taatt -> aattt + 784: 49, // tacaa -> aatac + 785: 71, // 
tacac -> acact + 786: 75, // tacag -> acagt + 787: 79, // tacat -> acatt + 788: 83, // tacca -> accat + 789: 87, // taccc -> accct + 790: 91, // taccg -> accgt + 791: 95, // tacct -> acctt + 792: 99, // tacga -> acgat + 793: 103, // tacgc -> acgct + 794: 107, // tacgg -> acggt + 795: 111, // tacgt -> acgtt + 796: 115, // tacta -> actat + 797: 119, // tactc -> actct + 798: 123, // tactg -> actgt + 799: 127, // tactt -> acttt + 800: 50, // tagaa -> aatag + 801: 114, // tagac -> actag + 802: 139, // tagag -> agagt + 803: 143, // tagat -> agatt + 804: 147, // tagca -> agcat + 805: 151, // tagcc -> agcct + 806: 155, // tagcg -> agcgt + 807: 159, // tagct -> agctt + 808: 163, // tagga -> aggat + 809: 167, // taggc -> aggct + 810: 171, // taggg -> agggt + 811: 175, // taggt -> aggtt + 812: 179, // tagta -> agtat + 813: 183, // tagtc -> agtct + 814: 187, // tagtg -> agtgt + 815: 191, // tagtt -> agttt + 816: 51, // tataa -> aatat + 817: 115, // tatac -> actat + 818: 179, // tatag -> agtat + 819: 207, // tatat -> atatt + 820: 205, // tatca -> atatc + 821: 215, // tatcc -> atcct + 822: 219, // tatcg -> atcgt + 823: 223, // tatct -> atctt + 824: 206, // tatga -> atatg + 825: 231, // tatgc -> atgct + 826: 235, // tatgg -> atggt + 827: 239, // tatgt -> atgtt + 828: 207, // tatta -> atatt + 829: 247, // tattc -> attct + 830: 251, // tattg -> attgt + 831: 255, // tattt -> atttt + 832: 13, // tcaaa -> aaatc + 833: 29, // tcaac -> aactc + 834: 45, // tcaag -> aagtc + 835: 61, // tcaat -> aattc + 836: 77, // tcaca -> acatc + 837: 93, // tcacc -> acctc + 838: 109, // tcacg -> acgtc + 839: 125, // tcact -> acttc + 840: 141, // tcaga -> agatc + 841: 157, // tcagc -> agctc + 842: 173, // tcagg -> aggtc + 843: 189, // tcagt -> agttc + 844: 205, // tcata -> atatc + 845: 221, // tcatc -> atctc + 846: 237, // tcatg -> atgtc + 847: 253, // tcatt -> atttc + 848: 53, // tccaa -> aatcc + 849: 117, // tccac -> actcc + 850: 181, // tccag -> agtcc + 851: 245, // tccat -> attcc + 852: 213, // tccca 
-> atccc + 853: 343, // tcccc -> cccct + 854: 347, // tcccg -> cccgt + 855: 351, // tccct -> ccctt + 856: 214, // tccga -> atccg + 857: 359, // tccgc -> ccgct + 858: 363, // tccgg -> ccggt + 859: 367, // tccgt -> ccgtt + 860: 215, // tccta -> atcct + 861: 375, // tcctc -> cctct + 862: 379, // tcctg -> cctgt + 863: 383, // tcctt -> ccttt + 864: 54, // tcgaa -> aatcg + 865: 118, // tcgac -> actcg + 866: 182, // tcgag -> agtcg + 867: 246, // tcgat -> attcg + 868: 217, // tcgca -> atcgc + 869: 374, // tcgcc -> cctcg + 870: 411, // tcgcg -> cgcgt + 871: 415, // tcgct -> cgctt + 872: 218, // tcgga -> atcgg + 873: 423, // tcggc -> cggct + 874: 427, // tcggg -> cgggt + 875: 431, // tcggt -> cggtt + 876: 219, // tcgta -> atcgt + 877: 439, // tcgtc -> cgtct + 878: 443, // tcgtg -> cgtgt + 879: 447, // tcgtt -> cgttt + 880: 55, // tctaa -> aatct + 881: 119, // tctac -> actct + 882: 183, // tctag -> agtct + 883: 247, // tctat -> attct + 884: 221, // tctca -> atctc + 885: 375, // tctcc -> cctct + 886: 439, // tctcg -> cgtct + 887: 479, // tctct -> ctctt + 888: 222, // tctga -> atctg + 889: 478, // tctgc -> ctctg + 890: 491, // tctgg -> ctggt + 891: 495, // tctgt -> ctgtt + 892: 223, // tctta -> atctt + 893: 479, // tcttc -> ctctt + 894: 507, // tcttg -> cttgt + 895: 511, // tcttt -> ctttt + 896: 14, // tgaaa -> aaatg + 897: 30, // tgaac -> aactg + 898: 46, // tgaag -> aagtg + 899: 62, // tgaat -> aattg + 900: 78, // tgaca -> acatg + 901: 94, // tgacc -> acctg + 902: 110, // tgacg -> acgtg + 903: 126, // tgact -> acttg + 904: 142, // tgaga -> agatg + 905: 158, // tgagc -> agctg + 906: 174, // tgagg -> aggtg + 907: 190, // tgagt -> agttg + 908: 206, // tgata -> atatg + 909: 222, // tgatc -> atctg + 910: 238, // tgatg -> atgtg + 911: 254, // tgatt -> atttg + 912: 57, // tgcaa -> aatgc + 913: 121, // tgcac -> actgc + 914: 185, // tgcag -> agtgc + 915: 249, // tgcat -> attgc + 916: 229, // tgcca -> atgcc + 917: 350, // tgccc -> ccctg + 918: 366, // tgccg -> ccgtg + 919: 382, // 
tgcct -> ccttg + 920: 230, // tgcga -> atgcg + 921: 414, // tgcgc -> cgctg + 922: 430, // tgcgg -> cggtg + 923: 446, // tgcgt -> cgttg + 924: 231, // tgcta -> atgct + 925: 478, // tgctc -> ctctg + 926: 494, // tgctg -> ctgtg + 927: 510, // tgctt -> ctttg + 928: 58, // tggaa -> aatgg + 929: 122, // tggac -> actgg + 930: 186, // tggag -> agtgg + 931: 250, // tggat -> attgg + 932: 233, // tggca -> atggc + 933: 378, // tggcc -> cctgg + 934: 442, // tggcg -> cgtgg + 935: 506, // tggct -> cttgg + 936: 234, // tggga -> atggg + 937: 490, // tgggc -> ctggg + 938: 683, // tgggg -> ggggt + 939: 687, // tgggt -> gggtt + 940: 235, // tggta -> atggt + 941: 491, // tggtc -> ctggt + 942: 699, // tggtg -> ggtgt + 943: 703, // tggtt -> ggttt + 944: 59, // tgtaa -> aatgt + 945: 123, // tgtac -> actgt + 946: 187, // tgtag -> agtgt + 947: 251, // tgtat -> attgt + 948: 237, // tgtca -> atgtc + 949: 379, // tgtcc -> cctgt + 950: 443, // tgtcg -> cgtgt + 951: 507, // tgtct -> cttgt + 952: 238, // tgtga -> atgtg + 953: 494, // tgtgc -> ctgtg + 954: 699, // tgtgg -> ggtgt + 955: 751, // tgtgt -> gtgtt + 956: 239, // tgtta -> atgtt + 957: 495, // tgttc -> ctgtt + 958: 751, // tgttg -> gtgtt + 959: 767, // tgttt -> gtttt + 960: 15, // ttaaa -> aaatt + 961: 31, // ttaac -> aactt + 962: 47, // ttaag -> aagtt + 963: 63, // ttaat -> aattt + 964: 79, // ttaca -> acatt + 965: 95, // ttacc -> acctt + 966: 111, // ttacg -> acgtt + 967: 127, // ttact -> acttt + 968: 143, // ttaga -> agatt + 969: 159, // ttagc -> agctt + 970: 175, // ttagg -> aggtt + 971: 191, // ttagt -> agttt + 972: 207, // ttata -> atatt + 973: 223, // ttatc -> atctt + 974: 239, // ttatg -> atgtt + 975: 255, // ttatt -> atttt + 976: 61, // ttcaa -> aattc + 977: 125, // ttcac -> acttc + 978: 189, // ttcag -> agttc + 979: 253, // ttcat -> atttc + 980: 245, // ttcca -> attcc + 981: 351, // ttccc -> ccctt + 982: 367, // ttccg -> ccgtt + 983: 383, // ttcct -> ccttt + 984: 246, // ttcga -> attcg + 985: 415, // ttcgc -> cgctt + 986: 431, 
// ttcgg -> cggtt + 987: 447, // ttcgt -> cgttt + 988: 247, // ttcta -> attct + 989: 479, // ttctc -> ctctt + 990: 495, // ttctg -> ctgtt + 991: 511, // ttctt -> ctttt + 992: 62, // ttgaa -> aattg + 993: 126, // ttgac -> acttg + 994: 190, // ttgag -> agttg + 995: 254, // ttgat -> atttg + 996: 249, // ttgca -> attgc + 997: 382, // ttgcc -> ccttg + 998: 446, // ttgcg -> cgttg + 999: 510, // ttgct -> ctttg + 1000: 250, // ttgga -> attgg + 1001: 506, // ttggc -> cttgg + 1002: 687, // ttggg -> gggtt + 1003: 703, // ttggt -> ggttt + 1004: 251, // ttgta -> attgt + 1005: 507, // ttgtc -> cttgt + 1006: 751, // ttgtg -> gtgtt + 1007: 767, // ttgtt -> gtttt + 1008: 63, // tttaa -> aattt + 1009: 127, // tttac -> acttt + 1010: 191, // tttag -> agttt + 1011: 255, // tttat -> atttt + 1012: 253, // tttca -> atttc + 1013: 383, // tttcc -> ccttt + 1014: 447, // tttcg -> cgttt + 1015: 511, // tttct -> ctttt + 1016: 254, // tttga -> atttg + 1017: 510, // tttgc -> ctttg + 1018: 703, // tttgg -> ggttt + 1019: 767, // tttgt -> gtttt + 1020: 255, // tttta -> atttt + 1021: 511, // ttttc -> ctttt + 1022: 767, // ttttg -> gtttt + 1023: 1023, // ttttt -> ttttt + }, + 6: { + 0: 0, // aaaaaa -> aaaaaa + 1: 1, // aaaaac -> aaaaac + 2: 2, // aaaaag -> aaaaag + 3: 3, // aaaaat -> aaaaat + 4: 1, // aaaaca -> aaaaac + 5: 5, // aaaacc -> aaaacc + 6: 6, // aaaacg -> aaaacg + 7: 7, // aaaact -> aaaact + 8: 2, // aaaaga -> aaaaag + 9: 9, // aaaagc -> aaaagc + 10: 10, // aaaagg -> aaaagg + 11: 11, // aaaagt -> aaaagt + 12: 3, // aaaata -> aaaaat + 13: 13, // aaaatc -> aaaatc + 14: 14, // aaaatg -> aaaatg + 15: 15, // aaaatt -> aaaatt + 16: 1, // aaacaa -> aaaaac + 17: 17, // aaacac -> aaacac + 18: 18, // aaacag -> aaacag + 19: 19, // aaacat -> aaacat + 20: 5, // aaacca -> aaaacc + 21: 21, // aaaccc -> aaaccc + 22: 22, // aaaccg -> aaaccg + 23: 23, // aaacct -> aaacct + 24: 6, // aaacga -> aaaacg + 25: 25, // aaacgc -> aaacgc + 26: 26, // aaacgg -> aaacgg + 27: 27, // aaacgt -> aaacgt + 28: 7, // aaacta 
-> aaaact + 29: 29, // aaactc -> aaactc + 30: 30, // aaactg -> aaactg + 31: 31, // aaactt -> aaactt + 32: 2, // aaagaa -> aaaaag + 33: 33, // aaagac -> aaagac + 34: 34, // aaagag -> aaagag + 35: 35, // aaagat -> aaagat + 36: 9, // aaagca -> aaaagc + 37: 37, // aaagcc -> aaagcc + 38: 38, // aaagcg -> aaagcg + 39: 39, // aaagct -> aaagct + 40: 10, // aaagga -> aaaagg + 41: 41, // aaaggc -> aaaggc + 42: 42, // aaaggg -> aaaggg + 43: 43, // aaaggt -> aaaggt + 44: 11, // aaagta -> aaaagt + 45: 45, // aaagtc -> aaagtc + 46: 46, // aaagtg -> aaagtg + 47: 47, // aaagtt -> aaagtt + 48: 3, // aaataa -> aaaaat + 49: 49, // aaatac -> aaatac + 50: 50, // aaatag -> aaatag + 51: 51, // aaatat -> aaatat + 52: 13, // aaatca -> aaaatc + 53: 53, // aaatcc -> aaatcc + 54: 54, // aaatcg -> aaatcg + 55: 55, // aaatct -> aaatct + 56: 14, // aaatga -> aaaatg + 57: 57, // aaatgc -> aaatgc + 58: 58, // aaatgg -> aaatgg + 59: 59, // aaatgt -> aaatgt + 60: 15, // aaatta -> aaaatt + 61: 61, // aaattc -> aaattc + 62: 62, // aaattg -> aaattg + 63: 63, // aaattt -> aaattt + 64: 1, // aacaaa -> aaaaac + 65: 65, // aacaac -> aacaac + 66: 66, // aacaag -> aacaag + 67: 67, // aacaat -> aacaat + 68: 17, // aacaca -> aaacac + 69: 69, // aacacc -> aacacc + 70: 70, // aacacg -> aacacg + 71: 71, // aacact -> aacact + 72: 18, // aacaga -> aaacag + 73: 73, // aacagc -> aacagc + 74: 74, // aacagg -> aacagg + 75: 75, // aacagt -> aacagt + 76: 19, // aacata -> aaacat + 77: 77, // aacatc -> aacatc + 78: 78, // aacatg -> aacatg + 79: 79, // aacatt -> aacatt + 80: 5, // aaccaa -> aaaacc + 81: 81, // aaccac -> aaccac + 82: 82, // aaccag -> aaccag + 83: 83, // aaccat -> aaccat + 84: 21, // aaccca -> aaaccc + 85: 85, // aacccc -> aacccc + 86: 86, // aacccg -> aacccg + 87: 87, // aaccct -> aaccct + 88: 22, // aaccga -> aaaccg + 89: 89, // aaccgc -> aaccgc + 90: 90, // aaccgg -> aaccgg + 91: 91, // aaccgt -> aaccgt + 92: 23, // aaccta -> aaacct + 93: 93, // aacctc -> aacctc + 94: 94, // aacctg -> aacctg + 95: 95, // 
aacctt -> aacctt + 96: 6, // aacgaa -> aaaacg + 97: 97, // aacgac -> aacgac + 98: 98, // aacgag -> aacgag + 99: 99, // aacgat -> aacgat + 100: 25, // aacgca -> aaacgc + 101: 101, // aacgcc -> aacgcc + 102: 102, // aacgcg -> aacgcg + 103: 103, // aacgct -> aacgct + 104: 26, // aacgga -> aaacgg + 105: 105, // aacggc -> aacggc + 106: 106, // aacggg -> aacggg + 107: 107, // aacggt -> aacggt + 108: 27, // aacgta -> aaacgt + 109: 109, // aacgtc -> aacgtc + 110: 110, // aacgtg -> aacgtg + 111: 111, // aacgtt -> aacgtt + 112: 7, // aactaa -> aaaact + 113: 113, // aactac -> aactac + 114: 114, // aactag -> aactag + 115: 115, // aactat -> aactat + 116: 29, // aactca -> aaactc + 117: 117, // aactcc -> aactcc + 118: 118, // aactcg -> aactcg + 119: 119, // aactct -> aactct + 120: 30, // aactga -> aaactg + 121: 121, // aactgc -> aactgc + 122: 122, // aactgg -> aactgg + 123: 123, // aactgt -> aactgt + 124: 31, // aactta -> aaactt + 125: 125, // aacttc -> aacttc + 126: 126, // aacttg -> aacttg + 127: 127, // aacttt -> aacttt + 128: 2, // aagaaa -> aaaaag + 129: 66, // aagaac -> aacaag + 130: 130, // aagaag -> aagaag + 131: 131, // aagaat -> aagaat + 132: 33, // aagaca -> aaagac + 133: 133, // aagacc -> aagacc + 134: 134, // aagacg -> aagacg + 135: 135, // aagact -> aagact + 136: 34, // aagaga -> aaagag + 137: 137, // aagagc -> aagagc + 138: 138, // aagagg -> aagagg + 139: 139, // aagagt -> aagagt + 140: 35, // aagata -> aaagat + 141: 141, // aagatc -> aagatc + 142: 142, // aagatg -> aagatg + 143: 143, // aagatt -> aagatt + 144: 9, // aagcaa -> aaaagc + 145: 145, // aagcac -> aagcac + 146: 146, // aagcag -> aagcag + 147: 147, // aagcat -> aagcat + 148: 37, // aagcca -> aaagcc + 149: 149, // aagccc -> aagccc + 150: 150, // aagccg -> aagccg + 151: 151, // aagcct -> aagcct + 152: 38, // aagcga -> aaagcg + 153: 153, // aagcgc -> aagcgc + 154: 154, // aagcgg -> aagcgg + 155: 155, // aagcgt -> aagcgt + 156: 39, // aagcta -> aaagct + 157: 157, // aagctc -> aagctc + 158: 158, // aagctg -> 
aagctg + 159: 159, // aagctt -> aagctt + 160: 10, // aaggaa -> aaaagg + 161: 161, // aaggac -> aaggac + 162: 162, // aaggag -> aaggag + 163: 163, // aaggat -> aaggat + 164: 41, // aaggca -> aaaggc + 165: 165, // aaggcc -> aaggcc + 166: 166, // aaggcg -> aaggcg + 167: 167, // aaggct -> aaggct + 168: 42, // aaggga -> aaaggg + 169: 169, // aagggc -> aagggc + 170: 170, // aagggg -> aagggg + 171: 171, // aagggt -> aagggt + 172: 43, // aaggta -> aaaggt + 173: 173, // aaggtc -> aaggtc + 174: 174, // aaggtg -> aaggtg + 175: 175, // aaggtt -> aaggtt + 176: 11, // aagtaa -> aaaagt + 177: 177, // aagtac -> aagtac + 178: 178, // aagtag -> aagtag + 179: 179, // aagtat -> aagtat + 180: 45, // aagtca -> aaagtc + 181: 181, // aagtcc -> aagtcc + 182: 182, // aagtcg -> aagtcg + 183: 183, // aagtct -> aagtct + 184: 46, // aagtga -> aaagtg + 185: 185, // aagtgc -> aagtgc + 186: 186, // aagtgg -> aagtgg + 187: 187, // aagtgt -> aagtgt + 188: 47, // aagtta -> aaagtt + 189: 189, // aagttc -> aagttc + 190: 190, // aagttg -> aagttg + 191: 191, // aagttt -> aagttt + 192: 3, // aataaa -> aaaaat + 193: 67, // aataac -> aacaat + 194: 131, // aataag -> aagaat + 195: 195, // aataat -> aataat + 196: 49, // aataca -> aaatac + 197: 197, // aatacc -> aatacc + 198: 198, // aatacg -> aatacg + 199: 199, // aatact -> aatact + 200: 50, // aataga -> aaatag + 201: 201, // aatagc -> aatagc + 202: 202, // aatagg -> aatagg + 203: 203, // aatagt -> aatagt + 204: 51, // aatata -> aaatat + 205: 205, // aatatc -> aatatc + 206: 206, // aatatg -> aatatg + 207: 207, // aatatt -> aatatt + 208: 13, // aatcaa -> aaaatc + 209: 209, // aatcac -> aatcac + 210: 210, // aatcag -> aatcag + 211: 211, // aatcat -> aatcat + 212: 53, // aatcca -> aaatcc + 213: 213, // aatccc -> aatccc + 214: 214, // aatccg -> aatccg + 215: 215, // aatcct -> aatcct + 216: 54, // aatcga -> aaatcg + 217: 217, // aatcgc -> aatcgc + 218: 218, // aatcgg -> aatcgg + 219: 219, // aatcgt -> aatcgt + 220: 55, // aatcta -> aaatct + 221: 221, // aatctc -> 
aatctc + 222: 222, // aatctg -> aatctg + 223: 223, // aatctt -> aatctt + 224: 14, // aatgaa -> aaaatg + 225: 225, // aatgac -> aatgac + 226: 226, // aatgag -> aatgag + 227: 227, // aatgat -> aatgat + 228: 57, // aatgca -> aaatgc + 229: 229, // aatgcc -> aatgcc + 230: 230, // aatgcg -> aatgcg + 231: 231, // aatgct -> aatgct + 232: 58, // aatgga -> aaatgg + 233: 233, // aatggc -> aatggc + 234: 234, // aatggg -> aatggg + 235: 235, // aatggt -> aatggt + 236: 59, // aatgta -> aaatgt + 237: 237, // aatgtc -> aatgtc + 238: 238, // aatgtg -> aatgtg + 239: 239, // aatgtt -> aatgtt + 240: 15, // aattaa -> aaaatt + 241: 241, // aattac -> aattac + 242: 242, // aattag -> aattag + 243: 243, // aattat -> aattat + 244: 61, // aattca -> aaattc + 245: 245, // aattcc -> aattcc + 246: 246, // aattcg -> aattcg + 247: 247, // aattct -> aattct + 248: 62, // aattga -> aaattg + 249: 249, // aattgc -> aattgc + 250: 250, // aattgg -> aattgg + 251: 251, // aattgt -> aattgt + 252: 63, // aattta -> aaattt + 253: 253, // aatttc -> aatttc + 254: 254, // aatttg -> aatttg + 255: 255, // aatttt -> aatttt + 256: 1, // acaaaa -> aaaaac + 257: 17, // acaaac -> aaacac + 258: 33, // acaaag -> aaagac + 259: 49, // acaaat -> aaatac + 260: 65, // acaaca -> aacaac + 261: 81, // acaacc -> aaccac + 262: 97, // acaacg -> aacgac + 263: 113, // acaact -> aactac + 264: 66, // acaaga -> aacaag + 265: 145, // acaagc -> aagcac + 266: 161, // acaagg -> aaggac + 267: 177, // acaagt -> aagtac + 268: 67, // acaata -> aacaat + 269: 209, // acaatc -> aatcac + 270: 225, // acaatg -> aatgac + 271: 241, // acaatt -> aattac + 272: 17, // acacaa -> aaacac + 273: 273, // acacac -> acacac + 274: 274, // acacag -> acacag + 275: 275, // acacat -> acacat + 276: 69, // acacca -> aacacc + 277: 277, // acaccc -> acaccc + 278: 278, // acaccg -> acaccg + 279: 279, // acacct -> acacct + 280: 70, // acacga -> aacacg + 281: 281, // acacgc -> acacgc + 282: 282, // acacgg -> acacgg + 283: 283, // acacgt -> acacgt + 284: 71, // acacta -> 
aacact + 285: 285, // acactc -> acactc + 286: 286, // acactg -> acactg + 287: 287, // acactt -> acactt + 288: 18, // acagaa -> aaacag + 289: 274, // acagac -> acacag + 290: 290, // acagag -> acagag + 291: 291, // acagat -> acagat + 292: 73, // acagca -> aacagc + 293: 293, // acagcc -> acagcc + 294: 294, // acagcg -> acagcg + 295: 295, // acagct -> acagct + 296: 74, // acagga -> aacagg + 297: 297, // acaggc -> acaggc + 298: 298, // acaggg -> acaggg + 299: 299, // acaggt -> acaggt + 300: 75, // acagta -> aacagt + 301: 301, // acagtc -> acagtc + 302: 302, // acagtg -> acagtg + 303: 303, // acagtt -> acagtt + 304: 19, // acataa -> aaacat + 305: 275, // acatac -> acacat + 306: 306, // acatag -> acatag + 307: 307, // acatat -> acatat + 308: 77, // acatca -> aacatc + 309: 309, // acatcc -> acatcc + 310: 310, // acatcg -> acatcg + 311: 311, // acatct -> acatct + 312: 78, // acatga -> aacatg + 313: 313, // acatgc -> acatgc + 314: 314, // acatgg -> acatgg + 315: 315, // acatgt -> acatgt + 316: 79, // acatta -> aacatt + 317: 317, // acattc -> acattc + 318: 318, // acattg -> acattg + 319: 319, // acattt -> acattt + 320: 5, // accaaa -> aaaacc + 321: 69, // accaac -> aacacc + 322: 133, // accaag -> aagacc + 323: 197, // accaat -> aatacc + 324: 81, // accaca -> aaccac + 325: 325, // accacc -> accacc + 326: 326, // accacg -> accacg + 327: 327, // accact -> accact + 328: 82, // accaga -> aaccag + 329: 329, // accagc -> accagc + 330: 330, // accagg -> accagg + 331: 331, // accagt -> accagt + 332: 83, // accata -> aaccat + 333: 333, // accatc -> accatc + 334: 334, // accatg -> accatg + 335: 335, // accatt -> accatt + 336: 21, // acccaa -> aaaccc + 337: 277, // acccac -> acaccc + 338: 338, // acccag -> acccag + 339: 339, // acccat -> acccat + 340: 85, // acccca -> aacccc + 341: 341, // accccc -> accccc + 342: 342, // accccg -> accccg + 343: 343, // acccct -> acccct + 344: 86, // acccga -> aacccg + 345: 345, // acccgc -> acccgc + 346: 346, // acccgg -> acccgg + 347: 347, // acccgt -> 
acccgt + 348: 87, // acccta -> aaccct + 349: 349, // accctc -> accctc + 350: 350, // accctg -> accctg + 351: 351, // accctt -> accctt + 352: 22, // accgaa -> aaaccg + 353: 278, // accgac -> acaccg + 354: 354, // accgag -> accgag + 355: 355, // accgat -> accgat + 356: 89, // accgca -> aaccgc + 357: 357, // accgcc -> accgcc + 358: 358, // accgcg -> accgcg + 359: 359, // accgct -> accgct + 360: 90, // accgga -> aaccgg + 361: 361, // accggc -> accggc + 362: 362, // accggg -> accggg + 363: 363, // accggt -> accggt + 364: 91, // accgta -> aaccgt + 365: 365, // accgtc -> accgtc + 366: 366, // accgtg -> accgtg + 367: 367, // accgtt -> accgtt + 368: 23, // acctaa -> aaacct + 369: 279, // acctac -> acacct + 370: 370, // acctag -> acctag + 371: 371, // acctat -> acctat + 372: 93, // acctca -> aacctc + 373: 373, // acctcc -> acctcc + 374: 374, // acctcg -> acctcg + 375: 375, // acctct -> acctct + 376: 94, // acctga -> aacctg + 377: 377, // acctgc -> acctgc + 378: 378, // acctgg -> acctgg + 379: 379, // acctgt -> acctgt + 380: 95, // acctta -> aacctt + 381: 381, // accttc -> accttc + 382: 382, // accttg -> accttg + 383: 383, // accttt -> accttt + 384: 6, // acgaaa -> aaaacg + 385: 70, // acgaac -> aacacg + 386: 134, // acgaag -> aagacg + 387: 198, // acgaat -> aatacg + 388: 97, // acgaca -> aacgac + 389: 326, // acgacc -> accacg + 390: 390, // acgacg -> acgacg + 391: 391, // acgact -> acgact + 392: 98, // acgaga -> aacgag + 393: 393, // acgagc -> acgagc + 394: 394, // acgagg -> acgagg + 395: 395, // acgagt -> acgagt + 396: 99, // acgata -> aacgat + 397: 397, // acgatc -> acgatc + 398: 398, // acgatg -> acgatg + 399: 399, // acgatt -> acgatt + 400: 25, // acgcaa -> aaacgc + 401: 281, // acgcac -> acacgc + 402: 402, // acgcag -> acgcag + 403: 403, // acgcat -> acgcat + 404: 101, // acgcca -> aacgcc + 405: 405, // acgccc -> acgccc + 406: 406, // acgccg -> acgccg + 407: 407, // acgcct -> acgcct + 408: 102, // acgcga -> aacgcg + 409: 409, // acgcgc -> acgcgc + 410: 410, // acgcgg -> 
acgcgg + 411: 411, // acgcgt -> acgcgt + 412: 103, // acgcta -> aacgct + 413: 413, // acgctc -> acgctc + 414: 414, // acgctg -> acgctg + 415: 415, // acgctt -> acgctt + 416: 26, // acggaa -> aaacgg + 417: 282, // acggac -> acacgg + 418: 418, // acggag -> acggag + 419: 419, // acggat -> acggat + 420: 105, // acggca -> aacggc + 421: 421, // acggcc -> acggcc + 422: 422, // acggcg -> acggcg + 423: 423, // acggct -> acggct + 424: 106, // acggga -> aacggg + 425: 425, // acgggc -> acgggc + 426: 426, // acgggg -> acgggg + 427: 427, // acgggt -> acgggt + 428: 107, // acggta -> aacggt + 429: 429, // acggtc -> acggtc + 430: 430, // acggtg -> acggtg + 431: 431, // acggtt -> acggtt + 432: 27, // acgtaa -> aaacgt + 433: 283, // acgtac -> acacgt + 434: 434, // acgtag -> acgtag + 435: 435, // acgtat -> acgtat + 436: 109, // acgtca -> aacgtc + 437: 437, // acgtcc -> acgtcc + 438: 438, // acgtcg -> acgtcg + 439: 439, // acgtct -> acgtct + 440: 110, // acgtga -> aacgtg + 441: 441, // acgtgc -> acgtgc + 442: 442, // acgtgg -> acgtgg + 443: 443, // acgtgt -> acgtgt + 444: 111, // acgtta -> aacgtt + 445: 445, // acgttc -> acgttc + 446: 446, // acgttg -> acgttg + 447: 447, // acgttt -> acgttt + 448: 7, // actaaa -> aaaact + 449: 71, // actaac -> aacact + 450: 135, // actaag -> aagact + 451: 199, // actaat -> aatact + 452: 113, // actaca -> aactac + 453: 327, // actacc -> accact + 454: 391, // actacg -> acgact + 455: 455, // actact -> actact + 456: 114, // actaga -> aactag + 457: 457, // actagc -> actagc + 458: 458, // actagg -> actagg + 459: 459, // actagt -> actagt + 460: 115, // actata -> aactat + 461: 461, // actatc -> actatc + 462: 462, // actatg -> actatg + 463: 463, // actatt -> actatt + 464: 29, // actcaa -> aaactc + 465: 285, // actcac -> acactc + 466: 466, // actcag -> actcag + 467: 467, // actcat -> actcat + 468: 117, // actcca -> aactcc + 469: 469, // actccc -> actccc + 470: 470, // actccg -> actccg + 471: 471, // actcct -> actcct + 472: 118, // actcga -> aactcg + 473: 473, // 
actcgc -> actcgc + 474: 474, // actcgg -> actcgg + 475: 475, // actcgt -> actcgt + 476: 119, // actcta -> aactct + 477: 477, // actctc -> actctc + 478: 478, // actctg -> actctg + 479: 479, // actctt -> actctt + 480: 30, // actgaa -> aaactg + 481: 286, // actgac -> acactg + 482: 482, // actgag -> actgag + 483: 483, // actgat -> actgat + 484: 121, // actgca -> aactgc + 485: 485, // actgcc -> actgcc + 486: 486, // actgcg -> actgcg + 487: 487, // actgct -> actgct + 488: 122, // actgga -> aactgg + 489: 489, // actggc -> actggc + 490: 490, // actggg -> actggg + 491: 491, // actggt -> actggt + 492: 123, // actgta -> aactgt + 493: 493, // actgtc -> actgtc + 494: 494, // actgtg -> actgtg + 495: 495, // actgtt -> actgtt + 496: 31, // acttaa -> aaactt + 497: 287, // acttac -> acactt + 498: 498, // acttag -> acttag + 499: 499, // acttat -> acttat + 500: 125, // acttca -> aacttc + 501: 501, // acttcc -> acttcc + 502: 502, // acttcg -> acttcg + 503: 503, // acttct -> acttct + 504: 126, // acttga -> aacttg + 505: 505, // acttgc -> acttgc + 506: 506, // acttgg -> acttgg + 507: 507, // acttgt -> acttgt + 508: 127, // acttta -> aacttt + 509: 509, // actttc -> actttc + 510: 510, // actttg -> actttg + 511: 511, // actttt -> actttt + 512: 2, // agaaaa -> aaaaag + 513: 18, // agaaac -> aaacag + 514: 34, // agaaag -> aaagag + 515: 50, // agaaat -> aaatag + 516: 66, // agaaca -> aacaag + 517: 82, // agaacc -> aaccag + 518: 98, // agaacg -> aacgag + 519: 114, // agaact -> aactag + 520: 130, // agaaga -> aagaag + 521: 146, // agaagc -> aagcag + 522: 162, // agaagg -> aaggag + 523: 178, // agaagt -> aagtag + 524: 131, // agaata -> aagaat + 525: 210, // agaatc -> aatcag + 526: 226, // agaatg -> aatgag + 527: 242, // agaatt -> aattag + 528: 33, // agacaa -> aaagac + 529: 274, // agacac -> acacag + 530: 290, // agacag -> acagag + 531: 306, // agacat -> acatag + 532: 133, // agacca -> aagacc + 533: 338, // agaccc -> acccag + 534: 354, // agaccg -> accgag + 535: 370, // agacct -> acctag + 536: 
134, // agacga -> aagacg + 537: 402, // agacgc -> acgcag + 538: 418, // agacgg -> acggag + 539: 434, // agacgt -> acgtag + 540: 135, // agacta -> aagact + 541: 466, // agactc -> actcag + 542: 482, // agactg -> actgag + 543: 498, // agactt -> acttag + 544: 34, // agagaa -> aaagag + 545: 290, // agagac -> acagag + 546: 546, // agagag -> agagag + 547: 547, // agagat -> agagat + 548: 137, // agagca -> aagagc + 549: 549, // agagcc -> agagcc + 550: 550, // agagcg -> agagcg + 551: 551, // agagct -> agagct + 552: 138, // agagga -> aagagg + 553: 553, // agaggc -> agaggc + 554: 554, // agaggg -> agaggg + 555: 555, // agaggt -> agaggt + 556: 139, // agagta -> aagagt + 557: 557, // agagtc -> agagtc + 558: 558, // agagtg -> agagtg + 559: 559, // agagtt -> agagtt + 560: 35, // agataa -> aaagat + 561: 291, // agatac -> acagat + 562: 547, // agatag -> agagat + 563: 563, // agatat -> agatat + 564: 141, // agatca -> aagatc + 565: 565, // agatcc -> agatcc + 566: 566, // agatcg -> agatcg + 567: 567, // agatct -> agatct + 568: 142, // agatga -> aagatg + 569: 569, // agatgc -> agatgc + 570: 570, // agatgg -> agatgg + 571: 571, // agatgt -> agatgt + 572: 143, // agatta -> aagatt + 573: 573, // agattc -> agattc + 574: 574, // agattg -> agattg + 575: 575, // agattt -> agattt + 576: 9, // agcaaa -> aaaagc + 577: 73, // agcaac -> aacagc + 578: 137, // agcaag -> aagagc + 579: 201, // agcaat -> aatagc + 580: 145, // agcaca -> aagcac + 581: 329, // agcacc -> accagc + 582: 393, // agcacg -> acgagc + 583: 457, // agcact -> actagc + 584: 146, // agcaga -> aagcag + 585: 585, // agcagc -> agcagc + 586: 586, // agcagg -> agcagg + 587: 587, // agcagt -> agcagt + 588: 147, // agcata -> aagcat + 589: 589, // agcatc -> agcatc + 590: 590, // agcatg -> agcatg + 591: 591, // agcatt -> agcatt + 592: 37, // agccaa -> aaagcc + 593: 293, // agccac -> acagcc + 594: 549, // agccag -> agagcc + 595: 595, // agccat -> agccat + 596: 149, // agccca -> aagccc + 597: 597, // agcccc -> agcccc + 598: 598, // agcccg -> 
agcccg + 599: 599, // agccct -> agccct + 600: 150, // agccga -> aagccg + 601: 601, // agccgc -> agccgc + 602: 602, // agccgg -> agccgg + 603: 603, // agccgt -> agccgt + 604: 151, // agccta -> aagcct + 605: 605, // agcctc -> agcctc + 606: 606, // agcctg -> agcctg + 607: 607, // agcctt -> agcctt + 608: 38, // agcgaa -> aaagcg + 609: 294, // agcgac -> acagcg + 610: 550, // agcgag -> agagcg + 611: 611, // agcgat -> agcgat + 612: 153, // agcgca -> aagcgc + 613: 613, // agcgcc -> agcgcc + 614: 614, // agcgcg -> agcgcg + 615: 615, // agcgct -> agcgct + 616: 154, // agcgga -> aagcgg + 617: 617, // agcggc -> agcggc + 618: 618, // agcggg -> agcggg + 619: 619, // agcggt -> agcggt + 620: 155, // agcgta -> aagcgt + 621: 621, // agcgtc -> agcgtc + 622: 622, // agcgtg -> agcgtg + 623: 623, // agcgtt -> agcgtt + 624: 39, // agctaa -> aaagct + 625: 295, // agctac -> acagct + 626: 551, // agctag -> agagct + 627: 627, // agctat -> agctat + 628: 157, // agctca -> aagctc + 629: 629, // agctcc -> agctcc + 630: 630, // agctcg -> agctcg + 631: 631, // agctct -> agctct + 632: 158, // agctga -> aagctg + 633: 633, // agctgc -> agctgc + 634: 634, // agctgg -> agctgg + 635: 635, // agctgt -> agctgt + 636: 159, // agctta -> aagctt + 637: 637, // agcttc -> agcttc + 638: 638, // agcttg -> agcttg + 639: 639, // agcttt -> agcttt + 640: 10, // aggaaa -> aaaagg + 641: 74, // aggaac -> aacagg + 642: 138, // aggaag -> aagagg + 643: 202, // aggaat -> aatagg + 644: 161, // aggaca -> aaggac + 645: 330, // aggacc -> accagg + 646: 394, // aggacg -> acgagg + 647: 458, // aggact -> actagg + 648: 162, // aggaga -> aaggag + 649: 586, // aggagc -> agcagg + 650: 650, // aggagg -> aggagg + 651: 651, // aggagt -> aggagt + 652: 163, // aggata -> aaggat + 653: 653, // aggatc -> aggatc + 654: 654, // aggatg -> aggatg + 655: 655, // aggatt -> aggatt + 656: 41, // aggcaa -> aaaggc + 657: 297, // aggcac -> acaggc + 658: 553, // aggcag -> agaggc + 659: 659, // aggcat -> aggcat + 660: 165, // aggcca -> aaggcc + 661: 661, 
// aggccc -> aggccc + 662: 662, // aggccg -> aggccg + 663: 663, // aggcct -> aggcct + 664: 166, // aggcga -> aaggcg + 665: 665, // aggcgc -> aggcgc + 666: 666, // aggcgg -> aggcgg + 667: 667, // aggcgt -> aggcgt + 668: 167, // aggcta -> aaggct + 669: 669, // aggctc -> aggctc + 670: 670, // aggctg -> aggctg + 671: 671, // aggctt -> aggctt + 672: 42, // agggaa -> aaaggg + 673: 298, // agggac -> acaggg + 674: 554, // agggag -> agaggg + 675: 675, // agggat -> agggat + 676: 169, // agggca -> aagggc + 677: 677, // agggcc -> agggcc + 678: 678, // agggcg -> agggcg + 679: 679, // agggct -> agggct + 680: 170, // agggga -> aagggg + 681: 681, // aggggc -> aggggc + 682: 682, // aggggg -> aggggg + 683: 683, // aggggt -> aggggt + 684: 171, // agggta -> aagggt + 685: 685, // agggtc -> agggtc + 686: 686, // agggtg -> agggtg + 687: 687, // agggtt -> agggtt + 688: 43, // aggtaa -> aaaggt + 689: 299, // aggtac -> acaggt + 690: 555, // aggtag -> agaggt + 691: 691, // aggtat -> aggtat + 692: 173, // aggtca -> aaggtc + 693: 693, // aggtcc -> aggtcc + 694: 694, // aggtcg -> aggtcg + 695: 695, // aggtct -> aggtct + 696: 174, // aggtga -> aaggtg + 697: 697, // aggtgc -> aggtgc + 698: 698, // aggtgg -> aggtgg + 699: 699, // aggtgt -> aggtgt + 700: 175, // aggtta -> aaggtt + 701: 701, // aggttc -> aggttc + 702: 702, // aggttg -> aggttg + 703: 703, // aggttt -> aggttt + 704: 11, // agtaaa -> aaaagt + 705: 75, // agtaac -> aacagt + 706: 139, // agtaag -> aagagt + 707: 203, // agtaat -> aatagt + 708: 177, // agtaca -> aagtac + 709: 331, // agtacc -> accagt + 710: 395, // agtacg -> acgagt + 711: 459, // agtact -> actagt + 712: 178, // agtaga -> aagtag + 713: 587, // agtagc -> agcagt + 714: 651, // agtagg -> aggagt + 715: 715, // agtagt -> agtagt + 716: 179, // agtata -> aagtat + 717: 717, // agtatc -> agtatc + 718: 718, // agtatg -> agtatg + 719: 719, // agtatt -> agtatt + 720: 45, // agtcaa -> aaagtc + 721: 301, // agtcac -> acagtc + 722: 557, // agtcag -> agagtc + 723: 723, // agtcat -> agtcat 
+ 724: 181, // agtcca -> aagtcc + 725: 725, // agtccc -> agtccc + 726: 726, // agtccg -> agtccg + 727: 727, // agtcct -> agtcct + 728: 182, // agtcga -> aagtcg + 729: 729, // agtcgc -> agtcgc + 730: 730, // agtcgg -> agtcgg + 731: 731, // agtcgt -> agtcgt + 732: 183, // agtcta -> aagtct + 733: 733, // agtctc -> agtctc + 734: 734, // agtctg -> agtctg + 735: 735, // agtctt -> agtctt + 736: 46, // agtgaa -> aaagtg + 737: 302, // agtgac -> acagtg + 738: 558, // agtgag -> agagtg + 739: 739, // agtgat -> agtgat + 740: 185, // agtgca -> aagtgc + 741: 741, // agtgcc -> agtgcc + 742: 742, // agtgcg -> agtgcg + 743: 743, // agtgct -> agtgct + 744: 186, // agtgga -> aagtgg + 745: 745, // agtggc -> agtggc + 746: 746, // agtggg -> agtggg + 747: 747, // agtggt -> agtggt + 748: 187, // agtgta -> aagtgt + 749: 749, // agtgtc -> agtgtc + 750: 750, // agtgtg -> agtgtg + 751: 751, // agtgtt -> agtgtt + 752: 47, // agttaa -> aaagtt + 753: 303, // agttac -> acagtt + 754: 559, // agttag -> agagtt + 755: 755, // agttat -> agttat + 756: 189, // agttca -> aagttc + 757: 757, // agttcc -> agttcc + 758: 758, // agttcg -> agttcg + 759: 759, // agttct -> agttct + 760: 190, // agttga -> aagttg + 761: 761, // agttgc -> agttgc + 762: 762, // agttgg -> agttgg + 763: 763, // agttgt -> agttgt + 764: 191, // agttta -> aagttt + 765: 765, // agtttc -> agtttc + 766: 766, // agtttg -> agtttg + 767: 767, // agtttt -> agtttt + 768: 3, // ataaaa -> aaaaat + 769: 19, // ataaac -> aaacat + 770: 35, // ataaag -> aaagat + 771: 51, // ataaat -> aaatat + 772: 67, // ataaca -> aacaat + 773: 83, // ataacc -> aaccat + 774: 99, // ataacg -> aacgat + 775: 115, // ataact -> aactat + 776: 131, // ataaga -> aagaat + 777: 147, // ataagc -> aagcat + 778: 163, // ataagg -> aaggat + 779: 179, // ataagt -> aagtat + 780: 195, // ataata -> aataat + 781: 211, // ataatc -> aatcat + 782: 227, // ataatg -> aatgat + 783: 243, // ataatt -> aattat + 784: 49, // atacaa -> aaatac + 785: 275, // atacac -> acacat + 786: 291, // atacag -> 
acagat + 787: 307, // atacat -> acatat + 788: 197, // atacca -> aatacc + 789: 339, // ataccc -> acccat + 790: 355, // ataccg -> accgat + 791: 371, // atacct -> acctat + 792: 198, // atacga -> aatacg + 793: 403, // atacgc -> acgcat + 794: 419, // atacgg -> acggat + 795: 435, // atacgt -> acgtat + 796: 199, // atacta -> aatact + 797: 467, // atactc -> actcat + 798: 483, // atactg -> actgat + 799: 499, // atactt -> acttat + 800: 50, // atagaa -> aaatag + 801: 306, // atagac -> acatag + 802: 547, // atagag -> agagat + 803: 563, // atagat -> agatat + 804: 201, // atagca -> aatagc + 805: 595, // atagcc -> agccat + 806: 611, // atagcg -> agcgat + 807: 627, // atagct -> agctat + 808: 202, // atagga -> aatagg + 809: 659, // ataggc -> aggcat + 810: 675, // ataggg -> agggat + 811: 691, // ataggt -> aggtat + 812: 203, // atagta -> aatagt + 813: 723, // atagtc -> agtcat + 814: 739, // atagtg -> agtgat + 815: 755, // atagtt -> agttat + 816: 51, // atataa -> aaatat + 817: 307, // atatac -> acatat + 818: 563, // atatag -> agatat + 819: 819, // atatat -> atatat + 820: 205, // atatca -> aatatc + 821: 821, // atatcc -> atatcc + 822: 822, // atatcg -> atatcg + 823: 823, // atatct -> atatct + 824: 206, // atatga -> aatatg + 825: 825, // atatgc -> atatgc + 826: 826, // atatgg -> atatgg + 827: 827, // atatgt -> atatgt + 828: 207, // atatta -> aatatt + 829: 829, // atattc -> atattc + 830: 830, // atattg -> atattg + 831: 831, // atattt -> atattt + 832: 13, // atcaaa -> aaaatc + 833: 77, // atcaac -> aacatc + 834: 141, // atcaag -> aagatc + 835: 205, // atcaat -> aatatc + 836: 209, // atcaca -> aatcac + 837: 333, // atcacc -> accatc + 838: 397, // atcacg -> acgatc + 839: 461, // atcact -> actatc + 840: 210, // atcaga -> aatcag + 841: 589, // atcagc -> agcatc + 842: 653, // atcagg -> aggatc + 843: 717, // atcagt -> agtatc + 844: 211, // atcata -> aatcat + 845: 845, // atcatc -> atcatc + 846: 846, // atcatg -> atcatg + 847: 847, // atcatt -> atcatt + 848: 53, // atccaa -> aaatcc + 849: 309, 
// atccac -> acatcc + 850: 565, // atccag -> agatcc + 851: 821, // atccat -> atatcc + 852: 213, // atccca -> aatccc + 853: 853, // atcccc -> atcccc + 854: 854, // atcccg -> atcccg + 855: 855, // atccct -> atccct + 856: 214, // atccga -> aatccg + 857: 857, // atccgc -> atccgc + 858: 858, // atccgg -> atccgg + 859: 859, // atccgt -> atccgt + 860: 215, // atccta -> aatcct + 861: 861, // atcctc -> atcctc + 862: 862, // atcctg -> atcctg + 863: 863, // atcctt -> atcctt + 864: 54, // atcgaa -> aaatcg + 865: 310, // atcgac -> acatcg + 866: 566, // atcgag -> agatcg + 867: 822, // atcgat -> atatcg + 868: 217, // atcgca -> aatcgc + 869: 869, // atcgcc -> atcgcc + 870: 870, // atcgcg -> atcgcg + 871: 871, // atcgct -> atcgct + 872: 218, // atcgga -> aatcgg + 873: 873, // atcggc -> atcggc + 874: 874, // atcggg -> atcggg + 875: 875, // atcggt -> atcggt + 876: 219, // atcgta -> aatcgt + 877: 877, // atcgtc -> atcgtc + 878: 878, // atcgtg -> atcgtg + 879: 879, // atcgtt -> atcgtt + 880: 55, // atctaa -> aaatct + 881: 311, // atctac -> acatct + 882: 567, // atctag -> agatct + 883: 823, // atctat -> atatct + 884: 221, // atctca -> aatctc + 885: 885, // atctcc -> atctcc + 886: 886, // atctcg -> atctcg + 887: 887, // atctct -> atctct + 888: 222, // atctga -> aatctg + 889: 889, // atctgc -> atctgc + 890: 890, // atctgg -> atctgg + 891: 891, // atctgt -> atctgt + 892: 223, // atctta -> aatctt + 893: 893, // atcttc -> atcttc + 894: 894, // atcttg -> atcttg + 895: 895, // atcttt -> atcttt + 896: 14, // atgaaa -> aaaatg + 897: 78, // atgaac -> aacatg + 898: 142, // atgaag -> aagatg + 899: 206, // atgaat -> aatatg + 900: 225, // atgaca -> aatgac + 901: 334, // atgacc -> accatg + 902: 398, // atgacg -> acgatg + 903: 462, // atgact -> actatg + 904: 226, // atgaga -> aatgag + 905: 590, // atgagc -> agcatg + 906: 654, // atgagg -> aggatg + 907: 718, // atgagt -> agtatg + 908: 227, // atgata -> aatgat + 909: 846, // atgatc -> atcatg + 910: 910, // atgatg -> atgatg + 911: 911, // atgatt -> atgatt 
+ 912: 57, // atgcaa -> aaatgc + 913: 313, // atgcac -> acatgc + 914: 569, // atgcag -> agatgc + 915: 825, // atgcat -> atatgc + 916: 229, // atgcca -> aatgcc + 917: 917, // atgccc -> atgccc + 918: 918, // atgccg -> atgccg + 919: 919, // atgcct -> atgcct + 920: 230, // atgcga -> aatgcg + 921: 921, // atgcgc -> atgcgc + 922: 922, // atgcgg -> atgcgg + 923: 923, // atgcgt -> atgcgt + 924: 231, // atgcta -> aatgct + 925: 925, // atgctc -> atgctc + 926: 926, // atgctg -> atgctg + 927: 927, // atgctt -> atgctt + 928: 58, // atggaa -> aaatgg + 929: 314, // atggac -> acatgg + 930: 570, // atggag -> agatgg + 931: 826, // atggat -> atatgg + 932: 233, // atggca -> aatggc + 933: 933, // atggcc -> atggcc + 934: 934, // atggcg -> atggcg + 935: 935, // atggct -> atggct + 936: 234, // atggga -> aatggg + 937: 937, // atgggc -> atgggc + 938: 938, // atgggg -> atgggg + 939: 939, // atgggt -> atgggt + 940: 235, // atggta -> aatggt + 941: 941, // atggtc -> atggtc + 942: 942, // atggtg -> atggtg + 943: 943, // atggtt -> atggtt + 944: 59, // atgtaa -> aaatgt + 945: 315, // atgtac -> acatgt + 946: 571, // atgtag -> agatgt + 947: 827, // atgtat -> atatgt + 948: 237, // atgtca -> aatgtc + 949: 949, // atgtcc -> atgtcc + 950: 950, // atgtcg -> atgtcg + 951: 951, // atgtct -> atgtct + 952: 238, // atgtga -> aatgtg + 953: 953, // atgtgc -> atgtgc + 954: 954, // atgtgg -> atgtgg + 955: 955, // atgtgt -> atgtgt + 956: 239, // atgtta -> aatgtt + 957: 957, // atgttc -> atgttc + 958: 958, // atgttg -> atgttg + 959: 959, // atgttt -> atgttt + 960: 15, // attaaa -> aaaatt + 961: 79, // attaac -> aacatt + 962: 143, // attaag -> aagatt + 963: 207, // attaat -> aatatt + 964: 241, // attaca -> aattac + 965: 335, // attacc -> accatt + 966: 399, // attacg -> acgatt + 967: 463, // attact -> actatt + 968: 242, // attaga -> aattag + 969: 591, // attagc -> agcatt + 970: 655, // attagg -> aggatt + 971: 719, // attagt -> agtatt + 972: 243, // attata -> aattat + 973: 847, // attatc -> atcatt + 974: 911, // 
attatg -> atgatt + 975: 975, // attatt -> attatt + 976: 61, // attcaa -> aaattc + 977: 317, // attcac -> acattc + 978: 573, // attcag -> agattc + 979: 829, // attcat -> atattc + 980: 245, // attcca -> aattcc + 981: 981, // attccc -> attccc + 982: 982, // attccg -> attccg + 983: 983, // attcct -> attcct + 984: 246, // attcga -> aattcg + 985: 985, // attcgc -> attcgc + 986: 986, // attcgg -> attcgg + 987: 987, // attcgt -> attcgt + 988: 247, // attcta -> aattct + 989: 989, // attctc -> attctc + 990: 990, // attctg -> attctg + 991: 991, // attctt -> attctt + 992: 62, // attgaa -> aaattg + 993: 318, // attgac -> acattg + 994: 574, // attgag -> agattg + 995: 830, // attgat -> atattg + 996: 249, // attgca -> aattgc + 997: 997, // attgcc -> attgcc + 998: 998, // attgcg -> attgcg + 999: 999, // attgct -> attgct + 1000: 250, // attgga -> aattgg + 1001: 1001, // attggc -> attggc + 1002: 1002, // attggg -> attggg + 1003: 1003, // attggt -> attggt + 1004: 251, // attgta -> aattgt + 1005: 1005, // attgtc -> attgtc + 1006: 1006, // attgtg -> attgtg + 1007: 1007, // attgtt -> attgtt + 1008: 63, // atttaa -> aaattt + 1009: 319, // atttac -> acattt + 1010: 575, // atttag -> agattt + 1011: 831, // atttat -> atattt + 1012: 253, // atttca -> aatttc + 1013: 1013, // atttcc -> atttcc + 1014: 1014, // atttcg -> atttcg + 1015: 1015, // atttct -> atttct + 1016: 254, // atttga -> aatttg + 1017: 1017, // atttgc -> atttgc + 1018: 1018, // atttgg -> atttgg + 1019: 1019, // atttgt -> atttgt + 1020: 255, // atttta -> aatttt + 1021: 1021, // attttc -> attttc + 1022: 1022, // attttg -> attttg + 1023: 1023, // attttt -> attttt + 1024: 1, // caaaaa -> aaaaac + 1025: 5, // caaaac -> aaaacc + 1026: 9, // caaaag -> aaaagc + 1027: 13, // caaaat -> aaaatc + 1028: 17, // caaaca -> aaacac + 1029: 21, // caaacc -> aaaccc + 1030: 25, // caaacg -> aaacgc + 1031: 29, // caaact -> aaactc + 1032: 33, // caaaga -> aaagac + 1033: 37, // caaagc -> aaagcc + 1034: 41, // caaagg -> aaaggc + 1035: 45, // caaagt -> 
aaagtc + 1036: 49, // caaata -> aaatac + 1037: 53, // caaatc -> aaatcc + 1038: 57, // caaatg -> aaatgc + 1039: 61, // caaatt -> aaattc + 1040: 65, // caacaa -> aacaac + 1041: 69, // caacac -> aacacc + 1042: 73, // caacag -> aacagc + 1043: 77, // caacat -> aacatc + 1044: 81, // caacca -> aaccac + 1045: 85, // caaccc -> aacccc + 1046: 89, // caaccg -> aaccgc + 1047: 93, // caacct -> aacctc + 1048: 97, // caacga -> aacgac + 1049: 101, // caacgc -> aacgcc + 1050: 105, // caacgg -> aacggc + 1051: 109, // caacgt -> aacgtc + 1052: 113, // caacta -> aactac + 1053: 117, // caactc -> aactcc + 1054: 121, // caactg -> aactgc + 1055: 125, // caactt -> aacttc + 1056: 66, // caagaa -> aacaag + 1057: 133, // caagac -> aagacc + 1058: 137, // caagag -> aagagc + 1059: 141, // caagat -> aagatc + 1060: 145, // caagca -> aagcac + 1061: 149, // caagcc -> aagccc + 1062: 153, // caagcg -> aagcgc + 1063: 157, // caagct -> aagctc + 1064: 161, // caagga -> aaggac + 1065: 165, // caaggc -> aaggcc + 1066: 169, // caaggg -> aagggc + 1067: 173, // caaggt -> aaggtc + 1068: 177, // caagta -> aagtac + 1069: 181, // caagtc -> aagtcc + 1070: 185, // caagtg -> aagtgc + 1071: 189, // caagtt -> aagttc + 1072: 67, // caataa -> aacaat + 1073: 197, // caatac -> aatacc + 1074: 201, // caatag -> aatagc + 1075: 205, // caatat -> aatatc + 1076: 209, // caatca -> aatcac + 1077: 213, // caatcc -> aatccc + 1078: 217, // caatcg -> aatcgc + 1079: 221, // caatct -> aatctc + 1080: 225, // caatga -> aatgac + 1081: 229, // caatgc -> aatgcc + 1082: 233, // caatgg -> aatggc + 1083: 237, // caatgt -> aatgtc + 1084: 241, // caatta -> aattac + 1085: 245, // caattc -> aattcc + 1086: 249, // caattg -> aattgc + 1087: 253, // caattt -> aatttc + 1088: 17, // cacaaa -> aaacac + 1089: 81, // cacaac -> aaccac + 1090: 145, // cacaag -> aagcac + 1091: 209, // cacaat -> aatcac + 1092: 273, // cacaca -> acacac + 1093: 277, // cacacc -> acaccc + 1094: 281, // cacacg -> acacgc + 1095: 285, // cacact -> acactc + 1096: 274, // cacaga -> 
acacag + 1097: 293, // cacagc -> acagcc + 1098: 297, // cacagg -> acaggc + 1099: 301, // cacagt -> acagtc + 1100: 275, // cacata -> acacat + 1101: 309, // cacatc -> acatcc + 1102: 313, // cacatg -> acatgc + 1103: 317, // cacatt -> acattc + 1104: 69, // caccaa -> aacacc + 1105: 325, // caccac -> accacc + 1106: 329, // caccag -> accagc + 1107: 333, // caccat -> accatc + 1108: 277, // caccca -> acaccc + 1109: 341, // cacccc -> accccc + 1110: 345, // cacccg -> acccgc + 1111: 349, // caccct -> accctc + 1112: 278, // caccga -> acaccg + 1113: 357, // caccgc -> accgcc + 1114: 361, // caccgg -> accggc + 1115: 365, // caccgt -> accgtc + 1116: 279, // caccta -> acacct + 1117: 373, // cacctc -> acctcc + 1118: 377, // cacctg -> acctgc + 1119: 381, // cacctt -> accttc + 1120: 70, // cacgaa -> aacacg + 1121: 326, // cacgac -> accacg + 1122: 393, // cacgag -> acgagc + 1123: 397, // cacgat -> acgatc + 1124: 281, // cacgca -> acacgc + 1125: 405, // cacgcc -> acgccc + 1126: 409, // cacgcg -> acgcgc + 1127: 413, // cacgct -> acgctc + 1128: 282, // cacgga -> acacgg + 1129: 421, // cacggc -> acggcc + 1130: 425, // cacggg -> acgggc + 1131: 429, // cacggt -> acggtc + 1132: 283, // cacgta -> acacgt + 1133: 437, // cacgtc -> acgtcc + 1134: 441, // cacgtg -> acgtgc + 1135: 445, // cacgtt -> acgttc + 1136: 71, // cactaa -> aacact + 1137: 327, // cactac -> accact + 1138: 457, // cactag -> actagc + 1139: 461, // cactat -> actatc + 1140: 285, // cactca -> acactc + 1141: 469, // cactcc -> actccc + 1142: 473, // cactcg -> actcgc + 1143: 477, // cactct -> actctc + 1144: 286, // cactga -> acactg + 1145: 485, // cactgc -> actgcc + 1146: 489, // cactgg -> actggc + 1147: 493, // cactgt -> actgtc + 1148: 287, // cactta -> acactt + 1149: 501, // cacttc -> acttcc + 1150: 505, // cacttg -> acttgc + 1151: 509, // cacttt -> actttc + 1152: 18, // cagaaa -> aaacag + 1153: 82, // cagaac -> aaccag + 1154: 146, // cagaag -> aagcag + 1155: 210, // cagaat -> aatcag + 1156: 274, // cagaca -> acacag + 1157: 338, // 
cagacc -> acccag + 1158: 402, // cagacg -> acgcag + 1159: 466, // cagact -> actcag + 1160: 290, // cagaga -> acagag + 1161: 549, // cagagc -> agagcc + 1162: 553, // cagagg -> agaggc + 1163: 557, // cagagt -> agagtc + 1164: 291, // cagata -> acagat + 1165: 565, // cagatc -> agatcc + 1166: 569, // cagatg -> agatgc + 1167: 573, // cagatt -> agattc + 1168: 73, // cagcaa -> aacagc + 1169: 329, // cagcac -> accagc + 1170: 585, // cagcag -> agcagc + 1171: 589, // cagcat -> agcatc + 1172: 293, // cagcca -> acagcc + 1173: 597, // cagccc -> agcccc + 1174: 601, // cagccg -> agccgc + 1175: 605, // cagcct -> agcctc + 1176: 294, // cagcga -> acagcg + 1177: 613, // cagcgc -> agcgcc + 1178: 617, // cagcgg -> agcggc + 1179: 621, // cagcgt -> agcgtc + 1180: 295, // cagcta -> acagct + 1181: 629, // cagctc -> agctcc + 1182: 633, // cagctg -> agctgc + 1183: 637, // cagctt -> agcttc + 1184: 74, // caggaa -> aacagg + 1185: 330, // caggac -> accagg + 1186: 586, // caggag -> agcagg + 1187: 653, // caggat -> aggatc + 1188: 297, // caggca -> acaggc + 1189: 661, // caggcc -> aggccc + 1190: 665, // caggcg -> aggcgc + 1191: 669, // caggct -> aggctc + 1192: 298, // caggga -> acaggg + 1193: 677, // cagggc -> agggcc + 1194: 681, // cagggg -> aggggc + 1195: 685, // cagggt -> agggtc + 1196: 299, // caggta -> acaggt + 1197: 693, // caggtc -> aggtcc + 1198: 697, // caggtg -> aggtgc + 1199: 701, // caggtt -> aggttc + 1200: 75, // cagtaa -> aacagt + 1201: 331, // cagtac -> accagt + 1202: 587, // cagtag -> agcagt + 1203: 717, // cagtat -> agtatc + 1204: 301, // cagtca -> acagtc + 1205: 725, // cagtcc -> agtccc + 1206: 729, // cagtcg -> agtcgc + 1207: 733, // cagtct -> agtctc + 1208: 302, // cagtga -> acagtg + 1209: 741, // cagtgc -> agtgcc + 1210: 745, // cagtgg -> agtggc + 1211: 749, // cagtgt -> agtgtc + 1212: 303, // cagtta -> acagtt + 1213: 757, // cagttc -> agttcc + 1214: 761, // cagttg -> agttgc + 1215: 765, // cagttt -> agtttc + 1216: 19, // cataaa -> aaacat + 1217: 83, // cataac -> aaccat + 1218: 
147, // cataag -> aagcat + 1219: 211, // cataat -> aatcat + 1220: 275, // cataca -> acacat + 1221: 339, // catacc -> acccat + 1222: 403, // catacg -> acgcat + 1223: 467, // catact -> actcat + 1224: 306, // cataga -> acatag + 1225: 595, // catagc -> agccat + 1226: 659, // catagg -> aggcat + 1227: 723, // catagt -> agtcat + 1228: 307, // catata -> acatat + 1229: 821, // catatc -> atatcc + 1230: 825, // catatg -> atatgc + 1231: 829, // catatt -> atattc + 1232: 77, // catcaa -> aacatc + 1233: 333, // catcac -> accatc + 1234: 589, // catcag -> agcatc + 1235: 845, // catcat -> atcatc + 1236: 309, // catcca -> acatcc + 1237: 853, // catccc -> atcccc + 1238: 857, // catccg -> atccgc + 1239: 861, // catcct -> atcctc + 1240: 310, // catcga -> acatcg + 1241: 869, // catcgc -> atcgcc + 1242: 873, // catcgg -> atcggc + 1243: 877, // catcgt -> atcgtc + 1244: 311, // catcta -> acatct + 1245: 885, // catctc -> atctcc + 1246: 889, // catctg -> atctgc + 1247: 893, // catctt -> atcttc + 1248: 78, // catgaa -> aacatg + 1249: 334, // catgac -> accatg + 1250: 590, // catgag -> agcatg + 1251: 846, // catgat -> atcatg + 1252: 313, // catgca -> acatgc + 1253: 917, // catgcc -> atgccc + 1254: 921, // catgcg -> atgcgc + 1255: 925, // catgct -> atgctc + 1256: 314, // catgga -> acatgg + 1257: 933, // catggc -> atggcc + 1258: 937, // catggg -> atgggc + 1259: 941, // catggt -> atggtc + 1260: 315, // catgta -> acatgt + 1261: 949, // catgtc -> atgtcc + 1262: 953, // catgtg -> atgtgc + 1263: 957, // catgtt -> atgttc + 1264: 79, // cattaa -> aacatt + 1265: 335, // cattac -> accatt + 1266: 591, // cattag -> agcatt + 1267: 847, // cattat -> atcatt + 1268: 317, // cattca -> acattc + 1269: 981, // cattcc -> attccc + 1270: 985, // cattcg -> attcgc + 1271: 989, // cattct -> attctc + 1272: 318, // cattga -> acattg + 1273: 997, // cattgc -> attgcc + 1274: 1001, // cattgg -> attggc + 1275: 1005, // cattgt -> attgtc + 1276: 319, // cattta -> acattt + 1277: 1013, // catttc -> atttcc + 1278: 1017, // catttg -> 
atttgc + 1279: 1021, // catttt -> attttc + 1280: 5, // ccaaaa -> aaaacc + 1281: 21, // ccaaac -> aaaccc + 1282: 37, // ccaaag -> aaagcc + 1283: 53, // ccaaat -> aaatcc + 1284: 69, // ccaaca -> aacacc + 1285: 85, // ccaacc -> aacccc + 1286: 101, // ccaacg -> aacgcc + 1287: 117, // ccaact -> aactcc + 1288: 133, // ccaaga -> aagacc + 1289: 149, // ccaagc -> aagccc + 1290: 165, // ccaagg -> aaggcc + 1291: 181, // ccaagt -> aagtcc + 1292: 197, // ccaata -> aatacc + 1293: 213, // ccaatc -> aatccc + 1294: 229, // ccaatg -> aatgcc + 1295: 245, // ccaatt -> aattcc + 1296: 81, // ccacaa -> aaccac + 1297: 277, // ccacac -> acaccc + 1298: 293, // ccacag -> acagcc + 1299: 309, // ccacat -> acatcc + 1300: 325, // ccacca -> accacc + 1301: 341, // ccaccc -> accccc + 1302: 357, // ccaccg -> accgcc + 1303: 373, // ccacct -> acctcc + 1304: 326, // ccacga -> accacg + 1305: 405, // ccacgc -> acgccc + 1306: 421, // ccacgg -> acggcc + 1307: 437, // ccacgt -> acgtcc + 1308: 327, // ccacta -> accact + 1309: 469, // ccactc -> actccc + 1310: 485, // ccactg -> actgcc + 1311: 501, // ccactt -> acttcc + 1312: 82, // ccagaa -> aaccag + 1313: 338, // ccagac -> acccag + 1314: 549, // ccagag -> agagcc + 1315: 565, // ccagat -> agatcc + 1316: 329, // ccagca -> accagc + 1317: 597, // ccagcc -> agcccc + 1318: 613, // ccagcg -> agcgcc + 1319: 629, // ccagct -> agctcc + 1320: 330, // ccagga -> accagg + 1321: 661, // ccaggc -> aggccc + 1322: 677, // ccaggg -> agggcc + 1323: 693, // ccaggt -> aggtcc + 1324: 331, // ccagta -> accagt + 1325: 725, // ccagtc -> agtccc + 1326: 741, // ccagtg -> agtgcc + 1327: 757, // ccagtt -> agttcc + 1328: 83, // ccataa -> aaccat + 1329: 339, // ccatac -> acccat + 1330: 595, // ccatag -> agccat + 1331: 821, // ccatat -> atatcc + 1332: 333, // ccatca -> accatc + 1333: 853, // ccatcc -> atcccc + 1334: 869, // ccatcg -> atcgcc + 1335: 885, // ccatct -> atctcc + 1336: 334, // ccatga -> accatg + 1337: 917, // ccatgc -> atgccc + 1338: 933, // ccatgg -> atggcc + 1339: 949, // 
ccatgt -> atgtcc + 1340: 335, // ccatta -> accatt + 1341: 981, // ccattc -> attccc + 1342: 997, // ccattg -> attgcc + 1343: 1013, // ccattt -> atttcc + 1344: 21, // cccaaa -> aaaccc + 1345: 85, // cccaac -> aacccc + 1346: 149, // cccaag -> aagccc + 1347: 213, // cccaat -> aatccc + 1348: 277, // cccaca -> acaccc + 1349: 341, // cccacc -> accccc + 1350: 405, // cccacg -> acgccc + 1351: 469, // cccact -> actccc + 1352: 338, // cccaga -> acccag + 1353: 597, // cccagc -> agcccc + 1354: 661, // cccagg -> aggccc + 1355: 725, // cccagt -> agtccc + 1356: 339, // cccata -> acccat + 1357: 853, // cccatc -> atcccc + 1358: 917, // cccatg -> atgccc + 1359: 981, // cccatt -> attccc + 1360: 85, // ccccaa -> aacccc + 1361: 341, // ccccac -> accccc + 1362: 597, // ccccag -> agcccc + 1363: 853, // ccccat -> atcccc + 1364: 341, // ccccca -> accccc + 1365: 1365, // cccccc -> cccccc + 1366: 1366, // cccccg -> cccccg + 1367: 1367, // ccccct -> ccccct + 1368: 342, // ccccga -> accccg + 1369: 1366, // ccccgc -> cccccg + 1370: 1370, // ccccgg -> ccccgg + 1371: 1371, // ccccgt -> ccccgt + 1372: 343, // ccccta -> acccct + 1373: 1367, // cccctc -> ccccct + 1374: 1374, // cccctg -> cccctg + 1375: 1375, // cccctt -> cccctt + 1376: 86, // cccgaa -> aacccg + 1377: 342, // cccgac -> accccg + 1378: 598, // cccgag -> agcccg + 1379: 854, // cccgat -> atcccg + 1380: 345, // cccgca -> acccgc + 1381: 1366, // cccgcc -> cccccg + 1382: 1382, // cccgcg -> cccgcg + 1383: 1383, // cccgct -> cccgct + 1384: 346, // cccgga -> acccgg + 1385: 1370, // cccggc -> ccccgg + 1386: 1386, // cccggg -> cccggg + 1387: 1387, // cccggt -> cccggt + 1388: 347, // cccgta -> acccgt + 1389: 1371, // cccgtc -> ccccgt + 1390: 1390, // cccgtg -> cccgtg + 1391: 1391, // cccgtt -> cccgtt + 1392: 87, // ccctaa -> aaccct + 1393: 343, // ccctac -> acccct + 1394: 599, // ccctag -> agccct + 1395: 855, // ccctat -> atccct + 1396: 349, // ccctca -> accctc + 1397: 1367, // ccctcc -> ccccct + 1398: 1398, // ccctcg -> ccctcg + 1399: 1399, // 
ccctct -> ccctct + 1400: 350, // ccctga -> accctg + 1401: 1374, // ccctgc -> cccctg + 1402: 1402, // ccctgg -> ccctgg + 1403: 1403, // ccctgt -> ccctgt + 1404: 351, // ccctta -> accctt + 1405: 1375, // cccttc -> cccctt + 1406: 1406, // cccttg -> cccttg + 1407: 1407, // cccttt -> cccttt + 1408: 22, // ccgaaa -> aaaccg + 1409: 86, // ccgaac -> aacccg + 1410: 150, // ccgaag -> aagccg + 1411: 214, // ccgaat -> aatccg + 1412: 278, // ccgaca -> acaccg + 1413: 342, // ccgacc -> accccg + 1414: 406, // ccgacg -> acgccg + 1415: 470, // ccgact -> actccg + 1416: 354, // ccgaga -> accgag + 1417: 598, // ccgagc -> agcccg + 1418: 662, // ccgagg -> aggccg + 1419: 726, // ccgagt -> agtccg + 1420: 355, // ccgata -> accgat + 1421: 854, // ccgatc -> atcccg + 1422: 918, // ccgatg -> atgccg + 1423: 982, // ccgatt -> attccg + 1424: 89, // ccgcaa -> aaccgc + 1425: 345, // ccgcac -> acccgc + 1426: 601, // ccgcag -> agccgc + 1427: 857, // ccgcat -> atccgc + 1428: 357, // ccgcca -> accgcc + 1429: 1366, // ccgccc -> cccccg + 1430: 1430, // ccgccg -> ccgccg + 1431: 1431, // ccgcct -> ccgcct + 1432: 358, // ccgcga -> accgcg + 1433: 1382, // ccgcgc -> cccgcg + 1434: 1434, // ccgcgg -> ccgcgg + 1435: 1435, // ccgcgt -> ccgcgt + 1436: 359, // ccgcta -> accgct + 1437: 1383, // ccgctc -> cccgct + 1438: 1438, // ccgctg -> ccgctg + 1439: 1439, // ccgctt -> ccgctt + 1440: 90, // ccggaa -> aaccgg + 1441: 346, // ccggac -> acccgg + 1442: 602, // ccggag -> agccgg + 1443: 858, // ccggat -> atccgg + 1444: 361, // ccggca -> accggc + 1445: 1370, // ccggcc -> ccccgg + 1446: 1446, // ccggcg -> ccggcg + 1447: 1447, // ccggct -> ccggct + 1448: 362, // ccggga -> accggg + 1449: 1386, // ccgggc -> cccggg + 1450: 1450, // ccgggg -> ccgggg + 1451: 1451, // ccgggt -> ccgggt + 1452: 363, // ccggta -> accggt + 1453: 1387, // ccggtc -> cccggt + 1454: 1454, // ccggtg -> ccggtg + 1455: 1455, // ccggtt -> ccggtt + 1456: 91, // ccgtaa -> aaccgt + 1457: 347, // ccgtac -> acccgt + 1458: 603, // ccgtag -> agccgt + 1459: 859, // 
ccgtat -> atccgt + 1460: 365, // ccgtca -> accgtc + 1461: 1371, // ccgtcc -> ccccgt + 1462: 1462, // ccgtcg -> ccgtcg + 1463: 1463, // ccgtct -> ccgtct + 1464: 366, // ccgtga -> accgtg + 1465: 1390, // ccgtgc -> cccgtg + 1466: 1466, // ccgtgg -> ccgtgg + 1467: 1467, // ccgtgt -> ccgtgt + 1468: 367, // ccgtta -> accgtt + 1469: 1391, // ccgttc -> cccgtt + 1470: 1470, // ccgttg -> ccgttg + 1471: 1471, // ccgttt -> ccgttt + 1472: 23, // cctaaa -> aaacct + 1473: 87, // cctaac -> aaccct + 1474: 151, // cctaag -> aagcct + 1475: 215, // cctaat -> aatcct + 1476: 279, // cctaca -> acacct + 1477: 343, // cctacc -> acccct + 1478: 407, // cctacg -> acgcct + 1479: 471, // cctact -> actcct + 1480: 370, // cctaga -> acctag + 1481: 599, // cctagc -> agccct + 1482: 663, // cctagg -> aggcct + 1483: 727, // cctagt -> agtcct + 1484: 371, // cctata -> acctat + 1485: 855, // cctatc -> atccct + 1486: 919, // cctatg -> atgcct + 1487: 983, // cctatt -> attcct + 1488: 93, // cctcaa -> aacctc + 1489: 349, // cctcac -> accctc + 1490: 605, // cctcag -> agcctc + 1491: 861, // cctcat -> atcctc + 1492: 373, // cctcca -> acctcc + 1493: 1367, // cctccc -> ccccct + 1494: 1431, // cctccg -> ccgcct + 1495: 1495, // cctcct -> cctcct + 1496: 374, // cctcga -> acctcg + 1497: 1398, // cctcgc -> ccctcg + 1498: 1498, // cctcgg -> cctcgg + 1499: 1499, // cctcgt -> cctcgt + 1500: 375, // cctcta -> acctct + 1501: 1399, // cctctc -> ccctct + 1502: 1502, // cctctg -> cctctg + 1503: 1503, // cctctt -> cctctt + 1504: 94, // cctgaa -> aacctg + 1505: 350, // cctgac -> accctg + 1506: 606, // cctgag -> agcctg + 1507: 862, // cctgat -> atcctg + 1508: 377, // cctgca -> acctgc + 1509: 1374, // cctgcc -> cccctg + 1510: 1510, // cctgcg -> cctgcg + 1511: 1511, // cctgct -> cctgct + 1512: 378, // cctgga -> acctgg + 1513: 1402, // cctggc -> ccctgg + 1514: 1514, // cctggg -> cctggg + 1515: 1515, // cctggt -> cctggt + 1516: 379, // cctgta -> acctgt + 1517: 1403, // cctgtc -> ccctgt + 1518: 1518, // cctgtg -> cctgtg + 1519: 1519, 
// cctgtt -> cctgtt + 1520: 95, // ccttaa -> aacctt + 1521: 351, // ccttac -> accctt + 1522: 607, // ccttag -> agcctt + 1523: 863, // ccttat -> atcctt + 1524: 381, // ccttca -> accttc + 1525: 1375, // ccttcc -> cccctt + 1526: 1526, // ccttcg -> ccttcg + 1527: 1527, // ccttct -> ccttct + 1528: 382, // ccttga -> accttg + 1529: 1406, // ccttgc -> cccttg + 1530: 1530, // ccttgg -> ccttgg + 1531: 1531, // ccttgt -> ccttgt + 1532: 383, // ccttta -> accttt + 1533: 1407, // cctttc -> cccttt + 1534: 1534, // cctttg -> cctttg + 1535: 1535, // cctttt -> cctttt + 1536: 6, // cgaaaa -> aaaacg + 1537: 22, // cgaaac -> aaaccg + 1538: 38, // cgaaag -> aaagcg + 1539: 54, // cgaaat -> aaatcg + 1540: 70, // cgaaca -> aacacg + 1541: 86, // cgaacc -> aacccg + 1542: 102, // cgaacg -> aacgcg + 1543: 118, // cgaact -> aactcg + 1544: 134, // cgaaga -> aagacg + 1545: 150, // cgaagc -> aagccg + 1546: 166, // cgaagg -> aaggcg + 1547: 182, // cgaagt -> aagtcg + 1548: 198, // cgaata -> aatacg + 1549: 214, // cgaatc -> aatccg + 1550: 230, // cgaatg -> aatgcg + 1551: 246, // cgaatt -> aattcg + 1552: 97, // cgacaa -> aacgac + 1553: 278, // cgacac -> acaccg + 1554: 294, // cgacag -> acagcg + 1555: 310, // cgacat -> acatcg + 1556: 326, // cgacca -> accacg + 1557: 342, // cgaccc -> accccg + 1558: 358, // cgaccg -> accgcg + 1559: 374, // cgacct -> acctcg + 1560: 390, // cgacga -> acgacg + 1561: 406, // cgacgc -> acgccg + 1562: 422, // cgacgg -> acggcg + 1563: 438, // cgacgt -> acgtcg + 1564: 391, // cgacta -> acgact + 1565: 470, // cgactc -> actccg + 1566: 486, // cgactg -> actgcg + 1567: 502, // cgactt -> acttcg + 1568: 98, // cgagaa -> aacgag + 1569: 354, // cgagac -> accgag + 1570: 550, // cgagag -> agagcg + 1571: 566, // cgagat -> agatcg + 1572: 393, // cgagca -> acgagc + 1573: 598, // cgagcc -> agcccg + 1574: 614, // cgagcg -> agcgcg + 1575: 630, // cgagct -> agctcg + 1576: 394, // cgagga -> acgagg + 1577: 662, // cgaggc -> aggccg + 1578: 678, // cgaggg -> agggcg + 1579: 694, // cgaggt -> aggtcg 
+ 1580: 395, // cgagta -> acgagt + 1581: 726, // cgagtc -> agtccg + 1582: 742, // cgagtg -> agtgcg + 1583: 758, // cgagtt -> agttcg + 1584: 99, // cgataa -> aacgat + 1585: 355, // cgatac -> accgat + 1586: 611, // cgatag -> agcgat + 1587: 822, // cgatat -> atatcg + 1588: 397, // cgatca -> acgatc + 1589: 854, // cgatcc -> atcccg + 1590: 870, // cgatcg -> atcgcg + 1591: 886, // cgatct -> atctcg + 1592: 398, // cgatga -> acgatg + 1593: 918, // cgatgc -> atgccg + 1594: 934, // cgatgg -> atggcg + 1595: 950, // cgatgt -> atgtcg + 1596: 399, // cgatta -> acgatt + 1597: 982, // cgattc -> attccg + 1598: 998, // cgattg -> attgcg + 1599: 1014, // cgattt -> atttcg + 1600: 25, // cgcaaa -> aaacgc + 1601: 89, // cgcaac -> aaccgc + 1602: 153, // cgcaag -> aagcgc + 1603: 217, // cgcaat -> aatcgc + 1604: 281, // cgcaca -> acacgc + 1605: 345, // cgcacc -> acccgc + 1606: 409, // cgcacg -> acgcgc + 1607: 473, // cgcact -> actcgc + 1608: 402, // cgcaga -> acgcag + 1609: 601, // cgcagc -> agccgc + 1610: 665, // cgcagg -> aggcgc + 1611: 729, // cgcagt -> agtcgc + 1612: 403, // cgcata -> acgcat + 1613: 857, // cgcatc -> atccgc + 1614: 921, // cgcatg -> atgcgc + 1615: 985, // cgcatt -> attcgc + 1616: 101, // cgccaa -> aacgcc + 1617: 357, // cgccac -> accgcc + 1618: 613, // cgccag -> agcgcc + 1619: 869, // cgccat -> atcgcc + 1620: 405, // cgccca -> acgccc + 1621: 1366, // cgcccc -> cccccg + 1622: 1382, // cgcccg -> cccgcg + 1623: 1398, // cgccct -> ccctcg + 1624: 406, // cgccga -> acgccg + 1625: 1430, // cgccgc -> ccgccg + 1626: 1446, // cgccgg -> ccggcg + 1627: 1462, // cgccgt -> ccgtcg + 1628: 407, // cgccta -> acgcct + 1629: 1431, // cgcctc -> ccgcct + 1630: 1510, // cgcctg -> cctgcg + 1631: 1526, // cgcctt -> ccttcg + 1632: 102, // cgcgaa -> aacgcg + 1633: 358, // cgcgac -> accgcg + 1634: 614, // cgcgag -> agcgcg + 1635: 870, // cgcgat -> atcgcg + 1636: 409, // cgcgca -> acgcgc + 1637: 1382, // cgcgcc -> cccgcg + 1638: 1638, // cgcgcg -> cgcgcg + 1639: 1639, // cgcgct -> cgcgct + 1640: 
410, // cgcgga -> acgcgg + 1641: 1434, // cgcggc -> ccgcgg + 1642: 1642, // cgcggg -> cgcggg + 1643: 1643, // cgcggt -> cgcggt + 1644: 411, // cgcgta -> acgcgt + 1645: 1435, // cgcgtc -> ccgcgt + 1646: 1646, // cgcgtg -> cgcgtg + 1647: 1647, // cgcgtt -> cgcgtt + 1648: 103, // cgctaa -> aacgct + 1649: 359, // cgctac -> accgct + 1650: 615, // cgctag -> agcgct + 1651: 871, // cgctat -> atcgct + 1652: 413, // cgctca -> acgctc + 1653: 1383, // cgctcc -> cccgct + 1654: 1639, // cgctcg -> cgcgct + 1655: 1655, // cgctct -> cgctct + 1656: 414, // cgctga -> acgctg + 1657: 1438, // cgctgc -> ccgctg + 1658: 1658, // cgctgg -> cgctgg + 1659: 1659, // cgctgt -> cgctgt + 1660: 415, // cgctta -> acgctt + 1661: 1439, // cgcttc -> ccgctt + 1662: 1662, // cgcttg -> cgcttg + 1663: 1663, // cgcttt -> cgcttt + 1664: 26, // cggaaa -> aaacgg + 1665: 90, // cggaac -> aaccgg + 1666: 154, // cggaag -> aagcgg + 1667: 218, // cggaat -> aatcgg + 1668: 282, // cggaca -> acacgg + 1669: 346, // cggacc -> acccgg + 1670: 410, // cggacg -> acgcgg + 1671: 474, // cggact -> actcgg + 1672: 418, // cggaga -> acggag + 1673: 602, // cggagc -> agccgg + 1674: 666, // cggagg -> aggcgg + 1675: 730, // cggagt -> agtcgg + 1676: 419, // cggata -> acggat + 1677: 858, // cggatc -> atccgg + 1678: 922, // cggatg -> atgcgg + 1679: 986, // cggatt -> attcgg + 1680: 105, // cggcaa -> aacggc + 1681: 361, // cggcac -> accggc + 1682: 617, // cggcag -> agcggc + 1683: 873, // cggcat -> atcggc + 1684: 421, // cggcca -> acggcc + 1685: 1370, // cggccc -> ccccgg + 1686: 1434, // cggccg -> ccgcgg + 1687: 1498, // cggcct -> cctcgg + 1688: 422, // cggcga -> acggcg + 1689: 1446, // cggcgc -> ccggcg + 1690: 1690, // cggcgg -> cggcgg + 1691: 1691, // cggcgt -> cggcgt + 1692: 423, // cggcta -> acggct + 1693: 1447, // cggctc -> ccggct + 1694: 1694, // cggctg -> cggctg + 1695: 1695, // cggctt -> cggctt + 1696: 106, // cgggaa -> aacggg + 1697: 362, // cgggac -> accggg + 1698: 618, // cgggag -> agcggg + 1699: 874, // cgggat -> atcggg + 
1700: 425, // cgggca -> acgggc + 1701: 1386, // cgggcc -> cccggg + 1702: 1642, // cgggcg -> cgcggg + 1703: 1703, // cgggct -> cgggct + 1704: 426, // cgggga -> acgggg + 1705: 1450, // cggggc -> ccgggg + 1706: 1706, // cggggg -> cggggg + 1707: 1707, // cggggt -> cggggt + 1708: 427, // cgggta -> acgggt + 1709: 1451, // cgggtc -> ccgggt + 1710: 1710, // cgggtg -> cgggtg + 1711: 1711, // cgggtt -> cgggtt + 1712: 107, // cggtaa -> aacggt + 1713: 363, // cggtac -> accggt + 1714: 619, // cggtag -> agcggt + 1715: 875, // cggtat -> atcggt + 1716: 429, // cggtca -> acggtc + 1717: 1387, // cggtcc -> cccggt + 1718: 1643, // cggtcg -> cgcggt + 1719: 1719, // cggtct -> cggtct + 1720: 430, // cggtga -> acggtg + 1721: 1454, // cggtgc -> ccggtg + 1722: 1722, // cggtgg -> cggtgg + 1723: 1723, // cggtgt -> cggtgt + 1724: 431, // cggtta -> acggtt + 1725: 1455, // cggttc -> ccggtt + 1726: 1726, // cggttg -> cggttg + 1727: 1727, // cggttt -> cggttt + 1728: 27, // cgtaaa -> aaacgt + 1729: 91, // cgtaac -> aaccgt + 1730: 155, // cgtaag -> aagcgt + 1731: 219, // cgtaat -> aatcgt + 1732: 283, // cgtaca -> acacgt + 1733: 347, // cgtacc -> acccgt + 1734: 411, // cgtacg -> acgcgt + 1735: 475, // cgtact -> actcgt + 1736: 434, // cgtaga -> acgtag + 1737: 603, // cgtagc -> agccgt + 1738: 667, // cgtagg -> aggcgt + 1739: 731, // cgtagt -> agtcgt + 1740: 435, // cgtata -> acgtat + 1741: 859, // cgtatc -> atccgt + 1742: 923, // cgtatg -> atgcgt + 1743: 987, // cgtatt -> attcgt + 1744: 109, // cgtcaa -> aacgtc + 1745: 365, // cgtcac -> accgtc + 1746: 621, // cgtcag -> agcgtc + 1747: 877, // cgtcat -> atcgtc + 1748: 437, // cgtcca -> acgtcc + 1749: 1371, // cgtccc -> ccccgt + 1750: 1435, // cgtccg -> ccgcgt + 1751: 1499, // cgtcct -> cctcgt + 1752: 438, // cgtcga -> acgtcg + 1753: 1462, // cgtcgc -> ccgtcg + 1754: 1691, // cgtcgg -> cggcgt + 1755: 1755, // cgtcgt -> cgtcgt + 1756: 439, // cgtcta -> acgtct + 1757: 1463, // cgtctc -> ccgtct + 1758: 1758, // cgtctg -> cgtctg + 1759: 1759, // cgtctt -> 
cgtctt + 1760: 110, // cgtgaa -> aacgtg + 1761: 366, // cgtgac -> accgtg + 1762: 622, // cgtgag -> agcgtg + 1763: 878, // cgtgat -> atcgtg + 1764: 441, // cgtgca -> acgtgc + 1765: 1390, // cgtgcc -> cccgtg + 1766: 1646, // cgtgcg -> cgcgtg + 1767: 1767, // cgtgct -> cgtgct + 1768: 442, // cgtgga -> acgtgg + 1769: 1466, // cgtggc -> ccgtgg + 1770: 1770, // cgtggg -> cgtggg + 1771: 1771, // cgtggt -> cgtggt + 1772: 443, // cgtgta -> acgtgt + 1773: 1467, // cgtgtc -> ccgtgt + 1774: 1774, // cgtgtg -> cgtgtg + 1775: 1775, // cgtgtt -> cgtgtt + 1776: 111, // cgttaa -> aacgtt + 1777: 367, // cgttac -> accgtt + 1778: 623, // cgttag -> agcgtt + 1779: 879, // cgttat -> atcgtt + 1780: 445, // cgttca -> acgttc + 1781: 1391, // cgttcc -> cccgtt + 1782: 1647, // cgttcg -> cgcgtt + 1783: 1783, // cgttct -> cgttct + 1784: 446, // cgttga -> acgttg + 1785: 1470, // cgttgc -> ccgttg + 1786: 1786, // cgttgg -> cgttgg + 1787: 1787, // cgttgt -> cgttgt + 1788: 447, // cgttta -> acgttt + 1789: 1471, // cgtttc -> ccgttt + 1790: 1790, // cgtttg -> cgtttg + 1791: 1791, // cgtttt -> cgtttt + 1792: 7, // ctaaaa -> aaaact + 1793: 23, // ctaaac -> aaacct + 1794: 39, // ctaaag -> aaagct + 1795: 55, // ctaaat -> aaatct + 1796: 71, // ctaaca -> aacact + 1797: 87, // ctaacc -> aaccct + 1798: 103, // ctaacg -> aacgct + 1799: 119, // ctaact -> aactct + 1800: 135, // ctaaga -> aagact + 1801: 151, // ctaagc -> aagcct + 1802: 167, // ctaagg -> aaggct + 1803: 183, // ctaagt -> aagtct + 1804: 199, // ctaata -> aatact + 1805: 215, // ctaatc -> aatcct + 1806: 231, // ctaatg -> aatgct + 1807: 247, // ctaatt -> aattct + 1808: 113, // ctacaa -> aactac + 1809: 279, // ctacac -> acacct + 1810: 295, // ctacag -> acagct + 1811: 311, // ctacat -> acatct + 1812: 327, // ctacca -> accact + 1813: 343, // ctaccc -> acccct + 1814: 359, // ctaccg -> accgct + 1815: 375, // ctacct -> acctct + 1816: 391, // ctacga -> acgact + 1817: 407, // ctacgc -> acgcct + 1818: 423, // ctacgg -> acggct + 1819: 439, // ctacgt -> acgtct + 
1820: 455, // ctacta -> actact + 1821: 471, // ctactc -> actcct + 1822: 487, // ctactg -> actgct + 1823: 503, // ctactt -> acttct + 1824: 114, // ctagaa -> aactag + 1825: 370, // ctagac -> acctag + 1826: 551, // ctagag -> agagct + 1827: 567, // ctagat -> agatct + 1828: 457, // ctagca -> actagc + 1829: 599, // ctagcc -> agccct + 1830: 615, // ctagcg -> agcgct + 1831: 631, // ctagct -> agctct + 1832: 458, // ctagga -> actagg + 1833: 663, // ctaggc -> aggcct + 1834: 679, // ctaggg -> agggct + 1835: 695, // ctaggt -> aggtct + 1836: 459, // ctagta -> actagt + 1837: 727, // ctagtc -> agtcct + 1838: 743, // ctagtg -> agtgct + 1839: 759, // ctagtt -> agttct + 1840: 115, // ctataa -> aactat + 1841: 371, // ctatac -> acctat + 1842: 627, // ctatag -> agctat + 1843: 823, // ctatat -> atatct + 1844: 461, // ctatca -> actatc + 1845: 855, // ctatcc -> atccct + 1846: 871, // ctatcg -> atcgct + 1847: 887, // ctatct -> atctct + 1848: 462, // ctatga -> actatg + 1849: 919, // ctatgc -> atgcct + 1850: 935, // ctatgg -> atggct + 1851: 951, // ctatgt -> atgtct + 1852: 463, // ctatta -> actatt + 1853: 983, // ctattc -> attcct + 1854: 999, // ctattg -> attgct + 1855: 1015, // ctattt -> atttct + 1856: 29, // ctcaaa -> aaactc + 1857: 93, // ctcaac -> aacctc + 1858: 157, // ctcaag -> aagctc + 1859: 221, // ctcaat -> aatctc + 1860: 285, // ctcaca -> acactc + 1861: 349, // ctcacc -> accctc + 1862: 413, // ctcacg -> acgctc + 1863: 477, // ctcact -> actctc + 1864: 466, // ctcaga -> actcag + 1865: 605, // ctcagc -> agcctc + 1866: 669, // ctcagg -> aggctc + 1867: 733, // ctcagt -> agtctc + 1868: 467, // ctcata -> actcat + 1869: 861, // ctcatc -> atcctc + 1870: 925, // ctcatg -> atgctc + 1871: 989, // ctcatt -> attctc + 1872: 117, // ctccaa -> aactcc + 1873: 373, // ctccac -> acctcc + 1874: 629, // ctccag -> agctcc + 1875: 885, // ctccat -> atctcc + 1876: 469, // ctccca -> actccc + 1877: 1367, // ctcccc -> ccccct + 1878: 1383, // ctcccg -> cccgct + 1879: 1399, // ctccct -> ccctct + 1880: 470, // 
ctccga -> actccg + 1881: 1431, // ctccgc -> ccgcct + 1882: 1447, // ctccgg -> ccggct + 1883: 1463, // ctccgt -> ccgtct + 1884: 471, // ctccta -> actcct + 1885: 1495, // ctcctc -> cctcct + 1886: 1511, // ctcctg -> cctgct + 1887: 1527, // ctcctt -> ccttct + 1888: 118, // ctcgaa -> aactcg + 1889: 374, // ctcgac -> acctcg + 1890: 630, // ctcgag -> agctcg + 1891: 886, // ctcgat -> atctcg + 1892: 473, // ctcgca -> actcgc + 1893: 1398, // ctcgcc -> ccctcg + 1894: 1639, // ctcgcg -> cgcgct + 1895: 1655, // ctcgct -> cgctct + 1896: 474, // ctcgga -> actcgg + 1897: 1498, // ctcggc -> cctcgg + 1898: 1703, // ctcggg -> cgggct + 1899: 1719, // ctcggt -> cggtct + 1900: 475, // ctcgta -> actcgt + 1901: 1499, // ctcgtc -> cctcgt + 1902: 1767, // ctcgtg -> cgtgct + 1903: 1783, // ctcgtt -> cgttct + 1904: 119, // ctctaa -> aactct + 1905: 375, // ctctac -> acctct + 1906: 631, // ctctag -> agctct + 1907: 887, // ctctat -> atctct + 1908: 477, // ctctca -> actctc + 1909: 1399, // ctctcc -> ccctct + 1910: 1655, // ctctcg -> cgctct + 1911: 1911, // ctctct -> ctctct + 1912: 478, // ctctga -> actctg + 1913: 1502, // ctctgc -> cctctg + 1914: 1914, // ctctgg -> ctctgg + 1915: 1915, // ctctgt -> ctctgt + 1916: 479, // ctctta -> actctt + 1917: 1503, // ctcttc -> cctctt + 1918: 1918, // ctcttg -> ctcttg + 1919: 1919, // ctcttt -> ctcttt + 1920: 30, // ctgaaa -> aaactg + 1921: 94, // ctgaac -> aacctg + 1922: 158, // ctgaag -> aagctg + 1923: 222, // ctgaat -> aatctg + 1924: 286, // ctgaca -> acactg + 1925: 350, // ctgacc -> accctg + 1926: 414, // ctgacg -> acgctg + 1927: 478, // ctgact -> actctg + 1928: 482, // ctgaga -> actgag + 1929: 606, // ctgagc -> agcctg + 1930: 670, // ctgagg -> aggctg + 1931: 734, // ctgagt -> agtctg + 1932: 483, // ctgata -> actgat + 1933: 862, // ctgatc -> atcctg + 1934: 926, // ctgatg -> atgctg + 1935: 990, // ctgatt -> attctg + 1936: 121, // ctgcaa -> aactgc + 1937: 377, // ctgcac -> acctgc + 1938: 633, // ctgcag -> agctgc + 1939: 889, // ctgcat -> atctgc + 1940: 485, 
// ctgcca -> actgcc + 1941: 1374, // ctgccc -> cccctg + 1942: 1438, // ctgccg -> ccgctg + 1943: 1502, // ctgcct -> cctctg + 1944: 486, // ctgcga -> actgcg + 1945: 1510, // ctgcgc -> cctgcg + 1946: 1694, // ctgcgg -> cggctg + 1947: 1758, // ctgcgt -> cgtctg + 1948: 487, // ctgcta -> actgct + 1949: 1511, // ctgctc -> cctgct + 1950: 1950, // ctgctg -> ctgctg + 1951: 1951, // ctgctt -> ctgctt + 1952: 122, // ctggaa -> aactgg + 1953: 378, // ctggac -> acctgg + 1954: 634, // ctggag -> agctgg + 1955: 890, // ctggat -> atctgg + 1956: 489, // ctggca -> actggc + 1957: 1402, // ctggcc -> ccctgg + 1958: 1658, // ctggcg -> cgctgg + 1959: 1914, // ctggct -> ctctgg + 1960: 490, // ctggga -> actggg + 1961: 1514, // ctgggc -> cctggg + 1962: 1962, // ctgggg -> ctgggg + 1963: 1963, // ctgggt -> ctgggt + 1964: 491, // ctggta -> actggt + 1965: 1515, // ctggtc -> cctggt + 1966: 1966, // ctggtg -> ctggtg + 1967: 1967, // ctggtt -> ctggtt + 1968: 123, // ctgtaa -> aactgt + 1969: 379, // ctgtac -> acctgt + 1970: 635, // ctgtag -> agctgt + 1971: 891, // ctgtat -> atctgt + 1972: 493, // ctgtca -> actgtc + 1973: 1403, // ctgtcc -> ccctgt + 1974: 1659, // ctgtcg -> cgctgt + 1975: 1915, // ctgtct -> ctctgt + 1976: 494, // ctgtga -> actgtg + 1977: 1518, // ctgtgc -> cctgtg + 1978: 1978, // ctgtgg -> ctgtgg + 1979: 1979, // ctgtgt -> ctgtgt + 1980: 495, // ctgtta -> actgtt + 1981: 1519, // ctgttc -> cctgtt + 1982: 1982, // ctgttg -> ctgttg + 1983: 1983, // ctgttt -> ctgttt + 1984: 31, // cttaaa -> aaactt + 1985: 95, // cttaac -> aacctt + 1986: 159, // cttaag -> aagctt + 1987: 223, // cttaat -> aatctt + 1988: 287, // cttaca -> acactt + 1989: 351, // cttacc -> accctt + 1990: 415, // cttacg -> acgctt + 1991: 479, // cttact -> actctt + 1992: 498, // cttaga -> acttag + 1993: 607, // cttagc -> agcctt + 1994: 671, // cttagg -> aggctt + 1995: 735, // cttagt -> agtctt + 1996: 499, // cttata -> acttat + 1997: 863, // cttatc -> atcctt + 1998: 927, // cttatg -> atgctt + 1999: 991, // cttatt -> attctt + 2000: 
125, // cttcaa -> aacttc + 2001: 381, // cttcac -> accttc + 2002: 637, // cttcag -> agcttc + 2003: 893, // cttcat -> atcttc + 2004: 501, // cttcca -> acttcc + 2005: 1375, // cttccc -> cccctt + 2006: 1439, // cttccg -> ccgctt + 2007: 1503, // cttcct -> cctctt + 2008: 502, // cttcga -> acttcg + 2009: 1526, // cttcgc -> ccttcg + 2010: 1695, // cttcgg -> cggctt + 2011: 1759, // cttcgt -> cgtctt + 2012: 503, // cttcta -> acttct + 2013: 1527, // cttctc -> ccttct + 2014: 1951, // cttctg -> ctgctt + 2015: 2015, // cttctt -> cttctt + 2016: 126, // cttgaa -> aacttg + 2017: 382, // cttgac -> accttg + 2018: 638, // cttgag -> agcttg + 2019: 894, // cttgat -> atcttg + 2020: 505, // cttgca -> acttgc + 2021: 1406, // cttgcc -> cccttg + 2022: 1662, // cttgcg -> cgcttg + 2023: 1918, // cttgct -> ctcttg + 2024: 506, // cttgga -> acttgg + 2025: 1530, // cttggc -> ccttgg + 2026: 2026, // cttggg -> cttggg + 2027: 2027, // cttggt -> cttggt + 2028: 507, // cttgta -> acttgt + 2029: 1531, // cttgtc -> ccttgt + 2030: 2030, // cttgtg -> cttgtg + 2031: 2031, // cttgtt -> cttgtt + 2032: 127, // ctttaa -> aacttt + 2033: 383, // ctttac -> accttt + 2034: 639, // ctttag -> agcttt + 2035: 895, // ctttat -> atcttt + 2036: 509, // ctttca -> actttc + 2037: 1407, // ctttcc -> cccttt + 2038: 1663, // ctttcg -> cgcttt + 2039: 1919, // ctttct -> ctcttt + 2040: 510, // ctttga -> actttg + 2041: 1534, // ctttgc -> cctttg + 2042: 2042, // ctttgg -> ctttgg + 2043: 2043, // ctttgt -> ctttgt + 2044: 511, // ctttta -> actttt + 2045: 1535, // cttttc -> cctttt + 2046: 2046, // cttttg -> cttttg + 2047: 2047, // cttttt -> cttttt + 2048: 2, // gaaaaa -> aaaaag + 2049: 6, // gaaaac -> aaaacg + 2050: 10, // gaaaag -> aaaagg + 2051: 14, // gaaaat -> aaaatg + 2052: 18, // gaaaca -> aaacag + 2053: 22, // gaaacc -> aaaccg + 2054: 26, // gaaacg -> aaacgg + 2055: 30, // gaaact -> aaactg + 2056: 34, // gaaaga -> aaagag + 2057: 38, // gaaagc -> aaagcg + 2058: 42, // gaaagg -> aaaggg + 2059: 46, // gaaagt -> aaagtg + 2060: 50, // 
gaaata -> aaatag + 2061: 54, // gaaatc -> aaatcg + 2062: 58, // gaaatg -> aaatgg + 2063: 62, // gaaatt -> aaattg + 2064: 66, // gaacaa -> aacaag + 2065: 70, // gaacac -> aacacg + 2066: 74, // gaacag -> aacagg + 2067: 78, // gaacat -> aacatg + 2068: 82, // gaacca -> aaccag + 2069: 86, // gaaccc -> aacccg + 2070: 90, // gaaccg -> aaccgg + 2071: 94, // gaacct -> aacctg + 2072: 98, // gaacga -> aacgag + 2073: 102, // gaacgc -> aacgcg + 2074: 106, // gaacgg -> aacggg + 2075: 110, // gaacgt -> aacgtg + 2076: 114, // gaacta -> aactag + 2077: 118, // gaactc -> aactcg + 2078: 122, // gaactg -> aactgg + 2079: 126, // gaactt -> aacttg + 2080: 130, // gaagaa -> aagaag + 2081: 134, // gaagac -> aagacg + 2082: 138, // gaagag -> aagagg + 2083: 142, // gaagat -> aagatg + 2084: 146, // gaagca -> aagcag + 2085: 150, // gaagcc -> aagccg + 2086: 154, // gaagcg -> aagcgg + 2087: 158, // gaagct -> aagctg + 2088: 162, // gaagga -> aaggag + 2089: 166, // gaaggc -> aaggcg + 2090: 170, // gaaggg -> aagggg + 2091: 174, // gaaggt -> aaggtg + 2092: 178, // gaagta -> aagtag + 2093: 182, // gaagtc -> aagtcg + 2094: 186, // gaagtg -> aagtgg + 2095: 190, // gaagtt -> aagttg + 2096: 131, // gaataa -> aagaat + 2097: 198, // gaatac -> aatacg + 2098: 202, // gaatag -> aatagg + 2099: 206, // gaatat -> aatatg + 2100: 210, // gaatca -> aatcag + 2101: 214, // gaatcc -> aatccg + 2102: 218, // gaatcg -> aatcgg + 2103: 222, // gaatct -> aatctg + 2104: 226, // gaatga -> aatgag + 2105: 230, // gaatgc -> aatgcg + 2106: 234, // gaatgg -> aatggg + 2107: 238, // gaatgt -> aatgtg + 2108: 242, // gaatta -> aattag + 2109: 246, // gaattc -> aattcg + 2110: 250, // gaattg -> aattgg + 2111: 254, // gaattt -> aatttg + 2112: 33, // gacaaa -> aaagac + 2113: 97, // gacaac -> aacgac + 2114: 161, // gacaag -> aaggac + 2115: 225, // gacaat -> aatgac + 2116: 274, // gacaca -> acacag + 2117: 278, // gacacc -> acaccg + 2118: 282, // gacacg -> acacgg + 2119: 286, // gacact -> acactg + 2120: 290, // gacaga -> acagag + 2121: 294, // 
gacagc -> acagcg + 2122: 298, // gacagg -> acaggg + 2123: 302, // gacagt -> acagtg + 2124: 306, // gacata -> acatag + 2125: 310, // gacatc -> acatcg + 2126: 314, // gacatg -> acatgg + 2127: 318, // gacatt -> acattg + 2128: 133, // gaccaa -> aagacc + 2129: 326, // gaccac -> accacg + 2130: 330, // gaccag -> accagg + 2131: 334, // gaccat -> accatg + 2132: 338, // gaccca -> acccag + 2133: 342, // gacccc -> accccg + 2134: 346, // gacccg -> acccgg + 2135: 350, // gaccct -> accctg + 2136: 354, // gaccga -> accgag + 2137: 358, // gaccgc -> accgcg + 2138: 362, // gaccgg -> accggg + 2139: 366, // gaccgt -> accgtg + 2140: 370, // gaccta -> acctag + 2141: 374, // gacctc -> acctcg + 2142: 378, // gacctg -> acctgg + 2143: 382, // gacctt -> accttg + 2144: 134, // gacgaa -> aagacg + 2145: 390, // gacgac -> acgacg + 2146: 394, // gacgag -> acgagg + 2147: 398, // gacgat -> acgatg + 2148: 402, // gacgca -> acgcag + 2149: 406, // gacgcc -> acgccg + 2150: 410, // gacgcg -> acgcgg + 2151: 414, // gacgct -> acgctg + 2152: 418, // gacgga -> acggag + 2153: 422, // gacggc -> acggcg + 2154: 426, // gacggg -> acgggg + 2155: 430, // gacggt -> acggtg + 2156: 434, // gacgta -> acgtag + 2157: 438, // gacgtc -> acgtcg + 2158: 442, // gacgtg -> acgtgg + 2159: 446, // gacgtt -> acgttg + 2160: 135, // gactaa -> aagact + 2161: 391, // gactac -> acgact + 2162: 458, // gactag -> actagg + 2163: 462, // gactat -> actatg + 2164: 466, // gactca -> actcag + 2165: 470, // gactcc -> actccg + 2166: 474, // gactcg -> actcgg + 2167: 478, // gactct -> actctg + 2168: 482, // gactga -> actgag + 2169: 486, // gactgc -> actgcg + 2170: 490, // gactgg -> actggg + 2171: 494, // gactgt -> actgtg + 2172: 498, // gactta -> acttag + 2173: 502, // gacttc -> acttcg + 2174: 506, // gacttg -> acttgg + 2175: 510, // gacttt -> actttg + 2176: 34, // gagaaa -> aaagag + 2177: 98, // gagaac -> aacgag + 2178: 162, // gagaag -> aaggag + 2179: 226, // gagaat -> aatgag + 2180: 290, // gagaca -> acagag + 2181: 354, // gagacc -> accgag + 
2182: 418, // gagacg -> acggag + 2183: 482, // gagact -> actgag + 2184: 546, // gagaga -> agagag + 2185: 550, // gagagc -> agagcg + 2186: 554, // gagagg -> agaggg + 2187: 558, // gagagt -> agagtg + 2188: 547, // gagata -> agagat + 2189: 566, // gagatc -> agatcg + 2190: 570, // gagatg -> agatgg + 2191: 574, // gagatt -> agattg + 2192: 137, // gagcaa -> aagagc + 2193: 393, // gagcac -> acgagc + 2194: 586, // gagcag -> agcagg + 2195: 590, // gagcat -> agcatg + 2196: 549, // gagcca -> agagcc + 2197: 598, // gagccc -> agcccg + 2198: 602, // gagccg -> agccgg + 2199: 606, // gagcct -> agcctg + 2200: 550, // gagcga -> agagcg + 2201: 614, // gagcgc -> agcgcg + 2202: 618, // gagcgg -> agcggg + 2203: 622, // gagcgt -> agcgtg + 2204: 551, // gagcta -> agagct + 2205: 630, // gagctc -> agctcg + 2206: 634, // gagctg -> agctgg + 2207: 638, // gagctt -> agcttg + 2208: 138, // gaggaa -> aagagg + 2209: 394, // gaggac -> acgagg + 2210: 650, // gaggag -> aggagg + 2211: 654, // gaggat -> aggatg + 2212: 553, // gaggca -> agaggc + 2213: 662, // gaggcc -> aggccg + 2214: 666, // gaggcg -> aggcgg + 2215: 670, // gaggct -> aggctg + 2216: 554, // gaggga -> agaggg + 2217: 678, // gagggc -> agggcg + 2218: 682, // gagggg -> aggggg + 2219: 686, // gagggt -> agggtg + 2220: 555, // gaggta -> agaggt + 2221: 694, // gaggtc -> aggtcg + 2222: 698, // gaggtg -> aggtgg + 2223: 702, // gaggtt -> aggttg + 2224: 139, // gagtaa -> aagagt + 2225: 395, // gagtac -> acgagt + 2226: 651, // gagtag -> aggagt + 2227: 718, // gagtat -> agtatg + 2228: 557, // gagtca -> agagtc + 2229: 726, // gagtcc -> agtccg + 2230: 730, // gagtcg -> agtcgg + 2231: 734, // gagtct -> agtctg + 2232: 558, // gagtga -> agagtg + 2233: 742, // gagtgc -> agtgcg + 2234: 746, // gagtgg -> agtggg + 2235: 750, // gagtgt -> agtgtg + 2236: 559, // gagtta -> agagtt + 2237: 758, // gagttc -> agttcg + 2238: 762, // gagttg -> agttgg + 2239: 766, // gagttt -> agtttg + 2240: 35, // gataaa -> aaagat + 2241: 99, // gataac -> aacgat + 2242: 163, // gataag 
-> aaggat + 2243: 227, // gataat -> aatgat + 2244: 291, // gataca -> acagat + 2245: 355, // gatacc -> accgat + 2246: 419, // gatacg -> acggat + 2247: 483, // gatact -> actgat + 2248: 547, // gataga -> agagat + 2249: 611, // gatagc -> agcgat + 2250: 675, // gatagg -> agggat + 2251: 739, // gatagt -> agtgat + 2252: 563, // gatata -> agatat + 2253: 822, // gatatc -> atatcg + 2254: 826, // gatatg -> atatgg + 2255: 830, // gatatt -> atattg + 2256: 141, // gatcaa -> aagatc + 2257: 397, // gatcac -> acgatc + 2258: 653, // gatcag -> aggatc + 2259: 846, // gatcat -> atcatg + 2260: 565, // gatcca -> agatcc + 2261: 854, // gatccc -> atcccg + 2262: 858, // gatccg -> atccgg + 2263: 862, // gatcct -> atcctg + 2264: 566, // gatcga -> agatcg + 2265: 870, // gatcgc -> atcgcg + 2266: 874, // gatcgg -> atcggg + 2267: 878, // gatcgt -> atcgtg + 2268: 567, // gatcta -> agatct + 2269: 886, // gatctc -> atctcg + 2270: 890, // gatctg -> atctgg + 2271: 894, // gatctt -> atcttg + 2272: 142, // gatgaa -> aagatg + 2273: 398, // gatgac -> acgatg + 2274: 654, // gatgag -> aggatg + 2275: 910, // gatgat -> atgatg + 2276: 569, // gatgca -> agatgc + 2277: 918, // gatgcc -> atgccg + 2278: 922, // gatgcg -> atgcgg + 2279: 926, // gatgct -> atgctg + 2280: 570, // gatgga -> agatgg + 2281: 934, // gatggc -> atggcg + 2282: 938, // gatggg -> atgggg + 2283: 942, // gatggt -> atggtg + 2284: 571, // gatgta -> agatgt + 2285: 950, // gatgtc -> atgtcg + 2286: 954, // gatgtg -> atgtgg + 2287: 958, // gatgtt -> atgttg + 2288: 143, // gattaa -> aagatt + 2289: 399, // gattac -> acgatt + 2290: 655, // gattag -> aggatt + 2291: 911, // gattat -> atgatt + 2292: 573, // gattca -> agattc + 2293: 982, // gattcc -> attccg + 2294: 986, // gattcg -> attcgg + 2295: 990, // gattct -> attctg + 2296: 574, // gattga -> agattg + 2297: 998, // gattgc -> attgcg + 2298: 1002, // gattgg -> attggg + 2299: 1006, // gattgt -> attgtg + 2300: 575, // gattta -> agattt + 2301: 1014, // gatttc -> atttcg + 2302: 1018, // gatttg -> atttgg + 
2303: 1022, // gatttt -> attttg + 2304: 9, // gcaaaa -> aaaagc + 2305: 25, // gcaaac -> aaacgc + 2306: 41, // gcaaag -> aaaggc + 2307: 57, // gcaaat -> aaatgc + 2308: 73, // gcaaca -> aacagc + 2309: 89, // gcaacc -> aaccgc + 2310: 105, // gcaacg -> aacggc + 2311: 121, // gcaact -> aactgc + 2312: 137, // gcaaga -> aagagc + 2313: 153, // gcaagc -> aagcgc + 2314: 169, // gcaagg -> aagggc + 2315: 185, // gcaagt -> aagtgc + 2316: 201, // gcaata -> aatagc + 2317: 217, // gcaatc -> aatcgc + 2318: 233, // gcaatg -> aatggc + 2319: 249, // gcaatt -> aattgc + 2320: 145, // gcacaa -> aagcac + 2321: 281, // gcacac -> acacgc + 2322: 297, // gcacag -> acaggc + 2323: 313, // gcacat -> acatgc + 2324: 329, // gcacca -> accagc + 2325: 345, // gcaccc -> acccgc + 2326: 361, // gcaccg -> accggc + 2327: 377, // gcacct -> acctgc + 2328: 393, // gcacga -> acgagc + 2329: 409, // gcacgc -> acgcgc + 2330: 425, // gcacgg -> acgggc + 2331: 441, // gcacgt -> acgtgc + 2332: 457, // gcacta -> actagc + 2333: 473, // gcactc -> actcgc + 2334: 489, // gcactg -> actggc + 2335: 505, // gcactt -> acttgc + 2336: 146, // gcagaa -> aagcag + 2337: 402, // gcagac -> acgcag + 2338: 553, // gcagag -> agaggc + 2339: 569, // gcagat -> agatgc + 2340: 585, // gcagca -> agcagc + 2341: 601, // gcagcc -> agccgc + 2342: 617, // gcagcg -> agcggc + 2343: 633, // gcagct -> agctgc + 2344: 586, // gcagga -> agcagg + 2345: 665, // gcaggc -> aggcgc + 2346: 681, // gcaggg -> aggggc + 2347: 697, // gcaggt -> aggtgc + 2348: 587, // gcagta -> agcagt + 2349: 729, // gcagtc -> agtcgc + 2350: 745, // gcagtg -> agtggc + 2351: 761, // gcagtt -> agttgc + 2352: 147, // gcataa -> aagcat + 2353: 403, // gcatac -> acgcat + 2354: 659, // gcatag -> aggcat + 2355: 825, // gcatat -> atatgc + 2356: 589, // gcatca -> agcatc + 2357: 857, // gcatcc -> atccgc + 2358: 873, // gcatcg -> atcggc + 2359: 889, // gcatct -> atctgc + 2360: 590, // gcatga -> agcatg + 2361: 921, // gcatgc -> atgcgc + 2362: 937, // gcatgg -> atgggc + 2363: 953, // gcatgt -> 
atgtgc + 2364: 591, // gcatta -> agcatt + 2365: 985, // gcattc -> attcgc + 2366: 1001, // gcattg -> attggc + 2367: 1017, // gcattt -> atttgc + 2368: 37, // gccaaa -> aaagcc + 2369: 101, // gccaac -> aacgcc + 2370: 165, // gccaag -> aaggcc + 2371: 229, // gccaat -> aatgcc + 2372: 293, // gccaca -> acagcc + 2373: 357, // gccacc -> accgcc + 2374: 421, // gccacg -> acggcc + 2375: 485, // gccact -> actgcc + 2376: 549, // gccaga -> agagcc + 2377: 613, // gccagc -> agcgcc + 2378: 677, // gccagg -> agggcc + 2379: 741, // gccagt -> agtgcc + 2380: 595, // gccata -> agccat + 2381: 869, // gccatc -> atcgcc + 2382: 933, // gccatg -> atggcc + 2383: 997, // gccatt -> attgcc + 2384: 149, // gcccaa -> aagccc + 2385: 405, // gcccac -> acgccc + 2386: 661, // gcccag -> aggccc + 2387: 917, // gcccat -> atgccc + 2388: 597, // gcccca -> agcccc + 2389: 1366, // gccccc -> cccccg + 2390: 1370, // gccccg -> ccccgg + 2391: 1374, // gcccct -> cccctg + 2392: 598, // gcccga -> agcccg + 2393: 1382, // gcccgc -> cccgcg + 2394: 1386, // gcccgg -> cccggg + 2395: 1390, // gcccgt -> cccgtg + 2396: 599, // gcccta -> agccct + 2397: 1398, // gccctc -> ccctcg + 2398: 1402, // gccctg -> ccctgg + 2399: 1406, // gccctt -> cccttg + 2400: 150, // gccgaa -> aagccg + 2401: 406, // gccgac -> acgccg + 2402: 662, // gccgag -> aggccg + 2403: 918, // gccgat -> atgccg + 2404: 601, // gccgca -> agccgc + 2405: 1430, // gccgcc -> ccgccg + 2406: 1434, // gccgcg -> ccgcgg + 2407: 1438, // gccgct -> ccgctg + 2408: 602, // gccgga -> agccgg + 2409: 1446, // gccggc -> ccggcg + 2410: 1450, // gccggg -> ccgggg + 2411: 1454, // gccggt -> ccggtg + 2412: 603, // gccgta -> agccgt + 2413: 1462, // gccgtc -> ccgtcg + 2414: 1466, // gccgtg -> ccgtgg + 2415: 1470, // gccgtt -> ccgttg + 2416: 151, // gcctaa -> aagcct + 2417: 407, // gcctac -> acgcct + 2418: 663, // gcctag -> aggcct + 2419: 919, // gcctat -> atgcct + 2420: 605, // gcctca -> agcctc + 2421: 1431, // gcctcc -> ccgcct + 2422: 1498, // gcctcg -> cctcgg + 2423: 1502, // gcctct 
-> cctctg + 2424: 606, // gcctga -> agcctg + 2425: 1510, // gcctgc -> cctgcg + 2426: 1514, // gcctgg -> cctggg + 2427: 1518, // gcctgt -> cctgtg + 2428: 607, // gcctta -> agcctt + 2429: 1526, // gccttc -> ccttcg + 2430: 1530, // gccttg -> ccttgg + 2431: 1534, // gccttt -> cctttg + 2432: 38, // gcgaaa -> aaagcg + 2433: 102, // gcgaac -> aacgcg + 2434: 166, // gcgaag -> aaggcg + 2435: 230, // gcgaat -> aatgcg + 2436: 294, // gcgaca -> acagcg + 2437: 358, // gcgacc -> accgcg + 2438: 422, // gcgacg -> acggcg + 2439: 486, // gcgact -> actgcg + 2440: 550, // gcgaga -> agagcg + 2441: 614, // gcgagc -> agcgcg + 2442: 678, // gcgagg -> agggcg + 2443: 742, // gcgagt -> agtgcg + 2444: 611, // gcgata -> agcgat + 2445: 870, // gcgatc -> atcgcg + 2446: 934, // gcgatg -> atggcg + 2447: 998, // gcgatt -> attgcg + 2448: 153, // gcgcaa -> aagcgc + 2449: 409, // gcgcac -> acgcgc + 2450: 665, // gcgcag -> aggcgc + 2451: 921, // gcgcat -> atgcgc + 2452: 613, // gcgcca -> agcgcc + 2453: 1382, // gcgccc -> cccgcg + 2454: 1446, // gcgccg -> ccggcg + 2455: 1510, // gcgcct -> cctgcg + 2456: 614, // gcgcga -> agcgcg + 2457: 1638, // gcgcgc -> cgcgcg + 2458: 1642, // gcgcgg -> cgcggg + 2459: 1646, // gcgcgt -> cgcgtg + 2460: 615, // gcgcta -> agcgct + 2461: 1639, // gcgctc -> cgcgct + 2462: 1658, // gcgctg -> cgctgg + 2463: 1662, // gcgctt -> cgcttg + 2464: 154, // gcggaa -> aagcgg + 2465: 410, // gcggac -> acgcgg + 2466: 666, // gcggag -> aggcgg + 2467: 922, // gcggat -> atgcgg + 2468: 617, // gcggca -> agcggc + 2469: 1434, // gcggcc -> ccgcgg + 2470: 1690, // gcggcg -> cggcgg + 2471: 1694, // gcggct -> cggctg + 2472: 618, // gcggga -> agcggg + 2473: 1642, // gcgggc -> cgcggg + 2474: 1706, // gcgggg -> cggggg + 2475: 1710, // gcgggt -> cgggtg + 2476: 619, // gcggta -> agcggt + 2477: 1643, // gcggtc -> cgcggt + 2478: 1722, // gcggtg -> cggtgg + 2479: 1726, // gcggtt -> cggttg + 2480: 155, // gcgtaa -> aagcgt + 2481: 411, // gcgtac -> acgcgt + 2482: 667, // gcgtag -> aggcgt + 2483: 923, // 
gcgtat -> atgcgt + 2484: 621, // gcgtca -> agcgtc + 2485: 1435, // gcgtcc -> ccgcgt + 2486: 1691, // gcgtcg -> cggcgt + 2487: 1758, // gcgtct -> cgtctg + 2488: 622, // gcgtga -> agcgtg + 2489: 1646, // gcgtgc -> cgcgtg + 2490: 1770, // gcgtgg -> cgtggg + 2491: 1774, // gcgtgt -> cgtgtg + 2492: 623, // gcgtta -> agcgtt + 2493: 1647, // gcgttc -> cgcgtt + 2494: 1786, // gcgttg -> cgttgg + 2495: 1790, // gcgttt -> cgtttg + 2496: 39, // gctaaa -> aaagct + 2497: 103, // gctaac -> aacgct + 2498: 167, // gctaag -> aaggct + 2499: 231, // gctaat -> aatgct + 2500: 295, // gctaca -> acagct + 2501: 359, // gctacc -> accgct + 2502: 423, // gctacg -> acggct + 2503: 487, // gctact -> actgct + 2504: 551, // gctaga -> agagct + 2505: 615, // gctagc -> agcgct + 2506: 679, // gctagg -> agggct + 2507: 743, // gctagt -> agtgct + 2508: 627, // gctata -> agctat + 2509: 871, // gctatc -> atcgct + 2510: 935, // gctatg -> atggct + 2511: 999, // gctatt -> attgct + 2512: 157, // gctcaa -> aagctc + 2513: 413, // gctcac -> acgctc + 2514: 669, // gctcag -> aggctc + 2515: 925, // gctcat -> atgctc + 2516: 629, // gctcca -> agctcc + 2517: 1383, // gctccc -> cccgct + 2518: 1447, // gctccg -> ccggct + 2519: 1511, // gctcct -> cctgct + 2520: 630, // gctcga -> agctcg + 2521: 1639, // gctcgc -> cgcgct + 2522: 1703, // gctcgg -> cgggct + 2523: 1767, // gctcgt -> cgtgct + 2524: 631, // gctcta -> agctct + 2525: 1655, // gctctc -> cgctct + 2526: 1914, // gctctg -> ctctgg + 2527: 1918, // gctctt -> ctcttg + 2528: 158, // gctgaa -> aagctg + 2529: 414, // gctgac -> acgctg + 2530: 670, // gctgag -> aggctg + 2531: 926, // gctgat -> atgctg + 2532: 633, // gctgca -> agctgc + 2533: 1438, // gctgcc -> ccgctg + 2534: 1694, // gctgcg -> cggctg + 2535: 1950, // gctgct -> ctgctg + 2536: 634, // gctgga -> agctgg + 2537: 1658, // gctggc -> cgctgg + 2538: 1962, // gctggg -> ctgggg + 2539: 1966, // gctggt -> ctggtg + 2540: 635, // gctgta -> agctgt + 2541: 1659, // gctgtc -> cgctgt + 2542: 1978, // gctgtg -> ctgtgg + 2543: 
1982, // gctgtt -> ctgttg + 2544: 159, // gcttaa -> aagctt + 2545: 415, // gcttac -> acgctt + 2546: 671, // gcttag -> aggctt + 2547: 927, // gcttat -> atgctt + 2548: 637, // gcttca -> agcttc + 2549: 1439, // gcttcc -> ccgctt + 2550: 1695, // gcttcg -> cggctt + 2551: 1951, // gcttct -> ctgctt + 2552: 638, // gcttga -> agcttg + 2553: 1662, // gcttgc -> cgcttg + 2554: 2026, // gcttgg -> cttggg + 2555: 2030, // gcttgt -> cttgtg + 2556: 639, // gcttta -> agcttt + 2557: 1663, // gctttc -> cgcttt + 2558: 2042, // gctttg -> ctttgg + 2559: 2046, // gctttt -> cttttg + 2560: 10, // ggaaaa -> aaaagg + 2561: 26, // ggaaac -> aaacgg + 2562: 42, // ggaaag -> aaaggg + 2563: 58, // ggaaat -> aaatgg + 2564: 74, // ggaaca -> aacagg + 2565: 90, // ggaacc -> aaccgg + 2566: 106, // ggaacg -> aacggg + 2567: 122, // ggaact -> aactgg + 2568: 138, // ggaaga -> aagagg + 2569: 154, // ggaagc -> aagcgg + 2570: 170, // ggaagg -> aagggg + 2571: 186, // ggaagt -> aagtgg + 2572: 202, // ggaata -> aatagg + 2573: 218, // ggaatc -> aatcgg + 2574: 234, // ggaatg -> aatggg + 2575: 250, // ggaatt -> aattgg + 2576: 161, // ggacaa -> aaggac + 2577: 282, // ggacac -> acacgg + 2578: 298, // ggacag -> acaggg + 2579: 314, // ggacat -> acatgg + 2580: 330, // ggacca -> accagg + 2581: 346, // ggaccc -> acccgg + 2582: 362, // ggaccg -> accggg + 2583: 378, // ggacct -> acctgg + 2584: 394, // ggacga -> acgagg + 2585: 410, // ggacgc -> acgcgg + 2586: 426, // ggacgg -> acgggg + 2587: 442, // ggacgt -> acgtgg + 2588: 458, // ggacta -> actagg + 2589: 474, // ggactc -> actcgg + 2590: 490, // ggactg -> actggg + 2591: 506, // ggactt -> acttgg + 2592: 162, // ggagaa -> aaggag + 2593: 418, // ggagac -> acggag + 2594: 554, // ggagag -> agaggg + 2595: 570, // ggagat -> agatgg + 2596: 586, // ggagca -> agcagg + 2597: 602, // ggagcc -> agccgg + 2598: 618, // ggagcg -> agcggg + 2599: 634, // ggagct -> agctgg + 2600: 650, // ggagga -> aggagg + 2601: 666, // ggaggc -> aggcgg + 2602: 682, // ggaggg -> aggggg + 2603: 698, // ggaggt 
-> aggtgg + 2604: 651, // ggagta -> aggagt + 2605: 730, // ggagtc -> agtcgg + 2606: 746, // ggagtg -> agtggg + 2607: 762, // ggagtt -> agttgg + 2608: 163, // ggataa -> aaggat + 2609: 419, // ggatac -> acggat + 2610: 675, // ggatag -> agggat + 2611: 826, // ggatat -> atatgg + 2612: 653, // ggatca -> aggatc + 2613: 858, // ggatcc -> atccgg + 2614: 874, // ggatcg -> atcggg + 2615: 890, // ggatct -> atctgg + 2616: 654, // ggatga -> aggatg + 2617: 922, // ggatgc -> atgcgg + 2618: 938, // ggatgg -> atgggg + 2619: 954, // ggatgt -> atgtgg + 2620: 655, // ggatta -> aggatt + 2621: 986, // ggattc -> attcgg + 2622: 1002, // ggattg -> attggg + 2623: 1018, // ggattt -> atttgg + 2624: 41, // ggcaaa -> aaaggc + 2625: 105, // ggcaac -> aacggc + 2626: 169, // ggcaag -> aagggc + 2627: 233, // ggcaat -> aatggc + 2628: 297, // ggcaca -> acaggc + 2629: 361, // ggcacc -> accggc + 2630: 425, // ggcacg -> acgggc + 2631: 489, // ggcact -> actggc + 2632: 553, // ggcaga -> agaggc + 2633: 617, // ggcagc -> agcggc + 2634: 681, // ggcagg -> aggggc + 2635: 745, // ggcagt -> agtggc + 2636: 659, // ggcata -> aggcat + 2637: 873, // ggcatc -> atcggc + 2638: 937, // ggcatg -> atgggc + 2639: 1001, // ggcatt -> attggc + 2640: 165, // ggccaa -> aaggcc + 2641: 421, // ggccac -> acggcc + 2642: 677, // ggccag -> agggcc + 2643: 933, // ggccat -> atggcc + 2644: 661, // ggccca -> aggccc + 2645: 1370, // ggcccc -> ccccgg + 2646: 1386, // ggcccg -> cccggg + 2647: 1402, // ggccct -> ccctgg + 2648: 662, // ggccga -> aggccg + 2649: 1434, // ggccgc -> ccgcgg + 2650: 1450, // ggccgg -> ccgggg + 2651: 1466, // ggccgt -> ccgtgg + 2652: 663, // ggccta -> aggcct + 2653: 1498, // ggcctc -> cctcgg + 2654: 1514, // ggcctg -> cctggg + 2655: 1530, // ggcctt -> ccttgg + 2656: 166, // ggcgaa -> aaggcg + 2657: 422, // ggcgac -> acggcg + 2658: 678, // ggcgag -> agggcg + 2659: 934, // ggcgat -> atggcg + 2660: 665, // ggcgca -> aggcgc + 2661: 1446, // ggcgcc -> ccggcg + 2662: 1642, // ggcgcg -> cgcggg + 2663: 1658, // ggcgct -> 
cgctgg + 2664: 666, // ggcgga -> aggcgg + 2665: 1690, // ggcggc -> cggcgg + 2666: 1706, // ggcggg -> cggggg + 2667: 1722, // ggcggt -> cggtgg + 2668: 667, // ggcgta -> aggcgt + 2669: 1691, // ggcgtc -> cggcgt + 2670: 1770, // ggcgtg -> cgtggg + 2671: 1786, // ggcgtt -> cgttgg + 2672: 167, // ggctaa -> aaggct + 2673: 423, // ggctac -> acggct + 2674: 679, // ggctag -> agggct + 2675: 935, // ggctat -> atggct + 2676: 669, // ggctca -> aggctc + 2677: 1447, // ggctcc -> ccggct + 2678: 1703, // ggctcg -> cgggct + 2679: 1914, // ggctct -> ctctgg + 2680: 670, // ggctga -> aggctg + 2681: 1694, // ggctgc -> cggctg + 2682: 1962, // ggctgg -> ctgggg + 2683: 1978, // ggctgt -> ctgtgg + 2684: 671, // ggctta -> aggctt + 2685: 1695, // ggcttc -> cggctt + 2686: 2026, // ggcttg -> cttggg + 2687: 2042, // ggcttt -> ctttgg + 2688: 42, // gggaaa -> aaaggg + 2689: 106, // gggaac -> aacggg + 2690: 170, // gggaag -> aagggg + 2691: 234, // gggaat -> aatggg + 2692: 298, // gggaca -> acaggg + 2693: 362, // gggacc -> accggg + 2694: 426, // gggacg -> acgggg + 2695: 490, // gggact -> actggg + 2696: 554, // gggaga -> agaggg + 2697: 618, // gggagc -> agcggg + 2698: 682, // gggagg -> aggggg + 2699: 746, // gggagt -> agtggg + 2700: 675, // gggata -> agggat + 2701: 874, // gggatc -> atcggg + 2702: 938, // gggatg -> atgggg + 2703: 1002, // gggatt -> attggg + 2704: 169, // gggcaa -> aagggc + 2705: 425, // gggcac -> acgggc + 2706: 681, // gggcag -> aggggc + 2707: 937, // gggcat -> atgggc + 2708: 677, // gggcca -> agggcc + 2709: 1386, // gggccc -> cccggg + 2710: 1450, // gggccg -> ccgggg + 2711: 1514, // gggcct -> cctggg + 2712: 678, // gggcga -> agggcg + 2713: 1642, // gggcgc -> cgcggg + 2714: 1706, // gggcgg -> cggggg + 2715: 1770, // gggcgt -> cgtggg + 2716: 679, // gggcta -> agggct + 2717: 1703, // gggctc -> cgggct + 2718: 1962, // gggctg -> ctgggg + 2719: 2026, // gggctt -> cttggg + 2720: 170, // ggggaa -> aagggg + 2721: 426, // ggggac -> acgggg + 2722: 682, // ggggag -> aggggg + 2723: 938, // 
ggggat -> atgggg + 2724: 681, // ggggca -> aggggc + 2725: 1450, // ggggcc -> ccgggg + 2726: 1706, // ggggcg -> cggggg + 2727: 1962, // ggggct -> ctgggg + 2728: 682, // ggggga -> aggggg + 2729: 1706, // gggggc -> cggggg + 2730: 2730, // gggggg -> gggggg + 2731: 2731, // gggggt -> gggggt + 2732: 683, // ggggta -> aggggt + 2733: 1707, // ggggtc -> cggggt + 2734: 2731, // ggggtg -> gggggt + 2735: 2735, // ggggtt -> ggggtt + 2736: 171, // gggtaa -> aagggt + 2737: 427, // gggtac -> acgggt + 2738: 683, // gggtag -> aggggt + 2739: 939, // gggtat -> atgggt + 2740: 685, // gggtca -> agggtc + 2741: 1451, // gggtcc -> ccgggt + 2742: 1707, // gggtcg -> cggggt + 2743: 1963, // gggtct -> ctgggt + 2744: 686, // gggtga -> agggtg + 2745: 1710, // gggtgc -> cgggtg + 2746: 2731, // gggtgg -> gggggt + 2747: 2747, // gggtgt -> gggtgt + 2748: 687, // gggtta -> agggtt + 2749: 1711, // gggttc -> cgggtt + 2750: 2735, // gggttg -> ggggtt + 2751: 2751, // gggttt -> gggttt + 2752: 43, // ggtaaa -> aaaggt + 2753: 107, // ggtaac -> aacggt + 2754: 171, // ggtaag -> aagggt + 2755: 235, // ggtaat -> aatggt + 2756: 299, // ggtaca -> acaggt + 2757: 363, // ggtacc -> accggt + 2758: 427, // ggtacg -> acgggt + 2759: 491, // ggtact -> actggt + 2760: 555, // ggtaga -> agaggt + 2761: 619, // ggtagc -> agcggt + 2762: 683, // ggtagg -> aggggt + 2763: 747, // ggtagt -> agtggt + 2764: 691, // ggtata -> aggtat + 2765: 875, // ggtatc -> atcggt + 2766: 939, // ggtatg -> atgggt + 2767: 1003, // ggtatt -> attggt + 2768: 173, // ggtcaa -> aaggtc + 2769: 429, // ggtcac -> acggtc + 2770: 685, // ggtcag -> agggtc + 2771: 941, // ggtcat -> atggtc + 2772: 693, // ggtcca -> aggtcc + 2773: 1387, // ggtccc -> cccggt + 2774: 1451, // ggtccg -> ccgggt + 2775: 1515, // ggtcct -> cctggt + 2776: 694, // ggtcga -> aggtcg + 2777: 1643, // ggtcgc -> cgcggt + 2778: 1707, // ggtcgg -> cggggt + 2779: 1771, // ggtcgt -> cgtggt + 2780: 695, // ggtcta -> aggtct + 2781: 1719, // ggtctc -> cggtct + 2782: 1963, // ggtctg -> ctgggt + 2783: 
2027, // ggtctt -> cttggt + 2784: 174, // ggtgaa -> aaggtg + 2785: 430, // ggtgac -> acggtg + 2786: 686, // ggtgag -> agggtg + 2787: 942, // ggtgat -> atggtg + 2788: 697, // ggtgca -> aggtgc + 2789: 1454, // ggtgcc -> ccggtg + 2790: 1710, // ggtgcg -> cgggtg + 2791: 1966, // ggtgct -> ctggtg + 2792: 698, // ggtgga -> aggtgg + 2793: 1722, // ggtggc -> cggtgg + 2794: 2731, // ggtggg -> gggggt + 2795: 2795, // ggtggt -> ggtggt + 2796: 699, // ggtgta -> aggtgt + 2797: 1723, // ggtgtc -> cggtgt + 2798: 2747, // ggtgtg -> gggtgt + 2799: 2799, // ggtgtt -> ggtgtt + 2800: 175, // ggttaa -> aaggtt + 2801: 431, // ggttac -> acggtt + 2802: 687, // ggttag -> agggtt + 2803: 943, // ggttat -> atggtt + 2804: 701, // ggttca -> aggttc + 2805: 1455, // ggttcc -> ccggtt + 2806: 1711, // ggttcg -> cgggtt + 2807: 1967, // ggttct -> ctggtt + 2808: 702, // ggttga -> aggttg + 2809: 1726, // ggttgc -> cggttg + 2810: 2735, // ggttgg -> ggggtt + 2811: 2811, // ggttgt -> ggttgt + 2812: 703, // ggttta -> aggttt + 2813: 1727, // ggtttc -> cggttt + 2814: 2751, // ggtttg -> gggttt + 2815: 2815, // ggtttt -> ggtttt + 2816: 11, // gtaaaa -> aaaagt + 2817: 27, // gtaaac -> aaacgt + 2818: 43, // gtaaag -> aaaggt + 2819: 59, // gtaaat -> aaatgt + 2820: 75, // gtaaca -> aacagt + 2821: 91, // gtaacc -> aaccgt + 2822: 107, // gtaacg -> aacggt + 2823: 123, // gtaact -> aactgt + 2824: 139, // gtaaga -> aagagt + 2825: 155, // gtaagc -> aagcgt + 2826: 171, // gtaagg -> aagggt + 2827: 187, // gtaagt -> aagtgt + 2828: 203, // gtaata -> aatagt + 2829: 219, // gtaatc -> aatcgt + 2830: 235, // gtaatg -> aatggt + 2831: 251, // gtaatt -> aattgt + 2832: 177, // gtacaa -> aagtac + 2833: 283, // gtacac -> acacgt + 2834: 299, // gtacag -> acaggt + 2835: 315, // gtacat -> acatgt + 2836: 331, // gtacca -> accagt + 2837: 347, // gtaccc -> acccgt + 2838: 363, // gtaccg -> accggt + 2839: 379, // gtacct -> acctgt + 2840: 395, // gtacga -> acgagt + 2841: 411, // gtacgc -> acgcgt + 2842: 427, // gtacgg -> acgggt + 2843: 443, 
// gtacgt -> acgtgt + 2844: 459, // gtacta -> actagt + 2845: 475, // gtactc -> actcgt + 2846: 491, // gtactg -> actggt + 2847: 507, // gtactt -> acttgt + 2848: 178, // gtagaa -> aagtag + 2849: 434, // gtagac -> acgtag + 2850: 555, // gtagag -> agaggt + 2851: 571, // gtagat -> agatgt + 2852: 587, // gtagca -> agcagt + 2853: 603, // gtagcc -> agccgt + 2854: 619, // gtagcg -> agcggt + 2855: 635, // gtagct -> agctgt + 2856: 651, // gtagga -> aggagt + 2857: 667, // gtaggc -> aggcgt + 2858: 683, // gtaggg -> aggggt + 2859: 699, // gtaggt -> aggtgt + 2860: 715, // gtagta -> agtagt + 2861: 731, // gtagtc -> agtcgt + 2862: 747, // gtagtg -> agtggt + 2863: 763, // gtagtt -> agttgt + 2864: 179, // gtataa -> aagtat + 2865: 435, // gtatac -> acgtat + 2866: 691, // gtatag -> aggtat + 2867: 827, // gtatat -> atatgt + 2868: 717, // gtatca -> agtatc + 2869: 859, // gtatcc -> atccgt + 2870: 875, // gtatcg -> atcggt + 2871: 891, // gtatct -> atctgt + 2872: 718, // gtatga -> agtatg + 2873: 923, // gtatgc -> atgcgt + 2874: 939, // gtatgg -> atgggt + 2875: 955, // gtatgt -> atgtgt + 2876: 719, // gtatta -> agtatt + 2877: 987, // gtattc -> attcgt + 2878: 1003, // gtattg -> attggt + 2879: 1019, // gtattt -> atttgt + 2880: 45, // gtcaaa -> aaagtc + 2881: 109, // gtcaac -> aacgtc + 2882: 173, // gtcaag -> aaggtc + 2883: 237, // gtcaat -> aatgtc + 2884: 301, // gtcaca -> acagtc + 2885: 365, // gtcacc -> accgtc + 2886: 429, // gtcacg -> acggtc + 2887: 493, // gtcact -> actgtc + 2888: 557, // gtcaga -> agagtc + 2889: 621, // gtcagc -> agcgtc + 2890: 685, // gtcagg -> agggtc + 2891: 749, // gtcagt -> agtgtc + 2892: 723, // gtcata -> agtcat + 2893: 877, // gtcatc -> atcgtc + 2894: 941, // gtcatg -> atggtc + 2895: 1005, // gtcatt -> attgtc + 2896: 181, // gtccaa -> aagtcc + 2897: 437, // gtccac -> acgtcc + 2898: 693, // gtccag -> aggtcc + 2899: 949, // gtccat -> atgtcc + 2900: 725, // gtccca -> agtccc + 2901: 1371, // gtcccc -> ccccgt + 2902: 1387, // gtcccg -> cccggt + 2903: 1403, // gtccct -> 
ccctgt + 2904: 726, // gtccga -> agtccg + 2905: 1435, // gtccgc -> ccgcgt + 2906: 1451, // gtccgg -> ccgggt + 2907: 1467, // gtccgt -> ccgtgt + 2908: 727, // gtccta -> agtcct + 2909: 1499, // gtcctc -> cctcgt + 2910: 1515, // gtcctg -> cctggt + 2911: 1531, // gtcctt -> ccttgt + 2912: 182, // gtcgaa -> aagtcg + 2913: 438, // gtcgac -> acgtcg + 2914: 694, // gtcgag -> aggtcg + 2915: 950, // gtcgat -> atgtcg + 2916: 729, // gtcgca -> agtcgc + 2917: 1462, // gtcgcc -> ccgtcg + 2918: 1643, // gtcgcg -> cgcggt + 2919: 1659, // gtcgct -> cgctgt + 2920: 730, // gtcgga -> agtcgg + 2921: 1691, // gtcggc -> cggcgt + 2922: 1707, // gtcggg -> cggggt + 2923: 1723, // gtcggt -> cggtgt + 2924: 731, // gtcgta -> agtcgt + 2925: 1755, // gtcgtc -> cgtcgt + 2926: 1771, // gtcgtg -> cgtggt + 2927: 1787, // gtcgtt -> cgttgt + 2928: 183, // gtctaa -> aagtct + 2929: 439, // gtctac -> acgtct + 2930: 695, // gtctag -> aggtct + 2931: 951, // gtctat -> atgtct + 2932: 733, // gtctca -> agtctc + 2933: 1463, // gtctcc -> ccgtct + 2934: 1719, // gtctcg -> cggtct + 2935: 1915, // gtctct -> ctctgt + 2936: 734, // gtctga -> agtctg + 2937: 1758, // gtctgc -> cgtctg + 2938: 1963, // gtctgg -> ctgggt + 2939: 1979, // gtctgt -> ctgtgt + 2940: 735, // gtctta -> agtctt + 2941: 1759, // gtcttc -> cgtctt + 2942: 2027, // gtcttg -> cttggt + 2943: 2043, // gtcttt -> ctttgt + 2944: 46, // gtgaaa -> aaagtg + 2945: 110, // gtgaac -> aacgtg + 2946: 174, // gtgaag -> aaggtg + 2947: 238, // gtgaat -> aatgtg + 2948: 302, // gtgaca -> acagtg + 2949: 366, // gtgacc -> accgtg + 2950: 430, // gtgacg -> acggtg + 2951: 494, // gtgact -> actgtg + 2952: 558, // gtgaga -> agagtg + 2953: 622, // gtgagc -> agcgtg + 2954: 686, // gtgagg -> agggtg + 2955: 750, // gtgagt -> agtgtg + 2956: 739, // gtgata -> agtgat + 2957: 878, // gtgatc -> atcgtg + 2958: 942, // gtgatg -> atggtg + 2959: 1006, // gtgatt -> attgtg + 2960: 185, // gtgcaa -> aagtgc + 2961: 441, // gtgcac -> acgtgc + 2962: 697, // gtgcag -> aggtgc + 2963: 953, // 
gtgcat -> atgtgc + 2964: 741, // gtgcca -> agtgcc + 2965: 1390, // gtgccc -> cccgtg + 2966: 1454, // gtgccg -> ccggtg + 2967: 1518, // gtgcct -> cctgtg + 2968: 742, // gtgcga -> agtgcg + 2969: 1646, // gtgcgc -> cgcgtg + 2970: 1710, // gtgcgg -> cgggtg + 2971: 1774, // gtgcgt -> cgtgtg + 2972: 743, // gtgcta -> agtgct + 2973: 1767, // gtgctc -> cgtgct + 2974: 1966, // gtgctg -> ctggtg + 2975: 2030, // gtgctt -> cttgtg + 2976: 186, // gtggaa -> aagtgg + 2977: 442, // gtggac -> acgtgg + 2978: 698, // gtggag -> aggtgg + 2979: 954, // gtggat -> atgtgg + 2980: 745, // gtggca -> agtggc + 2981: 1466, // gtggcc -> ccgtgg + 2982: 1722, // gtggcg -> cggtgg + 2983: 1978, // gtggct -> ctgtgg + 2984: 746, // gtggga -> agtggg + 2985: 1770, // gtgggc -> cgtggg + 2986: 2731, // gtgggg -> gggggt + 2987: 2747, // gtgggt -> gggtgt + 2988: 747, // gtggta -> agtggt + 2989: 1771, // gtggtc -> cgtggt + 2990: 2795, // gtggtg -> ggtggt + 2991: 2811, // gtggtt -> ggttgt + 2992: 187, // gtgtaa -> aagtgt + 2993: 443, // gtgtac -> acgtgt + 2994: 699, // gtgtag -> aggtgt + 2995: 955, // gtgtat -> atgtgt + 2996: 749, // gtgtca -> agtgtc + 2997: 1467, // gtgtcc -> ccgtgt + 2998: 1723, // gtgtcg -> cggtgt + 2999: 1979, // gtgtct -> ctgtgt + 3000: 750, // gtgtga -> agtgtg + 3001: 1774, // gtgtgc -> cgtgtg + 3002: 2747, // gtgtgg -> gggtgt + 3003: 3003, // gtgtgt -> gtgtgt + 3004: 751, // gtgtta -> agtgtt + 3005: 1775, // gtgttc -> cgtgtt + 3006: 2799, // gtgttg -> ggtgtt + 3007: 3007, // gtgttt -> gtgttt + 3008: 47, // gttaaa -> aaagtt + 3009: 111, // gttaac -> aacgtt + 3010: 175, // gttaag -> aaggtt + 3011: 239, // gttaat -> aatgtt + 3012: 303, // gttaca -> acagtt + 3013: 367, // gttacc -> accgtt + 3014: 431, // gttacg -> acggtt + 3015: 495, // gttact -> actgtt + 3016: 559, // gttaga -> agagtt + 3017: 623, // gttagc -> agcgtt + 3018: 687, // gttagg -> agggtt + 3019: 751, // gttagt -> agtgtt + 3020: 755, // gttata -> agttat + 3021: 879, // gttatc -> atcgtt + 3022: 943, // gttatg -> atggtt + 3023: 
1007, // gttatt -> attgtt + 3024: 189, // gttcaa -> aagttc + 3025: 445, // gttcac -> acgttc + 3026: 701, // gttcag -> aggttc + 3027: 957, // gttcat -> atgttc + 3028: 757, // gttcca -> agttcc + 3029: 1391, // gttccc -> cccgtt + 3030: 1455, // gttccg -> ccggtt + 3031: 1519, // gttcct -> cctgtt + 3032: 758, // gttcga -> agttcg + 3033: 1647, // gttcgc -> cgcgtt + 3034: 1711, // gttcgg -> cgggtt + 3035: 1775, // gttcgt -> cgtgtt + 3036: 759, // gttcta -> agttct + 3037: 1783, // gttctc -> cgttct + 3038: 1967, // gttctg -> ctggtt + 3039: 2031, // gttctt -> cttgtt + 3040: 190, // gttgaa -> aagttg + 3041: 446, // gttgac -> acgttg + 3042: 702, // gttgag -> aggttg + 3043: 958, // gttgat -> atgttg + 3044: 761, // gttgca -> agttgc + 3045: 1470, // gttgcc -> ccgttg + 3046: 1726, // gttgcg -> cggttg + 3047: 1982, // gttgct -> ctgttg + 3048: 762, // gttgga -> agttgg + 3049: 1786, // gttggc -> cgttgg + 3050: 2735, // gttggg -> ggggtt + 3051: 2799, // gttggt -> ggtgtt + 3052: 763, // gttgta -> agttgt + 3053: 1787, // gttgtc -> cgttgt + 3054: 2811, // gttgtg -> ggttgt + 3055: 3055, // gttgtt -> gttgtt + 3056: 191, // gtttaa -> aagttt + 3057: 447, // gtttac -> acgttt + 3058: 703, // gtttag -> aggttt + 3059: 959, // gtttat -> atgttt + 3060: 765, // gtttca -> agtttc + 3061: 1471, // gtttcc -> ccgttt + 3062: 1727, // gtttcg -> cggttt + 3063: 1983, // gtttct -> ctgttt + 3064: 766, // gtttga -> agtttg + 3065: 1790, // gtttgc -> cgtttg + 3066: 2751, // gtttgg -> gggttt + 3067: 3007, // gtttgt -> gtgttt + 3068: 767, // gtttta -> agtttt + 3069: 1791, // gttttc -> cgtttt + 3070: 2815, // gttttg -> ggtttt + 3071: 3071, // gttttt -> gttttt + 3072: 3, // taaaaa -> aaaaat + 3073: 7, // taaaac -> aaaact + 3074: 11, // taaaag -> aaaagt + 3075: 15, // taaaat -> aaaatt + 3076: 19, // taaaca -> aaacat + 3077: 23, // taaacc -> aaacct + 3078: 27, // taaacg -> aaacgt + 3079: 31, // taaact -> aaactt + 3080: 35, // taaaga -> aaagat + 3081: 39, // taaagc -> aaagct + 3082: 43, // taaagg -> aaaggt + 3083: 47, 
// taaagt -> aaagtt + 3084: 51, // taaata -> aaatat + 3085: 55, // taaatc -> aaatct + 3086: 59, // taaatg -> aaatgt + 3087: 63, // taaatt -> aaattt + 3088: 67, // taacaa -> aacaat + 3089: 71, // taacac -> aacact + 3090: 75, // taacag -> aacagt + 3091: 79, // taacat -> aacatt + 3092: 83, // taacca -> aaccat + 3093: 87, // taaccc -> aaccct + 3094: 91, // taaccg -> aaccgt + 3095: 95, // taacct -> aacctt + 3096: 99, // taacga -> aacgat + 3097: 103, // taacgc -> aacgct + 3098: 107, // taacgg -> aacggt + 3099: 111, // taacgt -> aacgtt + 3100: 115, // taacta -> aactat + 3101: 119, // taactc -> aactct + 3102: 123, // taactg -> aactgt + 3103: 127, // taactt -> aacttt + 3104: 131, // taagaa -> aagaat + 3105: 135, // taagac -> aagact + 3106: 139, // taagag -> aagagt + 3107: 143, // taagat -> aagatt + 3108: 147, // taagca -> aagcat + 3109: 151, // taagcc -> aagcct + 3110: 155, // taagcg -> aagcgt + 3111: 159, // taagct -> aagctt + 3112: 163, // taagga -> aaggat + 3113: 167, // taaggc -> aaggct + 3114: 171, // taaggg -> aagggt + 3115: 175, // taaggt -> aaggtt + 3116: 179, // taagta -> aagtat + 3117: 183, // taagtc -> aagtct + 3118: 187, // taagtg -> aagtgt + 3119: 191, // taagtt -> aagttt + 3120: 195, // taataa -> aataat + 3121: 199, // taatac -> aatact + 3122: 203, // taatag -> aatagt + 3123: 207, // taatat -> aatatt + 3124: 211, // taatca -> aatcat + 3125: 215, // taatcc -> aatcct + 3126: 219, // taatcg -> aatcgt + 3127: 223, // taatct -> aatctt + 3128: 227, // taatga -> aatgat + 3129: 231, // taatgc -> aatgct + 3130: 235, // taatgg -> aatggt + 3131: 239, // taatgt -> aatgtt + 3132: 243, // taatta -> aattat + 3133: 247, // taattc -> aattct + 3134: 251, // taattg -> aattgt + 3135: 255, // taattt -> aatttt + 3136: 49, // tacaaa -> aaatac + 3137: 113, // tacaac -> aactac + 3138: 177, // tacaag -> aagtac + 3139: 241, // tacaat -> aattac + 3140: 275, // tacaca -> acacat + 3141: 279, // tacacc -> acacct + 3142: 283, // tacacg -> acacgt + 3143: 287, // tacact -> acactt + 3144: 291, 
// tacaga -> acagat + 3145: 295, // tacagc -> acagct + 3146: 299, // tacagg -> acaggt + 3147: 303, // tacagt -> acagtt + 3148: 307, // tacata -> acatat + 3149: 311, // tacatc -> acatct + 3150: 315, // tacatg -> acatgt + 3151: 319, // tacatt -> acattt + 3152: 197, // taccaa -> aatacc + 3153: 327, // taccac -> accact + 3154: 331, // taccag -> accagt + 3155: 335, // taccat -> accatt + 3156: 339, // taccca -> acccat + 3157: 343, // tacccc -> acccct + 3158: 347, // tacccg -> acccgt + 3159: 351, // taccct -> accctt + 3160: 355, // taccga -> accgat + 3161: 359, // taccgc -> accgct + 3162: 363, // taccgg -> accggt + 3163: 367, // taccgt -> accgtt + 3164: 371, // taccta -> acctat + 3165: 375, // tacctc -> acctct + 3166: 379, // tacctg -> acctgt + 3167: 383, // tacctt -> accttt + 3168: 198, // tacgaa -> aatacg + 3169: 391, // tacgac -> acgact + 3170: 395, // tacgag -> acgagt + 3171: 399, // tacgat -> acgatt + 3172: 403, // tacgca -> acgcat + 3173: 407, // tacgcc -> acgcct + 3174: 411, // tacgcg -> acgcgt + 3175: 415, // tacgct -> acgctt + 3176: 419, // tacgga -> acggat + 3177: 423, // tacggc -> acggct + 3178: 427, // tacggg -> acgggt + 3179: 431, // tacggt -> acggtt + 3180: 435, // tacgta -> acgtat + 3181: 439, // tacgtc -> acgtct + 3182: 443, // tacgtg -> acgtgt + 3183: 447, // tacgtt -> acgttt + 3184: 199, // tactaa -> aatact + 3185: 455, // tactac -> actact + 3186: 459, // tactag -> actagt + 3187: 463, // tactat -> actatt + 3188: 467, // tactca -> actcat + 3189: 471, // tactcc -> actcct + 3190: 475, // tactcg -> actcgt + 3191: 479, // tactct -> actctt + 3192: 483, // tactga -> actgat + 3193: 487, // tactgc -> actgct + 3194: 491, // tactgg -> actggt + 3195: 495, // tactgt -> actgtt + 3196: 499, // tactta -> acttat + 3197: 503, // tacttc -> acttct + 3198: 507, // tacttg -> acttgt + 3199: 511, // tacttt -> actttt + 3200: 50, // tagaaa -> aaatag + 3201: 114, // tagaac -> aactag + 3202: 178, // tagaag -> aagtag + 3203: 242, // tagaat -> aattag + 3204: 306, // tagaca -> acatag 
+ 3205: 370, // tagacc -> acctag + 3206: 434, // tagacg -> acgtag + 3207: 498, // tagact -> acttag + 3208: 547, // tagaga -> agagat + 3209: 551, // tagagc -> agagct + 3210: 555, // tagagg -> agaggt + 3211: 559, // tagagt -> agagtt + 3212: 563, // tagata -> agatat + 3213: 567, // tagatc -> agatct + 3214: 571, // tagatg -> agatgt + 3215: 575, // tagatt -> agattt + 3216: 201, // tagcaa -> aatagc + 3217: 457, // tagcac -> actagc + 3218: 587, // tagcag -> agcagt + 3219: 591, // tagcat -> agcatt + 3220: 595, // tagcca -> agccat + 3221: 599, // tagccc -> agccct + 3222: 603, // tagccg -> agccgt + 3223: 607, // tagcct -> agcctt + 3224: 611, // tagcga -> agcgat + 3225: 615, // tagcgc -> agcgct + 3226: 619, // tagcgg -> agcggt + 3227: 623, // tagcgt -> agcgtt + 3228: 627, // tagcta -> agctat + 3229: 631, // tagctc -> agctct + 3230: 635, // tagctg -> agctgt + 3231: 639, // tagctt -> agcttt + 3232: 202, // taggaa -> aatagg + 3233: 458, // taggac -> actagg + 3234: 651, // taggag -> aggagt + 3235: 655, // taggat -> aggatt + 3236: 659, // taggca -> aggcat + 3237: 663, // taggcc -> aggcct + 3238: 667, // taggcg -> aggcgt + 3239: 671, // taggct -> aggctt + 3240: 675, // taggga -> agggat + 3241: 679, // tagggc -> agggct + 3242: 683, // tagggg -> aggggt + 3243: 687, // tagggt -> agggtt + 3244: 691, // taggta -> aggtat + 3245: 695, // taggtc -> aggtct + 3246: 699, // taggtg -> aggtgt + 3247: 703, // taggtt -> aggttt + 3248: 203, // tagtaa -> aatagt + 3249: 459, // tagtac -> actagt + 3250: 715, // tagtag -> agtagt + 3251: 719, // tagtat -> agtatt + 3252: 723, // tagtca -> agtcat + 3253: 727, // tagtcc -> agtcct + 3254: 731, // tagtcg -> agtcgt + 3255: 735, // tagtct -> agtctt + 3256: 739, // tagtga -> agtgat + 3257: 743, // tagtgc -> agtgct + 3258: 747, // tagtgg -> agtggt + 3259: 751, // tagtgt -> agtgtt + 3260: 755, // tagtta -> agttat + 3261: 759, // tagttc -> agttct + 3262: 763, // tagttg -> agttgt + 3263: 767, // tagttt -> agtttt + 3264: 51, // tataaa -> aaatat + 3265: 115, // 
tataac -> aactat + 3266: 179, // tataag -> aagtat + 3267: 243, // tataat -> aattat + 3268: 307, // tataca -> acatat + 3269: 371, // tatacc -> acctat + 3270: 435, // tatacg -> acgtat + 3271: 499, // tatact -> acttat + 3272: 563, // tataga -> agatat + 3273: 627, // tatagc -> agctat + 3274: 691, // tatagg -> aggtat + 3275: 755, // tatagt -> agttat + 3276: 819, // tatata -> atatat + 3277: 823, // tatatc -> atatct + 3278: 827, // tatatg -> atatgt + 3279: 831, // tatatt -> atattt + 3280: 205, // tatcaa -> aatatc + 3281: 461, // tatcac -> actatc + 3282: 717, // tatcag -> agtatc + 3283: 847, // tatcat -> atcatt + 3284: 821, // tatcca -> atatcc + 3285: 855, // tatccc -> atccct + 3286: 859, // tatccg -> atccgt + 3287: 863, // tatcct -> atcctt + 3288: 822, // tatcga -> atatcg + 3289: 871, // tatcgc -> atcgct + 3290: 875, // tatcgg -> atcggt + 3291: 879, // tatcgt -> atcgtt + 3292: 823, // tatcta -> atatct + 3293: 887, // tatctc -> atctct + 3294: 891, // tatctg -> atctgt + 3295: 895, // tatctt -> atcttt + 3296: 206, // tatgaa -> aatatg + 3297: 462, // tatgac -> actatg + 3298: 718, // tatgag -> agtatg + 3299: 911, // tatgat -> atgatt + 3300: 825, // tatgca -> atatgc + 3301: 919, // tatgcc -> atgcct + 3302: 923, // tatgcg -> atgcgt + 3303: 927, // tatgct -> atgctt + 3304: 826, // tatgga -> atatgg + 3305: 935, // tatggc -> atggct + 3306: 939, // tatggg -> atgggt + 3307: 943, // tatggt -> atggtt + 3308: 827, // tatgta -> atatgt + 3309: 951, // tatgtc -> atgtct + 3310: 955, // tatgtg -> atgtgt + 3311: 959, // tatgtt -> atgttt + 3312: 207, // tattaa -> aatatt + 3313: 463, // tattac -> actatt + 3314: 719, // tattag -> agtatt + 3315: 975, // tattat -> attatt + 3316: 829, // tattca -> atattc + 3317: 983, // tattcc -> attcct + 3318: 987, // tattcg -> attcgt + 3319: 991, // tattct -> attctt + 3320: 830, // tattga -> atattg + 3321: 999, // tattgc -> attgct + 3322: 1003, // tattgg -> attggt + 3323: 1007, // tattgt -> attgtt + 3324: 831, // tattta -> atattt + 3325: 1015, // tatttc -> atttct 
+ 3326: 1019, // tatttg -> atttgt + 3327: 1023, // tatttt -> attttt + 3328: 13, // tcaaaa -> aaaatc + 3329: 29, // tcaaac -> aaactc + 3330: 45, // tcaaag -> aaagtc + 3331: 61, // tcaaat -> aaattc + 3332: 77, // tcaaca -> aacatc + 3333: 93, // tcaacc -> aacctc + 3334: 109, // tcaacg -> aacgtc + 3335: 125, // tcaact -> aacttc + 3336: 141, // tcaaga -> aagatc + 3337: 157, // tcaagc -> aagctc + 3338: 173, // tcaagg -> aaggtc + 3339: 189, // tcaagt -> aagttc + 3340: 205, // tcaata -> aatatc + 3341: 221, // tcaatc -> aatctc + 3342: 237, // tcaatg -> aatgtc + 3343: 253, // tcaatt -> aatttc + 3344: 209, // tcacaa -> aatcac + 3345: 285, // tcacac -> acactc + 3346: 301, // tcacag -> acagtc + 3347: 317, // tcacat -> acattc + 3348: 333, // tcacca -> accatc + 3349: 349, // tcaccc -> accctc + 3350: 365, // tcaccg -> accgtc + 3351: 381, // tcacct -> accttc + 3352: 397, // tcacga -> acgatc + 3353: 413, // tcacgc -> acgctc + 3354: 429, // tcacgg -> acggtc + 3355: 445, // tcacgt -> acgttc + 3356: 461, // tcacta -> actatc + 3357: 477, // tcactc -> actctc + 3358: 493, // tcactg -> actgtc + 3359: 509, // tcactt -> actttc + 3360: 210, // tcagaa -> aatcag + 3361: 466, // tcagac -> actcag + 3362: 557, // tcagag -> agagtc + 3363: 573, // tcagat -> agattc + 3364: 589, // tcagca -> agcatc + 3365: 605, // tcagcc -> agcctc + 3366: 621, // tcagcg -> agcgtc + 3367: 637, // tcagct -> agcttc + 3368: 653, // tcagga -> aggatc + 3369: 669, // tcaggc -> aggctc + 3370: 685, // tcaggg -> agggtc + 3371: 701, // tcaggt -> aggttc + 3372: 717, // tcagta -> agtatc + 3373: 733, // tcagtc -> agtctc + 3374: 749, // tcagtg -> agtgtc + 3375: 765, // tcagtt -> agtttc + 3376: 211, // tcataa -> aatcat + 3377: 467, // tcatac -> actcat + 3378: 723, // tcatag -> agtcat + 3379: 829, // tcatat -> atattc + 3380: 845, // tcatca -> atcatc + 3381: 861, // tcatcc -> atcctc + 3382: 877, // tcatcg -> atcgtc + 3383: 893, // tcatct -> atcttc + 3384: 846, // tcatga -> atcatg + 3385: 925, // tcatgc -> atgctc + 3386: 941, // tcatgg 
-> atggtc + 3387: 957, // tcatgt -> atgttc + 3388: 847, // tcatta -> atcatt + 3389: 989, // tcattc -> attctc + 3390: 1005, // tcattg -> attgtc + 3391: 1021, // tcattt -> attttc + 3392: 53, // tccaaa -> aaatcc + 3393: 117, // tccaac -> aactcc + 3394: 181, // tccaag -> aagtcc + 3395: 245, // tccaat -> aattcc + 3396: 309, // tccaca -> acatcc + 3397: 373, // tccacc -> acctcc + 3398: 437, // tccacg -> acgtcc + 3399: 501, // tccact -> acttcc + 3400: 565, // tccaga -> agatcc + 3401: 629, // tccagc -> agctcc + 3402: 693, // tccagg -> aggtcc + 3403: 757, // tccagt -> agttcc + 3404: 821, // tccata -> atatcc + 3405: 885, // tccatc -> atctcc + 3406: 949, // tccatg -> atgtcc + 3407: 1013, // tccatt -> atttcc + 3408: 213, // tcccaa -> aatccc + 3409: 469, // tcccac -> actccc + 3410: 725, // tcccag -> agtccc + 3411: 981, // tcccat -> attccc + 3412: 853, // tcccca -> atcccc + 3413: 1367, // tccccc -> ccccct + 3414: 1371, // tccccg -> ccccgt + 3415: 1375, // tcccct -> cccctt + 3416: 854, // tcccga -> atcccg + 3417: 1383, // tcccgc -> cccgct + 3418: 1387, // tcccgg -> cccggt + 3419: 1391, // tcccgt -> cccgtt + 3420: 855, // tcccta -> atccct + 3421: 1399, // tccctc -> ccctct + 3422: 1403, // tccctg -> ccctgt + 3423: 1407, // tccctt -> cccttt + 3424: 214, // tccgaa -> aatccg + 3425: 470, // tccgac -> actccg + 3426: 726, // tccgag -> agtccg + 3427: 982, // tccgat -> attccg + 3428: 857, // tccgca -> atccgc + 3429: 1431, // tccgcc -> ccgcct + 3430: 1435, // tccgcg -> ccgcgt + 3431: 1439, // tccgct -> ccgctt + 3432: 858, // tccgga -> atccgg + 3433: 1447, // tccggc -> ccggct + 3434: 1451, // tccggg -> ccgggt + 3435: 1455, // tccggt -> ccggtt + 3436: 859, // tccgta -> atccgt + 3437: 1463, // tccgtc -> ccgtct + 3438: 1467, // tccgtg -> ccgtgt + 3439: 1471, // tccgtt -> ccgttt + 3440: 215, // tcctaa -> aatcct + 3441: 471, // tcctac -> actcct + 3442: 727, // tcctag -> agtcct + 3443: 983, // tcctat -> attcct + 3444: 861, // tcctca -> atcctc + 3445: 1495, // tcctcc -> cctcct + 3446: 1499, // 
tcctcg -> cctcgt + 3447: 1503, // tcctct -> cctctt + 3448: 862, // tcctga -> atcctg + 3449: 1511, // tcctgc -> cctgct + 3450: 1515, // tcctgg -> cctggt + 3451: 1519, // tcctgt -> cctgtt + 3452: 863, // tcctta -> atcctt + 3453: 1527, // tccttc -> ccttct + 3454: 1531, // tccttg -> ccttgt + 3455: 1535, // tccttt -> cctttt + 3456: 54, // tcgaaa -> aaatcg + 3457: 118, // tcgaac -> aactcg + 3458: 182, // tcgaag -> aagtcg + 3459: 246, // tcgaat -> aattcg + 3460: 310, // tcgaca -> acatcg + 3461: 374, // tcgacc -> acctcg + 3462: 438, // tcgacg -> acgtcg + 3463: 502, // tcgact -> acttcg + 3464: 566, // tcgaga -> agatcg + 3465: 630, // tcgagc -> agctcg + 3466: 694, // tcgagg -> aggtcg + 3467: 758, // tcgagt -> agttcg + 3468: 822, // tcgata -> atatcg + 3469: 886, // tcgatc -> atctcg + 3470: 950, // tcgatg -> atgtcg + 3471: 1014, // tcgatt -> atttcg + 3472: 217, // tcgcaa -> aatcgc + 3473: 473, // tcgcac -> actcgc + 3474: 729, // tcgcag -> agtcgc + 3475: 985, // tcgcat -> attcgc + 3476: 869, // tcgcca -> atcgcc + 3477: 1398, // tcgccc -> ccctcg + 3478: 1462, // tcgccg -> ccgtcg + 3479: 1526, // tcgcct -> ccttcg + 3480: 870, // tcgcga -> atcgcg + 3481: 1639, // tcgcgc -> cgcgct + 3482: 1643, // tcgcgg -> cgcggt + 3483: 1647, // tcgcgt -> cgcgtt + 3484: 871, // tcgcta -> atcgct + 3485: 1655, // tcgctc -> cgctct + 3486: 1659, // tcgctg -> cgctgt + 3487: 1663, // tcgctt -> cgcttt + 3488: 218, // tcggaa -> aatcgg + 3489: 474, // tcggac -> actcgg + 3490: 730, // tcggag -> agtcgg + 3491: 986, // tcggat -> attcgg + 3492: 873, // tcggca -> atcggc + 3493: 1498, // tcggcc -> cctcgg + 3494: 1691, // tcggcg -> cggcgt + 3495: 1695, // tcggct -> cggctt + 3496: 874, // tcggga -> atcggg + 3497: 1703, // tcgggc -> cgggct + 3498: 1707, // tcgggg -> cggggt + 3499: 1711, // tcgggt -> cgggtt + 3500: 875, // tcggta -> atcggt + 3501: 1719, // tcggtc -> cggtct + 3502: 1723, // tcggtg -> cggtgt + 3503: 1727, // tcggtt -> cggttt + 3504: 219, // tcgtaa -> aatcgt + 3505: 475, // tcgtac -> actcgt + 3506: 
731, // tcgtag -> agtcgt + 3507: 987, // tcgtat -> attcgt + 3508: 877, // tcgtca -> atcgtc + 3509: 1499, // tcgtcc -> cctcgt + 3510: 1755, // tcgtcg -> cgtcgt + 3511: 1759, // tcgtct -> cgtctt + 3512: 878, // tcgtga -> atcgtg + 3513: 1767, // tcgtgc -> cgtgct + 3514: 1771, // tcgtgg -> cgtggt + 3515: 1775, // tcgtgt -> cgtgtt + 3516: 879, // tcgtta -> atcgtt + 3517: 1783, // tcgttc -> cgttct + 3518: 1787, // tcgttg -> cgttgt + 3519: 1791, // tcgttt -> cgtttt + 3520: 55, // tctaaa -> aaatct + 3521: 119, // tctaac -> aactct + 3522: 183, // tctaag -> aagtct + 3523: 247, // tctaat -> aattct + 3524: 311, // tctaca -> acatct + 3525: 375, // tctacc -> acctct + 3526: 439, // tctacg -> acgtct + 3527: 503, // tctact -> acttct + 3528: 567, // tctaga -> agatct + 3529: 631, // tctagc -> agctct + 3530: 695, // tctagg -> aggtct + 3531: 759, // tctagt -> agttct + 3532: 823, // tctata -> atatct + 3533: 887, // tctatc -> atctct + 3534: 951, // tctatg -> atgtct + 3535: 1015, // tctatt -> atttct + 3536: 221, // tctcaa -> aatctc + 3537: 477, // tctcac -> actctc + 3538: 733, // tctcag -> agtctc + 3539: 989, // tctcat -> attctc + 3540: 885, // tctcca -> atctcc + 3541: 1399, // tctccc -> ccctct + 3542: 1463, // tctccg -> ccgtct + 3543: 1527, // tctcct -> ccttct + 3544: 886, // tctcga -> atctcg + 3545: 1655, // tctcgc -> cgctct + 3546: 1719, // tctcgg -> cggtct + 3547: 1783, // tctcgt -> cgttct + 3548: 887, // tctcta -> atctct + 3549: 1911, // tctctc -> ctctct + 3550: 1915, // tctctg -> ctctgt + 3551: 1919, // tctctt -> ctcttt + 3552: 222, // tctgaa -> aatctg + 3553: 478, // tctgac -> actctg + 3554: 734, // tctgag -> agtctg + 3555: 990, // tctgat -> attctg + 3556: 889, // tctgca -> atctgc + 3557: 1502, // tctgcc -> cctctg + 3558: 1758, // tctgcg -> cgtctg + 3559: 1951, // tctgct -> ctgctt + 3560: 890, // tctgga -> atctgg + 3561: 1914, // tctggc -> ctctgg + 3562: 1963, // tctggg -> ctgggt + 3563: 1967, // tctggt -> ctggtt + 3564: 891, // tctgta -> atctgt + 3565: 1915, // tctgtc -> ctctgt + 
3566: 1979, // tctgtg -> ctgtgt + 3567: 1983, // tctgtt -> ctgttt + 3568: 223, // tcttaa -> aatctt + 3569: 479, // tcttac -> actctt + 3570: 735, // tcttag -> agtctt + 3571: 991, // tcttat -> attctt + 3572: 893, // tcttca -> atcttc + 3573: 1503, // tcttcc -> cctctt + 3574: 1759, // tcttcg -> cgtctt + 3575: 2015, // tcttct -> cttctt + 3576: 894, // tcttga -> atcttg + 3577: 1918, // tcttgc -> ctcttg + 3578: 2027, // tcttgg -> cttggt + 3579: 2031, // tcttgt -> cttgtt + 3580: 895, // tcttta -> atcttt + 3581: 1919, // tctttc -> ctcttt + 3582: 2043, // tctttg -> ctttgt + 3583: 2047, // tctttt -> cttttt + 3584: 14, // tgaaaa -> aaaatg + 3585: 30, // tgaaac -> aaactg + 3586: 46, // tgaaag -> aaagtg + 3587: 62, // tgaaat -> aaattg + 3588: 78, // tgaaca -> aacatg + 3589: 94, // tgaacc -> aacctg + 3590: 110, // tgaacg -> aacgtg + 3591: 126, // tgaact -> aacttg + 3592: 142, // tgaaga -> aagatg + 3593: 158, // tgaagc -> aagctg + 3594: 174, // tgaagg -> aaggtg + 3595: 190, // tgaagt -> aagttg + 3596: 206, // tgaata -> aatatg + 3597: 222, // tgaatc -> aatctg + 3598: 238, // tgaatg -> aatgtg + 3599: 254, // tgaatt -> aatttg + 3600: 225, // tgacaa -> aatgac + 3601: 286, // tgacac -> acactg + 3602: 302, // tgacag -> acagtg + 3603: 318, // tgacat -> acattg + 3604: 334, // tgacca -> accatg + 3605: 350, // tgaccc -> accctg + 3606: 366, // tgaccg -> accgtg + 3607: 382, // tgacct -> accttg + 3608: 398, // tgacga -> acgatg + 3609: 414, // tgacgc -> acgctg + 3610: 430, // tgacgg -> acggtg + 3611: 446, // tgacgt -> acgttg + 3612: 462, // tgacta -> actatg + 3613: 478, // tgactc -> actctg + 3614: 494, // tgactg -> actgtg + 3615: 510, // tgactt -> actttg + 3616: 226, // tgagaa -> aatgag + 3617: 482, // tgagac -> actgag + 3618: 558, // tgagag -> agagtg + 3619: 574, // tgagat -> agattg + 3620: 590, // tgagca -> agcatg + 3621: 606, // tgagcc -> agcctg + 3622: 622, // tgagcg -> agcgtg + 3623: 638, // tgagct -> agcttg + 3624: 654, // tgagga -> aggatg + 3625: 670, // tgaggc -> aggctg + 3626: 686, // 
tgaggg -> agggtg + 3627: 702, // tgaggt -> aggttg + 3628: 718, // tgagta -> agtatg + 3629: 734, // tgagtc -> agtctg + 3630: 750, // tgagtg -> agtgtg + 3631: 766, // tgagtt -> agtttg + 3632: 227, // tgataa -> aatgat + 3633: 483, // tgatac -> actgat + 3634: 739, // tgatag -> agtgat + 3635: 830, // tgatat -> atattg + 3636: 846, // tgatca -> atcatg + 3637: 862, // tgatcc -> atcctg + 3638: 878, // tgatcg -> atcgtg + 3639: 894, // tgatct -> atcttg + 3640: 910, // tgatga -> atgatg + 3641: 926, // tgatgc -> atgctg + 3642: 942, // tgatgg -> atggtg + 3643: 958, // tgatgt -> atgttg + 3644: 911, // tgatta -> atgatt + 3645: 990, // tgattc -> attctg + 3646: 1006, // tgattg -> attgtg + 3647: 1022, // tgattt -> attttg + 3648: 57, // tgcaaa -> aaatgc + 3649: 121, // tgcaac -> aactgc + 3650: 185, // tgcaag -> aagtgc + 3651: 249, // tgcaat -> aattgc + 3652: 313, // tgcaca -> acatgc + 3653: 377, // tgcacc -> acctgc + 3654: 441, // tgcacg -> acgtgc + 3655: 505, // tgcact -> acttgc + 3656: 569, // tgcaga -> agatgc + 3657: 633, // tgcagc -> agctgc + 3658: 697, // tgcagg -> aggtgc + 3659: 761, // tgcagt -> agttgc + 3660: 825, // tgcata -> atatgc + 3661: 889, // tgcatc -> atctgc + 3662: 953, // tgcatg -> atgtgc + 3663: 1017, // tgcatt -> atttgc + 3664: 229, // tgccaa -> aatgcc + 3665: 485, // tgccac -> actgcc + 3666: 741, // tgccag -> agtgcc + 3667: 997, // tgccat -> attgcc + 3668: 917, // tgccca -> atgccc + 3669: 1374, // tgcccc -> cccctg + 3670: 1390, // tgcccg -> cccgtg + 3671: 1406, // tgccct -> cccttg + 3672: 918, // tgccga -> atgccg + 3673: 1438, // tgccgc -> ccgctg + 3674: 1454, // tgccgg -> ccggtg + 3675: 1470, // tgccgt -> ccgttg + 3676: 919, // tgccta -> atgcct + 3677: 1502, // tgcctc -> cctctg + 3678: 1518, // tgcctg -> cctgtg + 3679: 1534, // tgcctt -> cctttg + 3680: 230, // tgcgaa -> aatgcg + 3681: 486, // tgcgac -> actgcg + 3682: 742, // tgcgag -> agtgcg + 3683: 998, // tgcgat -> attgcg + 3684: 921, // tgcgca -> atgcgc + 3685: 1510, // tgcgcc -> cctgcg + 3686: 1646, // tgcgcg 
-> cgcgtg + 3687: 1662, // tgcgct -> cgcttg + 3688: 922, // tgcgga -> atgcgg + 3689: 1694, // tgcggc -> cggctg + 3690: 1710, // tgcggg -> cgggtg + 3691: 1726, // tgcggt -> cggttg + 3692: 923, // tgcgta -> atgcgt + 3693: 1758, // tgcgtc -> cgtctg + 3694: 1774, // tgcgtg -> cgtgtg + 3695: 1790, // tgcgtt -> cgtttg + 3696: 231, // tgctaa -> aatgct + 3697: 487, // tgctac -> actgct + 3698: 743, // tgctag -> agtgct + 3699: 999, // tgctat -> attgct + 3700: 925, // tgctca -> atgctc + 3701: 1511, // tgctcc -> cctgct + 3702: 1767, // tgctcg -> cgtgct + 3703: 1918, // tgctct -> ctcttg + 3704: 926, // tgctga -> atgctg + 3705: 1950, // tgctgc -> ctgctg + 3706: 1966, // tgctgg -> ctggtg + 3707: 1982, // tgctgt -> ctgttg + 3708: 927, // tgctta -> atgctt + 3709: 1951, // tgcttc -> ctgctt + 3710: 2030, // tgcttg -> cttgtg + 3711: 2046, // tgcttt -> cttttg + 3712: 58, // tggaaa -> aaatgg + 3713: 122, // tggaac -> aactgg + 3714: 186, // tggaag -> aagtgg + 3715: 250, // tggaat -> aattgg + 3716: 314, // tggaca -> acatgg + 3717: 378, // tggacc -> acctgg + 3718: 442, // tggacg -> acgtgg + 3719: 506, // tggact -> acttgg + 3720: 570, // tggaga -> agatgg + 3721: 634, // tggagc -> agctgg + 3722: 698, // tggagg -> aggtgg + 3723: 762, // tggagt -> agttgg + 3724: 826, // tggata -> atatgg + 3725: 890, // tggatc -> atctgg + 3726: 954, // tggatg -> atgtgg + 3727: 1018, // tggatt -> atttgg + 3728: 233, // tggcaa -> aatggc + 3729: 489, // tggcac -> actggc + 3730: 745, // tggcag -> agtggc + 3731: 1001, // tggcat -> attggc + 3732: 933, // tggcca -> atggcc + 3733: 1402, // tggccc -> ccctgg + 3734: 1466, // tggccg -> ccgtgg + 3735: 1530, // tggcct -> ccttgg + 3736: 934, // tggcga -> atggcg + 3737: 1658, // tggcgc -> cgctgg + 3738: 1722, // tggcgg -> cggtgg + 3739: 1786, // tggcgt -> cgttgg + 3740: 935, // tggcta -> atggct + 3741: 1914, // tggctc -> ctctgg + 3742: 1978, // tggctg -> ctgtgg + 3743: 2042, // tggctt -> ctttgg + 3744: 234, // tgggaa -> aatggg + 3745: 490, // tgggac -> actggg + 3746: 746, // 
tgggag -> agtggg + 3747: 1002, // tgggat -> attggg + 3748: 937, // tgggca -> atgggc + 3749: 1514, // tgggcc -> cctggg + 3750: 1770, // tgggcg -> cgtggg + 3751: 2026, // tgggct -> cttggg + 3752: 938, // tgggga -> atgggg + 3753: 1962, // tggggc -> ctgggg + 3754: 2731, // tggggg -> gggggt + 3755: 2735, // tggggt -> ggggtt + 3756: 939, // tgggta -> atgggt + 3757: 1963, // tgggtc -> ctgggt + 3758: 2747, // tgggtg -> gggtgt + 3759: 2751, // tgggtt -> gggttt + 3760: 235, // tggtaa -> aatggt + 3761: 491, // tggtac -> actggt + 3762: 747, // tggtag -> agtggt + 3763: 1003, // tggtat -> attggt + 3764: 941, // tggtca -> atggtc + 3765: 1515, // tggtcc -> cctggt + 3766: 1771, // tggtcg -> cgtggt + 3767: 2027, // tggtct -> cttggt + 3768: 942, // tggtga -> atggtg + 3769: 1966, // tggtgc -> ctggtg + 3770: 2795, // tggtgg -> ggtggt + 3771: 2799, // tggtgt -> ggtgtt + 3772: 943, // tggtta -> atggtt + 3773: 1967, // tggttc -> ctggtt + 3774: 2811, // tggttg -> ggttgt + 3775: 2815, // tggttt -> ggtttt + 3776: 59, // tgtaaa -> aaatgt + 3777: 123, // tgtaac -> aactgt + 3778: 187, // tgtaag -> aagtgt + 3779: 251, // tgtaat -> aattgt + 3780: 315, // tgtaca -> acatgt + 3781: 379, // tgtacc -> acctgt + 3782: 443, // tgtacg -> acgtgt + 3783: 507, // tgtact -> acttgt + 3784: 571, // tgtaga -> agatgt + 3785: 635, // tgtagc -> agctgt + 3786: 699, // tgtagg -> aggtgt + 3787: 763, // tgtagt -> agttgt + 3788: 827, // tgtata -> atatgt + 3789: 891, // tgtatc -> atctgt + 3790: 955, // tgtatg -> atgtgt + 3791: 1019, // tgtatt -> atttgt + 3792: 237, // tgtcaa -> aatgtc + 3793: 493, // tgtcac -> actgtc + 3794: 749, // tgtcag -> agtgtc + 3795: 1005, // tgtcat -> attgtc + 3796: 949, // tgtcca -> atgtcc + 3797: 1403, // tgtccc -> ccctgt + 3798: 1467, // tgtccg -> ccgtgt + 3799: 1531, // tgtcct -> ccttgt + 3800: 950, // tgtcga -> atgtcg + 3801: 1659, // tgtcgc -> cgctgt + 3802: 1723, // tgtcgg -> cggtgt + 3803: 1787, // tgtcgt -> cgttgt + 3804: 951, // tgtcta -> atgtct + 3805: 1915, // tgtctc -> ctctgt + 3806: 
1979, // tgtctg -> ctgtgt + 3807: 2043, // tgtctt -> ctttgt + 3808: 238, // tgtgaa -> aatgtg + 3809: 494, // tgtgac -> actgtg + 3810: 750, // tgtgag -> agtgtg + 3811: 1006, // tgtgat -> attgtg + 3812: 953, // tgtgca -> atgtgc + 3813: 1518, // tgtgcc -> cctgtg + 3814: 1774, // tgtgcg -> cgtgtg + 3815: 2030, // tgtgct -> cttgtg + 3816: 954, // tgtgga -> atgtgg + 3817: 1978, // tgtggc -> ctgtgg + 3818: 2747, // tgtggg -> gggtgt + 3819: 2811, // tgtggt -> ggttgt + 3820: 955, // tgtgta -> atgtgt + 3821: 1979, // tgtgtc -> ctgtgt + 3822: 3003, // tgtgtg -> gtgtgt + 3823: 3007, // tgtgtt -> gtgttt + 3824: 239, // tgttaa -> aatgtt + 3825: 495, // tgttac -> actgtt + 3826: 751, // tgttag -> agtgtt + 3827: 1007, // tgttat -> attgtt + 3828: 957, // tgttca -> atgttc + 3829: 1519, // tgttcc -> cctgtt + 3830: 1775, // tgttcg -> cgtgtt + 3831: 2031, // tgttct -> cttgtt + 3832: 958, // tgttga -> atgttg + 3833: 1982, // tgttgc -> ctgttg + 3834: 2799, // tgttgg -> ggtgtt + 3835: 3055, // tgttgt -> gttgtt + 3836: 959, // tgttta -> atgttt + 3837: 1983, // tgtttc -> ctgttt + 3838: 3007, // tgtttg -> gtgttt + 3839: 3071, // tgtttt -> gttttt + 3840: 15, // ttaaaa -> aaaatt + 3841: 31, // ttaaac -> aaactt + 3842: 47, // ttaaag -> aaagtt + 3843: 63, // ttaaat -> aaattt + 3844: 79, // ttaaca -> aacatt + 3845: 95, // ttaacc -> aacctt + 3846: 111, // ttaacg -> aacgtt + 3847: 127, // ttaact -> aacttt + 3848: 143, // ttaaga -> aagatt + 3849: 159, // ttaagc -> aagctt + 3850: 175, // ttaagg -> aaggtt + 3851: 191, // ttaagt -> aagttt + 3852: 207, // ttaata -> aatatt + 3853: 223, // ttaatc -> aatctt + 3854: 239, // ttaatg -> aatgtt + 3855: 255, // ttaatt -> aatttt + 3856: 241, // ttacaa -> aattac + 3857: 287, // ttacac -> acactt + 3858: 303, // ttacag -> acagtt + 3859: 319, // ttacat -> acattt + 3860: 335, // ttacca -> accatt + 3861: 351, // ttaccc -> accctt + 3862: 367, // ttaccg -> accgtt + 3863: 383, // ttacct -> accttt + 3864: 399, // ttacga -> acgatt + 3865: 415, // ttacgc -> acgctt + 3866: 
431, // ttacgg -> acggtt + 3867: 447, // ttacgt -> acgttt + 3868: 463, // ttacta -> actatt + 3869: 479, // ttactc -> actctt + 3870: 495, // ttactg -> actgtt + 3871: 511, // ttactt -> actttt + 3872: 242, // ttagaa -> aattag + 3873: 498, // ttagac -> acttag + 3874: 559, // ttagag -> agagtt + 3875: 575, // ttagat -> agattt + 3876: 591, // ttagca -> agcatt + 3877: 607, // ttagcc -> agcctt + 3878: 623, // ttagcg -> agcgtt + 3879: 639, // ttagct -> agcttt + 3880: 655, // ttagga -> aggatt + 3881: 671, // ttaggc -> aggctt + 3882: 687, // ttaggg -> agggtt + 3883: 703, // ttaggt -> aggttt + 3884: 719, // ttagta -> agtatt + 3885: 735, // ttagtc -> agtctt + 3886: 751, // ttagtg -> agtgtt + 3887: 767, // ttagtt -> agtttt + 3888: 243, // ttataa -> aattat + 3889: 499, // ttatac -> acttat + 3890: 755, // ttatag -> agttat + 3891: 831, // ttatat -> atattt + 3892: 847, // ttatca -> atcatt + 3893: 863, // ttatcc -> atcctt + 3894: 879, // ttatcg -> atcgtt + 3895: 895, // ttatct -> atcttt + 3896: 911, // ttatga -> atgatt + 3897: 927, // ttatgc -> atgctt + 3898: 943, // ttatgg -> atggtt + 3899: 959, // ttatgt -> atgttt + 3900: 975, // ttatta -> attatt + 3901: 991, // ttattc -> attctt + 3902: 1007, // ttattg -> attgtt + 3903: 1023, // ttattt -> attttt + 3904: 61, // ttcaaa -> aaattc + 3905: 125, // ttcaac -> aacttc + 3906: 189, // ttcaag -> aagttc + 3907: 253, // ttcaat -> aatttc + 3908: 317, // ttcaca -> acattc + 3909: 381, // ttcacc -> accttc + 3910: 445, // ttcacg -> acgttc + 3911: 509, // ttcact -> actttc + 3912: 573, // ttcaga -> agattc + 3913: 637, // ttcagc -> agcttc + 3914: 701, // ttcagg -> aggttc + 3915: 765, // ttcagt -> agtttc + 3916: 829, // ttcata -> atattc + 3917: 893, // ttcatc -> atcttc + 3918: 957, // ttcatg -> atgttc + 3919: 1021, // ttcatt -> attttc + 3920: 245, // ttccaa -> aattcc + 3921: 501, // ttccac -> acttcc + 3922: 757, // ttccag -> agttcc + 3923: 1013, // ttccat -> atttcc + 3924: 981, // ttccca -> attccc + 3925: 1375, // ttcccc -> cccctt + 3926: 1391, // ttcccg 
-> cccgtt + 3927: 1407, // ttccct -> cccttt + 3928: 982, // ttccga -> attccg + 3929: 1439, // ttccgc -> ccgctt + 3930: 1455, // ttccgg -> ccggtt + 3931: 1471, // ttccgt -> ccgttt + 3932: 983, // ttccta -> attcct + 3933: 1503, // ttcctc -> cctctt + 3934: 1519, // ttcctg -> cctgtt + 3935: 1535, // ttcctt -> cctttt + 3936: 246, // ttcgaa -> aattcg + 3937: 502, // ttcgac -> acttcg + 3938: 758, // ttcgag -> agttcg + 3939: 1014, // ttcgat -> atttcg + 3940: 985, // ttcgca -> attcgc + 3941: 1526, // ttcgcc -> ccttcg + 3942: 1647, // ttcgcg -> cgcgtt + 3943: 1663, // ttcgct -> cgcttt + 3944: 986, // ttcgga -> attcgg + 3945: 1695, // ttcggc -> cggctt + 3946: 1711, // ttcggg -> cgggtt + 3947: 1727, // ttcggt -> cggttt + 3948: 987, // ttcgta -> attcgt + 3949: 1759, // ttcgtc -> cgtctt + 3950: 1775, // ttcgtg -> cgtgtt + 3951: 1791, // ttcgtt -> cgtttt + 3952: 247, // ttctaa -> aattct + 3953: 503, // ttctac -> acttct + 3954: 759, // ttctag -> agttct + 3955: 1015, // ttctat -> atttct + 3956: 989, // ttctca -> attctc + 3957: 1527, // ttctcc -> ccttct + 3958: 1783, // ttctcg -> cgttct + 3959: 1919, // ttctct -> ctcttt + 3960: 990, // ttctga -> attctg + 3961: 1951, // ttctgc -> ctgctt + 3962: 1967, // ttctgg -> ctggtt + 3963: 1983, // ttctgt -> ctgttt + 3964: 991, // ttctta -> attctt + 3965: 2015, // ttcttc -> cttctt + 3966: 2031, // ttcttg -> cttgtt + 3967: 2047, // ttcttt -> cttttt + 3968: 62, // ttgaaa -> aaattg + 3969: 126, // ttgaac -> aacttg + 3970: 190, // ttgaag -> aagttg + 3971: 254, // ttgaat -> aatttg + 3972: 318, // ttgaca -> acattg + 3973: 382, // ttgacc -> accttg + 3974: 446, // ttgacg -> acgttg + 3975: 510, // ttgact -> actttg + 3976: 574, // ttgaga -> agattg + 3977: 638, // ttgagc -> agcttg + 3978: 702, // ttgagg -> aggttg + 3979: 766, // ttgagt -> agtttg + 3980: 830, // ttgata -> atattg + 3981: 894, // ttgatc -> atcttg + 3982: 958, // ttgatg -> atgttg + 3983: 1022, // ttgatt -> attttg + 3984: 249, // ttgcaa -> aattgc + 3985: 505, // ttgcac -> acttgc + 3986: 761, // 
ttgcag -> agttgc + 3987: 1017, // ttgcat -> atttgc + 3988: 997, // ttgcca -> attgcc + 3989: 1406, // ttgccc -> cccttg + 3990: 1470, // ttgccg -> ccgttg + 3991: 1534, // ttgcct -> cctttg + 3992: 998, // ttgcga -> attgcg + 3993: 1662, // ttgcgc -> cgcttg + 3994: 1726, // ttgcgg -> cggttg + 3995: 1790, // ttgcgt -> cgtttg + 3996: 999, // ttgcta -> attgct + 3997: 1918, // ttgctc -> ctcttg + 3998: 1982, // ttgctg -> ctgttg + 3999: 2046, // ttgctt -> cttttg + 4000: 250, // ttggaa -> aattgg + 4001: 506, // ttggac -> acttgg + 4002: 762, // ttggag -> agttgg + 4003: 1018, // ttggat -> atttgg + 4004: 1001, // ttggca -> attggc + 4005: 1530, // ttggcc -> ccttgg + 4006: 1786, // ttggcg -> cgttgg + 4007: 2042, // ttggct -> ctttgg + 4008: 1002, // ttggga -> attggg + 4009: 2026, // ttgggc -> cttggg + 4010: 2735, // ttgggg -> ggggtt + 4011: 2751, // ttgggt -> gggttt + 4012: 1003, // ttggta -> attggt + 4013: 2027, // ttggtc -> cttggt + 4014: 2799, // ttggtg -> ggtgtt + 4015: 2815, // ttggtt -> ggtttt + 4016: 251, // ttgtaa -> aattgt + 4017: 507, // ttgtac -> acttgt + 4018: 763, // ttgtag -> agttgt + 4019: 1019, // ttgtat -> atttgt + 4020: 1005, // ttgtca -> attgtc + 4021: 1531, // ttgtcc -> ccttgt + 4022: 1787, // ttgtcg -> cgttgt + 4023: 2043, // ttgtct -> ctttgt + 4024: 1006, // ttgtga -> attgtg + 4025: 2030, // ttgtgc -> cttgtg + 4026: 2811, // ttgtgg -> ggttgt + 4027: 3007, // ttgtgt -> gtgttt + 4028: 1007, // ttgtta -> attgtt + 4029: 2031, // ttgttc -> cttgtt + 4030: 3055, // ttgttg -> gttgtt + 4031: 3071, // ttgttt -> gttttt + 4032: 63, // tttaaa -> aaattt + 4033: 127, // tttaac -> aacttt + 4034: 191, // tttaag -> aagttt + 4035: 255, // tttaat -> aatttt + 4036: 319, // tttaca -> acattt + 4037: 383, // tttacc -> accttt + 4038: 447, // tttacg -> acgttt + 4039: 511, // tttact -> actttt + 4040: 575, // tttaga -> agattt + 4041: 639, // tttagc -> agcttt + 4042: 703, // tttagg -> aggttt + 4043: 767, // tttagt -> agtttt + 4044: 831, // tttata -> atattt + 4045: 895, // tttatc -> atcttt 
+ 4046: 959, // tttatg -> atgttt + 4047: 1023, // tttatt -> attttt + 4048: 253, // tttcaa -> aatttc + 4049: 509, // tttcac -> actttc + 4050: 765, // tttcag -> agtttc + 4051: 1021, // tttcat -> attttc + 4052: 1013, // tttcca -> atttcc + 4053: 1407, // tttccc -> cccttt + 4054: 1471, // tttccg -> ccgttt + 4055: 1535, // tttcct -> cctttt + 4056: 1014, // tttcga -> atttcg + 4057: 1663, // tttcgc -> cgcttt + 4058: 1727, // tttcgg -> cggttt + 4059: 1791, // tttcgt -> cgtttt + 4060: 1015, // tttcta -> atttct + 4061: 1919, // tttctc -> ctcttt + 4062: 1983, // tttctg -> ctgttt + 4063: 2047, // tttctt -> cttttt + 4064: 254, // tttgaa -> aatttg + 4065: 510, // tttgac -> actttg + 4066: 766, // tttgag -> agtttg + 4067: 1022, // tttgat -> attttg + 4068: 1017, // tttgca -> atttgc + 4069: 1534, // tttgcc -> cctttg + 4070: 1790, // tttgcg -> cgtttg + 4071: 2046, // tttgct -> cttttg + 4072: 1018, // tttgga -> atttgg + 4073: 2042, // tttggc -> ctttgg + 4074: 2751, // tttggg -> gggttt + 4075: 2815, // tttggt -> ggtttt + 4076: 1019, // tttgta -> atttgt + 4077: 2043, // tttgtc -> ctttgt + 4078: 3007, // tttgtg -> gtgttt + 4079: 3071, // tttgtt -> gttttt + 4080: 255, // ttttaa -> aatttt + 4081: 511, // ttttac -> actttt + 4082: 767, // ttttag -> agtttt + 4083: 1023, // ttttat -> attttt + 4084: 1021, // ttttca -> attttc + 4085: 1535, // ttttcc -> cctttt + 4086: 1791, // ttttcg -> cgtttt + 4087: 2047, // ttttct -> cttttt + 4088: 1022, // ttttga -> attttg + 4089: 2046, // ttttgc -> cttttg + 4090: 2815, // ttttgg -> ggtttt + 4091: 3071, // ttttgt -> gttttt + 4092: 1023, // ttttta -> attttt + 4093: 2047, // tttttc -> cttttt + 4094: 3071, // tttttg -> gttttt + 4095: 4095, // tttttt -> tttttt + }, +} + +// NormalizeInt retourne le code du k-mer canonique (le plus petit lexicographiquement +// parmi toutes les permutations circulaires) pour un k-mer encodé en entier. +// Pour les k-mers de taille 1 à 6, utilise la table pré-calculée. +// Pour les k-mers plus grands, calcule à la volée. 
+func NormalizeInt(kmerCode int, kmerSize int) int { + // Pour les k-mers de taille <= 6, utiliser la table + if kmerSize <= 6 && kmerSize > 0 { + if canonical, ok := LexicographicNormalizationInt[kmerSize][kmerCode]; ok { + return canonical + } + // Si non trouvé dans la table (ne devrait pas arriver pour des k-mers valides) + } + + // Pour les k-mers > 6 ou non trouvés, calculer les rotations circulaires + return getCanonicalCircularInt(kmerCode, kmerSize) +} + +// getCanonicalCircularInt retourne le code du plus petit k-mer lexicographiquement +// parmi toutes les permutations circulaires du k-mer encodé donné. +func getCanonicalCircularInt(kmerCode int, kmerSize int) int { + if kmerSize <= 0 { + return kmerCode + } + + canonical := kmerCode + mask := (1 << (kmerSize * 2)) - 1 // Masque pour garder k*2 bits + shiftAmount := (kmerSize * 2) - 2 // Position du premier nucléotide + + // Générer toutes les permutations circulaires + currentCode := kmerCode + for i := 1; i < kmerSize; i++ { + // Extraire le premier nucléotide (2 bits de poids fort) + firstNuc := (currentCode >> shiftAmount) & 3 + // Décaler vers la gauche et ajouter le premier nucléotide à la fin + currentCode = ((currentCode << 2) & mask) | firstNuc + + // Comparer lexicographiquement (le plus petit code est le plus petit lexicographiquement) + if currentCode < canonical { + canonical = currentCode + } + } + + return canonical +} + +// EncodeKmer encode un k-mer (string) en entier selon le schéma de EncodeNucleotide. +// Cette fonction est utile pour les tests et le debug. +func EncodeKmer(kmer string) int { + code := 0 + for i := 0; i < len(kmer); i++ { + code = (code << 2) + int(EncodeNucleotide(kmer[i])) + } + return code +} + +// DecodeKmer décode un entier en k-mer (string). +// Cette fonction est utile pour les tests et le debug. 
func DecodeKmer(code int, kmerSize int) string {
	// A non-positive size has no bases to decode; returning early also avoids
	// the runtime panic make() raises for a negative length.
	if kmerSize <= 0 {
		return ""
	}

	const bases = "acgt" // index = 2-bit nucleotide code

	result := make([]byte, kmerSize)
	// Fill from the last base backwards: the last base sits in the low bits.
	for i := kmerSize - 1; i >= 0; i-- {
		result[i] = bases[code&3]
		code >>= 2
	}
	return string(result)
}

// eulerTotient computes Euler's totient function φ(n), which counts
// the number of integers from 1 to n that are coprime with n.
// It returns 0 for n <= 0.
func eulerTotient(n int) int {
	if n <= 0 {
		return 0
	}

	result := n

	// Trial division: every p that divides the remaining n here is prime,
	// because all smaller prime factors have already been divided out.
	for p := 2; p*p <= n; p++ {
		if n%p != 0 {
			continue
		}
		// Strip every occurrence of p from n.
		for n%p == 0 {
			n /= p
		}
		// φ picks up a factor (1 - 1/p) for each distinct prime p.
		result -= result / p
	}

	// Whatever is left above 1 is a single remaining prime factor.
	if n > 1 {
		result -= result / n
	}

	return result
}

// divisors returns all divisors of n in ascending order.
// It returns an empty slice for n <= 0.
func divisors(n int) []int {
	if n <= 0 {
		return []int{}
	}

	// Divisors come in pairs (i, n/i) with i <= sqrt(n). Walking i upwards
	// yields the small halves in ascending order and the large halves in
	// descending order, so no sort is needed.
	var small, large []int
	for i := 1; i*i <= n; i++ {
		if n%i != 0 {
			continue
		}
		small = append(small, i)
		if q := n / i; q != i { // skip the duplicate when n is a perfect square
			large = append(large, q)
		}
	}

	// Append the large halves in reverse to keep the result ascending.
	for j := len(large) - 1; j >= 0; j-- {
		small = append(small, large[j])
	}

	return small
}

// necklaceCount computes the number of distinct necklaces (equivalence classes
// under rotation) for sequences of length n over an alphabet of size a.
// Uses Moreau's necklace-counting formula (also known as the necklace polynomial):
//
//	N(n, a) = (1/n) * Σ φ(d) * a^(n/d)
//
// where the sum is over all divisors d of n, and φ is Euler's totient function.
+func necklaceCount(n, alphabetSize int) int { + if n <= 0 { + return 0 + } + + divs := divisors(n) + sum := 0 + + for _, d := range divs { + // Compute a^(n/d) + power := 1 + exp := n / d + for i := 0; i < exp; i++ { + power *= alphabetSize + } + + sum += eulerTotient(d) * power + } + + return sum / n +} + +// CanonicalKmerCount returns the number of canonical k-mers (unique normalized forms) +// for a given k-mer size after circular normalization. +// +// For k=1 to 6, uses exact counts from pre-computed tables. +// For k>6, uses Moreau's necklace-counting formula for exact computation: +// +// N(n, 4) = (1/n) * Σ φ(d) * 4^(n/d) +// +// where the sum is over all divisors d of n, and φ is Euler's totient function. +// +// These values are critical for calculating maximum entropy in entropy-based +// complexity filters, as circular normalization reduces the effective alphabet size. +func CanonicalKmerCount(kmerSize int) int { + // Exact counts for k=1 to 6 (counted from normalization tables) + switch kmerSize { + case 1: + return 4 + case 2: + return 10 + case 3: + return 24 + case 4: + return 70 + case 5: + return 208 + case 6: + return 700 + default: + // For k>6, use Moreau's necklace-counting formula for exact count + // DNA alphabet has 4 bases + return necklaceCount(kmerSize, 4) + } +} diff --git a/pkg/obikmer/kmernormint_test.go b/pkg/obikmer/kmernormint_test.go new file mode 100644 index 0000000..b6c26d2 --- /dev/null +++ b/pkg/obikmer/kmernormint_test.go @@ -0,0 +1,357 @@ +package obikmer + +import ( + "fmt" + "testing" +) + +func TestEncodeDecodeKmer(t *testing.T) { + tests := []struct { + kmer string + code int + }{ + {"a", 0}, + {"c", 1}, + {"g", 2}, + {"t", 3}, + {"aa", 0}, + {"ac", 1}, + {"ca", 4}, + {"acgt", 27}, // 0b00011011 + {"cgta", 108}, // 0b01101100 + {"tttt", 255}, // 0b11111111 + } + + for _, tt := range tests { + t.Run(tt.kmer, func(t *testing.T) { + // Test encoding + encoded := EncodeKmer(tt.kmer) + if encoded != tt.code { + 
t.Errorf("EncodeKmer(%q) = %d, want %d", tt.kmer, encoded, tt.code) + } + + // Test decoding + decoded := DecodeKmer(tt.code, len(tt.kmer)) + if decoded != tt.kmer { + t.Errorf("DecodeKmer(%d, %d) = %q, want %q", tt.code, len(tt.kmer), decoded, tt.kmer) + } + }) + } +} + +func TestNormalizeInt(t *testing.T) { + tests := []struct { + name string + kmer string + expected string + }{ + // Test avec k=1 + {"k=1 a", "a", "a"}, + {"k=1 c", "c", "c"}, + + // Test avec k=2 + {"k=2 ca", "ca", "ac"}, + {"k=2 ac", "ac", "ac"}, + {"k=2 ta", "ta", "at"}, + + // Test avec k=4 - toutes les rotations de "acgt" + {"k=4 acgt", "acgt", "acgt"}, + {"k=4 cgta", "cgta", "acgt"}, + {"k=4 gtac", "gtac", "acgt"}, + {"k=4 tacg", "tacg", "acgt"}, + + // Test avec k=4 - rotations de "tgca" + {"k=4 tgca", "tgca", "atgc"}, + {"k=4 gcat", "gcat", "atgc"}, + {"k=4 catg", "catg", "atgc"}, + {"k=4 atgc", "atgc", "atgc"}, + + // Test avec k=3 - rotations de "atg" + {"k=3 atg", "atg", "atg"}, + {"k=3 tga", "tga", "atg"}, + {"k=3 gat", "gat", "atg"}, + + // Test avec k=6 + {"k=6 aaaaaa", "aaaaaa", "aaaaaa"}, + {"k=6 tttttt", "tttttt", "tttttt"}, + + // Test avec k>6 (calcul à la volée) + {"k=7 aaaaaaa", "aaaaaaa", "aaaaaaa"}, + {"k=7 tgcatgc", "tgcatgc", "atgctgc"}, + {"k=7 gcatgct", "gcatgct", "atgctgc"}, + {"k=8 acgtacgt", "acgtacgt", "acgtacgt"}, + {"k=8 gtacgtac", "gtacgtac", "acgtacgt"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + kmerCode := EncodeKmer(tt.kmer) + expectedCode := EncodeKmer(tt.expected) + + result := NormalizeInt(kmerCode, len(tt.kmer)) + + if result != expectedCode { + resultKmer := DecodeKmer(result, len(tt.kmer)) + t.Errorf("NormalizeInt(%q) = %q (code %d), want %q (code %d)", + tt.kmer, resultKmer, result, tt.expected, expectedCode) + } + }) + } +} + +func TestNormalizeIntConsistencyWithString(t *testing.T) { + // Vérifier que NormalizeInt donne le même résultat que Normalize + // pour tous les k-mers de taille 1 à 4 (pour ne pas trop ralentir 
les tests) + bases := []byte{'a', 'c', 'g', 't'} + + var testKmers func(current string, maxSize int) + testKmers = func(current string, maxSize int) { + if len(current) > 0 { + // Test normalization + normalizedStr := Normalize(current) + normalizedStrCode := EncodeKmer(normalizedStr) + + kmerCode := EncodeKmer(current) + normalizedInt := NormalizeInt(kmerCode, len(current)) + + if normalizedInt != normalizedStrCode { + normalizedIntStr := DecodeKmer(normalizedInt, len(current)) + t.Errorf("Inconsistency for %q: Normalize=%q (code %d), NormalizeInt=%q (code %d)", + current, normalizedStr, normalizedStrCode, normalizedIntStr, normalizedInt) + } + } + + if len(current) < maxSize { + for _, base := range bases { + testKmers(current+string(base), maxSize) + } + } + } + + testKmers("", 4) // Test jusqu'à k=4 pour rester raisonnable +} + +func TestCircularRotations(t *testing.T) { + // Test que toutes les rotations circulaires donnent le même canonical + tests := []struct { + kmers []string + canonical string + }{ + {[]string{"atg", "tga", "gat"}, "atg"}, + {[]string{"acgt", "cgta", "gtac", "tacg"}, "acgt"}, + {[]string{"tgca", "gcat", "catg", "atgc"}, "atgc"}, + } + + for _, tt := range tests { + canonicalCode := EncodeKmer(tt.canonical) + + for _, kmer := range tt.kmers { + kmerCode := EncodeKmer(kmer) + result := NormalizeInt(kmerCode, len(kmer)) + + if result != canonicalCode { + resultKmer := DecodeKmer(result, len(kmer)) + t.Errorf("NormalizeInt(%q) = %q, want %q", kmer, resultKmer, tt.canonical) + } + } + } +} + +func BenchmarkNormalizeIntSmall(b *testing.B) { + // Benchmark pour k<=6 (utilise la table) + kmer := "acgtac" + kmerCode := EncodeKmer(kmer) + kmerSize := len(kmer) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = NormalizeInt(kmerCode, kmerSize) + } +} + +func BenchmarkNormalizeIntLarge(b *testing.B) { + // Benchmark pour k>6 (calcul à la volée) + kmer := "acgtacgtac" + kmerCode := EncodeKmer(kmer) + kmerSize := len(kmer) + + b.ResetTimer() + for i 
:= 0; i < b.N; i++ { + _ = NormalizeInt(kmerCode, kmerSize) + } +} + +func BenchmarkEncodeKmer(b *testing.B) { + kmer := "acgtacgt" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = EncodeKmer(kmer) + } +} + +func TestCanonicalKmerCount(t *testing.T) { + // Test exact counts for k=1 to 6 + tests := []struct { + k int + expected int + }{ + {1, 4}, + {2, 10}, + {3, 24}, + {4, 70}, + {5, 208}, + {6, 700}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("k=%d", tt.k), func(t *testing.T) { + result := CanonicalKmerCount(tt.k) + if result != tt.expected { + t.Errorf("CanonicalKmerCount(%d) = %d, want %d", tt.k, result, tt.expected) + } + }) + } + + // Verify counts match table sizes + for k := 1; k <= 6; k++ { + // Count unique canonical codes in the table + uniqueCodes := make(map[int]bool) + for _, canonicalCode := range LexicographicNormalizationInt[k] { + uniqueCodes[canonicalCode] = true + } + + expected := len(uniqueCodes) + result := CanonicalKmerCount(k) + + if result != expected { + t.Errorf("CanonicalKmerCount(%d) = %d, but table has %d unique canonical codes", + k, result, expected) + } + } +} + +func TestNecklaceCountFormula(t *testing.T) { + // Verify Moreau's formula gives the same results as hardcoded values for k=1 to 6 + // and compute exact values for k=7+ + tests := []struct { + k int + expected int + }{ + {1, 4}, + {2, 10}, + {3, 24}, + {4, 70}, + {5, 208}, + {6, 700}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("k=%d", tt.k), func(t *testing.T) { + result := necklaceCount(tt.k, 4) + if result != tt.expected { + t.Errorf("necklaceCount(%d, 4) = %d, want %d", tt.k, result, tt.expected) + } + }) + } +} + +func TestNecklaceCountByBruteForce(t *testing.T) { + // Verify necklace count for k=7 and k=8 by brute force + // Generate all 4^k k-mers and count unique normalized ones + bases := []byte{'a', 'c', 'g', 't'} + + for _, k := range []int{7, 8} { + t.Run(fmt.Sprintf("k=%d", k), func(t *testing.T) { + unique := make(map[int]bool) + + 
// Generate all possible k-mers + var generate func(current int, depth int) + generate = func(current int, depth int) { + if depth == k { + // Normalize and add to set + normalized := NormalizeInt(current, k) + unique[normalized] = true + return + } + + for _, base := range bases { + newCode := (current << 2) | int(EncodeNucleotide(base)) + generate(newCode, depth+1) + } + } + + generate(0, 0) + + bruteForceCount := len(unique) + formulaCount := necklaceCount(k, 4) + + if bruteForceCount != formulaCount { + t.Errorf("For k=%d: brute force count = %d, formula count = %d", + k, bruteForceCount, formulaCount) + } + + t.Logf("k=%d: unique canonical k-mers = %d (formula matches brute force)", k, bruteForceCount) + }) + } +} + +func TestEulerTotient(t *testing.T) { + tests := []struct { + n int + expected int + }{ + {1, 1}, + {2, 1}, + {3, 2}, + {4, 2}, + {5, 4}, + {6, 2}, + {7, 6}, + {8, 4}, + {9, 6}, + {10, 4}, + {12, 4}, + {15, 8}, + {20, 8}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("φ(%d)", tt.n), func(t *testing.T) { + result := eulerTotient(tt.n) + if result != tt.expected { + t.Errorf("eulerTotient(%d) = %d, want %d", tt.n, result, tt.expected) + } + }) + } +} + +func TestDivisors(t *testing.T) { + tests := []struct { + n int + expected []int + }{ + {1, []int{1}}, + {2, []int{1, 2}}, + {6, []int{1, 2, 3, 6}}, + {12, []int{1, 2, 3, 4, 6, 12}}, + {15, []int{1, 3, 5, 15}}, + {20, []int{1, 2, 4, 5, 10, 20}}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("divisors(%d)", tt.n), func(t *testing.T) { + result := divisors(tt.n) + if len(result) != len(tt.expected) { + t.Errorf("divisors(%d) = %v, want %v", tt.n, result, tt.expected) + return + } + for i := range result { + if result[i] != tt.expected[i] { + t.Errorf("divisors(%d) = %v, want %v", tt.n, result, tt.expected) + return + } + } + }) + } +} diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index e098778..0f52169 100644 --- a/pkg/obioptions/version.go +++ 
b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "7500ee1" +var _Commit = "07cdd6f" var _Version = "Release 4.4.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/kmers.go b/pkg/obiseq/kmers.go new file mode 100644 index 0000000..b61ca00 --- /dev/null +++ b/pkg/obiseq/kmers.go @@ -0,0 +1,23 @@ +package obiseq + +import "iter" + +func (seq *BioSequence) Kmers(k int) iter.Seq[[]byte] { + return func(yield func([]byte) bool) { + // Gérer les cas où k est invalide ou la séquence trop courte + if k <= 0 || k > len(seq.sequence) { + return + } + + // Itérer sur tous les k-mers possibles + for i := 0; i <= len(seq.sequence)-k; i++ { + // Extraire le k-mer actuel + kmer := seq.sequence[i : i+k] + + // Passer au consommateur et arrêter si demandé + if !yield(kmer) { + return + } + } + } +} diff --git a/pkg/obitools/obilowmask/entropy.qmd b/pkg/obitools/obilowmask/entropy.qmd new file mode 100644 index 0000000..34be993 --- /dev/null +++ b/pkg/obitools/obilowmask/entropy.qmd @@ -0,0 +1,319 @@ +```{r} +library(tidyverse) +``` + +```{r} +x <- sample(1:4096, 29, replace=TRUE) +``` + +```{r} +emax <- function(lseq,word_size) { + nword = lseq - word_size + 1 + nalpha = 4^word_size + + if (nalpha < nword) { + cov = nword %/% nalpha + remains = nword %% nalpha + f1 = cov/nword + f2 = (cov+1)/nword + print(c(nalpha - remains,f1,remains,f2)) + e = -(nalpha - remains) * f1 * log(f1) - + remains * f2 * log(f2) + } else { + e = log(nword) + } + + e +} +``` + +```{r} +ec <- function(data,kmer_size) { + table <- table(data) + s <- sum(table) + e <- sum(table * log(table))/s + ed <- log(s) - e + + em <- emax(s+kmer_size-1,kmer_size) + + ed/em +} +``` + +```{r} +ef <- function(data,kmer_size) { + table <- table(data) + s <- sum(table) + f <- table / s + + f <- as.numeric(f) + f <- f[f > 0] + + em <- emax(s+kmer_size-1,kmer_size) + ed <- -sum(f * log(f)) + + 
print(c(ed,em,ed/em)) + + ed/em +} +``` + +```{r} +okmer <- function(data,kmer_size) { + str_sub(data,1:(nchar(data)-kmer_size+1)) %>% + str_sub(1,kmer_size) +} +``` + +```{r} +# Normalisation circulaire: retourne le plus petit k-mer par rotation circulaire +normalize_circular <- function(kmer) { + if (nchar(kmer) == 0) return(kmer) + + canonical <- kmer + n <- nchar(kmer) + + # Tester toutes les rotations circulaires + for (i in 2:n) { + rotated <- paste0(str_sub(kmer, i, n), str_sub(kmer, 1, i-1)) + if (rotated < canonical) { + canonical <- rotated + } + } + + canonical +} +``` + +```{r} +# Fonction totient d'Euler: compte le nombre d'entiers de 1 à n coprimes avec n +euler_totient <- function(n) { + if (n <= 0) return(0) + + result <- n + p <- 2 + + # Traiter tous les facteurs premiers + while (p * p <= n) { + if (n %% p == 0) { + # Retirer toutes les occurrences de p + while (n %% p == 0) { + n <- n %/% p + } + # Appliquer la formule: φ(n) = n * (1 - 1/p) + result <- result - result %/% p + } + p <- p + 1 + } + + # Si n est toujours > 1, alors c'est un facteur premier + if (n > 1) { + result <- result - result %/% n + } + + result +} +``` + +```{r} +# Retourne tous les diviseurs de n +divisors <- function(n) { + if (n <= 0) return(integer(0)) + + divs <- c() + i <- 1 + while (i * i <= n) { + if (n %% i == 0) { + divs <- c(divs, i) + if (i != n %/% i) { + divs <- c(divs, n %/% i) + } + } + i <- i + 1 + } + + sort(divs) +} +``` + +```{r} +# Compte le nombre de colliers (necklaces) distincts de longueur n +# sur un alphabet de taille a en utilisant la formule de Moreau: +# N(n, a) = (1/n) * Σ φ(d) * a^(n/d) +# où la somme est sur tous les diviseurs d de n, et φ est la fonction totient d'Euler +necklace_count <- function(n, alphabet_size) { + if (n <= 0) return(0) + + divs <- divisors(n) + sum_val <- 0 + + for (d in divs) { + # Calculer alphabet_size^(n/d) + power <- alphabet_size^(n %/% d) + sum_val <- sum_val + euler_totient(d) * power + } + + sum_val %/% n +} 
+``` + +```{r} +# Nombre de classes d'équivalence pour les k-mers normalisés +# Utilise la formule exacte de Moreau pour compter les colliers (necklaces) +n_normalized_kmers <- function(kmer_size) { + # Valeurs exactes pré-calculées pour k=1 à 6 + if (kmer_size == 1) return(4) + if (kmer_size == 2) return(10) + if (kmer_size == 3) return(24) + if (kmer_size == 4) return(70) + if (kmer_size == 5) return(208) + if (kmer_size == 6) return(700) + + # Pour k > 6, utiliser la formule de Moreau (exacte) + # Alphabet ADN a 4 bases + necklace_count(kmer_size, 4) +} +``` + +```{r} +# Entropie maximale pour k-mers normalisés +enmax <- function(lseq, word_size) { + nword = lseq - word_size + 1 + nalpha = n_normalized_kmers(word_size) + + if (nalpha < nword) { + cov = nword %/% nalpha + remains = nword %% nalpha + f1 = cov/nword + f2 = (cov+1)/nword + e = -(nalpha - remains) * f1 * log(f1) - + remains * f2 * log(f2) + } else { + e = log(nword) + } + + e +} +``` + +```{r} +# Entropie normalisée avec normalisation circulaire des k-mers +ecn <- function(data, kmer_size) { + # Normaliser tous les k-mers + normalized_data <- sapply(data, normalize_circular) + + # Calculer la table des fréquences + table <- table(normalized_data) + s <- sum(table) + e <- sum(table * log(table))/s + ed <- log(s) - e + + # Entropie maximale avec normalisation + em <- enmax(s + kmer_size - 1, kmer_size) + + ed/em +} +``` + +```{r} +k<-'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' +ec(okmer(k,1),1) +ec(okmer(k,2),2) +ec(okmer(k,3),3) +ec(okmer(k,4),4) +``` + +```{r} +k<-'atatatatatatatatatatatatatatata' +ef(okmer(k,1),1) +ef(okmer(k,2),2) +ef(okmer(k,3),3) +ef(okmer(k,4),4) +``` + +```{r} +k<-'aaaaaaaaaaaaaaaattttttttttttttt' +ef(okmer(k,1),1) +ef(okmer(k,2),2) +ef(okmer(k,3),3) +ef(okmer(k,4),4) +``` + +```{r} +k<-'atgatgatgatgatgatgatgatgatgatga' +ef(okmer(k,1),1) +ef(okmer(k,2),2) +ef(okmer(k,3),3) +ef(okmer(k,4),4) +``` + +```{r} +k<-'atcgatcgatcgatcgatcgatcgatcgact' +ecn(okmer(k,1),1) +ecn(okmer(k,2),2) 
+ecn(okmer(k,3),3) +ecn(okmer(k,4),4) +``` + +```{r} +k<-paste(sample(rep(c("a","c","g","t"),8),31),collapse="") +k <- "actatggcaagtcgtaaccgcgcttatcagg" +ecn(okmer(k,1),1) +ecn(okmer(k,2),2) +ecn(okmer(k,3),3) +ecn(okmer(k,4),4) +``` + +aattaaaaaaacaagataaaataatattttt + +```{r} +k<-'aattaaaaaaacaagataaaataatattttt' +ecn(okmer(k,1),1) +ecn(okmer(k,2),2) +ecn(okmer(k,3),3) +ecn(okmer(k,4),4) +``` + +atg tga gat ,,,, + +cat tca atc + +tgatgatgatgatgatgatgatgatgatg + +## Tests de normalisation circulaire + +```{r} +# Test de la fonction de normalisation +normalize_circular("ca") # devrait donner "ac" +normalize_circular("tgca") # devrait donner "atgc" +normalize_circular("acgt") # devrait donner "acgt" +``` + +```{r} +# Comparaison ec vs ecn sur une séquence répétitive +# Les k-mers "atg", "tga", "gat" sont équivalents par rotation +k <- 'atgatgatgatgatgatgatgatgatgatga' +cat("Séquence:", k, "\n") +cat("ec(k,3) =", ec(okmer(k,3),3), "\n") +cat("ecn(k,3) =", ecn(okmer(k,3),3), "\n") +``` + +```{r} +# Comparaison sur séquence aléatoire +k <- "actatggcaagtcgtaaccgcgcttatcagg" +cat("Séquence:", k, "\n") +cat("Sans normalisation:\n") +cat(" ec(k,2) =", ec(okmer(k,2),2), "\n") +cat(" ec(k,3) =", ec(okmer(k,3),3), "\n") +cat(" ec(k,4) =", ec(okmer(k,4),4), "\n") +cat("Avec normalisation circulaire:\n") +cat(" ecn(k,2) =", ecn(okmer(k,2),2), "\n") +cat(" ecn(k,3) =", ecn(okmer(k,3),3), "\n") +cat(" ecn(k,4) =", ecn(okmer(k,4),4), "\n") +``` + +```{r} +re <- 
rev(c(0.8108602271901116,0.8108602271901116,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.7800272339058549,0.7800272339058549,0.7751610144606091,0.7751610144606091,0.7751610144606091,0.764858185548322,0.7325526601302021,0.7137620699527615,0.6789199521982864,0.6584536373623372,0.634002687184193,0.6075290415873623,0.5785545803330997,0.5785545803330997,0.5503220289212184,0.5315314387437778,0.4966893209893028,0.46077361820145696,0.42388221293245526,0.4009547969713408,0.3561142883497758,0.3561142883497758,0.3561142883497758,0.3561142883497758,0.3561142883497758,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.35141814451677883,0.35141814451677883,0.35141814451677883,0.35141814451677883,0.35141814451677883,0.390029016052137,0.42781461756157363,0.45192285937059073,0.47238917420654,0.47238917420654,0.47238917420654,0.5092805794755417,0.5451962822633876,0.5800384000178626,0.602395141014297,0.6046146614886381,0.6046146614886381,0.6119084258128231,0.6119084258128231,0.6214217106113492,0.6424704346756562,0.6482381543085467,0.6635191587399633,0.6635191587399633,0.6635191587399633,0.6828444721058894,0.6950205907027562,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.7208976112999935)) + +di <- 
c(0.7208976112999935,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6950205907027562,0.6828444721058894,0.6635191587399633,0.6635191587399633,0.6635191587399633,0.6482381543085467,0.6424704346756562,0.6214217106113492,0.6119084258128231,0.6119084258128231,0.6046146614886382,0.6046146614886382,0.6023951410142971,0.5800384000178627,0.5451962822633876,0.5092805794755418,0.47238917420654003,0.47238917420654003,0.47238917420654003,0.4519228593705908,0.4278146175615737,0.39002901605213713,0.35141814451677894,0.35141814451677894,0.35141814451677894,0.35141814451677894,0.35141814451677883,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3561142883497762,0.3561142883497762,0.3561142883497762,0.3561142883497762,0.3561142883497762,0.40095479697134073,0.42388221293245526,0.46077361820145696,0.4966893209893028,0.5315314387437778,0.5503220289212184,0.5785545803330997,0.5785545803330997,0.6075290415873625,0.6340026871841933,0.6584536373623374,0.6789199521982866,0.7137620699527616,0.7325526601302023,0.7648581855483221,0.7751610144606093,0.7751610144606093,0.7751610144606093,0.7800272339058549,0.7800272339058549,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8108602271901116,0.8108602271901116) +``` \ No newline at end of file diff --git 
a/pkg/obitools/obilowmask/obilowmask.go b/pkg/obitools/obilowmask/obilowmask.go new file mode 100644 index 0000000..3554a20 --- /dev/null +++ b/pkg/obitools/obilowmask/obilowmask.go @@ -0,0 +1,440 @@ +package obilowmask + +import ( + "math" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" + log "github.com/sirupsen/logrus" +) + +// MaskingMode defines how to handle low-complexity regions +type MaskingMode int + +const ( + Mask MaskingMode = iota // Mask mode: replace low-complexity regions with masked characters + Split // Split mode: split sequence into high-complexity fragments +) + +// LowMaskWorker creates a worker to mask low-complexity regions in DNA sequences. +// +// Algorithm principle: +// Calculate the normalized entropy of each k-mer at different scales (wordSize = 1 to level_max). +// K-mers with entropy below the threshold are masked. 
+// +// Parameters: +// - kmer_size: size of the sliding window for entropy calculation +// - level_max: maximum word size used for entropy calculation (finest scale) +// - threshold: normalized entropy threshold below which masking occurs (between 0 and 1) +// - mode: Mask (masking) or Split (splitting) +// - maskChar: character used for masking (typically 'n' or 'N') +// +// Returns: a SeqWorker function that can be applied to each sequence +func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode MaskingMode, maskChar byte) obiseq.SeqWorker { + + // ======================================================================== + // FUNCTION 1: emax - Calculate theoretical maximum entropy + // ======================================================================== + // Computes the maximum entropy of a k-mer of length lseq containing words of size word_size. + // + // Maximum entropy depends on the theoretical optimal word distribution: + // - If we have more positions (nw) than possible canonical words (na), + // some words will appear multiple times + // - We calculate the entropy of a distribution where all words appear + // cov or cov+1 times (most uniform distribution possible) + // + // IMPORTANT: Uses CanonicalKmerCount to get the actual number of canonical words + // after circular normalization (e.g., "atg", "tga", "gat" → all "atg"). + // This is much smaller than 4^word_size (e.g., 10 instead of 16 for word_size=2). 
+ emax := func(lseq, word_size int) float64 { + nw := lseq - word_size + 1 // Number of words in a k-mer of length lseq + na := obikmer.CanonicalKmerCount(word_size) // Number of canonical words after normalization + + // Case 1: Fewer positions than possible words + // Maximum entropy is simply log(nw) since we can have at most nw different words + if nw < na { + return math.Log(float64(nw)) + } + + // Case 2: More positions than possible words + // Some words must appear multiple times + cov := nw / na // Average coverage (average number of occurrences per word) + remains := nw - (na * cov) // Number of words that will have one additional occurrence + + // Calculate frequencies in the optimal distribution: + // - (na - remains) words appear cov times → frequency f1 = cov/nw + // - remains words appear (cov+1) times → frequency f2 = (cov+1)/nw + f1 := float64(cov) / float64(nw) + f2 := float64(cov+1) / float64(nw) + + // Shannon entropy: H = -Σ p(i) * log(p(i)) + // where p(i) is the probability of observing word i + return -(float64(na-remains)*f1*math.Log(f1) + + float64(remains)*f2*math.Log(f2)) + } + + // ======================================================================== + // FUNCTION 2: maskAmbiguities - Mark positions containing ambiguities + // ======================================================================== + // Identifies positions with ambiguous nucleotides (N, Y, R, etc.) and marks + // all k-mers that contain them. 
+ // + // Returns: a slice where maskPositions[i] = -1 if the k-mer starting at + // position i contains an ambiguity, 0 otherwise + maskAmbiguities := func(sequence []byte) []int { + maskPositions := make([]int, len(sequence)) + for i, nuc := range sequence { + // If nucleotide is not a, c, g or t (lowercase), it's an ambiguity + if nuc != 'a' && nuc != 'c' && nuc != 'g' && nuc != 't' { + // Mark the start positions of all k-mers that contain this nucleotide + // A k-mer starting at position (i - kmer_size + 1) will contain position i + end := max(0, i-kmer_size+1) + for j := i; j >= end; j-- { + maskPositions[j] = -1 + } + } + } + return maskPositions + } + + // ======================================================================== + // FUNCTION 3: cleanTable - Reset a frequency table to zero + // ======================================================================== + cleanTable := func(table []int, over int) { + for i := 0; i < over; i++ { + table[i] = 0 + } + } + + // ======================================================================== + // FUNCTION 4: slidingMin - Calculate sliding minimum over a window + // ======================================================================== + // Applies a sliding window of size window over data and replaces each + // value with the minimum of the window ending at that position. + // + // Uses a MinMultiset to efficiently maintain the minimum in the window. 
+ slidingMin := func(data []float64, window int) { + minimier := obiutils.NewMinMultiset(func(a, b float64) bool { return a < b }) + ldata := len(data) + mem := make([]float64, window) // Circular buffer to store window values + + // Initialize buffer with sentinel value + for i := range mem { + mem[i] = 10000 + } + + for i, v := range data { + // Get the old value leaving the window + m := mem[i%window] + mem[i%window] = v + + // Remove old value from multiset if it was valid + if m < 10000 { + minimier.RemoveOne(m) + } + + // Add new value if full window is ahead of us + if (ldata - i) >= window { + minimier.Add(v) + } + + // Retrieve and store current minimum + var ok bool + if data[i], ok = minimier.Min(); !ok { + log.Error("problem with minimum entropy") + data[i] = 0.0 + } + + //xx, _ := minimier.Min() + //log.Warnf("Pos: %d n: %d min: %.3f -> %.3f", i, minimier.Len(), v, xx) + } + } + + // ======================================================================== + // FUNCTION 5: computeEntropies - Calculate normalized entropy for each position + // ======================================================================== + // This is the central function that calculates the entropy of each k-mer in the sequence + // at a given scale (wordSize). + // + // Algorithm: + // 1. Encode the sequence into words (subsequences of size wordSize) + // 2. For each k-mer, count the frequencies of words it contains + // 3. Calculate normalized entropy = observed_entropy / maximum_entropy + // 4. Apply a sliding min filter to smooth results + // + // IMPORTANT: STEP 1 below uses NormalizeInt for circular normalization of words! + // This means "atg", "tga", and "gat" are considered the same word. 
+ computeEntropies := func(sequence []byte, + maskPositions []int, // Positions of ambiguities + entropies []float64, // Output: normalized entropies for each position + table []int, // Frequency table for words (reused between calls) + words []int, // Buffer to store encoded words (reused) + wordSize int) { // Word size (scale of analysis) + + lseq := len(sequence) // Sequence length + tableSize := 1 << (wordSize * 2) // Actual table size (must fit all codes 0 to 4^wordSize-1) + nwords := kmer_size - wordSize + 1 // Number of words in a k-mer + float_nwords := float64(nwords) + log_nwords := math.Log(float_nwords) // log(nwords) used in entropy calculation + entropyMax := emax(kmer_size, wordSize) // Theoretical maximum entropy (uses CanonicalKmerCount internally) + + // Reset frequency table (must clear entire table, not just nalpha entries) + cleanTable(table, tableSize) + + for i := 1; i < lseq; i++ { + entropies[i] = 6 + } + end := lseq - wordSize + 1 // Last position where a word can start + + // ======================================================================== + // STEP 1: Encode all words in the sequence + // ======================================================================== + // Uses left-shift encoding: each nucleotide is encoded on 2 bits + // a=00, c=01, g=10, t=11 + + mask := (1 << (wordSize * 2)) - 1 // Mask to keep only last wordSize*2 bits + + // Initialize first word (all nucleotides except the last one) + word_index := 0 + for i := 0; i < wordSize-1; i++ { + word_index = (word_index << 2) + int(obikmer.EncodeNucleotide(sequence[i])) + } + + // Encode all words with sliding window + for i, j := 0, wordSize-1; i < end; i, j = i+1, j+1 { + // Shift left by 2 bits, mask, and add new nucleotide + word_index = ((word_index << 2) & mask) + int(obikmer.EncodeNucleotide(sequence[j])) + + // *** CIRCULAR NORMALIZATION *** + // Convert word to its canonical form (smallest by circular rotation) + // This is where "atg", "tga", "gat" all become 
"atg" + words[i] = obikmer.NormalizeInt(word_index, wordSize) + } + + // ======================================================================== + // STEP 2: Calculate entropy for each k-mer with sliding window + // ======================================================================== + s := 0 // Number of words processed in current k-mer + sum_n_logn := 0.0 // Sum of n*log(n) for entropy calculation + entropy := 1.0 // Current normalized entropy + cleaned := true // Flag indicating if table has been cleaned + + for i := range end { + s++ + + switch { + // CASE 1: Filling phase (fewer than nwords words collected) + case s < nwords: + cleaned = false + table[words[i]]++ // Increment word frequency + + // CASE 2: Position contains an ambiguity + case i >= (nwords-1) && maskPositions[i-nwords+1] < 0: + entropies[i-nwords+1] = 4.0 // Mark entropy as invalid + if !cleaned { + cleanTable(table, tableSize) // Reset table + } + cleaned = true + s = 0 + sum_n_logn = 0.0 + + // CASE 3: First complete k-mer (s == nwords) + case s == nwords: + cleaned = false + table[words[i]]++ + + // Calculate Shannon entropy: H = -Σ p(i)*log(p(i)) + // = log(N) - (1/N)*Σ n(i)*log(n(i)) + // where N = nwords, n(i) = frequency of word i + // + // NOTE: We iterate over entire table (tableSize = 4^wordSize) to count all frequencies. 
+ // Canonical codes are not contiguous (e.g., for k=2: {0,1,2,3,5,6,7,10,11,15}) + // so we must scan the full table even though only ~10 entries will be non-zero + sum_n_logn = 0 + for j := range tableSize { + n := float64(table[j]) + if n > 0 { + sum_n_logn += n * math.Log(n) + } + } + // Normalized entropy = observed entropy / maximum entropy + entropy = (log_nwords - sum_n_logn/float_nwords) / entropyMax + + // CASE 4: Sliding window (s > nwords) + // Incremental update of entropy by adding a new word + // and removing the old one + case s > nwords: + cleaned = false + + new_word := words[i] + old_word := words[i-nwords] + + // Optimization: only recalculate if word changes + if old_word != new_word { + table[new_word]++ + table[old_word]-- + + n_old := float64(table[old_word]) + n_new := float64(table[new_word]) + + // Incremental update of sum_n_logn + // Remove contribution of old word (before decrement) + sum_n_logn -= (n_old + 1) * math.Log(n_old+1) + // Add contribution of old word (after decrement) + if n_old > 0 { + sum_n_logn += n_old * math.Log(n_old) + } + // Add contribution of new word (after increment) + if n_new > 0 { + sum_n_logn += n_new * math.Log(n_new) + } + // Remove contribution of new word (before increment) + if n_new > 1 { + sum_n_logn -= (n_new - 1) * math.Log(n_new-1) + } + } + + entropy = (log_nwords - sum_n_logn/float_nwords) / entropyMax + } + + // Store entropy for position corresponding to start of k-mer + if s >= nwords && maskPositions[i-nwords+1] >= 0 { + if entropy == 0 { + log.Errorf("Zero entropy @ positon %d", i-nwords+1) + } + entropies[i-nwords+1] = entropy + } + } + + // ======================================================================== + // STEP 3: Apply sliding min filter + // ======================================================================== + // Replace each entropy with minimum in window of size kmer_size + // This allows robust detection of low-complexity regions + slidingMin(entropies, kmer_size) + 
// log.Warnf("%v\n%v", e, entropies) + } + + // ======================================================================== + // FUNCTION 6: applyMaskMode - Apply masking to sequence + // ======================================================================== + applyMaskMode := func(sequence *obiseq.BioSequence, maskPositions []bool, mask byte) (obiseq.BioSequenceSlice, error) { + // Create copy to avoid modifying original + seqCopy := sequence.Copy() + sequenceBytes := seqCopy.Sequence() + + // Mask identified positions + for i := 0; i < len(sequenceBytes); i++ { + if maskPositions[i] { + // Operation &^ 32 converts to UPPERCASE (clears bit 5) + sequenceBytes[i] = sequenceBytes[i] &^ 32 + } + } + + return obiseq.BioSequenceSlice{seqCopy}, nil + } + + // ======================================================================== + // FUNCTION 7: masking - Main masking function + // ======================================================================== + // Calculates entropies at all scales and masks positions + // whose minimum entropy is below the threshold. 
+ masking := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { + bseq := sequence.Sequence() + + // Identify ambiguities + maskPositions := maskAmbiguities(bseq) + + // Initialize data structures + mask := make([]int, len(bseq)) // Stores scale detecting minimum entropy + entropies := make([]float64, len(bseq)) // Minimum entropy at each position + for i := range entropies { + entropies[i] = 4.0 // Very high initial value + } + + freqs := make([]int, 1<<(2*level_max)) // Frequency table (max size) + words := make([]int, len(bseq)) // Buffer for encoded words + + // ======================================================================== + // Calculate entropy at maximum scale (level_max) + // ======================================================================== + computeEntropies(bseq, maskPositions, entropies, freqs, words, level_max) + + // Initialize mask with level_max everywhere (the ambiguity exception below is disabled) + for i := range bseq { + v := level_max + // if nuc != 'a' && nuc != 'c' && nuc != 'g' && nuc != 't' { + // v = 0 + // } + mask[i] = v + } + + // ======================================================================== + // Calculate entropy at lower scales + // ======================================================================== + entropies2 := make([]float64, len(bseq)) + + for ws := level_max - 1; ws > 0; ws-- { + // Recompute the entropies at word size ws into entropies2. + // freqs and words are scratch buffers shared with the level_max pass: 
+ // freqs is sized for level_max, so it is large enough for every + // smaller ws, and computeEntropies clears the part it uses. + computeEntropies(bseq, maskPositions, entropies2, freqs, words, ws) + // Keep minimum entropy and corresponding scale + for i, e2 := range entropies2 { + if e2 < entropies[i] { + entropies[i] = e2 + mask[i] = ws + } + } + } + + // Force entropy to 0 for ambiguous positions + for i, nuc := range bseq { + if nuc != 'a' && nuc != 'c' && nuc != 'g' && nuc != 't' { + entropies[i] = 0 + } + } + + // ======================================================================== + // Identify positions to mask + // ======================================================================== + remove := make([]bool, len(entropies)) + for i, e := range entropies { + remove[i] = e <= threshold + } + + // Save metadata in sequence attributes + sequence.SetAttribute("mask", mask) + sequence.SetAttribute("Entropies", entropies) + + return applyMaskMode(sequence, remove, maskChar) + } + + return masking +} + +// CLISequenceEntropyMasker creates an iterator that applies entropy masking +// to all sequences in an input iterator. +// +// Uses command-line parameters to configure the worker. 
+func CLISequenceEntropyMasker(iterator obiiter.IBioSequence) obiiter.IBioSequence { + var newIter obiiter.IBioSequence + + worker := LowMaskWorker( + CLIKmerSize(), + CLILevelMax(), + CLIThreshold(), + CLIMaskingMode(), + CLIMaskingChar(), + ) + + // Apply worker in parallel + newIter = iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers()) + + // Filter resulting empty sequences + return newIter.FilterEmpty() +} diff --git a/pkg/obitools/obilowmask/options.go b/pkg/obitools/obilowmask/options.go new file mode 100644 index 0000000..c9b2f50 --- /dev/null +++ b/pkg/obitools/obilowmask/options.go @@ -0,0 +1,72 @@ +package obilowmask + +import ( + "strings" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" + "github.com/DavidGamba/go-getoptions" + + log "github.com/sirupsen/logrus" +) + +var __kmer_size__ = 31 +var __level_max__ = 6 +var __threshold__ = 0.5 +var __split_mode__ = false +var __mask__ = "." + +func LowMaskOptionSet(options *getoptions.GetOpt) { + + options.IntVar(&__kmer_size__, "kmer-size", __kmer_size__, + options.Description("Size of the kmer considered to estimate entropy."), + ) + + options.IntVar(&__level_max__, "entropy_size", __level_max__, + options.Description("Maximum word size considered for entropy estimate"), + ) + + options.Float64Var(&__threshold__, "threshold", __threshold__, + options.Description("entropy theshold used to mask a kmer"), + ) + + options.BoolVar(&__split_mode__, "--split-mode", __split_mode__, + options.Description("in split mode, input sequences are splitted to remove masked regions"), + ) + + options.StringVar(&__mask__, "--masking-char", __mask__, + options.Description("Character used to mask low complexity region"), + ) +} + +func OptionSet(options *getoptions.GetOpt) { + LowMaskOptionSet(options) + obiconvert.InputOptionSet(options) +} + +func CLIKmerSize() int { + return __kmer_size__ +} + +func CLILevelMax() int { + return __level_max__ +} + +func CLIThreshold() float64 { + 
return __threshold__ +} + +func CLIMaskingMode() MaskingMode { + if __split_mode__ { + return Split + } else { + return Mask + } +} + +func CLIMaskingChar() byte { + mask := strings.TrimSpace(__mask__) + if len(mask) != 1 { + log.Fatalf("--masking-char option accept a single character, not %s", mask) + } + return []byte(mask)[0] +} diff --git a/pkg/obiutils/minmultiset.go b/pkg/obiutils/minmultiset.go new file mode 100644 index 0000000..b3767a3 --- /dev/null +++ b/pkg/obiutils/minmultiset.go @@ -0,0 +1,118 @@ +package obiutils + +import ( + "container/heap" +) + +// MinMultiset maintient un multiset de valeurs et expose le minimum courant. +// T doit être comparable pour servir de clé de map. L'ordre est défini par less. +type MinMultiset[T comparable] struct { + pq priorityQueue[T] // tas min + less func(a, b T) bool + count map[T]int // cardinalité logique par valeur + pending map[T]int // suppressions en attente (lazy delete) + size int // taille logique totale +} + +// New crée un multiset. less doit imposer un ordre strict total. +func NewMinMultiset[T comparable](less func(a, b T) bool) *MinMultiset[T] { + m := &MinMultiset[T]{ + pq: priorityQueue[T]{less: less}, + less: less, + count: make(map[T]int), + pending: make(map[T]int), + } + heap.Init(&m.pq) + return m +} + +// Add ajoute une occurrence. +func (m *MinMultiset[T]) Add(v T) { + heap.Push(&m.pq, v) + m.count[v]++ + m.size++ +} + +// RemoveOne retire UNE occurrence de v. Retourne false si absente. +func (m *MinMultiset[T]) RemoveOne(v T) bool { + if m.count[v] == 0 { + return false + } + m.count[v]-- + m.pending[v]++ + m.size-- + m.shrink() + return true +} + +// Min retourne le minimum courant. ok=false si vide. +func (m *MinMultiset[T]) Min() (v T, ok bool) { + if m.size == 0 { + var zero T + return zero, false + } + m.cleanTop() + return m.pq.data[0], true +} + +// Len retourne la taille logique. 
+func (m *MinMultiset[T]) Len() int { return m.size } + +// --- internal --- + +// cleanTop pops from the top of the heap every value that is pending deletion. +func (m *MinMultiset[T]) cleanTop() { + for m.pq.Len() > 0 { + top := m.pq.data[0] + if m.pending[top] > 0 { + m.pending[top]-- + if m.pending[top] == 0 { + delete(m.pending, top) // ← drop the exhausted entry from the map + } + heap.Pop(&m.pq) + continue + } + break + } +} + +// shrink discards the tombstones currently sitting at the top of the heap. +func (m *MinMultiset[T]) shrink() { + // light cleanup on removal to bound the growth of stale entries + if m.pq.Len() > 0 { + m.cleanTop() + } +} + +// priorityQueue implements heap.Interface for T. +type priorityQueue[T any] struct { + data []T + less func(a, b T) bool +} + +func (q priorityQueue[T]) Len() int { return len(q.data) } +func (q priorityQueue[T]) Less(i, j int) bool { return q.less(q.data[i], q.data[j]) } +func (q priorityQueue[T]) Swap(i, j int) { q.data[i], q.data[j] = q.data[j], q.data[i] } +func (q *priorityQueue[T]) Push(x any) { q.data = append(q.data, x.(T)) } +func (q *priorityQueue[T]) Pop() any { + n := len(q.data) + x := q.data[n-1] + q.data = q.data[:n-1] + return x +} +func (q priorityQueue[T]) peek() (T, bool) { + if len(q.data) == 0 { + var z T + return z, false + } + return q.data[0], true +} +func (q *priorityQueue[T]) Top() (T, bool) { return q.peek() } +func (q *priorityQueue[T]) PushValue(v T) { heap.Push(q, v) } +func (q *priorityQueue[T]) PopValue() (T, bool) { + if q.Len() == 0 { + var z T + return z, false + } + return heap.Pop(q).(T), true +}