From a0ab14c205a8aa8384caffec4e703c9822f71fe5 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sat, 18 Feb 2023 19:54:21 +0100 Subject: [PATCH] Complete the documentation and add a Release note file --- Release-notes.md | 29 ++ cmd/obitools/obidistribute/main.go | 1 + doc/_book/commands.html | 220 +++++++++--- doc/_book/search.json | 26 +- .../figure-epub/unnamed-chunk-10-1.png | Bin 0 -> 16101 bytes .../figure-epub/unnamed-chunk-9-1.png | Bin 0 -> 15336 bytes .../figure-pdf/unnamed-chunk-10-1.pdf | Bin 0 -> 5305 bytes .../figure-pdf/unnamed-chunk-9-1.pdf | Bin 0 -> 5732 bytes doc/_book/utilities.html | 333 ++++++++++++++++++ doc/_obipairing.qmd | 139 ++++++++ doc/_utilities.qmd | 26 ++ doc/commands.qmd | 208 +++-------- .../figure-epub/unnamed-chunk-1-1.png | Bin 0 -> 19464 bytes .../figure-pdf/unnamed-chunk-1-1.pdf | Bin 0 -> 5610 bytes 14 files changed, 768 insertions(+), 214 deletions(-) create mode 100644 Release-notes.md create mode 100644 doc/_book/tutorial_files/figure-epub/unnamed-chunk-10-1.png create mode 100644 doc/_book/tutorial_files/figure-epub/unnamed-chunk-9-1.png create mode 100644 doc/_book/tutorial_files/figure-pdf/unnamed-chunk-10-1.pdf create mode 100644 doc/_book/tutorial_files/figure-pdf/unnamed-chunk-9-1.pdf create mode 100644 doc/_book/utilities.html create mode 100644 doc/_obipairing.qmd create mode 100644 doc/_utilities.qmd create mode 100644 doc/commands_files/figure-epub/unnamed-chunk-1-1.png create mode 100644 doc/commands_files/figure-pdf/unnamed-chunk-1-1.pdf diff --git a/Release-notes.md b/Release-notes.md new file mode 100644 index 0000000..6291fa6 --- /dev/null +++ b/Release-notes.md @@ -0,0 +1,29 @@ +# OBITools release notes + +## February $18^th$, 2023. Release 4.0.0 + +It is the first version of the *OBITools* version 4. I decided to tag then following two weeks +of intensive data analysis with them allowing to discover many small bugs present in the previous +non-official version. Obviously other bugs are certainly persent in the code, and you are welcome +to use the git ticket system to mention them. But they seems to produce now reliable results. + +### Corrected bugs + +- On some computers the end of the output file was lost, leading to the loose of sequences and + to the production of incorrect file because of the last sequence record, sometime truncated in + its middle. This was only occurring when more than a single CPU was used. It was affecting every obitools. +- The `obiparing` software had a bug in the right aligment procedure. This led to the non alignment + of very sort barcode during the paring of the forward and reverse reads. +- The `obipairing` tools had a non deterministic comportment when aligning a paor very low quality reads. + This induced that the result of the same low quality read pair was not the same from run to run. + +### New functionality + +- Adding of a `--compress|-Z` option to every obitools allowing to produce `gz` compressed output. OBITools + were already able to deal with gziped input files transparently. They can now produce their résults in the same format. +- Adding of a `--append|-A` option to the `obidistribute` tool. It allows to append the result of an + `obidistribute` execution to preexisting files. +- Adding of a `--directory|-d` option to the `obidistribute` tool. It allows to declare a secondary + classification key over the one defined by the '--category|-c` option. This extra key leads to produce + directories in which files produced according to the primary criterion are stored. +- Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to the expression language. \ No newline at end of file diff --git a/cmd/obitools/obidistribute/main.go b/cmd/obitools/obidistribute/main.go index 29c6ef8..80e47ae 100644 --- a/cmd/obitools/obidistribute/main.go +++ b/cmd/obitools/obidistribute/main.go @@ -16,6 +16,7 @@ func main() { _, args, _ := optionParser(os.Args) fs, _ := obiconvert.ReadBioSequences(args...) + obidistribute.DistributeSequence(fs) obiiter.WaitForLastPipe() diff --git a/doc/_book/commands.html b/doc/_book/commands.html index 2d0eaa3..0e8d7f0 100644 --- a/doc/_book/commands.html +++ b/doc/_book/commands.html @@ -20,6 +20,69 @@ ul.task-list li input[type="checkbox"] { margin: 0 0.8em 0.2em -1.6em; vertical-align: middle; } +pre > code.sourceCode { white-space: pre; position: relative; } +pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } +pre > code.sourceCode > span:empty { height: 1.2em; } +.sourceCode { overflow: visible; } +code.sourceCode > span { color: inherit; text-decoration: inherit; } +div.sourceCode { margin: 1em 0; } +pre.sourceCode { margin: 0; } +@media screen { +div.sourceCode { overflow: auto; } +} +@media print { +pre > code.sourceCode { white-space: pre-wrap; } +pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } +} +pre.numberSource code + { counter-reset: source-line 0; } +pre.numberSource code > span + { position: relative; left: -4em; counter-increment: source-line; } +pre.numberSource code > span > a:first-child::before + { content: counter(source-line); + position: relative; left: -1em; text-align: right; vertical-align: baseline; + border: none; display: inline-block; + -webkit-touch-callout: none; -webkit-user-select: none; + -khtml-user-select: none; -moz-user-select: none; + -ms-user-select: none; user-select: none; + padding: 0 4px; width: 4em; + color: #aaaaaa; + } +pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } +div.sourceCode + { } +@media screen { +pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } +} +code span.al { color: #ff0000; font-weight: bold; } /* Alert */ +code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ +code span.at { color: #7d9029; } /* Attribute */ +code span.bn { color: #40a070; } /* BaseN */ +code span.bu { color: #008000; } /* BuiltIn */ +code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ +code span.ch { color: #4070a0; } /* Char */ +code span.cn { color: #880000; } /* Constant */ +code span.co { color: #60a0b0; font-style: italic; } /* Comment */ +code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ +code span.do { color: #ba2121; font-style: italic; } /* Documentation */ +code span.dt { color: #902000; } /* DataType */ +code span.dv { color: #40a070; } /* DecVal */ +code span.er { color: #ff0000; font-weight: bold; } /* Error */ +code span.ex { } /* Extension */ +code span.fl { color: #40a070; } /* Float */ +code span.fu { color: #06287e; } /* Function */ +code span.im { color: #008000; font-weight: bold; } /* Import */ +code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ +code span.kw { color: #007020; font-weight: bold; } /* Keyword */ +code span.op { color: #666666; } /* Operator */ +code span.ot { color: #007020; } /* Other */ +code span.pp { color: #bc7a00; } /* Preprocessor */ +code span.sc { color: #4070a0; } /* SpecialChar */ +code span.ss { color: #bb6688; } /* SpecialString */ +code span.st { color: #4070a0; } /* String */ +code span.va { color: #19177c; } /* Variable */ +code span.vs { color: #4070a0; } /* VerbatimString */ +code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ div.csl-bib-body { } div.csl-entry { clear: both; @@ -175,16 +238,36 @@ div.csl-indent {
  • 4.3.2 Function defined in the language
  • 4.3.3 Accessing to the sequence annotations
  • -
  • 4.4 Metabarcode design and quality assessment
  • -
  • 4.5 File format conversions
  • -
  • 4.6 Sequence annotations
  • +
  • 4.4 Metabarcode design and quality assessment +
  • +
  • 4.5 File format conversions +
  • +
  • 4.6 Sequence annotations +
  • 4.7 Computations on sequences
  • 4.8 Sequence sampling and filtering
  • +
  • 4.9 Utilities +
  • @@ -268,40 +351,52 @@ div.csl-indent {

    Several OBITools (e.g. obigrep, obiannotate) allow the user to specify some simple expressions to compute values or define predicates. This expressions are parsed and evaluated using the gval go package, which allows for evaluating go-Like expression.

    4.3.1 Variables usable in the expression

    -
    -

    4.3.1.1 sequence

    -

    sequence is the sequence object on which the expression is evaluated

    -
    -
    -

    4.3.1.2 annotation

    -
    +

    4.3.2 Function defined in the language

    -
    -

    4.3.2.1 len

    +
    +

    Instrospection functions

    +
      +
    • len(x)is a generic function allowing to retreive the size of a object. It returns the length of a sequences, the number of element in a map like annotations, the number of elements in an array. The reurned value is an int.
    • +
    -
    -

    4.3.2.2 ismap

    +
    +

    Cast functions

    +
      +
    • int(x) converts if possible the x value to an integer value. The function returns an int.
    • +
    • numeric(x) converts if possible the x value to a float value. The function returns a float.
    • +
    • bool(x) converts if possible the x value to a boolean value. The function returns a bool.
    • +
    -
    -

    4.3.2.3 hasattribute

    -
    -
    -

    4.3.2.4 min

    -
    -
    -

    4.3.2.5 max

    +

    4.3.3 Accessing to the sequence annotations

    +

    The annotations variable is a map object containing all the annotations associated to the currently processed sequence. Index of the map are the attribute names. It exists to possibillities to retreive an annotation. It is possible to use the classical [] indexing operator, putting the attribute name quoted by double quotes between them.

    +
    annotations["direction"]
    +

    The above code retreives the direction annotation. A second notation using the dot (.) is often more convenient.

    +
    annotations.direction
    +

    Special attributes of the sequence are accessible only by dedicated methods of the sequence object.

    +
      +
    • The sequence identifier : Id()
    • +
    • THe sequence definition : Definition()
    • +

    4.4 Metabarcode design and quality assessment

    -
    -

    4.4.0.1 obipcr

    +
    +

    4.4.1 obipcr

    Replace the ecoPCR original OBITools

    @@ -309,14 +404,17 @@ div.csl-indent {

    4.5 File format conversions

    -
    -

    4.5.0.1 obiconvert

    +
    +

    4.5.1 obiconvert

    4.6 Sequence annotations

    -
    -

    4.6.0.1 obitag

    +
    +

    4.6.1 obiannotate

    +
    +
    +

    4.6.2 obitag

    @@ -326,15 +424,15 @@ div.csl-indent {

    Replace the illuminapairedends original OBITools

    -
    -

    4.7.1.1 Alignment procedure

    +
    +

    Alignment procedure

    obipairing is introducing a new alignment algorithm compared to the illuminapairedend command of the OBITools V2. Nethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.

    The new algorithm is a two-step procedure. First, a FASTN-type algorithm (Lipman and Pearson 1985) identifies the best offset between the two matched readings. This identifies the region of overlap.

    In the second step, the matching regions of the two reads are extracted along with a flanking sequence of \(\Delta\) base pairs. The two subsequences are then aligned using a “one side free end-gap” dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step.

    Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads.

    -
    -

    4.7.1.2 The scoring system

    +
    +

    The scoring system

    In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair.

    If we consider a nucleotide read with a quality score \(Q\), the probability of misreading this base (\(P_E\)) is : \[ P_E = 10^{-\frac{Q}{10}} @@ -388,6 +486,12 @@ P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}} \end{aligned} \]

    Probability of a match under the random model

    +

    The second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals \(0.25\). Under that hypothesis

    +

    \[ +P(MATCH | \text{Random model}) = 0.25 +\]

    +

    The score is a log ration of likelyhood

    +

    Score is define as the logarithm of the ratio between the likelyhood of the observations considering the sequencer error model over tha likelyhood u

    @@ -399,38 +503,49 @@ P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}
    -
    -

    4.7.1.3 obimultiplex

    +
    +
    +

    4.7.2 obimultiplex

    Replace the ngsfilter original OBITools

    -
    -

    4.7.1.4 obicomplement

    +
    +

    4.7.3 obicomplement

    -
    -

    4.7.1.5 obiclean

    -
    -
    -

    4.7.1.6 obiuniq

    +
    +

    4.7.4 obiclean

    +
    +

    4.7.5 obiuniq

    4.8 Sequence sampling and filtering

    -
    -

    4.8.0.1 obigrep

    +
    +

    4.8.1 obigrep

    -
    -

    4.8.1 Utilities

    -
    -

    4.8.1.1 obicount

    -
    -

    4.8.1.2 obidistribute

    +
    +

    4.9 Utilities

    +
    +

    4.9.1 obicount

    +

    obicount counts the number of sequence records, the sum of the count attributes, and the sum of the length of all the sequences.

    +

    Example:

    +
    obicount seq.fasta  
    +

    Prints the number of sequence records contained in the seq.fasta file and the sum of their count attributes.

    +

    Options specific to the command

    +
      +
    • --reads|-r Prints read counts.
    • +
    • --symbols|-s Prints symbol counts.
    • +
    • --variants|-v Prints variant counts.
    • +
    -
    -

    4.8.1.3 obifind

    +
    +

    4.9.2 obidistribute

    +
    +
    +

    4.9.3 obifind

    Replace the ecofind original OBITools.

    @@ -443,7 +558,6 @@ Lipman, D J, and W R Pearson. 1985. Rapid and sens
    -
    + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + + +
    + +
    +
    +

    5  Utilities

    +
    + + + +
    + + + + +
    + + +
    + +
    +

    5.0.0.1 obicount

    +
    +
    +

    5.0.0.2 obidistribute

    +
    +
    +

    5.0.0.3 obifind

    +
    +

    Replace the ecofind original OBITools.

    +
    + + +
    + +
    + + +
    + + + + \ No newline at end of file diff --git a/doc/_obipairing.qmd b/doc/_obipairing.qmd new file mode 100644 index 0000000..1b3e3a4 --- /dev/null +++ b/doc/_obipairing.qmd @@ -0,0 +1,139 @@ +### `obipairing` + +> Replace the `illuminapairedends` original *OBITools* + +#### Alignment procedure {.unnumbered} + +`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`. +Nethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases. + +The new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap. + +In the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\Delta$ base pairs. The two subsequences are then aligned using a "one side free end-gap" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step. + +Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads. + +#### The scoring system {.unnumbered} + +In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair. + +If we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is : +$$ +P_E = 10^{-\frac{Q}{10}} +$$ + +Thus, when a given nucleotide $X$ is observed with the quality score $Q$. The probability that $X$ is really an $X$ is : + +$$ +P(X=X) = 1 - P_E +$$ + +Otherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading error have the same probability : + +$$ +P(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \frac{P_E}{3} +$$ + +At each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \neq X_2$, an observed mismatch. + +**Probability of a true match when $X_1=X_2$** + +That probability can be divided in two parts. First $X_1$ and $X_2$ have been correctly read. The corresponding probability is : + +$$ +\begin{aligned} +P_{TM} &= (1- PE_1)(1-PE_2)\\ + &=(1 - 10^{-\frac{Q_1}{10} } )(1 - 10^{-\frac{Q_2}{10}} ) +\end{aligned} +$$ + +Secondly, a match can occure if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but identical. + +$$ +\begin{aligned} +P(X_1==X_{E1}) \cap P(X_2==X_{E1}) &= \frac{P_{E1} P_{E2}}{9} \\ +P(X_1==X_{Ex}) \cap P(X_2==X_{Ex}) & = \frac{P_{E1} P_{E2}}{3} +\end{aligned} +$$ + +The probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ an observed match : + +$$ +\begin{aligned} +P(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \frac{P_{E1} P_{E2}}{3} +\end{aligned} +$$ + +**Probability of a true match when $X_1 \neq X_2$** + +That probability can be divided in three parts. + +a. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$. +$$ +P_a = (1-P_{E1})\frac{P_{E2}}{3} +$$ +a. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$. +$$ +P_b = (1-P_{E2})\frac{P_{E1}}{3} +$$ +a. $X_1$ and $X_2$ corresponds to sequencing error but are actually the same base $X_{Ex}$ +$$ +P_c = 2\frac{P_{E1} P_{E2}}{9} +$$ + +Consequently : +$$ +\begin{aligned} +P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}{3} + 2\frac{P_{E1} P_{E2}}{9} +\end{aligned} +$$ + +**Probability of a match under the random model** + +The second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals $0.25$. Under that hypothesis + +$$ +P(MATCH | \text{Random model}) = 0.25 +$$ + +**The score is a log ration of likelyhood** + +Score is define as the logarithm of the ratio between the likelyhood of the observations considering the sequencer error model over tha likelyhood u + + +```{r} +#| echo: false +#| warning: false +#| fig-cap: "Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40." +require(ggplot2) +require(tidyverse) + +Smatch <- function(Q1,Q2) { + PE1 <- 10^(-Q1/10) + PE2 <- 10^(-Q2/10) + PT1 <- 1 - PE1 + PT2 <- 1 - PE2 + + PM <- PT1*PT2 + PE1 * PE2 / 3 + round((log(PM)+log(4))*10) +} + +Smismatch <- function(Q1,Q2) { + + PE1 <- 10^(-Q1/10) + PE2 <- 10^(-Q2/10) + PT1 <- 1 - PE1 + PT2 <- 1 - PE2 + + PM <- PE1*PT2/3 + PT1 * PE2 / 3 + 2/3 * PE1 * PE2 + round((log(PM)+log(4))*10) +} + +tibble(Q = 10:40) %>% + mutate(Match = mapply(Smatch,Q,20), + Mismatch = mapply(Smismatch,Q,20), + ) %>% pivot_longer(cols = -Q, names_to = "Class", values_to = "Score") %>% + ggplot(aes(x=Q,y=Score,col=Class)) + + geom_line() + + xlab("Q1 (Q2=20)") +``` diff --git a/doc/_utilities.qmd b/doc/_utilities.qmd new file mode 100644 index 0000000..8f44295 --- /dev/null +++ b/doc/_utilities.qmd @@ -0,0 +1,26 @@ +## Utilities + +### `obicount` + +`obicount` counts the number of sequence records, the sum of the ``count`` attributes, and the sum +of the length of all the sequences. + +*Example:* + +``` bash +obicount seq.fasta +``` +Prints the number of sequence records contained in the ``seq.fasta`` +file and the sum of their ``count`` attributes. + +*Options specific to the command* + +- `--reads|-r ` Prints read counts. +- `--symbols|-s` Prints symbol counts. +- `--variants|-v` Prints variant counts. + +### `obidistribute` + +### `obifind` + +> Replace the `ecofind` original *OBITools.* diff --git a/doc/commands.qmd b/doc/commands.qmd index 08f95bd..be5b4e0 100644 --- a/doc/commands.qmd +++ b/doc/commands.qmd @@ -57,191 +57,89 @@ Several OBITools (*e.g.* obigrep, obiannotate) allow the user to specify some si ### Variables usable in the expression -#### sequence +- `sequence` is the sequence object on which the expression is evaluated. +- `annotations`is a map object containing every annotations associated to the currently processed sequence. -sequence is the sequence object on which the expression is evaluated +### Function defined in the language -#### annotation +#### Instrospection functions {.unnumbered} -### Function defined in the language +- `len(x)`is a generic function allowing to retreive the size of a object. It returns + the length of a sequences, the number of element in a map like `annotations`, the number + of elements in an array. The reurned value is an `int`. -#### len +#### Cast functions {.unnumbered} -#### ismap +- `int(x)` converts if possible the `x` value to an integer value. The function + returns an `int`. +- `numeric(x)` converts if possible the `x` value to a float value. The function + returns a `float`. +- `bool(x)` converts if possible the `x` value to a boolean value. The function + returns a `bool`. -#### hasattribute +#### String related functions {.unnumbered} -#### min - -#### max +- `printf(format,...)` allows to combine several values to build a string. `format` follows the + classical C `printf` syntax. The function returns a `string`. +- `subspc(x)` substitutes every space in the `x` string by the underscore (`_`) character. The function + returns a `string`. ### Accessing to the sequence annotations +The `annotations` variable is a map object containing all the annotations associated to the currently processed sequence. Index of the map are the attribute names. It exists to possibillities to retreive +an annotation. It is possible to use the classical `[]` indexing operator, putting the attribute name +quoted by double quotes between them. + +```go +annotations["direction"] +``` + +The above code retreives the `direction` annotation. A second notation using the dot (`.`) is often +more convenient. + +```go +annotations.direction +``` + +Special attributes of the sequence are accessible only by dedicated methods of the `sequence` object. + +- The sequence identifier : `Id()` +- THe sequence definition : `Definition()` + + ## Metabarcode design and quality assessment -#### `obipcr` +### `obipcr` > Replace the `ecoPCR` original *OBITools* ## File format conversions -#### `obiconvert` +### `obiconvert` ## Sequence annotations -#### `obitag` +### `obiannotate` + +### `obitag` ## Computations on sequences -### `obipairing` +{{< include _obipairing.qmd >}} -> Replace the `illuminapairedends` original *OBITools* - -#### Alignment procedure - -`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`. -Nethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases. - -The new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap. - -In the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\Delta$ base pairs. The two subsequences are then aligned using a "one side free end-gap" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step. - -Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads. - -#### The scoring system - -In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair. - -If we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is : -$$ -P_E = 10^{-\frac{Q}{10}} -$$ - -Thus, when a given nucleotide $X$ is observed with the quality score $Q$. The probability that $X$ is really an $X$ is : - -$$ -P(X=X) = 1 - P_E -$$ - -Otherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading error have the same probability : - -$$ -P(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \frac{P_E}{3} -$$ - -At each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \neq X_2$, an observed mismatch. - -**Probability of a true match when $X_1=X_2$** - -That probability can be divided in two parts. First $X_1$ and $X_2$ have been correctly read. The corresponding probability is : - -$$ -\begin{aligned} -P_{TM} &= (1- PE_1)(1-PE_2)\\ - &=(1 - 10^{-\frac{Q_1}{10} } )(1 - 10^{-\frac{Q_2}{10}} ) -\end{aligned} -$$ - -Secondly, a match can occure if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but identical. - -$$ -\begin{aligned} -P(X_1==X_{E1}) \cap P(X_2==X_{E1}) &= \frac{P_{E1} P_{E2}}{9} \\ -P(X_1==X_{Ex}) \cap P(X_2==X_{Ex}) & = \frac{P_{E1} P_{E2}}{3} -\end{aligned} -$$ - -The probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ an observed match : - -$$ -\begin{aligned} -P(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \frac{P_{E1} P_{E2}}{3} -\end{aligned} -$$ - -**Probability of a true match when $X_1 \neq X_2$** - -That probability can be divided in three parts. - -a. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$. -$$ -P_a = (1-P_{E1})\frac{P_{E2}}{3} -$$ -a. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$. -$$ -P_b = (1-P_{E2})\frac{P_{E1}}{3} -$$ -a. $X_1$ and $X_2$ corresponds to sequencing error but are actually the same base $X_{Ex}$ -$$ -P_c = 2\frac{P_{E1} P_{E2}}{9} -$$ - -Consequently : -$$ -\begin{aligned} -P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}{3} + 2\frac{P_{E1} P_{E2}}{9} -\end{aligned} -$$ - -**Probability of a match under the random model** - - -```{r} -#| echo: false -#| warning: false -#| fig-cap: "Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40." -require(ggplot2) -require(tidyverse) - -Smatch <- function(Q1,Q2) { - PE1 <- 10^(-Q1/10) - PE2 <- 10^(-Q2/10) - PT1 <- 1 - PE1 - PT2 <- 1 - PE2 - - PM <- PT1*PT2 + PE1 * PE2 / 3 - round((log(PM)+log(4))*10) -} - -Smismatch <- function(Q1,Q2) { - - PE1 <- 10^(-Q1/10) - PE2 <- 10^(-Q2/10) - PT1 <- 1 - PE1 - PT2 <- 1 - PE2 - - PM <- PE1*PT2/3 + PT1 * PE2 / 3 + 2/3 * PE1 * PE2 - round((log(PM)+log(4))*10) -} - -tibble(Q = 10:40) %>% - mutate(Match = mapply(Smatch,Q,20), - Mismatch = mapply(Smismatch,Q,20), - ) %>% pivot_longer(cols = -Q, names_to = "Class", values_to = "Score") %>% - ggplot(aes(x=Q,y=Score,col=Class)) + - geom_line() + - xlab("Q1 (Q2=20)") -``` -#### `obimultiplex` +### `obimultiplex` > Replace the `ngsfilter` original *OBITools* -#### `obicomplement` +### `obicomplement` -#### `obiclean` +### `obiclean` -#### `obiuniq` +### `obiuniq` -## Sequence sampling and filtering +## Sequence sampling and filtering -#### `obigrep` +### `obigrep` -### Utilities +{{< include _utilities.qmd >}} -#### `obicount` - -#### `obidistribute` - -#### `obifind` - -> Replace the `ecofind` original *OBITools.* diff --git a/doc/commands_files/figure-epub/unnamed-chunk-1-1.png b/doc/commands_files/figure-epub/unnamed-chunk-1-1.png new file mode 100644 index 0000000000000000000000000000000000000000..ca3b8b3b8760724eebfe93d7fb7964b7a9a76d71 GIT binary patch literal 19464 zcmeIaWmME(^fwAfgS0dVNJy82w4?|~cT0DtAVY{EDGf>p(ka~xozkUrqjWdiGvM$4 z+_mnDd)M>gzIxUgX3dxR&YW}h*=O(1-utsBMCrB6Lv&JfI5@b6vM(i7;NajFz#jw^ z8T`kM>c=1)93qyLgoKifgp7oPt%I|wqp^vZjG4Wevz3X8j5r(|e|WgMz9qRjfykHI za%S3rz_IM?XsjEa*a};f;h^6eH)iE_Gb}N-6)&QCwq{eT=((4#zFRSh5WK)?$z6z9 zxYcc8ptdg94rR@#P|83Ie0nx?A@gm72yL^LySzT8#-_1v=6vnz?z{Clmz_C`s2UdC z_&GWR7SdwY)zPK$uC?}d0oNAap@+UhtjrN{OmXz^WXrA1zqfa(I_D-&s!wk%Hj9;H z;6Kj3pDN_G?C7dR4D)wI4VA5x67Z=El7U-WVA8x(yvNt)~0-*C;#=NpDH z=Zqy;;LHbly0r-PcJ4@CRdO1BMR$9r@)dIpn-XqeiHWt;`}7HAse%>L$G&h)OWv-7 zk7qS&gysmoJFFH~0+U0}v&=ueVA%;>KETz;5oy=VOHB~JHe{_P=%f=4Hu#F^+Rfp1 zb$sEjnUaLM6#bkCgEWkeU88beyxUK+lHog!VU??28eu9E7HJ z<`B=meX=){-@0q5{-go(otI^S+Yp+$P)I=h8|VZFwwe+`Oqfit^G0QKV^MbOJP9-GV={zwyOeXq(f#5gOk~LRQfMWu$QQ_b}SivEJSMcDE z6#T)#A*3Vx_aFF$^aua*8UlM$sE7as4o(bCR`P|KC;WB_il>_TMQ8I7IyEAmw@m&+ zC0kVV(Be>c|!@@mHWn~T9qd5t=r_{96Jy(No z-FFi)8M%A>nta_#7-i4qcK84bYSw#PPQw)BM`~EgD1bUowxG!)iveEZn1c#J3`0o{3q8Qak(6j60 zmCsLPzL%B`oI4F#!NISqKAz{SD#f|>*WpmV5qn6mKt5?>r`B0WD8=9husj0=<`ANn^N7D)lPFJ1trBaCcq6CsY zOeGV#?TVnlu&%p##X?Q}VsFX3i-_u@AuU-T(pLx-?b}F5c$EhzAva5^XsQuQ zhXVRcUix*PoUg1yw!F+~Xn53PC5#xmdk=)nLa#ZF5tNNo;bYn*LzJ3IF|f!>kv2!U z@~+_PW6ub4*JteEZy;@>Ba&MeTT>5f9v;ZSSG}N%i|hDUX+wJw6G^)@^rA=flDwz! zfXRtYlYNsg^~wEy!-4%qV)`boJyof?M93P|U8GBE=w`EeLPquem`v>{)12hpdBgER zhmom{Ppz16{b;KE8WNsb>fv0^q9Lj+~PTIm3WN3Cz3UY$yU+Q+Az#`@sSmr z!q&_T$=6>|#jWK7?$r$k(GrarM~#HxpH+yJfFS5$QKiz~ErrTo3@AQI0Mqil?o#w022yHJXA#RbyT@rum4>?{Q!qlLoOzy@XuuVQvuCH zRPU7icfE>J+Q|34riLF4`*d5F=D?5b3J(v@_D=){4UKrhb9q5zwwz#APtx;XnVj}^ zNsP7PE0)FcA2xF>6j$5Hw@+7*Ft3!c>XDW&Ny*3x`WEDFwl^w&CLBCJ)|tlG%Tvwq zDHc3~3wB&zTl=j7*?)qFD}A#~ytkwZ?Ai2HS9|*u@2;o)-qqCcFaM8=zw)|I=eBk~ z?Jos3tgo&nrBGx$z#~)`@(Q}kBXCt?icvb zgM0ZViUYfZ?oR*ePCN^)2n0qsqVO!DcYFPRKKg&e^#7kqP>sZ@^}T$?$ET@Lq(cWF zfb*Q!x~!U-8j0p1!9UwG6$8maJX`6B;j*3Luo}${-nVp`YhJ zD-O);m`a<;UL+#D1(B2M)A>!4PJc>a@0S2o{Ma0~FkSHrrFi_9oh_lS;BBF+vZQ3t z!eB9c8^-YPaCB6ZQ6CMYQU4HY2j=Pmn<+kILn)}~={whVODOzuY|mG*oU>W!pnv}S zQK__6=R>gFook$Kj0|7xOXzFxI-Ke9KG|Ya&sTHWy)65jn7Glm8edOnXj+eq3V*Xj z%5>4Moz>tz*sRX4LHpRr$w_2BJ}qtcP`IJx0+EFd>RHbE^r@q2o5cEgX~bhugFOqx zkGwZNXqKD|3=Dn#4^UumD?yjli8Tm=6@t(3T8l=s$()9VM;)8p)XePMyMV5^t2`zt z$ttAbEE%oAHDeuRu!-V<&Q8wjl<0;Ap=mQwAtAkyk&%_|&rhObVwP2JhP{i`3*r?| z_ug#cmyQuThLiHpK6%on@NnAg@0ioBOW;4VF8&_uT#ahZ>oU_GylZ#QjUnpuo%-FC z8JEF_k0w}9{%uD#0byJ)j^VsjP~x{aRf2J%Z0;#U^0yl{Wl{a zfMtb49Mk??1^|Rcj@bRr>p>9z&)auM_5XU#Q~_|b-7)gW!4v8X8ViFgM=E&Ke+H1!504V!xIx~7zmrjtND}t zWu@z}U+ngDwOopT+srDlkZ0+Pb06!$y00QgaV_}hJYdU+Q-Tdy_>QB%C zy!(p}&f8_Suth3i`KS5aX#broDd{NvDi@kES85z6rZw!0g^is6>|t+9APQ1IL@BT} ze}=vVhLLbfQzRSXo=^ee8Ed{p;7SgR|wxOF)Gw~vZVzYYGS)+2^Y^N4ZidxA*~@)= zum6Cad-42~4za8Oc(u_YXb3 zsV&UR&~!Rc6ZGAat`7Rx1CmD5d=5hTlX>K#CK3$aF@lhK)TGH}eIPDU^hRr^Zj)_R z|N7=~ck}WS9YL!gK&~cT5$7{@{_Gr-$(b^dzYjvg8YB>3f8{sslfmCaRZJDiYc6zW zfoo^x<{pjEP5>A?Br>wv?uObxi%ZotBT}CUfnDTmyXBOZfuSqMcKJQoG2urzkkSzLzH*5hm+Dx+q8cVu${AgyObV&%a~VQViPz>iQ~;VEJsW0$FGbE*V(74ljGZ6Rt7c_o{fQUWsniM%VzWX0o2#6OSTu*h-k?-{ zHvE9Yq!Uw^6T{4NEy?02%<8*;h_>+C6W76XEJ>0gVv9s?mmkL!+zsmII*6_hzLDXo zE&sT%FA%Tb^c# zK}hRn(#>MqhvO*9x-W9*XW_Hu7LgIy`hQa4n-_`kJ9o=Q5pUSy)+y8@Xv`QlICAv_K?(i1ZFba}c zw35b?WCK;y5)#b&7zaGh;qSeIa2g}0at$40wh>7~g96^})XK1z@kZ?b7iqps_dzf~ zD>W9*p>4yqnJ9G9jb{>lZtx$-poIhPa>O@6U!e)<8hz$P(CH|RKDas@wBa_ppt}7R zUcsSxNvlq>wW5i<*aPY3ct z1H(SM>3ofByBxP@$z|ZV%xxT{@Zi798XQ`zB-94pe!8kiK8Z`JxbZ}%_Gm;-?o(*j zKZFB8Y@l5jC+5I(wEb1q5pPb`xB~6__a0R)VuLk&n+Kgv^KxhSD6jArJ_m#TG&2Cr zz5^5X;wL};eHwxaUJxcGWhn3*db*9IoNwB#RvT|FHqBZBka8&2?*n}B>>rKh$qL;w zS5BU!F2py#%Xh&RwwDUgG=YA^vWG5Y{|)Dx8xkS2_?b+{tWh{*pZj$~y7z5+L-ZZK zg2q7im%EVqbenH3Yz9>i&ezjy1o3lHo-V?zJ&k)b=e^0gomNw5(=V{@HhK(>@n|Ri zg2ZWI`9O3a3U+Uk?_~vU#Od7I!OuNsKJKOmteLhm*$S<|K>KMf444NRcu|%3I z&{SL}8X6Ej)g$3H5A!)1mbqYU>kr_tyHgkcS2z$U7CP5BtRFv!%Z~(--(6q)k!`R@ z^~!ES5m`sUB&I627@WA=TgQgpm`NMwltkWKR zbGE;Hyq}xm@Ky}cv) z@Sc6pX|(&aIK&sNCOOSmXKfhWpvLi=(O1I5@K4BRU%QAob?W%$eah{7;wxwgk-sfa zb)yQpZ$-t%5;S6%s7OQi^3($+5imD3Z_fw)Rsbv*!Y>(O3g!+5FQ=Hyj79FxukA}X z1fOwrO*Eb21U=^v`-fy5XD8DRW+UHa13Kq+X6q~EW;ToKU#A6Qk@Q7foNO~|eST2l zYq^>+NvJ=rfn|Wff$ndQAXazrx4X4}u?@AI-73cBCr zeM$5^8Ir)3T^~qs9vuxwjMDuGUhjwt zFnQtR%@yS2Mh9QZnk1U7qO?9pDD_8m-$MzTw^}LpMqBPP3YnMOlY+Y8xbz2Ad7n8% zG7!J9yN?6V++*%0(E*7IS2b7GbC!Nn(43qE_(4A5B> zj^WC90h?um-MfoFIRQLsAbM@_t%GV6Ai2CbT>BYQ#`1ZC?CcKk!L|vi04*Tos1qI9 ze((V^6@Ijqn~O^b+vUGt@?|b}lg(vVRU>zT$<@-Gt%Z-M``9I3(k*xEn*wbr8-5DH5c*}BDGvY9a> zbWGk1lKr*avv!uDW%YWYQ*4JvL=11p5xrL$~*~NPz1Dv)O z54`?;0K$j3AhWPtJs%&>s%u>xAhmlAq_Dcs66m}f&ew}AdX@g+`J$gl6ntAA2Zy@O z8NkQKUVdwt{e&D`T;~0Wc&z%3z@3jYc)4sN=T~4J^8c5#l7w>!D=n-9-mDA+52tiU zNJw0kLsUhqI7UZh^mKF~qVES&rfzwgF8(rmfx#*|WCmS2Zvv1oDgF_wqJ+zW{k!g@&D}nHO5P^NOO^-?w$3IW545KDGSZRyzca|LP~^;;5}H+igu^IXjr&I}9+^ zT6we-dDe7&q6-9DS4v5w0uhEd-a*%|FVp8s{JOC!e`l2~cK)_HupGS|cyvI|hU!>i z&}e$F((`+pRbwt4=IfqB^!Ll#A(P-kmY0{2+R{P9HjWTxLj`>z9X1r4AstSd)lmbU zy(30GJz_pO&~`PGNXO*}A)XzW)N=_BqY%`HXVK31$+m28#_xH)n&9@%`)K3aXp69x z^X06^e8?gOJc17hbC%WKBOuj?PkG|Qlp~1HA5DR*2aV@O=hqNarXD9T=n+1CX8n6( zyB{`_#dsuMbaQ2mgjGbjSKlH zecDvzn={7Rolv!(HRxTg?3Abub1k_9{;zw+ncV!$?4_AK7hB)|uHcMsjwd8!)%ng~ z+X#@G^PuplCAAb0kNBH68JAVj=G<-O9Z>%VZZ;kun~2bIv)6__drC}(V;}aI41T$H zqfGiKDSKz}@I(D1rA1k-IN7nSw~Bc$PS5L4yFZ5OUX852ara8ROSw!XK{&C(e(1Q; zB@l3Uq3gHp$wuxk)bG^EDRwgb`5hhctz)`Km#;BDN8$t4@4P7LiVdAkF4`afE71`t z%YV4IxKs|lb$yFMlSmKU82n&oHApXn9(Ln>R8Ho2rN8%dRb&6>;cvO|Zq!0~h?Zk>3#_Z1b-YC$^YI|xsmVR0v&1kM|W2l2cu=>&Bd z!*1cLiitz-Av>mF!fj1bAh&|}X1-9H+g~ZOGR%HkEQn(~s92Az4xu){gU*$XID(u5 z*>Sh=^ejr1xJf~YcTm9U(Vqa|Xz6#HM>6InL zoo2qHo<8-i9#&ZA;B{JRM{DjN>HnyrstV$v!oor$`pxQ!y{!eLkqG!%*F-2@F5iV1 zx9rQ8L+op%y%yDw5J4PVAwiDqO*zO@5cT&Z=r%N+fV8iH;pXQ1@%-~5()&gd+8Qr}0S~-nKCam_v{*ryOl#P)Me&p7Sms)T#*D zR~kqYH572)dJ57!QO{l0HXd8Cv|_2{5|KUO{=yTlU+d*OAHVI)bhb<5&qTUR2a7?X zXdwx>Ch}D@NbQYum1X+@rXSgtI{GB2I*6y+`m znq*l|Hm&ox(;|VC@jz`ep3D5FfUN559Cq4p;rD44H%u?S1Bbk@^C%L*_4xrWnMc2P zZoB#FsJ4^+0vl5fciS|-Cd5lnEg)c)8$b3v`aFP)=++%Y?M)U_H00fp@MrPEoLj~Nxy)$kENwQ`5CPWhujbC+sSvC7H%vEO3npA6&pr-CcoTxFH@HEid z?r9kOGTkQ==nG$4*EgNSr6u!83+ojt1QN!W#Dl(4oe)mn7$ z^i7nW3-Ecm)x|S@x7R!G+$#b?Z#azVLG1G4iAbSFGI_Tk-Qd<7Gmm;xz2@mHVnF(% zV6=M&rIjoOMXE7q^yc#1TX=&*|N3ig4y4UiYZ&r=m;)tPv4QRl<47!{>L&xdy=|xC z&3rT2bFLR8e{+k?Wjt~Z)|Yl531yMyIBK%N$8g>3K zt42W^MUPU9-@0?kLH0yUc(Y`#hY-HLVdnY7S7N?H&E&YNw0#2LEMh{P5D@ZhKk_ij>`KHf6PAVtaZC z%yO5PzKhyvzI_DGe$RfPf|r7rKt)85IWlgvGLfAP#wAJW-9Ds#T{R(S`1)^KpF$sb zA!o#*D%y>_BxUq#eZuFUdK2WrjQ4Q&E(eb1$N|fvYn|`+x{MZ@hts{*uCA3M`P;2z z^Zq}e3DpJVZ3o#nNBvL9A9-Z7`+8N@n1=C0wA2R0dS_kD{M&*np?)92~r0RVL5mU0!E?}KD(rh&PocM-tTtb<>hsLxXhZ^GuaCJ zbhl)xSU`NZ*85UDUYu2A)?)rl^6X%RRx}5?aW8*yf51O_pcJ}HHWnkNp75-Jp=h8N zT?{?!U&3Vvl>OmzTTsn%O*iHJo>}>QPNm$lTWIbTI#PxzL$AiB=;gK03utUn&>wyVCbU;Yx=$8 z8=XIaik%ln+CG+aUmu3V0g(mjkp+&Zm^CNk3hO5o1bvGrP>Bxw`ryA*4iZoSeCx#~ zZ#OkLena}5DaP?}t}G0-(`Wy}`*Kupa)U0gRD%WR3-%XW@bFifSf|Z=-K)@Qjs8*H z%z<8ikraJJ^4GJ<$|SEKo-ur?Lg6Ie?A5;v}*xI|l`p6zPA>H4>hMp`ms zT-fL8d!wTD84hVGpwii6_P4VV$zL(eXIE$*8muPwp_%^>4V29?bYeXy_;`iJjC!-h zPH?5V5TEcTUAfd<_Do3`oQF@*7<;~-dvzvMtyMHsc_c{tVyf0x5c#E~|1p>3!A(XZ zC!9t6z(CZRE6_4>iSMWc3oOWn_}`z0XJB(#4I&*U;3Oz&-?P=~hU|p|AcU?b{<_a* z?gfK+lD(ogRAr}^Hs`frsgG^6X^e6J-IdDIu`@t5`Z1=z$b3+`d7qnmVPOmZ6A7ut2KH|D5nGImjl&PQyc3 zC0{O>6g#ze0+pel0b9PnsvM7_{$8+vbyT(J;#j~n=Jc|b&By%IAdln`-E4toxiMp+ z8BC=ed4U;L*z^-0?JQIDQcMO1!YJGI15*euD_Z$orfYx}%4zgv;%yDr*uLa4@`#4Q z0BI4unVo>wNH(2u6>N*4Nk9d7i%%2x9FxB3wC3fLk=g{EuCD&7dDbrgQ#Afiu|%DL zr81Y#<>lpcdT5{7;U{HeWaJ_gehSju8s{VF?TKcRU#dO#zOB%~i(4)9OYr9oMH`(ldW&4y)$f}w1iMUyzIUqZ(jTQ)aN zzJOcFx*3+lmIB#E;ujX4UFZ36hDQsy2cTf*dqWc&!R2YOcL0aW@3f@I@bHT|(b3Su zFIn1pUmMa{EWWwfjz=XYGkH4os@Nn0&eWEvx%qF-&FI9$T)%E?a(<(}D+XHHjfBS^ z6K4?&zK)KJ9F;F`_K6zQ*o^0!Rnu@c19I0+iR*Kc^*>99R1Kr*9!JUezqkyFaM>z6 zx}^78o$m5C;!(iX1Tj@MCZmOiX!k{u$1v8P1S$1s5y18ZkT3Ou3EsF}B&=9F=iV@S zSx*`WKezGRuE1fY4wZnZ^fwq~jr3Tnkl=xeO|O50D%V?wx-HE#j|o{-oA*V+oCR9& zHFtMn!PsZRuPCz$&D53O1sTqHh{ce6Y1ydO%8u$qKgpVCJ zD4aIVT1C&!)9vJH#gp>CHZtgY7iqJnJ>{%A@8!PK#FD#nud zcf+tMj5ic+u*<>JX_nQPc12VAs7VwG64(n=z~p^Ub04kIO3Iu-8ML3!a*pN8GA66) zo&ts~LE#}6|M%Zo^5A&j(1YnN_Oj+#?lB5AACnxs4D`-__N>GAay9Gd0m3?#$HccRih>D}Ltp}Myn&8>)66kQ zcxvzqv8mdCLYSLW95V;YH;#iFcHrR95L3Zw_QF`x5OkWf_^wV<>glA;33pUjdk(zJ zk?$0J2azAy_LTHU_2#j?mLl7$d>U-~S?cEe|B}`IP+*f$;!Et_M5>uU6?v?_H!ybe z3V*5YasN_hUGdP-JQf_6eK3^>Kg1hEoSd1gpK5Y((XmKH^}Kd*fxO3Fmlh#4cP49s z4xC>5*~I8_)74Un&G#S zRV3NuSJ%?=*Yd=;xANLxJhNe^y;_a^eXdHYC+FHpg+8vAY`-Jj_sXFez`!B`tHsZu zCqXlTQK0cTq&MPdr2okQOOe@o$&@l2pcxa?z(h~vVaTnmB**@>=F(^q%0c8;^^H)o z$5PGwqdOHH*C7vJ^c2vDDUETn*XAE4Ud2uAaU_-V={NB^O9p?>%5P4_%VDwNyqC6G z7(fopoj;vC0C;13TxCbsX!E_h@6Y9`&Z@5G{x?iU*IM$f0HiyDrypQg2XyVg^u}mg|59P+QMR#?rVsZ#`^=pQz#J+Ca6ChKa&ocX zrPX4E^20p(9ejh03O%o?Qn)F2>x+>A>eOn@dh% zqE`cpBA5xc$?#X)T*?vdg4HIuv8lw*${7>`-~qyKhQV4TLleI<8vxbzAL z1B(_2!zk;E{z{$Gj|2GvZqs5N3B4ywDxJAM78k9R*{><{Su57g|h0vnIc?^-P?}5@MSzx~Upkj*g`DdJZ;F9e}PP z9q#@FdPn;=84Y>na-zdE{{y%694Uv*SIB^&QysO|o7)w%Ex1Zw8GI(%RW4pL#zG2U zNG>=`A9TV6a)lJ)&IWAfB-e69r(!9aoI=a;D;3<=z_J1<9}vbN+RA~+PUo9+K13j5 za**47IvO9Dud%<|`_*i|ax*UFkFfJI(fOL7G@uT&M$dsa=u=1IR>MVDw9e4jCGA~~ zPR`}2&`9Q87?udf9#lsbe(jW@Ar*B%30wjkKujpa9GTc`{`{*>Xxv#u)mC)+`RZ|b z^{21WD#=zt`=7M@L{)`hu>YZcpU_5;QqaYa@NC5JFBX%4YN2`SIiCc!jKn}f5asa~Kz6VPlrd;PzRQKk z@8;@!5XyVvEF4 zjT%X51IQ|;izCY88fmd6bD4b#Bjyx;V`4H(GT-1;USTsi&=|Q5NTzkCfY1oanNkX4 zq(wg1%{e;l6#2B>^By~&$;Kp$9OBsD++l1~LjP_fI<;phpe!JJeQ}%xrYVIj2XcRE zMLOfYnA7zhyY9Ot%Sjfgp8=z*Cert6zbiUAdTH*|EYtjFa+98Yo{jP2=gQBSN=-Np zQUOiX`ean?Zwa3zRlykMpRNhJsLx-j(PpTb*5%BCr}j`u^65Qi^Kah7B~CL!gvE78 zUpX{GuBK5qs}XKs-*WahK#qzX4<2Bsx$oihrbiNK}8J~gIkAy zc#D@M?{#sJ!YMc}ebN2)o6e;Ekw+`P#j)aY2R^>S{`8t0beEua;67pHX&F{6TrmZ! zQvdp4^5JM>SR$sqy#Nqh`tTKnsClf%92L7;W#tFNZxJi~aZ5om4QrSL*;$FoOfnsM?bMhFELJ?<^!?m=R**aTo$vX=F zP00cjAC!FrYo3!i=kniuU#ibL-8`+3Z^>6R@co()KY9$$y)fKat(>64afC0{EJ?+r z6sjSHM=i!JZocuJEt)AcmSC;1r11DVCVMMp#E94ViR2~J8qw&3cU9So>+2uOTyB#F zBvr+ijBWjM6-Qxt985*AA>`=fR9=Ocd7E-ezxup5A*>t=8!|^_2p?oXE@m< z9#RRNe>xt7;_rFUU{uEkF4G9-$SI4T$bU4srR-Jh@i|lb@8D(@E~=^tM)An&WH<8Q z=R;Cq#bpNwb?LsW;AuF##_#r>`-=n=ezjJTEie>}fFbE{zK=n6Q{#o{{%_*Cu{JXYt)z54j zpfsD*nSzEAEOw4gl<3+QRtb1rW~uA4c%?M{o;=TYcIqmD$3u{|Ih!mzU_hblVUMcD zCXc3M>~Z!;`Hg^|42#GIsElOc-I2ZHxzy0U=OuMU7SO$#L-e%vzFeR_pKQ85NYcUL zp=DLS9w*?BGlB&Hb_m(TOQ?3=St!sv6{%jisbgzoTpCYn{7zW{Kq+fI`Pf^&cE~$Z zVvZ;bR!EEmHdv31#y||gpFv6Crw4J)mKwYDtoZL-&fotbrB^(C$-ui%Jwq$S5O}s3 zmsQ5-A8Z}4Q2vv&FMnCvJ7u5ZOI0tOth%Y#Pg1-hoxZq-PS*X;*GwbGmG=`Kakv$_ zi8r*LQ~xX*S$vM?$OYoZ7@IV+Y8HPib-dN0_@ArGVe(0}4Ej47=3Idk*3weFs62UF z$$xu#g)DxR*9NBWWy>IZgYlfvE4#UZXyLU)>!ewg|EB6GxUa6AXN7461etjTV+@VZ z{8W+4H&B+#r?;P{J3M(j65lkS%8mq+&WxG{Fwq4Umg{N0iZ@oUI=lI)$)BPi9xOfB88SdE?A=O z;?-d6t6lIlK`)2JSK(K!E|;$>z0vRzr!K7#01l;jXG#2MT4`{dekDc9`h*2pD%DK* zUqZ1hdMo`3V}U1m+S^W`hhub)x*X1c5Dg(TEIV!2DGM^fa{a;5n^^iwa`t- zG4~ujPE;AC>8+<)^YeP7-ZbVjZp1XWRIfb?R1x@_9mKTSaaGsvaJr2uiK2HYKmQ?y zg_rQK?gBlp)i}WQCA;9UZNYQj{;i(O=V0IR_5C~8#ON0WL6;mSC+CF+O0y^?VC_Ya zKQJG^&s{}9ej$?oLf|D67=`73U=JBI4s+4<^>v~Gw|;pT9n`EX_+dwcOP3!aufrS* zjBy0o3J61CjVObi*RURiu{_m|0jDleM9`tSa(lCTYYG5Dj_{@Dn&{;gcZ&yv+7S_O zZ;R7zGZ^!P=LhK}}4=jP}8U60*f;h1bmB!b#&!MTe@ogeceAQo^rE{L6j`m8Cata81( z^7KF$6uB%E0)&QuT_|hy42_`2j%(U-upgTI7hVN}x7$1(->+t!G99m^O>^=wPz)sT``!kx;H-B@&_Z;xOSA)syqhOwIT+@&hIMN*bz6VYCP;61j1yF1sz-zOp`V*RK2<}1pAmUIP+?vz_KaWo%jwkNj)sP=+xT1dn zuZ>M(6lV?8%KEe|QnDg*va)1BS_#kva;Ds7^7#EHigf!xt$!n80TlWJ*pv}n<*EUs z^2M44b#S*`m&^l1K(sFDAmaQs{(ct$bI^9SPF?gA#}Kh>lKASz;r8a53-E_)0flPD z>CmGqJb>(zST_bo_RaE7QLZvilUq=f2CY0)paW?Kg_@C3mWzvP=5i*N9R3R0)zR_i zC6tVH5fH~WuaAyg^8iI#5Z*RnOm49WLiUKB!)Zy9MA*B+JTjpl6oAoE-JV1>zG(GJ z<9K%1j<2{uNqIFu?B6U1`ceYGsE$_S zX6UaN(J8Vw`unk-7P5$xepgPjKd)L)9&d1Zl(C0I`$aYaKHmr+z&&68>heiytA~=| zyj~UabEf+zJ`4)ABfvd-?z9wSM9-$vgKBv_k*C`Af&jsA%x!bx&kv~zsDQ`L*6UJ0 zSLpC2gEoOu9YD9aLjF896R2S#3)&^1n&87&#gB|7wN!`P_Jt|Hj{44thFXMx$Z!7g_3H`0Zz{Fs2@m90 zIqJ#v)1^9y&I6P$=;Y!zIF0o_WW9uuvwvx0*~8*$4&Ell541p(9q za$^?JR!=%}IqtJaC?Lte+LmBEgqkOyW1%ixeF#PhK=y<};JxYD=rI>%KDGEE7T50x zz$ijyQ>Dh~I{PMW7M^M3Gf5tT+_*ykKe}Ebf z7Bv`b>!!DJs6c)g4$3+ccI$MTQVR;|4%%^;3zH!Q?&G_L)6>(FemEnMM7Wwoqp=O8 zmx8ad?BLM7x7*(opg&BY-}H+B?tqzV$T%!wHIV4PF9eFc#QSbnZ3_p7Vg~yy01zLV zR+cHOm{r2Xx zSq^AgV_ESr;8}N*SbSCPe@0*E(FoYqzk~T_PFloC!*mFE^wFUl&(8qc$(s8MM;-PD z{HB48PI*VqrPgFuEvW;&i1!C9hE009<*nXx*$Q9kHVLb$4i642+uId_$Y_mVjVjYQ z#pFq<{Fas@-$Q0R=6$2%;^+WxCJr!ltv&~k^+dOVeP+i|NA>UmHS2dn8sGko7pynA zuJtDgC=jbc@lrsyM=xMuJp~jR1hrQjPL2}&LL<~8UHg9mq~`k_Yx|CI7XDn(1kf|s%tnT@^e$`F3N+K+?zN!k!{|(x0>=Rh zDw{p}fFo9VsijT!N>Bgi7d{WhQ`en7wF2GsYtUmc+!Z@$;2Z9pG<4Zpkt0{M?gj`t zy}ps&)J}}kZ9Bg9KryBQ;dG%VUYgE&uh;vNGV9+h3W+c=_3FQK-EZwBtTQn5Ttqr` z;*Xa};E-qb#vkgyOmO1(X$jgO={E@Juf&NSax8wFV4CTJ^=@3KfmR90kfKX2&qi1q zO!_4${A{mdyTQm~Oe5$MHQ>k17NGt+{6-}tvoq4<8}Y%ShHWkcm{*3b!%_$)x6%-v zA0H^aUmkMYk?Wxsh~##kMlQyex5!)FIfF{S|1kKNX5ENh7Znb%pFw^|+I?}n87^V= zPQ2ZiY91BG1l14_mck`EM22TWiQvDZiH_hjX274C6uSQ%%e&Zvh2;5<43F?mc}F_< zPe3ckOjjwV-;IwQQyDO_7yJi-Y-btpm!-?Lh>6FNzc*Tu@nMnkTO1>dDazxO8ohih z+U?pd7jYG396-XXYU<=i-an>;>f-2BNkbsivGMcwJqA$T+U$%>LnmDHBQ=Ql|y#s24nq6kbg|9iYj`DRt zpe@~oZX1# zjmRVi7!+Al7D@@=>q#zy%o<*~U+6{AaS+7C?svt$(AKy}{Gr%XX+t-vrE(i*JD|BU z73o&`>fuk(;D1C(NiJ z$a5gSQ$(jh>Orsnsyr+ZIH3lGzss?>W#>=(hG$CG05Hy)H6KstR;nI?ZV2&9o%aQe zpAcEt#ZS2>&Pd@|U(0{Gn4Z5`vv-4;1%dnDMrkW#1;Y)VM{#TcQ;Z6I+H`w}BKud}^5&(33gl{Bybg@~2VNN5VHI7Fm zOc};z=1O9s=MBiLJMqjDn{QIst|*8oh&qW`b$U{FZiTo%In4l!gmHC}(Qyr6Q4PDl zF-{>iC69INJ9wyDVi9P)v7Idnuk4bBKpo~9wC}Zm47aq)nsIHU2QgbdX-Q=%bF!Jq z;=ubYVCb;Xxw7#X_hhx#5K|V$sT;O9Pr{2HkEs^-vDFt&HBKqFZm7s59|@SWXVxX? z=Z|k*Y#QGFRnc!|<9Yi#3)5land96cwoPm!+U;4F=rwkNE1=0E65V=l7T32{x+|k+U&UOaF zR|s+%iQ|QsPL}x5bhGsqgOtJ~@@QX0!izx@g(gO!9TK+@7g6WC@Q^oBqteVI%grTC z7uLgVGoVyZ$`2xr4(5-)Fj(N_y#q*O$Cof|#o<3)4}FuUt0&VxrVDX_M4MFr&3Ol^ zVkZwIrHu^{5+8>D8Z`7hR?m}*EYuSi7cP1jM^M>qwtB_o0PegekOb&r(Lr`}U5%N^ zhN_-GvXo2F&0RlF7sK^)pz$YyvPVnYV8%V*Pm}?**`FqZv9J_11d$x%akmzFEr#ze z`$6o=8!4=M|I-zKF@jzi$ebp19~y9g-(;B*I+0Jh=Pa}!-aNjVe=fQXEilShnNl`#SioP3_UH5t3dH3YxcEc+0UA-LY={YfMpP0TB0^-kCxqCu2f<=9A(Ah` z@2Scgj2(){fFR&zD{R*OwtM~|){b$11o4KC$8Jzy90Achf9S%$xPH`s@@MDIfB?{n z{K~8PNRz7ZfZ|$D;s@2F48mFK5E* z0PHs{cxylWpyB(~xW4;wv^;beBC`>oE$0yXn`a|B(-#gA{{E(chK<=7IwIkiq5ULZ z2v9_VWV){}369~Q7c69fUKDQ*6H;S%dVw4o4c75gBl~b@N7T@8N@1ze4-?#&_H%p> z!KqudYD)8>6DedifcqYg!9THR;Q$Pl8_PdIbIW1Hq?2tSHsAv1h-d_Wc90(%fVGU^ zQ~|oWa8WRTCf67oHE8I^HnwHMMS`Ee6@v3gr88|AB=R=_$g(ju0PsI@#MS>%Yb{O8 z|1EN6v4aIYO;fo3)~!3nc$DArB>!1rxu{L@oxkqo&V|_s6HSY*pI)5TAunz3xDy?0 zi~m!z<7rlj_e8V)?1P+YY$*76M6W5lBKSS_!`b+xTlGOGdRunu?A`MR!G~=m{l%^= zyQOJ;W3A6mjy->S?X9DMIi{6qR$5$uw1fQ5lG``~Fs+>H! zo$^HOO*kmAU(56EV%_pP-w?w)edc+3lgZRTri>qw z(ay^CY`A?hNA42p;w_n$s5Pb2&HGeFmctI8*m1C-HJrC1YDZjvZi@Yq)czP{8Se&% z)0WML^_8qHI(MSMSmMKk${pJikKfdv8M7LRke+cx<+xkz$T6!TRD`+O#7Y<(&TCB^ z7CAXT@dsa#M=N6bOxlSWQ;(#fp2$W*x!A^2YwTO7%KBbrDP}eo?wAy3Z#u@CdB87i zFC!OC`KTA9%!LqEM2f!INrR zroq!)YOJs!tS>^Pn};1E0501~kwq5cU(2mqs($3y(y=l8)t;1T{8dXkhF(TNK<5_O zJrXy-o6Qm#o%Pbs8)|Ov*LI42 z!}rTPvd^td&M*g6vj^kSoeZD4SNfOaZ7GdvDQ*aa3fe7pcG#$V_@ot5^mNd~{Y_0g zQL({?uhXl*z^B1#R!-O%ls11RbQz6YMx-0Y*7zuHcP{wD)AW9)cIYKDvnWsZ_*465 z*VZQN->yJ%dY0}x@WL}Gt-q-Fk5Tk98MW(==!4F_D)H0}3QM!g{^fTkw51OqK0M(x zZyr4?v}?Aa<*$;in}N(pcl^@S*i}IKrIo*bex|LhU$<~g@73!5w^PFXC|HAN7lwZNn59u^Ze7QF4 z(!w50N&oZ#AH-?5-2J@W)d6|EFt@~NERm+WqMxYu{sOwTIokZIZgLt8G2crdyHvI7 zD=QBq{c&eS=Fcqc)U52LG$Fm@px1NbD|(vQ?GKF_bub~BFn+4tTe?GILu&MAW8^D-J zV)#(VEWnb(W^mYCNMzDIIb;|M;ino%3=$2?zWJhaMM~U;@&mQGaJIUe@2JfMyPqJN zD})jC8$84KhX4&oEa)2`f+i647{sQKNiZ1F;6_EEc>`KM5O38!7Xi8UT|iR;fxFZb zKy%xPjoYdQcg4SVuxjkPuhlyMbfD1mX)MaB009yyUS1H~jJQq91<=5Pz7!gV1!&>G zqZ~RLqC#G50Ii_`deY%cu~-z)n@Ktbfh01A4S{3|lg#n;qC$Qkn?m)3KwlD>NvDB_ zm=LT3FCmf1aGwJ`DXE3i2 zbOa=mxb|4wmf-eN%D=_G8nHgN+taw^2Z_)z3K@di2Apj`{qKf>!D`|Df5*6R=D3}0 zzeG$&oo1cYDdD)Z&LciHd?p*}Oj3}_=yk$ox089~a6~%>(K;pnH;m#?SRNszR4y)x z*ShrG8YyD$QO2gNXG}edr3P>MPq&>Pl$j4Xl3myF+uZAaiIYQvhKF4)ozvnt` zdO6Y^0tuC~#MsbcXjJ31^z;u{~LHfkpB zc!hqFA?38~C6mXiW7%nwzpvDFIh`kXEj-Fs&9`2$H`HzU{Hgh4XWS!0r=*B73wJ*` zgk)^sxt0%A^e@KvQp9=O-tk)1uEex?fIM;6>p2^Pk4o1Bx!c@(rRuFxG%(<5&YOKi z2^^hX%-$GEI2hDnQ~!MWEy-^-6@QA2Cb(b(CN1_L>{FMXGp&hWPpP6=T;N^*|4WZ%XDR0+O4;Dsncbg%^7$ToL5th-gIM z7@OHA$U_?Vwa62#9%}}8ZBQGt!j#@49_|&6jA{`Uw~1i`gqCgBqa2gBzTp=jh*)h) z`%Ru;V8SQ9_g#+7m2HL5uSKdC&;13d>inP3Yc1Xk#p?9J{BurRD`(ivwMBSoc??iGeanE%#KX5#GGY z+)sa}=*QUo_ZSz$s-hiDb&0x`+vMxztxddk;*?J8l=YRJRJb7bT&@mCy>H|x#ME`# z_rid-D_eF@;g)rdvZL}E#rK=+ReBZK%C6Ra`LPZfc5`;Ff2Nl>=K2wjntZq_>VDtk zWbr*~=gW3^Y`d}O?sCk5ej_bAErNd;Gk!+kt??$+#@JVcm)3ELON!o#WV^^hOUSmm zm(y!B?lg|yKv2&Y=(M3+E~ooMlq-qH#a%EfH_I|hd~j4;X?yP0F8?VFRrTh2KV;#R$e`Rb+BhIC7lv^XCr2ahrlz!C@ti~Fv$32Nj(p9>nur?pfTjTJfT)1Ufcd#~ zi>fO-SGF(KE!qld3(5)#2o?)gZuk(bdUw-Zx9FAVlEj3>LkYOe9I4$Hd-aW|c1;?# z{)hqYsHT%ww+FLr6ZrwgLjAdKK>zQ!GEH-I5qc{orfvH}f#KS=7`0A`{SpT8g7I7T ziT0#s+@(TE&fRJeYU|Z%RkNL>92;GH%f>4geSGN`XwQ9uo1+hj`PlmOkY|gEGt?g6 zX!`Wjb;fnDx@hEBM=;Hs)?E3Fe7q;?u*{)b4fT(|L`398wB)vty1QhWr(>pQp0wBIX< zm31_m<8H^30wb3Ag@T2$j8mH<3$A(>j_7xIyZ5=vm250|O!fX`_DP9;?^#Ffy}-_x zS7L8%Cs~6I9h&`>kB&d+ctMXER0=H*P5z4ex|P?NcOYyy?CRd;+ow+i>tD(d(0`;a z6y6qY*sxocVdBy9DYf~DGrimFu(~ALl;(1~h&*vR%_G?aSHX#=ua%ggo>xmV+$7v?N&U=0S zRe|K9>OeI^w_#5%{^g?FYeWBdq-vjIX8po~Xrp|lys>E4N%TpzlhazcnS$+e-C>t2 z%nPchQ^+Z?thMyD^i$Ihz!A%x)w@nB>^3`Q@ghq%<5iYT%8cD@3A^=K1&alJm+SYh zw|JKQNon`4y50O|Q__#xnTX`CTf$VR&thWA9+t(GU24rdJL!s(M~Gq}LVx`A+D483 zj^40)#Ov;ubFa_Xrq1rt)SfrVVrHS}9@YIb7Oe_1!LzsAPdGQYb-CoYT)W}HzC7|^ z@jz$c<)Q$GcO^*Y-StiMwi6YR6_g{PiseXEFsK^oBo?FF7e`VhK-uI zD(aPIu}>AB;HxN8`GpoT=6>>$=EdeJW`p!$TK&T5Hiy#(gP)!%hps)$rFPgqw13(6 z?%6=)c0~td2PpGQ`g$E}E!p^f(pjCe6F03$4%U_N*UYb*J4?CtyFpeJ730AivyUc; z4iedZLy}> zH1BHp*xitqA!8?E`Geb%+S=64sX@hzqD!0^k2?*mgMOhs&zq=4^n%K>(tI&HW28gR zFHC;Cs~;2i$F-#LVDAMt-?bHam&C?hUuZo#-7Po+^!L=x{@IbBjtgV+6MFtH zJ}#H8Zow%~R_otSMgHK2~$lvI{x(5twm(>)G#C0Vr+ zjLGhn<&>B2tRK@~m{iHu-LL&TYQI=KQpuuq3^=hQgDc-;b=So$otxhpqOnryHV*9G zGrn8ebVw(<*xDA)}u+&wZFGNU+Qf;h&G3UHWwr;qpAI=g=Qq%{P|A)E5nK*&G{y%Uk>x zDf074mP-Mx8u&guaA_D8ms{e%j0rq8m{UCAqdAv?f)D)vOdaT7$sz)YO`_7hVSii` zzp_g|Z}y+@24;ZlAV)5{hC*UAex7n*@#^jW7_hJ?6%<+lg;Id^{>Pm4vx*%&WijCK z1vDa2SulV0ow?#rsfS238h6yP^P#W+`15^Fs(?MjWWlZhbtGB?jn%^7uTq(o7eHH;X=vhpk)hBSc=r3Z9twqqOZsn_1|9=XtpApwvDjbip>bMpng2^q z6Aw>!|0Tm?e$m6Aa4-q^Z#@k>{uezBEto0&TaV2o!4x4ACQ<>=mJ$GQ^8(t?>F|rk zeZ62xlgj`C=pS^ZDG_kd)h8OEuy{O%h&90B@mQ2PK_5rdC#d7q^$B==1HvBQ|Esuh Y{}wDZiOK#}4=h>>rgr5GEsYTW0d-?Q)c^nh literal 0 HcmV?d00001