From bf27de1528550549e36022296d1fdaaf4e85a188 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 16 May 2023 13:39:01 +0200 Subject: [PATCH] Correction of go_rps12 for not passing anymore the sequence as a variable Former-commit-id: 0f9bb9472a53aa16a91a9cab5106ee66ee781c34 Former-commit-id: 016607c59e62105850d1d25f29bfe214943abc5c --- detectors/cds/bin/do_rps12.sh | 3 +-- detectors/cds/lib/rps12_filter_3.awk | 33 +++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/detectors/cds/bin/do_rps12.sh b/detectors/cds/bin/do_rps12.sh index 130912b..ac34f62 100755 --- a/detectors/cds/bin/do_rps12.sh +++ b/detectors/cds/bin/do_rps12.sh @@ -118,10 +118,9 @@ blastx \ | $AwkCmd -f $LIB_DIR/rps12_filter_2.awk \ | $AwkCmd -v delta="$DELTA" \ -v seqlen="$SEQLEN" \ - -v chloro="$SEQUENCE" \ + -v chloro="${QUERY}" \ -f $LIB_DIR/rps12_filter_3.awk - nrps12=$(ls -1 rps12_fragments_*.fasta | wc -l) if (( nrps12 > 1 )) ; then diff --git a/detectors/cds/lib/rps12_filter_3.awk b/detectors/cds/lib/rps12_filter_3.awk index d25fbab..c549ad9 100644 --- a/detectors/cds/lib/rps12_filter_3.awk +++ b/detectors/cds/lib/rps12_filter_3.awk @@ -6,7 +6,7 @@ function min(a,b) {return (a <= b) ? 
a:b } x=x substr(s,i,1) return x } - + function swapchar(s,a,b) { gsub(a,"@",s) gsub(b,a,s) @@ -31,6 +31,37 @@ function revcomp(s) { s = swapchar(s,"d","h") return rev(s) } + +function extractFirstSequence(file) { + # State variables. NOTE(review): they are not in the parameter list, so they are awk globals; "sequence" is also assigned by the main rule. + sequence = "" + inSequence = 0 + + # Read the file line by line; the loop also ends when getline reports end-of-file or an error + while ((getline line < file) > 0) { + # Header lines start with ">": the first one opens the record, the next one ends it + if (substr(line, 1, 1) == ">") { + if (inSequence == 1) + break; # Stop: a second header means the first sequence is complete + else + inSequence = 1; # First header seen: start collecting sequence lines + } else if (inSequence == 1) { + # Strip every whitespace character (spaces, carriage returns, ...) from the sequence line + gsub(/[[:space:]]/, "", line); + sequence = sequence line; + } + } + + # Close the file + close(file); + + # Return the concatenated first sequence (empty string if nothing was read) + return sequence; +} + +BEGIN { + chloro = extractFirstSequence(chloro) +} { from = max(1,$1 - delta) to = min($2 + delta,seqlen) sequence = substr(chloro,from,to-from+1)