Switch from awk to gawk

Former-commit-id: 70b104ab89d4c90a35925dbbcacdafaf6c1f2836
Former-commit-id: bf29abf66345cbc0ca81461064dcca63dfd4c15c
This commit is contained in:
2015-11-08 19:33:00 +01:00
parent 6fa63daf3f
commit 2261fa1c48
16 changed files with 52 additions and 49 deletions

View File

@ -33,7 +33,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Building LSC coorientation graph..." loginfo "Building LSC coorientation graph..."
${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${ORG_LOGFILE} > LSC.tgf ${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${ORG_LOGFILE} > LSC.tgf
${PROG_DIR}/cc.py LSC.tgf > LSC.cc ${PROG_DIR}/cc.py LSC.tgf > LSC.cc
loginfo " --> $(awk '{print $1}' LSC.cc | uniq | wc -l) connected componants" loginfo " --> $($AwkCmd '{print $1}' LSC.cc | uniq | wc -l) connected componants"
loginfo "Done" loginfo "Done"
@ -46,7 +46,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Extracting main connected components for LCS..." loginfo "Extracting main connected components for LCS..."
rm -f LSC.direct.fasta rm -f LSC.direct.fasta
touch LSC.direct.fasta touch LSC.direct.fasta
for id in `awk '($1==0) {print $2}' LSC.cc`; do for id in `$AwkCmd '($1==0) {print $2}' LSC.cc`; do
fastafetch -f LSC.fasta -i LSC.index -q "${id}" >> LSC.direct.fasta fastafetch -f LSC.fasta -i LSC.index -q "${id}" >> LSC.direct.fasta
done done
loginfo " --> $(fastaCount LSC.direct.fasta) sequences" loginfo " --> $(fastaCount LSC.direct.fasta) sequences"
@ -57,7 +57,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Extracting second connected components for LCS..." loginfo "Extracting second connected components for LCS..."
rm -f LSC.reverse.fasta rm -f LSC.reverse.fasta
touch LSC.reverse.fasta touch LSC.reverse.fasta
for id in `awk '($1==1) {print $2}' LSC.cc`; do for id in `$AwkCmd '($1==1) {print $2}' LSC.cc`; do
fastafetch -f LSC.fasta -i LSC.index -q "${id}" >> LSC.reverse.fasta fastafetch -f LSC.fasta -i LSC.index -q "${id}" >> LSC.reverse.fasta
done done
loginfo " --> $(fastaCount LSC.reverse.fasta) sequences" loginfo " --> $(fastaCount LSC.reverse.fasta) sequences"
@ -75,7 +75,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Checking LCS homogeneity..." loginfo "Checking LCS homogeneity..."
${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${ORG_LOGFILE} > LSC_RefDB.tgf ${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${ORG_LOGFILE} > LSC_RefDB.tgf
${PROG_DIR}/cc.py LSC_RefDB.tgf > LSC_RefDB.cc ${PROG_DIR}/cc.py LSC_RefDB.tgf > LSC_RefDB.cc
NCC=$(awk '{print $1}' LSC_RefDB.cc | uniq | wc -l) NCC=$($AwkCmd '{print $1}' LSC_RefDB.cc | uniq | wc -l)
if (( $NCC == 1 )); then if (( $NCC == 1 )); then
loginfo " --> $NCC connected componants" loginfo " --> $NCC connected componants"
else else
@ -105,7 +105,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Building SSC coorientation graph..." loginfo "Building SSC coorientation graph..."
${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${ORG_LOGFILE} > SSC.tgf ${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${ORG_LOGFILE} > SSC.tgf
${PROG_DIR}/cc.py SSC.tgf > SSC.cc ${PROG_DIR}/cc.py SSC.tgf > SSC.cc
loginfo " --> $(awk '{print $1}' SSC.cc | uniq | wc -l) connected componants" loginfo " --> $($AwkCmd '{print $1}' SSC.cc | uniq | wc -l) connected componants"
loginfo "Done" loginfo "Done"
@ -119,7 +119,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Extracting main connected components for SSC..." loginfo "Extracting main connected components for SSC..."
rm -f SSC.direct.fasta rm -f SSC.direct.fasta
touch SSC.direct.fasta touch SSC.direct.fasta
for id in `awk '($1==0) {print $2}' SSC.cc`; do for id in `$AwkCmd '($1==0) {print $2}' SSC.cc`; do
fastafetch -f SSC.fasta -i SSC.index -q "${id}" >> SSC.direct.fasta fastafetch -f SSC.fasta -i SSC.index -q "${id}" >> SSC.direct.fasta
done done
loginfo " --> $(fastaCount SSC.direct.fasta) sequences" loginfo " --> $(fastaCount SSC.direct.fasta) sequences"
@ -130,7 +130,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Extracting second connected components for SSC..." loginfo "Extracting second connected components for SSC..."
rm -f SSC.reverse.fasta rm -f SSC.reverse.fasta
touch SSC.reverse.fasta touch SSC.reverse.fasta
for id in `awk '($1==1) {print $2}' SSC.cc`; do for id in `$AwkCmd '($1==1) {print $2}' SSC.cc`; do
fastafetch -f SSC.fasta -i SSC.index -q "${id}" >> SSC.reverse.fasta fastafetch -f SSC.fasta -i SSC.index -q "${id}" >> SSC.reverse.fasta
done done
loginfo " --> $(fastaCount SSC.reverse.fasta) sequences" loginfo " --> $(fastaCount SSC.reverse.fasta) sequences"
@ -148,7 +148,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Checking SSC homogeneity..." loginfo "Checking SSC homogeneity..."
${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${ORG_LOGFILE} > SSC_RefDB.tgf ${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${ORG_LOGFILE} > SSC_RefDB.tgf
${PROG_DIR}/cc.py SSC_RefDB.tgf > SSC_RefDB.cc ${PROG_DIR}/cc.py SSC_RefDB.tgf > SSC_RefDB.cc
NCC=$(awk '{print $1}' SSC_RefDB.cc | uniq | wc -l) NCC=$($AwkCmd '{print $1}' SSC_RefDB.cc | uniq | wc -l)
if (( $NCC == 1 )); then if (( $NCC == 1 )); then
loginfo " --> $NCC connected componants" loginfo " --> $NCC connected componants"
else else

View File

@ -43,7 +43,7 @@ pushTmpDir ORG.coorienteSC
loginfo "Running Blast..." loginfo "Running Blast..."
blastn -db "${BLASTDB}" -query "${DATA}" -outfmt 6 | \ blastn -db "${BLASTDB}" -query "${DATA}" -outfmt 6 | \
awk ' \ $AwkCmd ' \
($4 > 1000) && ($3 > 70) \ ($4 > 1000) && ($3 > 70) \
($1==QUERY) && \ ($1==QUERY) && \
($2==SUBJECT) && \ ($2==SUBJECT) && \
@ -65,7 +65,7 @@ pushTmpDir ORG.coorienteSC
LDIFF= ($3/100.*$4) }} \ LDIFF= ($3/100.*$4) }} \
} \ } \
END {print QUERY,SUBJECT,LSAME,LDIFF,(LSAME>LDIFF)}' | \ END {print QUERY,SUBJECT,LSAME,LDIFF,(LSAME>LDIFF)}' | \
awk -v minlength="${MINLENGTH}" \ $AwkCmd -v minlength="${MINLENGTH}" \
' (($3>minlength) || \ ' (($3>minlength) || \
($4 > minlength)) && \ ($4 > minlength)) && \
($3/($4+1) > 2) && \ ($3/($4+1) > 2) && \
@ -75,7 +75,7 @@ pushTmpDir ORG.coorienteSC
{print $1,$2,$5}}' | \ {print $1,$2,$5}}' | \
sort | \ sort | \
uniq -c | \ uniq -c | \
awk '($1==2) {print $2,$3,$4}' $AwkCmd '($1==2) {print $2,$3,$4}'
loginfo "Done" loginfo "Done"
popTmpDir popTmpDir

View File

@ -2,7 +2,7 @@
# #
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -2,7 +2,7 @@
# #
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -2,7 +2,7 @@
grep -A 1 ' ORGANISM' $* | \ grep -A 1 ' ORGANISM' $* | \
grep -B 1 Viridiplantae | \ grep -B 1 Viridiplantae | \
awk '{print $1}' | \ gawk '{print $1}' | \
grep '\.gbk' | \ grep '\.gbk' | \
sed -E 's/(^.*\.gbk).$/\1/' | \ sed -E 's/(^.*\.gbk).$/\1/' | \
uniq uniq

View File

@ -33,13 +33,13 @@ pushTmpDir ORG.rrna
RRNA=$(basename ${QUERY}) RRNA=$(basename ${QUERY})
hmmsearch --max ${RRNADB} ${QUERY} | \ hmmsearch --max ${RRNADB} ${QUERY} | \
awk '/Query: / { \ $AwkCmd '/Query: / { \
profil=$2; \ profil=$2; \
match($3,"[0-9][0-9]*");\ match($3,"[0-9][0-9]*");\
lprof=substr($3,RSTART,RLENGTH)} \ lprof=substr($3,RSTART,RLENGTH)} \
/ [0-9][0-9]* ! / { \ / [0-9][0-9]* ! / { \
print profil,lprof,$7,$8,$10,$11}' | \ print profil,lprof,$7,$8,$10,$11}' | \
awk '($3 <=5) && (($2-$4) <=5) { \ $AwkCmd '($3 <=5) && (($2-$4) <=5) { \
full=1;$5=$5-$3+1;$6=$6+($2-$4)} full=1;$5=$5-$3+1;$6=$6+($2-$4)}
{loc=$5".."$6} \ {loc=$5".."$6} \
($1 ~ /_RC$/) { \ ($1 ~ /_RC$/) { \

View File

@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
function fasta1li { function fasta1li {
awk '/^>/ {if (sequence) \ $AwkCmd '/^>/ {if (sequence) \
{print sequence}; \ {print sequence}; \
print $0; \ print $0; \
sequence=""} \ sequence=""} \
@ -28,7 +28,7 @@ function dereplicate {
grep -v -- -- | \ grep -v -- -- | \
sed -E "s/count=[0-9]+; //" | \ sed -E "s/count=[0-9]+; //" | \
sed 's/cluster_weight/count/' | \ sed 's/cluster_weight/count/' | \
awk ' /^>/ {SEQ++;\ $AwkCmd ' /^>/ {SEQ++;\
match($0,"count=[0-9][0-9]*;");\ match($0,"count=[0-9][0-9]*;");\
count=substr($0,RSTART,RLENGTH);\ count=substr($0,RSTART,RLENGTH);\
$1=$1"_"SEQ;\ $1=$1"_"SEQ;\
@ -58,7 +58,7 @@ function clustering {
} }
function revcomp { function revcomp {
awk 'function printfasta(seq) { \ $AwkCmd 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -2,7 +2,7 @@
# #
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -2,7 +2,7 @@
# #
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -2,7 +2,7 @@
# #
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -2,7 +2,7 @@
# #
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
function revcomp { function revcomp {
awk 'function printfasta(seq) { \ gawk 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \

View File

@ -1,4 +1,4 @@
#!/usr/bin/awk -f #!/usr/bin/env gawk -f
function genomeid() { function genomeid() {
if (gid=="") { if (gid=="") {
gid="XXXXXXX"; gid="XXXXXXX";

View File

@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
function fasta1li { function fasta1li {
awk '/^>/ {if (sequence) \ $AwkCmd '/^>/ {if (sequence) \
{print sequence}; \ {print sequence}; \
print $0; \ print $0; \
sequence=""} \ sequence=""} \
@ -28,7 +28,7 @@ function dereplicate {
grep -v -- -- | \ grep -v -- -- | \
sed -E "s/count=[0-9]+; //" | \ sed -E "s/count=[0-9]+; //" | \
sed 's/cluster_weight/count/' | \ sed 's/cluster_weight/count/' | \
awk ' /^>/ {SEQ++;$1=$1"_"SEQ;print $0} \ $AwkCmd ' /^>/ {SEQ++;$1=$1"_"SEQ;print $0} \
!/^>/ {print $0}' !/^>/ {print $0}'
} }
@ -52,15 +52,16 @@ function goodtrna {
sumatra -t 0.90 -x $QUERY $REF | \ sumatra -t 0.90 -x $QUERY $REF | \
sed -E 's/.(trn.M?)[_A-Z0-9]+/ \1 /' | \ sed -E 's/.(trn.M?)[_A-Z0-9]+/ \1 /' | \
sort -k 1,2 | \ sort -k 1,2 | \
awk '(OLD) && ($1!=OLD) {print OLD,c["trnM"],c["trnfM"],c["trnI"]} \ $AwkCmd '(OLD) && ($1!=OLD) {print OLD,c["trnM"],c["trnfM"],c["trnI"]} \
(OLD !=$1) {c["trnM"]=0;c["trnfM"]=0;c["trnI"]=0;OLD=$1} \ (OLD !=$1) {c["trnM"]=0;c["trnfM"]=0;c["trnI"]=0;OLD=$1} \
{c[$2]+=$5}' | awk '{p=0;} \ {c[$2]+=$5}' | \
$AwkCmd '{p=0;} \
($2 > $3) && ($2 > $4) { print $0,"trnM";p=1 } \ ($2 > $3) && ($2 > $4) { print $0,"trnM";p=1 } \
($3 > $2) && ($3 > $4) {print $0,"trnfM";p=1} \ ($3 > $2) && ($3 > $4) {print $0,"trnfM";p=1} \
($4 > $2) && ($4 > $3) {print $0,"trnI";p=1} \ ($4 > $2) && ($4 > $3) {print $0,"trnI";p=1} \
(p==0) {print $0,"----"}' | sed 's/_/ /' | \ (p==0) {print $0,"----"}' | sed 's/_/ /' | \
awk '{print $1"_"$2,$3,$4,$5,$1,$6}' | \ $AwkCmd '{print $1"_"$2,$3,$4,$5,$1,$6}' | \
awk '(($2+$3+$4) > 1) && ($5==$6) {print $1}' $AwkCmd '(($2+$3+$4) > 1) && ($5==$6) {print $1}'
} }
pushTmpDir ORG.buildSCDB pushTmpDir ORG.buildSCDB

View File

@ -15,11 +15,11 @@ function taxid {
} }
function ac { function ac {
head -1 $1 | awk '{print $2}' head -1 $1 | $AwkCmd '{print $2}'
} }
function definition { function definition {
awk '/^DEFINITION/ {on=1} \ $AwkCmd '/^DEFINITION/ {on=1} \
(on==1) {printf("%s ",$0)} \ (on==1) {printf("%s ",$0)} \
(/\.$/ && (on==1)) {on=0;print ""}' $1 | \ (/\.$/ && (on==1)) {on=0;print ""}' $1 | \
sed 's/^DEFINITION *//' | \ sed 's/^DEFINITION *//' | \
@ -33,7 +33,7 @@ function gb2fasta {
echo ">${AC} taxid=${TAXID}; ${DEFINITION}" echo ">${AC} taxid=${TAXID}; ${DEFINITION}"
awk '/^\/\// {on=0} \ $AwkCmd '/^\/\// {on=0} \
(on==1) {print $0} \ (on==1) {print $0} \
/^ORIGIN / {on=1}' $1 | \ /^ORIGIN / {on=1}' $1 | \
sed -E 's/^ *[0-9]+ +//' | \ sed -E 's/^ *[0-9]+ +//' | \
@ -46,11 +46,11 @@ function findCAUtrna {
gb2fasta $1 > ${FASTATMP} gb2fasta $1 > ${FASTATMP}
aragorn -i -w -seq ${FASTATMP} | \ aragorn -i -w -seq ${FASTATMP} | \
awk '(on==1) && /^ *[0-9]+/ {on=0;print ""} \ $AwkCmd '(on==1) && /^ *[0-9]+/ {on=0;print ""} \
(on==1) {printf($0)} \ (on==1) {printf($0)} \
/\(cat\)$/ {on=1; printf("%s ",$0)} \ /\(cat\)$/ {on=1; printf("%s ",$0)} \
END {print ""}' | \ END {print ""}' | \
awk '{print $3,$6}' | \ $AwkCmd '{print $3,$6}' | \
sed -E 's/c?\[([0-9]+),([0-9]+)\]/\1 \2/' | \ sed -E 's/c?\[([0-9]+),([0-9]+)\]/\1 \2/' | \
sed 's/ /:/g' sed 's/ /:/g'
@ -58,10 +58,10 @@ function findCAUtrna {
} }
function trnaAnnotations { function trnaAnnotations {
awk '/^ORIGIN/ {on=0} \ $AwkCmd '/^ORIGIN/ {on=0} \
(on==1) {print $0} \ (on==1) {print $0} \
/^FEATURE/ {on=1}' $1 | \ /^FEATURE/ {on=1}' $1 | \
awk '/^ [^ ]/ {print ""} \ $AwkCmd '/^ [^ ]/ {print ""} \
{printf("%s ",$0)} \ {printf("%s ",$0)} \
END {print ""}' | \ END {print ""}' | \
sed 's/^ *//' | \ sed 's/^ *//' | \
@ -76,17 +76,17 @@ function trnaAnnotations {
sed -E 's/join\(([0-9]+ [0-9]+)\)/\1/' | \ sed -E 's/join\(([0-9]+ [0-9]+)\)/\1/' | \
sed 's/^tRNA *//' | \ sed 's/^tRNA *//' | \
sed -E 's@([0-9]+) +([0-9]+).*/gene="([^"]+)"@\1 \2 \3@' | \ sed -E 's@([0-9]+) +([0-9]+).*/gene="([^"]+)"@\1 \2 \3@' | \
awk '{print $1,$2,$3}' $AwkCmd '{print $1,$2,$3}'
} }
function annotateCAU { function annotateCAU {
DISTTMP="$$.trna.dist" DISTTMP="$$.trna.dist"
trna=(`echo $1 | sed 's/:/ /g'`) trna=(`echo $1 | sed 's/:/ /g'`)
awk -v b=${trna[0]} -v e=${trna[1]} \ $AwkCmd -v b=${trna[0]} -v e=${trna[1]} \
'{printf("sqrt((%d - %d)^2 + (%d - %d)^2)\n",$1,b,$2,e)}' $2 | \ '{printf("sqrt((%d - %d)^2 + (%d - %d)^2)\n",$1,b,$2,e)}' $2 | \
bc -l | \ bc -l | \
sed 's/\..*$//' > ${DISTTMP} sed 's/\..*$//' > ${DISTTMP}
paste ${DISTTMP} $2 | sort -nk 1 | head -1 | awk '{print $1,$4}' paste ${DISTTMP} $2 | sort -nk 1 | head -1 | $AwkCmd '{print $1,$4}'
rm -f ${DISTTMP} rm -f ${DISTTMP}
} }
@ -98,7 +98,7 @@ function writeTRNA {
TRNATMP="$$.trna.txt" TRNATMP="$$.trna.txt"
trnaAnnotations $1 > ${TRNATMP} trnaAnnotations $1 > ${TRNATMP}
ntrna=`wc -l ${TRNATMP} | awk '{print $1}'` ntrna=`wc -l ${TRNATMP} | $AwkCmd '{print $1}'`
if (( ntrna > 0 )); then if (( ntrna > 0 )); then
trnacau=`findCAUtrna $1` trnacau=`findCAUtrna $1`
@ -110,7 +110,7 @@ function writeTRNA {
if (( distance <= 10 )); then if (( distance <= 10 )); then
echo ">${aa}_${AC} gbac=${AC}; trna=${aa}; taxid=${TAXID}; distance=${distance}; ${DEFINITION}" echo ">${aa}_${AC} gbac=${AC}; trna=${aa}; taxid=${TAXID}; distance=${distance}; ${DEFINITION}"
echo "$t" | awk -F ':' '{print $3}' echo "$t" | $AwkCmd -F ':' '{print $3}'
fi fi
done done
fi fi

View File

@ -3,6 +3,8 @@
# for setting up basic variables and functions # for setting up basic variables and functions
# #
export AwkCmd="gawk"
######################## ########################
# #
# General usage functions # General usage functions
@ -25,8 +27,8 @@ function pushTmpDir {
} }
function popTmpDir { function popTmpDir {
TMP_DIR=$(echo $TMP_DIR_STACK | awk '{print $1}') TMP_DIR=$(echo $TMP_DIR_STACK | $AwkCmd '{print $1}')
TMP_DIR_STACK=$(echo $TMP_DIR_STACK | awk '{$1="";print $0}') TMP_DIR_STACK=$(echo $TMP_DIR_STACK | $AwkCmd '{$1="";print $0}')
popd >& /dev/null popd >& /dev/null
rm -rf $TMP_DIR >& /dev/null rm -rf $TMP_DIR >& /dev/null
logdebug "Poping temp directory $TMP_DIR" logdebug "Poping temp directory $TMP_DIR"
@ -91,7 +93,7 @@ function fastaCount {
function seqlength { function seqlength {
cat $1 | \ cat $1 | \
wc |\ wc |\
awk -v t="`head -1 $1 | wc -c`" '{print $3 - t - $1 + 1}' $AwkCmd -v t="`head -1 $1 | wc -c`" '{print $3 - t - $1 + 1}'
} }
# extract a subseq from a fasta sequence # extract a subseq from a fasta sequence
@ -99,7 +101,7 @@ function seqlength {
# - $2 : First position of the subsequence (first position is numered 1), # - $2 : First position of the subsequence (first position is numered 1),
# - $3 : End of the subsequence (included in the subsequence) # - $3 : End of the subsequence (included in the subsequence)
function cutseq { function cutseq {
awk -v from=$2 -v end=$3 'function printfasta(seq) { \ $AwkCmd -v from=$2 -v end=$3 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \
@ -114,13 +116,13 @@ function cutseq {
# a single sequence # a single sequence
# - $1 : The fasta file containing the sequences to join # - $1 : The fasta file containing the sequences to join
function joinfasta { function joinfasta {
awk '(NR==1 && /^>/) {print $0} \ $AwkCmd '(NR==1 && /^>/) {print $0} \
! /^>/ {print $0}' $1 | \ ! /^>/ {print $0}' $1 | \
formatfasta formatfasta
} }
function formatfasta { function formatfasta {
awk 'function printfasta(seq) { \ $AwkCmd 'function printfasta(seq) { \
seqlen=length(seq); \ seqlen=length(seq); \
for (i=1; i <= seqlen; i+=60) \ for (i=1; i <= seqlen; i+=60) \
print substr(seq,i,60); \ print substr(seq,i,60); \