Switch from awk to gawk
Former-commit-id: 70b104ab89d4c90a35925dbbcacdafaf6c1f2836
Former-commit-id: bf29abf66345cbc0ca81461064dcca63dfd4c15c
This commit is contained in:
@ -33,7 +33,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Building LSC coorientation graph..."
|
||||
${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${ORG_LOGFILE} > LSC.tgf
|
||||
${PROG_DIR}/cc.py LSC.tgf > LSC.cc
|
||||
loginfo " --> $(awk '{print $1}' LSC.cc | uniq | wc -l) connected componants"
|
||||
loginfo " --> $($AwkCmd '{print $1}' LSC.cc | uniq | wc -l) connected componants"
|
||||
loginfo "Done"
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Extracting main connected components for LCS..."
|
||||
rm -f LSC.direct.fasta
|
||||
touch LSC.direct.fasta
|
||||
for id in `awk '($1==0) {print $2}' LSC.cc`; do
|
||||
for id in `$AwkCmd '($1==0) {print $2}' LSC.cc`; do
|
||||
fastafetch -f LSC.fasta -i LSC.index -q "${id}" >> LSC.direct.fasta
|
||||
done
|
||||
loginfo " --> $(fastaCount LSC.direct.fasta) sequences"
|
||||
@ -57,7 +57,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Extracting second connected components for LCS..."
|
||||
rm -f LSC.reverse.fasta
|
||||
touch LSC.reverse.fasta
|
||||
for id in `awk '($1==1) {print $2}' LSC.cc`; do
|
||||
for id in `$AwkCmd '($1==1) {print $2}' LSC.cc`; do
|
||||
fastafetch -f LSC.fasta -i LSC.index -q "${id}" >> LSC.reverse.fasta
|
||||
done
|
||||
loginfo " --> $(fastaCount LSC.reverse.fasta) sequences"
|
||||
@ -75,7 +75,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Checking LCS homogeneity..."
|
||||
${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${ORG_LOGFILE} > LSC_RefDB.tgf
|
||||
${PROG_DIR}/cc.py LSC_RefDB.tgf > LSC_RefDB.cc
|
||||
NCC=$(awk '{print $1}' LSC_RefDB.cc | uniq | wc -l)
|
||||
NCC=$($AwkCmd '{print $1}' LSC_RefDB.cc | uniq | wc -l)
|
||||
if (( $NCC == 1 )); then
|
||||
loginfo " --> $NCC connected componants"
|
||||
else
|
||||
@ -105,7 +105,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Building SSC coorientation graph..."
|
||||
${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${ORG_LOGFILE} > SSC.tgf
|
||||
${PROG_DIR}/cc.py SSC.tgf > SSC.cc
|
||||
loginfo " --> $(awk '{print $1}' SSC.cc | uniq | wc -l) connected componants"
|
||||
loginfo " --> $($AwkCmd '{print $1}' SSC.cc | uniq | wc -l) connected componants"
|
||||
loginfo "Done"
|
||||
|
||||
|
||||
@ -119,7 +119,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Extracting main connected components for SSC..."
|
||||
rm -f SSC.direct.fasta
|
||||
touch SSC.direct.fasta
|
||||
for id in `awk '($1==0) {print $2}' SSC.cc`; do
|
||||
for id in `$AwkCmd '($1==0) {print $2}' SSC.cc`; do
|
||||
fastafetch -f SSC.fasta -i SSC.index -q "${id}" >> SSC.direct.fasta
|
||||
done
|
||||
loginfo " --> $(fastaCount SSC.direct.fasta) sequences"
|
||||
@ -130,7 +130,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Extracting second connected components for SSC..."
|
||||
rm -f SSC.reverse.fasta
|
||||
touch SSC.reverse.fasta
|
||||
for id in `awk '($1==1) {print $2}' SSC.cc`; do
|
||||
for id in `$AwkCmd '($1==1) {print $2}' SSC.cc`; do
|
||||
fastafetch -f SSC.fasta -i SSC.index -q "${id}" >> SSC.reverse.fasta
|
||||
done
|
||||
loginfo " --> $(fastaCount SSC.reverse.fasta) sequences"
|
||||
@ -148,7 +148,7 @@ pushTmpDir ORG.buildSCDB
|
||||
loginfo "Checking SSC homogeneity..."
|
||||
${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${ORG_LOGFILE} > SSC_RefDB.tgf
|
||||
${PROG_DIR}/cc.py SSC_RefDB.tgf > SSC_RefDB.cc
|
||||
NCC=$(awk '{print $1}' SSC_RefDB.cc | uniq | wc -l)
|
||||
NCC=$($AwkCmd '{print $1}' SSC_RefDB.cc | uniq | wc -l)
|
||||
if (( $NCC == 1 )); then
|
||||
loginfo " --> $NCC connected componants"
|
||||
else
|
||||
|
@ -43,7 +43,7 @@ pushTmpDir ORG.coorienteSC
|
||||
|
||||
loginfo "Running Blast..."
|
||||
blastn -db "${BLASTDB}" -query "${DATA}" -outfmt 6 | \
|
||||
awk ' \
|
||||
$AwkCmd ' \
|
||||
($4 > 1000) && ($3 > 70) \
|
||||
($1==QUERY) && \
|
||||
($2==SUBJECT) && \
|
||||
@ -65,7 +65,7 @@ pushTmpDir ORG.coorienteSC
|
||||
LDIFF= ($3/100.*$4) }} \
|
||||
} \
|
||||
END {print QUERY,SUBJECT,LSAME,LDIFF,(LSAME>LDIFF)}' | \
|
||||
awk -v minlength="${MINLENGTH}" \
|
||||
$AwkCmd -v minlength="${MINLENGTH}" \
|
||||
' (($3>minlength) || \
|
||||
($4 > minlength)) && \
|
||||
($3/($4+1) > 2) && \
|
||||
@ -75,7 +75,7 @@ pushTmpDir ORG.coorienteSC
|
||||
{print $1,$2,$5}}' | \
|
||||
sort | \
|
||||
uniq -c | \
|
||||
awk '($1==2) {print $2,$3,$4}'
|
||||
$AwkCmd '($1==2) {print $2,$3,$4}'
|
||||
loginfo "Done"
|
||||
|
||||
popTmpDir
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
|
||||
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
|
||||
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
grep -A 1 ' ORGANISM' $* | \
|
||||
grep -B 1 Viridiplantae | \
|
||||
awk '{print $1}' | \
|
||||
gawk '{print $1}' | \
|
||||
grep '\.gbk' | \
|
||||
sed -E 's/(^.*\.gbk).$/\1/' | \
|
||||
uniq
|
@ -33,13 +33,13 @@ pushTmpDir ORG.rrna
|
||||
RRNA=$(basename ${QUERY})
|
||||
|
||||
hmmsearch --max ${RRNADB} ${QUERY} | \
|
||||
awk '/Query: / { \
|
||||
$AwkCmd '/Query: / { \
|
||||
profil=$2; \
|
||||
match($3,"[0-9][0-9]*");\
|
||||
lprof=substr($3,RSTART,RLENGTH)} \
|
||||
/ [0-9][0-9]* ! / { \
|
||||
print profil,lprof,$7,$8,$10,$11}' | \
|
||||
awk '($3 <=5) && (($2-$4) <=5) { \
|
||||
$AwkCmd '($3 <=5) && (($2-$4) <=5) { \
|
||||
full=1;$5=$5-$3+1;$6=$6+($2-$4)}
|
||||
{loc=$5".."$6} \
|
||||
($1 ~ /_RC$/) { \
|
||||
|
@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
function fasta1li {
|
||||
|
||||
awk '/^>/ {if (sequence) \
|
||||
$AwkCmd '/^>/ {if (sequence) \
|
||||
{print sequence}; \
|
||||
print $0; \
|
||||
sequence=""} \
|
||||
@ -28,7 +28,7 @@ function dereplicate {
|
||||
grep -v -- -- | \
|
||||
sed -E "s/count=[0-9]+; //" | \
|
||||
sed 's/cluster_weight/count/' | \
|
||||
awk ' /^>/ {SEQ++;\
|
||||
$AwkCmd ' /^>/ {SEQ++;\
|
||||
match($0,"count=[0-9][0-9]*;");\
|
||||
count=substr($0,RSTART,RLENGTH);\
|
||||
$1=$1"_"SEQ;\
|
||||
@ -58,7 +58,7 @@ function clustering {
|
||||
}
|
||||
|
||||
function revcomp {
|
||||
awk 'function printfasta(seq) { \
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
|
||||
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
|
||||
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
|
||||
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
|
||||
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
|
||||
function revcomp {
|
||||
awk 'function printfasta(seq) { \
|
||||
gawk 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/awk -f
|
||||
#!/usr/bin/env gawk -f
|
||||
function genomeid() {
|
||||
if (gid=="") {
|
||||
gid="XXXXXXX";
|
||||
|
@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
function fasta1li {
|
||||
|
||||
awk '/^>/ {if (sequence) \
|
||||
$AwkCmd '/^>/ {if (sequence) \
|
||||
{print sequence}; \
|
||||
print $0; \
|
||||
sequence=""} \
|
||||
@ -28,7 +28,7 @@ function dereplicate {
|
||||
grep -v -- -- | \
|
||||
sed -E "s/count=[0-9]+; //" | \
|
||||
sed 's/cluster_weight/count/' | \
|
||||
awk ' /^>/ {SEQ++;$1=$1"_"SEQ;print $0} \
|
||||
$AwkCmd ' /^>/ {SEQ++;$1=$1"_"SEQ;print $0} \
|
||||
!/^>/ {print $0}'
|
||||
}
|
||||
|
||||
@ -52,15 +52,16 @@ function goodtrna {
|
||||
sumatra -t 0.90 -x $QUERY $REF | \
|
||||
sed -E 's/.(trn.M?)[_A-Z0-9]+/ \1 /' | \
|
||||
sort -k 1,2 | \
|
||||
awk '(OLD) && ($1!=OLD) {print OLD,c["trnM"],c["trnfM"],c["trnI"]} \
|
||||
$AwkCmd '(OLD) && ($1!=OLD) {print OLD,c["trnM"],c["trnfM"],c["trnI"]} \
|
||||
(OLD !=$1) {c["trnM"]=0;c["trnfM"]=0;c["trnI"]=0;OLD=$1} \
|
||||
{c[$2]+=$5}' | awk '{p=0;} \
|
||||
{c[$2]+=$5}' | \
|
||||
$AwkCmd '{p=0;} \
|
||||
($2 > $3) && ($2 > $4) { print $0,"trnM";p=1 } \
|
||||
($3 > $2) && ($3 > $4) {print $0,"trnfM";p=1} \
|
||||
($4 > $2) && ($4 > $3) {print $0,"trnI";p=1} \
|
||||
(p==0) {print $0,"----"}' | sed 's/_/ /' | \
|
||||
awk '{print $1"_"$2,$3,$4,$5,$1,$6}' | \
|
||||
awk '(($2+$3+$4) > 1) && ($5==$6) {print $1}'
|
||||
$AwkCmd '{print $1"_"$2,$3,$4,$5,$1,$6}' | \
|
||||
$AwkCmd '(($2+$3+$4) > 1) && ($5==$6) {print $1}'
|
||||
}
|
||||
|
||||
pushTmpDir ORG.buildSCDB
|
||||
|
@ -15,11 +15,11 @@ function taxid {
|
||||
}
|
||||
|
||||
function ac {
|
||||
head -1 $1 | awk '{print $2}'
|
||||
head -1 $1 | $AwkCmd '{print $2}'
|
||||
}
|
||||
|
||||
function definition {
|
||||
awk '/^DEFINITION/ {on=1} \
|
||||
$AwkCmd '/^DEFINITION/ {on=1} \
|
||||
(on==1) {printf("%s ",$0)} \
|
||||
(/\.$/ && (on==1)) {on=0;print ""}' $1 | \
|
||||
sed 's/^DEFINITION *//' | \
|
||||
@ -33,7 +33,7 @@ function gb2fasta {
|
||||
|
||||
echo ">${AC} taxid=${TAXID}; ${DEFINITION}"
|
||||
|
||||
awk '/^\/\// {on=0} \
|
||||
$AwkCmd '/^\/\// {on=0} \
|
||||
(on==1) {print $0} \
|
||||
/^ORIGIN / {on=1}' $1 | \
|
||||
sed -E 's/^ *[0-9]+ +//' | \
|
||||
@ -46,11 +46,11 @@ function findCAUtrna {
|
||||
|
||||
gb2fasta $1 > ${FASTATMP}
|
||||
aragorn -i -w -seq ${FASTATMP} | \
|
||||
awk '(on==1) && /^ *[0-9]+/ {on=0;print ""} \
|
||||
$AwkCmd '(on==1) && /^ *[0-9]+/ {on=0;print ""} \
|
||||
(on==1) {printf($0)} \
|
||||
/\(cat\)$/ {on=1; printf("%s ",$0)} \
|
||||
END {print ""}' | \
|
||||
awk '{print $3,$6}' | \
|
||||
$AwkCmd '{print $3,$6}' | \
|
||||
sed -E 's/c?\[([0-9]+),([0-9]+)\]/\1 \2/' | \
|
||||
sed 's/ /:/g'
|
||||
|
||||
@ -58,10 +58,10 @@ function findCAUtrna {
|
||||
}
|
||||
|
||||
function trnaAnnotations {
|
||||
awk '/^ORIGIN/ {on=0} \
|
||||
$AwkCmd '/^ORIGIN/ {on=0} \
|
||||
(on==1) {print $0} \
|
||||
/^FEATURE/ {on=1}' $1 | \
|
||||
awk '/^ [^ ]/ {print ""} \
|
||||
$AwkCmd '/^ [^ ]/ {print ""} \
|
||||
{printf("%s ",$0)} \
|
||||
END {print ""}' | \
|
||||
sed 's/^ *//' | \
|
||||
@ -76,17 +76,17 @@ function trnaAnnotations {
|
||||
sed -E 's/join\(([0-9]+ [0-9]+)\)/\1/' | \
|
||||
sed 's/^tRNA *//' | \
|
||||
sed -E 's@([0-9]+) +([0-9]+).*/gene="([^"]+)"@\1 \2 \3@' | \
|
||||
awk '{print $1,$2,$3}'
|
||||
$AwkCmd '{print $1,$2,$3}'
|
||||
}
|
||||
|
||||
function annotateCAU {
|
||||
DISTTMP="$$.trna.dist"
|
||||
trna=(`echo $1 | sed 's/:/ /g'`)
|
||||
awk -v b=${trna[0]} -v e=${trna[1]} \
|
||||
$AwkCmd -v b=${trna[0]} -v e=${trna[1]} \
|
||||
'{printf("sqrt((%d - %d)^2 + (%d - %d)^2)\n",$1,b,$2,e)}' $2 | \
|
||||
bc -l | \
|
||||
sed 's/\..*$//' > ${DISTTMP}
|
||||
paste ${DISTTMP} $2 | sort -nk 1 | head -1 | awk '{print $1,$4}'
|
||||
paste ${DISTTMP} $2 | sort -nk 1 | head -1 | $AwkCmd '{print $1,$4}'
|
||||
rm -f ${DISTTMP}
|
||||
}
|
||||
|
||||
@ -98,7 +98,7 @@ function writeTRNA {
|
||||
TRNATMP="$$.trna.txt"
|
||||
|
||||
trnaAnnotations $1 > ${TRNATMP}
|
||||
ntrna=`wc -l ${TRNATMP} | awk '{print $1}'`
|
||||
ntrna=`wc -l ${TRNATMP} | $AwkCmd '{print $1}'`
|
||||
|
||||
if (( ntrna > 0 )); then
|
||||
trnacau=`findCAUtrna $1`
|
||||
@ -110,7 +110,7 @@ function writeTRNA {
|
||||
|
||||
if (( distance <= 10 )); then
|
||||
echo ">${aa}_${AC} gbac=${AC}; trna=${aa}; taxid=${TAXID}; distance=${distance}; ${DEFINITION}"
|
||||
echo "$t" | awk -F ':' '{print $3}'
|
||||
echo "$t" | $AwkCmd -F ':' '{print $3}'
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
@ -3,6 +3,8 @@
|
||||
# for setting up basic variables and functions
|
||||
#
|
||||
|
||||
export AwkCmd="gawk"
|
||||
|
||||
########################
|
||||
#
|
||||
# General usage functions
|
||||
@ -25,8 +27,8 @@ function pushTmpDir {
|
||||
}
|
||||
|
||||
function popTmpDir {
|
||||
TMP_DIR=$(echo $TMP_DIR_STACK | awk '{print $1}')
|
||||
TMP_DIR_STACK=$(echo $TMP_DIR_STACK | awk '{$1="";print $0}')
|
||||
TMP_DIR=$(echo $TMP_DIR_STACK | $AwkCmd '{print $1}')
|
||||
TMP_DIR_STACK=$(echo $TMP_DIR_STACK | $AwkCmd '{$1="";print $0}')
|
||||
popd >& /dev/null
|
||||
rm -rf $TMP_DIR >& /dev/null
|
||||
logdebug "Poping temp directory $TMP_DIR"
|
||||
@ -91,7 +93,7 @@ function fastaCount {
|
||||
function seqlength {
|
||||
cat $1 | \
|
||||
wc |\
|
||||
awk -v t="`head -1 $1 | wc -c`" '{print $3 - t - $1 + 1}'
|
||||
$AwkCmd -v t="`head -1 $1 | wc -c`" '{print $3 - t - $1 + 1}'
|
||||
}
|
||||
|
||||
# extract a subseq from a fasta sequence
|
||||
@ -99,7 +101,7 @@ function seqlength {
|
||||
# - $2 : First position of the subsequence (first position is numered 1),
|
||||
# - $3 : End of the subsequence (included in the subsequence)
|
||||
function cutseq {
|
||||
awk -v from=$2 -v end=$3 'function printfasta(seq) { \
|
||||
$AwkCmd -v from=$2 -v end=$3 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
@ -114,13 +116,13 @@ function cutseq {
|
||||
# a single sequence
|
||||
# - $1 : The fasta file containing the sequences to join
|
||||
function joinfasta {
|
||||
awk '(NR==1 && /^>/) {print $0} \
|
||||
$AwkCmd '(NR==1 && /^>/) {print $0} \
|
||||
! /^>/ {print $0}' $1 | \
|
||||
formatfasta
|
||||
}
|
||||
|
||||
function formatfasta {
|
||||
awk 'function printfasta(seq) { \
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
Reference in New Issue
Block a user