diff --git a/detectors/cds/bin/do_rps12.sh b/detectors/cds/bin/do_rps12.sh index a4ffeb7..130912b 100755 --- a/detectors/cds/bin/do_rps12.sh +++ b/detectors/cds/bin/do_rps12.sh @@ -105,40 +105,7 @@ blastx \ ($7 > $8) {print $8,$7,$9,$10,"R"}' \ | sort -n \ | uniq \ - | $AwkCmd 'function overlap(x1,y1,x2,y2) { - return (((x1+0 <= x2+0) && ((y1+1) >= x2+0)) || - ((x2+0 <= x1+0) && ((y2+1) >= x1+0))) - } - function min(a,b) {return (a <= b) ? a:b } - function max(a,b) {return (a >= b) ? a:b } - (NR==1) {i=0 - frg[i]=$0 - } - (x1 && y1) { - if (overlap(x1,y1,$1,$2)) { - $1 = min(x1,$1) - $2 = max(y1,$2) - if (overlap(v1,w1,$3,$4)) { - $3 = min(v1,$3) - $4 = max(w1,$4) - } - } - else i++ - } - (x1 && y1) { - frg[i] = $0 - } - { x1 = $1 - y1 = $2 - v1 = $3 - w1 = $4 - } - END { - for (j = 0; j <= i; j++) { - print frg[j] - } - } - ' \ + | $AwkCmd -f $LIB_DIR/rps12_filter_1.awk \ | sort -nk 3 \ | $AwkCmd '($3 != old3 || $4 != old4) { i++ @@ -148,108 +115,12 @@ blastx \ {print $0,i} ' \ | sort -nk 6 \ - | $AwkCmd 'function min(a,b) {return (a <= b) ? a:b } - (old6 == 1) { - print old - oldprint = 1 - } - ((old6 == 2 && $6==2) || - full == 1) { - print old - full = 0 - } - (((old6 == 2 && $6==3) || - (old6 == 3 && $6==2)) && full != 1) { - $1 = old1 - $6 = min(old6,$6) - full = 1 - } - END {print old} - { - old = $0 - old1 = $1 - old6= $6 - }' \ + | $AwkCmd -f $LIB_DIR/rps12_filter_2.awk \ | $AwkCmd -v delta="$DELTA" \ -v seqlen="$SEQLEN" \ -v chloro="$SEQUENCE" \ - 'function min(a,b) {return (a <= b) ? a:b } - function max(a,b) {return (a >= b) ? a:b } - function rev(s) { - x = "" - for (i=length(s);i!=0;i--) - x=x substr(s,i,1) - return x - } - function swapchar(s,a,b) { - gsub(a,"@",s) - gsub(b,a,s) - gsub(/@/,b,s) - return s - } - function revcomp(s) { - s = swapchar(s,"A","T") - s = swapchar(s,"C","G") - s = swapchar(s,"M","K") - s = swapchar(s,"R","Y") - s = swapchar(s,"W","S") - s = swapchar(s,"B","V") - s = swapchar(s,"D","H") - s = swapchar(s,"a","t") - s = swapchar(s,"c","g") - s = swapchar(s,"m","k") - s = swapchar(s,"r","y") - s = swapchar(s,"w","s") - s = swapchar(s,"b","v") - s = swapchar(s,"d","h") - return rev(s) - } - { from = max(1,$1 - delta) - to = min($2 + delta,seqlen) - sequence = substr(chloro,from,to-from+1) - if ($5 == "R") sequence = revcomp(sequence) - nparts[$6]+=1 - n = nparts[$6] - parts[$6][n][1] = from - parts[$6][n][2] = to - parts[$6][n][3] = $3 - parts[$6][n][4] = $4 - parts[$6][n][5] = $5 - parts[$6][n][6] = $6 - parts[$6][n][7] = sequence - } - END { - l = length(parts) - if (l==1) { - n = nparts[1] - for (i =1; i <= n; i++) { - print ">RPS12_" i,"parts=1; limit=" length(parts[1][i][7]) + 1 \ - "; from1=" parts[1][i][1] \ - "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \ - ";" > "rps12_fragments_" i ".fasta" - print parts[1][i][7] \ - > "rps12_fragments_" i ".fasta" - } - } + -f $LIB_DIR/rps12_filter_3.awk - if (l==2) { - n1 = nparts[1] - n2 = nparts[2] - for (i =1; i <= n1; i++) - for (j =1; j <= n2; j++) { - k = (i-1)*n2+j - print ">RPS12_" k,"parts=2", \ - "limit=" (length(parts[1][i][7]) + 10 + 1) \ - "; from1=" parts[1][i][1] "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \ - "; from2=" parts[2][j][1] "; to2=" parts[2][j][2] "; strand2=" parts[2][j][5] \ - ";" > "rps12_fragments_" k ".fasta" - print parts[1][i][7] "nnnnnnnnnn" parts[2][j][7] \ - > "rps12_fragments_" k ".fasta" - - } - } - } - ' nrps12=$(ls -1 rps12_fragments_*.fasta | wc -l) @@ -303,55 +174,7 @@ blastx \ cat $f.ori \ | $AwkCmd -v S1="$S1" -v F1="$F1" -v T1="$T1" \ -v S2="$S2" -v F2="$F2" -v T2="$T2" -v L2="$L2" \ - ' - function convert1p(p) { - if (p+0 < L2) { - I = 1 - if (S1=="F") { - S = 1 - B = F1 - } else { - S = -1 - B = T1 - } - } else { - I = L2 - if (S2=="F") { - S = 1 - B = F2 - } else { - S = -1 - B = T2 - } - } - return S*(p - I) + B - } - function convert(p1,p2) { - p1 = convert1p(p1) - p2 = convert1p(p2) - if (p1 < p2) - res = p1 ".." p2 - else - res = "complement(" p2 ".." p1 ")" - return res - } - /[0-9]+\.\.[0-9]+/ { - s = $0 - r = $0 - while (length(s) > 0) { - match(s,/[0-9]+\.\.[0-9]+/) - range = substr(s,RSTART,RLENGTH) - s = substr(s,RSTART+RLENGTH+1) - match(range,/^[0-9]+/) - from = substr(range,RSTART,RLENGTH) - match(range,/[0-9]+$/) - to = substr(range,RSTART,RLENGTH) - sub(range,convert(from,to),r) - } - $0=r - } - {print $0} - ' \ + -f $LIB_DIR/rps12_filter_4.awk \ | $AwkCmd ' # # Normalize join(complement(A),complement(B),complement(C)) locations diff --git a/detectors/cds/lib/rps12_filter_1.awk b/detectors/cds/lib/rps12_filter_1.awk new file mode 100644 index 0000000..f9c9566 --- /dev/null +++ b/detectors/cds/lib/rps12_filter_1.awk @@ -0,0 +1,38 @@ +function overlap(x1,y1,x2,y2) { + return (((x1+0 <= x2+0) && ((y1+1) >= x2+0)) || + ((x2+0 <= x1+0) && ((y2+1) >= x1+0))) + } +function min(a,b) {return (a <= b) ? a:b } +function max(a,b) {return (a >= b) ? a:b } + +(NR==1) {i=0 + frg[i]=$0 + } + +(x1 && y1) { + if (overlap(x1,y1,$1,$2)) { + $1 = min(x1,$1) + $2 = max(y1,$2) + if (overlap(v1,w1,$3,$4)) { + $3 = min(v1,$3) + $4 = max(w1,$4) + } + } + else i++ +} + +(x1 && y1) { + frg[i] = $0 +} + +{ x1 = $1 + y1 = $2 + v1 = $3 + w1 = $4 +} + +END { + for (j = 0; j <= i; j++) { + print frg[j] + } +} \ No newline at end of file diff --git a/detectors/cds/lib/rps12_filter_2.awk b/detectors/cds/lib/rps12_filter_2.awk new file mode 100644 index 0000000..f4e03e3 --- /dev/null +++ b/detectors/cds/lib/rps12_filter_2.awk @@ -0,0 +1,26 @@ +function min(a,b) {return (a <= b) ? a:b } + (old6 == 1) { + print old + oldprint = 1 + } + +((old6 == 2 && $6==2) || +full == 1) { + print old + full = 0 +} + +(((old6 == 2 && $6==3) || +(old6 == 3 && $6==2)) && full != 1) { + $1 = old1 + $6 = min(old6,$6) + full = 1 +} + +END {print old} + +{ + old = $0 + old1 = $1 + old6= $6 +} \ No newline at end of file diff --git a/detectors/cds/lib/rps12_filter_3.awk b/detectors/cds/lib/rps12_filter_3.awk new file mode 100644 index 0000000..d25fbab --- /dev/null +++ b/detectors/cds/lib/rps12_filter_3.awk @@ -0,0 +1,78 @@ +function min(a,b) {return (a <= b) ? a:b } + function max(a,b) {return (a >= b) ? a:b } + function rev(s) { + x = "" + for (i=length(s);i!=0;i--) + x=x substr(s,i,1) + return x + } + +function swapchar(s,a,b) { + gsub(a,"@",s) + gsub(b,a,s) + gsub(/@/,b,s) + return s + } + +function revcomp(s) { + s = swapchar(s,"A","T") + s = swapchar(s,"C","G") + s = swapchar(s,"M","K") + s = swapchar(s,"R","Y") + s = swapchar(s,"W","S") + s = swapchar(s,"B","V") + s = swapchar(s,"D","H") + s = swapchar(s,"a","t") + s = swapchar(s,"c","g") + s = swapchar(s,"m","k") + s = swapchar(s,"r","y") + s = swapchar(s,"w","s") + s = swapchar(s,"b","v") + s = swapchar(s,"d","h") + return rev(s) + } + { from = max(1,$1 - delta) + to = min($2 + delta,seqlen) + sequence = substr(chloro,from,to-from+1) + if ($5 == "R") sequence = revcomp(sequence) + nparts[$6]+=1 + n = nparts[$6] + parts[$6][n][1] = from + parts[$6][n][2] = to + parts[$6][n][3] = $3 + parts[$6][n][4] = $4 + parts[$6][n][5] = $5 + parts[$6][n][6] = $6 + parts[$6][n][7] = sequence + } +END { + l = length(parts) + if (l==1) { + n = nparts[1] + for (i =1; i <= n; i++) { + print ">RPS12_" i,"parts=1; limit=" length(parts[1][i][7]) + 1 \ + "; from1=" parts[1][i][1] \ + "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \ + ";" > "rps12_fragments_" i ".fasta" + print parts[1][i][7] \ + > "rps12_fragments_" i ".fasta" + } + } + + if (l==2) { + n1 = nparts[1] + n2 = nparts[2] + for (i =1; i <= n1; i++) + for (j =1; j <= n2; j++) { + k = (i-1)*n2+j + print ">RPS12_" k,"parts=2", \ + "limit=" (length(parts[1][i][7]) + 10 + 1) \ + "; from1=" parts[1][i][1] "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \ + "; from2=" parts[2][j][1] "; to2=" parts[2][j][2] "; strand2=" parts[2][j][5] \ + ";" > "rps12_fragments_" k ".fasta" + print parts[1][i][7] "nnnnnnnnnn" parts[2][j][7] \ + > "rps12_fragments_" k ".fasta" + + } + } + } \ No newline at end of file diff --git a/detectors/cds/lib/rps12_filter_4.awk b/detectors/cds/lib/rps12_filter_4.awk new file mode 100644 index 0000000..2ad7572 --- /dev/null +++ b/detectors/cds/lib/rps12_filter_4.awk @@ -0,0 +1,47 @@ +function convert1p(p) { + if (p+0 < L2) { + I = 1 + if (S1=="F") { + S = 1 + B = F1 + } else { + S = -1 + B = T1 + } + } else { + I = L2 + if (S2=="F") { + S = 1 + B = F2 + } else { + S = -1 + B = T2 + } + } + return S*(p - I) + B +} +function convert(p1,p2) { + p1 = convert1p(p1) + p2 = convert1p(p2) + if (p1 < p2) + res = p1 ".." p2 + else + res = "complement(" p2 ".." p1 ")" + return res +} +/[0-9]+\.\.[0-9]+/ { + s = $0 + r = $0 + while (length(s) > 0) { + match(s,/[0-9]+\.\.[0-9]+/) + range = substr(s,RSTART,RLENGTH) + s = substr(s,RSTART+RLENGTH+1) + match(range,/^[0-9]+/) + from = substr(range,RSTART,RLENGTH) + match(range,/[0-9]+$/) + to = substr(range,RSTART,RLENGTH) + sub(range,convert(from,to),r) + } + $0=r + } +{print $0} \ No newline at end of file diff --git a/org-annotate.sh b/org-annotate.sh index b4a87d0..53aa753 100755 --- a/org-annotate.sh +++ b/org-annotate.sh @@ -785,7 +785,7 @@ pushTmpDir ORG.organnot } ' "${RESULTS}.sorted.annot" "${RESULTS}.sorted.annot" \ > "${RESULTS}.uniq_gene.annot" - log-Pinfo "Done." + loginfo "Done." if [[ "$tagprefix" != "no" ]] ; then loginfo "Adding locus tags from number: $locusshift..."