Small patches

Former-commit-id: 7f32ef237be64d3f81353241462f0b6c8f68d3c5
Former-commit-id: 8eb0147cc85f241e89399c4d3a9c7b5b2f52e215
This commit is contained in:
2023-05-15 20:48:44 +02:00
parent 2ff37343b6
commit 785e0a6226
6 changed files with 194 additions and 182 deletions

View File

@ -105,40 +105,7 @@ blastx \
($7 > $8) {print $8,$7,$9,$10,"R"}' \ ($7 > $8) {print $8,$7,$9,$10,"R"}' \
| sort -n \ | sort -n \
| uniq \ | uniq \
| $AwkCmd 'function overlap(x1,y1,x2,y2) { | $AwkCmd -f $LIB_DIR/rps12_filter_1.awk \
return (((x1+0 <= x2+0) && ((y1+1) >= x2+0)) ||
((x2+0 <= x1+0) && ((y2+1) >= x1+0)))
}
function min(a,b) {return (a <= b) ? a:b }
function max(a,b) {return (a >= b) ? a:b }
(NR==1) {i=0
frg[i]=$0
}
(x1 && y1) {
if (overlap(x1,y1,$1,$2)) {
$1 = min(x1,$1)
$2 = max(y1,$2)
if (overlap(v1,w1,$3,$4)) {
$3 = min(v1,$3)
$4 = max(w1,$4)
}
}
else i++
}
(x1 && y1) {
frg[i] = $0
}
{ x1 = $1
y1 = $2
v1 = $3
w1 = $4
}
END {
for (j = 0; j <= i; j++) {
print frg[j]
}
}
' \
| sort -nk 3 \ | sort -nk 3 \
| $AwkCmd '($3 != old3 || $4 != old4) { | $AwkCmd '($3 != old3 || $4 != old4) {
i++ i++
@ -148,108 +115,12 @@ blastx \
{print $0,i} {print $0,i}
' \ ' \
| sort -nk 6 \ | sort -nk 6 \
| $AwkCmd 'function min(a,b) {return (a <= b) ? a:b } | $AwkCmd -f $LIB_DIR/rps12_filter_2.awk \
(old6 == 1) {
print old
oldprint = 1
}
((old6 == 2 && $6==2) ||
full == 1) {
print old
full = 0
}
(((old6 == 2 && $6==3) ||
(old6 == 3 && $6==2)) && full != 1) {
$1 = old1
$6 = min(old6,$6)
full = 1
}
END {print old}
{
old = $0
old1 = $1
old6= $6
}' \
| $AwkCmd -v delta="$DELTA" \ | $AwkCmd -v delta="$DELTA" \
-v seqlen="$SEQLEN" \ -v seqlen="$SEQLEN" \
-v chloro="$SEQUENCE" \ -v chloro="$SEQUENCE" \
'function min(a,b) {return (a <= b) ? a:b } -f $LIB_DIR/rps12_filter_3.awk
function max(a,b) {return (a >= b) ? a:b }
function rev(s) {
x = ""
for (i=length(s);i!=0;i--)
x=x substr(s,i,1)
return x
}
function swapchar(s,a,b) {
gsub(a,"@",s)
gsub(b,a,s)
gsub(/@/,b,s)
return s
}
function revcomp(s) {
s = swapchar(s,"A","T")
s = swapchar(s,"C","G")
s = swapchar(s,"M","K")
s = swapchar(s,"R","Y")
s = swapchar(s,"W","S")
s = swapchar(s,"B","V")
s = swapchar(s,"D","H")
s = swapchar(s,"a","t")
s = swapchar(s,"c","g")
s = swapchar(s,"m","k")
s = swapchar(s,"r","y")
s = swapchar(s,"w","s")
s = swapchar(s,"b","v")
s = swapchar(s,"d","h")
return rev(s)
}
{ from = max(1,$1 - delta)
to = min($2 + delta,seqlen)
sequence = substr(chloro,from,to-from+1)
if ($5 == "R") sequence = revcomp(sequence)
nparts[$6]+=1
n = nparts[$6]
parts[$6][n][1] = from
parts[$6][n][2] = to
parts[$6][n][3] = $3
parts[$6][n][4] = $4
parts[$6][n][5] = $5
parts[$6][n][6] = $6
parts[$6][n][7] = sequence
}
END {
l = length(parts)
if (l==1) {
n = nparts[1]
for (i =1; i <= n; i++) {
print ">RPS12_" i,"parts=1; limit=" length(parts[1][i][7]) + 1 \
"; from1=" parts[1][i][1] \
"; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \
";" > "rps12_fragments_" i ".fasta"
print parts[1][i][7] \
> "rps12_fragments_" i ".fasta"
}
}
if (l==2) {
n1 = nparts[1]
n2 = nparts[2]
for (i =1; i <= n1; i++)
for (j =1; j <= n2; j++) {
k = (i-1)*n2+j
print ">RPS12_" k,"parts=2", \
"limit=" (length(parts[1][i][7]) + 10 + 1) \
"; from1=" parts[1][i][1] "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \
"; from2=" parts[2][j][1] "; to2=" parts[2][j][2] "; strand2=" parts[2][j][5] \
";" > "rps12_fragments_" k ".fasta"
print parts[1][i][7] "nnnnnnnnnn" parts[2][j][7] \
> "rps12_fragments_" k ".fasta"
}
}
}
'
nrps12=$(ls -1 rps12_fragments_*.fasta | wc -l) nrps12=$(ls -1 rps12_fragments_*.fasta | wc -l)
@ -303,55 +174,7 @@ blastx \
cat $f.ori \ cat $f.ori \
| $AwkCmd -v S1="$S1" -v F1="$F1" -v T1="$T1" \ | $AwkCmd -v S1="$S1" -v F1="$F1" -v T1="$T1" \
-v S2="$S2" -v F2="$F2" -v T2="$T2" -v L2="$L2" \ -v S2="$S2" -v F2="$F2" -v T2="$T2" -v L2="$L2" \
' -f $LIB_DIR/rps12_filter_4.awk \
function convert1p(p) {
if (p+0 < L2) {
I = 1
if (S1=="F") {
S = 1
B = F1
} else {
S = -1
B = T1
}
} else {
I = L2
if (S2=="F") {
S = 1
B = F2
} else {
S = -1
B = T2
}
}
return S*(p - I) + B
}
function convert(p1,p2) {
p1 = convert1p(p1)
p2 = convert1p(p2)
if (p1 < p2)
res = p1 ".." p2
else
res = "complement(" p2 ".." p1 ")"
return res
}
/[0-9]+\.\.[0-9]+/ {
s = $0
r = $0
while (length(s) > 0) {
match(s,/[0-9]+\.\.[0-9]+/)
range = substr(s,RSTART,RLENGTH)
s = substr(s,RSTART+RLENGTH+1)
match(range,/^[0-9]+/)
from = substr(range,RSTART,RLENGTH)
match(range,/[0-9]+$/)
to = substr(range,RSTART,RLENGTH)
sub(range,convert(from,to),r)
}
$0=r
}
{print $0}
' \
| $AwkCmd ' | $AwkCmd '
# #
# Normalize join(complement(A),complement(B),complement(C)) locations # Normalize join(complement(A),complement(B),complement(C)) locations

View File

@ -0,0 +1,38 @@
# overlap(x1,y1,x2,y2): return 1 when the intervals [x1,y1] and [x2,y2]
# overlap or are directly adjacent (the end of the leftmost one, plus one,
# reaches the start of the other), 0 otherwise.
# Start coordinates are forced to numbers with "+0" before being compared.
function overlap(x1,y1,x2,y2) {
    # First interval starts first (or at the same place) and reaches the second.
    if ((x1+0 <= x2+0) && ((y1+1) >= x2+0))
        return 1
    # Symmetric case: second interval starts first and reaches the first one.
    if ((x2+0 <= x1+0) && ((y2+1) >= x1+0))
        return 1
    return 0
}
# min(a,b): return the smaller of a and b (a when they compare equal).
function min(a,b) {
    if (a <= b)
        return a
    return b
}
# max(a,b): return the larger of a and b (a when they compare equal).
function max(a,b) {
    if (a >= b)
        return a
    return b
}
# Main rules: fuse each record with the previous one when their column 1-2
# intervals overlap (see overlap()), and emit the fused records at the end.
#
# State carried from one record to the next:
#   frg[0..i]    fused records collected so far
#   x1,y1,v1,w1  columns 1-4 of the previous record (after any fusion)
#
# NOTE(review): the calling pipeline feeds this script through
# "sort -n | uniq", so records are presumably ordered on column 1 -- only
# neighbouring records are ever compared here.

# The very first record opens fragment 0.
(NR==1) {
    i = 0
    frg[i] = $0
}

# Every later record: either extend the current fragment or open a new one.
# The guard is false while no previous coordinates have been recorded.
(x1 && y1) {
    if (overlap(x1,y1,$1,$2)) {
        # Grow the column 1-2 interval to cover both records.
        $1 = min(x1,$1)
        $2 = max(y1,$2)
        # The column 3-4 interval is widened only if it overlaps as well.
        if (overlap(v1,w1,$3,$4)) {
            $3 = min(v1,$3)
            $4 = max(w1,$4)
        }
    }
    else i++
    # Store the (possibly rewritten) record in the current slot.
    frg[i] = $0
}

# Remember the current coordinates for the comparison with the next record.
{
    x1 = $1
    y1 = $2
    v1 = $3
    w1 = $4
}

END {
    # With no input at all, i is unset and the loop below would still run
    # once for j == 0 and print an empty line; emit nothing in that case.
    if (NR > 0) {
        for (j = 0; j <= i; j++) {
            print frg[j]
        }
    }
}

View File

@ -0,0 +1,26 @@
# min(a,b): return the smaller of a and b (a when they compare equal).
function min(a,b) {
    if (a <= b)
        return a
    return b
}
# Each record is compared with the previous one through the numeric tag in
# column 6.  The previous record is kept in old / old1 / old6; "full" is set
# while the current record has just been fused with its predecessor.
#
# The three tests below run in this exact order for every record, and a
# later test sees the changes made by an earlier one (in particular the
# reset of "full").
{
    # Previous record carried tag 1: print it as is.
    if (old6 == 1) {
        print old
        oldprint = 1
    }

    # Two consecutive tag-2 records, or a fusion completed on the previous
    # record: print the previous record and clear the fusion flag.
    if ((old6 == 2 && $6 == 2) || full == 1) {
        print old
        full = 0
    }

    # A tag-2 / tag-3 pair (either order) with no fusion pending: take
    # column 1 from the previous record, keep the smaller tag, and flag it.
    if (full != 1 && ((old6 == 2 && $6 == 3) || (old6 == 3 && $6 == 2))) {
        $1 = old1
        $6 = min(old6,$6)
        full = 1
    }

    # The (possibly rewritten) current record becomes the previous one.
    old  = $0
    old1 = $1
    old6 = $6
}

# The last record seen is always printed once input is exhausted.
END {
    print old
}

View File

@ -0,0 +1,78 @@
# min(a,b): return the smaller of a and b (a when they compare equal).
function min(a,b) {
    if (a <= b)
        return a
    return b
}
# max(a,b): return the larger of a and b (a when they compare equal).
function max(a,b) {
    if (a >= b)
        return a
    return b
}
# rev(s): return the characters of s in reverse order ("" for an empty s).
#
# x and i are declared as extra parameters, which is how awk spells local
# variables; without that they are globals and every call overwrites any
# x or i used by the rest of the program.
function rev(s,    x, i) {
    x = ""
    for (i = length(s); i > 0; i--)
        x = x substr(s, i, 1)
    return x
}
# swapchar(s,a,b): return s with every a turned into b and every b turned
# into a.  a and b are used as dynamic regular expressions by gsub(), and
# "@" serves as the temporary placeholder.
function swapchar(s,a,b) {
    gsub(a, "@", s)     # park all a's on the placeholder
    gsub(b, a, s)       # b -> a cannot touch the parked characters
    gsub("@", b, s)     # release the placeholder as b
    return s
}
# revcomp(s): return the reverse complement of the nucleotide string s.
#
# Complement pairs (upper and lower case): A/T, C/G, M/K, R/Y, B/V, D/H.
# Any other character is copied unchanged.  That includes the IUPAC codes
# W (A or T) and S (C or G): each of them is its own complement, so they
# must NOT be exchanged with one another.
#
# src, dst, out, i, c and p are awk locals (extra parameters).
function revcomp(s,    src, dst, out, i, c, p) {
    # dst holds, at the same position, the complement of each src character.
    src = "ATCGMKRYBVDHatcgmkrybvdh"
    dst = "TAGCKMYRVBHDtagckmyrvbhd"
    out = ""
    # Walk the input backwards so the result comes out already reversed.
    for (i = length(s); i > 0; i--) {
        c = substr(s, i, 1)
        p = index(src, c)
        out = out (p ? substr(dst, p, 1) : c)
    }
    return out
}
# Per-record rule: cut the region described by the record out of the
# sequence held in "chloro" and file it under the record's column-6 tag.
#
# Variables supplied by the caller with -v: delta, seqlen, chloro.
# Results: nparts[tag] counts the records seen for a tag, and
# parts[tag][k][1..7] holds, for the k-th record of that tag:
#   1 = start of the cut, 2 = end of the cut, 3..6 = columns 3..6 of the
#   record, 7 = the extracted sequence.
{
    # Widen [$1,$2] by delta on both sides, clamped to 1..seqlen.
    lo = max(1, $1 - delta)
    hi = min($2 + delta, seqlen)

    seq = substr(chloro, lo, hi - lo + 1)
    # Records flagged "R" in column 5 are stored reverse-complemented.
    if ($5 == "R")
        seq = revcomp(seq)

    # Next free slot for this tag.
    idx = ++nparts[$6]

    parts[$6][idx][1] = lo
    parts[$6][idx][2] = hi
    parts[$6][idx][3] = $3
    parts[$6][idx][4] = $4
    parts[$6][idx][5] = $5
    parts[$6][idx][6] = $6
    parts[$6][idx][7] = seq
}
# END: write one FASTA file per fragment (or per pair of fragments).
#
# With a single tag in parts[], every stored record becomes its own file
# rps12_fragments_<i>.fasta.  With two tags, every (tag 1, tag 2) pair is
# written to rps12_fragments_<k>.fasta as the tag-1 sequence, ten "n"
# characters and the tag-2 sequence.  The header records the coordinates,
# the strand flag of each part, and "limit", the length of the first part
# plus one (plus the ten-character spacer in the two-part case).
#
# The output file name is built once per file and the file is closed as
# soon as both lines are written: an unparenthesised concatenation after
# ">" is ambiguous in awk, and leaving every file open keeps one
# descriptor per fragment alive until the program exits.
END {
    l = length(parts)
    if (l==1) {
        n = nparts[1]
        for (i = 1; i <= n; i++) {
            fasta = "rps12_fragments_" i ".fasta"
            print ">RPS12_" i,"parts=1; limit=" (length(parts[1][i][7]) + 1) \
                  "; from1=" parts[1][i][1] \
                  "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \
                  ";" > fasta
            print parts[1][i][7] > fasta
            close(fasta)
        }
    }
    if (l==2) {
        n1 = nparts[1]
        n2 = nparts[2]
        for (i = 1; i <= n1; i++)
            for (j = 1; j <= n2; j++) {
                # Running number of the (i,j) pair, starting at 1.
                k = (i-1)*n2+j
                fasta = "rps12_fragments_" k ".fasta"
                print ">RPS12_" k,"parts=2", \
                      "limit=" (length(parts[1][i][7]) + 10 + 1) \
                      "; from1=" parts[1][i][1] "; to1=" parts[1][i][2] "; strand1=" parts[1][i][5] \
                      "; from2=" parts[2][j][1] "; to2=" parts[2][j][2] "; strand2=" parts[2][j][5] \
                      ";" > fasta
                print parts[1][i][7] "nnnnnnnnnn" parts[2][j][7] > fasta
                close(fasta)
            }
    }
}

View File

@ -0,0 +1,47 @@
# convert1p(p): map one position p to another coordinate system using the
# values passed to the script with -v.
#
#   p <  L2 : offset measured from 1,  anchored on F1 when S1 is "F",
#             otherwise counted backwards from T1
#   p >= L2 : offset measured from L2, anchored on F2 when S2 is "F",
#             otherwise counted backwards from T2
#
# NOTE(review): S1/F1/T1, S2/F2/T2 and L2 presumably carry the strandN,
# fromN, toN and limit values written in the fragment FASTA headers --
# confirm against the calling shell script.
#
# origin, sign and base are awk locals (extra parameters), so calling this
# function does not create or overwrite global variables.
function convert1p(p,    origin, sign, base) {
    if (p+0 < L2) {
        origin = 1
        if (S1=="F") {
            sign = 1
            base = F1
        } else {
            sign = -1
            base = T1
        }
    } else {
        origin = L2
        if (S2=="F") {
            sign = 1
            base = F2
        } else {
            sign = -1
            base = T2
        }
    }
    return sign*(p - origin) + base
}
# convert(p1,p2): map both ends of a range with convert1p() and return the
# location as text: "a..b" when the mapped first end is lower than the
# mapped second end, "complement(b..a)" otherwise.
function convert(p1,p2) {
    p1 = convert1p(p1)
    p2 = convert1p(p2)
    res = (p1 < p2) ? (p1 ".." p2) : ("complement(" p2 ".." p1 ")")
    return res
}
# Rewrite every "<digits>..<digits>" range of the line through convert(),
# then print the line (lines without any range are printed unchanged).
#
# The line is scanned left to right: s is the part still to be examined,
# r the part already rewritten.  The loop stops as soon as match() finds no
# further range, so text following the last range can never make it spin,
# and each range is replaced exactly where it was found rather than being
# looked up again as a regular expression in already-converted text.
/[0-9]+\.\.[0-9]+/ {
    s = $0
    r = ""
    while (match(s, /[0-9]+\.\.[0-9]+/)) {
        range = substr(s, RSTART, RLENGTH)
        # Keep the text in front of the range, then drop both from s.
        r = r substr(s, 1, RSTART - 1)
        s = substr(s, RSTART + RLENGTH)
        # Split "from..to" at the first ".." (the digits contain no dot).
        dots = index(range, "..")
        from = substr(range, 1, dots - 1)
        to = substr(range, dots + 2)
        r = r convert(from, to)
    }
    # Whatever follows the last range is copied through untouched.
    $0 = r s
}
{print $0}

View File

@ -785,7 +785,7 @@ pushTmpDir ORG.organnot
} }
' "${RESULTS}.sorted.annot" "${RESULTS}.sorted.annot" \ ' "${RESULTS}.sorted.annot" "${RESULTS}.sorted.annot" \
> "${RESULTS}.uniq_gene.annot" > "${RESULTS}.uniq_gene.annot"
log-Pinfo "Done." loginfo "Done."
if [[ "$tagprefix" != "no" ]] ; then if [[ "$tagprefix" != "no" ]] ; then
loginfo "Adding locus tags from number: $locusshift..." loginfo "Adding locus tags from number: $locusshift..."