Patch the detection algorithm (the overlap detection)
Former-commit-id: 7aca679a3425b6f5505f6122f2a58d1c5cd14663 Former-commit-id: 85fa6c3f1934391e952feb71f46300662034eaef
This commit is contained in:
@ -31,6 +31,12 @@ else
|
|||||||
QUERY="$1"
|
QUERY="$1"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if (( $# > 1 )) ; then
|
||||||
|
TEMP=$2
|
||||||
|
else
|
||||||
|
TEMP=""
|
||||||
|
fi
|
||||||
|
|
||||||
DBROOT="$CDS_DATA_DIR/chlorodb/RPS12"
|
DBROOT="$CDS_DATA_DIR/chlorodb/RPS12"
|
||||||
RPS12DB="${DBROOT}/RPS12_DB.clean.fst"
|
RPS12DB="${DBROOT}/RPS12_DB.clean.fst"
|
||||||
DELTA=50
|
DELTA=50
|
||||||
@ -77,6 +83,7 @@ blastx \
|
|||||||
| sort \
|
| sort \
|
||||||
| uniq > "dbsel.txt"
|
| uniq > "dbsel.txt"
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Extract corresponding protein sequences
|
# Extract corresponding protein sequences
|
||||||
# from the RPS12 database.
|
# from the RPS12 database.
|
||||||
@ -91,10 +98,11 @@ blastx \
|
|||||||
| $AwkCmd '# Normalizes the writing of the forward and reverse strand matches
|
| $AwkCmd '# Normalizes the writing of the forward and reverse strand matches
|
||||||
($7 <= $8) {print $7,$8,$9,$10,"F"}
|
($7 <= $8) {print $7,$8,$9,$10,"F"}
|
||||||
($7 > $8) {print $8,$7,$9,$10,"R"}' \
|
($7 > $8) {print $8,$7,$9,$10,"R"}' \
|
||||||
| sort -un \
|
| sort -n \
|
||||||
|
| uniq \
|
||||||
| $AwkCmd 'function overlap(x1,y1,x2,y2) {
|
| $AwkCmd 'function overlap(x1,y1,x2,y2) {
|
||||||
return (((x1 <= x2) && (x2 <= (y1+1))) ||
|
return (((x1+0 <= x2+0) && ((y1+1) >= x2+0)) ||
|
||||||
((x2 <= x1) && (x1 <= (y2+1))))
|
((x2+0 <= x1+0) && ((y2+1) >= x1+0)))
|
||||||
}
|
}
|
||||||
function min(a,b) {return (a <= b) ? a:b }
|
function min(a,b) {return (a <= b) ? a:b }
|
||||||
function max(a,b) {return (a >= b) ? a:b }
|
function max(a,b) {return (a >= b) ? a:b }
|
||||||
@ -134,7 +142,7 @@ blastx \
|
|||||||
}
|
}
|
||||||
{print $0,i}
|
{print $0,i}
|
||||||
' \
|
' \
|
||||||
| sort -nk 1 \
|
| sort -nk 6 \
|
||||||
| $AwkCmd 'function min(a,b) {return (a <= b) ? a:b }
|
| $AwkCmd 'function min(a,b) {return (a <= b) ? a:b }
|
||||||
(old6 == 1) {
|
(old6 == 1) {
|
||||||
print old
|
print old
|
||||||
@ -156,7 +164,7 @@ blastx \
|
|||||||
old = $0
|
old = $0
|
||||||
old1 = $1
|
old1 = $1
|
||||||
old6= $6
|
old6= $6
|
||||||
}' \
|
}' \
|
||||||
| $AwkCmd -v delta="$DELTA" \
|
| $AwkCmd -v delta="$DELTA" \
|
||||||
-v seqlen="$SEQLEN" \
|
-v seqlen="$SEQLEN" \
|
||||||
-v chloro="$SEQUENCE" \
|
-v chloro="$SEQUENCE" \
|
||||||
@ -238,6 +246,16 @@ blastx \
|
|||||||
}
|
}
|
||||||
'
|
'
|
||||||
|
|
||||||
|
nrps12=$(ls -1 rps12_fragments_*.fasta | wc -l)
|
||||||
|
|
||||||
|
if (( nrps12 > 1 )) ; then
|
||||||
|
message="$nrps12 versions"
|
||||||
|
else
|
||||||
|
message="$nrps12 version"
|
||||||
|
fi
|
||||||
|
|
||||||
|
loginfo "$message of the gene rps12 detected."
|
||||||
|
|
||||||
#
|
#
|
||||||
# Run exonarate on every fragment of chloroplast
|
# Run exonarate on every fragment of chloroplast
|
||||||
#
|
#
|
||||||
@ -260,6 +278,12 @@ blastx \
|
|||||||
|
|
||||||
|
|
||||||
for f in *.res ; do
|
for f in *.res ; do
|
||||||
|
mv $f $f.ori
|
||||||
|
if [[ -z "$TEMP" ]] ; then
|
||||||
|
dest="/dev/stdout"
|
||||||
|
else
|
||||||
|
dest="$TEMP/$f"
|
||||||
|
fi
|
||||||
header=$(head -1 ${f/.rps12.res/.fasta})
|
header=$(head -1 ${f/.rps12.res/.fasta})
|
||||||
L2=$(sed -E 's/^.*limit=([0-9]+);.*$/\1/' <<< $header)
|
L2=$(sed -E 's/^.*limit=([0-9]+);.*$/\1/' <<< $header)
|
||||||
S1=$(sed -E 's/^.*strand1=(R|F);.*$/\1/' <<< $header)
|
S1=$(sed -E 's/^.*strand1=(R|F);.*$/\1/' <<< $header)
|
||||||
@ -268,7 +292,7 @@ blastx \
|
|||||||
F2=$(sed -E 's/^.*from2=([0-9]+);.*$/\1/' <<< $header)
|
F2=$(sed -E 's/^.*from2=([0-9]+);.*$/\1/' <<< $header)
|
||||||
T1=$(sed -E 's/^.*to1=([0-9]+);.*$/\1/' <<< $header)
|
T1=$(sed -E 's/^.*to1=([0-9]+);.*$/\1/' <<< $header)
|
||||||
T2=$(sed -E 's/^.*to2=([0-9]+);.*$/\1/' <<< $header)
|
T2=$(sed -E 's/^.*to2=([0-9]+);.*$/\1/' <<< $header)
|
||||||
cat $f \
|
cat $f.ori \
|
||||||
| $AwkCmd -v S1="$S1" -v F1="$F1" -v T1="$T1" \
|
| $AwkCmd -v S1="$S1" -v F1="$F1" -v T1="$T1" \
|
||||||
-v S2="$S2" -v F2="$F2" -v T2="$T2" -v L2="$L2" \
|
-v S2="$S2" -v F2="$F2" -v T2="$T2" -v L2="$L2" \
|
||||||
'
|
'
|
||||||
@ -358,8 +382,7 @@ blastx \
|
|||||||
$0 = line
|
$0 = line
|
||||||
}
|
}
|
||||||
{print $0}
|
{print $0}
|
||||||
'
|
' > $dest
|
||||||
|
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
@ -398,4 +421,13 @@ exit 0
|
|||||||
# join(51..159,312..553,1051..1092)
|
# join(51..159,312..553,1051..1092)
|
||||||
# location=join(complement(52867..52980),109583..109818,110316..110358);
|
# location=join(complement(52867..52980),109583..109818,110316..110358);
|
||||||
# >RPS12_2 parts=2 limit=254; from1=52787; to1=53030; strand1=R; from2=109521; to2=110405; strand2=F;
|
# >RPS12_2 parts=2 limit=254; from1=52787; to1=53030; strand1=R; from2=109521; to2=110405; strand2=F;
|
||||||
# join\((complement\([0-9]+\.\.[0-9]+\),)+complement\([0-9]+\.\.[0-9]+\)\)
|
# join\((complement\([0-9]+\.\.[0-9]+\),)+complement\([0-9]+\.\.[0-9]+\)\)
|
||||||
|
|
||||||
|
# cat NC_010654.annot.embl | awk -v tag="agser" 'BEGIN {n=0} /\locus_tag/ {n++; sub(/""/, sprintf("\"%s%04d\"",tag,n),$0)} {print $0}' | less
|
||||||
|
|
||||||
|
# BRR.chloro_1 NC_018117_rps12_2 96.08 102 4 0 99022 98717 36 137 9e-57 195
|
||||||
|
# BRR.chloro_1 NC_018117_rps12_2 96.08 102 4 0 141187 141492 36 137 9e-57 195
|
||||||
|
# BRR.chloro_1 NC_018117_rps12_2 94.87 39 2 0 70611 70495 1 39 2e-16 78.6
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user