Fixed the "too many partial CDS" bug
Former-commit-id: d733a46f4e92f755f38e452f03a28062de6739f1 Former-commit-id: 36bdc324d2b9a0491d07d40a7e68a4cf7ea73984
This commit is contained in:
98
detectors/cds/tools/lib/compareCds.awk
Normal file
98
detectors/cds/tools/lib/compareCds.awk
Normal file
@ -0,0 +1,98 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# Min(a, b): return the smaller of the two arguments.
# When neither is smaller (e.g. they are equal), b is returned.
function Min(a, b) {
    if (a < b)
        return a
    return b
}
|
||||
|
||||
# Max(a, b): return the larger of the two arguments.
# When neither is larger (e.g. they are equal), b is returned.
function Max(a, b) {
    if (a > b)
        return a
    return b
}
|
||||
|
||||
# Align(s1, s2): integer percentage score (100 = identical) for two sequences.
#
#   s1, s2   the two sequence strings to compare
#   _local_  dummy separator; everything after it is a local variable
#
# Identical strings short-circuit to 100 without calling the aligner.
# Otherwise the value returned by AlignNWS() (computed with the global
# Identity matrix) is subtracted from the longer sequence length and the
# result is expressed as a truncated percentage of that length.
# NOTE(review): AlignNWS() is defined elsewhere; its result is presumably a
# distance / mismatch count -- confirm against its definition.
function Align(s1, s2,    _local_, dist, longest) {
    if (s1 != s2) {
        dist = AlignNWS(s1, s2, Identity)
        longest = Max(length(s1), length(s2))
        return int((longest - dist) * 100 / longest)
    }
    return 100
}
|
||||
|
||||
# One-time setup, executed before any input is read.
BEGIN {
    # Make every "for (k in array)" loop visit indices in ascending
    # numeric order (gawk-specific setting).
    PROCINFO["sorted_in"] = "@ind_num_asc"

    # Fill the global Identity matrix used by Align() for the alphabet
    # A-Z plus "*". IdentityMatrix() is defined outside this file section.
    IdentityMatrix("ABCDEFGHIJKLMNOPQRSTUVWXYZ*", Identity)
}
|
||||
|
||||
# Executed by gawk each time a new input file is about to be read:
# advance the 1-based file counter and remember the file's name under it.
BEGINFILE {
    File[++NFile] = FILENAME
}
|
||||
|
||||
# Lines starting with "#" are comments: ignore them entirely.
$0 ~ /^#/ {
    next
}
|
||||
|
||||
# Record rule: runs for every input line that was not skipped as a comment.
#
# Stores the line in Rec[file][n] (n = 1-based record number within the
# current file), split into named fields:
#   $1 genefam, $2 gene, $3 from, $4 to, $5 strand, $6 nexon, $7 length,
#   $9 protseq  ($8 is not used here)
# and indexes the record by its "stop" coordinate: field 4 when the strand
# field is "D", field 3 otherwise.
#
# Stop[]  collects every stop coordinate seen in any file.
# Indx1[] maps stop -> record number for the first file.
# Indx2[] maps stop -> record number for any later file.
# A later record with the same stop in the same file replaces the earlier
# index entry.
{
    # Blank (or whitespace-only) lines carry no record. Without this guard
    # they would be stored under an empty stop key and show up as a bogus
    # entry in the final report.
    if (NF == 0)
        next

    strand = $5
    stop = (strand == "D" ? $4 : $3)
    Stop[stop]++

    i = ++NRec[NFile]
    Rec[NFile][i]["record"]  = $0
    Rec[NFile][i]["genefam"] = $1
    Rec[NFile][i]["gene"]    = $2
    Rec[NFile][i]["from"]    = $3
    Rec[NFile][i]["to"]      = $4
    Rec[NFile][i]["strand"]  = $5
    Rec[NFile][i]["nexon"]   = $6
    Rec[NFile][i]["length"]  = $7
    Rec[NFile][i]["protseq"] = $9

    if (NFile == 1)
        Indx1[stop] = i
    else
        Indx2[stop] = i
}
|
||||
|
||||
# Final report, one paragraph per stop coordinate seen in either file
# (visited in ascending numeric order, see PROCINFO["sorted_in"]):
#
#   FILE1 <record from file 1 | NONE>
#   FILE2 <record from file 2 | NONE>
#   MATCH <genefam> ID <identity%> <status>
#   <empty line>
#
# When both files have a record at that stop, the status is derived from
# the Align() score of the two protein sequences:
#   100 -> CORRECT, >= 90 -> ALMOST_CORRECT, >= 80 -> ACCEPTABLE,
#   otherwise WRONG.
# Any non-CORRECT status gets ".BAD_NBEXON" appended when the exon counts
# differ, followed by ".BAD_START" when the start coordinates differ or
# ".BAD_JUNCTION" when they are equal.
# When only one file has a record the identity is reported as 0 with
# MISSED.WRONG_STOP (file 1 only) or OVERPRED.WRONG_STOP (file 2 only).
END {
    for (st in Stop) {
        # Record numbers for this stop; empty (false) when a file has none.
        idx1 = Indx1[st]
        idx2 = Indx2[st]

        print "FILE1 " (idx1 ? Rec[1][idx1]["record"] : "NONE")
        print "FILE2 " (idx2 ? Rec[2][idx2]["record"] : "NONE")

        if (idx1 && idx2) {
            fm = Rec[1][idx1]["genefam"]
            id = Align(Rec[1][idx1]["protseq"], Rec[2][idx2]["protseq"])
            printf("MATCH %s ID %d ", fm, id)

            status = (id == 100) ? "CORRECT" \
                   : (id >= 90)  ? "ALMOST_CORRECT" \
                   : (id >= 80)  ? "ACCEPTABLE" \
                   :               "WRONG"

            # Anything short of a perfect match gets diagnostic suffixes.
            if (id != 100) {
                if (Rec[1][idx1]["nexon"] != Rec[2][idx2]["nexon"]) {
                    status = status ".BAD_NBEXON"
                }

                # The start is the end opposite to the stop: "from" when
                # the strand field is "D", "to" otherwise.
                start1 = Rec[1][idx1]["strand"] == "D" ? Rec[1][idx1]["from"] : Rec[1][idx1]["to"]
                start2 = Rec[2][idx2]["strand"] == "D" ? Rec[2][idx2]["from"] : Rec[2][idx2]["to"]

                if (start1 != start2) {
                    status = status ".BAD_START"
                } else {
                    status = status ".BAD_JUNCTION"
                }
            }
            print status
        } else if (idx1 || idx2) {
            # Exactly one file has a record ending at this stop.
            fm = idx1 ? Rec[1][idx1]["genefam"] : Rec[2][idx2]["genefam"]
            if (idx1) {
                print "MATCH " fm " ID 0 MISSED.WRONG_STOP"
            } else {
                print "MATCH " fm " ID 0 OVERPRED.WRONG_STOP"
            }
        }

        print ""
    }
}
|
Reference in New Issue
Block a user