
Former-commit-id: 93fac7a70052d06c2a12bf8af59820c653edd31b Former-commit-id: 0869fdad0f550941a0f78f1e4c57f4fcdb3f6076
136 lines
2.7 KiB
Awk
136 lines
2.7 KiB
Awk
#
# Extend the start of the first exon and the stop of the last exon of each entry.
#
# Variables (set with -v): FASTA START_MODEL STOP_MODEL START_WALK STOP_WALK
#
|
|
|
|
# UpStart(pos, strand)
#
# Walk backwards from 'pos' in steps of 3, looking at the 3-letter word
# found at each index, and return the index of the word with the highest
# StartModel value (the first one met wins ties).
#
#   pos     index to start from, in forward-strand coordinates
#   strand  "-" makes the walk run on RevSeq, with 'pos' mirrored as
#           LenSeq - pos + 1; any other value walks on Seq
#
# The walk goes back at most START_WALK positions, never below index 1,
# and ends early at the first word present in StopModel.
#
# Returns 0 when no word of StartModel with a value above 0 was met;
# otherwise the selected index, mirrored back to forward-strand
# coordinates when strand is "-".
#
# Names after _local_ are local variables, not arguments.
#
function UpStart(pos, strand, _local_, dna, idx, lowest, bestPos, bestScore, triplet) {

    if (strand == "-") {
        dna = RevSeq
        pos = LenSeq - pos + 1
    }
    else {
        dna = Seq
    }

    bestPos   = 0
    bestScore = 0

    # lower bound of the walk: does not change while walking
    lowest = Max(1, pos - START_WALK)

    idx = pos
    while (idx >= lowest) {
        triplet = substr(dna, idx, 3)

        if (triplet in StopModel)
            break

        if (triplet in StartModel) {
            if (StartModel[triplet] > bestScore) {
                bestPos   = idx
                bestScore = StartModel[triplet]
            }
        }

        idx -= 3
    }

    # mirror a real hit back to forward coordinates; 0 stays 0
    if ((strand == "-") && (bestPos > 0))
        bestPos = LenSeq - bestPos + 1

    return bestPos
}
|
|
|
|
#
|
|
|
|
# DownStop(pos, strand)
#
# Walk forwards from 'pos' in steps of 3 and return the index of the
# first 3-letter word that is present in StopModel.
#
#   pos     index to start from, in forward-strand coordinates
#   strand  "-" makes the walk run on RevSeq, with 'pos' mirrored as
#           LenSeq - pos + 1; any other value walks on Seq
#
# Only indices strictly below Min(LenSeq, pos + STOP_WALK) are visited.
#
# Returns 0 when no such word was met; otherwise its index, mirrored
# back to forward-strand coordinates when strand is "-".
#
# Names after _local_ are local variables, not arguments.
#
function DownStop(pos, strand, _local_, dna, idx, limit, hit, triplet) {

    if (strand == "-") {
        dna = RevSeq
        pos = LenSeq - pos + 1
    }
    else {
        dna = Seq
    }

    hit = 0

    # exclusive upper bound of the walk: does not change while walking
    limit = Min(LenSeq, pos + STOP_WALK)

    idx = pos
    while (idx < limit) {
        triplet = substr(dna, idx, 3)

        if (triplet in StopModel) {
            hit = idx
            break
        }

        idx += 3
    }

    # mirror a real hit back to forward coordinates; 0 stays 0
    if ((strand == "-") && (hit > 0))
        hit = LenSeq - hit + 1

    return hit
}
|
|
|
|
#
|
|
# rules
|
|
#
|
|
|
|
BEGIN {

    # --- defaults for variables not supplied with -v

    if (START_MODEL == "") { START_MODEL = "Models/start.default.frq" }
    if (STOP_MODEL  == "") { STOP_MODEL  = "Models/stop.default.frq"  }
    if (START_WALK  == "") { START_WALK  = 120 }
    if (STOP_WALK   == "") { STOP_WALK   = -1  }

    # --- load the sequence and its reverse complement
    # (TestPath, Error, ReadFasta and RevComplement are defined outside this file section)

    if (! TestPath(FASTA)) {
        Error("Fasta file: '" FASTA "' not found", 1)
    }

    Seq    = ReadFasta(FASTA)
    LenSeq = length(Seq)

    RevSeq = RevComplement(Seq)

    # --- a negative walk length is replaced by the sequence length

    if (START_WALK < 0) { START_WALK = LenSeq }
    if (STOP_WALK  < 0) { STOP_WALK  = LenSeq }

    # --- load the start and stop models into StartModel / StopModel

    if (! TestPath(START_MODEL)) {
        Error("model file: '" START_MODEL "' not found", 2)
    }
    if (! TestPath(STOP_MODEL)) {
        Error("model file: '" STOP_MODEL "' not found", 2)
    }

    ReadModel(START_MODEL, StartModel)
    ReadModel(STOP_MODEL, StopModel)
}
|
|
|
|
#
|
|
|
|
# New entry: remember its strand (field 7) and exon count (field 9),
# and reset the running exon counter. There is no 'next' here, so the
# line is still handled by any later rule that matches it.
/^c begin_entry/ {
    NbExon    = 0
    Strand    = $7
    StartExon = 1                   # always first (even on - strand)
    StopExon  = nbexon = $9 + 0     # always last (even on - strand)
}
|
|
|
|
# Lines starting with "c" are copied to the output unchanged.
/^c/ {
    print
    next
}
|
|
|
|
# Exon line: adjust the start of the entry's first exon (UpStart) and the
# stop of its last exon (DownStop), then print the line followed by a
# two-character modifier.
#
# On the "+" strand the start is field 3 and the stop is field 4; on the
# other strand the roles of the two fields are swapped.
#
# Each modifier character is "=" when the boundary was not searched or a
# position was found, and "<" or ">" when the search returned 0 (the
# field then keeps its original value).
/^e exon/ {
    NbExon++

    # --- start side: only on the exon numbered StartExon

    if (NbExon == StartExon) {
        if (Strand == "+") pos = $3 + 0
        else               pos = $4 + 0

        start = UpStart(pos, Strand)

        if (start == 0) {
            # nothing found: keep the original position and flag it
            mod_start = (Strand == "+") ? "<" : ">"
            start = pos
        }
        else {
            mod_start = "="
        }

        if (Strand == "+") $3 = start
        else               $4 = start
    }
    else {
        mod_start = "="
    }

    # --- stop side: only on the exon numbered StopExon

    if (NbExon == StopExon) {
        # the search starts one position past the current exon end
        if (Strand == "+") pos = $4 + 1
        else               pos = $3 - 1

        stop = DownStop(pos, Strand)

        if (stop == 0) {
            # nothing found: step back onto the original end and flag it
            mod_stop = (Strand == "+") ? ">" : "<"
            last = (Strand == "+") ? (pos - 1) : (pos + 1)
            stop = last
        }
        else {
            mod_stop = "="
            # 'last' is the position just before the found word;
            # the new end lies 3 positions beyond 'last'
            last = (Strand == "+") ? (stop - 1) : (stop + 1)
            stop = (Strand == "+") ? (last + 3) : (last - 3)
        }

        if (Strand == "+") $4 = stop
        else               $3 = stop
    }
    else {
        mod_stop = "="
    }

    # the modifier characters follow field order (field 3 first, then field 4)
    if (Strand == "+") modif = mod_start mod_stop
    else               modif = mod_stop mod_start

    print $0, "; modifier \"" modif "\""
    next
}
|
|
|
|
# Intron, splice and similarity lines are copied to the output unchanged.
/^e (intron|splice|similarity)/ {
    print
    next
}
|