first batch
Former-commit-id: 1eecb206a17c4aff21d1170b48db134ce3c4f14e
This commit is contained in:
528
detectors/cds/tools/build_swissprot_mito_db.sh
Normal file
528
detectors/cds/tools/build_swissprot_mito_db.sh
Normal file
@ -0,0 +1,528 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BUILD REFERENCE : From SwissProt
|
||||
#
|
||||
#========================================================================================
|
||||
|
||||
# -- CAUTION -- Works as long as the script
|
||||
# is not called through a symlink
|
||||
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
SPDIR="$CDS_DATA_DIR/sp_mitodb"
|
||||
SPGENESDIR="$SPDIR/genes"
|
||||
|
||||
CORE_GENES="atp6 atp8"
|
||||
CORE_GENES="$CORE_GENES cox1 cox2 cox3"
|
||||
CORE_GENES="$CORE_GENES cytb"
|
||||
CORE_GENES="$CORE_GENES nd1 nd2 nd3 nd4 nd4L nd5 nd6"
|
||||
|
||||
function download_swissprot() {
|
||||
URL="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.dat.gz"
|
||||
|
||||
curl $URL
|
||||
}
|
||||
|
||||
function extract_mito_entries() {
|
||||
awk -F'\n' '
|
||||
BEGIN {RS="//\n"; ORS=RS; OFS="\n"}
|
||||
/OC Eukaryota; Metazoa;/ && /OG Mitochondrion\./ && !/DE Flags: Fragment;/ {print $0}
|
||||
' $1
|
||||
}
|
||||
|
||||
function extract_mito_gene_entries() {
|
||||
|
||||
awk -v gene=$1 -F'\n' '
|
||||
BEGIN {RS="//\n"; ORS=RS; OFS="\n"}
|
||||
($0 ~ gene"_") && /OC Eukaryota; Metazoa;/ && /OG Mitochondrion./ {print $0}
|
||||
' $2
|
||||
}
|
||||
|
||||
function extract_mito_gene_frg() {
|
||||
|
||||
awk -F'\n' '
|
||||
BEGIN {RS="//\n"; ORS=RS; OFS="\n"}
|
||||
/DE Flags: Fragment;/ {print $0}
|
||||
' $1
|
||||
}
|
||||
|
||||
function extract_mito_gene_ac() {
|
||||
|
||||
awk -v ac=$1 -F'\n' '
|
||||
BEGIN {RS="//\n"; ORS=RS; OFS="\n"}
|
||||
($0 ~ "AC " ac ";") && /OC Eukaryota; Metazoa;/ && /OG Mitochondrion./ {print $0}
|
||||
' $2
|
||||
}
|
||||
|
||||
function extract_fasta_protein() {
|
||||
$AwkCmd '
|
||||
/^ID/ {ID=$2}
|
||||
/^AC/ {AC=$2; sub(";","",AC)}
|
||||
/^DR EMBL;/ {
|
||||
EMBL=$4;
|
||||
sub(";","",EMBL);
|
||||
sub("-","xxx",EMBL);
|
||||
if ( EMBL != "xxx" ) {
|
||||
EBI="curl \"https://www.ebi.ac.uk/ena/browser/api/embl/" EMBL "?download=true\" | egrep \"FT +/gene=\" | cut -d \"=\" -f 2 | tr -d \"\\\"\""
|
||||
EBI | getline GENE
|
||||
close(EBI)
|
||||
} else {
|
||||
GENE = "xxx"
|
||||
}
|
||||
}
|
||||
/^ / {gsub(/ /,"",$0); SEQ=SEQ $0}
|
||||
/^\/\// {
|
||||
if (GENE != "xxx") {
|
||||
print ">"ID,"SP_AC="AC";","EMBL_AC="EMBL";","gene="GENE";";
|
||||
print SEQ
|
||||
}
|
||||
ID="xxx";
|
||||
AC="xxx";
|
||||
EMBL="xxx";
|
||||
GENE="xxx"
|
||||
SEQ=""
|
||||
}
|
||||
' $1 \
|
||||
| formatfasta
|
||||
}
|
||||
|
||||
function rename_rules() {
|
||||
egrep "^>" $1 \
|
||||
| sed 's/^>//' \
|
||||
| sed 's/_/=/' \
|
||||
| tr -d ";" \
|
||||
| awk -F"=" '{print $1,$NF}' \
|
||||
| sort \
|
||||
| uniq -c \
|
||||
| awk '{
|
||||
x=$1
|
||||
$1=$2
|
||||
$2=sprintf("%06d",x)
|
||||
print $0}' \
|
||||
| sort -rn \
|
||||
| sed 's/ /=/' \
|
||||
| sed 's/ /=/' \
|
||||
| awk -F "=" '
|
||||
($1 == current) {
|
||||
sub(/^0+/,"",occurrence)
|
||||
print "# based on",occurrence,"observations renames",$NF,"in",gene
|
||||
print "/^>" current "/ s@gene=" $NF "@gene=" gene "@"
|
||||
}
|
||||
($1 != current) {
|
||||
current= $1
|
||||
gene=$NF
|
||||
occurrence=$2
|
||||
}
|
||||
'
|
||||
}
|
||||
|
||||
function rename_genes() {
|
||||
local n=1
|
||||
local input=$1
|
||||
cat $input > __tmp__$$__fasta
|
||||
rules=${input/fst/rules}
|
||||
#echo $rule 1>&2
|
||||
|
||||
rename_rules __tmp__$$__fasta > $rules.$n
|
||||
while [[ -s $rules.$n ]] ; do
|
||||
sed -f $rules.$n __tmp__$$__fasta > __tmp2__$$__fasta
|
||||
mv __tmp2__$$__fasta __tmp__$$__fasta
|
||||
((n++))
|
||||
rename_rules __tmp__$$__fasta > $rules.$n
|
||||
done
|
||||
|
||||
cat __tmp__$$__fasta
|
||||
rm __tmp__$$__fasta
|
||||
}
|
||||
|
||||
|
||||
function clean_strange_gene_name() {
|
||||
local input=$1
|
||||
local output=${input/.fst/_sp_ebi.genes}
|
||||
local to_keep=${input/.fst/_to_keep.lst}
|
||||
local to_be_removed=${input/.fst/_to_be_removed.lst}
|
||||
|
||||
grep '^>' $input \
|
||||
| sed 's/^>//' \
|
||||
| sed 's/_/=/' \
|
||||
| sed 's/;$//' \
|
||||
| awk -F'=' '{print $NF,$1}' \
|
||||
| sort \
|
||||
| uniq -c \
|
||||
| $AwkCmd '{
|
||||
x=$1
|
||||
$1=$2
|
||||
$2=sprintf("%06d",x)
|
||||
print $0}' \
|
||||
| sort -r > $output
|
||||
|
||||
$AwkCmd '
|
||||
($1!=current) {current=$1; print $3}
|
||||
' $output > $to_keep
|
||||
|
||||
$AwkCmd '
|
||||
($1==current) {print $3}
|
||||
($1!=current) {current=$1}
|
||||
' $output > $to_be_removed
|
||||
|
||||
filter_sp_fasta_db $to_be_removed $input > ${input/.fst/_strange_genes.fst}
|
||||
filter_sp_fasta_db $to_keep $input
|
||||
}
|
||||
|
||||
function filter_sp_fasta_db() {
|
||||
gene_pattern="$(echo $(cat $1) | tr ' ' '|')"
|
||||
|
||||
$AwkCmd -F "_" -v gene_pattern=$gene_pattern '
|
||||
/^>/ && (gene ~ gene_pattern) {
|
||||
print entry
|
||||
}
|
||||
|
||||
/^>/ {
|
||||
entry = $0
|
||||
gene = $1
|
||||
sub(/^>/, "", gene)
|
||||
}
|
||||
|
||||
!/^>/ {
|
||||
entry = entry "\n" $0
|
||||
}
|
||||
|
||||
END {
|
||||
if (gene ~ gene_pattern)
|
||||
print entry
|
||||
}
|
||||
' $2
|
||||
|
||||
}
|
||||
|
||||
function filter_out_sp_fasta_db() {
|
||||
gene_pattern="$(echo $(cat $1) | tr ' ' '|')"
|
||||
|
||||
$AwkCmd -v gene_pattern=$gene_pattern '
|
||||
/^>/ && (gene !~ gene_pattern) {
|
||||
print entry
|
||||
}
|
||||
|
||||
/^>/ {
|
||||
entry = $0
|
||||
gene = $1
|
||||
sub(/^>/, "", gene)
|
||||
}
|
||||
|
||||
!/^>/ {
|
||||
entry = entry "\n" $0
|
||||
}
|
||||
|
||||
END {
|
||||
if (gene !~ gene_pattern)
|
||||
print entry
|
||||
}
|
||||
' $2
|
||||
|
||||
}
|
||||
|
||||
function split_by_gene() {
|
||||
$AwkCmd -F "=" -v SPGENES=$SPGENESDIR '
|
||||
function writegene(gene,entry) {
|
||||
outputdir = SPGENES "/" gene
|
||||
output = outputdir "/" gene ".fst"
|
||||
system("mkdir -p " outputdir)
|
||||
print entry >> output
|
||||
close(output)
|
||||
}
|
||||
|
||||
/^>/ && gene!="" {
|
||||
writegene(gene,entry)
|
||||
}
|
||||
|
||||
/^>/ {
|
||||
entry = $0
|
||||
gene = $NF
|
||||
sub(/;$/, "", gene)
|
||||
}
|
||||
|
||||
!/^>/ {
|
||||
entry = entry "\n" $0
|
||||
}
|
||||
|
||||
END {
|
||||
writegene(gene,entry)
|
||||
}
|
||||
' $1
|
||||
}
|
||||
|
||||
function dereplicate() {
|
||||
local CDHIT_ID=0.95
|
||||
local CDHIT_DELTA=0.95
|
||||
|
||||
local gene="${1}"
|
||||
local fastain="${gene}/${gene}.fst"
|
||||
local cdhitout="${gene}/${gene}.cdhit.fst"
|
||||
|
||||
cd-hit -i "${fastain}" \
|
||||
-o "${cdhitout}" \
|
||||
-c ${CDHIT_DELTA} \
|
||||
-G 1 \
|
||||
-g 1 \
|
||||
-aL 0.95 \
|
||||
-s ${CDHIT_ID} \
|
||||
-b 350 -p 1 \
|
||||
-d 0 -n 3
|
||||
|
||||
local fasta1="${gene}/${gene}.1l.fst"
|
||||
}
|
||||
|
||||
function dereplicate_genes() {
|
||||
pushd $SPGENESDIR
|
||||
for g in * ; do
|
||||
dereplicate $g ;
|
||||
done
|
||||
popd
|
||||
}
|
||||
|
||||
function buildGeneBlastDB() {
|
||||
local gene="${1}"
|
||||
local fastain="${gene}/${gene}.cdhit.fst"
|
||||
|
||||
loginfo " formatting Blast $gene DB"
|
||||
timeoutcmd 300 makeblastdb -dbtype prot -in ${fastain} >& /dev/null
|
||||
loginfo "Done"
|
||||
}
|
||||
|
||||
function buildBlastDBs() {
|
||||
pushd $SPGENESDIR
|
||||
for g in * ; do
|
||||
buildGeneBlastDB $g ;
|
||||
done
|
||||
popd
|
||||
}
|
||||
|
||||
function list_shell_genes() {
|
||||
pushd $SPDIR
|
||||
|
||||
ls genes \
|
||||
| grep -v '\.' \
|
||||
| egrep -iv $(tr " " "|" <<< $CORE_GENES) \
|
||||
| grep -iv '^orf' \
|
||||
| grep -iv "$RPS12_GENE"
|
||||
|
||||
popd
|
||||
}
|
||||
|
||||
function list_dust_genes() {
|
||||
pushd $SPDIR 1>&2
|
||||
|
||||
ls genes \
|
||||
| grep -v '\.' \
|
||||
| egrep -iv $(tr " " "|" <<< $CORE_GENES) \
|
||||
| grep -i '^orf'
|
||||
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
function build_core_libraries() {
|
||||
pushd $SPDIR 1>&2
|
||||
|
||||
rm -rf core
|
||||
mkdir -p core
|
||||
|
||||
for gene in $CORE_GENES ; do
|
||||
cp genes/$gene/$gene.cdhit.fst core/$gene.fst
|
||||
cp genes/$gene/$gene.cdhit.fst.phr core/$gene.fst.phr
|
||||
cp genes/$gene/$gene.cdhit.fst.pin core/$gene.fst.pin
|
||||
cp genes/$gene/$gene.cdhit.fst.psq core/$gene.fst.psq
|
||||
done
|
||||
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
function build_rps12_library() {
|
||||
pushd $SPDIR 1>&2
|
||||
|
||||
local gene=$RPS12_GENE
|
||||
|
||||
rm -rf RPS12
|
||||
mkdir -p RPS12
|
||||
|
||||
cp genes/$gene/$gene.cdhit.fst RPS12/$gene.fst
|
||||
cp genes/$gene/$gene.cdhit.fst.phr RPS12/$gene.fst.phr
|
||||
cp genes/$gene/$gene.cdhit.fst.pin RPS12/$gene.fst.pin
|
||||
cp genes/$gene/$gene.cdhit.fst.psq RPS12/$gene.fst.psq
|
||||
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
function build_shell_libraries() {
|
||||
pushd $SPDIR 1>&2
|
||||
|
||||
rm -rf shell
|
||||
mkdir -p shell
|
||||
|
||||
for gene in $(list_shell_genes) ; do
|
||||
cp genes/$gene/$gene.cdhit.fst shell/$gene.fst
|
||||
cp genes/$gene/$gene.cdhit.fst.phr shell/$gene.fst.phr
|
||||
cp genes/$gene/$gene.cdhit.fst.pin shell/$gene.fst.pin
|
||||
cp genes/$gene/$gene.cdhit.fst.psq shell/$gene.fst.psq
|
||||
done
|
||||
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
function build_dust_libraries() {
|
||||
pushd $SPDIR 1>&2
|
||||
|
||||
rm -rf dust
|
||||
mkdir -p dust
|
||||
|
||||
for gene in $(list_dust_genes) ; do
|
||||
cp genes/$gene/$gene.cdhit.fst dust/$gene.fst
|
||||
cp genes/$gene/$gene.cdhit.fst.phr dust/$gene.fst.phr
|
||||
cp genes/$gene/$gene.cdhit.fst.pin dust/$gene.fst.pin
|
||||
cp genes/$gene/$gene.cdhit.fst.psq dust/$gene.fst.psq
|
||||
done
|
||||
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
function get_product_line() {
|
||||
pushd $SPGENESDIR 1>&2
|
||||
|
||||
local gene=$1
|
||||
local spac=$(head -1 $gene/$gene.cdhit.fst \
|
||||
| awk '
|
||||
{ AC=$2;
|
||||
sub(/^SP_AC=/,"",AC);
|
||||
sub(/;$/,"",AC);
|
||||
print AC}')
|
||||
|
||||
popd 1>&2
|
||||
|
||||
pushd $SPDIR 1>&2
|
||||
|
||||
extract_mito_gene_ac $spac rawdata/SP_Mito.dat \
|
||||
| grep "^DE " \
|
||||
| $AwkCmd -v gene=$gene '
|
||||
function remove_tails(line) {
|
||||
sub(/ *(\{[^}]+\});/,"",line)
|
||||
st = index(line,"=")
|
||||
return substr(line,st+1)
|
||||
}
|
||||
|
||||
/DE +RecName:/ {
|
||||
full = remove_tails($0)
|
||||
}
|
||||
/DE +Short=/ {
|
||||
ns = remove_tails($0)
|
||||
if (length(ns) > length(short)) {
|
||||
short = ns
|
||||
}
|
||||
}
|
||||
/DE +EC=/ {
|
||||
ec = remove_tails($0)
|
||||
}
|
||||
|
||||
END {
|
||||
if (length(short) > 10) {
|
||||
product = short
|
||||
} else {
|
||||
product = full
|
||||
}
|
||||
|
||||
if (ec != "") {
|
||||
product = product " (EC:" ec ")"
|
||||
}
|
||||
|
||||
if (product == "") {
|
||||
product = "Hypothetical protein of unknown function"
|
||||
}
|
||||
|
||||
gsub(/ /,"_",product)
|
||||
|
||||
print gene,gene,"--","--","--",product
|
||||
}
|
||||
'
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
function build_annotate_lst() {
|
||||
pushd $SPGENESDIR 1>&2
|
||||
|
||||
for gene in * ; do
|
||||
get_product_line $gene
|
||||
done
|
||||
|
||||
popd 1>&2
|
||||
}
|
||||
|
||||
pushd $SPDIR
|
||||
|
||||
####
|
||||
#
|
||||
# Download and prepare raw library from Swissprot FTP site
|
||||
#
|
||||
###
|
||||
|
||||
rm -rf rawdata
|
||||
mkdir -p rawdata
|
||||
|
||||
pushd rawdata
|
||||
|
||||
download_swissprot | extract_mito_entries > SP_Mito.dat
|
||||
|
||||
extract_fasta_protein SP_Mito.dat > SP_Mito_gene_db.fst
|
||||
|
||||
popd
|
||||
|
||||
####
|
||||
#
|
||||
# Clean swiss-prot fasta file for gene name annotation
|
||||
#
|
||||
###
|
||||
|
||||
pushd rawdata
|
||||
|
||||
rename_genes SP_Mito_gene_db.fst > SP_Mito_gene_db.clean_name.fst
|
||||
|
||||
clean_strange_gene_name SP_Mito_gene_db.clean_name.fst \
|
||||
> SP_Mito_gene_db.good_gene.fst
|
||||
|
||||
popd
|
||||
|
||||
####
|
||||
#
|
||||
# Prepare the database for all genes
|
||||
#
|
||||
###
|
||||
|
||||
rm -rf genes
|
||||
split_by_gene rawdata/SP_Mito_gene_db.good_gene.fst
|
||||
|
||||
dereplicate_genes
|
||||
|
||||
buildBlastDBs
|
||||
|
||||
####
|
||||
#
|
||||
# Prepare the differente gene databases for CDS annotation
|
||||
#
|
||||
###
|
||||
|
||||
build_core_libraries
|
||||
build_shell_libraries
|
||||
build_dust_libraries
|
||||
build_rps12_library
|
||||
|
||||
####
|
||||
#
|
||||
# Build Annotation file
|
||||
#
|
||||
###
|
||||
|
||||
build_annotate_lst > Annot.lst
|
||||
|
||||
# ls ../../mitodb/core/ | grep -v '\.' | sort > core_genes.lst
|
||||
# ls | grep -v '\.' | awk '{print tolower($1),$1}' | sort > sp_genes.lst
|
||||
# join -a 1 -e xxxx core_genes.lst sp_genes.lst > join_core.lst
|
||||
|
||||
popd
|
BIN
detectors/ir/.DS_Store
vendored
BIN
detectors/ir/.DS_Store
vendored
Binary file not shown.
23
detectors/normalize/tools/selectMetazoa.sh
Executable file
23
detectors/normalize/tools/selectMetazoa.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
# -- CAUTION -- Works as long as the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
|
||||
( \
|
||||
for f in $1/*.gbk* ; do \
|
||||
if [[ "$f" =~ \.gz$ ]] ; then \
|
||||
GREP=zgrep; \
|
||||
else \
|
||||
GREP=grep; \
|
||||
fi; \
|
||||
${GREP} -H -A 1 ' ORGANISM' $f; \
|
||||
done \
|
||||
) | \
|
||||
grep -B 1 Metazoa | \
|
||||
$AwkCmd '{print $1}' | \
|
||||
grep '\.gbk' | \
|
||||
sed -E 's/(^.*\.gbk(.gz)?).$/\1/' | \
|
||||
uniq
|
BIN
detectors/rrna/.DS_Store
vendored
BIN
detectors/rrna/.DS_Store
vendored
Binary file not shown.
141
detectors/rrna/tools/buildRRNAModels_mito.sh
Executable file
141
detectors/rrna/tools/buildRRNAModels_mito.sh
Executable file
@ -0,0 +1,141 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BUILD RRNA models
|
||||
#
|
||||
#========================================================================================
|
||||
|
||||
# -- CAUTION -- Works as long as the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
function fasta1li {
|
||||
|
||||
$AwkCmd '/^>/ {if (sequence) \
|
||||
{print sequence}; \
|
||||
print $0; \
|
||||
sequence=""} \
|
||||
!/^>/ {sequence = sequence $0} \
|
||||
END {print sequence}' $1
|
||||
}
|
||||
|
||||
function dereplicate {
|
||||
DATA=$1
|
||||
sumaclust -t 1 $DATA | \
|
||||
fasta1li | \
|
||||
grep -A 1 '^>' | \
|
||||
grep -A1 'cluster_center=True;' | \
|
||||
grep -v -- -- | \
|
||||
sed -E "s/count=[0-9]+; //" | \
|
||||
sed 's/cluster_weight/count/' | \
|
||||
$AwkCmd ' /^>/ {SEQ++;\
|
||||
match($0,"count=[0-9][0-9]*;");\
|
||||
count=substr($0,RSTART,RLENGTH);\
|
||||
$1=$1"_"SEQ;\
|
||||
print $1,count} \
|
||||
!/^>/ {print $0}'
|
||||
}
|
||||
|
||||
|
||||
function clustering {
|
||||
DATA=$1
|
||||
rm -rf $DATA
|
||||
mkdir $DATA
|
||||
sumaclust -t 0.9 $DATA.fasta | \
|
||||
fasta1li > $DATA.clust.fasta
|
||||
cluster=$(grep '^>' $DATA.clust.fasta | \
|
||||
sed -E 's/.*cluster=([^;]+);.*$/\1/' | \
|
||||
sort -u)
|
||||
for c in $cluster; do
|
||||
w=$(grep "$c" "${DATA}.clust.fasta" | \
|
||||
head -1 | \
|
||||
sed -E 's/.*cluster_weight=([^;]+);.*$/\1/')
|
||||
out=$(printf "${DATA}/%05d_%s" $w $c)
|
||||
grep -A1 "$c" "${DATA}.clust.fasta" | \
|
||||
grep -v -- -- > "$out.fasta"
|
||||
muscle -in "$out.fasta" -out "$out.align.fasta"
|
||||
done
|
||||
}
|
||||
|
||||
function revcomp {
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
} \
|
||||
function comp(seq) { \
|
||||
"echo "seq" | tr acgtACGT tgcaTGCA " | getline res; \
|
||||
close("echo "seq" | tr acgtACGT tgcaTGCA "); \
|
||||
return res; \
|
||||
} \
|
||||
function rev(seq) { \
|
||||
"echo "seq" | rev " | getline res; \
|
||||
close("echo "seq" | rev "); \
|
||||
return res; \
|
||||
} \
|
||||
function revcomp(seq) { \
|
||||
res=rev(comp(seq)); \
|
||||
return res; \
|
||||
} \
|
||||
\
|
||||
(seq) && /^>/ {print head; \
|
||||
printfasta(revcomp(seq)); \
|
||||
seq=""} \
|
||||
/^>/ {head=$0} \
|
||||
! /^>/ {seq=seq$0} \
|
||||
END { print head; \
|
||||
printfasta(revcomp(seq)); \
|
||||
}' $1
|
||||
}
|
||||
|
||||
|
||||
pushTmpDir ORG.buildRRNAMito
|
||||
loginfo "Tempdir: $(pwd)"
|
||||
|
||||
openLogFile "${RRNA_DATA_DIR}/rRNA_mito_models.log"
|
||||
|
||||
loginfo "Selecting Metazoa genebank entries..."
|
||||
METAZOA=$(${PROG_DIR}/../../normalize/tools/selectMetazoa.sh $*)
|
||||
loginfo " --> $(echo ${METAZOA} | wc -w) entries selected"
|
||||
loginfo "Done"
|
||||
|
||||
loginfo "Extracting 12S rRNA sequences..."
|
||||
rm -f raw_12S.fasta
|
||||
for f in ${METAZOA}; do
|
||||
loginfo "Extracting 12S rRNA sequences from ${f}..."
|
||||
${PROG_DIR}/extract_ref12S.sh ${f} >> raw_12S.fasta
|
||||
done
|
||||
loginfo " --> $(fastaCount raw_12S.fasta) retreived sequences"
|
||||
dereplicate raw_12S.fasta >> 12S.fasta
|
||||
loginfo " --> $(fastaCount 12S.fasta) distinct sequences"
|
||||
loginfo "Done"
|
||||
|
||||
loginfo "Clustering 12S rRNA sequences..."
|
||||
clustering 12S
|
||||
loginfo "Done"
|
||||
|
||||
loginfo "Installing 12S rRNA sequences..."
|
||||
cp -r 12S "${RRNA_DATA_DIR}/RRNA_12S_mito"
|
||||
loginfo "Done"
|
||||
|
||||
|
||||
loginfo "Extracting 16S rRNA sequences..."
|
||||
rm -f raw_16S.fasta
|
||||
for f in ${METAZOA}; do
|
||||
${PROG_DIR}/extract_ref16S.sh ${f} >> raw_16S.fasta
|
||||
done
|
||||
loginfo " --> $(fastaCount raw_16S.fasta) retreived sequences"
|
||||
dereplicate raw_16S.fasta > 16S.fasta
|
||||
loginfo " --> $(fastaCount 16S.fasta) distinct sequences"
|
||||
loginfo "Done"
|
||||
|
||||
loginfo "Clustering 16S rRNA sequences..."
|
||||
clustering 16S
|
||||
loginfo "Done"
|
||||
|
||||
loginfo "Installing 16S rRNA sequences..."
|
||||
cp -r 16S "${RRNA_DATA_DIR}/RRNA_16S_mito"
|
||||
loginfo "Done"
|
||||
|
||||
|
||||
popTmpDir
|
55
detectors/rrna/tools/extract_ref12S.sh
Executable file
55
detectors/rrna/tools/extract_ref12S.sh
Executable file
@ -0,0 +1,55 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
|
||||
# -- CAUTION -- Works as long as the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
} \
|
||||
function comp(seq) { \
|
||||
"echo "seq" | tr acgtACGT tgcaTGCA " | getline res; \
|
||||
close("echo "seq" | tr acgtACGT tgcaTGCA "); \
|
||||
return res; \
|
||||
} \
|
||||
function rev(seq) { \
|
||||
"echo "seq" | rev " | getline res; \
|
||||
close("echo "seq" | rev "); \
|
||||
return res; \
|
||||
} \
|
||||
function revcomp(seq) { \
|
||||
res=rev(comp(seq)); \
|
||||
return res; \
|
||||
} \
|
||||
\
|
||||
/^LOCUS / {AC=$2; sequence=""; seqon=0; FROM="";TO=""} \
|
||||
/^ rRNA / {LOCUS=$2; STRAND=1} \
|
||||
/^ rRNA / && /complement/ {STRAND=0; \
|
||||
sub("complement\\(","",LOCUS); \
|
||||
sub("\\)","",LOCUS); \
|
||||
} \
|
||||
/12S/ {split(LOCUS,POS,"."); \
|
||||
FROM=POS[1]; \
|
||||
TO=POS[3]; \
|
||||
LENGTH=TO-FROM+1 \
|
||||
} \
|
||||
/^ORIGIN/ {seqon=1} \
|
||||
/^ *[1-9][0-9]* [a-z ]+$/ && seqon {seq=$2 $3 $4 $5 $6 $7; \
|
||||
gsub("[^acgt]","n",seq);\
|
||||
sequence=sequence seq \
|
||||
} \
|
||||
/^\/\// && FROM \
|
||||
{print ">RRNA12S_"AC" Strand="STRAND";", \
|
||||
"cut="FROM".."TO";", \
|
||||
"seq_length="LENGTH";"; \
|
||||
SS=substr(sequence,FROM,LENGTH); \
|
||||
if (! STRAND) \
|
||||
SS=revcomp(SS); \
|
||||
printfasta(SS); \
|
||||
} \
|
||||
' $*
|
@ -2,7 +2,12 @@
|
||||
#
|
||||
|
||||
|
||||
gawk 'function printfasta(seq) { \
|
||||
# -- CAUTION -- Works as long than the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,12 @@
|
||||
#
|
||||
|
||||
|
||||
gawk 'function printfasta(seq) { \
|
||||
# -- CAUTION -- Works as long than the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -1,8 +1,12 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
|
||||
# -- CAUTION -- Works as long than the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
gawk 'function printfasta(seq) { \
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -2,7 +2,12 @@
|
||||
#
|
||||
|
||||
|
||||
gawk 'function printfasta(seq) { \
|
||||
# -- CAUTION -- Works as long than the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
@ -11,7 +11,7 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
|
||||
function revcomp {
|
||||
gawk 'function printfasta(seq) { \
|
||||
$AwkCmd 'function printfasta(seq) { \
|
||||
seqlen=length(seq); \
|
||||
for (i=1; i <= seqlen; i+=60) \
|
||||
print substr(seq,i,60); \
|
||||
|
29
detectors/rrna/tools/splitgbk.sh
Executable file
29
detectors/rrna/tools/splitgbk.sh
Executable file
@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# splitgbk.sh:
|
||||
# Split a gbk file in multiple files
|
||||
# each containing a single sequence
|
||||
#
|
||||
#========================================================================================
|
||||
|
||||
# -- CAUTION -- Works as long as the script
|
||||
# is not called through a symlink
|
||||
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||||
|
||||
inputfile=$1
|
||||
dest=${inputfile/.*/}
|
||||
|
||||
mkdir -p $dest
|
||||
|
||||
$AwkCmd -v dest="$dest" '/^LOCUS/ {
|
||||
AC=$2;
|
||||
destfile = sprintf("%s/%s.gbk", dest, AC);
|
||||
}
|
||||
{
|
||||
print $0 >> destfile
|
||||
}
|
||||
/^\/\// {
|
||||
close(destfile);
|
||||
}
|
||||
' $inputfile
|
@ -32,7 +32,7 @@ pushTmpDir ORG.trna
|
||||
|
||||
TRNA=$(basename ${QUERY})
|
||||
|
||||
aragorn -i -w -seq -gc11 ${QUERY} | \
|
||||
aragorn -i -w -seq -gc${2} ${QUERY} | \
|
||||
${AwkCmd} -f ${PROG_DIR}/../lib/aragorn_wrapper.awk
|
||||
|
||||
|
||||
|
390
detectors/trna/lib/aragorn.man
Normal file
390
detectors/trna/lib/aragorn.man
Normal file
@ -0,0 +1,390 @@
|
||||
'\" t
|
||||
.\" Title: aragorn
|
||||
.\" Author: [see the "AUTHORS" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
|
||||
.\" Date: 02/24/2013
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "ARAGORN" "1" "02/24/2013" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.\" http://bugs.debian.org/507673
|
||||
.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
|
||||
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.ie \n(.g .ds Aq \(aq
|
||||
.el .ds Aq '
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * set default formatting
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" disable hyphenation
|
||||
.nh
|
||||
.\" disable justification (adjust text to left margin only)
|
||||
.ad l
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * MAIN CONTENT STARTS HERE *
|
||||
.\" -----------------------------------------------------------------
|
||||
.SH "NAME"
|
||||
aragorn \- detect tRNA genes in nucleotide sequences
|
||||
.SH "SYNOPSIS"
|
||||
.sp
|
||||
\fBaragorn\fR [\fIOPTION\fR]\&... \fIFILE\fR
|
||||
.SH "OPTIONS"
|
||||
.PP
|
||||
\fB\-m\fR
|
||||
.RS 4
|
||||
Search for tmRNA genes\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-t\fR
|
||||
.RS 4
|
||||
Search for tRNA genes\&. By default, all are detected\&. If one of
|
||||
\fB\-m\fR
|
||||
or
|
||||
\fB\-t\fR
|
||||
is specified, then the other is not detected unless specified as well\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-mt\fR
|
||||
.RS 4
|
||||
Search for Metazoan mitochondrial tRNA genes\&. tRNA genes with introns not detected\&.
|
||||
\fB\-i\fR,
|
||||
\fB\-sr\fR
|
||||
switches ignored\&. Composite Metazoan mitochondrial genetic code used\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-mtmam\fR
|
||||
.RS 4
|
||||
Search for Mammalian mitochondrial tRNA genes\&.
|
||||
\fB\-i\fR,
|
||||
\fB\-sr\fR
|
||||
switches ignored\&.
|
||||
\fB\-tv\fR
|
||||
switch set\&. Mammalian mitochondrial genetic code used\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-mtx\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-mt\fR
|
||||
but low scoring tRNA genes are not reported\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-mtd\fR
|
||||
.RS 4
|
||||
Overlapping metazoan mitochondrial tRNA genes on opposite strands are reported\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gc\fR[\fInum\fR]
|
||||
.RS 4
|
||||
Use the GenBank transl_table = [\fInum\fR] genetic code\&. Individual modifications can be appended using
|
||||
\fI,BBB\fR=<aa> B = A,C,G, or T\&. <aa> is the three letter code for an amino\-acid\&. More than one modification can be specified\&. eg
|
||||
\fB\-gcvert\fR,aga=Trp,agg=Trp uses the Vertebrate Mitochondrial code and the codons AGA and AGG changed to Tryptophan\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcstd\fR
|
||||
.RS 4
|
||||
Use standard genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcmet\fR
|
||||
.RS 4
|
||||
Use composite Metazoan mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcvert\fR
|
||||
.RS 4
|
||||
Use Vertebrate mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcinvert\fR
|
||||
.RS 4
|
||||
Use Invertebrate mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcyeast\fR
|
||||
.RS 4
|
||||
Use Yeast mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcprot\fR
|
||||
.RS 4
|
||||
Use Mold/Protozoan/Coelenterate mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcciliate\fR
|
||||
.RS 4
|
||||
Use Ciliate genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcflatworm\fR
|
||||
.RS 4
|
||||
Use Echinoderm/Flatworm mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gceuplot\fR
|
||||
.RS 4
|
||||
Use Euplotid genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcbact\fR
|
||||
.RS 4
|
||||
Use Bacterial/Plant Chloroplast genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcaltyeast\fR
|
||||
.RS 4
|
||||
Use alternative Yeast genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcascid\fR
|
||||
.RS 4
|
||||
Use Ascidian Mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcaltflat\fR
|
||||
.RS 4
|
||||
Use alternative Flatworm Mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcblep\fR
|
||||
.RS 4
|
||||
Use Blepharisma genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcchloroph\fR
|
||||
.RS 4
|
||||
Use Chlorophycean Mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gctrem\fR
|
||||
.RS 4
|
||||
Use Trematode Mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcscen\fR
|
||||
.RS 4
|
||||
Use Scenedesmus obliquus Mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-gcthraust\fR
|
||||
.RS 4
|
||||
Use Thraustochytrium Mitochondrial genetic code\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-tv\fR
|
||||
.RS 4
|
||||
Do not search for mitochondrial TV replacement loop tRNA genes\&. Only relevant if
|
||||
\fB\-mt\fR
|
||||
used\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-c7\fR
|
||||
.RS 4
|
||||
Search for tRNA genes with 7 base C\-loops only\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-i\fR
|
||||
.RS 4
|
||||
Search for tRNA genes with introns in anticodon loop with maximum length 3000 bases\&. Minimum intron length is 0 bases\&. Ignored if
|
||||
\fB\-m\fR
|
||||
is specified\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-i\fR[\fImax\fR]
|
||||
.RS 4
|
||||
Search for tRNA genes with introns in anticodon loop with maximum length [\fImax\fR] bases\&. Minimum intron length is 0 bases\&. Ignored if
|
||||
\fB\-m\fR
|
||||
is specified\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-i\fR[\fImin\fR],[\fImax\fR]
|
||||
.RS 4
|
||||
Search for tRNA genes with introns in anticodon loop with maximum length [\fImax\fR] bases, and minimum length [\fImin\fR] bases\&. Ignored if
|
||||
\fB\-m\fR
|
||||
is specified\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-io\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-i\fR, but allow tRNA genes with long introns to overlap shorter tRNA genes\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-if\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-i\fR, but fix intron between positions 37 and 38 on C\-loop (one base after anticodon)\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-ifo\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-if\fR
|
||||
and
|
||||
\fB\-io\fR
|
||||
combined\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-ir\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-i\fR, but report tRNA genes with minimum length [\fImin\fR] bases rather than search for tRNA genes with minimum length [\fImin\fR] bases\&. With this switch, [\fImin\fR] acts as an output filter, minimum intron length for searching is still 0 bases\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-c\fR
|
||||
.RS 4
|
||||
Assume that each sequence has a circular topology\&. Search wraps around each end\&. Default setting\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-l\fR
|
||||
.RS 4
|
||||
Assume that each sequence has a linear topology\&. Search does not wrap\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-d\fR
|
||||
.RS 4
|
||||
Double\&. Search both strands of each sequence\&. Default setting\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-s\fR or \fB\-s+\fR
|
||||
.RS 4
|
||||
Single\&. Do not search the complementary (antisense) strand of each sequence\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-sc\fR or \fB\-s\-\fR
|
||||
.RS 4
|
||||
Single complementary\&. Do not search the sense strand of each sequence\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-ps\fR
|
||||
.RS 4
|
||||
Lower scoring thresholds to 95% of default levels\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-ps\fR[\fInum\fR]
|
||||
.RS 4
|
||||
Change scoring thresholds to [\fInum\fR] percent of default levels\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-rp\fR
|
||||
.RS 4
|
||||
Flag possible pseudogenes (score < 100 or tRNA anticodon loop <> 7 bases long)\&. Note that genes with score < 100 will not be detected or flagged if scoring thresholds are not also changed to below 100% (see \-ps switch)\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-seq\fR
|
||||
.RS 4
|
||||
Print out primary sequence\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-br\fR
|
||||
.RS 4
|
||||
Show secondary structure of tRNA gene primary sequence using round brackets\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-fasta\fR
|
||||
.RS 4
|
||||
Print out primary sequence in fasta format\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-fo\fR
|
||||
.RS 4
|
||||
Print out primary sequence in fasta format only (no secondary structure)\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-fon\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-fo\fR, with sequence and gene numbering in header\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-fos\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-fo\fR, with no spaces in header\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-fons\fR
|
||||
.RS 4
|
||||
Same as
|
||||
\fB\-fo\fR, with sequence and gene numbering, but no spaces\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-w\fR
|
||||
.RS 4
|
||||
Print out in Batch mode\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-ss\fR
|
||||
.RS 4
|
||||
Use the stricter canonical 1\-2 bp spacer1 and 1 bp spacer2\&. Ignored if
|
||||
\fB\-mt\fR
|
||||
set\&. Default is to allow 3 bp spacer1 and 0\-2 bp spacer2, which may degrade selectivity\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-v\fR
|
||||
.RS 4
|
||||
Verbose\&. Prints out information during search to STDERR\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-a\fR
|
||||
.RS 4
|
||||
Print out tRNA domain for tmRNA genes\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-a7\fR
|
||||
.RS 4
|
||||
Restrict tRNA astem length to a maximum of 7 bases\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-aa\fR
|
||||
.RS 4
|
||||
Display message if predicted iso\-acceptor species does not match species in sequence name (if present)\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-j\fR
|
||||
.RS 4
|
||||
Display 4\-base sequence on 3\*(Aq end of astem regardless of predicted amino\-acyl acceptor length\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-jr\fR
|
||||
.RS 4
|
||||
Allow some divergence of 3\*(Aq amino\-acyl acceptor sequence from NCCA\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-jr4\fR
|
||||
.RS 4
|
||||
Allow some divergence of 3\*(Aq amino\-acyl acceptor sequence from NCCA, and display 4 bases\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-q\fR
|
||||
.RS 4
|
||||
Do not print configuration line (which switches and files were used)\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-rn\fR
|
||||
.RS 4
|
||||
Repeat sequence name before summary information\&.
|
||||
.RE
|
||||
.PP
|
||||
\fB\-O\fR [\fIoutfile\fR]
|
||||
.RS 4
|
||||
Print output to
|
||||
[\fIoutfile\fR]\&. If [\fIoutfile\fR] already exists, it is overwritten\&. By default all output goes to stdout\&.
|
||||
.RE
|
||||
.SH "DESCRIPTION"
|
||||
.sp
|
||||
aragorn detects tRNA, mtRNA, and tmRNA genes\&. A minimum requirement is at least a 32 bit compiler architecture (variable types int and unsigned int are at least 4 bytes long)\&.
|
||||
.sp
|
||||
[\fIFILE\fR] is assumed to contain one or more sequences in FASTA format\&. Results of the search are printed to STDOUT\&. All switches are optional and case\-insensitive\&. Unless \-i is specified, tRNA genes containing introns are not detected\&.
|
||||
.SH "AUTHORS"
|
||||
.sp
|
||||
Bjorn Canback <bcanback@acgt\&.se>, Dean Laslett <gaiaquark@gmail\&.com>
|
||||
.SH "REFERENCES"
|
||||
.sp
|
||||
Laslett, D\&. and Canback, B\&. (2004) ARAGORN, a program for the detection of transfer RNA and transfer\-messenger RNA genes in nucleotide sequences Nucleic Acids Research, 32;11\-16
|
||||
.sp
|
||||
Laslett, D\&. and Canback, B\&. (2008) ARWEN: a program to detect tRNA genes in metazoan mitochondrial nucleotide sequences Bioinformatics, 24(2); 172\-175\&.
|
Reference in New Issue
Block a user