Former-commit-id: 78c4a4d8497e6dc7185a6e5726ffc643d9ea914d Former-commit-id: e6945818dbdbabd9cd060edb9d2578ea26671209
167 lines
4.0 KiB
Bash
167 lines
4.0 KiB
Bash
#
|
|
# Bash file to be sourced at the beginning of each bash script
|
|
# for setting up basic variables and functions
|
|
#
|
|
|
|
########################
|
|
#
|
|
# General usage functions
|
|
#
|
|
#
|
|
|
|
# Prints the absolute, symlink-resolved path of a file or a directory.
#   - $1 : The path to resolve. Either the path is an existing directory,
#          or its parent directory must exist.
# Outputs : the absolute path on stdout
# Returns : non-zero when the directory cannot be entered
# The working directory of the caller is never modified.
function getAbsolutePath {
    local target=$1
    local resolved

    if [[ -d ${target} ]]; then
        # The cd happens inside the command substitution subshell only.
        # Its stdout is discarded because cd prints the directory when
        # CDPATH is involved.
        resolved=$(cd -- "${target}" > /dev/null && pwd -P) || return
        printf '%s\n' "${resolved}"
    else
        resolved=$(cd -- "$(dirname -- "${target}")" > /dev/null && pwd -P) || return
        # Dropping a trailing slash avoids printing "//name" for entries
        # located directly under "/".
        printf '%s/%s\n' "${resolved%/}" "$(basename -- "${target}")"
    fi
}
|
|
|
|
# Manage temp directory
|
|
|
|
# Creates a fresh temporary directory and makes it the current directory
# (the previous directory is kept on the directory stack).
#   - $1 : Prefix used to name the temporary directory
# Globals : TMP_DIR (written) - path of the created directory
# Returns : non-zero when the directory cannot be created or entered
function pushTmpDir {
    local tmp_root=${TMPDIR:-/tmp}

    # A full path template ending with X's is understood by both the GNU
    # and the BSD mktemp, whereas "-t <prefix>" without X's is refused by
    # the GNU implementation.
    TMP_DIR=$(mktemp -d "${tmp_root%/}/${1}_proc_$$_XXXXXX") || return
    pushd "${TMP_DIR}" > /dev/null 2>&1
}
|
|
|
|
# Goes back to the directory on top of the directory stack and deletes
# the temporary directory recorded in TMP_DIR.
# Globals : TMP_DIR (read) - directory to delete
function popTmpDir {
    popd > /dev/null 2>&1

    # The value is quoted so that a path containing whitespace is removed
    # as a single directory, and nothing is deleted when it is empty.
    if [[ -n ${TMP_DIR:-} ]]; then
        rm -rf -- "${TMP_DIR}" > /dev/null 2>&1
    fi
}
|
|
|
|
# Logging functions
|
|
|
|
|
|
# Prints its arguments, joined by single spaces, on stderr.
#   - $@ : The words of the message
function errcho {
    # "$*" keeps the message verbatim: no whitespace collapsing and no
    # pathname expansion of characters such as '*'. printf also protects
    # messages starting with an echo option such as -n.
    printf '%s\n' "$*" >&2
}
|
|
|
|
# Declares the file receiving a copy of the log messages and creates it
# when it does not exist yet.
#   - $1 : Path of the log file
# Globals : LOGFILE (written)
function openLogFile {
    LOGFILE=$1
    # Quoted so that a path containing whitespace designates one file.
    touch -- "${LOGFILE}"
}
|
|
|
|
|
|
# Shared implementation of the three logging functions: builds one
# timestamped line, prints it on stderr and appends it to the log file
# when LOGFILE is set.
#   - $1 : Level tag, e.g. "[OA INFO ]"
#   - $2 : The message
# Globals : LOGFILE (read)
function _oalog {
    local line

    # The line is built once so that stderr and the log file always carry
    # the same timestamp.
    line="$(date +'%Y-%m-%d %H:%M:%S') $1 $$ -- $2"

    printf '%s\n' "${line}" >&2
    if [[ -n ${LOGFILE:-} ]]; then
        printf '%s\n' "${line}" >> "${LOGFILE}"
    fi
}

# Logs an informative message.
#   - $1 : The message
function loginfo {
    _oalog "[OA INFO ]" "$1"
}

# Logs an error message.
#   - $1 : The message
function logerror {
    _oalog "[OA ERROR ]" "$1"
}

# Logs a warning message.
#   - $1 : The message
function logwarning {
    _oalog "[OA WARNING]" "$1"
}
|
|
|
|
# Sequence related functions
|
|
|
|
# Counts how many sequences are stored in a fasta file, i.e. how many
# lines start with '>'.
#   - $1 : The fasta file to count (stdin is read when omitted)
# Outputs : the count, as a bare number, on stdout
# Returns : non-zero only when grep itself fails (e.g. unreadable file)
function fastaCount {
    local count
    local status=0

    # ${1:+"$1"} passes the file name as a single word, or nothing at all
    # when no file is given, in which case grep reads stdin.
    count=$(grep -c '^>' -- ${1:+"$1"}) || status=$?
    printf '%s\n' "${count:-0}"

    # grep exits with 1 when nothing matched: that is a valid count of 0.
    (( status <= 1 ))
}
|
|
|
|
|
|
# Computes the length of the sequence stored in a fasta file: the number
# of characters found after the first line, newlines excluded.
#   - $1 : The fasta file to measure
# Outputs : the length on stdout (0 for an empty or header-only file)
# Returns : non-zero when the file cannot be read
function seqlength {
    # A single awk pass replaces the cat | wc | awk + head pipeline.
    # The file is given through a redirection so that its name is never
    # interpreted by awk.
    awk 'NR > 1 { total += length($0) }
         END    { print total + 0 }' < "$1"
}
|
|
|
|
# Extracts a subsequence from a fasta sequence.
#   - $1 : The fasta file to cut (stdin is read when empty)
#   - $2 : First position of the subsequence (first position is numbered 1)
#   - $3 : End of the subsequence (included in the subsequence)
# Outputs : every title line of the input as it is read, then the
#           subsequence wrapped in lines of 60 characters
function cutseq {
    awk -v from="$2" -v end="$3" '
        # Prints seq in lines of at most 60 characters.
        # seqlen and i are extra parameters, which makes them local.
        function printfasta(seq,    seqlen, i) {
            seqlen = length(seq)
            for (i = 1; i <= seqlen; i += 60)
                print substr(seq, i, 60)
        }

        /^>/ { print; next }
             { seq = seq $0 }
        END  { printfasta(substr(seq, from, end - from + 1)) }
    ' ${1:+"$1"}
}
|
|
|
|
# Joins a set of sequences stored in a fasta file into
# a single sequence
#   - $1 : The fasta file containing the sequences to join
#          (stdin is read when omitted)
# Outputs : the first line of the input followed by the joined sequence,
#           as reformatted by formatfasta
function joinfasta {
    # The first line is always kept, whether it is a title line or not.
    # Every later title line is dropped so that all the sequence lines
    # end up in one record.
    awk 'NR == 1 || !/^>/' ${1:+"$1"} \
        | formatfasta
}
|
|
|
|
# Reformats a fasta sequence in lines of 60 characters.
#   - $1 : The fasta file to format (stdin is read when omitted)
# Outputs : every title line as it is read, then, once the whole input
#           is consumed, all the sequence lines concatenated and wrapped
#           in lines of 60 characters
function formatfasta {
    awk '
        # Prints seq in lines of at most 60 characters.
        # seqlen and i are extra parameters, which makes them local.
        function printfasta(seq,    seqlen, i) {
            seqlen = length(seq)
            for (i = 1; i <= seqlen; i += 60)
                print substr(seq, i, 60)
        }

        /^>/ { print; next }
             { seq = seq $0 }
        END  { printfasta(seq) }
    ' ${1:+"$1"}
}
|
|
|
|
|
|
#
|
|
#
|
|
########################
|
|
|
|
|
|
|
|
########################
|
|
#
|
|
# Local variable definitions
|
|
#
|
|
#
|
|
|
|
# The absolute path to the ORG.Annote home directory: the parent of the
# directory containing this file. Every expansion is quoted so that an
# installation path containing whitespace is handled properly.
ORG_HOME="$(getAbsolutePath "$(dirname -- "${BASH_SOURCE[0]}")/..")"

# The architecture running the ORG.Annote instance,
# as printed by the config/guess_port program
ORG_PORTNAME="$("${ORG_HOME}/config/guess_port")"

# Directory containing the main script file
PROG_DIR="$(getAbsolutePath "$(dirname -- "$0")")"

# Directory containing reference data for the annotation
DATA_DIR="${ORG_HOME}/data"

# Directory from where the main script is called
CALL_DIR="$(getAbsolutePath "$(pwd)")"

# Directory containing data related to the
# Inverted repeat structure
IR_DATA_DIR="${DATA_DIR}/ir"
|
|
|
|
|
|
#
|
|
#
|
|
########################
|
|
|
|
|
|
|
|
########################
|
|
#
|
|
# Altering the environment
|
|
#
|
|
#
|
|
|
|
# The bin directory of the current port is searched before anything else
export PATH="${ORG_HOME}/ports/${ORG_PORTNAME}/bin:${PATH}"

# Fall back on the basic C locale: it is needed for a correct
# behaviour of AWK on mac with float
LANG=C
LC_ALL=C
export LANG LC_ALL
|
|
|
|
|