From 9d2d60e9f0aaa8ba787e03570d96847f843a121f Mon Sep 17 00:00:00 2001
From: "Eric Coissac eric.coissac@ujf-grenoble.fr" <coissac@luke21.(none)>
Date: Mon, 9 Nov 2015 11:35:00 +0100
Subject: [PATCH 1/8] Patch config/default.conf for considering a CFLAGS and
 LDFLAGS locally predefined variables

Former-commit-id: 29597cc21be2605b6753e4d64bfeb165b210309b
Former-commit-id: b2c4aed567119dae4f596f04c83648102639b000
---
 config/default.conf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/default.conf b/config/default.conf
index ff863ec..8bdb54c 100644
--- a/config/default.conf
+++ b/config/default.conf
@@ -118,9 +118,9 @@ INCDIR	 =	$(abspath $(PRTDIR))/include
 # default gmake variable in implicit rules
 # ------------------------------------
 
-CFLAGS 	  = 	$(OPTIM) $(MACHDEF) -I$(INCDIR)
+CFLAGS 	  := $(CFLAGS)	$(OPTIM) $(MACHDEF) -I$(INCDIR)
 
-LDFLAGS	  =     -L$(LIBDIR) -L.
+LDFLAGS	  := $(LDFLAGS)    -L$(LIBDIR) -L.
 
 LDLIBS	  =	    $(LIBS) $(MALLOC_LIBS) $(MATH_LIBS) $(CC_LIBS)
 

From cf99ca56a4f34b8ad3f3753ee23acc2c92c9ebbe Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 13:11:54 +0100
Subject: [PATCH 2/8] Patches for the cluster luke

Former-commit-id: 54cfd925a4cb13d389bd3086164b99bd246319e7
Former-commit-id: d865056147c56bf0c6220e29d7f40e26f140a4ba
---
 detectors/ir/bin/go_ir.sh                | 2 +-
 detectors/normalize/lib/lookforIR.lib.sh | 2 ++
 detectors/trna/bin/go_trna.sh            | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/detectors/ir/bin/go_ir.sh b/detectors/ir/bin/go_ir.sh
index e199449..2f7a0bb 100755
--- a/detectors/ir/bin/go_ir.sh
+++ b/detectors/ir/bin/go_ir.sh
@@ -61,4 +61,4 @@ pushTmpDir ORG.ir
 
 popTmpDir
 
-exit 0
\ No newline at end of file
+exit 0
diff --git a/detectors/normalize/lib/lookforIR.lib.sh b/detectors/normalize/lib/lookforIR.lib.sh
index 1919a21..0070c2d 100644
--- a/detectors/normalize/lib/lookforIR.lib.sh
+++ b/detectors/normalize/lib/lookforIR.lib.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 source "${THIS_DIR}/../../../scripts/bash_init.sh"
 
 SELECTIR="${PROG_DIR}/../../normalize/lib/selectIR.py"
diff --git a/detectors/trna/bin/go_trna.sh b/detectors/trna/bin/go_trna.sh
index dd6f468..0647825 100755
--- a/detectors/trna/bin/go_trna.sh
+++ b/detectors/trna/bin/go_trna.sh
@@ -33,7 +33,7 @@ pushTmpDir ORG.trna
 	TRNA=$(basename ${QUERY})
 	
 	aragorn -i -w -seq ${QUERY} | \
-		${PROG_DIR}/../lib/aragorn_wrapper.awk
+		${CmdAwk} -f ${PROG_DIR}/../lib/aragorn_wrapper.awk
 	
 
 popTmpDir

From 8b27f23a8ef7cfe5581a8a994c77f1c11041df9e Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 14:01:21 +0100
Subject: [PATCH 3/8] change the call to awk

Former-commit-id: fa0b02e712db163ba6d37407cea46d1d413a7cb5
Former-commit-id: f393558b7962188159ac123d654ae14ee581f5f8
---
 detectors/trna/bin/go_trna.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detectors/trna/bin/go_trna.sh b/detectors/trna/bin/go_trna.sh
index 0647825..66964d0 100755
--- a/detectors/trna/bin/go_trna.sh
+++ b/detectors/trna/bin/go_trna.sh
@@ -33,7 +33,7 @@ pushTmpDir ORG.trna
 	TRNA=$(basename ${QUERY})
 	
 	aragorn -i -w -seq ${QUERY} | \
-		${CmdAwk} -f ${PROG_DIR}/../lib/aragorn_wrapper.awk
+		${AwkCmd} -f ${PROG_DIR}/../lib/aragorn_wrapper.awk
 	
 
 popTmpDir

From ae703c70d66e181363e7c4e9477e7b7169bbbfe5 Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 15:27:32 +0100
Subject: [PATCH 4/8] small modifications for the luke cluster

Former-commit-id: 55a83611220f824a0efdb1267816f98df984152e
Former-commit-id: e01bf6440c2981904f31807766e9e48aea8b18c1
---
 detectors/normalize/lib/lookforIR.lib.sh | 1 +
 detectors/normalize/lib/selectIR.py      | 4 ++--
 scripts/bash_init.sh                     | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/detectors/normalize/lib/lookforIR.lib.sh b/detectors/normalize/lib/lookforIR.lib.sh
index 0070c2d..437b77c 100644
--- a/detectors/normalize/lib/lookforIR.lib.sh
+++ b/detectors/normalize/lib/lookforIR.lib.sh
@@ -25,6 +25,7 @@ function lookForIR {
 			 			{print substr($2,1,3),$8,$7,SAME}}' | \
 		  sort -nk 2 > ${MATCHES}
 	loginfo "Done"
+
 	  
 	loginfo "Looking for long inverted repeats..."
 		repseek -c -p 0.001 -i ${QUERY} 2>> /dev/null > ${REPEATS}
diff --git a/detectors/normalize/lib/selectIR.py b/detectors/normalize/lib/selectIR.py
index cefe74b..abe7dbd 100755
--- a/detectors/normalize/lib/selectIR.py
+++ b/detectors/normalize/lib/selectIR.py
@@ -32,7 +32,7 @@ for line in data:
         
         for p in range(begin,end):
             chr[p]+=direction
-   
+ 
 maxSSC = float(max(abs(n) for n in chloro['SSC']))
 maxLSC = float(max(abs(n) for n in chloro['LSC']))
 
@@ -132,4 +132,4 @@ sys.stdout.write("%s %s %s %s %d %d %d %d %6.5f\n" % (center,
          
 #for p in range(chlorosize):
 #    sys.stdout.write("%d %d %d\n"  % (p,chloro['SSC'][p],chloro['LSC'][p]))
-    
\ No newline at end of file
+    
diff --git a/scripts/bash_init.sh b/scripts/bash_init.sh
index bb60ba2..72d4a81 100644
--- a/scripts/bash_init.sh
+++ b/scripts/bash_init.sh
@@ -19,7 +19,7 @@ function getAbsolutePath {
 # Manage temp directory
 
 function pushTmpDir {
-	TMP_DIR=$(mktemp -d -t "$1_proc_$$_")
+	TMP_DIR=$(mktemp -d -t "$1_proc_$$_XXXXXX")
 	pushd $TMP_DIR >& /dev/null
 	TMP_DIR_STACK="$TMP_DIR $TMP_DIR_STACK"
 	logdebug "Pushing temp directory $TMP_DIR"

From 58e99a81eba2bd74630b8d5559cfeb6bfdd6116f Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 15:34:59 +0100
Subject: [PATCH 5/8] Run the annotation on the normalized sequence

Former-commit-id: 48e30ca5dff232ebf886d351cfd254b3fc3a0798
Former-commit-id: e4832b9fe1c644d232b6bc724ab98d5ee18ab6eb
---
 org-annotate.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/org-annotate.sh b/org-annotate.sh
index 5417d8c..2ea1b66 100755
--- a/org-annotate.sh
+++ b/org-annotate.sh
@@ -38,15 +38,15 @@ pushTmpDir ORG.organnot
 	loginfo "Done."
 	
 	loginfo "Annotating the tRNA..."
-		${PROG_DIR}/detectors/trna/bin/go_trna.sh ${QUERY} >> "${RESULTS}.annot"
+		${PROG_DIR}/detectors/trna/bin/go_trna.sh "${RESULTS}.norm.fasta" >> "${RESULTS}.annot"
 	loginfo "Done."
 	
 	loginfo "Annotating the rRNA genes..."
-		${PROG_DIR}/detectors/rrna/bin/go_rrna.sh ${QUERY} >> "${RESULTS}.annot"
+		${PROG_DIR}/detectors/rrna/bin/go_rrna.sh "${RESULTS}.norm.fasta" >> "${RESULTS}.annot"
 	loginfo "Done."
 
 	loginfo "Annotating the CDS..."
-		${PROG_DIR}/detectors/cds/bin/go_cds.sh ${QUERY} >> "${RESULTS}.annot"
+		${PROG_DIR}/detectors/cds/bin/go_cds.sh "${RESULTS}.norm.fasta" >> "${RESULTS}.annot"
 	loginfo "Done."
 	
 	loginfo "Printing annotations header..."
@@ -96,11 +96,11 @@ pushTmpDir ORG.organnot
 			 		      freq["G"]" G; "\
 			 		      freq["T"]" T; "\
 			 		      other" other;" \
-			 }' ${QUERY}
+			 }' "${RESULTS}.norm.fasta"
 	loginfo "Done."
 	
 	loginfo "Reformating sequences..."
-		lines=$(wc -l ${QUERY} | awk '{print $1}')
+		lines=$(wc -l "${RESULTS}.norm.fasta" | awk '{print $1}')
 		awk -v lines=$lines ' \
 			! /^>/ { \
 					seq=tolower($0); \
@@ -115,7 +115,7 @@ pushTmpDir ORG.organnot
 					if (NR==lines) \
 					  {pos-=1}; \
 					printf("   %6d\n",pos) \
-			   }' ${QUERY}
+			   }' "${RESULTS}.norm.fasta"
 	loginfo "Done."
 	
 	loginfo "Closing sequence part..."

From 195739b5f8f2cd195a9253dd9e8f26a2eb8e6684 Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 17:03:22 +0100
Subject: [PATCH 6/8] Patch bug in the inverted repeats annotation Realize the
 annotation on the normalized chromosome

Former-commit-id: 73e4e016d7bfe23fc92e4071efb0a4629eb8e21f
Former-commit-id: d44249c0d078d475ca80386bb428fb660289b0dd
---
 detectors/normalize/bin/go_normalize.sh  | 11 +++---
 detectors/normalize/lib/lookforIR.lib.sh | 11 +++++-
 detectors/normalize/lib/selectIR.py      | 49 +++++++++++++++++-------
 org-annotate.sh                          |  2 +-
 4 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/detectors/normalize/bin/go_normalize.sh b/detectors/normalize/bin/go_normalize.sh
index 200e21d..5ef1865 100755
--- a/detectors/normalize/bin/go_normalize.sh
+++ b/detectors/normalize/bin/go_normalize.sh
@@ -100,7 +100,7 @@ pushTmpDir ORG.normalize
 	tmpLSC="tmp_$$_LSC.fasta"		
 	tmpSSC="tmp_$$_SSC.fasta"		
 	
-	# Extract the first SC present in between the two IRs
+	# Extract the central SC present in between the two IRs
 	# considering it as LSC
 
 	let "beginLSC=$endIR1+1"
@@ -110,7 +110,7 @@ pushTmpDir ORG.normalize
 	strandLSC="${IR[1]}"
 
 
-	# Extract the second SC present in two parts
+	# Extract the external SC present in two parts
 	# Considering it as SSC
 	
 	let "beginSSC=$endIR2+1"
@@ -130,16 +130,17 @@ pushTmpDir ORG.normalize
 	
 		# Actually this is the oposite LSC is SSC and SSC is LSC
 
-		# Exchange the SSC and LSC sequences
+		# Exchanges the SSC and LSC sequences
 		mv ${tmpSSC}    ${tmpfasta1}
 		mv ${tmpLSC}    ${tmpSSC}
 		mv ${tmpfasta1} ${tmpLSC}
 		
-		# Exchange the IRa and IRb sequences
+		# Exchanges the IRa and IRb sequences
 		mv ${tmpIR1}    ${tmpfasta1}
 		mv ${tmpIR2}    ${tmpIR1}
 		mv ${tmpfasta1} ${tmpIR2}
 		
+		# Exchanges the strand of both the Single copies
 		tmp=${strandSSC}
 		strandSSC=${strandLSC}
 		strandLSC=${tmp}
@@ -161,7 +162,7 @@ pushTmpDir ORG.normalize
 	# Merges the four parts of the genome.
 	cat ${tmpLSC} ${tmpIR2} ${tmpSSC} ${tmpIR1} | joinfasta
 
-	
+	exit 1
 	
 popTmpDir
 
diff --git a/detectors/normalize/lib/lookforIR.lib.sh b/detectors/normalize/lib/lookforIR.lib.sh
index 437b77c..3b35206 100644
--- a/detectors/normalize/lib/lookforIR.lib.sh
+++ b/detectors/normalize/lib/lookforIR.lib.sh
@@ -12,6 +12,15 @@ function lookForIR {
 	
 	local REPEATS="${MATCHES/.*/}.repseek"
 	
+	# Blast columns:
+	# 	query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score
+	# We keep blast matches if : 
+	#	The match is longer than 1000
+	#   The identity is higher than 80%
+	#
+	# The match file has the following format:
+	#	LSC/SSC  begin end  same_strand=1/diff_strand=0
+	
 	loginfo "Locating SSC and LSC by similarity..."
 		blastn -db ${SCDB} \
 		       -query ${QUERY} \
@@ -31,7 +40,7 @@ function lookForIR {
 		repseek -c -p 0.001 -i ${QUERY} 2>> /dev/null > ${REPEATS}
 		loginfo " --> $(wc -l ${REPEATS} | awk '{print $1}') repeats identified"
 	loginfo "Done"
-	
+
 	loginfo "Marking and selecting the best inverted repeat..."
 		local IR=( $(${SELECTIR} ${MATCHES} ${REPEATS}) )
 	loginfo "Done"
diff --git a/detectors/normalize/lib/selectIR.py b/detectors/normalize/lib/selectIR.py
index abe7dbd..7bd216d 100755
--- a/detectors/normalize/lib/selectIR.py
+++ b/detectors/normalize/lib/selectIR.py
@@ -8,6 +8,12 @@ repeats = open(sys.argv[2])
 chloro    = {'LSC' : [], 'SSC' : [] }
 chlorosize =0
 
+# We scan the blast matches:
+#    We build a vector with one position per base pair counting the matches
+
+# The match file has the following format:
+#    LSC/SSC  begin end  same_strand=1/diff_strand=0
+
 for line in data:
     parts = line.strip().split()
     if len(parts) >= 4:
@@ -16,7 +22,8 @@ for line in data:
         end         = int(parts[2])
         direction   = int(parts[3])
         
-        
+        # Change the code of the direction:
+        #    reverse complement = -1
         if direction==0:
             direction=-1
             
@@ -39,36 +46,53 @@ maxLSC = float(max(abs(n) for n in chloro['LSC']))
 chloro['SSC']=[n / maxSSC for n in chloro['SSC']]
 chloro['LSC']=[n / maxLSC for n in chloro['LSC']]
 
-
 scoreMax=0
+len1Max=0
+len2Max=0
+
 imax = len(chloro['LSC'])
 
 for line in repeats:
     parts   = line.strip().split()
     
+    # First repeat position and length 
+    # (position start at 0)
     pos1    = int(parts[1]) -1
     len1    = int(parts[3])
     
+    # Second repeat position and length
+    # (position start at 0)
     pos2    = int(parts[2]) -1
     len2    = int(parts[4])
     
+    # Location of the central single copy 
+    #       - in between the two IR -
     c_begin = min(pos1 + len1,imax)
     c_end   = min(pos2,imax)
+    
+    # Location of the external single copy 
+    #       - in between the two IR -
     o_max   = min(pos1 ,imax)
     o_min   = min(pos2 + len2, imax)
     
-    c_lsc   = sum(abs(chloro['LSC'][n]) for n in range(c_begin,c_end))
-    c_ssc   = sum(abs(chloro['SSC'][n]) for n in range(c_begin,c_end))
+    # count of coherent matches for LSC and SSC on the central single copy 
+    c_lsc   = abs(sum(chloro['LSC'][n] for n in range(c_begin,c_end)))
+    c_ssc   = abs(sum(chloro['SSC'][n] for n in range(c_begin,c_end)))
 
-    o_lsc   = sum(abs(chloro['LSC'][n]) for n in range(0,o_max))
-    o_ssc   = sum(abs(chloro['SSC'][n]) for n in range(0,o_max))
+    # count of coherent matches for LSC and SSC on the external single copy 
+    #    this score is in two parts before the first copy and after the second
+    o_lsc   = sum(chloro['LSC'][n] for n in range(0,o_max))
+    o_ssc   = sum(chloro['SSC'][n] for n in range(0,o_max))
 
-    o_lsc  += sum(abs(chloro['LSC'][n]) for n in range(o_min,len(chloro['LSC'])))
-    o_ssc  += sum(abs(chloro['SSC'][n]) for n in range(o_min,len(chloro['SSC'])))
+    o_lsc  += sum(chloro['LSC'][n] for n in range(o_min,imax))
+    o_ssc  += sum(chloro['SSC'][n] for n in range(o_min,imax))
+    
+    o_lsc   = abs(o_lsc)
+    o_ssc   = abs(o_ssc)
     
     c = float(c_lsc + c_ssc)
     o = float(o_lsc + o_ssc)
-    
+        
     if c > 0:
         c_lsc /= c
         c_ssc /= c 
@@ -78,10 +102,9 @@ for line in repeats:
         o_ssc /= o 
     
     score = ((c_lsc - c_ssc) ** 2 + (o_lsc - o_ssc) ** 2) / 2.0
-    
     # print >>sys.stderr,"c.lsc = %f c.ssc = %f   o.lsc = %f o.ssc = %f score = %6.4f (len=%d)" % (c_lsc,c_ssc,o_lsc,o_ssc,score,len1)
         
-    if (score > scoreMax):
+    if (score >= scoreMax) and ((len1 > len1Max) or (len2 > len2Max)):
         scoreMax = score
         pos1Max  = pos1
         pos2Max  = pos2
@@ -99,8 +122,8 @@ c_ssc   = sum(chloro['SSC'][n] for n in range(c_begin,c_end))
 o_lsc   = sum(chloro['LSC'][n] for n in range(0,o_max))
 o_ssc   = sum(chloro['SSC'][n] for n in range(0,o_max))
 
-o_lsc  += sum(chloro['LSC'][n] for n in range(o_min,len(chloro['LSC'])))
-o_ssc  += sum(chloro['SSC'][n] for n in range(o_min,len(chloro['SSC'])))
+o_lsc  += sum(chloro['LSC'][n] for n in range(o_min,imax))
+o_ssc  += sum(chloro['SSC'][n] for n in range(o_min,imax))
 
 if abs(c_lsc) > abs(c_ssc):
     center = "LSC"  
diff --git a/org-annotate.sh b/org-annotate.sh
index 2ea1b66..993428f 100755
--- a/org-annotate.sh
+++ b/org-annotate.sh
@@ -34,7 +34,7 @@ pushTmpDir ORG.organnot
 	loginfo "Done."
 	
 	loginfo "Annotating the Inverted repeats and Single copies (LSC and SSC)..."
-		${PROG_DIR}/detectors/ir/bin/go_ir.sh ${QUERY} > "${RESULTS}.annot"		
+		${PROG_DIR}/detectors/ir/bin/go_ir.sh "${RESULTS}.norm.fasta" > "${RESULTS}.annot"		
 	loginfo "Done."
 	
 	loginfo "Annotating the tRNA..."

From 32908c68093537795d0ee0e781265e5ff60d35c2 Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 17:17:34 +0100
Subject: [PATCH 7/8] remove the debug code

Former-commit-id: 7a805dc6278b8aae97e6be8deb7e9035bc4aa853
Former-commit-id: 54e6c6030fd0c876ad05745a6a51d14a523e313e
---
 detectors/normalize/bin/go_normalize.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/detectors/normalize/bin/go_normalize.sh b/detectors/normalize/bin/go_normalize.sh
index 5ef1865..0fbd8b9 100755
--- a/detectors/normalize/bin/go_normalize.sh
+++ b/detectors/normalize/bin/go_normalize.sh
@@ -162,7 +162,6 @@ pushTmpDir ORG.normalize
 	# Merges the four parts of the genome.
 	cat ${tmpLSC} ${tmpIR2} ${tmpSSC} ${tmpIR1} | joinfasta
 
-	exit 1
 	
 popTmpDir
 

From 813e3958ba9aa23ed29d356fa086860924037650 Mon Sep 17 00:00:00 2001
From: Eric Coissac <eric.coissac@metabarcoding.org>
Date: Mon, 9 Nov 2015 17:35:59 +0100
Subject: [PATCH 8/8] Change minimum length for considering a match from 1000
 to 100.

Former-commit-id: 6bd827ca011ee71d83e98710edc837f56a089875
Former-commit-id: 454f080c0b163f238951541eec23b5946f914f28
---
 detectors/normalize/lib/lookforIR.lib.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detectors/normalize/lib/lookforIR.lib.sh b/detectors/normalize/lib/lookforIR.lib.sh
index 3b35206..94ea75b 100644
--- a/detectors/normalize/lib/lookforIR.lib.sh
+++ b/detectors/normalize/lib/lookforIR.lib.sh
@@ -26,7 +26,7 @@ function lookForIR {
 		       -query ${QUERY} \
 		       -outfmt 6 \
 		       -max_target_seqs 10000 | \
-		  awk '($4 > 1000) && ($3>80) { \
+		  awk '($4 > 100) && ($3>80) { \
 		             SAME=(($7 < $8) && ($9 < $10)) || (($7 > $8) && ($9 > $10)); \
 			 		 if ($7 < $8) \
 			 			{print substr($2,1,3),$7,$8,SAME}  \