diff --git a/data/ir/LSC_RefDB.nhr b/data/ir/LSC_RefDB.nhr deleted file mode 100644 index 42d2c8e..0000000 Binary files a/data/ir/LSC_RefDB.nhr and /dev/null differ diff --git a/data/ir/LSC_RefDB.nin b/data/ir/LSC_RefDB.nin deleted file mode 100644 index 49c2d43..0000000 Binary files a/data/ir/LSC_RefDB.nin and /dev/null differ diff --git a/data/ir/SC_RefDB.nhr b/data/ir/SC_RefDB.nhr new file mode 100644 index 0000000..6fd643a Binary files /dev/null and b/data/ir/SC_RefDB.nhr differ diff --git a/data/ir/SC_RefDB.nin b/data/ir/SC_RefDB.nin new file mode 100644 index 0000000..43ae390 Binary files /dev/null and b/data/ir/SC_RefDB.nin differ diff --git a/data/ir/LSC_RefDB.nsq b/data/ir/SC_RefDB.nsq similarity index 74% rename from data/ir/LSC_RefDB.nsq rename to data/ir/SC_RefDB.nsq index fa7f80f..ad63b2f 100644 Binary files a/data/ir/LSC_RefDB.nsq and b/data/ir/SC_RefDB.nsq differ diff --git a/data/ir/SSC_RefDB.nhr b/data/ir/SSC_RefDB.nhr deleted file mode 100644 index 1b3deb0..0000000 Binary files a/data/ir/SSC_RefDB.nhr and /dev/null differ diff --git a/data/ir/SSC_RefDB.nin b/data/ir/SSC_RefDB.nin deleted file mode 100644 index aa5ef50..0000000 Binary files a/data/ir/SSC_RefDB.nin and /dev/null differ diff --git a/data/ir/SSC_RefDB.nsq b/data/ir/SSC_RefDB.nsq deleted file mode 100644 index 976ca58..0000000 Binary files a/data/ir/SSC_RefDB.nsq and /dev/null differ diff --git a/data/ir/SingleCopyDB.log b/data/ir/SingleCopyDB.log index 1adbe23..2571e76 100644 --- a/data/ir/SingleCopyDB.log +++ b/data/ir/SingleCopyDB.log @@ -1,63 +1,65 @@ -2015-10-07 11:25:33 [OA INFO ] 89006 -- Selecting Viridiplantae genebank entries... -2015-10-07 11:25:42 [OA INFO ] 89006 -- --> 695 entries selected -2015-10-07 11:25:42 [OA INFO ] 89006 -- Done -2015-10-07 11:25:42 [OA INFO ] 89006 -- Extracting Long Single Copies (LSC)... -2015-10-07 11:26:36 [OA INFO ] 89006 -- --> 64 retreived sequences -2015-10-07 11:26:36 [OA INFO ] 89006 -- Done -2015-10-07 11:26:36 [OA INFO ] 89006 -- Building LSC coorientation graph... -2015-10-07 11:26:36 [OA INFO ] 89127 -- Build temporary blast DB... -2015-10-07 11:26:36 [OA INFO ] 89127 -- Done -2015-10-07 11:26:36 [OA INFO ] 89127 -- Running Blast... -2015-10-07 11:27:47 [OA INFO ] 89127 -- Done -2015-10-07 11:27:47 [OA INFO ] 89006 -- --> 6 connected componants -2015-10-07 11:27:47 [OA INFO ] 89006 -- Done -2015-10-07 11:27:47 [OA INFO ] 89006 -- Indexing LCS... -2015-10-07 11:27:47 [OA INFO ] 89006 -- Done -2015-10-07 11:27:47 [OA INFO ] 89006 -- Extracting main connected components for LCS... -2015-10-07 11:27:48 [OA INFO ] 89006 -- --> 40 sequences -2015-10-07 11:27:48 [OA INFO ] 89006 -- Done -2015-10-07 11:27:48 [OA INFO ] 89006 -- Extracting second connected components for LCS... -2015-10-07 11:27:48 [OA INFO ] 89006 -- --> 5 sequences -2015-10-07 11:27:48 [OA INFO ] 89006 -- Done -2015-10-07 11:27:48 [OA INFO ] 89006 -- merging both connected components for LCS... -2015-10-07 11:27:48 [OA INFO ] 89006 -- --> 45 sequences in total -2015-10-07 11:27:48 [OA INFO ] 89006 -- Done -2015-10-07 11:27:48 [OA INFO ] 89006 -- Checking LCS homogeneity... -2015-10-07 11:27:48 [OA INFO ] 89258 -- Build temporary blast DB... -2015-10-07 11:27:48 [OA INFO ] 89258 -- Done -2015-10-07 11:27:48 [OA INFO ] 89258 -- Running Blast... -2015-10-07 11:28:50 [OA INFO ] 89258 -- Done -2015-10-07 11:28:50 [OA INFO ] 89006 -- --> 1 connected componants -2015-10-07 11:28:50 [OA INFO ] 89006 -- Done -2015-10-07 11:28:50 [OA INFO ] 89006 -- Installing LCS reference databases... -2015-10-07 11:28:50 [OA INFO ] 89006 -- Done -2015-10-07 11:28:50 [OA INFO ] 89006 -- Extracting Short Single Copies (SSC)... -2015-10-07 11:29:42 [OA INFO ] 89006 -- --> 76 retreived sequences -2015-10-07 11:29:42 [OA INFO ] 89006 -- Done -2015-10-07 11:29:42 [OA INFO ] 89006 -- Building SSC coorientation graph... -2015-10-07 11:29:42 [OA INFO ] 89336 -- Build temporary blast DB... -2015-10-07 11:29:42 [OA INFO ] 89336 -- Done -2015-10-07 11:29:42 [OA INFO ] 89336 -- Running Blast... -2015-10-07 11:30:03 [OA INFO ] 89336 -- Done -2015-10-07 11:30:03 [OA INFO ] 89006 -- --> 1 connected componants -2015-10-07 11:30:03 [OA INFO ] 89006 -- Done -2015-10-07 11:30:03 [OA INFO ] 89006 -- Indexing SSC... -2015-10-07 11:30:03 [OA INFO ] 89006 -- Done -2015-10-07 11:30:03 [OA INFO ] 89006 -- Extracting main connected components for SSC... -2015-10-07 11:30:03 [OA INFO ] 89006 -- --> 72 sequences -2015-10-07 11:30:03 [OA INFO ] 89006 -- Done -2015-10-07 11:30:03 [OA INFO ] 89006 -- Extracting second connected components for SSC... -2015-10-07 11:30:03 [OA INFO ] 89006 -- --> 0 sequences -2015-10-07 11:30:03 [OA INFO ] 89006 -- Done -2015-10-07 11:30:03 [OA INFO ] 89006 -- merging both connected components for SSC... -2015-10-07 11:30:03 [OA INFO ] 89006 -- --> 72 sequences in total -2015-10-07 11:30:03 [OA INFO ] 89006 -- Done -2015-10-07 11:30:03 [OA INFO ] 89006 -- Checking SSC homogeneity... -2015-10-07 11:30:03 [OA INFO ] 89497 -- Build temporary blast DB... -2015-10-07 11:30:03 [OA INFO ] 89497 -- Done -2015-10-07 11:30:03 [OA INFO ] 89497 -- Running Blast... -2015-10-07 11:30:23 [OA INFO ] 89497 -- Done -2015-10-07 11:30:23 [OA INFO ] 89006 -- --> 1 connected componants -2015-10-07 11:30:23 [OA INFO ] 89006 -- Done -2015-10-07 11:30:23 [OA INFO ] 89006 -- Installing SSC reference databases... -2015-10-07 11:30:23 [OA INFO ] 89006 -- Done +2015-10-07 12:22:15 [OA INFO ] 90184 -- Selecting Viridiplantae genebank entries... +2015-10-07 12:22:24 [OA INFO ] 90184 -- --> 695 entries selected +2015-10-07 12:22:24 [OA INFO ] 90184 -- Done +2015-10-07 12:22:24 [OA INFO ] 90184 -- Extracting Long Single Copies (LSC)... +2015-10-07 12:23:17 [OA INFO ] 90184 -- --> 64 retreived sequences +2015-10-07 12:23:17 [OA INFO ] 90184 -- Done +2015-10-07 12:23:17 [OA INFO ] 90184 -- Building LSC coorientation graph... +2015-10-07 12:23:17 [OA INFO ] 90306 -- Build temporary blast DB... +2015-10-07 12:23:18 [OA INFO ] 90306 -- Done +2015-10-07 12:23:18 [OA INFO ] 90306 -- Running Blast... +2015-10-07 12:24:29 [OA INFO ] 90306 -- Done +2015-10-07 12:24:29 [OA INFO ] 90184 -- --> 6 connected componants +2015-10-07 12:24:29 [OA INFO ] 90184 -- Done +2015-10-07 12:24:29 [OA INFO ] 90184 -- Indexing LCS... +2015-10-07 12:24:30 [OA INFO ] 90184 -- Done +2015-10-07 12:24:30 [OA INFO ] 90184 -- Extracting main connected components for LCS... +2015-10-07 12:24:30 [OA INFO ] 90184 -- --> 40 sequences +2015-10-07 12:24:30 [OA INFO ] 90184 -- Done +2015-10-07 12:24:30 [OA INFO ] 90184 -- Extracting second connected components for LCS... +2015-10-07 12:24:30 [OA INFO ] 90184 -- --> 5 sequences +2015-10-07 12:24:30 [OA INFO ] 90184 -- Done +2015-10-07 12:24:30 [OA INFO ] 90184 -- merging both connected components for LCS... +2015-10-07 12:24:30 [OA INFO ] 90184 -- --> 45 sequences in total +2015-10-07 12:24:30 [OA INFO ] 90184 -- Done +2015-10-07 12:24:30 [OA INFO ] 90184 -- Checking LCS homogeneity... +2015-10-07 12:24:30 [OA INFO ] 90439 -- Build temporary blast DB... +2015-10-07 12:24:30 [OA INFO ] 90439 -- Done +2015-10-07 12:24:30 [OA INFO ] 90439 -- Running Blast... +2015-10-07 12:25:35 [OA INFO ] 90439 -- Done +2015-10-07 12:25:35 [OA INFO ] 90184 -- --> 1 connected componants +2015-10-07 12:25:35 [OA INFO ] 90184 -- Done +2015-10-07 12:25:35 [OA INFO ] 90184 -- Installing LCS reference databases... +2015-10-07 12:25:35 [OA INFO ] 90184 -- Done +2015-10-07 12:25:35 [OA INFO ] 90184 -- Extracting Short Single Copies (SSC)... +2015-10-07 12:26:28 [OA INFO ] 90184 -- --> 76 retreived sequences +2015-10-07 12:26:28 [OA INFO ] 90184 -- Done +2015-10-07 12:26:28 [OA INFO ] 90184 -- Building SSC coorientation graph... +2015-10-07 12:26:28 [OA INFO ] 90517 -- Build temporary blast DB... +2015-10-07 12:26:28 [OA INFO ] 90517 -- Done +2015-10-07 12:26:28 [OA INFO ] 90517 -- Running Blast... +2015-10-07 12:26:49 [OA INFO ] 90517 -- Done +2015-10-07 12:26:49 [OA INFO ] 90184 -- --> 1 connected componants +2015-10-07 12:26:49 [OA INFO ] 90184 -- Done +2015-10-07 12:26:49 [OA INFO ] 90184 -- Indexing SSC... +2015-10-07 12:26:49 [OA INFO ] 90184 -- Done +2015-10-07 12:26:49 [OA INFO ] 90184 -- Extracting main connected components for SSC... +2015-10-07 12:26:49 [OA INFO ] 90184 -- --> 72 sequences +2015-10-07 12:26:49 [OA INFO ] 90184 -- Done +2015-10-07 12:26:49 [OA INFO ] 90184 -- Extracting second connected components for SSC... +2015-10-07 12:26:49 [OA INFO ] 90184 -- --> 0 sequences +2015-10-07 12:26:49 [OA INFO ] 90184 -- Done +2015-10-07 12:26:49 [OA INFO ] 90184 -- merging both connected components for SSC... +2015-10-07 12:26:49 [OA INFO ] 90184 -- --> 72 sequences in total +2015-10-07 12:26:49 [OA INFO ] 90184 -- Done +2015-10-07 12:26:49 [OA INFO ] 90184 -- Checking SSC homogeneity... +2015-10-07 12:26:49 [OA INFO ] 90675 -- Build temporary blast DB... +2015-10-07 12:26:49 [OA INFO ] 90675 -- Done +2015-10-07 12:26:49 [OA INFO ] 90675 -- Running Blast... +2015-10-07 12:27:10 [OA INFO ] 90675 -- Done +2015-10-07 12:27:10 [OA INFO ] 90184 -- --> 1 connected componants +2015-10-07 12:27:10 [OA INFO ] 90184 -- Done +2015-10-07 12:27:10 [OA INFO ] 90184 -- Installing SSC reference databases... +2015-10-07 12:27:10 [OA INFO ] 90184 -- Done +2015-10-07 12:27:10 [OA INFO ] 90184 -- Installing blast version of the SC_RefDB reference databases... +2015-10-07 12:27:10 [OA INFO ] 90184 -- Done diff --git a/detectors/normalize/tools/buildSCDB.sh b/detectors/normalize/tools/buildSCDB.sh index 0c66530..708a665 100755 --- a/detectors/normalize/tools/buildSCDB.sh +++ b/detectors/normalize/tools/buildSCDB.sh @@ -89,9 +89,6 @@ pushTmpDir ORG.buildSCDB cp LSC.direct.fasta "${IR_DATA_DIR}/LSC_RefDB.fasta" cp LSC_RefDB.tgf "${IR_DATA_DIR}/LSC_RefDB.tgf" - makeblastdb -in "${IR_DATA_DIR}/LSC_RefDB.fasta" \ - -dbtype nucl \ - -out "${IR_DATA_DIR}/LSC_RefDB" >& /dev/null loginfo "Done" # @@ -163,12 +160,17 @@ pushTmpDir ORG.buildSCDB loginfo "Installing SSC reference databases..." cp SSC.direct.fasta "${IR_DATA_DIR}/SSC_RefDB.fasta" - cp SSC_RefDB.tgf "${IR_DATA_DIR}/SSC_RefDB.tgf" - - makeblastdb -in "${IR_DATA_DIR}/SSC_RefDB.fasta" \ - -dbtype nucl \ - -out "${IR_DATA_DIR}/SSC_RefDB" >& /dev/null + cp SSC_RefDB.tgf "${IR_DATA_DIR}/SSC_RefDB.tgf" loginfo "Done" + loginfo "Installing blast version of the SC_RefDB reference databases..." + cat "${IR_DATA_DIR}/LSC_RefDB.fasta" \ + "${IR_DATA_DIR}/SSC_RefDB.fasta" > SC_RefDB.fasta + + makeblastdb -in SC_RefDB.fasta \ + -dbtype nucl \ + -out "${IR_DATA_DIR}/SC_RefDB" >& /dev/null + loginfo "Done" + popTmpDir