Taxonomy: new functions to find taxa by name

2020-10-30 10:45:20 +01:00
parent b9b4cec5b5
commit 112e12cab0
7 changed files with 174 additions and 58 deletions
--- a/src/obiview.h
+++ b/src/obiview.h
@@ -30,54 +30,56 @@
 #include "obiblob.h"


-#define OBIVIEW_NAME_MAX_LENGTH (249)   		/**< The maximum length of an OBIDMS view name, without the extension.
-                                	 	  	  	 */
-#define VIEW_TYPE_MAX_LENGTH (1024)   			/**< The maximum length of the type name of a view.
-                                	 	  	  	 */
-#define LINES_COLUMN_NAME "LINES"				/**< The name of the column containing the line selections
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in all views.
-                                	 	  	  	 */
-#define VIEW_TYPE_NUC_SEQS "NUC_SEQS_VIEW"   	/**< The type name of views based on nucleotide sequences
-												 *   and their metadata.
-                                	 	  	  	 */
-#define NUC_SEQUENCE_COLUMN "NUC_SEQ"			/**< The name of the column containing the nucleotide sequences
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in NUC_SEQS_VIEW views.
-                                	 	  	  	 */
-#define ID_COLUMN "ID"							/**< The name of the column containing the sequence identifiers
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in NUC_SEQS_VIEW views.
-                                	 	  	  	 */
-#define DEFINITION_COLUMN "DEFINITION"			/**< The name of the column containing the sequence definitions
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in NUC_SEQS_VIEW views.
-                                	 	  	  	 */
-#define QUALITY_COLUMN "QUALITY"				/**< The name of the column containing the sequence qualities
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in NUC_SEQS_VIEW views.
-                                	 	  	   	 */
-#define REVERSE_QUALITY_COLUMN "REVERSE_QUALITY" /**< The name of the column containing the sequence qualities
- 	 	 	 	 	 	 	 	 	 	 	 	 *    of the reverse read (generated by ngsfilter, used by alignpairedend).
-                                	 	  	   	 */
+#define OBIVIEW_NAME_MAX_LENGTH (249)   		   /**< The maximum length of an OBIDMS view name, without the extension.
+                                	 	  	  	    */
+#define VIEW_TYPE_MAX_LENGTH (1024)   			   /**< The maximum length of the type name of a view.
+                                	 	  	  	    */
+#define LINES_COLUMN_NAME "LINES"				   /**< The name of the column containing the line selections
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in all views.
+                                	 	  	  	    */
+#define VIEW_TYPE_NUC_SEQS "NUC_SEQS_VIEW"   	   /**< The type name of views based on nucleotide sequences
+												    *   and their metadata.
+                                	 	  	  	    */
+#define NUC_SEQUENCE_COLUMN "NUC_SEQ"			   /**< The name of the column containing the nucleotide sequences
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in NUC_SEQS_VIEW views.
+                                	 	  	  	    */
+#define ID_COLUMN "ID"							   /**< The name of the column containing the sequence identifiers
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in NUC_SEQS_VIEW views.
+                                	 	  	  	    */
+#define DEFINITION_COLUMN "DEFINITION"			   /**< The name of the column containing the sequence definitions
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in NUC_SEQS_VIEW views.
+                                	 	  	  	    */
+#define QUALITY_COLUMN "QUALITY"				   /**< The name of the column containing the sequence qualities
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in NUC_SEQS_VIEW views.
+                                	 	  	   	    */
+#define REVERSE_QUALITY_COLUMN "REVERSE_QUALITY"   /**< The name of the column containing the sequence qualities
+ 	 	 	 	 	 	 	 	 	 	 	 	    *    of the reverse read (generated by ngsfilter, used by alignpairedend).
+                                	 	  	   	    */
 #define REVERSE_SEQUENCE_COLUMN "REVERSE_SEQUENCE" /**< The name of the column containing the sequence
- 	 	 	 	 	 	 	 	 	 	 	 	 *    of the reverse read (generated by ngsfilter, used by alignpairedend).
-                                	 	  	   	 */
-#define QUALITY_COLUMN "QUALITY"				/**< The name of the column containing the sequence qualities
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in NUC_SEQS_VIEW views.
-                                	 	  	   	 */
-#define COUNT_COLUMN "COUNT"				    /**< The name of the column containing the sequence counts
- 	 	 	 	 	 	 	 	 	 	 	 	 *   in NUC_SEQS_VIEW views.
-                                	 	  	  	 */
-#define TAXID_COLUMN "TAXID"				    /**< The name of the column containing the taxids.       TODO subtype of INT column?
-                                	             */
-#define MERGED_TAXID_COLUMN "MERGED_TAXID"		/**< The name of the column containing the merged taxids information.
-                                	             */
-#define MERGED_PREFIX "MERGED_"		            /**< The prefix to prepend to column names when merging informations during obi uniq.
-                                	             */
-#define TAXID_DIST_COLUMN "TAXID_DIST"			/**< The name of the column containing a dictionary of taxid:[list of ids] when merging informations during obi uniq.
-                                	             */
-#define MERGED_COLUMN "MERGED"					/**< The name of the column containing a list of ids when merging informations during obi uniq.
-                                	             */
-#define ID_PREFIX "seq"						    /**< The default prefix of sequence identifiers in automatic ID columns.
-                                	 	  	  	 */
-#define PREDICATE_KEY "predicates"		        /**< The key used in the json-formatted view comments to store predicates.
-                                	 	  	  	 */
+ 	 	 	 	 	 	 	 	 	 	 	 	    *    of the reverse read (generated by ngsfilter, used by alignpairedend).
+                                	 	  	   	    */
+#define QUALITY_COLUMN "QUALITY"				   /**< The name of the column containing the sequence qualities
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in NUC_SEQS_VIEW views.
+                                	 	  	   	    */
+#define COUNT_COLUMN "COUNT"				       /**< The name of the column containing the sequence counts
+ 	 	 	 	 	 	 	 	 	 	 	 	    *   in NUC_SEQS_VIEW views.
+                                	 	  	  	    */
+#define SCIENTIFIC_NAME_COLUMN "SCIENTIFIC_NAME"   /**< The name of the column containing the taxon scientific name.
+                                	                */
+#define TAXID_COLUMN "TAXID"				       /**< The name of the column containing the taxids.       TODO subtype of INT column?
+                                	                */
+#define MERGED_TAXID_COLUMN "MERGED_TAXID"		   /**< The name of the column containing the merged taxids information.
+                                	                */
+#define MERGED_PREFIX "MERGED_"		               /**< The prefix to prepend to column names when merging information during obi uniq.
+                                	                */
+#define TAXID_DIST_COLUMN "TAXID_DIST"			   /**< The name of the column containing a dictionary of taxid:[list of ids] when merging information during obi uniq.
+                                	                */
+#define MERGED_COLUMN "MERGED"					   /**< The name of the column containing a list of ids when merging information during obi uniq.
+                                	                */
+#define ID_PREFIX "seq"						       /**< The default prefix of sequence identifiers in automatic ID columns.
+                                	 	  	  	    */
+#define PREDICATE_KEY "predicates"		           /**< The key used in the json-formatted view comments to store predicates.
+                                	 	  	  	    */


 /**