/**************************************************************************** * Utility functions * ****************************************************************************/ /** * @file utils.c * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date 29 March 2016 * @brief Code for utility functions. */ #include #include #include #include #include #include #include #include #include #include #include "utils.h" #include "obidebug.h" #include "obierrno.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) /************************************************************************** * * D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S * **************************************************************************/ /** * Internal function returning the complement of a nucleotide base. * * @warning The base must be in lower case. * * @param nucAc The nucleotide base. * * @returns The complement of the nucleotide base. * @retval The nucleotide base itself if no complement was found. * * @since December 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) * @note Copied from ecoPCR source code */ static char nuc_base_complement(char nucAc); /** * Internal function returning the complement of a nucleotide sequence. * * @warning The sequence must be in lower case. * @warning The sequence will be replaced by its complement without being copied. * * @param nucAcSeq The nucleotide sequence. * * @returns The complemented sequence. * * @since December 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) * @note Copied from ecoPCR source code */ static char* nuc_seq_complement(char* nucAcSeq); /** * Internal function returning the reverse of a nucleotide sequence. * * @warning The sequence must be in lower case. * @warning The sequence will be replaced by its reverse without being copied. * * @param str The nucleotide sequence. * @param isPattern Whether the sequence is a pattern. TODO * * @returns The reversed sequence. * * @since December 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) * @note Copied from ecoPCR source code */ static char* reverse_sequence(char* str, char isPattern); /************************************************************************ * * D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S * ************************************************************************/ static char nuc_base_complement(char nucAc) { char* c; if ((c = strchr(DNA_ALPHA, nucAc))) return CDNA_ALPHA[(c - DNA_ALPHA)]; else return nucAc; } static char* nuc_seq_complement(char* nucAcSeq) { char *s; for (s = nucAcSeq ; *s ; s++) *s = nuc_base_complement(*s); return nucAcSeq; } static char* reverse_sequence(char* str, char isPattern) { char *sb, *se, c; if (! str) return str; sb = str; se = str + strlen(str) - 1; while(sb <= se) { c = *sb; *sb++ = *se; *se-- = c; } sb = str; se = str + strlen(str) - 1; if (isPattern) for (;sb < se; sb++) { if (*sb=='#') { if (((se - sb) > 2) && (*(sb+2)=='!')) { *sb='!'; sb+=2; *sb='#'; } else { *sb=*(sb+1); sb++; *sb='#'; } } else if (*sb=='!') { *sb=*(sb-1); *(sb-1)='!'; } } return str; } /********************************************************************** * * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S * **********************************************************************/ int copy_file(const char* src_file_path, const char* dest_file_path) { int src_fd, dst_fd, n, err; unsigned char buffer[4096]; src_fd = open(src_file_path, O_RDONLY); if (src_fd == -1) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError opening a file to copy"); return -1; } dst_fd = open(dest_file_path, O_CREAT | O_WRONLY, 0777); // overwrite if already exists if (dst_fd == -1) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError opening a file to write a copy: %s", dest_file_path); return -1; } while (1) { err = read(src_fd, buffer, 4096); if (err == -1) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nProblem reading a file to copy"); return -1; } n = err; if (n == 0) break; err = write(dst_fd, buffer, n); if (err == -1) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nProblem writing to a file while copying"); return -1; } } if (close(src_fd) < 0) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError closing a file after copying it"); return -1; } if (close(dst_fd) < 0) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError closing a file after copying to it"); return -1; } return 0; } int digit_count(index_t i) { int n_digits; if (i == 0) n_digits = 1; else n_digits = floor(log10(llabs(i))) + 1; return n_digits; } char* build_word_with_idx(const char* prefix, index_t idx) { char* word; int n_digits; n_digits = digit_count(idx); word = (char*) malloc((strlen(prefix) + 1+ n_digits + 1)*sizeof(char)); if (word == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a character string"); return NULL; } if (sprintf(word, "%s_%lld", prefix, idx) < 0) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nProblem building a word from a prefix and an index"); return NULL; } return word; } int count_dir(char* dir_path) { struct dirent* dp; DIR* fd; int count; count = 0; if ((fd = opendir(dir_path)) == NULL) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "Error opening a directory: %s\n", dir_path); return -1; } while ((dp = readdir(fd)) != NULL) { if ((dp->d_name)[0] == '.') continue; count++; } if (closedir(fd) < 0) { obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError closing a directory"); return -1; } return count; } char* obi_format_date(time_t date) { char* formatted_time; struct tm* tmp; formatted_time = (char*) malloc(FORMATTED_TIME_LENGTH*sizeof(char)); if (formatted_time == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory to format a date"); return NULL; } tmp = localtime(&date); if (tmp == NULL) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError formatting a date"); return NULL; } if (strftime(formatted_time, FORMATTED_TIME_LENGTH, "%c", tmp) == 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError formatting a date"); return NULL; } return formatted_time; } void* obi_get_memory_aligned_on_16(int size, int* shift) { void* memory; *shift = 0; memory = (void*) malloc(size); if (memory == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory"); return NULL; } while ((((long long unsigned int) (memory))%16) != 0) { memory++; (*shift)++; } return (memory); } /* * A generic implementation of binary search for the Linux kernel * * Copyright (C) 2008-2009 Ksplice, Inc. * Author: Tim Abbott * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; version 2. */ void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data, int (*cmp)(const void *key, const void *elt, const void* user_data)) { size_t start = 0; size_t end = num; size_t mid; int result; while (start < end) { mid = start + (end - start) / 2; result = cmp(key, base + mid * size, user_data); if (result < 0) end = mid; else if (result > 0) start = mid + 1; else return (void*)base + mid * size; } return NULL; } /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". */ #define MIN(a,b) ((a) < (b) ? a : b) #define swapcode(TYPE, parmi, parmj, n) { \ long i = (n) / sizeof (TYPE); \ register TYPE *pi = (TYPE *) (parmi); \ register TYPE *pj = (TYPE *) (parmj); \ do { \ register TYPE t = *pi; \ *pi++ = *pj; \ *pj++ = t; \ } while (--i > 0); \ } #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \ es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1; static __inline void swapfunc(char *a, char *b, int n, int swaptype) { if (swaptype <= 1) swapcode(long, a, b, n) else swapcode(char, a, b, n) } #define swap(a, b) \ if (swaptype == 0) { \ long t = *(long *)(a); \ *(long *)(a) = *(long *)(b); \ *(long *)(b) = t; \ } else \ swapfunc(a, b, es, swaptype) #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) static __inline char * med3(char *a, char *b, char *c, const void *user_data, int (*cmp)(const void *, const void *, const void *)) { return cmp(a, b, user_data) < 0 ? (cmp(b, c, user_data) < 0 ? b : (cmp(a, c, user_data) < 0 ? c : a )) :(cmp(b, c, user_data) > 0 ? b : (cmp(a, c, user_data) < 0 ? a : c )); } void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *)) { char *pa, *pb, *pc, *pd, *pl, *pm, *pn; int d, r, swaptype, swap_cnt; register char *a = aa; loop: SWAPINIT(a, es); swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es) for (pl = pm; pl > (char *) a && cmp(pl - es, pl, user_data) > 0; pl -= es) swap(pl, pl - es); return; } pm = (char *)a + (n / 2) * es; if (n > 7) { pl = (char *)a; pn = (char *)a + (n - 1) * es; if (n > 40) { d = (n / 8) * es; pl = med3(pl, pl + d, pl + 2 * d, user_data, cmp); pm = med3(pm - d, pm, pm + d, user_data, cmp); pn = med3(pn - 2 * d, pn - d, pn, user_data, cmp); } pm = med3(pl, pm, pn, user_data, cmp); } swap(a, pm); pa = pb = (char *)a + es; pc = pd = (char *)a + (n - 1) * es; for (;;) { while (pb <= pc && (r = cmp(pb, a, user_data)) <= 0) { if (r == 0) { swap_cnt = 1; swap(pa, pb); pa += es; } pb += es; } while (pb <= pc && (r = cmp(pc, a, user_data)) >= 0) { if (r == 0) { swap_cnt = 1; swap(pc, pd); pd -= es; } pc -= es; } if (pb > pc) break; swap(pb, pc); swap_cnt = 1; pb += es; pc -= es; } if (swap_cnt == 0) { /* Switch to insertion sort */ for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) for (pl = pm; pl > (char *) a && cmp(pl - es, pl, user_data) > 0; pl -= es) swap(pl, pl - es); return; } pn = (char *)a + n * es; r = MIN(pa - (char *)a, pb - pa); vecswap(a, pb - r, r); r = MIN((long)(pd - pc), (long)(pn - pd - es)); vecswap(pb, pn - r, r); if ((r = pb - pa) > (int)es) qsort_user_data(a, r / es, es, user_data, cmp); if ((r = pd - pc) > (int)es) { /* Iterate rather than recurse to save stack space */ a = pn - r; n = r / es; goto loop; } /* qsort(pn - r, r / es, es, cmp);*/ } char* reverse_complement_pattern(char* nucAcSeq) { return reverse_sequence(nuc_seq_complement(nucAcSeq), 1); } char* reverse_complement_sequence(char* nucAcSeq) { return reverse_sequence(nuc_seq_complement(nucAcSeq), 0); }