/* * sse_banded_LCS_alignment.c * * Created on: 7 nov. 2012 * Author: celine mercier */ #include #include #include #include #include #include "obierrno.h" #include "obidebug.h" #include "utils.h" #include "_sse.h" #include "sse_banded_LCS_alignment.h" #include "obiblob.h" #include "encode.h" // TODO move putBlobInSeq function to encode.c ? #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) static void printreg(__m128i r) { int16_t a0,a1,a2,a3,a4,a5,a6,a7; a0= _MM_EXTRACT_EPI16(r,0); a1= _MM_EXTRACT_EPI16(r,1); a2= _MM_EXTRACT_EPI16(r,2); a3= _MM_EXTRACT_EPI16(r,3); a4= _MM_EXTRACT_EPI16(r,4); a5= _MM_EXTRACT_EPI16(r,5); a6= _MM_EXTRACT_EPI16(r,6); a7= _MM_EXTRACT_EPI16(r,7); fprintf(stderr, "a00 :-> %7d %7d %7d %7d " " %7d %7d %7d %7d " "\n" , a0,a1,a2,a3,a4,a5,a6,a7 ); } static inline int extract_reg(__m128i r, int p) { switch (p) { case 0: return(_MM_EXTRACT_EPI16(r,0)); case 1: return(_MM_EXTRACT_EPI16(r,1)); case 2: return(_MM_EXTRACT_EPI16(r,2)); case 3: return(_MM_EXTRACT_EPI16(r,3)); case 4: return(_MM_EXTRACT_EPI16(r,4)); case 5: return(_MM_EXTRACT_EPI16(r,5)); case 6: return(_MM_EXTRACT_EPI16(r,6)); case 7: return(_MM_EXTRACT_EPI16(r,7)); } return(0); } // TODO warning on length order void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length) { register int j; int k1, k2; int max, diff; int l_reg, l_loc; int line; int numberOfRegistersPerLine; int numberOfRegistersFor3Lines; bool even_line; bool odd_line; bool even_BLL; bool odd_BLL; um128* SSEregisters; um128* p_diag; um128* p_gap1; um128* p_gap2; um128* p_diag_j; um128* p_gap1_j; um128* p_gap2_j; um128 current; um128* l_ali_SSEregisters; um128* p_l_ali_diag; um128* p_l_ali_gap1; um128* p_l_ali_gap2; um128* p_l_ali_diag_j; um128* p_l_ali_gap1_j; um128* p_l_ali_gap2_j; um128 l_ali_current; um128 nucs1; um128 nucs2; um128 scores; um128 boolean_reg; // Initialisations odd_BLL = bandLengthLeft & 1; even_BLL = !odd_BLL; max = INT16_MAX - l1; numberOfRegistersPerLine = bandLengthTotal / 8; numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine; SSEregisters = (um128*) calloc(numberOfRegistersFor3Lines * 2, sizeof(um128)); l_ali_SSEregisters = SSEregisters + numberOfRegistersFor3Lines; // preparer registres SSE for (j=0; ji, scores.i); // Computing alignment length l_ali_current.i = p_l_ali_diag_j->i; boolean_reg.i = _MM_CMPGT_EPI16(p_gap1_j->i, current.i); l_ali_current.i = _MM_OR_SI128( _MM_AND_SI128(p_l_ali_gap1_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); current.i = _MM_OR_SI128( _MM_AND_SI128(p_gap1_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, current.i)); boolean_reg.i = _MM_AND_SI128( _MM_CMPEQ_EPI16(p_gap1_j->i, current.i), _MM_CMPLT_EPI16(p_l_ali_gap1_j->i, l_ali_current.i)); l_ali_current.i = _MM_OR_SI128( _MM_AND_SI128(p_l_ali_gap1_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); current.i = _MM_OR_SI128( _MM_AND_SI128(p_gap1_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, current.i)); boolean_reg.i = _MM_CMPGT_EPI16(p_gap2_j->i, current.i); l_ali_current.i = _MM_OR_SI128( _MM_AND_SI128(p_l_ali_gap2_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); current.i = _MM_OR_SI128( _MM_AND_SI128(p_gap2_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, current.i)); boolean_reg.i = _MM_AND_SI128( _MM_CMPEQ_EPI16(p_gap2_j->i, current.i), _MM_CMPLT_EPI16(p_l_ali_gap2_j->i, l_ali_current.i)); l_ali_current.i = _MM_OR_SI128( _MM_AND_SI128(p_l_ali_gap2_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); current.i = _MM_OR_SI128( _MM_AND_SI128(p_gap2_j->i, boolean_reg.i), _MM_ANDNOT_SI128(boolean_reg.i, current.i)); // if (print) // { // fprintf(stderr, "\nline = %d", line); // fprintf(stderr, "\nDiag, r %d : ", j); // printreg((*(p_diag_j)).i); // fprintf(stderr, "Gap1 : "); // printreg((*(p_gap1_j)).i); // fprintf(stderr, "Gap2 : "); // printreg((*(p_gap2_j)).i); // fprintf(stderr, "current : "); // printreg(current.i); // fprintf(stderr, "L ALI\nDiag r %d : ", j); // printreg((*(p_l_ali_diag_j)).i); // fprintf(stderr, "Gap1 : "); // printreg((*(p_l_ali_gap1_j)).i); // fprintf(stderr, "Gap2 : "); // printreg((*(p_l_ali_gap2_j)).i); // fprintf(stderr, "current : "); // printreg(l_ali_current.i); // } // diag = gap1 and gap1 = current p_diag_j->i = p_gap1_j->i; p_gap1_j->i = current.i; // l_ali_diag = l_ali_gap1 and l_ali_gap1 = l_ali_current+1 p_l_ali_diag_j->i = p_l_ali_gap1_j->i; p_l_ali_gap1_j->i = _MM_ADD_EPI16(l_ali_current.i, _MM_SET1_EPI16(1)); } // shifts for gap2, to do only once all the registers of a line have been computed Copier gap2 puis le charger depuis la copie? for (j=0; j < numberOfRegistersPerLine; j++) { if ((odd_line && even_BLL) || (even_line && odd_BLL)) { p_gap2[j].i = _MM_LOADU_SI128((const __m128i*)((p_gap1[j].s16)-1)); p_l_ali_gap2[j].i = _MM_LOADU_SI128((const __m128i*)((p_l_ali_gap1[j].s16)-1)); if (j == 0) { p_gap2[j].i = _MM_INSERT_EPI16(p_gap2[j].i, 0, 0); p_l_ali_gap2[j].i = _MM_INSERT_EPI16(p_l_ali_gap2[j].i, max, 0); } } else { p_gap2[j].i = _MM_LOADU_SI128((const __m128i*)(p_gap1[j].s16+1)); p_l_ali_gap2[j].i = _MM_LOADU_SI128((const __m128i*)(p_l_ali_gap1[j].s16+1)); if (j == numberOfRegistersPerLine - 1) { p_gap2[j].i = _MM_INSERT_EPI16(p_gap2[j].i, 0, 7); p_l_ali_gap2[j].i = _MM_INSERT_EPI16(p_l_ali_gap2[j].i, max, 7); } } } // end shifts for gap2 } /* /// Recovering LCS and alignment lengths \\\ */ // finding the location of the results in the registers : diff = l1-l2; if ((diff & 1) && odd_BLL) l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - floor((double)(diff) / (double)2); else l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - ceil((double)(diff) / (double)2); l_reg = (int)floor((double)l_loc/(double)8.0); // if (print) // fprintf(stderr, "\nl_reg = %d, l_loc = %d\n", l_reg, l_loc); l_loc = l_loc - l_reg*8; // extracting the results from the registers : *lcs_length = extract_reg(p_gap1[l_reg].i, l_loc); *ali_length = extract_reg(p_l_ali_gap1[l_reg].i, l_loc) - 1; // freeing the registers free(SSEregisters); } // TODO warning on length order void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length) { register int j; int k1, k2; int diff; int l_reg, l_loc; int line; int numberOfRegistersPerLine; int numberOfRegistersFor3Lines; bool even_line; bool odd_line; bool even_BLL; bool odd_BLL; um128* SSEregisters; um128* p_diag; um128* p_gap1; um128* p_gap2; um128* p_diag_j; um128* p_gap1_j; um128* p_gap2_j; um128 current; um128 nucs1; um128 nucs2; um128 scores; // Initialisations odd_BLL = bandLengthLeft & 1; even_BLL = !odd_BLL; numberOfRegistersPerLine = bandLengthTotal / 8; numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine; SSEregisters = malloc(numberOfRegistersFor3Lines * sizeof(um128)); if (SSEregisters == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for SSE registers for LCS alignment"); } // preparer registres SSE for (j=0; jvalue[i/4] & mask) >> shift; *target = (int16_t)nuc+1; // +1 because nucleotide can't be == 0 (0 is a default value used to initialize some registers) } } else { for (i=0; target < end; target++, i++) { shift = 6 - 2*(i % 4); mask = NUC_MASK_2B << shift; nuc = (b->value[i/4] & mask) >> shift; *target = (int16_t)nuc+1; // +1 because nucleotide can't be == 0 (0 is a default value used to initialize some registers) } } } void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1) { int i; int address_00, x_address_10, address_01, address_01_shifted; int numberOfRegistersPerLine; int bm; int value=INT16_MAX-l1; numberOfRegistersPerLine = bandLengthTotal / 8; bm = bandLengthLeft%2; for (i=0; i < (3*numberOfRegistersPerLine*8); i++) address[i] = value; // 0,0 set to 1 and 0,1 and 1,0 set to 2 address_00 = bandLengthLeft / 2; x_address_10 = address_00 + bm - 1; address_01 = numberOfRegistersPerLine*8 + x_address_10; address_01_shifted = numberOfRegistersPerLine*16 + address_00 - bm; // fill address_00, address_01,+1, address_01_shifted,+1 address[address_00] = 1; address[address_01] = 2; address[address_01+1] = 2; address[address_01_shifted] = 2; address[address_01_shifted+1] = 2; } // TODO warning on length order double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length) { double id; int bandLengthRight, bandLengthLeft, bandLengthTotal; bandLengthLeft = calculateLeftBandLength(l1, LCSmin); bandLengthRight = calculateRightBandLength(l2, LCSmin); // fprintf(stderr, "\nBLL = %d, BLR = %d, LCSmin = %d\n", bandLengthLeft, bandLengthRight, LCSmin); bandLengthTotal = calculateSSEBandLength(bandLengthRight, bandLengthLeft); // fprintf(stderr, "\nBLT = %d\n", bandLengthTotal); if ((reference == ALILEN) && (normalize || !similarity_mode)) { initializeAddressWithGaps(address, bandLengthTotal, bandLengthLeft, l1); sse_banded_align_lcs_and_ali_len(seq1, seq2, l1, l2, bandLengthLeft, bandLengthTotal, address, lcs_length, ali_length); } else sse_banded_align_just_lcs(seq1, seq2, l1, l2, bandLengthLeft, bandLengthTotal, lcs_length); id = (double) *lcs_length; // fprintf(stderr, "\nid before normalizations = %f", id); //fprintf(stderr, "\nlcs = %f, ali length = %d\n", id, ali_length); if (!similarity_mode && !normalize) switch(reference) { case ALILEN: id = *ali_length - id; break; case MAXLEN: id = l1 - id; break; case MINLEN: id = l2 - id; } // fprintf(stderr, "\n2>>> %f, %d\n", id, ali_length); if (normalize) switch(reference) { case ALILEN: id = id / (double) *ali_length; break; case MAXLEN: id = id / (double) l1; break; case MINLEN: id = id / (double) l2; } // fprintf(stderr, "\nid = %f\n", id); return(id); } // PUBLIC FUNCTIONS int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool similarity_mode) { int LCSmin; if (threshold > 0) { if (normalize) { if (reference == MINLEN) LCSmin = threshold*l2; else // ref = maxlen or alilen LCSmin = threshold*l1; } else if (similarity_mode) LCSmin = threshold; else if (reference == MINLEN) // not similarity_mode LCSmin = l2 - threshold; else // not similarity_mode and ref = maxlen or alilen LCSmin = l1 - threshold; } else LCSmin = 0; return(LCSmin); } double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length) { double id; int l1, l2; int lmax, lmin; int sizeToAllocateForBand, sizeToAllocateForSeqs; int maxBLL; int LCSmin; int shift; int16_t* address; int16_t* iseq1; int16_t* iseq2; address = NULL; l1 = strlen(seq1); l2 = strlen(seq2); if (l1 > l2) { lmax = l1; lmin = l2; } else { lmax = l2; lmin = l1; } // If the score is expressed as a normalized distance, get the corresponding identity if (!similarity_mode && normalize) threshold = 1.0 - threshold; // Calculate the minimum LCS length corresponding to the threshold LCSmin = calculateLCSmin(lmax, lmin, threshold, normalize, reference, similarity_mode); // Allocate space for matrix band if the alignment length must be computed if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed { sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin); address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift); if (address == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError getting a memory address aligned on 16 bytes boundary"); return 0; // TODO DOUBLE_MIN } } // Allocate space for the int16_t arrays representing the sequences maxBLL = calculateLeftBandLength(lmax, LCSmin); sizeToAllocateForSeqs = 2*maxBLL+lmax; iseq1 = (int16_t*) malloc(sizeToAllocateForSeqs*sizeof(int16_t)); iseq2 = (int16_t*) malloc(sizeToAllocateForSeqs*sizeof(int16_t)); if ((iseq1 == NULL) || (iseq2 == NULL)) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for integer arrays to use in LCS alignment"); return 0; // TODO DOUBLE_MIN } // Initialize the int arrays iniSeq(iseq1, (2*maxBLL)+lmax, 0); iniSeq(iseq2, (2*maxBLL)+lmax, 255); // Shift addresses to where the sequences have to be put iseq1 = iseq1+maxBLL; iseq2 = iseq2+maxBLL; // Put the DNA sequences in the int arrays. Longest sequence must be first argument of sse_align function if (l2 > l1) { putSeqInSeq(iseq1, seq2, l2, TRUE); putSeqInSeq(iseq2, seq1, l1, FALSE); // Compute alignment id = sse_banded_lcs_align(iseq1, iseq2, l2, l1, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length); } else { putSeqInSeq(iseq1, seq1, l1, TRUE); putSeqInSeq(iseq2, seq2, l2, FALSE); // Compute alignment id = sse_banded_lcs_align(iseq1, iseq2, l1, l2, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length); } // Free allocated elements if (address != NULL) free(address-shift); free(iseq1-maxBLL); free(iseq2-maxBLL); return(id); } double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length) { double id; int l1, l2; int lmax, lmin; int sizeToAllocateForBand, sizeToAllocateForSeqs; int maxBLL; int LCSmin; int shift; int16_t* address; int16_t* iseq1; int16_t* iseq2; address = NULL; l1 = seq1->length_decoded_value; l2 = seq2->length_decoded_value; if (l1 > l2) { lmax = l1; lmin = l2; } else { lmax = l2; lmin = l1; } // If the score is expressed as a normalized distance, get the corresponding identity if (!similarity_mode && normalize) threshold = 1.0 - threshold; // Calculate the minimum LCS length corresponding to the threshold LCSmin = calculateLCSmin(lmax, lmin, threshold, normalize, reference, similarity_mode); // Allocate space for matrix band if the alignment length must be computed if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed { sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin); address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift); if (address == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError getting a memory address aligned on 16 bytes boundary"); return 0; // TODO DOUBLE_MIN } } // Allocate space for the int16_t arrays representing the sequences maxBLL = calculateLeftBandLength(lmax, LCSmin); sizeToAllocateForSeqs = 2*maxBLL+lmax; iseq1 = (int16_t*) malloc(sizeToAllocateForSeqs*sizeof(int16_t)); iseq2 = (int16_t*) malloc(sizeToAllocateForSeqs*sizeof(int16_t)); if ((iseq1 == NULL) || (iseq2 == NULL)) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for integer arrays to use in LCS alignment"); return 0; // TODO DOUBLE_MIN } // Initialize the int arrays iniSeq(iseq1, (2*maxBLL)+lmax, 0); iniSeq(iseq2, (2*maxBLL)+lmax, 255); // Shift addresses to where the sequences have to be put iseq1 = iseq1+maxBLL; iseq2 = iseq2+maxBLL; // Put the DNA sequences in the int arrays. Longest sequence must be first argument of sse_align function if (l2 > l1) { putBlobInSeq(iseq1, seq2, l2, TRUE); putBlobInSeq(iseq2, seq1, l1, FALSE); // Compute alignment id = sse_banded_lcs_align(iseq1, iseq2, l2, l1, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length); } else { putBlobInSeq(iseq1, seq1, l1, TRUE); putBlobInSeq(iseq2, seq2, l2, FALSE); // Compute alignment id = sse_banded_lcs_align(iseq1, iseq2, l1, l2, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length); } // Free allocated elements if (address != NULL) free(address-shift); free(iseq1-maxBLL); free(iseq2-maxBLL); return(id); }