/*
 * checksuflinks
 *
 * Recursively walks the child intervals of the suffix array interval [i, j]
 * and, for every non-singleton child, compares its lcp value with the lcp
 * value of the interval returned by getSuflink. Whenever
 * lcp(child) != lcp(suflink(child)) + 1 the offending pair is reported
 * through DBG.
 *
 * s    - suffix array to be checked
 * i, j - boundaries of the interval whose children are inspected;
 *        a singleton interval (i == j) is ignored
 */
void checksuflinks(Suffixarray *s, Uint i, Uint j){
  Uint k, childlcp, suflcp, *space = NULL;
  PairUint *child, childsuf;
  Container *children;

  /* ignore singletons as initial input */
  if (i == j){
    return;
  }

  children = getChildintervals(space, s, i, j, 0);
  for (k = 0; k < bl_containerSize(children); k++){
    child = (PairUint *) bl_containerGet(children, k);

    /*
     * exclude singletons: skip only this child. Returning here would leak
     * the children container and leave the remaining siblings unchecked.
     */
    if (child->a == child->b){
      continue;
    }

    /* check suflink of child */
    childlcp = getlcpval(s, child->a, child->b);
    childsuf = getSuflink(s, child->a, child->b);
    suflcp = getlcpval(s, childsuf.a, childsuf.b);
    if (childlcp != suflcp + 1){
      DBG("suf[%u, %u, %u]=[%u, %u, %u]\n", child->a, child->b, childlcp,
          childsuf.a, childsuf.b, suflcp);
    }

    /* recursively check all children of child */
    checksuflinks(s, child->a, child->b);
  }

  bl_containerDestruct(children, NULL);
  free(children);
}
/**
 * Releases a fragment container together with all fragments stored in it.
 *
 * Every fragment is expected to have had its chain released already. If a
 * fragment with a non-null chain is found, the index of that fragment is
 * written to std::cerr and the process is terminated with exit(-1).
 *
 * @param frags container of slmatch_t elements; a null pointer is a no-op.
 *              The container itself is freed, so the pointer must not be
 *              used afterwards.
 */
void FragmentList::freeFragList(Container* frags) {
  if (frags == nullptr) {
    return;
  }

  for (uint32_t i = 0; i < bl_containerSize(frags); i++) {
    slmatch_t* sl = (slmatch_t*) bl_containerGet(frags, i);
    if (sl->chain != nullptr) {
      // A leftover chain is treated as a fatal error. exit() does not
      // return, so no chain cleanup is attempted here.
      std::cerr << "still at least one chain not freed before end: " << i << "\n";
      exit(-1);
    }
  }

  bl_containerDestruct(frags, bl_slmatchDestruct);
  free(frags);
}
/*
 * Releases a suffix manager and everything it holds:
 *   - each non-null per-chromosome suffix list (its items are released
 *     with seed_free) and the array of lists itself,
 *   - every string stored in the subject container, then the container,
 *   - finally the manager structure.
 * Passing NULL is allowed and does nothing.
 */
void suffix_mng_free(suffix_mng_t *p) {
  if (!p) {
    return;
  }

  if (p->suffix_lists) {
    for (int chrom = 0; chrom < p->num_chroms; chrom++) {
      if (!p->suffix_lists[chrom]) {
        continue;
      }
      linked_list_free(p->suffix_lists[chrom], (void *)seed_free);
    }
    free(p->suffix_lists);
  }

  if (p->subject) {
    /* the container stores char* entries; release each string first */
    for (int idx = 0; idx < bl_containerSize(p->subject); idx++) {
      char **name = (char **) bl_containerGet(p->subject, idx);
      free(*name);
    }
    bl_containerDestruct(p->subject, NULL);
    free(p->subject);
  }

  free(p);
}
/*
 * kmismatch
 *
 * Matches every read of a fasta set against the suffix array with up to
 * k mismatches (via kmis), first the read itself and then the sequence
 * returned by charDNAcomplement. Hits of each pass are converted with
 * branch2match and handed to reportMatch.
 *
 * space    - memory handle passed through to the allocating helpers
 * s        - suffix array to search in
 * reads    - reads to be matched
 * k        - mismatch bound passed to kmis
 * counter  - if NULL a progress bar is drawn; otherwise *counter is
 *            incremented once per read and mutex2 is passed to reportMatch
 * rep_type - report type forwarded to reportMatch
 * silent   - if non-zero neither the progress bar nor *counter is touched
 * dev      - output stream forwarded to reportMatch
 */
void kmismatch(void *space, Suffixarray *s, fasta_t *reads, Uint k, Uint* counter, Uint rep_type, unsigned char silent, FILE *dev) {
  Uint idx, seqlen;
  Uint hits = 0;
  char *fwdseq, *revseq;
  branch_t *branches;
  Gmap gmap;
  gread_t curread;
  Container matches;
  pthread_mutex_t *lock = NULL;

  if (counter != NULL) {
    lock = &mutex2;
  } else {
    initProgressBarVT();
  }

  initGmap(&gmap, s->seq, 1);

  for (idx = 0; idx < reads->noofseqs; idx++) {
    hits = 0;

    initRead(&curread, reads->seqs[idx]);
    setReads(&gmap, &curread, 1);

    if (!silent) {
      if (lock != NULL) {
        (*counter)++;
      } else {
        progressBarVT("reads matched.", reads->noofseqs, idx, 25);
      }
    }

    fwdseq = reads->seqs[idx]->sequence;
    seqlen = reads->seqs[idx]->length;

    /* first pass: the read as given */
    branches = kmis(space, s, fwdseq, seqlen, k, &hits);
    if (hits) {
      bl_containerInit(&matches, 100, sizeof(gmatch_t));
      branch2match(s, &matches, branches, hits);
      setMatches(&curread, (gmatch_t*)matches.contspace,
                 bl_containerSize(&matches), PLUSSTRAND);
      reportMatch(dev, &gmap, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&matches, NULL);
      FREEMEMORY(space, branches);
    }

    /* second pass: start from a fresh read record, search the complement */
    initRead(&curread, reads->seqs[idx]);
    setReads(&gmap, &curread, 1);

    revseq = charDNAcomplement(space, fwdseq, seqlen);
    branches = kmis(space, s, revseq, seqlen, k, &hits);
    if (hits) {
      bl_containerInit(&matches, 100, sizeof(gmatch_t));
      branch2match(s, &matches, branches, hits);
      setMatches(&curread, (gmatch_t*)matches.contspace,
                 bl_containerSize(&matches), MINUSSTRAND);
      reportMatch(dev, &gmap, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&matches, NULL);
      FREEMEMORY(space, branches);
    }

    FREEMEMORY(space, revseq);
  }
}
/*------------------------------ bl_slChainLin --------------------------------- * * @brief creates chains of non overlapping fragments from a list of * fragments with the highest score using linear gap costs (L1) * @parameter fragments - the list of fragments to be chained, * lambda - cost of aligning gap with character on sequence, * eps - cost of aligning gap with character on query * @author Christian Otto * */ void bl_slChainLin(slmatch_t *fragments, Uint size, double eps, double lambda, int maxgap) { Uint i, *trans, *sorted, *space = NULL; int pred, succ; PairSint t; point_t *point; slmatch_t *current, *first; slchain_t *chain, *cand, **prev, **predmatch, **succmatch; #ifndef BINTREE VebTree veb; #else BinTree bin; #endif Container *points; VQueue refs; /* no chaining if size is zero */ if (size == 0) { return; } /* sorting (if already sorted -> only one scan required) */ qsort(fragments, size, sizeof(slmatch_t), cmp_slmatch_qsort); /* preinitialize */ points = bl_slExtractPoints(fragments, size); /* sorting by query position (required for vebtree/bintree) */ trans = (Uint *) malloc(sizeof(Uint) * bl_containerSize(points)); sorted = quickSort(space, points->contspace, bl_containerSize(points), cmp_slmatch_trans_first_y, NULL); for (i = 0; i < bl_containerSize(points); i++) { trans[sorted[i]] = i; } /* get t.a and t.b */ t.a = ((point_t*) bl_containerGet(points, bl_containerSize(points) - 1))->x; t.b = ((point_t*) bl_containerGet(points, sorted[bl_containerSize(points) - 1]))->y; free(sorted); /* initializations for chaining itself */ prev = (slchain_t **) malloc(sizeof(slchain_t *) * size); memset(prev, 0, sizeof(slchain_t*) * size); #ifndef BINTREE bl_vebtreeInit(&veb, bl_containerSize(points), sizeof(slchain_t *)); #else bl_bintreeInit(&bin, bl_containerSize(points), sizeof(slchain_t *)); #endif bl_vqueueInit(&refs, 2 * size, sizeof(slchain_t *)); /* traverse all points (sorted by sequence start position) */ for (i = 0; i < bl_containerSize(points); i++) { 
point = (point_t *) bl_containerGet(points, i); /* point is the start point of fragment point->index (in fragments) */ if (point->start == 1) { /* get current match from fragments */ current = fragments + point->index; /* RMQ in vebtree/bintree by y-coordinate */ #ifndef BINTREE pred = bl_vebtreePred(&veb, trans[i]); predmatch = (slchain_t **) bl_vebtreeGetData(&veb, pred); #else pred = bl_bintreePred(&bin, trans[i]); predmatch = (slchain_t **) bl_bintreeGet(&bin, pred); #endif if (predmatch != NULL && !MAXGAP(current, *predmatch)) { /* only take prev if it increases chain score */ if ((*predmatch)->scr >= (double) GLIN(current, *predmatch)) { prev[point->index] = *predmatch; } /* * addition due to clusters of local chains * (differs from Abouelhoda2004) */ else if (current->scr >= (double) GLIN(current, *predmatch)) { first = *(slmatch_t **) bl_containerGet((*predmatch)->matches, 0); if (first->chain != NULL && ((slchain_t *)first->chain)->scr < (*predmatch)->scr + current->scr - GLIN(current, *predmatch)) { chain = (slchain_t *) malloc(sizeof(slchain_t)); bl_slchainInit(chain); bl_slchainCopy(chain, *predmatch); chain->j = FEND_Q(current) - FSTART_Q(*predmatch) + 1; chain->q = FEND_S(current) - FSTART_S(*predmatch) + 1; chain->scr += current->scr - (double) GLIN(current, *predmatch); bl_containerAdd(chain->matches, ¤t); bl_slchainpDestruct(&first->chain); first->chain = chain; } } } } /* point is the end point of fragment point->index */ else { current = fragments + point->index; chain = (slchain_t *) malloc(sizeof(slchain_t)); /* * chaining candidate found * (can be only better chain for prev or best chain for current as well) */ if (prev[point->index] != NULL) { cand = prev[point->index]; bl_slchainInit(chain); chain->i = FSTART_Q(cand); chain->j = FEND_Q(current) - FSTART_Q(cand) + 1; chain->p = FSTART_S(cand); chain->q = FEND_S(current) - FSTART_S(cand) + 1; chain->scr = current->scr + cand->scr - (double) GLIN(current, cand); 
bl_containerMerge(chain->matches, cand->matches); bl_containerAdd(chain->matches, ¤t); if (current->subject != cand->subject) { DBG("slchain.c: Attempt to chain fragments of different \ reference sequences.\nExit forced.\n\n", NULL); exit(-1); } chain->subject = current->subject; /* update best chain of cluster */ first = *(slmatch_t **) bl_containerGet(chain->matches, 0); if (first->chain != NULL) { if (((slchain_t *) first->chain)->scr <= chain->scr) { slchain_t *tmp = (slchain_t *) malloc(sizeof(slchain_t)); bl_slchainInit(tmp); bl_slchainCopy(tmp, chain); bl_slchainpDestruct(&first->chain); first->chain = (void *) tmp; } } } /* no chaining candidate found */ else {
/*
 * suffix_mng_create_cals
 *
 * Turns the seeds collected in the suffix manager into CALs:
 *   1. every seed of every per-chromosome list is converted into a
 *      fragment (slmatch_t) whose score is its genome length,
 *   2. the fragments are sorted and, per run of equal subject, clustered
 *      with bl_slClusterSop or bl_slClusterLin depending on info.chainmode,
 *   3. for each resulting chain that reaches info.minscore and
 *      info.minfrag a seed list and a CAL are built, the CAL is extended
 *      with extend_seeds and kept in cal_list if its read_area is at
 *      least min_area (otherwise it is freed),
 *   4. all chains are destroyed and the suffix manager is cleared.
 *
 * Nothing is done if p is NULL, has no suffix lists or holds no seeds.
 *
 * read     - read the CALs belong to (stored in each CAL)
 * min_area - minimum read_area a CAL needs in order to be kept
 * strand   - strand assigned to the created seeds and CALs
 * sa_index - index handed to extend_seeds
 * cal_list - output list receiving the accepted CALs
 * p        - suffix manager providing the seeds; cleared on return
 */
void suffix_mng_create_cals(fastq_read_t *read, int min_area, int strand,
                            sa_index3_t *sa_index, array_list_t *cal_list,
                            suffix_mng_t *p) {
  if (!p) return;
  if (!p->suffix_lists) return;
  if (p->num_seeds <= 0) return;

  int chrom;
  seed_t *seed;
  seed_cal_t *cal;
  linked_list_t *seed_list;

  claspinfo_t info;
  bl_claspinfoInit(&info);

  // initialization
  info.fragments = (Container *) malloc(sizeof(Container));
  bl_containerInit(info.fragments, p->num_seeds, sizeof(slmatch_t));
  // borrowed from the manager; detached again before info is destructed
  info.subject = p->subject;

  // convert every seed into a fragment
  slmatch_t frag;
  linked_list_t *suffix_list;
  for (unsigned int i = 0; i < p->num_chroms; i++) {
    suffix_list = p->suffix_lists[i];
    if (suffix_list) {
      for (linked_list_item_t *item = suffix_list->first;
           item != NULL;
           item = item->next) {
        seed = item->item;

        bl_slmatchInit(&frag, 0);
        frag.i = seed->read_start;
        frag.j = seed->read_end - seed->read_start + 1;
        frag.p = seed->genome_start;
        frag.q = seed->genome_end - seed->genome_start + 1;
        frag.scr = seed->genome_end - seed->genome_start + 1;
        frag.subject = seed->chromosome_id;
        bl_containerAdd(info.fragments, &frag);
      }
    }
  }

  // sort fragments
  qsort(info.fragments->contspace, bl_containerSize(info.fragments),
        sizeof(slmatch_t), cmp_slmatch_qsort);

  int begin = 0;
  for (int i = 1; i <= bl_containerSize(info.fragments); i++) {
    // end of fragments list or different database sequence
    // --> process fragment[begin]...fragment[i-1], write output
    //     and free chains (less memory consumption with large input files)
    if (i == bl_containerSize(info.fragments) ||
        ((slmatch_t *) bl_containerGet(info.fragments, begin))->subject !=
        ((slmatch_t *) bl_containerGet(info.fragments, i))->subject) {

      if (info.chainmode == SOP) {
        // only use chaining without clustering if no ids are specified
        bl_slClusterSop((slmatch_t *) info.fragments->contspace + begin, i - begin,
                        info.epsilon, info.lambda, info.maxgap);
      } else {
        bl_slClusterLin((slmatch_t *) info.fragments->contspace + begin, i - begin,
                        info.epsilon, info.lambda, info.maxgap);
      }

      for (int j = begin; j < i; j++) {
        slmatch_t *match = (slmatch_t *) bl_containerGet(info.fragments, j);
        if (match->chain) {
          slchain_t *chain = (slchain_t *) match->chain;

          if (chain->scr >= info.minscore &&
              bl_containerSize(chain->matches) >= info.minfrag) {
            chrom = atoi(*(char **) bl_containerGet(info.subject, chain->subject));

            // one seed per fragment of the chain
            seed_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);
            for (int k = 0; k < bl_containerSize(chain->matches); k++) {
              slmatch_t *member = *(slmatch_t **) bl_containerGet(chain->matches, k);

              seed = seed_new(member->i, member->i + member->j - 1,
                              member->p, member->p + member->q - 1);
              seed->chromosome_id = chrom;
              seed->strand = strand;
              cigar_append_op(member->j, '=', &seed->cigar);
              linked_list_insert_last(seed, seed_list);
            }

            // extend seeds
            cal = seed_cal_new(chrom, strand, chain->p, chain->p + chain->q - 1,
                               seed_list);
            cal->read = read;
            extend_seeds(cal, sa_index);
            seed_cal_update_info(cal);

            if (cal->read_area >= min_area) {
              array_list_insert(cal, cal_list);
            } else {
              seed_cal_free(cal);
            }
          }

          // the chain is released whether or not it produced a CAL
          bl_slchainDestruct(chain);
          free(chain);
          match->chain = NULL;
        }
      } // END OF for (j = begin; j < i; j++)
      begin = i;
    } // END OF if (i == bl_containerSize(info.fragments) || ...
  } // END OF for (i = 1; i <= bl_containerSize(info.fragments); i++)

  // destruct everything; subject is reset first so that the manager's
  // container is not handed to bl_claspinfoDestruct
  info.subject = NULL;
  bl_claspinfoDestruct(&info);

  // finally, clear suffix manager
  suffix_mng_clear(p);
}
bool FragmentList::computeBestChain_(Container* frags, double& maxScore, uint32_t& bestPos) { double epsilon = 0.0; double lambda = 1.0; unsigned char chainMode = LIN; // Did we find a chain with a score higher than the original // maxScore parameter? bool updatedMaxScore = false; /* sort fragments */ qsort(frags->contspace, bl_containerSize(frags), sizeof(slmatch_t), cmp_slmatch_qsort); uint32_t begin = 0; for (uint32_t i = 1; i <= bl_containerSize(frags); i++){ /* * end of fragments list or different database sequence * --> process fragment[begin]...fragment[i-1], write output * and free chains (less memory consumption with large input files) */ if (i == bl_containerSize(frags) || ((slmatch_t *) bl_containerGet(frags, begin))->subject != ((slmatch_t *) bl_containerGet(frags, i))->subject){ //fprintf(info.dev, "%d\t%d\n", begin, i-begin); if (chainMode == SOP){ /* only use chaining without clustering if no ids are specified */ bl_slChainSop((slmatch_t *) frags->contspace + begin, i - begin, epsilon, lambda, 10); /* bl_slClusterSop((slmatch_t *) info.fragments->contspace + begin, i - begin, info.epsilon, info.lambda, info.maxgap); */ } else { bl_slChainLin((slmatch_t *) frags->contspace + begin, i - begin, epsilon, lambda, 10); /* bl_slClusterLin((slmatch_t *) info.fragments->contspace + begin, i - begin, info.epsilon, info.lambda, info.maxgap); */ } for (uint32_t j = begin; j < i; j++){ slmatch_t *match = (slmatch_t *) bl_containerGet(frags, j); if (match->chain) { slchain_t* chain = (slchain_t*) match->chain; if (chain->scr >= maxScore) { maxScore = chain->scr; bestPos = chain->p; updatedMaxScore = true; } /* // output matches (if desired) if (info.outputm){ fprintf(info.dev, "M\t"); if (!info.outputorig){ if (info.idcol != NULL){ fprintf(info.dev, "%s\t", *(char **) bl_containerGet(info.subject, match->subject)); } fprintf(info.dev, "%d\t%d\t%d\t%d\t%.3f\n", match->i, match->i + match->j - 1, match->p, match->p + match->q - 1, match->scr); } // output in 
original format as input else { fprintf(info.dev, "%s\n", *(char **) bl_containerGet(info.lines, j)); } } if (match->chain){ slchain_t *chain = (slchain_t *) match->chain; if (info.outputc && chain->scr >= info.minscore && bl_containerSize(chain->matches) >= info.minfrag){ fprintf(info.dev, "C\t"); if (info.idcol != NULL){ fprintf(info.dev, "%s\t", *(char **) bl_containerGet(info.subject, chain->subject)); } fprintf(info.dev, "%d\t%d\t%d\t%d\t%.3f\n", chain->i, chain->i + chain->j - 1, chain->p, chain->p + chain->q - 1, chain->scr); } // output chains and fragments (if requested) if (info.outputf && chain->scr >= info.minscore && bl_containerSize(chain->matches) >= info.minfrag){ for (k = 0; k < bl_containerSize(chain->matches); k++){ slmatch_t *frag = *(slmatch_t **) bl_containerGet(chain->matches, k); fprintf(info.dev, "F\t"); if (!info.outputorig){ if (info.idcol != NULL){ fprintf(info.dev, "%s\t", *(char **) bl_containerGet(info.subject, frag->subject)); } fprintf(info.dev, "%d\t%d\t%d\t%d\t%.3f\n", frag->i, frag->i + frag->j - 1, frag->p, frag->p + frag->q - 1, frag->scr); } // output in original format as input else { fprintf(info.dev, "%s\n", *(char **) bl_containerGet(info.lines, frag->idx)); } } } */ bl_slchainDestruct(chain); free(chain); match->chain = nullptr; } /* END OF if (frag->chain) */ } /* END OF for (j = begin; j < i; j++) */ begin = i; } /* END OF if (i == bl_containerSize(info.fragments) || ((slmatch_t *) bl_containerGet(info.fragments, begin))->subject != ((slmatch_t *) bl_containerGet(info.fragments, i))->subject) */ } /* END OF for (i = 1; i <= bl_containerSize(info.fragments); i++) */ return updatedMaxScore; }