Beispiel #1
0
/*
 * checksuflinks
 *
 * Debug helper that recursively walks the child intervals of the
 * suffix-array interval [i, j]. For every non-singleton child it compares
 * the child's lcp value with the lcp value of the interval returned by
 * getSuflink() and emits a DBG message whenever childlcp != suflcp + 1.
 *
 * s    - suffix array to inspect
 * i, j - bounds of the interval; a singleton (i == j) is ignored
 */
void checksuflinks(Suffixarray *s, Uint i, Uint j){
  Uint k, childlcp, suflcp, *space = NULL;
  PairUint* child, childsuf;
  Container *children;
  // ignore singletons as initial input
  if (i == j){
    return;
  }
  children = getChildintervals(space, s, i, j, 0);
  for (k = 0; k < bl_containerSize(children); k++){
    child = (PairUint *) bl_containerGet(children, k);
    // exclude singletons: skip this child only, so the remaining siblings
    // are still checked and the container is released after the loop
    if (child->a == child->b){
      continue;
    }
    // check suflink of child
    childlcp = getlcpval(s, child->a, child->b);
    childsuf = getSuflink(s, child->a, child->b);
    suflcp = getlcpval(s, childsuf.a, childsuf.b);
    if (childlcp != suflcp + 1){
      DBG("suf[%u, %u, %u]=[%u, %u, %u]\n", child->a, child->b, childlcp,
          childsuf.a, childsuf.b, suflcp);
    }
    // recursively check all children of child
    checksuflinks(s, child->a, child->b);
  }
  // release the child-interval container obtained above
  bl_containerDestruct(children, NULL);
  free(children);
}
Beispiel #2
0
/**
 * Releases a fragment container together with the fragments stored in it.
 *
 * Every fragment is expected to have had its chain released (pointer reset
 * to nullptr) beforehand. A fragment that still holds a chain is treated as
 * a fatal error: a diagnostic with the fragment index is written to
 * std::cerr and the process terminates via exit(-1).
 *
 * @param frags container of slmatch_t elements; its elements are destructed
 *              with bl_slmatchDestruct and the container itself is passed
 *              to free().
 */
void FragmentList::freeFragList(Container* frags) {
    for (uint32_t i = 0; i < bl_containerSize(frags); i++){
        slmatch_t *sl = (slmatch_t *) bl_containerGet(frags, i);
        if (sl->chain != nullptr){
            std::cerr << "still at least one chain not freed before end: " << i << "\n";
            // exit() never returns, so no chain cleanup is attempted here
            exit(-1);
        }
    }
    bl_containerDestruct(frags, bl_slmatchDestruct);
    free(frags);
}
Beispiel #3
0
/*
 * Releases a suffix manager and what it holds:
 *   - every non-NULL list in p->suffix_lists (items released via seed_free),
 *     followed by the list array itself;
 *   - every string stored in the p->subject container, followed by the
 *     container;
 *   - finally the manager structure.
 * A NULL manager is accepted and ignored.
 */
void suffix_mng_free(suffix_mng_t *p) {
  if (!p) {
    return;
  }

  // per-chromosome seed lists
  if (p->suffix_lists != NULL) {
    for (int chrom = 0; chrom < p->num_chroms; chrom++) {
      if (p->suffix_lists[chrom] != NULL) {
        linked_list_free(p->suffix_lists[chrom], (void *)seed_free);
      }
    }
    free(p->suffix_lists);
  }

  // subject names: each container slot stores a char* that is freed here
  if (p->subject != NULL) {
    for (int idx = 0; idx < bl_containerSize(p->subject); idx++) {
      char **name = (char **) bl_containerGet(p->subject, idx);
      free(*name);
    }
    bl_containerDestruct(p->subject, NULL);
    free(p->subject);
  }

  free(p);
}
Beispiel #4
0
/*
 * kmismatch
 *
 * For every sequence in `reads`, calls kmis() with parameter k once on the
 * read itself and once on the sequence returned by charDNAcomplement().
 * Whenever kmis() reports matches they are converted with branch2match(),
 * attached to the read (PLUSSTRAND / MINUSSTRAND respectively) and handed
 * to reportMatch().
 *
 * space    - memory handle forwarded to the allocation helpers
 * s        - suffix array that is searched
 * reads    - input sequences
 * k        - passed through to kmis()
 * counter  - if non-NULL it is incremented once per read (unless silent)
 *            and &mutex2 is forwarded to reportMatch(); if NULL a progress
 *            bar is drawn instead
 * rep_type - passed through to reportMatch()
 * silent   - non-zero suppresses progress bar / counter updates
 * dev      - output stream passed to reportMatch()
 */
void
kmismatch(void *space,
    Suffixarray *s,
    fasta_t *reads,
    Uint k,
    Uint* counter,
    Uint rep_type,
    unsigned char silent,
    FILE *dev)
{
  Uint readIdx, readLen;
  char *complement, *forward;
  branch_t *branches;
  Gmap gmap;
  Uint noofmatches = 0;
  gread_t read;
  Container hits;
  pthread_mutex_t *mtx = NULL;

  /* a caller-supplied counter selects the mutex, otherwise the progress bar */
  if (counter != NULL) {
    mtx = &mutex2;
  } else {
    initProgressBarVT();
  }

  initGmap(&gmap, s->seq, 1);

  for (readIdx = 0; readIdx < reads->noofseqs; readIdx++) {

    noofmatches = 0;
    initRead(&read, reads->seqs[readIdx]);
    setReads(&gmap, &read, 1);

    if (!silent) {
      if (mtx != NULL) {
        (*counter)++;
      } else {
        progressBarVT("reads matched.", reads->noofseqs, readIdx, 25);
      }
    }

    forward = reads->seqs[readIdx]->sequence;
    readLen = reads->seqs[readIdx]->length;

    /* first pass: the read as given */
    branches = kmis(space, s, forward, readLen, k, &noofmatches);

    if (noofmatches) {
      bl_containerInit(&hits, 100, sizeof(gmatch_t));
      branch2match(s, &hits, branches, noofmatches);
      setMatches(&read, (gmatch_t*)hits.contspace,
                 bl_containerSize(&hits), PLUSSTRAND);

      reportMatch(dev, &gmap, rep_type, 0, mtx, readLen, readLen);
      bl_containerDestruct(&hits, NULL);
      FREEMEMORY(space, branches);
    }

    /* reset the read before the second pass */
    initRead(&read, reads->seqs[readIdx]);
    setReads(&gmap, &read, 1);

    /*
     * second pass: the complemented sequence
     * NOTE(review): noofmatches is not reset here; this relies on kmis()
     * overwriting it -- confirm against kmis().
     */
    complement = charDNAcomplement(space, forward, readLen);
    branches = kmis(space, s, complement, readLen, k, &noofmatches);

    if (noofmatches) {
      bl_containerInit(&hits, 100, sizeof(gmatch_t));
      branch2match(s, &hits, branches, noofmatches);
      setMatches(&read, (gmatch_t*)hits.contspace,
                 bl_containerSize(&hits), MINUSSTRAND);
      reportMatch(dev, &gmap, rep_type, 0, mtx, readLen, readLen);
      bl_containerDestruct(&hits, NULL);
      FREEMEMORY(space, branches);
    }
    FREEMEMORY(space, complement);
  }
}
Beispiel #5
0
/*------------------------------ bl_slChainLin ---------------------------------
 *
 * @brief      creates chains of non overlapping fragments from a list of
 *	       fragments with the highest score using linear gap costs (L1)
 * @parameter  fragments - the list of fragments to be chained,
 *	       lambda - cost of aligning gap with character on sequence,
 *	       eps - cost of aligning gap with character on query
 * @author     Christian Otto
 *
 */
void bl_slChainLin(slmatch_t *fragments, Uint size, double eps, double lambda, int maxgap) {
    Uint i, *trans, *sorted, *space = NULL;
    int pred, succ;
    PairSint t;
    point_t *point;
    slmatch_t *current, *first;
    slchain_t *chain, *cand, **prev, **predmatch, **succmatch;
#ifndef BINTREE
    VebTree veb;
#else
    BinTree bin;
#endif
    Container *points;
    VQueue refs;
    /* no chaining if size is zero */
    if (size == 0) {
        return;
    }
    /* sorting (if already sorted -> only one scan required) */
    qsort(fragments, size, sizeof(slmatch_t), cmp_slmatch_qsort);
    /* preinitialize */
    points = bl_slExtractPoints(fragments, size);
    /* sorting by query position (required for vebtree/bintree) */
    trans = (Uint *) malloc(sizeof(Uint) * bl_containerSize(points));
    sorted = quickSort(space, points->contspace, bl_containerSize(points),
                       cmp_slmatch_trans_first_y, NULL);
    for (i = 0; i < bl_containerSize(points); i++) {
        trans[sorted[i]] = i;
    }
    /* get t.a and t.b */
    t.a = ((point_t*) bl_containerGet(points, bl_containerSize(points) - 1))->x;
    t.b = ((point_t*) bl_containerGet(points,
                                      sorted[bl_containerSize(points) - 1]))->y;
    free(sorted);
    /* initializations for chaining itself */
    prev = (slchain_t **) malloc(sizeof(slchain_t *) * size);
    memset(prev, 0, sizeof(slchain_t*) * size);
#ifndef BINTREE
    bl_vebtreeInit(&veb, bl_containerSize(points), sizeof(slchain_t *));
#else
    bl_bintreeInit(&bin, bl_containerSize(points), sizeof(slchain_t *));
#endif
    bl_vqueueInit(&refs, 2 * size, sizeof(slchain_t *));

    /* traverse all points (sorted by sequence start position) */
    for (i = 0; i < bl_containerSize(points); i++) {
        point = (point_t *) bl_containerGet(points, i);

        /* point is the start point of fragment point->index (in fragments) */
        if (point->start == 1) {
            /* get current match from fragments */
            current = fragments + point->index;

            /* RMQ in vebtree/bintree by y-coordinate */
#ifndef BINTREE
            pred = bl_vebtreePred(&veb, trans[i]);
            predmatch = (slchain_t **) bl_vebtreeGetData(&veb, pred);
#else
            pred = bl_bintreePred(&bin, trans[i]);
            predmatch = (slchain_t **) bl_bintreeGet(&bin, pred);
#endif

            if (predmatch != NULL && !MAXGAP(current, *predmatch)) {
                /* only take prev if it increases chain score */
                if ((*predmatch)->scr >= (double) GLIN(current, *predmatch)) {
                    prev[point->index] = *predmatch;
                }
                /*
                 * addition due to clusters of local chains
                 * (differs from Abouelhoda2004)
                 */
                else if (current->scr >= (double) GLIN(current, *predmatch)) {
                    first = *(slmatch_t **) bl_containerGet((*predmatch)->matches, 0);
                    if (first->chain != NULL &&
                            ((slchain_t *)first->chain)->scr < (*predmatch)->scr +
                            current->scr - GLIN(current, *predmatch)) {
                        chain = (slchain_t *) malloc(sizeof(slchain_t));
                        bl_slchainInit(chain);
                        bl_slchainCopy(chain, *predmatch);
                        chain->j = FEND_Q(current) - FSTART_Q(*predmatch) + 1;
                        chain->q = FEND_S(current) - FSTART_S(*predmatch) + 1;
                        chain->scr += current->scr - (double) GLIN(current, *predmatch);
                        bl_containerAdd(chain->matches, &current);
                        bl_slchainpDestruct(&first->chain);
                        first->chain = chain;
                    }
                }
            }
        }
        /* point is the end point of fragment point->index */
        else {
            current = fragments + point->index;
            chain = (slchain_t *) malloc(sizeof(slchain_t));
            /*
             * chaining candidate found
             * (can be only better chain for prev or best chain for current as well)
             */
            if (prev[point->index] != NULL) {
                cand = prev[point->index];
                bl_slchainInit(chain);
                chain->i = FSTART_Q(cand);
                chain->j = FEND_Q(current) - FSTART_Q(cand) + 1;
                chain->p = FSTART_S(cand);
                chain->q = FEND_S(current) - FSTART_S(cand) + 1;
                chain->scr = current->scr + cand->scr - (double) GLIN(current, cand);
                bl_containerMerge(chain->matches, cand->matches);
                bl_containerAdd(chain->matches, &current);
                if (current->subject != cand->subject) {
                    DBG("slchain.c: Attempt to chain fragments of different \
reference sequences.\nExit forced.\n\n", NULL);
                    exit(-1);
                }
                chain->subject = current->subject;

                /* update best chain of cluster */
                first = *(slmatch_t **) bl_containerGet(chain->matches, 0);
                if (first->chain != NULL) {
                    if (((slchain_t *) first->chain)->scr <= chain->scr) {
                        slchain_t *tmp = (slchain_t *) malloc(sizeof(slchain_t));
                        bl_slchainInit(tmp);
                        bl_slchainCopy(tmp, chain);
                        bl_slchainpDestruct(&first->chain);
                        first->chain = (void *) tmp;
                    }
                }
            }
            /* no chaining candidate found */
            else {
Beispiel #6
0
//--------------------------------------------------------------------
// Builds CALs from the seeds held by the suffix manager.
//
// Every seed in p->suffix_lists is copied into a fragment container, the
// fragments are sorted with cmp_slmatch_qsort and then clustered per
// subject (bl_slClusterSop when info.chainmode == SOP, bl_slClusterLin
// otherwise). For each resulting chain that satisfies info.minscore and
// info.minfrag a seed list and a CAL are created, the CAL is extended with
// extend_seeds() and appended to cal_list when cal->read_area >= min_area
// (otherwise it is freed). Chains are destructed as soon as they have been
// processed, and the manager is cleared with suffix_mng_clear() at the end.
//
// Returns immediately when p is NULL, has no suffix lists, or holds no
// seeds.
//--------------------------------------------------------------------
void suffix_mng_create_cals(fastq_read_t *read, int min_area, int strand,
                            sa_index3_t *sa_index, array_list_t *cal_list,
                            suffix_mng_t *p) {

  if (!p || !p->suffix_lists || p->num_seeds <= 0) {
    return;
  }

  claspinfo_t info;
  bl_claspinfoInit(&info);

  // fragment container sized for all seeds; subject names are borrowed from p
  info.fragments = (Container *) malloc(sizeof(Container));
  bl_containerInit(info.fragments, p->num_seeds, sizeof(slmatch_t));
  info.subject = p->subject;

  // copy every seed into a fragment
  slmatch_t frag;
  for (unsigned int c = 0; c < p->num_chroms; c++) {
    linked_list_t *list = p->suffix_lists[c];
    if (!list) {
      continue;
    }

    for (linked_list_item_t *node = list->first; node != NULL; node = node->next) {
      seed_t *src = node->item;

      bl_slmatchInit(&frag, 0);
      frag.i = src->read_start;
      frag.j = src->read_end - src->read_start + 1;
      frag.p = src->genome_start;
      frag.q = src->genome_end - src->genome_start + 1;
      frag.scr = src->genome_end - src->genome_start + 1;
      frag.subject = src->chromosome_id;
      bl_containerAdd(info.fragments, &frag);
    }
  }

  // sort fragments
  qsort(info.fragments->contspace, bl_containerSize(info.fragments),
        sizeof(slmatch_t), cmp_slmatch_qsort);

  // process runs [run_start, run_end) of fragments sharing one subject
  int run_start = 0;
  for (int run_end = 1; run_end <= bl_containerSize(info.fragments); run_end++) {
    // keep scanning while inside the list and still on the same subject
    if (run_end != bl_containerSize(info.fragments) &&
        ((slmatch_t *) bl_containerGet(info.fragments, run_start))->subject ==
        ((slmatch_t *) bl_containerGet(info.fragments, run_end))->subject) {
      continue;
    }

    slmatch_t *run = (slmatch_t *) info.fragments->contspace + run_start;
    if (info.chainmode != SOP) {
      bl_slClusterLin(run, run_end - run_start,
                      info.epsilon, info.lambda, info.maxgap);
    }
    else {
      bl_slClusterSop(run, run_end - run_start,
                      info.epsilon, info.lambda, info.maxgap);
    }

    for (int m = run_start; m < run_end; m++) {
      slmatch_t *match = (slmatch_t *) bl_containerGet(info.fragments, m);
      if (!match->chain) {
        continue;
      }

      slchain_t *chain = (slchain_t *) match->chain;

      if (chain->scr >= info.minscore &&
          bl_containerSize(chain->matches) >= info.minfrag) {

        // the subject name stored in the container is parsed as the chromosome number
        int chrom = atoi(*(char **) bl_containerGet(info.subject, chain->subject));

        // accumulated below but not consulted; the area test uses cal->read_area
        int read_area = 0;
        linked_list_t *seed_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

        // one seed per fragment of the chain
        for (int f = 0; f < bl_containerSize(chain->matches); f++) {
          slmatch_t *part = *(slmatch_t **) bl_containerGet(chain->matches, f);

          seed_t *seed = seed_new(part->i, part->i + part->j - 1,
                                  part->p, part->p + part->q - 1);
          seed->chromosome_id = chrom;
          seed->strand = strand;
          read_area += part->j;
          cigar_append_op(part->j, '=', &seed->cigar);

          linked_list_insert_last(seed, seed_list);
        }

        // extend seeds
        seed_cal_t *cal = seed_cal_new(chrom, strand, chain->p,
                                       chain->p + chain->q - 1, seed_list);
        cal->read = read;
        extend_seeds(cal, sa_index);
        seed_cal_update_info(cal);

        if (cal->read_area >= min_area) {
          array_list_insert(cal, cal_list);
        } else {
          seed_cal_free(cal);
        }
      }

      // the chain is released whether or not it produced a CAL
      bl_slchainDestruct(chain);
      free(chain);
      match->chain = NULL;
    }

    run_start = run_end;
  }

  // info.subject is detached before the destruct call -- presumably so the
  // manager's container is not released here; confirm in bl_claspinfoDestruct
  info.subject = NULL;
  bl_claspinfoDestruct(&info);

  // finally, clear suffix manager
  suffix_mng_clear(p);
}
Beispiel #7
0
/**
 * Chains the fragments in `frags` subject by subject and tracks the best
 * chain score.
 *
 * The fragments are sorted with cmp_slmatch_qsort; each run of fragments
 * sharing one subject is chained (bl_slChainLin, since the chain mode is
 * fixed to LIN; bl_slChainSop otherwise) with epsilon 0.0, lambda 1.0 and a
 * maximum gap of 10. Every chain produced is compared against maxScore and
 * then destructed, with the owning fragment's chain pointer reset to nullptr.
 * No matches or chains are written to any output here.
 *
 * @param frags    container of slmatch_t fragments; sorted in place.
 * @param maxScore in: score to beat; out: the largest chain score seen that
 *                 was greater than or equal to the running value.
 * @param bestPos  out: `p` of the chain that last updated maxScore.
 * @return true if at least one chain had a score >= the running maxScore
 *         (note: equality counts as an update).
 */
bool FragmentList::computeBestChain_(Container* frags, double& maxScore, uint32_t& bestPos) {
    const double epsilon = 0.0;
    const double lambda = 1.0;
    const int maxGap = 10;
    const unsigned char chainMode = LIN;
    bool improved = false;

    /* sort fragments */
    qsort(frags->contspace, bl_containerSize(frags),
          sizeof(slmatch_t), cmp_slmatch_qsort);

    /* process runs [runStart, runEnd) of fragments sharing one subject */
    uint32_t runStart = 0;
    for (uint32_t runEnd = 1; runEnd <= bl_containerSize(frags); runEnd++) {
        /* still inside the list and on the same subject: keep scanning */
        if (runEnd != bl_containerSize(frags) &&
                ((slmatch_t *) bl_containerGet(frags, runStart))->subject ==
                ((slmatch_t *) bl_containerGet(frags, runEnd))->subject) {
            continue;
        }

        slmatch_t* run = (slmatch_t *) frags->contspace + runStart;
        if (chainMode != SOP) {
            bl_slChainLin(run, runEnd - runStart, epsilon, lambda, maxGap);
        }
        else {
            /* chaining without clustering */
            bl_slChainSop(run, runEnd - runStart, epsilon, lambda, maxGap);
        }

        for (uint32_t j = runStart; j < runEnd; j++) {
            slmatch_t* match = (slmatch_t *) bl_containerGet(frags, j);
            if (!match->chain) {
                continue;
            }

            slchain_t* chain = (slchain_t *) match->chain;
            if (chain->scr >= maxScore) {
                maxScore = chain->scr;
                bestPos = chain->p;
                improved = true;
            }

            /* chains are released right away to keep memory usage low */
            bl_slchainDestruct(chain);
            free(chain);
            match->chain = nullptr;
        }

        runStart = runEnd;
    }

    return improved;
}