Exemple #1
0
/**
 * Creates a suffix manager for the given genome.
 *
 * For every chromosome index i in [0, genome->num_chroms) the manager gets
 *   - an empty linked list in p->suffix_lists[i], and
 *   - a heap-allocated, 64-byte, NUL-terminated string holding the decimal
 *     value of i, stored (as a char *) in the p->subject container.
 *
 * All raw allocations are performed and checked before any project data
 * structure is initialised, so a failure can be undone with plain free().
 *
 * @param genome  genome descriptor; only genome->num_chroms is read
 * @return        the new manager (num_seeds == 0), or NULL if genome is NULL,
 *                num_chroms is negative, or an allocation failed
 */
suffix_mng_t *suffix_mng_new(sa_genome3_t *genome) {
  enum { NAME_LEN = 64 };

  if (!genome) return NULL;

  int num_chroms = genome->num_chroms;
  if (num_chroms < 0) return NULL;

  /* request at least one slot so a zero-chromosome genome is not mistaken
     for an allocation failure (calloc(0, ...) may legally return NULL) */
  size_t slots = num_chroms > 0 ? (size_t) num_chroms : 1;

  suffix_mng_t *p = calloc(1, sizeof *p);
  Container *subject = malloc(sizeof *subject);
  linked_list_t **suffix_lists = calloc(slots, sizeof *suffix_lists);
  char **names = calloc(slots, sizeof *names);  /* temporary, freed below */
  if (!p || !subject || !suffix_lists || !names) goto fail;

  for (int i = 0; i < num_chroms; i++) {
    names[i] = calloc(NAME_LEN, sizeof(char));
    if (!names[i]) goto fail;
    snprintf(names[i], NAME_LEN, "%i", i);
  }

  bl_containerInit(subject, num_chroms, sizeof(char *));
  for (int i = 0; i < num_chroms; i++) {
    suffix_lists[i] = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

    /* hand over the address of a local pointer, exactly as before */
    char *name = names[i];
    bl_containerAdd(subject, &name);
  }
  free(names);

  p->num_seeds = 0;
  p->num_chroms = num_chroms;
  p->subject = subject;
  p->suffix_lists = suffix_lists;

  return p;

fail:
  if (names) {
    /* names was zero-initialised, so unfilled entries are NULL */
    for (int i = 0; i < num_chroms; i++) free(names[i]);
  }
  free(names);
  free(suffix_lists);
  free(subject);
  free(p);
  return NULL;
}
Exemple #2
0
/* Appends the end point of fragments[idx] (start flag 0) to points. */
static void bl_slAddEndPoint(Container *points, slmatch_t *fragments, Uint idx) {
    slmatch_t *match = fragments + idx;
    point_t point;

    point.x = FEND_S(match);
    point.y = FEND_Q(match);
    point.index = idx;
    point.start = 0;
    bl_containerAdd(points, &point);
}

/*----------------------------- bl_slExtractPoints -----------------------------
 *
 * @brief       extracts all start and end points from the matches,
 *              returns a list sorted by position in the sequence,
 *              runs in O(n log n) (consists of one sort and two linear scans)
 * @parameter   fragments: array of slmatch_t, assumed to be presorted by
 *                         start position on sequence
 *              size:      number of elements in fragments
 * @return      malloc'ed Container of point_t holding one start point
 *              (start = 1, index = position in fragments) and one end point
 *              (start = 0) per fragment; NULL if an allocation failed
 * @author      Christian Otto
 *
 */
Container* bl_slExtractPoints(slmatch_t *fragments, Uint size) {
    Uint i, j = 0;
    Uint *sorted;
    Container *points;
    point_t point;
    slmatch_t *amatch, *bmatch;

    /* sort the indexes of the fragments by sequence position of end points */
    sorted = quickSort(NULL, fragments, size, cmp_slmatch_end_quick, NULL);
    if (sorted == NULL && size > 0) {
        return NULL;
    }

    /* initialize data structures */
    points = (Container *) malloc(sizeof(Container));
    if (points == NULL) {
        free(sorted);
        return NULL;
    }
    bl_containerInit(points, 1000, sizeof(point_t));

    /* traverse all start points according to the order */
    for (i = 0; i < size; i++) {
        amatch = fragments + i;

        /* emit every end point lying before the next start point; the
         * j < size bound keeps the scan inside sorted[] even if the input
         * violates the presorting / end >= start assumption */
        while (j < size) {
            bmatch = fragments + sorted[j];
            if (!(FSTART_S(amatch) > FEND_S(bmatch))) {
                break;
            }
            bl_slAddEndPoint(points, fragments, sorted[j]);
            j++;
        }

        point.x = FSTART_S(amatch);
        point.y = FSTART_Q(amatch);
        point.index = i;
        point.start = 1;
        bl_containerAdd(points, &point);
    }

    /* remaining end points (assumes that end point is always later than
     * start point) */
    for (; j < size; j++) {
        bl_slAddEndPoint(points, fragments, sorted[j]);
    }

    free(sorted);
    return points;
}
Exemple #3
0
/*
 * Runs kmis() with parameter k for every sequence in 'reads' against the
 * suffix array 's' -- once on the sequence as stored and once on the result
 * of charDNAcomplement() -- and hands any hits to reportMatch() on 'dev'.
 *
 * space     passed through to kmis, charDNAcomplement and FREEMEMORY
 * s         suffix array searched; s->seq is used to set up the Gmap
 * reads     input sequences (noofseqs entries in seqs[])
 * k         forwarded unchanged to kmis
 * counter   if NULL a progress bar is driven from here; otherwise
 *           *counter is incremented once per read (unless silent) and
 *           &mutex2 is passed to reportMatch
 * rep_type  forwarded unchanged to reportMatch
 * silent    non-zero suppresses the progress bar update / counter increment
 * dev       output stream given to reportMatch
 *
 * NOTE(review): the hit counter is reset only at the top of each iteration,
 * not before the second kmis() call -- this is only correct if kmis()
 * overwrites *noofmatches rather than adding to it; confirm.
 */
void
kmismatch(void *space,
    Suffixarray *s,
    fasta_t *reads,
    Uint k,
    Uint* counter,
    Uint rep_type,
    unsigned char silent,
    FILE *dev)
{
  Uint readidx, seqlen;
  Uint hits = 0;
  char *fwdseq, *revseq;
  branch_t *branches;
  Gmap map;
  gread_t read;
  Container matches;
  pthread_mutex_t *lock = NULL;

  if (counter != NULL) {
    lock = &mutex2;
  } else {
    initProgressBarVT();
  }

  initGmap(&map, s->seq, 1);

  for (readidx = 0; readidx < reads->noofseqs; readidx++) {

    hits = 0;
    initRead(&read, reads->seqs[readidx]);
    setReads(&map, &read, 1);

    /* progress feedback: bar when running standalone, counter otherwise */
    if (!silent && lock == NULL) {
      progressBarVT("reads matched.", reads->noofseqs, readidx, 25);
    } else if (!silent) {
      (*counter)++;
    }

    fwdseq = reads->seqs[readidx]->sequence;
    seqlen = reads->seqs[readidx]->length;

    /* sequence as stored */
    branches = kmis(space, s, fwdseq, seqlen, k, &hits);

    if (hits) {
      bl_containerInit(&matches, 100, sizeof(gmatch_t));
      branch2match(s, &matches, branches, hits);
      setMatches(&read, (gmatch_t*)matches.contspace,
                 bl_containerSize(&matches), PLUSSTRAND);

      reportMatch(dev, &map, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&matches, NULL);
      FREEMEMORY(space, branches);
    }

    /* start over with a fresh read record for the second pass */
    initRead(&read, reads->seqs[readidx]);
    setReads(&map, &read, 1);

    /* sequence produced by charDNAcomplement */
    revseq = charDNAcomplement(space, fwdseq, seqlen);
    branches = kmis(space, s, revseq, seqlen, k, &hits);

    if (hits) {
      bl_containerInit(&matches, 100, sizeof(gmatch_t));
      branch2match(s, &matches, branches, hits);
      setMatches(&read, (gmatch_t*)matches.contspace,
                 bl_containerSize(&matches), MINUSSTRAND);
      reportMatch(dev, &map, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&matches, NULL);
      FREEMEMORY(space, branches);
    }
    FREEMEMORY(space, revseq);
  }

  return;
}
Exemple #4
0
void suffix_mng_create_cals(fastq_read_t *read, int min_area, int strand, 
			    sa_index3_t *sa_index, array_list_t *cal_list,
			    suffix_mng_t *p) {

  if (!p) return;
  if (!p->suffix_lists) return;

  if (p->num_seeds <= 0) return;

  int read_area, chrom;
  seed_t *seed;
  seed_cal_t *cal;
  linked_list_t *seed_list;
  claspinfo_t info;
  bl_claspinfoInit(&info);

  // initialization
  info.fragments = (Container *) malloc(sizeof(Container));
  bl_containerInit(info.fragments, p->num_seeds, sizeof(slmatch_t));

  info.subject = p->subject;

  slmatch_t frag;
  linked_list_t *suffix_list;
  for (unsigned int i = 0; i < p->num_chroms; i++) {
    suffix_list = p->suffix_lists[i];
    if (suffix_list) {
      for (linked_list_item_t *item = suffix_list->first; 
	   item != NULL; 
	   item = item->next) {

	seed = item->item;

	bl_slmatchInit(&frag, 0);
	frag.i = seed->read_start;
	frag.j = seed->read_end - seed->read_start + 1;
	frag.p = seed->genome_start;
	frag.q = seed->genome_end - seed->genome_start + 1;
	frag.scr = seed->genome_end - seed->genome_start + 1;
	frag.subject = seed->chromosome_id;
	bl_containerAdd(info.fragments, &frag);
      }
    }
  }

  // sort fragments
  qsort(info.fragments->contspace, bl_containerSize(info.fragments),
	sizeof(slmatch_t), cmp_slmatch_qsort);
  int begin = 0;
  for (int i = 1; i <= bl_containerSize(info.fragments); i++){
    // end of fragments list or different database sequence 
    // --> process fragment[begin]...fragment[i-1], write output
    // and free chains (less memory consumption with large input files)
    if (i == bl_containerSize(info.fragments) ||
	((slmatch_t *) bl_containerGet(info.fragments, begin))->subject !=
	((slmatch_t *) bl_containerGet(info.fragments, i))->subject){
      if (info.chainmode == SOP){
	// only use chaining without clustering if no ids are specified
	bl_slClusterSop((slmatch_t *) info.fragments->contspace + begin, i - begin,
			info.epsilon, info.lambda, info.maxgap);
      }
      else {    
	bl_slClusterLin((slmatch_t *) info.fragments->contspace + begin, i - begin,
			info.epsilon, info.lambda, info.maxgap);
      }
      
      for (int j = begin; j < i; j++) {


	slmatch_t *match = (slmatch_t *) bl_containerGet(info.fragments, j);

	if (match->chain) {
	  slchain_t *chain = (slchain_t *) match->chain;

	  if (chain->scr >= info.minscore &&
	      bl_containerSize(chain->matches) >= info.minfrag) {

	    chrom = atoi(*(char **) bl_containerGet(info.subject, chain->subject));
	    
	    read_area = 0;
	    seed_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);
	    
	    for (int k = 0; k < bl_containerSize(chain->matches); k++){
	      slmatch_t *frag = *(slmatch_t **) bl_containerGet(chain->matches, k);

	      seed = seed_new(frag->i, frag->i + frag->j - 1, frag->p, frag->p + frag->q - 1);
	      seed->chromosome_id = chrom;
	      seed->strand = strand;
	      read_area += frag->j;
	      cigar_append_op(frag->j, '=', &seed->cigar);
	      
	      linked_list_insert_last(seed, seed_list);
	    }

	    // extend seeds	    
	    cal = seed_cal_new(chrom, strand, chain->p, chain->p + chain->q - 1, seed_list);
	    cal->read = read;
	    extend_seeds(cal, sa_index);
	    seed_cal_update_info(cal);

	    if (cal->read_area >= min_area) {
	      array_list_insert(cal, cal_list);
	    } else {
	      seed_cal_free(cal);
	    }
	  }

	  bl_slchainDestruct(chain);
	  free(chain);
	  match->chain = NULL;
	}
      }  // END OF for (j = begin; j < i; j++)
      begin = i;
    } // END OF  if (i == bl_containerSize(info.fragments) ||
  } // END OF for (i = 1; i <= bl_containerSize(info.fragments); i++)

  // destruct everything
  info.subject = NULL;
  bl_claspinfoDestruct(&info);

  // finally, clear suffix manager
  suffix_mng_clear(p);
}
Exemple #5
0
// Default constructor: allocates the two fragment containers (fragments and
// fragmentsRC) and initialises each with bl_containerInit for slmatch_t
// elements, passing 20 as the second argument.
FragmentList::FragmentList(){
    const int initialArg = 20;

    Container *plain = (Container *) malloc(sizeof(Container));
    bl_containerInit(plain, initialArg, sizeof(slmatch_t));

    Container *revComp = (Container *) malloc(sizeof(Container));
    bl_containerInit(revComp, initialArg, sizeof(slmatch_t));

    fragments = plain;
    fragmentsRC = revComp;
}