/**
 * Allocate and initialise a suffix manager for the given genome.
 *
 * For every chromosome index in [0, genome->num_chroms) this creates one
 * empty linked list and appends one heap-allocated decimal string holding
 * that index ("0", "1", ...) to the `subject` container, which stores
 * `char *` elements.
 *
 * @param genome  genome descriptor; only its num_chroms field is read here
 * @return        the new manager, with num_seeds set to 0 and num_chroms,
 *                subject and suffix_lists filled in
 *
 * NOTE(review): none of the allocations below are checked for failure.
 */
suffix_mng_t *suffix_mng_new(sa_genome3_t *genome) {
  suffix_mng_t *mng = (suffix_mng_t *) calloc(1, sizeof(suffix_mng_t));
  int chrom_count = genome->num_chroms;

  /* container of chromosome labels, one char* per chromosome */
  Container *labels = (Container *) malloc(sizeof(Container));
  bl_containerInit(labels, chrom_count, sizeof(char *));

  /* one seed list per chromosome */
  linked_list_t **lists =
    (linked_list_t **) malloc(sizeof(linked_list_t *) * chrom_count);

  int chrom = 0;
  while (chrom < chrom_count) {
    lists[chrom] = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

    /* the label is the chromosome index printed as a decimal string */
    char *label = calloc(64, sizeof(char));
    sprintf(label, "%i", chrom);
    bl_containerAdd(labels, &label);

    chrom++;
  }

  mng->num_seeds = 0;
  mng->num_chroms = chrom_count;
  mng->subject = labels;
  mng->suffix_lists = lists;

  return mng;
}
/*----------------------------- bl_slExtractPoints -----------------------------
 *
 * @brief     Collects the start point and the end point of every match into
 *            one container of point_t, ordered by position on the sequence.
 *            The work consists of one sort (indexes ordered by end position)
 *            followed by two linear passes, i.e. O(n log n) overall provided
 *            quickSort has that bound.
 * @parameter fragments  array of slmatch_t, assumed to be presorted by start
 *                       position on the sequence
 * @parameter size       number of elements in fragments
 * @return    newly allocated container of point_t; start points carry
 *            start = 1, end points carry start = 0, and index refers to the
 *            position of the match in fragments
 * @author    Christian Otto
 *
 */
Container* bl_slExtractPoints(slmatch_t *fragments, Uint size)
{
  int start_pos = 0;   /* next match whose start point is to be emitted */
  int end_rank = 0;    /* next entry of by_end whose end point is to be emitted */
  int *space = NULL;
  Uint *by_end;
  Container *points;
  point_t pt;
  slmatch_t *opening, *closing;

  /* indexes of fragments ordered by the sequence position of their end points */
  by_end = quickSort(space, fragments, size, cmp_slmatch_end_quick, NULL);

  /* output container */
  points = (Container *) malloc(sizeof(Container));
  bl_containerInit(points, 1000, sizeof(point_t));

  /* first pass: walk the start points in their given order */
  while (start_pos < size) {
    opening = fragments + start_pos;
    closing = fragments + by_end[end_rank];

    /* emit every pending end point that lies before this start point */
    while (FSTART_S(opening) > FEND_S(closing)) {
      pt.x = FEND_S(closing);
      pt.y = FEND_Q(closing);
      pt.index = by_end[end_rank];
      pt.start = 0;
      bl_containerAdd(points, &pt);

      end_rank++;
      closing = fragments + by_end[end_rank];
    }

    pt.x = FSTART_S(opening);
    pt.y = FSTART_Q(opening);
    pt.index = start_pos;
    pt.start = 1;
    bl_containerAdd(points, &pt);

    start_pos++;
  }

  /*
   * second pass: flush the remaining end points
   * (assumes that an end point is always later than its start point)
   */
  for (; end_rank < size; end_rank++) {
    closing = fragments + by_end[end_rank];

    pt.x = FEND_S(closing);
    pt.y = FEND_Q(closing);
    pt.index = by_end[end_rank];
    pt.start = 0;
    bl_containerAdd(points, &pt);
  }

  free(by_end);
  return points;
}
/**
 * Search every read of a FASTA set against the suffix array with kmis(),
 * first as given and then as the sequence returned by charDNAcomplement(),
 * and report the matches of each search through reportMatch().
 *
 * @param space     forwarded to kmis, charDNAcomplement and FREEMEMORY
 * @param s         suffix array to search; s->seq initialises the Gmap
 * @param reads     reads to process (noofseqs entries in seqs)
 * @param k         forwarded to kmis (presumably the mismatch bound; confirm)
 * @param counter   if NULL, a progress bar is used and no mutex is passed on;
 *                  otherwise *counter is incremented once per read (unless
 *                  silent) and &mutex2 is handed to reportMatch
 * @param rep_type  forwarded to reportMatch
 * @param silent    non-zero suppresses both progress bar and counter update
 * @param dev       output stream handed to reportMatch
 *
 * NOTE(review): the match count is reset only at the top of each read, not
 * before the second search; whether kmis overwrites or accumulates it is not
 * visible here.
 * NOTE(review): the result of kmis is released only when the match count is
 * non-zero; confirm that kmis returns nothing to free otherwise.
 */
void kmismatch(void *space, Suffixarray *s, fasta_t *reads, Uint k,
               Uint* counter, Uint rep_type, unsigned char silent, FILE *dev)
{
  Uint idx, seqlen;
  Uint hits = 0;
  char *complement, *seq;
  branch_t *branches;
  Gmap map;
  gread_t read;
  Container found;
  pthread_mutex_t *lock = NULL;

  if (counter != NULL) {
    lock = &mutex2;
  } else {
    initProgressBarVT();
  }

  initGmap(&map, s->seq, 1);

  idx = 0;
  while (idx < reads->noofseqs) {
    hits = 0;
    initRead(&read, reads->seqs[idx]);
    setReads(&map, &read, 1);

    /* progress feedback: counter when one was supplied, progress bar otherwise */
    if (!silent) {
      if (lock != NULL) {
        (*counter)++;
      } else {
        progressBarVT("reads matched.", reads->noofseqs, idx, 25);
      }
    }

    seq = reads->seqs[idx]->sequence;
    seqlen = reads->seqs[idx]->length;

    /* search with the read as given; matches are reported as PLUSSTRAND */
    branches = kmis(space, s, seq, seqlen, k, &hits);
    if (hits) {
      bl_containerInit(&found, 100, sizeof(gmatch_t));
      branch2match(s, &found, branches, hits);
      setMatches(&read, (gmatch_t*)found.contspace,
                 bl_containerSize(&found), PLUSSTRAND);
      reportMatch(dev, &map, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&found, NULL);
      FREEMEMORY(space, branches);
    }

    /* start over with a fresh read record for the second search */
    initRead(&read, reads->seqs[idx]);
    setReads(&map, &read, 1);

    /* search with the complemented sequence; matches are reported as MINUSSTRAND */
    complement = charDNAcomplement(space, seq, seqlen);
    branches = kmis(space, s, complement, seqlen, k, &hits);
    if (hits) {
      bl_containerInit(&found, 100, sizeof(gmatch_t));
      branch2match(s, &found, branches, hits);
      setMatches(&read, (gmatch_t*)found.contspace,
                 bl_containerSize(&found), MINUSSTRAND);
      reportMatch(dev, &map, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&found, NULL);
      FREEMEMORY(space, branches);
    }

    FREEMEMORY(space, complement);
    idx++;
  }
}
/**
 * Turn the seeds held by a suffix manager into CALs by chaining them.
 *
 * Steps, in order:
 *   1. every seed of every chromosome list is copied into an slmatch_t
 *      fragment;
 *   2. the fragments are sorted with cmp_slmatch_qsort;
 *   3. each run of consecutive fragments sharing the same subject is
 *      clustered (bl_slClusterSop when info.chainmode == SOP, otherwise
 *      bl_slClusterLin);
 *   4. for each resulting chain that passes info.minscore and info.minfrag,
 *      a seed list and a CAL are built, the CAL is extended and updated, and
 *      it is appended to cal_list if its read_area is at least min_area
 *      (otherwise it is freed);
 *   5. every chain is destroyed, the clasp info is destructed and the
 *      manager is cleared with suffix_mng_clear().
 *
 * Nothing is done (and the manager is not cleared) when p is NULL, has no
 * suffix lists, or holds no seeds.
 *
 * @param read      read attached to every CAL created
 * @param min_area  minimum cal->read_area for a CAL to be kept
 * @param strand    strand stored in the new seeds and CALs
 * @param sa_index  index handed to extend_seeds
 * @param cal_list  receives the accepted CALs
 * @param p         suffix manager providing the seeds
 */
void suffix_mng_create_cals(fastq_read_t *read, int min_area, int strand,
                            sa_index3_t *sa_index, array_list_t *cal_list,
                            suffix_mng_t *p) {
  if (!p || !p->suffix_lists || p->num_seeds <= 0) {
    return;
  }

  claspinfo_t info;
  bl_claspinfoInit(&info);

  /* fragment container sized for all seeds; subject labels come from the manager */
  info.fragments = (Container *) malloc(sizeof(Container));
  bl_containerInit(info.fragments, p->num_seeds, sizeof(slmatch_t));
  info.subject = p->subject;

  /* step 1: one fragment per stored seed */
  slmatch_t fragment;
  for (unsigned int c = 0; c < p->num_chroms; c++) {
    linked_list_t *chrom_seeds = p->suffix_lists[c];
    if (!chrom_seeds) {
      continue;
    }

    linked_list_item_t *node = chrom_seeds->first;
    while (node != NULL) {
      seed_t *stored = node->item;

      bl_slmatchInit(&fragment, 0);
      fragment.i = stored->read_start;
      fragment.j = stored->read_end - stored->read_start + 1;
      fragment.p = stored->genome_start;
      fragment.q = stored->genome_end - stored->genome_start + 1;
      /* the score of a fragment is its length on the genome */
      fragment.scr = stored->genome_end - stored->genome_start + 1;
      fragment.subject = stored->chromosome_id;
      bl_containerAdd(info.fragments, &fragment);

      node = node->next;
    }
  }

  /* step 2: sort fragments */
  qsort(info.fragments->contspace, bl_containerSize(info.fragments),
        sizeof(slmatch_t), cmp_slmatch_qsort);

  /*
   * steps 3-5: a group [group_begin, group_end) is closed either at the end
   * of the fragment list or when the subject changes; it is then clustered,
   * converted to CALs and its chains are released right away (less memory
   * consumption with large inputs).
   */
  int group_begin = 0;
  for (int group_end = 1;
       group_end <= bl_containerSize(info.fragments);
       group_end++) {

    /* still inside the current group: same subject and not at the end */
    if (group_end != bl_containerSize(info.fragments) &&
        ((slmatch_t *) bl_containerGet(info.fragments, group_begin))->subject ==
        ((slmatch_t *) bl_containerGet(info.fragments, group_end))->subject) {
      continue;
    }

    if (info.chainmode == SOP) {
      /* only use chaining without clustering if no ids are specified */
      bl_slClusterSop((slmatch_t *) info.fragments->contspace + group_begin,
                      group_end - group_begin,
                      info.epsilon, info.lambda, info.maxgap);
    } else {
      bl_slClusterLin((slmatch_t *) info.fragments->contspace + group_begin,
                      group_end - group_begin,
                      info.epsilon, info.lambda, info.maxgap);
    }

    for (int m = group_begin; m < group_end; m++) {
      slmatch_t *match = (slmatch_t *) bl_containerGet(info.fragments, m);
      if (!match->chain) {
        continue;
      }

      slchain_t *chain = (slchain_t *) match->chain;

      if (chain->scr >= info.minscore &&
          bl_containerSize(chain->matches) >= info.minfrag) {
        /* the subject label is the chromosome index stored as a string */
        int chrom = atoi(*(char **) bl_containerGet(info.subject, chain->subject));
        /* accumulated below but not read afterwards; the min_area filter
           uses cal->read_area */
        int read_area = 0;
        linked_list_t *chain_seeds = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

        for (int k = 0; k < bl_containerSize(chain->matches); k++) {
          slmatch_t *link = *(slmatch_t **) bl_containerGet(chain->matches, k);

          seed_t *piece = seed_new(link->i, link->i + link->j - 1,
                                   link->p, link->p + link->q - 1);
          piece->chromosome_id = chrom;
          piece->strand = strand;
          read_area += link->j;
          cigar_append_op(link->j, '=', &piece->cigar);
          linked_list_insert_last(piece, chain_seeds);
        }

        /* build the CAL over the whole chain, then extend its seeds */
        seed_cal_t *cal = seed_cal_new(chrom, strand, chain->p,
                                       chain->p + chain->q - 1, chain_seeds);
        cal->read = read;
        extend_seeds(cal, sa_index);
        seed_cal_update_info(cal);

        if (cal->read_area >= min_area) {
          array_list_insert(cal, cal_list);
        } else {
          seed_cal_free(cal);
        }
      }

      /* the chain is released whether or not it produced a CAL */
      bl_slchainDestruct(chain);
      free(chain);
      match->chain = NULL;
    }

    group_begin = group_end;
  }

  /* destruct everything; subject is detached first
     (presumably so the destructor leaves the manager's container alone;
     confirm against bl_claspinfoDestruct) */
  info.subject = NULL;
  bl_claspinfoDestruct(&info);

  /* finally, clear suffix manager */
  suffix_mng_clear(p);
}
/**
 * Default constructor.
 *
 * Allocates the two containers owned by the list, `fragments` and
 * `fragmentsRC`, and initialises each one for slmatch_t elements with 20 as
 * the second argument of bl_containerInit (presumably the initial capacity;
 * confirm against the container implementation).
 *
 * NOTE(review): the malloc results are not checked.
 */
FragmentList::FragmentList()
{
  fragments = static_cast<Container *>(malloc(sizeof(Container)));
  bl_containerInit(fragments, 20, sizeof(slmatch_t));

  fragmentsRC = static_cast<Container *>(malloc(sizeof(Container)));
  bl_containerInit(fragmentsRC, 20, sizeof(slmatch_t));
}