void* calcConProbs(void* arg) { Params *params = (Params*)arg; ModelType *model = (ModelType*)(params->model); ReadReader<ReadType> *reader = (ReadReader<ReadType>*)(params->reader); HitContainer<HitType> *hitv = (HitContainer<HitType>*)(params->hitv); double *ncpv = (double*)(params->ncpv); ReadType read; READ_INT_TYPE N = hitv->getN(); HIT_INT_TYPE fr, to; assert(model->getNeedCalcConPrb()); reader->reset(); for (READ_INT_TYPE i = 0; i < N; i++) { general_assert(reader->next(read), "Can not load a read!"); fr = hitv->getSAt(i); to = hitv->getSAt(i + 1); ncpv[i] = model->getNoiseConPrb(read); for (HIT_INT_TYPE j = fr; j < to; j++) { HitType &hit = hitv->getHitAt(j); hit.setConPrb(model->getConPrb(read, hit)); } } return NULL; }
void* GET_SS_STEP(void* arg) { Params *params = (Params*)arg; ModelType *model = (ModelType*)(params->model); ReadReader<ReadType> *reader = (ReadReader<ReadType>*)(params->reader); HitContainer<HitType> *hitv = (HitContainer<HitType>*)(params->hitv); double *ncpv = (double*)(params->ncpv); ModelType *mhp = (ModelType*)(params->mhp); assert(!model->getNeedCalcConPrb()); ReadType read; READ_INT_TYPE N = hitv->getN(); double sum; vector<double> fracs; //to remove this, do calculation twice HIT_INT_TYPE fr, to, id; reader->reset(); mhp->init(); for (READ_INT_TYPE i = 0; i < N; i++) { general_assert(reader->next(read), "Can not load a read!"); fr = hitv->getSAt(i); to = hitv->getSAt(i + 1); fracs.resize(to - fr + 1); sum = 0.0; fracs[0] = probv[0] * ncpv[i]; if (fracs[0] < EPSILON) fracs[0] = 0.0; sum += fracs[0]; for (HIT_INT_TYPE j = fr; j < to; j++) { HitType &hit = hitv->getHitAt(j); id = j - fr + 1; fracs[id] = probv[hit.getSid()] * hit.getConPrb(); if (fracs[id] < EPSILON) fracs[id] = 0.0; sum += fracs[id]; } if (sum >= EPSILON) { fracs[0] /= sum; //mhp->updateNoise(read, fracs[0]); for (HIT_INT_TYPE j = fr; j < to; j++) { HitType &hit = hitv->getHitAt(j); id = j - fr + 1; fracs[id] /= sum; mhp->update(read, hit, fracs[id]); } } } return NULL; }
void* E_STEP(void* arg) { Params *params = (Params*)arg; ModelType *model = (ModelType*)(params->model); ReadReader<ReadType> *reader = (ReadReader<ReadType>*)(params->reader); HitContainer<HitType> *hitv = (HitContainer<HitType>*)(params->hitv); double *ncpv = (double*)(params->ncpv); ModelType *mhp = (ModelType*)(params->mhp); double *countv = (double*)(params->countv); bool needCalcConPrb = model->getNeedCalcConPrb(); ReadType read; READ_INT_TYPE N = hitv->getN(); double sum; vector<double> fracs; //to remove this, do calculation twice HIT_INT_TYPE fr, to, id; if (needCalcConPrb || updateModel) { reader->reset(); } if (updateModel) { mhp->init(); } memset(countv, 0, sizeof(double) * (M + 1)); for (READ_INT_TYPE i = 0; i < N; i++) { if (needCalcConPrb || updateModel) { general_assert(reader->next(read), "Can not load a read!"); } fr = hitv->getSAt(i); to = hitv->getSAt(i + 1); fracs.resize(to - fr + 1); sum = 0.0; if (needCalcConPrb) { ncpv[i] = model->getNoiseConPrb(read); } fracs[0] = probv[0] * ncpv[i]; if (fracs[0] < EPSILON) fracs[0] = 0.0; sum += fracs[0]; for (HIT_INT_TYPE j = fr; j < to; j++) { HitType &hit = hitv->getHitAt(j); if (needCalcConPrb) { hit.setConPrb(model->getConPrb(read, hit)); } id = j - fr + 1; fracs[id] = probv[hit.getSid()] * hit.getConPrb(); if (fracs[id] < EPSILON) fracs[id] = 0.0; sum += fracs[id]; } if (sum >= EPSILON) { fracs[0] /= sum; countv[0] += fracs[0]; if (updateModel) { mhp->updateNoise(read, fracs[0]); } if (calcExpectedWeights) { ncpv[i] = fracs[0]; } for (HIT_INT_TYPE j = fr; j < to; j++) { HitType &hit = hitv->getHitAt(j); id = j - fr + 1; fracs[id] /= sum; countv[hit.getSid()] += fracs[id]; if (updateModel) { mhp->update(read, hit, fracs[id]); } if (calcExpectedWeights) { hit.setConPrb(fracs[id]); } } } else if (calcExpectedWeights) { ncpv[i] = 0.0; for (HIT_INT_TYPE j = fr; j < to; j++) { HitType &hit = hitv->getHitAt(j); hit.setConPrb(0.0); } } } return NULL; }
/**
 * Consumes every entry produced by parser->parseNext(), groups entries by
 * read, writes the reads and their hits out, and updates the file-level
 * statistics.
 *
 * parseNext() return value, as used here:
 *   - negative : no more entries; parsing stops.
 *   - 0, 1, 2  : the entry starts a new read of that category; the previously
 *                buffered read (if any) is flushed first.
 *   - 1 or 5   : the entry's hit is appended to the buffered hit list.
 *
 * Flushing a buffered read writes it to n_os using cat[category] and bumps
 * N[category]. For category 1 (alignable reads) the buffered hits are
 * additionally finalized with updateRI(), counted into nHits / nMulti /
 * nIsoMulti, written to hit_out, and tallied by hit count in `counter`.
 *
 * On return nUnique == N[1] - nMulti.
 *
 * @param parser  Source of (read, hit) entries.
 *
 * N, cat, n_os, hit_out, gi, counter, iter, verbose and the n* counters are
 * defined outside this function.
 */
void parseIt(SamParser *parser) {
	int val;
	// Category of the buffered read; negative means nothing is buffered yet.
	int record_val = -2;
	// record_read keeps a copy of the read that opened the current group.
	ReadType read, record_read;
	HitType hit;
	HitContainer<HitType> hits;

	nHits = 0;
	nIsoMulti = 0;
	nMulti = 0;
	nUnique = 0;
	memset(N, 0, sizeof(N));

	READ_INT_TYPE cnt = 0;

	for (;;) {
		val = parser->parseNext(read, hit);
		const bool finished = (val < 0);

		// A new read (0..2) or end of input both close the buffered read.
		if (finished || val <= 2) {
			if (record_val >= 0) {
				record_read.write(n_os, cat[record_val]);
				++N[record_val];
			}
			if (record_val == 1) {
				hits.updateRI();
				nHits += hits.getNHits();
				nMulti += hits.calcNumGeneMultiReads(gi);
				nIsoMulti += hits.calcNumIsoformMultiReads();
				hits.write(hit_out);

				// Histogram of reads by number of hits.
				iter = counter.find(hits.getNHits());
				if (iter == counter.end()) {
					counter[hits.getNHits()] = 1;
				} else {
					iter->second++;
				}
			}

			if (finished) {
				break;
			}

			// Start buffering the new read.
			hits.clear();
			record_val = val;
			record_read = read; // plain value copy, no shared pointers
		}

		if (val == 1 || val == 5) {
			hits.push_back(hit);
		}

		++cnt;
		if (verbose && (cnt % 1000000 == 0)) {
			std::cout << "Parsed " << cnt << " entries" << std::endl;
		}
	}

	nUnique = N[1] - nMulti;
}