// Stores a regression-based quality value (qempLogReg) in every entry of the
// hash-based mismatch table.  Nothing is done when the fast table is in use.
//
// model         - regression coefficients: model[0] is the constant term
//                 (called "slope" in the original comment) and model[k + 1]
//                 multiplies the k-th covariate of an entry.
// blendedWeight - 9999 replaces the prediction with a count-only expression;
//                 any other value > 0 mixes the prediction with the entry's
//                 counts; values <= 0 keep the plain prediction.
void HashErrorModel::addPrediction(Model model,int blendedWeight)
{
    BaseData data;
    if(ourUseFast)
    {
        // Only mismatchTable is processed by this method.
        return;
    }

    HashMatch::iterator entry = mismatchTable.begin();
    while(entry != mismatchTable.end())
    {
        Covariates cov;
        data.parseKey(entry->first);
        cov.setCovariates(data);

        // Linear predictor: constant term plus coefficient * covariate.
        double qemp = model[0];
        int coefIdx = 1;
        for(std::vector<uint16_t>::const_iterator covIt = cov.covariates.begin();
            covIt != cov.covariates.end();
            ++covIt, ++coefIdx)
        {
            qemp += model[coefIdx] * static_cast<double>(*covIt);
        }

        const uint32_t m = entry->second.m;
        const uint32_t mm = entry->second.mm;

        if(blendedWeight == 9999)
        {
            // NOTE(review): this is evaluated entirely in unsigned integer
            // arithmetic, so the subtractions can wrap around, the quotient
            // is truncated, and the divisor is zero when m + mm == 9999.
            // Kept as-is because the intended formula is not evident here.
            qemp = (mm - blendedWeight) / (m + mm - blendedWeight);
        }
        else if(blendedWeight > 0)
        {
            // A previous variant, (mm + qemp*blendedWeight)/(m + mm +
            // blendedWeight), was disabled in favour of this expression,
            // which does not actually use the value of blendedWeight.
            qemp = (mm + qemp * mm) / (2.0 * (m + mm));
        }

        // Phred transformation: pass the predictor through the logistic
        // function, then take -10*log10 of its complement, rounded by
        // adding 0.5 and truncating.
        // NOTE(review): for large qemp the logistic value rounds to 1.0 and
        // the logarithm becomes infinite before the conversion to int.
        const double logistic = 1.0 / (1.0 + exp(-qemp));
        const int phred = trunc((-10.0 * log10(1.0 - logistic)) + 0.5);
        entry->second.qempLogReg = phred;

        ++entry;
    }
}
void HashErrorModel::setDataforPrediction(Matrix & X, Vector & succ, Vector & total,bool binarizeFlag) { int i = 0; BaseData data; if(ourUseFast) { return; } for(HashMatch::const_iterator it = mismatchTable.begin(); it != mismatchTable.end(); ++it) { data.parseKey(it->first); Covariates cov; cov.setCovariates(data); if(i == 0) { uint32_t rows = mismatchTable.size(); succ.Dimension(rows); total.Dimension(rows); X.Dimension(rows, cov.covariates.size() + 1); X.Zero(); } // The first column of the design matrix is constant one, for the slope X[i][0] = 1.0; int j = 0; //binarize a couple of co-variates for(std::vector<uint16_t>::const_iterator itv = cov.covariates.begin(); itv != cov.covariates.end(); ++itv) { if(binarizeFlag) { //hardcoded pos is 2 if(j==1){ uint16_t pos = (uint16_t)*itv; // hardcoded pos += 7; X[i][pos] = 1; } else { if(j>1) X[i][j] = (uint16_t)*itv; else X[i][j+1] = (uint16_t)*itv; } j++; } else { X[i][j+1] = (uint16_t)*itv; j++; } } total[i] = it->second.mm + it->second.m; succ[i] = it->second.m; i++; } }
int HashErrorModel::writeTableQemp(std::string& filename, const std::vector<std::string>& id2rg, bool logReg) { FILE *pFile; pFile = fopen(filename.c_str(), "w"); if(!pFile) return 0; int maxId = id2rg.size() - 1; BaseData data; if(ourUseFast) { for(unsigned int i = 0; i < mismatchTableFast.size(); i++) { SMatchesFast& matchInfo = mismatchTableFast[i]; if((matchInfo.m == 0) && (matchInfo.mm == 0)) { // No data, so continue to the next entry. continue; } data.parseFastKey(i); if(data.rgid <= maxId) { int16_t cycle = data.cycle + 1; if(data.read) { // 2nd. cycle = -cycle; } if(matchInfo.qempSimple == 255) { matchInfo.qempSimple = getQempSimple(matchInfo.m, matchInfo.mm); } fprintf(pFile,"%s,%d,%d,%c%c,%d,%d,%d\n", id2rg[data.rgid].c_str(), data.qual, cycle, data.preBase, data.curBase, matchInfo.m + matchInfo.mm, matchInfo.mm, matchInfo.qempSimple); } } fclose(pFile); return(1); } for(HashMatch::iterator it = mismatchTable.begin(); it != mismatchTable.end(); ++it) { data.parseKey(it->first); if(data.rgid <= maxId) { int16_t cycle = data.cycle + 1; if(data.read) { // 2nd. cycle = -cycle; } if(it->second.qempSimple == 255) { it->second.qempSimple = getQempSimple(it->second.m, it->second.mm); } uint8_t qemp = it->second.qempSimple; if(logReg) { qemp = it->second.qempLogReg; } fprintf(pFile,"%s,%d,%d,%c%c,%d,%d,%d\n", id2rg[data.rgid].c_str(), data.qual, cycle, data.preBase, data.curBase, it->second.m + it->second.mm, it->second.mm, qemp); } } fclose(pFile); return 1; }