// Computes the expected effective length of every reference 1..M under the
// model's general length distribution and stores the result in the global
// vector `eel`. Entries that come out below MINEEL are reset to 0.
void calcExpectedEffectiveLengths(ModelType& model) {
    int lb, ub, span;
    double *pdf = NULL, *cdf = NULL;

    // pdf and cdf are released with delete[] at the end of this function, so
    // copyTo is expected to hand back arrays owned by the caller.
    model.getGLD().copyTo(pdf, cdf, lb, ub, span);

    // weighted[k] = sum_{j=1..k} pdf[j] * (lb + j)
    double *weighted = new double[span + 1];
    weighted[0] = 0.0;
    for (int k = 1; k <= span; k++) {
        weighted[k] = weighted[k - 1] + pdf[k] * (lb + k);
    }

    eel.clear();
    eel.resize(M + 1, 0.0);

    for (int tid = 1; tid <= M; tid++) {
        const int totLen = refs.getRef(tid).getTotLen();
        const int fullLen = refs.getRef(tid).getFullLen();

        // Offsets into cdf / weighted, clipped to the range [0, ub - lb].
        const int lo = max(min(totLen - fullLen + 1, ub) - lb, 0);
        const int hi = max(min(totLen, ub) - lb, 0);

        if (hi == 0) {
            eel[tid] = 0.0;
        } else {
            eel[tid] = fullLen * cdf[lo] + ((cdf[hi] - cdf[lo]) * (totLen + 1) - (weighted[hi] - weighted[lo]));
            assert(eel[tid] >= 0);
            if (eel[tid] < MINEEL) {
                eel[tid] = 0.0;
            }
        }
    }

    delete[] pdf;
    delete[] cdf;
    delete[] weighted;
}
inline void PROBerReadModel::update(InMemAlignG* ag_in_mem, InMemAlign* aligns, AlignmentGroup& ag, double noise_frac) { SEQstring seq; QUALstring qual; CIGARstring cigar; const RefSeq* refseq = NULL; assert(ag.getSEQ(seq)); if (model_type & 1) assert(ag.getQUAL(qual)); // update noise prob npro->update(seq, noise_frac); // update alignment probs for (int i = 0; i < ag_in_mem->size; ++i) if (aligns[i].frac > 0.0) { refseq = refs->getRef(aligns[i].tid); assert(ag.getAlignment(i)->getCIGAR(cigar)); seqmodel->update(aligns[i].frac, '+', aligns[i].pos, refseq, &cigar, &seq, ((model_type & 1) ? &qual : NULL)); } if (model_type >= 2) { // paired-end reads assert(ag.getSEQ(seq, 2)); if (model_type & 1) assert(ag.getQUAL(qual, 2)); // update noise prob npro->update(seq, noise_frac); // update alignment probs for (int i = 0; i < ag_in_mem->size; ++i) if (aligns[i].frac > 0.0) { refseq = refs->getRef(aligns[i].tid); assert(ag.getAlignment(i)->getCIGAR(cigar, 2)); assert(aligns[i].fragment_length > 0); seqmodel->update(aligns[i].frac, '-', refseq->getLen() - aligns[i].pos - aligns[i].fragment_length, refseq, &cigar, &seq, ((model_type & 1) ? &qual : NULL)); } } }
inline void PROBerReadModel::setConProbs(InMemAlignG* ag_in_mem, InMemAlign* aligns, AlignmentGroup& ag) { int seqlen; SEQstring seq; // seq, qual and cigar must be in each function since we have multiple threads! QUALstring qual; CIGARstring cigar; const RefSeq* refseq = NULL; // Get read sequences and quality scores assert(ag.getSEQ(seq)); seqlen = read_length < 0 ? ag.getSeqLength() : read_length; if (model_type & 1) assert(ag.getQUAL(qual)); // set noise probability ag_in_mem->noise_conprb = mld1->getProb(seqlen) * npro->getProb(seq); // set alignment probabilities for (int i = 0; i < ag_in_mem->size; ++i) if (aligns[i].conprb != -1.0) { refseq = refs->getRef(aligns[i].tid); assert(ag.getAlignment(i)->getCIGAR(cigar)); aligns[i].conprb = (aligns[i].fragment_length > 0 ? mld1->getProb(seqlen, aligns[i].fragment_length) : mld1->getProb(seqlen)) * \ seqmodel->getProb('+', aligns[i].pos, refseq, &cigar, &seq, ((model_type & 1) ? &qual : NULL)); } if (model_type >= 2) { // paired-end reads assert(ag.getSEQ(seq, 2)); seqlen = read_length < 0 ? ag.getSeqLength(2) : read_length; if (model_type & 1) assert(ag.getQUAL(qual, 2)); ag_in_mem->noise_conprb *= mld2->getProb(seqlen) * npro->getProb(seq); for (int i = 0; i < ag_in_mem->size; ++i) if (aligns[i].conprb != -1.0) { refseq = refs->getRef(aligns[i].tid); assert(ag.getAlignment(i)->getCIGAR(cigar, 2)); assert(aligns[i].fragment_length > 0); aligns[i].conprb *= mld2->getProb(seqlen, aligns[i].fragment_length) * \ seqmodel->getProb('-', refseq->getLen() - aligns[i].pos - aligns[i].fragment_length, refseq, &cigar, &seq, ((model_type & 1) ? &qual : NULL)); } } }
inline void PROBerReadModel::simulate(READ_INT_TYPE rid, int tid, int pos, int fragment_length, std::ofstream* out1, std::ofstream* out2) { int m2pos; int mateL1, mateL2; std::string qual1, qual2, cigar1, cigar2, readseq1, readseq2; // Simulate reads if (tid == 0) { cigar1 = cigar2 = "*"; mateL1 = mld1->simulate(sampler); if (model_type & 1) qd->simulate(sampler, mateL1, qual1); npro->simulate(sampler, mateL1, readseq1); if (model_type >= 2) { mateL2 = mld2->simulate(sampler); if (model_type & 1) qd->simulate(sampler, mateL2, qual2); npro->simulate(sampler, mateL2, readseq2); } } else { const RefSeq* ref = refs->getRef(tid); mateL1 = mld1->simulate(sampler, fragment_length); if (model_type & 1) qd->simulate(sampler, mateL1, qual1); seqmodel->simulate(sampler, mateL1, '+', pos, ref, qual1, cigar1, readseq1); if (model_type >= 2) { mateL2 = mld2->simulate(sampler, fragment_length); if (model_type & 1) qd->simulate(sampler, mateL2, qual2); m2pos = ref->getLen() - pos - fragment_length; seqmodel->simulate(sampler, mateL2, '-', m2pos, ref, qual2, cigar2, readseq2); } } // Output reads (*out1)<< ((model_type & 1) ? '@' : '>') << rid<< '_'<< tid<< '_'<< pos<< '_'<< fragment_length<< '_'<< cigar1<< (model_type >= 2 ? "/1" : "") << std::endl; (*out1)<< readseq1<< std::endl; if (model_type & 1) (*out1)<< '+'<< std::endl<< qual1<< std::endl; if (model_type >= 2) { (*out2)<< ((model_type & 1) ? '@' : '>') << rid<< '_'<< tid<< '_'<< pos<< '_'<< fragment_length<< '_'<< cigar2<< "/2"<< std::endl; (*out2)<< readseq2<< std::endl; if (model_type & 1) (*out2)<< '+'<< std::endl<< qual2<< std::endl; } }
/*!
 * Sorts \p obs with the comparison selected by \p dupT and regroups equal
 * neighbours as duplicates.
 *
 * Every run of two or more objects that compare equal gets a freshly
 * generated Dup, which is appended to \p new_dups; each member of the run is
 * first disconnected from its previous REL_PARENT relations and then
 * connected to the new Dup. Objects without any duplicate are only
 * disconnected. Unsupported \p dupT values are reported through PTODO and
 * leave \p obs untouched.
 */
void parr_update_duplicates(Ob ** obs, size_t obs_N, PDUP_t dupT, Refs& new_dups)
{
    REL_t pHUB = REL_PARENT;
    REL_t pEXA = REL_CHILD;

    // pick the comparison function (functional argument)
    cmp_function_t *cmp = nullptr;
    if (dupT == PDUP_NAME) {
        cmp = voidp_ob_cmp_name;
    } else if (dupT == PDUP_CONTENT) {
        cmp = voidp_ob_cmp_std;
    } else {
        PTODO("Cannot handle dupT:%d (yet)\n", dupT);
        return;
    }

    qsort_mt(obs, obs_N, sizeof(Ob*), cmp, 0,0);

    size_t first = 0;
    while (first < obs_N) {
        // advance `next` to just beyond the last object equal to obs[first]
        size_t next = first + 1;
        while (next < obs_N && cmp(&obs[first], &obs[next]) == 0) {
            ++next;
        }

        const size_t dupN = next - first; // multiplicity (2 or more for duplicates)
        if (dupN < 2) {
            // obs[first] has no duplicates
            obs[first]->net_disconnectM(pHUB); // disconnect from all previous
        } else {
            // obs[first ... first+dupN-1] are duplicates
            Dup * pdup = gen::dup(dupT);
            new_dups.app(pdup);
            for (size_t k = first; k < next; k++) {
                obs[k]->net_disconnectM(pHUB);  // disconnect from all previous
                net_connectS(pEXA, obs[k],      // connect to new
                             pHUB, pdup, true);
            }
        }

        first = next;
    }
}
void init(ReadReader<ReadType> **&readers, HitContainer<HitType> **&hitvs, double **&ncpvs, ModelType **&mhps) { READ_INT_TYPE nReads; HIT_INT_TYPE nHits; int rt; // read type READ_INT_TYPE nrLeft, curnr; // nrLeft : number of reads left, curnr: current number of reads HIT_INT_TYPE nhT; // nhT : hit threshold per thread char datF[STRLEN]; int s; char readFs[2][STRLEN]; ReadIndex *indices[2]; ifstream fin; readers = new ReadReader<ReadType>*[nThreads]; genReadFileNames(imdName, 1, read_type, s, readFs); for (int i = 0; i < s; i++) { indices[i] = new ReadIndex(readFs[i]); } for (int i = 0; i < nThreads; i++) { readers[i] = new ReadReader<ReadType>(s, readFs, refs.hasPolyA(), mparams.seedLen); // allow calculation of calc_lq() function readers[i]->setIndices(indices); } hitvs = new HitContainer<HitType>*[nThreads]; for (int i = 0; i < nThreads; i++) { hitvs[i] = new HitContainer<HitType>(); } sprintf(datF, "%s.dat", imdName); fin.open(datF); general_assert(fin.is_open(), "Cannot open " + cstrtos(datF) + "! 
It may not exist."); fin>>nReads>>nHits>>rt; general_assert(nReads == N1, "Number of alignable reads does not match!"); general_assert(rt == read_type, "Data file (.dat) does not have the right read type!"); //A just so so strategy for paralleling nhT = nHits / nThreads; nrLeft = N1; curnr = 0; ncpvs = new double*[nThreads]; for (int i = 0; i < nThreads; i++) { HIT_INT_TYPE ntLeft = nThreads - i - 1; // # of threads left general_assert(readers[i]->locate(curnr), "Read indices files do not match!"); while (nrLeft > ntLeft && (i == nThreads - 1 || hitvs[i]->getNHits() < nhT)) { general_assert(hitvs[i]->read(fin), "Cannot read alignments from .dat file!"); --nrLeft; if (verbose && nrLeft % 1000000 == 0) { cout<< "DAT "<< nrLeft << " reads left"<< endl; } } ncpvs[i] = new double[hitvs[i]->getN()]; memset(ncpvs[i], 0, sizeof(double) * hitvs[i]->getN()); curnr += hitvs[i]->getN(); if (verbose) { cout<<"Thread "<< i<< " : N = "<< hitvs[i]->getN()<< ", NHit = "<< hitvs[i]->getNHits()<< endl; } } fin.close(); mhps = new ModelType*[nThreads]; for (int i = 0; i < nThreads; i++) { mhps[i] = new ModelType(mparams, false); // just model helper } probv = new double[M + 1]; countvs = new double*[nThreads]; for (int i = 0; i < nThreads; i++) { countvs[i] = new double[M + 1]; } if (verbose) { printf("EM_init finished!\n"); } }
int main(int argc, char* argv[]) { ifstream fin; bool quiet = false; if (argc < 5) { printf("Usage : rsem-run-em refName read_type sampleName sampleToken [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling]\n\n"); printf(" refName: reference name\n"); printf(" read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n"); printf(" sampleName: sample's name, including the path\n"); printf(" sampleToken: sampleName excludes the path\n"); printf(" -p: number of threads which user wants to use. (default: 1)\n"); printf(" -b: produce bam format output file. (default: off)\n"); printf(" -q: set it quiet\n"); printf(" --gibbs-out: generate output file used by Gibbs sampler. (default: off)\n"); printf(" --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n"); printf("// model parameters should be in imdName.mparams.\n"); exit(-1); } time_t a = time(NULL); strcpy(refName, argv[1]); read_type = atoi(argv[2]); strcpy(outName, argv[3]); sprintf(imdName, "%s.temp/%s", argv[3], argv[4]); sprintf(statName, "%s.stat/%s", argv[3], argv[4]); nThreads = 1; genBamF = false; bamSampling = false; genGibbsOut = false; pt_fn_list = pt_chr_list = NULL; for (int i = 5; i < argc; i++) { if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-b")) { genBamF = true; inpSamType = argv[i + 1][0]; strcpy(inpSamF, argv[i + 2]); if (atoi(argv[i + 3]) == 1) { strcpy(fn_list, argv[i + 4]); pt_fn_list = (char*)(&fn_list); } } if (!strcmp(argv[i], "-q")) { quiet = true; } if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; } if (!strcmp(argv[i], "--sampling")) { bamSampling = true; } } general_assert(nThreads > 0, "Number of threads should be bigger than 0!"); verbose = !quiet; //basic info loading sprintf(refF, "%s.seq", refName); refs.loadRefs(refF); M = refs.getM(); 
sprintf(groupF, "%s.grp", refName); gi.load(groupF); m = gi.getm(); sprintf(tiF, "%s.ti", refName); transcripts.readFrom(tiF); sprintf(cntF, "%s.cnt", statName); fin.open(cntF); general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist."); fin>>N0>>N1>>N2>>N_tot; fin.close(); general_assert(N1 > 0, "There are no alignable reads!"); if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; //set model parameters mparams.M = M; mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; mparams.refs = &refs; sprintf(mparamsF, "%s.mparams", imdName); fin.open(mparamsF); general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); fin>> mparams.minL>> mparams.maxL>> mparams.probF; int val; // 0 or 1 , for estRSPD fin>>val; mparams.estRSPD = (val != 0); fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; fin>> mparams.seedLen; fin.close(); //run EM switch(read_type) { case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); } time_t b = time(NULL); printTimeUsed(a, b, "EM.cpp"); return 0; }
int main(int argc, char* argv[]) { if (argc < 2) { printf("Usage: PROBer-build-reference refName [--gtf gtfF] [--mapping mappingF] [--allele-specific] [--files num_of_files file_1 file_2 ...] [--n2g-index] [-q]\n"); exit(-1); } hasGTF = false; mappingType = 0; n2g_idx = false; int argpos = 2; while (argpos < argc) { if (!strcmp(argv[argpos], "--gtf")) { hasGTF = true; strcpy(gtfF, argv[++argpos]); } if (!strcmp(argv[argpos], "--mapping")) { mappingType = 1; mappingPos = ++argpos; } if (!strcmp(argv[argpos], "--allele-specific")) mappingType = 2; if (!strcmp(argv[argpos], "--files")) { num_files = atoi(argv[++argpos]); file_pos = argpos + 1; // the position in argv for the first file argpos += num_files; } if (!strcmp(argv[argpos], "--n2g-index")) n2g_idx = true; if (!strcmp(argv[argpos], "-q")) verbose = false; ++argpos; } if (mappingType > 0) loadMappingInfo(mappingType, argv[mappingPos]); ifstream fin; string line, gseq, tseq; // gseq, genomic sequence; tseq, transcript sequence string seqname, gene_id, transcript_id; if (hasGTF) { transcripts.setType(0); assert(mappingType < 2); parse_gtf_file(gtfF); M = transcripts.getM(); general_assert(M > 0, "The reference contains no transcripts!"); seqs.assign(M + 1, ""); chrvec.clear(); for (int i = 0; i < num_files; ++i, ++file_pos) { fin.open(argv[file_pos]); general_assert(fin.is_open(), "Cannot open " + cstrtos(argv[file_pos]) + "! 
It may not exist."); getline(fin, line); while ((fin) && (line[0] == '>')) { istringstream strin(line.substr(1)); strin>>seqname; gseq = ""; while((getline(fin, line)) && (line[0] != '>')) { gseq += line; } assert(gseq.length() > 0); sn2tr_iter = sn2tr.find(seqname); if (sn2tr_iter == sn2tr.end()) continue; chrvec.push_back(ChrInfo(seqname, gseq.length())); vector<int>& vec = sn2tr_iter->second; int s = vec.size(); for (int j = 0; j < s; ++j) { assert(vec[j] > 0 && vec[j] <= M); transcripts.getTranscriptAt(vec[j]).extractSeq(gseq, seqs[vec[j]]); } } fin.close(); if (verbose) { printf("%s is processed!\n", argv[file_pos]); } } sort(chrvec.begin(), chrvec.end()); // Shrink and build up Refs int curp = 0; for (int i = 1; i <= M; ++i) { const Transcript& transcript = transcripts.getTranscriptAt(i); if (seqs[i] == "") printf("Warning: Cannot extract transcript %s because the chromosome it locates -- %s -- is absent!\n", transcript.getTranscriptID().c_str(), transcript.getSeqName().c_str()); else { refs.addRef(transcript.getTranscriptID(), seqs[i]); // insert RefSeqs ++curp; transcripts.move(i, curp); } } printf("%d transcripts are extracted and %d transcripts are omitted.\n", curp, M - curp); transcripts.setM(curp); M = transcripts.getM(); general_assert(M > 0, "The reference contains no transcripts!"); assert(refs.getM() == M); } else {
void writeToDisk(char* refName) { ofstream fout; sprintf(tiF, "%s.ti", refName); transcripts.writeTo(tiF); if (verbose) { printf("Transcript Information File is generated!\n"); } sprintf(refFastaF, "%s.transcripts.fa", refName); refs.writeTo(refFastaF); sprintf(transListF, "%s.translist", refName); refs.writeTransListTo(transListF); sprintf(chromListF, "%s.chrlist", refName); fout.open(chromListF); for (int i = 0; i < (int)chrvec.size(); ++i) fout<< chrvec[i].name<< '\t'<< chrvec[i].len<< endl; fout.close(); if (verbose) { printf("Chromosome List File is generated!\n"); } string cur_gene_id, cur_transcript_id, name; vector<int> gi, gt, ta; cur_gene_id = ""; gi.clear(); if (mappingType == 2) { cur_transcript_id = ""; gt.clear(); ta.clear(); } for (int i = 1; i <= M; ++i) { const Transcript& transcript = transcripts.getTranscriptAt(i); if (cur_gene_id != transcript.getGeneID()) { gi.push_back(i); if (mappingType == 2) gt.push_back((int)ta.size()); cur_gene_id = transcript.getGeneID(); } if ((mappingType == 2) && (cur_transcript_id != transcript.getTranscriptID())) { ta.push_back(i); cur_transcript_id = transcript.getTranscriptID(); } } gi.push_back(M + 1); if (mappingType == 2) { gt.push_back((int)ta.size()); ta.push_back(M + 1); } sprintf(groupF, "%s.grp", refName); fout.open(groupF); for (int i = 0; i < (int)gi.size(); ++i) fout<< gi[i]<< endl; fout.close(); if (verbose) { printf("Group File is generated!\n"); } if (mappingType == 2) { sprintf(gtF, "%s.gt", refName); fout.open(gtF); for (int i = 0; i < (int)gt.size(); ++i) fout<< gt[i]<< endl; fout.close(); sprintf(taF, "%s.ta", refName); fout.open(taF); for (int i = 0; i < (int)ta.size(); ++i) fout<< ta[i]<< endl; fout.close(); if (verbose) { printf("Allele-specific group files are generated!\n"); } } if (n2g_idx) { sprintf(n2g_idxF, "%s.n2g.idx.fa", refName); fout.open(n2g_idxF); for (int i = 1; i <= M; ++i) fout<< '>'<< refs.getRef(i)->getName()<< endl<< n2g(refs.getRef(i)->getSeq())<< endl; fout.close(); if 
(verbose) printf("%s is generated!\n", n2g_idxF); } }
int main(int argc, char* argv[]) { if (argc < 8) { printf("Usage: rsem-calculate-credibility-intervals reference_name sample_name sampleToken confidence nCV nSpC nMB [-p #Threads] [-q]\n"); exit(-1); } confidence = atof(argv[4]); nCV = atoi(argv[5]); nSpC = atoi(argv[6]); nMB = atoi(argv[7]); nThreads = 1; quiet = false; for (int i = 8; i < argc; i++) { if (!strcmp(argv[i], "-p")) nThreads = atoi(argv[i + 1]); if (!strcmp(argv[i], "-q")) quiet = true; } verbose = !quiet; sprintf(refF, "%s.seq", argv[1]); refs.loadRefs(refF, 1); M = refs.getM(); sprintf(groupF, "%s.grp", argv[1]); gi.load(groupF); m = gi.getm(); nSamples = nCV * nSpC; cvlen = M + 1; assert(nSamples > 0 && cvlen > 1); // for Buffter.h: (bufsize_type)nSamples sprintf(imdName, "%s.temp/%s", argv[2], argv[3]); sprintf(statName, "%s.stat/%s", argv[2], argv[3]); sprintf(tmpF, "%s.tmp", imdName); sprintf(cvsF, "%s.countvectors", imdName); sprintf(modelF, "%s.model", statName); FILE *fi = fopen(modelF, "r"); general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!"); assert(fscanf(fi, "%d", &model_type) == 1); fclose(fi); // Phase I switch(model_type) { case 0 : sample_theta_vectors_from_count_vectors<SingleModel>(); break; case 1 : sample_theta_vectors_from_count_vectors<SingleQModel>(); break; case 2 : sample_theta_vectors_from_count_vectors<PairedEndModel>(); break; case 3 : sample_theta_vectors_from_count_vectors<PairedEndQModel>(); break; } // Phase II calculate_credibility_intervals(imdName); /* sprintf(command, "rm -f %s", tmpF); int status = system(command); if (status != 0) { fprintf(stderr, "Cannot delete %s!\n", tmpF); exit(-1); } */ return 0; }
int main(int argc, char* argv[]) { ifstream fin; bool quiet = false; if (argc < 6) { printf("Usage : rsem-run-em refName read_type sampleName imdName statName [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling] [--seed seed] [--calc-evaluation-score nb_r nb_p L w]\n\n"); printf(" refName: reference name\n"); printf(" read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n"); printf(" sampleName: sample's name, including the path\n"); printf(" sampleToken: sampleName excludes the path\n"); printf(" -p: number of threads which user wants to use. (default: 1)\n"); printf(" -b: produce bam format output file. (default: off)\n"); printf(" -q: set it quiet\n"); printf(" --gibbs-out: generate output file use by Gibbs sampler. (default: off)\n"); printf(" --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n"); printf(" --seed uint32: the seed used for the BAM sampling. 
(default: off)\n"); printf(" --calc-evaluation-score nb_r nb_p L w: " "nb_r and nb_p are parameters for the true transcript length distribution, which is modeled by a negative binomial distribution; " "L is the read length and w is the mininum overlap required for joining two contigs.\n"); printf("// model parameters should be in imdName.mparams.\n"); exit(-1); } time_t a = time(NULL); strcpy(refName, argv[1]); read_type = atoi(argv[2]); strcpy(outName, argv[3]); strcpy(imdName, argv[4]); strcpy(statName, argv[5]); nThreads = 1; genBamF = false; bamSampling = false; genGibbsOut = false; calcEvalScore = false; pt_fn_list = NULL; hasSeed = false; for (int i = 6; i < argc; i++) { if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-b")) { genBamF = true; inpSamType = argv[i + 1][0]; strcpy(inpSamF, argv[i + 2]); if (atoi(argv[i + 3]) == 1) { strcpy(fn_list, argv[i + 4]); pt_fn_list = (char*)(&fn_list); } } if (!strcmp(argv[i], "-q")) { quiet = true; } if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; } if (!strcmp(argv[i], "--sampling")) { bamSampling = true; } if (!strcmp(argv[i], "--seed")) { hasSeed = true; int len = strlen(argv[i + 1]); seed = 0; for (int k = 0; k < len; k++) seed = seed * 10 + (argv[i + 1][k] - '0'); } if (!strcmp(argv[i], "--calc-evaluation-score")) { calcEvalScore = true; nb_r = atof(argv[i + 1]); nb_p = atof(argv[i + 2]); L = atoi(argv[i + 3]); w = atoi(argv[i + 4]); } } general_assert(nThreads > 0, "Number of threads should be bigger than 0!"); verbose = !quiet; //basic info loading sprintf(refF, "%s.seq", refName); refs.loadRefs(refF); M = refs.getM(); sprintf(tiF, "%s.ti", refName); transcripts.readFrom(tiF); sprintf(cntF, "%s.cnt", statName); fin.open(cntF); general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! 
It may not exist."); fin>>N0>>N1>>N2>>N_tot; fin.close(); general_assert(N1 > 0, "There are no alignable reads!"); if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; //set model parameters mparams.M = M; mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; mparams.refs = &refs; sprintf(mparamsF, "%s.mparams", imdName); fin.open(mparamsF); general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); fin>> mparams.minL>> mparams.maxL>> mparams.probF; int val; // 0 or 1 , for estRSPD fin>>val; mparams.estRSPD = (val != 0); fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; fin>> mparams.seedLen; fin.close(); //run EM switch(read_type) { case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); } if (calcEvalScore) { CalcEvalScore ces(refs, nb_r, nb_p, L, w, statName); sprintf(scoreF, "%s.score", outName); ces.writeScoresTo(scoreF); char groupF[STRLEN]; GroupInfo gi; sprintf(groupF, "%s.grp", argv[1]); gi.load(groupF); ces.generateExpressionFiles(gi, transcripts, scoreF); } time_t b = time(NULL); printTimeUsed(a, b, "EM.cpp"); return 0; }