int main(int argc, char* argv[]) { bool quiet = false; if (argc < 6) { printf("Usage : rsem-parse-alignments refName imdName statName alignFType('s' for sam, 'b' for bam) alignF [-t Type] [-l fn_list] [-tag tagName] [-q]\n"); exit(-1); } strcpy(fn_list, ""); read_type = 0; if (argc > 6) { for (int i = 6; i < argc; i++) { if (!strcmp(argv[i], "-t")) { read_type = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-l")) { strcpy(fn_list, argv[i + 1]); } if (!strcmp(argv[i], "-tag")) { SamParser::setReadTypeTag(argv[i + 1]); } if (!strcmp(argv[i], "-q")) { quiet = true; } } } verbose = !quiet; sprintf(groupF, "%s.grp", argv[1]); gi.load(groupF); sprintf(tiF, "%s.ti", argv[1]); transcripts.readFrom(tiF); sprintf(datF, "%s.dat", argv[2]); sprintf(cntF, "%s.cnt", argv[3]); init(argv[2], argv[4][0], argv[5]); hit_out.open(datF); string firstLine(99, ' '); firstLine.append(1, '\n'); //May be dangerous! hit_out<<firstLine; switch(read_type) { case 0 : parseIt<SingleRead, SingleHit>(parser); break; case 1 : parseIt<SingleReadQ, SingleHit>(parser); break; case 2 : parseIt<PairedEndRead, PairedEndHit>(parser); break; case 3 : parseIt<PairedEndReadQ, PairedEndHit>(parser); break; } hit_out.seekp(0, ios_base::beg); hit_out<<N[1]<<" "<<nHits<<" "<<read_type; hit_out.close(); //cntF for statistics of alignments file ofstream fout(cntF); fout<<N[0]<<" "<<N[1]<<" "<<N[2]<<" "<<(N[0] + N[1] + N[2])<<endl; fout<<nUnique<<" "<<nMulti<<" "<<nIsoMulti<<endl; fout<<nHits<<" "<<read_type<<endl; fout<<"0\t"<<N[0]<<endl; for (iter = counter.begin(); iter != counter.end(); iter++) { fout<<iter->first<<'\t'<<iter->second<<endl; } fout<<"Inf\t"<<N[2]<<endl; fout.close(); release(); if (verbose) { printf("Done!\n"); } return 0; }
int main(int argc, char* argv[]) { ifstream fin; bool quiet = false; if (argc < 5) { printf("Usage : rsem-run-em refName read_type sampleName sampleToken [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling]\n\n"); printf(" refName: reference name\n"); printf(" read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n"); printf(" sampleName: sample's name, including the path\n"); printf(" sampleToken: sampleName excludes the path\n"); printf(" -p: number of threads which user wants to use. (default: 1)\n"); printf(" -b: produce bam format output file. (default: off)\n"); printf(" -q: set it quiet\n"); printf(" --gibbs-out: generate output file used by Gibbs sampler. (default: off)\n"); printf(" --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n"); printf("// model parameters should be in imdName.mparams.\n"); exit(-1); } time_t a = time(NULL); strcpy(refName, argv[1]); read_type = atoi(argv[2]); strcpy(outName, argv[3]); sprintf(imdName, "%s.temp/%s", argv[3], argv[4]); sprintf(statName, "%s.stat/%s", argv[3], argv[4]); nThreads = 1; genBamF = false; bamSampling = false; genGibbsOut = false; pt_fn_list = pt_chr_list = NULL; for (int i = 5; i < argc; i++) { if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-b")) { genBamF = true; inpSamType = argv[i + 1][0]; strcpy(inpSamF, argv[i + 2]); if (atoi(argv[i + 3]) == 1) { strcpy(fn_list, argv[i + 4]); pt_fn_list = (char*)(&fn_list); } } if (!strcmp(argv[i], "-q")) { quiet = true; } if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; } if (!strcmp(argv[i], "--sampling")) { bamSampling = true; } } general_assert(nThreads > 0, "Number of threads should be bigger than 0!"); verbose = !quiet; //basic info loading sprintf(refF, "%s.seq", refName); refs.loadRefs(refF); M = refs.getM(); sprintf(groupF, "%s.grp", refName); gi.load(groupF); m = gi.getm(); sprintf(tiF, "%s.ti", refName); transcripts.readFrom(tiF); sprintf(cntF, "%s.cnt", statName); fin.open(cntF); general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist."); fin>>N0>>N1>>N2>>N_tot; fin.close(); general_assert(N1 > 0, "There are no alignable reads!"); if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; //set model parameters mparams.M = M; mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; mparams.refs = &refs; sprintf(mparamsF, "%s.mparams", imdName); fin.open(mparamsF); general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); fin>> mparams.minL>> mparams.maxL>> mparams.probF; int val; // 0 or 1 , for estRSPD fin>>val; mparams.estRSPD = (val != 0); fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; fin>> mparams.seedLen; fin.close(); //run EM switch(read_type) { case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); } time_t b = time(NULL); printTimeUsed(a, b, "EM.cpp"); return 0; }
int main(int argc, char* argv[]) { ifstream fin; bool quiet = false; if (argc < 6) { printf("Usage : rsem-run-em refName read_type sampleName imdName statName [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling] [--seed seed] [--calc-evaluation-score nb_r nb_p L w]\n\n"); printf(" refName: reference name\n"); printf(" read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n"); printf(" sampleName: sample's name, including the path\n"); printf(" sampleToken: sampleName excludes the path\n"); printf(" -p: number of threads which user wants to use. (default: 1)\n"); printf(" -b: produce bam format output file. (default: off)\n"); printf(" -q: set it quiet\n"); printf(" --gibbs-out: generate output file use by Gibbs sampler. (default: off)\n"); printf(" --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n"); printf(" --seed uint32: the seed used for the BAM sampling. (default: off)\n"); printf(" --calc-evaluation-score nb_r nb_p L w: " "nb_r and nb_p are parameters for the true transcript length distribution, which is modeled by a negative binomial distribution; " "L is the read length and w is the mininum overlap required for joining two contigs.\n"); printf("// model parameters should be in imdName.mparams.\n"); exit(-1); } time_t a = time(NULL); strcpy(refName, argv[1]); read_type = atoi(argv[2]); strcpy(outName, argv[3]); strcpy(imdName, argv[4]); strcpy(statName, argv[5]); nThreads = 1; genBamF = false; bamSampling = false; genGibbsOut = false; calcEvalScore = false; pt_fn_list = NULL; hasSeed = false; for (int i = 6; i < argc; i++) { if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-b")) { genBamF = true; inpSamType = argv[i + 1][0]; strcpy(inpSamF, argv[i + 2]); if (atoi(argv[i + 3]) == 1) { strcpy(fn_list, argv[i + 4]); pt_fn_list = (char*)(&fn_list); } } if (!strcmp(argv[i], "-q")) { quiet = true; } if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; } if (!strcmp(argv[i], "--sampling")) { bamSampling = true; } if (!strcmp(argv[i], "--seed")) { hasSeed = true; int len = strlen(argv[i + 1]); seed = 0; for (int k = 0; k < len; k++) seed = seed * 10 + (argv[i + 1][k] - '0'); } if (!strcmp(argv[i], "--calc-evaluation-score")) { calcEvalScore = true; nb_r = atof(argv[i + 1]); nb_p = atof(argv[i + 2]); L = atoi(argv[i + 3]); w = atoi(argv[i + 4]); } } general_assert(nThreads > 0, "Number of threads should be bigger than 0!"); verbose = !quiet; //basic info loading sprintf(refF, "%s.seq", refName); refs.loadRefs(refF); M = refs.getM(); sprintf(tiF, "%s.ti", refName); transcripts.readFrom(tiF); sprintf(cntF, "%s.cnt", statName); fin.open(cntF); general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist."); fin>>N0>>N1>>N2>>N_tot; fin.close(); general_assert(N1 > 0, "There are no alignable reads!"); if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; //set model parameters mparams.M = M; mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; mparams.refs = &refs; sprintf(mparamsF, "%s.mparams", imdName); fin.open(mparamsF); general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); fin>> mparams.minL>> mparams.maxL>> mparams.probF; int val; // 0 or 1 , for estRSPD fin>>val; mparams.estRSPD = (val != 0); fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; fin>> mparams.seedLen; fin.close(); //run EM switch(read_type) { case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); } if (calcEvalScore) { CalcEvalScore ces(refs, nb_r, nb_p, L, w, statName); sprintf(scoreF, "%s.score", outName); ces.writeScoresTo(scoreF); char groupF[STRLEN]; GroupInfo gi; sprintf(groupF, "%s.grp", argv[1]); gi.load(groupF); ces.generateExpressionFiles(gi, transcripts, scoreF); } time_t b = time(NULL); printTimeUsed(a, b, "EM.cpp"); return 0; }