int main(int argc, char* argv[]) { ifstream fin; bool quiet = false; if (argc < 5) { printf("Usage : rsem-run-em refName read_type sampleName sampleToken [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling]\n\n"); printf(" refName: reference name\n"); printf(" read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n"); printf(" sampleName: sample's name, including the path\n"); printf(" sampleToken: sampleName excludes the path\n"); printf(" -p: number of threads which user wants to use. (default: 1)\n"); printf(" -b: produce bam format output file. (default: off)\n"); printf(" -q: set it quiet\n"); printf(" --gibbs-out: generate output file used by Gibbs sampler. (default: off)\n"); printf(" --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n"); printf("// model parameters should be in imdName.mparams.\n"); exit(-1); } time_t a = time(NULL); strcpy(refName, argv[1]); read_type = atoi(argv[2]); strcpy(outName, argv[3]); sprintf(imdName, "%s.temp/%s", argv[3], argv[4]); sprintf(statName, "%s.stat/%s", argv[3], argv[4]); nThreads = 1; genBamF = false; bamSampling = false; genGibbsOut = false; pt_fn_list = pt_chr_list = NULL; for (int i = 5; i < argc; i++) { if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-b")) { genBamF = true; inpSamType = argv[i + 1][0]; strcpy(inpSamF, argv[i + 2]); if (atoi(argv[i + 3]) == 1) { strcpy(fn_list, argv[i + 4]); pt_fn_list = (char*)(&fn_list); } } if (!strcmp(argv[i], "-q")) { quiet = true; } if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; } if (!strcmp(argv[i], "--sampling")) { bamSampling = true; } } general_assert(nThreads > 0, "Number of threads should be bigger than 0!"); verbose = !quiet; //basic info loading sprintf(refF, "%s.seq", refName); refs.loadRefs(refF); M = refs.getM(); sprintf(groupF, "%s.grp", refName); gi.load(groupF); m = gi.getm(); sprintf(tiF, "%s.ti", refName); transcripts.readFrom(tiF); sprintf(cntF, "%s.cnt", statName); fin.open(cntF); general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist."); fin>>N0>>N1>>N2>>N_tot; fin.close(); general_assert(N1 > 0, "There are no alignable reads!"); if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; //set model parameters mparams.M = M; mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; mparams.refs = &refs; sprintf(mparamsF, "%s.mparams", imdName); fin.open(mparamsF); general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); fin>> mparams.minL>> mparams.maxL>> mparams.probF; int val; // 0 or 1 , for estRSPD fin>>val; mparams.estRSPD = (val != 0); fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; fin>> mparams.seedLen; fin.close(); //run EM switch(read_type) { case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); } time_t b = time(NULL); printTimeUsed(a, b, "EM.cpp"); return 0; }
int main(int argc, char* argv[]) { if (argc < 8) { printf("Usage: rsem-calculate-credibility-intervals reference_name sample_name sampleToken confidence nCV nSpC nMB [-p #Threads] [-q]\n"); exit(-1); } confidence = atof(argv[4]); nCV = atoi(argv[5]); nSpC = atoi(argv[6]); nMB = atoi(argv[7]); nThreads = 1; quiet = false; for (int i = 8; i < argc; i++) { if (!strcmp(argv[i], "-p")) nThreads = atoi(argv[i + 1]); if (!strcmp(argv[i], "-q")) quiet = true; } verbose = !quiet; sprintf(refF, "%s.seq", argv[1]); refs.loadRefs(refF, 1); M = refs.getM(); sprintf(groupF, "%s.grp", argv[1]); gi.load(groupF); m = gi.getm(); nSamples = nCV * nSpC; cvlen = M + 1; assert(nSamples > 0 && cvlen > 1); // for Buffter.h: (bufsize_type)nSamples sprintf(imdName, "%s.temp/%s", argv[2], argv[3]); sprintf(statName, "%s.stat/%s", argv[2], argv[3]); sprintf(tmpF, "%s.tmp", imdName); sprintf(cvsF, "%s.countvectors", imdName); sprintf(modelF, "%s.model", statName); FILE *fi = fopen(modelF, "r"); general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!"); assert(fscanf(fi, "%d", &model_type) == 1); fclose(fi); // Phase I switch(model_type) { case 0 : sample_theta_vectors_from_count_vectors<SingleModel>(); break; case 1 : sample_theta_vectors_from_count_vectors<SingleQModel>(); break; case 2 : sample_theta_vectors_from_count_vectors<PairedEndModel>(); break; case 3 : sample_theta_vectors_from_count_vectors<PairedEndQModel>(); break; } // Phase II calculate_credibility_intervals(imdName); /* sprintf(command, "rm -f %s", tmpF); int status = system(command); if (status != 0) { fprintf(stderr, "Cannot delete %s!\n", tmpF); exit(-1); } */ return 0; }
int main(int argc, char* argv[]) { if (argc < 2) { printf("Usage: PROBer-build-reference refName [--gtf gtfF] [--mapping mappingF] [--allele-specific] [--files num_of_files file_1 file_2 ...] [--n2g-index] [-q]\n"); exit(-1); } hasGTF = false; mappingType = 0; n2g_idx = false; int argpos = 2; while (argpos < argc) { if (!strcmp(argv[argpos], "--gtf")) { hasGTF = true; strcpy(gtfF, argv[++argpos]); } if (!strcmp(argv[argpos], "--mapping")) { mappingType = 1; mappingPos = ++argpos; } if (!strcmp(argv[argpos], "--allele-specific")) mappingType = 2; if (!strcmp(argv[argpos], "--files")) { num_files = atoi(argv[++argpos]); file_pos = argpos + 1; // the position in argv for the first file argpos += num_files; } if (!strcmp(argv[argpos], "--n2g-index")) n2g_idx = true; if (!strcmp(argv[argpos], "-q")) verbose = false; ++argpos; } if (mappingType > 0) loadMappingInfo(mappingType, argv[mappingPos]); ifstream fin; string line, gseq, tseq; // gseq, genomic sequence; tseq, transcript sequence string seqname, gene_id, transcript_id; if (hasGTF) { transcripts.setType(0); assert(mappingType < 2); parse_gtf_file(gtfF); M = transcripts.getM(); general_assert(M > 0, "The reference contains no transcripts!"); seqs.assign(M + 1, ""); chrvec.clear(); for (int i = 0; i < num_files; ++i, ++file_pos) { fin.open(argv[file_pos]); general_assert(fin.is_open(), "Cannot open " + cstrtos(argv[file_pos]) + "! It may not exist."); getline(fin, line); while ((fin) && (line[0] == '>')) { istringstream strin(line.substr(1)); strin>>seqname; gseq = ""; while((getline(fin, line)) && (line[0] != '>')) { gseq += line; } assert(gseq.length() > 0); sn2tr_iter = sn2tr.find(seqname); if (sn2tr_iter == sn2tr.end()) continue; chrvec.push_back(ChrInfo(seqname, gseq.length())); vector<int>& vec = sn2tr_iter->second; int s = vec.size(); for (int j = 0; j < s; ++j) { assert(vec[j] > 0 && vec[j] <= M); transcripts.getTranscriptAt(vec[j]).extractSeq(gseq, seqs[vec[j]]); } } fin.close(); if (verbose) { printf("%s is processed!\n", argv[file_pos]); } } sort(chrvec.begin(), chrvec.end()); // Shrink and build up Refs int curp = 0; for (int i = 1; i <= M; ++i) { const Transcript& transcript = transcripts.getTranscriptAt(i); if (seqs[i] == "") printf("Warning: Cannot extract transcript %s because the chromosome it locates -- %s -- is absent!\n", transcript.getTranscriptID().c_str(), transcript.getSeqName().c_str()); else { refs.addRef(transcript.getTranscriptID(), seqs[i]); // insert RefSeqs ++curp; transcripts.move(i, curp); } } printf("%d transcripts are extracted and %d transcripts are omitted.\n", curp, M - curp); transcripts.setM(curp); M = transcripts.getM(); general_assert(M > 0, "The reference contains no transcripts!"); assert(refs.getM() == M); } else {
int main(int argc, char* argv[]) { ifstream fin; bool quiet = false; if (argc < 6) { printf("Usage : rsem-run-em refName read_type sampleName imdName statName [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling] [--seed seed] [--calc-evaluation-score nb_r nb_p L w]\n\n"); printf(" refName: reference name\n"); printf(" read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n"); printf(" sampleName: sample's name, including the path\n"); printf(" sampleToken: sampleName excludes the path\n"); printf(" -p: number of threads which user wants to use. (default: 1)\n"); printf(" -b: produce bam format output file. (default: off)\n"); printf(" -q: set it quiet\n"); printf(" --gibbs-out: generate output file use by Gibbs sampler. (default: off)\n"); printf(" --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n"); printf(" --seed uint32: the seed used for the BAM sampling. (default: off)\n"); printf(" --calc-evaluation-score nb_r nb_p L w: " "nb_r and nb_p are parameters for the true transcript length distribution, which is modeled by a negative binomial distribution; " "L is the read length and w is the mininum overlap required for joining two contigs.\n"); printf("// model parameters should be in imdName.mparams.\n"); exit(-1); } time_t a = time(NULL); strcpy(refName, argv[1]); read_type = atoi(argv[2]); strcpy(outName, argv[3]); strcpy(imdName, argv[4]); strcpy(statName, argv[5]); nThreads = 1; genBamF = false; bamSampling = false; genGibbsOut = false; calcEvalScore = false; pt_fn_list = NULL; hasSeed = false; for (int i = 6; i < argc; i++) { if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-b")) { genBamF = true; inpSamType = argv[i + 1][0]; strcpy(inpSamF, argv[i + 2]); if (atoi(argv[i + 3]) == 1) { strcpy(fn_list, argv[i + 4]); pt_fn_list = (char*)(&fn_list); } } if (!strcmp(argv[i], "-q")) { quiet = true; } if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; } if (!strcmp(argv[i], "--sampling")) { bamSampling = true; } if (!strcmp(argv[i], "--seed")) { hasSeed = true; int len = strlen(argv[i + 1]); seed = 0; for (int k = 0; k < len; k++) seed = seed * 10 + (argv[i + 1][k] - '0'); } if (!strcmp(argv[i], "--calc-evaluation-score")) { calcEvalScore = true; nb_r = atof(argv[i + 1]); nb_p = atof(argv[i + 2]); L = atoi(argv[i + 3]); w = atoi(argv[i + 4]); } } general_assert(nThreads > 0, "Number of threads should be bigger than 0!"); verbose = !quiet; //basic info loading sprintf(refF, "%s.seq", refName); refs.loadRefs(refF); M = refs.getM(); sprintf(tiF, "%s.ti", refName); transcripts.readFrom(tiF); sprintf(cntF, "%s.cnt", statName); fin.open(cntF); general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist."); fin>>N0>>N1>>N2>>N_tot; fin.close(); general_assert(N1 > 0, "There are no alignable reads!"); if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; //set model parameters mparams.M = M; mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; mparams.refs = &refs; sprintf(mparamsF, "%s.mparams", imdName); fin.open(mparamsF); general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); fin>> mparams.minL>> mparams.maxL>> mparams.probF; int val; // 0 or 1 , for estRSPD fin>>val; mparams.estRSPD = (val != 0); fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; fin>> mparams.seedLen; fin.close(); //run EM switch(read_type) { case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); } if (calcEvalScore) { CalcEvalScore ces(refs, nb_r, nb_p, L, w, statName); sprintf(scoreF, "%s.score", outName); ces.writeScoresTo(scoreF); char groupF[STRLEN]; GroupInfo gi; sprintf(groupF, "%s.grp", argv[1]); gi.load(groupF); ces.generateExpressionFiles(gi, transcripts, scoreF); } time_t b = time(NULL); printTimeUsed(a, b, "EM.cpp"); return 0; }