Exemplo n.º 1
0
int main(int argc, char* argv[]) {
	ifstream fin;
	bool quiet = false;

	if (argc < 5) {
		printf("Usage : rsem-run-em refName read_type sampleName sampleToken [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling]\n\n");
		printf("  refName: reference name\n");
		printf("  read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n");
		printf("  sampleName: sample's name, including the path\n");
		printf("  sampleToken: sampleName excludes the path\n");
		printf("  -p: number of threads which user wants to use. (default: 1)\n");
		printf("  -b: produce bam format output file. (default: off)\n");
		printf("  -q: set it quiet\n");
		printf("  --gibbs-out: generate output file used by Gibbs sampler. (default: off)\n");
		printf("  --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n");
		printf("// model parameters should be in imdName.mparams.\n");
		exit(-1);
	}

	time_t a = time(NULL);

	strcpy(refName, argv[1]);
	read_type = atoi(argv[2]);
	strcpy(outName, argv[3]);
	sprintf(imdName, "%s.temp/%s", argv[3], argv[4]);
	sprintf(statName, "%s.stat/%s", argv[3], argv[4]);

	nThreads = 1;

	genBamF = false;
	bamSampling = false;
	genGibbsOut = false;
	pt_fn_list = pt_chr_list = NULL;

	for (int i = 5; i < argc; i++) {
		if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); }
		if (!strcmp(argv[i], "-b")) {
			genBamF = true;
			inpSamType = argv[i + 1][0];
			strcpy(inpSamF, argv[i + 2]);
			if (atoi(argv[i + 3]) == 1) {
				strcpy(fn_list, argv[i + 4]);
				pt_fn_list = (char*)(&fn_list);
			}
		}
		if (!strcmp(argv[i], "-q")) { quiet = true; }
		if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; }
		if (!strcmp(argv[i], "--sampling")) { bamSampling = true; }
	}

	general_assert(nThreads > 0, "Number of threads should be bigger than 0!");

	verbose = !quiet;

	//basic info loading
	sprintf(refF, "%s.seq", refName);
	refs.loadRefs(refF);
	M = refs.getM();
	sprintf(groupF, "%s.grp", refName);
	gi.load(groupF);
	m = gi.getm();

	sprintf(tiF, "%s.ti", refName);
	transcripts.readFrom(tiF);

	sprintf(cntF, "%s.cnt", statName);
	fin.open(cntF);

	general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist.");

	fin>>N0>>N1>>N2>>N_tot;
	fin.close();

	general_assert(N1 > 0, "There are no alignable reads!");

	if ((READ_INT_TYPE)nThreads > N1) nThreads = N1;

	//set model parameters
	mparams.M = M;
	mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2;
	mparams.refs = &refs;

	sprintf(mparamsF, "%s.mparams", imdName);
	fin.open(mparamsF);

	general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist.");

	fin>> mparams.minL>> mparams.maxL>> mparams.probF;
	int val; // 0 or 1 , for estRSPD
	fin>>val;
	mparams.estRSPD = (val != 0);
	fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd;
	fin>> mparams.seedLen;
	fin.close();

	//run EM
	switch(read_type) {
	case 0 : EM<SingleRead, SingleHit, SingleModel>(); break;
	case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break;
	case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break;
	case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break;
	default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1);
	}

	time_t b = time(NULL);

	printTimeUsed(a, b, "EM.cpp");

	return 0;
}
Exemplo n.º 2
0
// Entry point of rsem-calculate-credibility-intervals.
//
// Positional arguments (see the usage line): reference_name sample_name
// sampleToken confidence nCV nSpC nMB, optionally followed by -p #Threads
// and -q.
//
// Steps performed here:
//   1. parse the arguments;
//   2. load the reference sequences (reference_name.seq) and the group
//      information (reference_name.grp);
//   3. read the model type from "<sample_name>.stat/<sampleToken>.model";
//   4. Phase I : sample_theta_vectors_from_count_vectors<Model>();
//   5. Phase II: calculate_credibility_intervals(imdName).
//
// Returns 0 on success. Calls exit(-1) on a usage error, on a -p option
// without a value and on an unknown model type.
int main(int argc, char* argv[]) {
	if (argc < 8) {
		printf("Usage: rsem-calculate-credibility-intervals reference_name sample_name sampleToken confidence nCV nSpC nMB [-p #Threads] [-q]\n");
		exit(-1);
	}

	confidence = atof(argv[4]);
	nCV = atoi(argv[5]);
	nSpC = atoi(argv[6]);
	nMB = atoi(argv[7]);

	nThreads = 1;
	quiet = false;
	for (int i = 8; i < argc; i++) {
		if (!strcmp(argv[i], "-p")) {
			// -p needs a value; without this check argv[argc] would be read
			if (i + 1 >= argc) {
				fprintf(stderr, "Option -p requires the number of threads!\n");
				exit(-1);
			}
			nThreads = atoi(argv[i + 1]);
		}
		else if (!strcmp(argv[i], "-q")) quiet = true;
	}
	verbose = !quiet;

	sprintf(refF, "%s.seq", argv[1]);
	refs.loadRefs(refF, 1);
	M = refs.getM();
	sprintf(groupF, "%s.grp", argv[1]);
	gi.load(groupF);
	m = gi.getm();

	nSamples = nCV * nSpC;
	cvlen = M + 1;
	assert(nSamples > 0 && cvlen > 1); // for Buffter.h: (bufsize_type)nSamples

	// NOTE(review): the destination buffers used with sprintf in this function
	// are declared outside of it; their capacity is not checked here.
	sprintf(imdName, "%s.temp/%s", argv[2], argv[3]);
	sprintf(statName, "%s.stat/%s", argv[2], argv[3]);
	sprintf(tmpF, "%s.tmp", imdName);
	sprintf(cvsF, "%s.countvectors", imdName);

	sprintf(modelF, "%s.model", statName);
	FILE *fi = fopen(modelF, "r");
	general_assert(fi != NULL, "Cannot open " + cstrtos(modelF) + "!");
	// The read must not live inside assert(): with NDEBUG defined the whole
	// expression would be compiled out and model_type would never be set.
	const int nRead = fscanf(fi, "%d", &model_type);
	fclose(fi);
	general_assert(nRead == 1, "Cannot read the model type from " + cstrtos(modelF) + "!");

	// Phase I
	switch(model_type) {
	case 0 : sample_theta_vectors_from_count_vectors<SingleModel>(); break;
	case 1 : sample_theta_vectors_from_count_vectors<SingleQModel>(); break;
	case 2 : sample_theta_vectors_from_count_vectors<PairedEndModel>(); break;
	case 3 : sample_theta_vectors_from_count_vectors<PairedEndQModel>(); break;
	// an unknown model type used to skip Phase I silently
	default : fprintf(stderr, "Unknown model type!\n"); exit(-1);
	}

	// Phase II
	calculate_credibility_intervals(imdName);

	/*
	sprintf(command, "rm -f %s", tmpF);
	int status = system(command);
	if (status != 0) {
		fprintf(stderr, "Cannot delete %s!\n", tmpF);
		exit(-1);
	}
	*/

	return 0;
}
Exemplo n.º 3
0
// Entry point of rsem-parse-alignments.
//
// Positional arguments (see the usage line): refName imdName statName
// alignFType alignF, optionally followed by -t Type, -l fn_list,
// -tag tagName and -q.
//
// Steps performed here:
//   1. parse the arguments;
//   2. load the group information (refName.grp) and the transcripts
//      (refName.ti);
//   3. call init(), then parse the alignments with parseIt<...>() according to
//      read_type while imdName.dat is open for writing;
//   4. write the header line of imdName.dat and the counts to statName.cnt.
//
// Returns 0 on success. Calls exit(-1) on a usage error, on an option without
// a value, on an unknown read type and when an output file cannot be opened.
int main(int argc, char* argv[]) {
	bool quiet = false;

	if (argc < 6) {
		printf("Usage : rsem-parse-alignments refName imdName statName alignFType('s' for sam, 'b' for bam) alignF [-t Type] [-l fn_list] [-tag tagName] [-q]\n");
		exit(-1);
	}

	strcpy(fn_list, "");
	read_type = 0;
	for (int i = 6; i < argc; i++) {
		// true when argv[i + 1] is a real argument and not the terminating null
		const bool hasValue = (i + 1 < argc);

		if (!strcmp(argv[i], "-t")) {
			if (!hasValue) {
				fprintf(stderr, "Option -t requires a read type!\n");
				exit(-1);
			}
			read_type = atoi(argv[i + 1]);
		}
		else if (!strcmp(argv[i], "-l")) {
			if (!hasValue) {
				fprintf(stderr, "Option -l requires fn_list!\n");
				exit(-1);
			}
			// NOTE(review): fn_list is declared outside this function; its
			// capacity is not checked here.
			strcpy(fn_list, argv[i + 1]);
		}
		else if (!strcmp(argv[i], "-tag")) {
			if (!hasValue) {
				fprintf(stderr, "Option -tag requires a tag name!\n");
				exit(-1);
			}
			SamParser::setReadTypeTag(argv[i + 1]);
		}
		else if (!strcmp(argv[i], "-q")) { quiet = true; }
	}

	// Only the read types 0..3 are handled by the dispatch below; reject
	// anything else before any file is opened instead of silently writing
	// output files that contain no alignments.
	if (read_type < 0 || read_type > 3) {
		fprintf(stderr, "Unknown Read Type!\n");
		exit(-1);
	}

	verbose = !quiet;

	sprintf(groupF, "%s.grp", argv[1]);
	gi.load(groupF);
	sprintf(tiF, "%s.ti", argv[1]);
	transcripts.readFrom(tiF);

	sprintf(datF, "%s.dat", argv[2]);
	sprintf(cntF, "%s.cnt", argv[3]);

	init(argv[2], argv[4][0], argv[5]);

	hit_out.open(datF);
	if (!hit_out.is_open()) {
		fprintf(stderr, "Cannot open %s for writing!\n", datF);
		exit(-1);
	}

	// Reserve a first line of 99 blanks. It is overwritten below, after
	// parsing, with "N[1] nHits read_type"; that text has to fit into the
	// reserved space, otherwise it would run into the following data.
	string firstLine(99, ' ');
	firstLine.append(1, '\n');		//May be dangerous!
	hit_out<<firstLine;

	switch(read_type) {
	case 0 : parseIt<SingleRead, SingleHit>(parser); break;
	case 1 : parseIt<SingleReadQ, SingleHit>(parser); break;
	case 2 : parseIt<PairedEndRead, PairedEndHit>(parser); break;
	case 3 : parseIt<PairedEndReadQ, PairedEndHit>(parser); break;
	}

	// go back to the start of the file and fill in the reserved first line
	hit_out.seekp(0, ios_base::beg);
	hit_out<<N[1]<<" "<<nHits<<" "<<read_type;

	hit_out.close();

	//cntF for statistics of alignments file
	ofstream fout(cntF);
	if (!fout.is_open()) {
		fprintf(stderr, "Cannot open %s for writing!\n", cntF);
		exit(-1);
	}
	// '\n' instead of endl: the stream is flushed once by close() below
	fout<<N[0]<<" "<<N[1]<<" "<<N[2]<<" "<<(N[0] + N[1] + N[2])<<'\n';
	fout<<nUnique<<" "<<nMulti<<" "<<nIsoMulti<<'\n';
	fout<<nHits<<" "<<read_type<<'\n';
	fout<<"0\t"<<N[0]<<'\n';
	for (iter = counter.begin(); iter != counter.end(); ++iter) {
		fout<<iter->first<<'\t'<<iter->second<<'\n';
	}
	fout<<"Inf\t"<<N[2]<<'\n';
	fout.close();

	release();

	if (verbose) { printf("Done!\n"); }

	return 0;
}
Exemplo n.º 4
0
int main(int argc, char* argv[]) {
	ifstream fin;
	bool quiet = false;

	if (argc < 6) {
		printf("Usage : rsem-run-em refName read_type sampleName imdName statName [-p #Threads] [-b samInpType samInpF has_fn_list_? [fn_list]] [-q] [--gibbs-out] [--sampling] [--seed seed] [--calc-evaluation-score nb_r nb_p L w]\n\n");
		printf("  refName: reference name\n");
		printf("  read_type: 0 single read without quality score; 1 single read with quality score; 2 paired-end read without quality score; 3 paired-end read with quality score.\n");
		printf("  sampleName: sample's name, including the path\n");
		printf("  sampleToken: sampleName excludes the path\n");
		printf("  -p: number of threads which user wants to use. (default: 1)\n");
		printf("  -b: produce bam format output file. (default: off)\n");
		printf("  -q: set it quiet\n");
		printf("  --gibbs-out: generate output file use by Gibbs sampler. (default: off)\n");
		printf("  --sampling: sample each read from its posterior distribution when bam file is generated. (default: off)\n");
		printf("  --seed uint32: the seed used for the BAM sampling. (default: off)\n");
		printf("  --calc-evaluation-score nb_r nb_p L w: "
				"nb_r and nb_p are parameters for the true transcript length distribution, which is modeled by a negative binomial distribution; "
				"L is the read length and w is the mininum overlap required for joining two contigs.\n");
		printf("// model parameters should be in imdName.mparams.\n");
		exit(-1);
	}

	time_t a = time(NULL);

	strcpy(refName, argv[1]);
	read_type = atoi(argv[2]);
	strcpy(outName, argv[3]);
	strcpy(imdName, argv[4]);
	strcpy(statName, argv[5]);

	nThreads = 1;

	genBamF = false;
	bamSampling = false;
	genGibbsOut = false;
	calcEvalScore = false;
	pt_fn_list = NULL;
	hasSeed = false;

	for (int i = 6; i < argc; i++) {
		if (!strcmp(argv[i], "-p")) { nThreads = atoi(argv[i + 1]); }
		if (!strcmp(argv[i], "-b")) {
			genBamF = true;
			inpSamType = argv[i + 1][0];
			strcpy(inpSamF, argv[i + 2]);
			if (atoi(argv[i + 3]) == 1) {
				strcpy(fn_list, argv[i + 4]);
				pt_fn_list = (char*)(&fn_list);
			}
		}
		if (!strcmp(argv[i], "-q")) { quiet = true; }
		if (!strcmp(argv[i], "--gibbs-out")) { genGibbsOut = true; }
		if (!strcmp(argv[i], "--sampling")) { bamSampling = true; }
		if (!strcmp(argv[i], "--seed")) {
		  hasSeed = true;
		  int len = strlen(argv[i + 1]);
		  seed = 0;
		  for (int k = 0; k < len; k++) seed = seed * 10 + (argv[i + 1][k] - '0');
		}
		if (!strcmp(argv[i], "--calc-evaluation-score")) {
			calcEvalScore = true;
			nb_r = atof(argv[i + 1]);
			nb_p = atof(argv[i + 2]);
			L = atoi(argv[i + 3]);
			w = atoi(argv[i + 4]);
		}
	}

	general_assert(nThreads > 0, "Number of threads should be bigger than 0!");

	verbose = !quiet;

	//basic info loading
	sprintf(refF, "%s.seq", refName);
	refs.loadRefs(refF);
	M = refs.getM();

	sprintf(tiF, "%s.ti", refName);
	transcripts.readFrom(tiF);

	sprintf(cntF, "%s.cnt", statName);
	fin.open(cntF);

	general_assert(fin.is_open(), "Cannot open " + cstrtos(cntF) + "! It may not exist.");

	fin>>N0>>N1>>N2>>N_tot;
	fin.close();

	general_assert(N1 > 0, "There are no alignable reads!");

	if ((READ_INT_TYPE)nThreads > N1) nThreads = N1;

	//set model parameters
	mparams.M = M;
	mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2;
	mparams.refs = &refs;

	sprintf(mparamsF, "%s.mparams", imdName);
	fin.open(mparamsF);

	general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist.");

	fin>> mparams.minL>> mparams.maxL>> mparams.probF;
	int val; // 0 or 1 , for estRSPD
	fin>>val;
	mparams.estRSPD = (val != 0);
	fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd;
	fin>> mparams.seedLen;
	fin.close();

	//run EM
	switch(read_type) {
	case 0 : EM<SingleRead, SingleHit, SingleModel>(); break;
	case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break;
	case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break;
	case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break;
	default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1);
	}

	if (calcEvalScore) {
		CalcEvalScore ces(refs, nb_r, nb_p, L, w, statName);
		sprintf(scoreF, "%s.score", outName);
		ces.writeScoresTo(scoreF);
		
		char groupF[STRLEN];
		GroupInfo gi;
		sprintf(groupF, "%s.grp", argv[1]);
		gi.load(groupF);

		ces.generateExpressionFiles(gi, transcripts, scoreF);
	}

	time_t b = time(NULL);

	printTimeUsed(a, b, "EM.cpp");

	return 0;
}