Beispiel #1
0
//
// Create a SIM file from the GTC files listed in gtcHash
//
// fname       the name of the SIM file to create
//
// For every GTC file a fixed-width sample name field is written, followed by the raw
// X and Y intensity of each probe as interleaved uint16_t values.  The sample name is
// taken from sampleNames when it has an entry for that file, otherwise from the GTC file.
// Names that do not fit the field are truncated so that the field always ends in a zero byte.
//
// If gtcHash is empty a message is written to stderr and no file is created.
//
void createSimFile(string fname)
{
	// The header needs the probe count of a GTC file, so there is nothing to do without one
	if (gtcHash.size() == 0) {
		cerr << "createSimFile(): no GTC files to process" << endl;
		return;
	}

	// Automatic object: released on every exit path instead of being leaked
	Sim sim;

	// Open the first GTC file only to learn how many probes the header must declare
	hash_map<string,string>::iterator first = gtcHash.begin();
	gtc.open(first->second, Gtc::INTENSITY);
	sim.createFile(fname);
	sim.writeHeader(gtcHash.size(), gtc.xRawIntensity.size());

	// One reusable, zero-padded buffer for the sample name field.
	// At least one byte is allocated so that &nameField[0] is always valid.
	const size_t nameFieldSize = sim.sampleNameSize;
	vector<char> nameField(nameFieldSize > 0 ? nameFieldSize : 1, '\0');

	//
	//
	// Process each GTC file in turn
	//
	unsigned int n=1;
	for (hash_map<string,string>::iterator i = gtcHash.begin(); i != gtcHash.end(); ++i) {
		if (verbose) cout << timestamp() << "Processing GTC file " << n << " of " << gtcHash.size() << endl;
		//
		// add sample name to each output file
		// no family info as yet (todo?) - write sample ID twice
//		fn << i->first << endl;

		gtc.open(i->second,Gtc::XFORM | Gtc::INTENSITY);	// reload GTC file to read XForm and Intensity arrays

		// if we have a sample name from the json file, use it
		const char *name = (sampleNames.size() > (n-1)) ? sampleNames[n-1].c_str()
		                                                : gtc.sampleName.c_str();
		// Bounded copy: the last byte of the field is never written, so it stays zero
		memset(&nameField[0], 0, nameField.size());
		strncpy(&nameField[0], name, nameField.size() - 1);
		sim.write(&nameField[0], sim.sampleNameSize);

		// Raw intensities, written as X,Y pairs in GTC file order
		for (unsigned int idx = 0; idx < gtc.xRawIntensity.size(); idx++) {
			uint16_t v;
			v = gtc.xRawIntensity[idx];
			sim.write(&v,sizeof(v));
			v = gtc.yRawIntensity[idx];
			sim.write(&v,sizeof(v));
		}
		n++;

		// Disabled: normalisation of the intensities using the manifest and the XForm arrays
#if 0
		for (vector<snpClass>::iterator snp = manifest->snps.begin(); snp != manifest->snps.end(); snp++) {
			if (excludeCnv && snp->name.find("cnv") != string::npos) continue;
			if (chrSelect.size() && chrSelect.compare(snp->chromosome)) continue;
			int idx = snp->index - 1;	// index is zero based in arrays, but starts from 1 in the map file
			unsigned int norm = manifest->normIdMap[snp->normId];
			XFormClass *XF = &gtc.XForm[norm];

			// first do the normalisation calculation
			double tempx = gtc.xRawIntensity[idx] - XF->xOffset;
			double tempy = gtc.yRawIntensity[idx] - XF->yOffset;

			double cos_theta = cos(XF->theta);
			double sin_theta = sin(XF->theta);
			double tempx2 = cos_theta * tempx + sin_theta * tempy;
			double tempy2 = -sin_theta * tempx + cos_theta * tempy;

			double tempx3 = tempx2 - XF->shear * tempy2;
			double tempy3 = tempy2;

			double xn = tempx3 / XF->xScale;
			double yn = tempy3 / XF->yScale;

			// add raw/norm x/y to .raw and .nor files
//			fn << "\t" << std::fixed << setprecision(3) << xn << " " << yn;
		}
#endif

	}
	sim.close();
}
Beispiel #2
0
//
// Create a SIM file from one or more GTC files
//
// infile      a file containing either a simple list of GTC files, or a list in JSON format
// outfile     the name of the SIM file to create, or '-' to write to stdout
// normalize   if true, normalize the intensities, else store the raw values in the SIM file
// manfile     the name of the manifest file
// verbose     boolean (default false); if true, progress is reported on stderr
//
// Throws a C string or a std::string describing the problem when no infile or no GTC
// files are given, when a GTC file reports an error on opening, when the manifest and a
// GTC file disagree on the number of probes, or when a manifest probe index is out of range.
//
// Note that the SIM file is written with the intensities sorted into position order, as given
// by the manifest file.
//
void commandCreate(string infile, string outfile, bool normalize, string manfile, bool verbose)
{
	vector<string> sampleNames;		// list of sample names from JSON input file
	vector<string> infiles;			// list of GTC files to process
	// Automatic objects: released on every exit path, including the throws below
	Sim sim;
	Gtc gtc;
	Manifest manifest;
	const int numberFormat = normalize ? 0 : 1;	// 0: values written as float, 1: as uint16_t

	//
	// First, get a list of GTC files, and possibly sample names
	//
	if (infile.empty()) throw("commandCreate(): infile not specified");

	parseInfile(infile,sampleNames,infiles);
	if (infiles.empty()) throw("No GTC files are specified in the infile");

	// Check that every GTC file can be opened before anything is written
	for (unsigned int n = 0; n < infiles.size(); n++) {
		gtc.open(infiles[n],0);
		if (gtc.errorMsg.length()) throw gtc.errorMsg;
	}

	// We need a manifest file to sort the SNPs and to normalise the intensities (if required)
	loadManifest(&manifest, manfile);
	// Sort the SNPs into position order
	sort(manifest.snps.begin(), manifest.snps.end(), SortByPosition);

	// Create the SIM file and write the header.
	// numSnps is that of the last GTC file opened by the check loop above.
	sim.createFile(outfile);
	sim.writeHeader(infiles.size(),gtc.numSnps, 2, numberFormat);

	// One reusable, zero-padded buffer for the sample name field.
	// At least one byte is allocated so that &nameField[0] is always valid.
	const size_t nameFieldSize = sim.sampleNameSize;
	vector<char> nameField(nameFieldSize > 0 ? nameFieldSize : 1, '\0');

	// For each GTC file, write the sample name and intensities to the SIM file
	for (unsigned int n = 0; n < infiles.size(); n++) {
		gtc.open(infiles[n], Gtc::XFORM | Gtc::INTENSITY);
		const size_t numProbes = gtc.xRawIntensity.size();
		if (manifest.snps.size() != numProbes) {
			ostringstream msg;
			msg << "Size mismatch: Manifest contains " << manifest.snps.size() << " probes, but " 
			    << infiles[n] << " contains " << numProbes << " probes.";
			throw msg.str();
		}

		// if we have a sample name from the json file, use it
		const char *name = (n < sampleNames.size()) ? sampleNames[n].c_str()
		                                            : gtc.sampleName.c_str();
		// Bounded copy: the last byte of the field is never written, so it stays zero
		memset(&nameField[0], 0, nameField.size());
		strncpy(&nameField[0], name, nameField.size() - 1);
		sim.write(&nameField[0], sim.sampleNameSize);
		if (verbose) {
			cerr << "Gtc file " 
			     << n+1
			     << " of " 
			     << infiles.size()
			     << "  File: "
			     << infiles[n]
			     << "  Sample: "
			     << &nameField[0]
			     << endl;
		}

		// Note that we write the intensities in SNP order, sorted by position
		for (vector<snpClass>::iterator snp = manifest.snps.begin(); snp != manifest.snps.end(); ++snp) {
			double xn;
			double yn;
			const int idx = snp->index - 1;   // index is zero based in arrays, but starts from 1 in the map file
			// The index comes from the manifest file, so do not trust it blindly
			if (idx < 0 || static_cast<size_t>(idx) >= numProbes) {
				ostringstream msg;
				msg << "Manifest probe index " << snp->index << " is out of range: "
				    << infiles[n] << " contains " << numProbes << " probes.";
				throw msg.str();
			}
			if (normalize) {
				// This is the normalization calculation, according to Illumina:
				// subtract the offset, rotate by theta, remove the shear, then scale
				unsigned int norm = manifest.normIdMap[snp->normId];
				XFormClass *XF = &(gtc.XForm[norm]);
				double tempx = gtc.xRawIntensity[idx] - XF->xOffset;
				double tempy = gtc.yRawIntensity[idx] - XF->yOffset;
				double cos_theta = cos(XF->theta);
				double sin_theta = sin(XF->theta);
				double tempx2 = cos_theta * tempx + sin_theta * tempy;
				double tempy2 = -sin_theta * tempx + cos_theta * tempy;
				double tempx3 = tempx2 - XF->shear * tempy2;
				double tempy3 = tempy2;
				xn = tempx3 / XF->xScale;
				yn = tempy3 / XF->yScale;
			} else {
				xn = gtc.xRawIntensity[idx];
				yn = gtc.yRawIntensity[idx];
			}
			// The on-disk width of each value follows the number format declared in the header
			if (numberFormat == 0) {
				float v;
				v = static_cast<float>(xn); sim.write(&v,sizeof(v));
				v = static_cast<float>(yn); sim.write(&v,sizeof(v));
			} else {
				uint16_t v;
				v = static_cast<uint16_t>(xn); sim.write(&v,sizeof(v));
				v = static_cast<uint16_t>(yn); sim.write(&v,sizeof(v));
			}
		}

	}

	sim.close();
}