void CHPMultiDataDataTest::test_SumParams() { ParameterNameValueTypeList params; ParameterNameValueType param; CHPMultiDataData data; param.SetName(L"n1"); param.SetValueText(L"v1"); params.push_back(param); param.SetName(L"n2"); param.SetValueText(L"v2"); params.push_back(param); data.AddSummaryParams(params); ParameterNameValueTypeList params_out = data.GetSummaryParams(); CPPUNIT_ASSERT(params_out.size() == 2); ParameterNameValueTypeList::iterator it = params_out.begin(); param = *it; CPPUNIT_ASSERT(param.GetName() == L"n1"); CPPUNIT_ASSERT(param.GetValueText() == L"v1"); ++it; param = *it; CPPUNIT_ASSERT(param.GetName() == L"n2"); CPPUNIT_ASSERT(param.GetValueText() == L"v2"); }
void CalvinCHPMultiDataFileUpdaterTest::testMultiData1() { CreateReferenceFile1(); vector<ColumnInfo> cols; CalvinCHPMultiDataFileUpdater upd; ProbeSetMultiDataGenotypeData e; upd.Initialize(TEST1_FILE); e.call = 11; e.confidence = 111.0f; upd.UpdateMultiData(GenotypeMultiDataType, 0, e); e.call = 22; e.confidence = 222.0f; upd.UpdateMultiData(GenotypeMultiDataType, 2, e); CHPMultiDataData data; CHPMultiDataFileReader reader; CPPUNIT_ASSERT_NO_THROW(reader.SetFilename(TEST1_FILE)); CPPUNIT_ASSERT_NO_THROW(reader.Read(data)); CPPUNIT_ASSERT(data.GetEntryCount(GenotypeMultiDataType) == 4); data.GetGenotypeEntry(GenotypeMultiDataType, 0, e); CPPUNIT_ASSERT(e.name == "1"); CPPUNIT_ASSERT(e.call == 11); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.confidence, 111.0f, 0.000001f); data.GetGenotypeEntry(GenotypeMultiDataType, 1, e); CPPUNIT_ASSERT(e.name == "2"); CPPUNIT_ASSERT(e.call == 2); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.confidence, 20.0f, 0.000001f); data.GetGenotypeEntry(GenotypeMultiDataType, 2, e); CPPUNIT_ASSERT(e.name == "3"); CPPUNIT_ASSERT(e.call == 22); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.confidence, 222.0f, 0.000001f); data.GetGenotypeEntry(GenotypeMultiDataType, 3, e); CPPUNIT_ASSERT(e.name == "4"); CPPUNIT_ASSERT(e.call == 4); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.confidence, 40.0f, 0.000001f); }
void CalvinCHPMultiDataFileUpdaterTest::testMultiData3() { CreateReferenceFile3(); vector<ColumnInfo> cols; ByteColumn bcol(L"byte"); cols.push_back(bcol); UByteColumn ubcol(L"ubyte"); cols.push_back(ubcol); ShortColumn scol(L"short"); cols.push_back(scol); UShortColumn uscol(L"ushort"); cols.push_back(uscol); IntColumn icol(L"int"); cols.push_back(icol); UIntColumn uicol(L"uint"); cols.push_back(uicol); FloatColumn fcol(L"float"); cols.push_back(fcol); ASCIIColumn acol(L"ascii", 7); cols.push_back(acol); UnicodeColumn tcol(L"text", 10); cols.push_back(tcol); CalvinCHPMultiDataFileUpdater upd; ProbeSetMultiDataCopyNumberData e; ProbeSetMultiDataCytoRegionData c; upd.Initialize(TEST3_FILE); e.metrics.resize(9); e.chr = 11; e.position = 111; e.metrics[0].SetValueInt8(9); e.metrics[1].SetValueUInt8(10); e.metrics[2].SetValueInt16(17); e.metrics[3].SetValueUInt16(18); e.metrics[4].SetValueInt32(33); e.metrics[5].SetValueUInt32(34); e.metrics[6].SetValueFloat(55.0f); e.metrics[7].SetValueAscii("text"); e.metrics[8].SetValueText(L"ascii"); upd.UpdateMultiData(CopyNumberMultiDataType, 0, e, cols); e.chr = 22; e.position = 222; e.metrics[0].SetValueInt8(10); e.metrics[1].SetValueUInt8(11); e.metrics[2].SetValueInt16(18); e.metrics[3].SetValueUInt16(19); e.metrics[4].SetValueInt32(34); e.metrics[5].SetValueUInt32(35); e.metrics[6].SetValueFloat(66.0f); e.metrics[7].SetValueAscii("text2"); e.metrics[8].SetValueText(L"ascii2"); upd.UpdateMultiData(CopyNumberMultiDataType, 2, e, cols); vector<ColumnInfo> cycols; c.call = 11; c.confidenceScore = 111.0f; c.chr = 11; c.startPosition = 11; c.stopPosition = 22; upd.UpdateMultiData(CytoMultiDataType, 0, c, cycols); c.call = 2; c.confidenceScore = 20.0f; c.chr = 22; c.startPosition = 22; c.stopPosition = 33; upd.UpdateMultiData(CytoMultiDataType, 1, c, cycols); CHPMultiDataData data; CHPMultiDataFileReader reader; CPPUNIT_ASSERT_NO_THROW(reader.SetFilename(TEST3_FILE)); CPPUNIT_ASSERT_NO_THROW(reader.Read(data)); 
CPPUNIT_ASSERT(data.GetEntryCount(CopyNumberMultiDataType) == 4); CPPUNIT_ASSERT(data.GetEntryCount(CytoMultiDataType) == 2); data.GetCopyNumberEntry(CopyNumberMultiDataType, 0, e); CPPUNIT_ASSERT(e.name == "1"); CPPUNIT_ASSERT(e.chr == 11); CPPUNIT_ASSERT(e.position == 111); CPPUNIT_ASSERT(e.metrics[0].GetValueInt8() == 9); CPPUNIT_ASSERT(e.metrics[1].GetValueUInt8() == 10); CPPUNIT_ASSERT(e.metrics[2].GetValueInt16() == 17); CPPUNIT_ASSERT(e.metrics[3].GetValueUInt16() == 18); CPPUNIT_ASSERT(e.metrics[4].GetValueInt32() == 33); CPPUNIT_ASSERT(e.metrics[5].GetValueUInt32() == 34); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.metrics[6].GetValueFloat(), 55.0f, 0.00001f); CPPUNIT_ASSERT(e.metrics[7].GetValueAscii() == "text"); CPPUNIT_ASSERT(e.metrics[8].GetValueText() == L"ascii"); data.GetCopyNumberEntry(CopyNumberMultiDataType, 1, e); CPPUNIT_ASSERT(e.name == "2"); CPPUNIT_ASSERT(e.chr == 2); CPPUNIT_ASSERT(e.position == 20); CPPUNIT_ASSERT(e.metrics[0].GetValueInt8() == 8); CPPUNIT_ASSERT(e.metrics[1].GetValueUInt8() == 8); CPPUNIT_ASSERT(e.metrics[2].GetValueInt16() == 16); CPPUNIT_ASSERT(e.metrics[3].GetValueUInt16() == 16); CPPUNIT_ASSERT(e.metrics[4].GetValueInt32() == 32); CPPUNIT_ASSERT(e.metrics[5].GetValueUInt32() == 32); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.metrics[6].GetValueFloat(), 44.0f, 0.00001f); CPPUNIT_ASSERT(e.metrics[7].GetValueAscii() == "ascii"); CPPUNIT_ASSERT(e.metrics[8].GetValueText() == L"text"); data.GetCopyNumberEntry(CopyNumberMultiDataType, 2, e); CPPUNIT_ASSERT(e.name == "3"); CPPUNIT_ASSERT(e.chr == 22); CPPUNIT_ASSERT(e.position == 222); CPPUNIT_ASSERT(e.metrics[0].GetValueInt8() == 10); CPPUNIT_ASSERT(e.metrics[1].GetValueUInt8() == 11); CPPUNIT_ASSERT(e.metrics[2].GetValueInt16() == 18); CPPUNIT_ASSERT(e.metrics[3].GetValueUInt16() == 19); CPPUNIT_ASSERT(e.metrics[4].GetValueInt32() == 34); CPPUNIT_ASSERT(e.metrics[5].GetValueUInt32() == 35); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.metrics[6].GetValueFloat(), 66.0f, 0.00001f); 
CPPUNIT_ASSERT(e.metrics[7].GetValueAscii() == "text2"); CPPUNIT_ASSERT(e.metrics[8].GetValueText() == L"ascii2"); data.GetCopyNumberEntry(CopyNumberMultiDataType, 3, e); CPPUNIT_ASSERT(e.name == "4"); CPPUNIT_ASSERT(e.chr == 4); CPPUNIT_ASSERT(e.position == 40); CPPUNIT_ASSERT(e.metrics[0].GetValueInt8() == 8); CPPUNIT_ASSERT(e.metrics[1].GetValueUInt8() == 8); CPPUNIT_ASSERT(e.metrics[2].GetValueInt16() == 16); CPPUNIT_ASSERT(e.metrics[3].GetValueUInt16() == 16); CPPUNIT_ASSERT(e.metrics[4].GetValueInt32() == 32); CPPUNIT_ASSERT(e.metrics[5].GetValueUInt32() == 32); CPPUNIT_ASSERT_DOUBLES_EQUAL(e.metrics[6].GetValueFloat(), 44.0f, 0.00001f); CPPUNIT_ASSERT(e.metrics[7].GetValueAscii() == "ascii"); CPPUNIT_ASSERT(e.metrics[8].GetValueText() == L"text"); data.GetCytoEntry(CytoMultiDataType, 0, c); CPPUNIT_ASSERT(c.name == "1"); CPPUNIT_ASSERT(c.call == 11); CPPUNIT_ASSERT(c.chr == 11); CPPUNIT_ASSERT(c.startPosition == 11); CPPUNIT_ASSERT(c.stopPosition == 22); CPPUNIT_ASSERT_DOUBLES_EQUAL(c.confidenceScore, 111.0f, 0.00001f); data.GetCytoEntry(CytoMultiDataType, 1, c); CPPUNIT_ASSERT(c.name == "2"); CPPUNIT_ASSERT(c.chr == 22); CPPUNIT_ASSERT(c.startPosition == 22); CPPUNIT_ASSERT(c.stopPosition == 33); CPPUNIT_ASSERT(c.call == 2); CPPUNIT_ASSERT_DOUBLES_EQUAL(c.confidenceScore, 20.0f, 0.00001f); }
/*! Create a "multi-data" CHP file with just the header information. The remainder of the file * will be created at a later time using the buffer writer technique. * The CHP file will contain only "genotyping" results. * @param execId The execution identifier. This identifier is used to identify the batch run that created the CHP files. * @param celFile The full path to the parent CEL file. The header of the CEL file is copied to the CHP file. * @param outFile The name of the output CHP file. * @param extraColNames The names of the extra data columns. Should not include probe set name, call and confidence columns. * @param extraColTypes The types (float, int, ubyte) of the extra columns. * @param numEntries The number of rows (entries) of results to store in the CHP file. * @param maxProbeSetNameLength The maximum length of the probe set names. * @param algName The name of the algorithm used to create the results. * @param algVersion The algorithm version. * @param chipType the chip type, also known as the probe array type. * @param programName The name of the program used to create the CHP file. * @param programVersion The version of the program. * @param programCompany The company or institution who developed the CHP creating software. * @param paramNames A list of parameter names to store in the CHP file header. * @param paramValues A list of parameter values to store in the CHP file header. * @param sumNames A list of summary statistic names to store in the CHP file header. * @param sumValues A list of summary statistic values to store in the CHP file header. 
*/ static void CreateFileWithHeader ( const string &execId, const string &celFile, const string &outFile, const vector<string>& extraColNames, const vector<string>& extraColTypes, unsigned long numEntries, int maxProbeSetNameLength, const string &algName, const string &algVersion, const string &chipType, const string &programName, const string &programVersion, const string &programCompany, const vector<string>& paramNames, const vector<string>& paramValues, const vector<string>& sumNames, const vector<string>& sumValues, const vector<string>& extraNames, const vector<string>& extraValues ) { // Create the vector of extra columns. The sample code here supports only float, 32 bit integers and 8 bit unsigned integers. vector<ColumnInfo> extraColumns; int ncols = (int)extraColNames.size(); for (int icol=0; icol<ncols; icol++) { if (extraColTypes[icol] == "float") { FloatColumn fcol(StringUtils::ConvertMBSToWCS(extraColNames[icol])); extraColumns.push_back(fcol); } else if (extraColTypes[icol] == "int") { IntColumn intcol(StringUtils::ConvertMBSToWCS(extraColNames[icol])); extraColumns.push_back(intcol); } else if (extraColTypes[icol] == "ubyte") { UByteColumn ubcol(StringUtils::ConvertMBSToWCS(extraColNames[icol])); extraColumns.push_back(ubcol); } else { throw string("Unsupported column type: ") + extraColTypes[icol]; } } // Create the data object CHPMultiDataData *data = new CHPMultiDataData(outFile); data->SetEntryCount(GenotypeMultiDataType, numEntries, maxProbeSetNameLength, extraColumns); data->SetAlgName(StringUtils::ConvertMBSToWCS(algName)); data->SetAlgVersion(StringUtils::ConvertMBSToWCS(algVersion)); data->SetArrayType(StringUtils::ConvertMBSToWCS(chipType)); // Store the CEL header if (celFile.length() > 0 && FileUtils::Exists(celFile.c_str()) == true) { FusionCELData cel; cel.SetFileName(celFile.c_str()); cel.ReadHeader(); GenericData *gdata = cel.GetGenericData(); if (gdata != NULL) 
data->GetFileHeader()->GetGenericDataHdr()->AddParent(*gdata->Header().GetGenericDataHdr()); cel.Close(); } // Add algorithm parameters to list. ParameterNameValueTypeList params; ParameterNameValueType param; if (programName.empty() == false) { param.SetName(L"program-name"); param.SetValueText(StringUtils::ConvertMBSToWCS(programName)); data->GetGenericData().Header().GetGenericDataHdr()->AddNameValParam(param); } if (programVersion.empty() == false) { param.SetName(L"program-version"); param.SetValueText(StringUtils::ConvertMBSToWCS(programVersion)); data->GetGenericData().Header().GetGenericDataHdr()->AddNameValParam(param); } if (programCompany.empty() == false) { param.SetName(L"program-company"); param.SetValueText(StringUtils::ConvertMBSToWCS(programCompany)); data->GetGenericData().Header().GetGenericDataHdr()->AddNameValParam(param); } int nparams = (int) extraNames.size(); for(int iparam=0; iparam<nparams; iparam++) { param.SetName(StringUtils::ConvertMBSToWCS(extraNames[iparam])); param.SetValueAscii(extraValues[iparam]); data->GetGenericData().Header().GetGenericDataHdr()->AddNameValParam(param); } nparams = (int) paramNames.size(); param.SetName(L"exec-guid"); param.SetValueAscii(execId); params.push_back(param); for(int iparam=0; iparam<nparams; iparam++) { param.SetName(StringUtils::ConvertMBSToWCS(paramNames[iparam])); param.SetValueAscii(paramValues[iparam]); params.push_back(param); } if (params.empty() == false) data->AddAlgParams(params); params.clear(); nparams = (int) sumNames.size(); for(int iparam=0; iparam<nparams; iparam++) { param.SetName(StringUtils::ConvertMBSToWCS(sumNames[iparam])); param.SetValueAscii(sumValues[iparam]); params.push_back(param); } if (params.empty() == false) data->AddSummaryParams(params); // Creating the writer object will create the file with the header information. CHPMultiDataFileWriter writer(*data); }
/*! Checks that the value given to SetAlgVersion is returned by GetAlgVersion. */
void CHPMultiDataDataTest::test_AlgVersion()
{
    const wchar_t *version = L"1.0";

    CHPMultiDataData data;
    data.SetAlgVersion(version);

    CPPUNIT_ASSERT(data.GetAlgVersion() == version);
}
/*! Checks that the value given to SetAlgName is returned by GetAlgName. */
void CHPMultiDataDataTest::test_AlgName()
{
    const wchar_t *algorithm = L"alg";

    CHPMultiDataData data;
    data.SetAlgName(algorithm);

    CPPUNIT_ASSERT(data.GetAlgName() == algorithm);
}
/*! Checks that the value given to SetArrayType is returned by GetArrayType. */
void CHPMultiDataDataTest::test_ArrayType()
{
    const wchar_t *arrayType = L"test3";

    CHPMultiDataData data;
    data.SetArrayType(arrayType);

    CPPUNIT_ASSERT(data.GetArrayType() == arrayType);
}
/*! Checks that the value given to SetFilename is returned by GetFilename. */
void CHPMultiDataDataTest::test_FileName()
{
    const char *fileName = "file";

    CHPMultiDataData data;
    data.SetFilename(fileName);

    CPPUNIT_ASSERT(data.GetFilename() == fileName);
}
/*
 * Create a results file with the CEL file header and other parameters.
 *
 * The file is first created with its header only (entry counts, algorithm and
 * program information, algorithm/summary parameters, the chromosome start/stop
 * parameters on the copy number data set, and the parent CEL header when the CEL
 * object exposes one). A buffer writer is then allocated and initialized for the
 * new file and stored in the bufferWriter member.
 *
 * Any exception raised while doing so is rethrown as a string that starts with
 * "Error creating the output file: " followed by the file name and, for a
 * CalvinException, its non-empty description.
 */
void CopyNumberResultWriter::CreateResultFile(affymetrix_fusion_io::FusionCELData& cel, const std::string& fileName)
{
    try
    {
        // Create the results file with the header. The data and writer objects are
        // automatic objects confined to this scope so that they are released even
        // when one of the calls below throws.
        {
            CHPMultiDataData data(fileName);
            data.SetEntryCount(CopyNumberMultiDataType, numberProbeSets, maxProbeSetNameLength[CopyNumberMultiDataType], columns);
            if (numberCytoRegions > 0)
                data.SetEntryCount(CytoMultiDataType, numberCytoRegions, maxProbeSetNameLength[CytoMultiDataType],cytoRegionColumns);
            if (numberGenotypeProbeSets > 0)
                data.SetEntryCount(GenotypeMultiDataType, numberGenotypeProbeSets, maxProbeSetNameLength[GenotypeMultiDataType], genotypeColumns);
            data.SetAlgName(StringUtils::ConvertMBSToWCS(algName));
            data.SetAlgVersion(StringUtils::ConvertMBSToWCS(algVersion));
            data.SetArrayType(cel.GetChipType());

            // Program identification goes into the generic data header.
            GenericDataHeader *gdh = data.GetFileHeader()->GetGenericDataHdr();
            ParameterNameValueType param;
            param.SetName(PROGRAM_NAME);
            param.SetValueText(StringUtils::ConvertMBSToWCS(programName));
            gdh->AddNameValParam(param);
            param.SetName(L"program-version");
            param.SetValueText(StringUtils::ConvertMBSToWCS(programVersion));
            gdh->AddNameValParam(param);
            param.SetName(PROGRAM_COMPANY);
            param.SetValueText(StringUtils::ConvertMBSToWCS(programCompany));
            gdh->AddNameValParam(param);

            // Algorithm parameters: the stored list plus the chip type as "ArraySet".
            ParameterNameValueTypeList params = algParams;
            param.SetName(L"ArraySet");
            param.SetValueText(cel.GetChipType());
            params.push_back(param);
            data.AddAlgParams(params);
            data.AddSummaryParams(summaryParams);

            // Chromosome start/stop parameters are attached to the copy number data set header.
            DataSetHeader *dsh = data.GetDataSetHeader(CopyNumberMultiDataType);
            for (ParameterNameValueTypeList::iterator it=chrStartStop.begin(); it!=chrStartStop.end(); ++it)
                dsh->AddNameValParam(*it);

            // Record the CEL file as the parent when its generic data is available.
            GenericData *gdata = cel.GetGenericData();
            if (gdata != NULL)
                gdh->AddParent(*gdata->Header().GetGenericDataHdr());

            // Constructing the writer is the step that produces the file. It is the
            // last object declared in this scope, so it is destroyed before the data
            // object, matching the original "delete writer; delete data;" order.
            CHPMultiDataFileWriter writer(data);
        }

        // Create a buffer writer object
        outputFiles.clear();
        outputFiles.push_back(fileName);
        vector<MultiDataType> dataTypes;
        dataTypes.push_back(CopyNumberMultiDataType);
        if (numberCytoRegions > 0)
            dataTypes.push_back(CytoMultiDataType);
        if (numberGenotypeProbeSets > 0)
            dataTypes.push_back(GenotypeMultiDataType);
        // NOTE(review): any buffer writer already held by the member is overwritten
        // here without being released - confirm callers never invoke this twice
        // without cleaning up in between.
        bufferWriter = new CHPMultiDataFileBufferWriter();
        bufferWriter->Initialize(&outputFiles, dataTypes, maxProbeSetNameLength);
    }
    catch (CalvinException &ex)
    {
        string err = "Error creating the output file: " + fileName;
        wstring msg = ex.ToString();
        if (msg.empty() == false)
            err += " " + StringUtils::ConvertWCSToMBS(msg);
        throw err;
    }
    catch (...)
    {
        string err = "Error creating the output file: " + fileName;
        throw err;
    }
}