// Подготовка входных данных (формируем вектор хромосом и имен из мультимэпа особей) ErrCode Crossover::get_DataIn_from_Individuals(DataIn& dataIn, Individuals& individuals) { Individuals::iterator itIndividual = individuals.begin(); size_t nIndividual = individuals.size(); int nChromoSector_in_Chromoset = sampleIndividual.nChromoSector_in_Chromoset; dataIn.chromoSectors.resize(nIndividual*nChromoSector_in_Chromoset); dataIn.expressions.resize(nIndividual); for (int i = 0; i < nIndividual; ++i) { memcpy(&(dataIn.chromoSectors[i*nChromoSector_in_Chromoset]), &(itIndividual->second.chromoSectors[0]), sampleIndividual.nByte_in_Chromoset); dataIn.expressions[i] = itIndividual->second.expression; ++itIndividual; } return 0; }
// Cross the given individuals on the compute device and collect the offspring.
//
// Generates the kernel source from the template at kernel_filePath by
// substituting its $$-placeholders, builds and runs the "crossing" kernel,
// then converts the kernel output into crossedIndividuals.
//
// crossedIndividuals - output, filled by get_Individuals_from_DataOut().
// individuals        - population passed to the kernel as input.
// device             - device description (warp size, compute units, LDS size).
// Returns 0 on success, otherwise the first non-zero error code reported by
// the template import, the kernel build, or the data conversion steps.
//
// NOTE(review): `individuals` must not be empty (its last element is read),
// and "$$N_CHROMOSET_IN_GLOB" is computed as size() - 10, which wraps around
// for fewer than 10 individuals - confirm the caller guarantees both.
ErrCode Crossover::get_crossedIndividuals(Individuals& crossedIndividuals, Individuals& individuals, const Device& device)
{
    ErrCode err = 0;

    // Launch parameters
    int nThread_in_Warp = device.nMaxThread_in_Warp;
    int nThread_in_Group = nThread_in_Warp * nWarp_in_Group;
    int nThread = nThread_in_Group * nGroup_in_CU * device.max_compute_units;

    // Local memory layout: sample + inverted sample + per-individual index and chromoset
    int loc_sample_ByteSize = sampleIndividual.nByte_in_Chromoset;
    int loc_sampleInvert_ByteSize = sampleIndividual.nByte_in_Chromoset;
    int nIndividual_in_Loc = (device.LDS_ByteSize / nGroup_in_CU - loc_sample_ByteSize - loc_sampleInvert_ByteSize) / (sizeof(int)+sampleIndividual.nByte_in_Chromoset);
    int loc_indexes_ByteSize = nIndividual_in_Loc * sizeof(int);
    int loc_Individuals_ByteSize = nIndividual_in_Loc * sampleIndividual.nByte_in_Chromoset;
    int loc_ByteSize = loc_sample_ByteSize + loc_sampleInvert_ByteSize + loc_indexes_ByteSize + loc_Individuals_ByteSize;

    // Generate the kernel source from the template
    Editor editor;
    err = editor.import_(kernel_filePath);
    if (err != 0)
        return err;

    editor.replace("$$EXPRESSION", expression);
    calc_nParent();
    editor.replace("$$N_PARENT", intToString(nParent));
    editor.replace("$$N_CHROMOSECTOR_IN_CHROMOSET", intToString(sampleIndividual.chromoSectors.size()));
    editor.replace("$$N_CHROMOSET_IN_LOC", intToString(nIndividual_in_Loc));
    editor.replace("$$N_CHROMOSET_IN_GLOB", intToString(individuals.size()-10));

    // The rating of the last individual in the container is used as the start rating.
    Individuals::iterator itLast = individuals.end();
    --itLast;
    int min_rating = itLast->second.rating;
    editor.replace("$$START_RATING", intToString(min_rating));

    editor.replace("$$N_GLOBAL_CICLES", intToString(nGlobal_Cicles));
    editor.replace("$$N_LOCAL_CICLES", intToString(nLocal_Cicles));
    editor.replace("$$K_RATING", floatToString(kRating));
    editor.replace("$$KOEF_A", intToString(15678));
    editor.replace("$$KOEF_C", intToString(34302));
    editor.expand("$$EXTRACT_N_PARENT", nParent);
    editor.export_(kernel_filePath + KERNEL_FILE_SUFFIX);

    std::string kernel_Source;
    editor.get_string(kernel_Source);
    editor.clear();

    Computer computer;
    computer.set_Device(device);
    computer.set_Kernel_Name("crossing");
    computer.set_Kernel_Source(kernel_Source);
    computer.set_nThread_in_Group(nThread_in_Group);
    computer.set_nThread(nThread);

    // Build (kernel compilation, context creation...)
    err = computer.build_kernel();
    if (err != 0)
        return err;

    DataIn dataIn;
    err = get_DataIn_from_Individuals(dataIn, individuals);
    if (err != 0)
    {
        // The kernel has already been built, so release it before bailing out.
        computer.clear();
        return err;
    }

    // Inputs
    computer.add_In(dataIn.chromoSectors);
    computer.add_In(sampleIndividual.chromoSectors);
    computer.add_In(sampleIndividual.invertChromoSectors);
    computer.add_Local(loc_ByteSize);

    // Outputs
    DataOut dataOut(nThread, nParent);
    computer.add_Out(dataOut.glob_indexes);
    computer.add_Out(dataOut.ratings);
    computer.add_Out(dataOut.c11s);
    computer.add_Out(dataOut.c01s);

    // Run
    computer.compute();

    // Restore the individuals from the kernel output
    err = get_Individuals_from_DataOut(crossedIndividuals, dataOut);
    computeTime = computer.get_computeTime();

    // Release resources, then report the conversion result instead of
    // unconditionally returning success.
    computer.clear();
    return err;
}
// mine(): main function for GERMLINE void GERMLINE::mine( string params, string prefix ) { PolymorphicIndividualsExtractor * pie = inputManager.getPie(); inputManager.getIndividuals(prefix); if ( ! pie->valid() ) return; string out = prefix; num_samples = 0; num_matches = 0; pie->loadInput(); MatchesBuilder mb( pie ); ofstream fout( ( out + ".log" ).c_str() ); fout << setw(65) << setfill('-') << ' ' << endl << setfill(' '); fout << " Welcome to GERMLINE, a tool for detecting long segments shared" << endl; fout << " by descent between pairs of individuals in near-linear time." << endl; fout << endl; fout << " For more details, please see the paper [ PMID: 18971310 ]" << endl; fout << " or the web-site [ http://www.cs.columbia.edu/~gusev/germline/ ]" << endl; fout << endl; fout << " GERMLINE was coded by Alexander Gusev and collaborators in " << endl; fout << " Itsik Pe'er's Computational Biology Lab at Columbia University" << endl; fout << setw(65) << setfill('-') << ' ' << endl << setfill(' '); if ( BINARY_OUT ) MATCH_FILE.open( ( out + ".bmatch" ).c_str() , ios::binary ); else MATCH_FILE.open( ( out + ".match" ).c_str() ); fout << params << endl; fout << setw(65) << setfill('-') << ' ' << endl << setfill(' '); //fout << setw(50) << left << "Minimum match length: " << MIN_MATCH_LEN << " cM" << endl; fout << setw(50) << "Allowed mismatching bits: " << MAX_ERR_HOM << " " << MAX_ERR_HET << endl; fout << setw(50) << "Word size: " << MARKER_SET_SIZE << endl; if ( ROI ) fout << setw(50) << "Target region: " << ALL_SNPS.getROIStart().getSNPID() << " - " << ALL_SNPS.getROIEnd().getSNPID() << endl; else fout << setw(50) << "Target region: " << "all" << endl; time_t timer[2]; time( &timer[0] ); if ( DEBUG ) cout << "DEBUG MODE ON" << endl; if ( ROI ) { ALL_SNPS.beginChromosome(); num_sets = (long)ceil((float)ALL_SNPS.currentSize()/(float)MARKER_SET_SIZE); mb.buildMatches(); ALL_SAMPLES.freeMatches(); ALL_SAMPLES.freeMarkers(); } else { int temp_i = 0; for ( 
ALL_SNPS.beginChromosome() ; ALL_SNPS.moreChromosome() ; ALL_SNPS.nextChromosome() ) { num_sets = (long)ceil((float)ALL_SNPS.currentSize()/(float)MARKER_SET_SIZE); mb.buildMatches(); if ( !SILENT ) cout << "Matches completed ... freeing memory" << endl; ALL_SAMPLES.freeMatches(); ALL_SAMPLES.freeMarkers(); } } //time( &timer[1] ); fout << setw(50) << "Total IBD segments: " << num_matches << endl; //fout << setw(50) << "Total runtime (sec): " << difftime( timer[1] , timer[0] ) << endl; fout.close(); MATCH_FILE.close(); if ( BINARY_OUT ) { ofstream bmid_out( ( out + ".bmid" ).c_str() ); ALL_SNPS.print( bmid_out ); bmid_out.close(); ofstream bsid_out( ( out + ".bsid" ).c_str() ); ALL_SAMPLES.print( bsid_out ); bsid_out.close(); } }
// mine(): main function for GERMLINE void GERMLINE::mine( string params, string map, string ped,string outfile) { PolymorphicIndividualsExtractor * pie = inputManager.getPie(); inputManager.getIndividuals(map, ped); if ( ! pie->valid() ) return; string out; if (outfile == "") out = inputManager.getOutput(); else out = outfile; num_samples = 0; num_matches = 0; pie->loadInput(); MatchesBuilder mb( pie ); unsigned long long init_mem = (unsigned long long)(mem_all_matches+mem_bufferchr+mem_chromosome+mem_ind+mem_inds+mem_matchfactory+mem_markers+mem_snps+mem_window); mem_expected_data= mb.calculateMemData(); if ( (init_mem+mem_expected_data) < MEM_BOUND) { cerr<<"\n\n\tBeginning Analysis"<<endl; ofstream fout( ( out + ".log" ).c_str() ); fout << setw(65) << setfill('-') << ' ' << endl << setfill(' '); fout << " Welcome to GERMLINE, a tool for detecting long segments shared" << endl; fout << " by descent between pairs of individuals in near-linear time." << endl; fout << endl; fout << " For more details, please see the paper [ PMID: 18971310 ]" << endl; fout << " or the web-site [ http://www.cs.columbia.edu/~gusev/germline/ ]" << endl; fout << endl; fout << " GERMLINE was coded by Alexander Gusev and collaborators in " << endl; fout << " Itsik Pe'er's Computational Biology Lab at Columbia University" << endl; fout << setw(65) << setfill('-') << ' ' << endl << setfill(' '); if ( BINARY_OUT ) MATCH_FILE.open( ( out + ".bmatch" ).c_str() , ios::binary ); else MATCH_FILE.open( ( out + ".match" ).c_str() ); fout << params << endl; fout << setw(65) << setfill('-') << ' ' << endl << setfill(' '); fout << setw(50) << left << "Minimum match length: " << MIN_MATCH_LEN << " cM" << endl; fout << setw(50) << "Allowed mismatch: " << MAX_ERR_HOMp << "% " << MAX_ERR_HETp << "%"<<endl; fout << setw(50) << "Minimum Word size: " << MIN_WINDOW_SIZE << endl; if ( ROI ) fout << setw(50) << "Target region: " << ALL_SNPS.getROIStart().getSNPID() << " - " << ALL_SNPS.getROIEnd().getSNPID() << 
endl; else fout << setw(50) << "Target region: " << "all" << endl; time_t timer[2]; time( &timer[0] ); if ( DEBUG ) cout << "DEBUG MODE ON" << endl; if ( ROI ) { ALL_SNPS.beginChromosome(); ALL_SNPS_CURRENT_SIZE = ALL_SNPS.currentSize(); mb.buildMatches(); if ( !SILENT ) cout << "\nMatches completed ... freeing memory" << endl; ALL_SAMPLES.freeMatches(); ALL_SAMPLES.freeMarkers(); WINDOWS_LIST.clear(); } else { for ( ALL_SNPS.beginChromosome() ; ALL_SNPS.moreChromosome() ; ALL_SNPS.nextChromosome() ) { MAX_WINDOW_SIZE=0; ALL_SNPS_CURRENT_SIZE = ALL_SNPS.currentSize(); mb.buildMatches(); if ( !SILENT ) cout << "\nMatches completed ... freeing memory" << endl; ALL_SAMPLES.freeMatches(); ALL_SAMPLES.freeMarkers(); WINDOWS_LIST.clear(); LAST_SET=false; } } time( &timer[1] ); fout << setw(50) << "Total IBD segments: " << num_matches << endl; fout << setw(50) << "Total runtime (sec): " << difftime( timer[1] , timer[0] ) << endl; fout.close(); MATCH_FILE.close(); if ( BINARY_OUT ) { ofstream bmid_out( ( out + ".bmid" ).c_str() ); ALL_SNPS.print( bmid_out ); bmid_out.close(); ofstream bsid_out( ( out + ".bsid" ).c_str() ); ALL_SAMPLES.print( bsid_out ); bsid_out.close(); }} else { cerr<<"\n\n\tNot enough memory to load data...Cannot begin analysis"; cerr<<"\n\tRequire atleast "<<ceil((float)(init_mem+mem_expected_data)/1048576)<<" Mb space"<<endl; } }