예제 #1
0
// Prepares the input data: builds the flat vector of chromosome sectors and the
// vector of expressions from the multimap of individuals.
//
// Parameters:
//   dataIn      - output. chromoSectors is resized to
//                 individuals.size() * sampleIndividual.nChromoSector_in_Chromoset
//                 elements and expressions to individuals.size() elements.
//   individuals - population to read; visited in container order.
//
// Returns 0; there is no failure path in this function.
//
// NOTE(review): each individual is assumed to hold at least
// sampleIndividual.nByte_in_Chromoset bytes of sector data, and that byte count
// is assumed to fit into nChromoSector_in_Chromoset destination elements —
// confirm against the Individual definition.
ErrCode Crossover::get_DataIn_from_Individuals(DataIn& dataIn, Individuals& individuals)
{
	const size_t nIndividual = individuals.size();
	// Held as size_t so that the destination offset below is computed in size_t
	// instead of overflowing int on large populations.
	const size_t nChromoSector_in_Chromoset = sampleIndividual.nChromoSector_in_Chromoset;
	const size_t nByte_in_Chromoset = sampleIndividual.nByte_in_Chromoset;

	dataIn.chromoSectors.resize(nIndividual * nChromoSector_in_Chromoset);
	dataIn.expressions.resize(nIndividual);

	// Taking the address of element 0 of an empty vector is undefined, so the
	// raw copy is skipped entirely when a chromoset has no payload.
	const bool hasSectorData = (nChromoSector_in_Chromoset != 0) && (nByte_in_Chromoset != 0);

	size_t i = 0;
	for (Individuals::iterator itIndividual = individuals.begin(); itIndividual != individuals.end(); ++itIndividual, ++i)
	{
		if (hasSectorData)
		{
			memcpy(&(dataIn.chromoSectors[i * nChromoSector_in_Chromoset]), &(itIndividual->second.chromoSectors[0]), nByte_in_Chromoset);
		}
		dataIn.expressions[i] = itIndividual->second.expression;
	}
	return 0;
}
예제 #2
0
// Runs one crossover pass on the given device and fills crossedIndividuals
// with the result.
//
// Steps, in order:
//   1. Derive the thread counts and the local-memory layout from the device.
//   2. Load the kernel text from kernel_filePath, substitute the $$ placeholders,
//      export the result to kernel_filePath + KERNEL_FILE_SUFFIX.
//   3. Build the "crossing" kernel, bind inputs / local memory / outputs, run it.
//   4. Convert the kernel output back into individuals and record computeTime.
//
// Parameters:
//   crossedIndividuals - output, filled by get_Individuals_from_DataOut().
//   individuals        - source population.
//   device             - device description (warp size, compute units, LDS size).
//
// Returns 0 on success, otherwise the first non-zero error code reported by
// the kernel import, the kernel build, or the input/output conversion.
ErrCode Crossover::get_crossedIndividuals(Individuals& crossedIndividuals, Individuals& individuals, const Device& device)
{
	ErrCode err = 0;

	// Launch parameters
	int nThread_in_Warp = device.nMaxThread_in_Warp;
	int nThread_in_Group = nThread_in_Warp * nWarp_in_Group;
	int nThread = nThread_in_Group * nGroup_in_CU * device.max_compute_units;

	// Local memory: each group gets LDS_ByteSize / nGroup_in_CU bytes. Two
	// chromoset-sized buffers (sample and inverted sample) are reserved first;
	// the remainder is divided into slots of one int index plus one chromoset.
	int loc_sample_ByteSize = sampleIndividual.nByte_in_Chromoset;
	int loc_sampleInvert_ByteSize = sampleIndividual.nByte_in_Chromoset;
	int nIndividual_in_Loc = (device.LDS_ByteSize / nGroup_in_CU - loc_sample_ByteSize - loc_sampleInvert_ByteSize) / (sizeof(int)+sampleIndividual.nByte_in_Chromoset);
	int loc_indexes_ByteSize = nIndividual_in_Loc * sizeof(int);
	int loc_Individuals_ByteSize = nIndividual_in_Loc * sampleIndividual.nByte_in_Chromoset;
	int loc_ByteSize = loc_sample_ByteSize + loc_sampleInvert_ByteSize + loc_indexes_ByteSize + loc_Individuals_ByteSize;

	Editor editor;

	err = editor.import_(kernel_filePath);
	if (err != 0) return err;
	editor.replace("$$EXPRESSION", expression);
	// nParent must be up to date before it is substituted and before DataOut is sized.
	calc_nParent();
	editor.replace("$$N_PARENT", intToString(nParent));
	editor.replace("$$N_CHROMOSECTOR_IN_CHROMOSET", intToString(sampleIndividual.chromoSectors.size()));
	editor.replace("$$N_CHROMOSET_IN_LOC", intToString(nIndividual_in_Loc));
	// NOTE(review): size() is unsigned, so this expression wraps around when the
	// population has fewer than 10 individuals; the reason for "-10" is not visible here.
	editor.replace("$$N_CHROMOSET_IN_GLOB", intToString(individuals.size()-10));
	// Rating of the last individual in container order.
	// NOTE(review): undefined behaviour on an empty population — callers must
	// pass at least one individual.
	int min_rating = individuals.rbegin()->second.rating;
	editor.replace("$$START_RATING", intToString(min_rating));
	editor.replace("$$N_GLOBAL_CICLES", intToString(nGlobal_Cicles));
	editor.replace("$$N_LOCAL_CICLES", intToString(nLocal_Cicles));
	editor.replace("$$K_RATING", floatToString(kRating));
	editor.replace("$$KOEF_A", intToString(15678));
	editor.replace("$$KOEF_C", intToString(34302));

	editor.expand("$$EXTRACT_N_PARENT", nParent);

	editor.export_(kernel_filePath + KERNEL_FILE_SUFFIX);

	std::string kernel_Source;
	editor.get_string(kernel_Source);
	editor.clear();

	Computer computer;

	computer.set_Device(device);
	computer.set_Kernel_Name("crossing");
	computer.set_Kernel_Source(kernel_Source);
	computer.set_nThread_in_Group(nThread_in_Group);
	computer.set_nThread(nThread);

	// Build (kernel compilation, context creation...)
	err = computer.build_kernel();
	if (err != 0) return err;

	DataIn dataIn;
	err = get_DataIn_from_Individuals(dataIn, individuals);
	if (err != 0)
	{
		// The kernel has already been built, so release it before bailing out.
		computer.clear();
		return err;
	}

	// Inputs
	computer.add_In(dataIn.chromoSectors);
	computer.add_In(sampleIndividual.chromoSectors);
	computer.add_In(sampleIndividual.invertChromoSectors);

	computer.add_Local(loc_ByteSize);

	DataOut dataOut(nThread, nParent);

	// Outputs
	computer.add_Out(dataOut.glob_indexes);
	computer.add_Out(dataOut.ratings);
	computer.add_Out(dataOut.c11s);
	computer.add_Out(dataOut.c01s);

	// Run
	computer.compute();

	// Rebuild the individuals from the kernel output
	err = get_Individuals_from_DataOut(crossedIndividuals, dataOut);

	computeTime = computer.get_computeTime();

	// Release resources
	computer.clear();

	// Propagate a conversion failure instead of always reporting success.
	return err;
}
예제 #3
0
// mine(): main function for GERMLINE
//
// Loads the individuals for `prefix`, writes a run header to "<prefix>.log",
// builds the matches (for the region of interest only, or chromosome by
// chromosome) and writes them to "<prefix>.match", or "<prefix>.bmatch" when
// BINARY_OUT is set. In binary mode "<prefix>.bmid" and "<prefix>.bsid" are
// written as well.
//
//   params - text echoed verbatim into the log file
//   prefix - passed to inputManager.getIndividuals() and used as the base name
//            of every output file
//
// Returns early without producing output when the extractor is not valid or
// when the match file cannot be opened. A log file that cannot be opened only
// produces a warning.
void GERMLINE::mine( string params, string prefix )
{
	PolymorphicIndividualsExtractor * pie = inputManager.getPie();
	inputManager.getIndividuals(prefix);
	if ( ! pie->valid() ) return;
	const string & out = prefix;
	num_samples = 0;
	num_matches = 0;

	pie->loadInput();
	MatchesBuilder mb( pie );

	ofstream fout( ( out + ".log" ).c_str() );
	// Writes to a stream that failed to open are silently dropped, so say so once.
	if ( !fout ) cerr << "WARNING: cannot open log file " << out << ".log" << endl;

	fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
	fout << " Welcome to GERMLINE, a tool for detecting long segments shared" << endl;
	fout << " by descent between pairs of individuals in near-linear time." << endl;
	fout << endl;
	fout << " For more details, please see the paper [ PMID: 18971310 ]" << endl;
	fout << " or the web-site [ http://www.cs.columbia.edu/~gusev/germline/ ]" << endl;
	fout << endl;
	fout << " GERMLINE was coded by Alexander Gusev and collaborators in " << endl;
	fout << " Itsik Pe'er's Computational Biology Lab at Columbia University" << endl;
	fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');

	const string matchPath = out + ( BINARY_OUT ? ".bmatch" : ".match" );
	if ( BINARY_OUT ) MATCH_FILE.open( matchPath.c_str() , ios::binary );
	else MATCH_FILE.open( matchPath.c_str() );
	if ( !MATCH_FILE.is_open() )
	{
		// Without the match file the whole analysis would run and its results be lost.
		cerr << "ERROR: cannot open match output file " << matchPath << endl;
		return;
	}

	fout << params << endl;
	fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
	//fout << setw(50) << left << "Minimum match length: " << MIN_MATCH_LEN << " cM" << endl;
	fout << setw(50) << "Allowed mismatching bits: " << MAX_ERR_HOM << " " << MAX_ERR_HET << endl;
	fout << setw(50) << "Word size: " << MARKER_SET_SIZE << endl;
	if ( ROI )
		fout << setw(50) << "Target region: " << ALL_SNPS.getROIStart().getSNPID() << " - " << ALL_SNPS.getROIEnd().getSNPID() << endl;
	else
		fout << setw(50) << "Target region: " << "all" << endl;

	// Start time; only consumed by the (currently disabled) runtime report below.
	time_t timer[2]; time( &timer[0] );

	if ( DEBUG ) cout << "DEBUG MODE ON" << endl;

	if ( ROI )
	{
		// Region of interest: only the first chromosome position is processed.
		ALL_SNPS.beginChromosome();
		num_sets = (long)ceil((float)ALL_SNPS.currentSize()/(float)MARKER_SET_SIZE);
		mb.buildMatches();
		ALL_SAMPLES.freeMatches();
		ALL_SAMPLES.freeMarkers();
	}
	else
	{
		for ( ALL_SNPS.beginChromosome() ; ALL_SNPS.moreChromosome() ; ALL_SNPS.nextChromosome() )
		{
			// Number of MARKER_SET_SIZE-wide words needed for this chromosome, rounded up.
			num_sets = (long)ceil((float)ALL_SNPS.currentSize()/(float)MARKER_SET_SIZE);
			mb.buildMatches();
			if ( !SILENT ) cout << "Matches completed ... freeing memory" << endl;
			ALL_SAMPLES.freeMatches();
			ALL_SAMPLES.freeMarkers();
		}
	}

	//time( &timer[1] );

	fout << setw(50) << "Total IBD segments: " << num_matches << endl;
	//fout << setw(50) << "Total runtime (sec): " << difftime( timer[1] , timer[0] ) << endl;
	fout.close();
	MATCH_FILE.close();

	if ( BINARY_OUT )
	{
		ofstream bmid_out( ( out + ".bmid" ).c_str() );
		ALL_SNPS.print( bmid_out );
		bmid_out.close();

		ofstream bsid_out( ( out + ".bsid" ).c_str() );
		ALL_SAMPLES.print( bsid_out );
		bsid_out.close();
	}
}
예제 #4
0
// mine(): main function for GERMLINE
//
// Loads the individuals from `map` / `ped`, checks the estimated memory
// requirement against MEM_BOUND, writes a run header to "<out>.log", builds
// the matches (for the region of interest only, or chromosome by chromosome)
// and writes them to "<out>.match", or "<out>.bmatch" when BINARY_OUT is set.
// In binary mode "<out>.bmid" and "<out>.bsid" are written as well.
//
//   params  - text echoed verbatim into the log file
//   map     - forwarded to inputManager.getIndividuals()
//   ped     - forwarded to inputManager.getIndividuals()
//   outfile - base name for the output files; when empty,
//             inputManager.getOutput() is used instead
//
// Returns early without producing output when the extractor is not valid, when
// the memory estimate is not below MEM_BOUND, or when the match file cannot be
// opened. A log file that cannot be opened only produces a warning.
void GERMLINE::mine( string params, string map, string ped,string outfile)
{
	PolymorphicIndividualsExtractor * pie = inputManager.getPie();
	inputManager.getIndividuals(map, ped);
	if ( ! pie->valid() ) return;

	string out;
	if ( outfile.empty() )
		out = inputManager.getOutput();
	else
		out = outfile;

	num_samples = 0;
	num_matches = 0;

	pie->loadInput();
	MatchesBuilder mb( pie );

	// Sum of the memory counters accumulated so far, plus the builder's estimate.
	unsigned long long init_mem = (unsigned long long)(mem_all_matches+mem_bufferchr+mem_chromosome+mem_ind+mem_inds+mem_matchfactory+mem_markers+mem_snps+mem_window);
	mem_expected_data= mb.calculateMemData();

	if ( !( (init_mem+mem_expected_data) < MEM_BOUND ) )
	{
		cerr<<"\n\n\tNot enough memory to load data...Cannot begin analysis";
		// Bytes -> MiB, rounded up. double keeps byte counts exact where float would not.
		cerr<<"\n\tRequire atleast "<<ceil(static_cast<double>(init_mem+mem_expected_data)/1048576.0)<<" Mb space"<<endl;
		return;
	}

	cerr<<"\n\n\tBeginning Analysis"<<endl;

	ofstream fout( ( out + ".log" ).c_str() );
	// Writes to a stream that failed to open are silently dropped, so say so once.
	if ( !fout ) cerr << "WARNING: cannot open log file " << out << ".log" << endl;

	fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
	fout << " Welcome to GERMLINE, a tool for detecting long segments shared" << endl;
	fout << " by descent between pairs of individuals in near-linear time." << endl;
	fout << endl;
	fout << " For more details, please see the paper [ PMID: 18971310 ]" << endl;
	fout << " or the web-site [ http://www.cs.columbia.edu/~gusev/germline/ ]" << endl;
	fout << endl;
	fout << " GERMLINE was coded by Alexander Gusev and collaborators in " << endl;
	fout << " Itsik Pe'er's Computational Biology Lab at Columbia University" << endl;
	fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');

	const string matchPath = out + ( BINARY_OUT ? ".bmatch" : ".match" );
	if ( BINARY_OUT ) MATCH_FILE.open( matchPath.c_str() , ios::binary );
	else MATCH_FILE.open( matchPath.c_str() );
	if ( !MATCH_FILE.is_open() )
	{
		// Without the match file the whole analysis would run and its results be lost.
		cerr << "ERROR: cannot open match output file " << matchPath << endl;
		return;
	}

	fout << params << endl;
	fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
	fout << setw(50) << left << "Minimum match length: " << MIN_MATCH_LEN << " cM" << endl;
	fout << setw(50) << "Allowed mismatch: " << MAX_ERR_HOMp << "% " << MAX_ERR_HETp << "%"<<endl;
	fout << setw(50) << "Minimum Word size: " << MIN_WINDOW_SIZE << endl;
	if ( ROI )
		fout << setw(50) << "Target region: " << ALL_SNPS.getROIStart().getSNPID() << " - " << ALL_SNPS.getROIEnd().getSNPID() << endl;
	else
		fout << setw(50) << "Target region: " << "all" << endl;

	// timer[0] = start, timer[1] = end of the match-building phase.
	time_t timer[2]; time( &timer[0] );

	if ( DEBUG ) cout << "DEBUG MODE ON" << endl;

	if ( ROI )
	{
		// Region of interest: only the first chromosome position is processed.
		ALL_SNPS.beginChromosome();
		ALL_SNPS_CURRENT_SIZE = ALL_SNPS.currentSize();
		mb.buildMatches();
		if ( !SILENT ) cout << "\nMatches completed ... freeing memory" << endl;
		ALL_SAMPLES.freeMatches();
		ALL_SAMPLES.freeMarkers();
		WINDOWS_LIST.clear();
	}
	else
	{
		for ( ALL_SNPS.beginChromosome() ; ALL_SNPS.moreChromosome() ; ALL_SNPS.nextChromosome() )
		{
			// Per-chromosome state is reset before and after each build.
			MAX_WINDOW_SIZE=0;
			ALL_SNPS_CURRENT_SIZE = ALL_SNPS.currentSize();
			mb.buildMatches();
			if ( !SILENT ) cout << "\nMatches completed ... freeing memory" << endl;
			ALL_SAMPLES.freeMatches();
			ALL_SAMPLES.freeMarkers();
			WINDOWS_LIST.clear(); LAST_SET=false;
		}
	}
	time( &timer[1] );

	fout << setw(50) << "Total IBD segments: " << num_matches << endl;
	fout << setw(50) << "Total runtime (sec): " << difftime( timer[1] , timer[0] ) << endl;
	fout.close();
	MATCH_FILE.close();

	if ( BINARY_OUT )
	{
		ofstream bmid_out( ( out + ".bmid" ).c_str() );
		ALL_SNPS.print( bmid_out );
		bmid_out.close();

		ofstream bsid_out( ( out + ".bsid" ).c_str() );
		ALL_SAMPLES.print( bsid_out );
		bsid_out.close();
	}
}