// Splits the database into sorted temporary files, then merge-sorts the
// files in pairs until a single sorted file remains.
// delim is the primary field delimiter; conn separates the items of
// list-valued (secondary) fields.
// Requires <cstdio> (std::remove, std::rename) and <string>.
bool sortDatabase(string database, const char delim, const char conn)
{
    const int tempFileCount = splitDatabase(database, delim, conn);
    if (tempFileCount <= 0) {
        // nothing was split out; avoid renaming a nonexistent merge file below
        return false;
    }
    int mergeCount = 0;

    // First pass: merge the temp files in pairs into merge0.db, merge1.db, ...
    for (int i = 0; i < tempFileCount - 1; i += 2) {
        mergeFiles(string("temp") + to_string(i) + ".db",
                   string("temp") + to_string(i + 1) + ".db",
                   delim, conn,
                   string("merge") + to_string(mergeCount++) + ".db");
        std::remove((string("temp") + to_string(i) + ".db").c_str());
        std::remove((string("temp") + to_string(i + 1) + ".db").c_str());
    }

    // If there is an odd number of temporary files, rename the final temp
    // file to a merge file so the loop below picks it up automatically.
    // It always becomes merge[tempFileCount/2].db: every pair of temp files
    // produced one merge file (tempFileCount/2 of them), and the file names
    // start at merge0.db.
    if (tempFileCount % 2) {
        std::rename((string("temp") + to_string(tempFileCount - 1) + ".db").c_str(),
                    (string("merge") + to_string(mergeCount++) + ".db").c_str());
    }

    // Second pass: keep merging the merge files in pairs. mergeCount grows
    // as new merge files are produced, so this loop runs until a single
    // merge file remains.
    for (int i = 0; i < mergeCount - 1; i += 2) {
        mergeFiles(string("merge") + to_string(i) + ".db",
                   string("merge") + to_string(i + 1) + ".db",
                   delim, conn,
                   string("merge") + to_string(mergeCount++) + ".db");
        std::remove((string("merge") + to_string(i) + ".db").c_str());
        std::remove((string("merge") + to_string(i + 1) + ".db").c_str());
    }

    std::rename((string("merge") + to_string(mergeCount - 1) + ".db").c_str(),
                (database.substr(0, database.find('.')) + "sorted.db").c_str());
    return true;
}
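sortDatabase() above relies on a five-argument mergeFiles() that is not shown. Below is a minimal sketch of what such a two-way merge could look like, under the assumptions (ours, not the source's) that each record is one line and that both inputs are already sorted by the text preceding the first delim; conn is unused because whole records pass through unchanged.

// Hypothetical two-way merge matching the five-argument call in
// sortDatabase(). Assumptions: one record per line, inputs sorted by the
// primary field (the text before the first `delim`).
#include <fstream>
#include <string>

static std::string sortKey(const std::string &record, char delim)
{
    return record.substr(0, record.find(delim)); // primary field only
}

bool mergeFiles(const std::string &leftName, const std::string &rightName,
                char delim, char /*conn*/, const std::string &outName)
{
    std::ifstream left(leftName), right(rightName);
    std::ofstream out(outName);
    if (!left || !right || !out) return false;

    std::string a, b;
    bool haveA = static_cast<bool>(std::getline(left, a));
    bool haveB = static_cast<bool>(std::getline(right, b));
    while (haveA && haveB) {
        // Emit whichever head record has the smaller primary key.
        if (sortKey(a, delim) <= sortKey(b, delim)) {
            out << a << '\n';
            haveA = static_cast<bool>(std::getline(left, a));
        } else {
            out << b << '\n';
            haveB = static_cast<bool>(std::getline(right, b));
        }
    }
    // Drain whichever input still has records.
    while (haveA) { out << a << '\n'; haveA = static_cast<bool>(std::getline(left, a)); }
    while (haveB) { out << b << '\n'; haveB = static_cast<bool>(std::getline(right, b)); }
    return true;
}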
void fullCombination(const bool electrons = false)
{
  TString type("_muons");
  if( electrons ) type = "_electrons";

  std::map<TString, TList*> categories;

  // Loop over all files and group them by category
  std::map<TString, double> fw(filesAndWeightsMap( electrons ));
  std::map<TString, double>::const_iterator it = fw.begin();
  for( ; it != fw.end(); ++it ) {
    TString category(getCategory(it->first));
    if( categories[category] == 0 ) categories[category] = new TList();
    TString fileName(std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"weighted"+type+".root");
    std::cout << "fileName = " << fileName << std::endl;
    categories[category]->Add( TFile::Open(fileName) );
  }

  // Merge each category's files into one combined ROOT file
  TList * combinationsList = new TList();
  std::map<TString, TList*>::const_iterator it2 = categories.begin();
  for( ; it2 != categories.end(); ++it2 ) {
    TFile *Target = TFile::Open( it2->first+"_combined"+type+".root", "RECREATE" );
    std::cout << "fileName = " << it2->first << std::endl;
    combinationsList->Add( Target );
    mergeFiles( Target, it2->second );
  }
}
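Here mergeFiles(Target, list) plays the role of hadd-style merging of ROOT files. For reference, ROOT also ships TFileMerger, which covers the plain case; a minimal sketch follows (file names are placeholders, and the snippet's own mergeFiles() may do per-category work that TFileMerger does not):

// Alternative sketch using ROOT's TFileMerger for straightforward
// histogram/tree merging. All file names below are placeholders.
#include "TFileMerger.h"

void combineCategory()
{
    TFileMerger merger;
    merger.OutputFile("Category_combined_muons.root"); // merged output
    merger.AddFile("sampleA_weighted_muons.root");     // inputs to merge
    merger.AddFile("sampleB_weighted_muons.root");
    merger.Merge();                                    // hadd-style merge
}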
void reduce(char *reduceTask)
{
    // implement the reduce operation here ...
    mergeFiles(reduceTask);
}
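The stub delegates everything to mergeFiles(reduceTask). As an illustration only, here is one way such a merge could work, assuming the map phase wrote "<key> <count>" lines into numbered partition files; the file-naming scheme, partition count, and aggregation by sum are all assumptions, not taken from the source.

// Hypothetical mergeFiles() for the reduce stub above: read every map
// partition for this task and sum the counts per key.
#include <fstream>
#include <map>
#include <sstream>
#include <string>

void mergeFiles(const char *reduceTask, int partitions = 4)
{
    std::map<std::string, long> totals; // key -> summed count
    for (int p = 0; p < partitions; ++p) {
        std::ostringstream name;
        name << reduceTask << "-part" << p << ".txt";
        std::ifstream in(name.str());
        std::string key;
        long count;
        while (in >> key >> count) totals[key] += count;
    }
    std::ofstream out(std::string(reduceTask) + "-reduced.txt");
    for (std::map<std::string, long>::const_iterator it = totals.begin();
         it != totals.end(); ++it)
        out << it->first << ' ' << it->second << '\n';
}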
/*
 * sortRandom();
 *
 * Don't make any assumptions about the input. Store the input
 * records in a large buffer, and sort those in-core records once
 * all records are processed or the buffer is full. If the buffer
 * fills up, store the sorted records into temporary files. Once
 * all records are read, use mergeFiles() above to merge-sort the
 * temporary files.
 *
 * Exits the application if an error occurs.
 */
static void
sortRandom(void)
{
    int temp_file_idx = -1;
    skstream_t *input_rwios = NULL;  /* input stream */
    uint8_t *record_buffer = NULL;   /* Region of memory for records */
    uint8_t *cur_node = NULL;        /* Ptr into record_buffer */
    uint8_t *next_node = NULL;       /* Ptr into record_buffer */
    uint32_t buffer_max_recs;        /* max buffer size (in number of recs) */
    uint32_t buffer_recs;            /* current buffer size (# records) */
    uint32_t buffer_chunk_recs;      /* how to grow from current to max buf */
    uint32_t num_chunks;             /* how quickly to grow buffer */
    uint32_t record_count = 0;       /* Number of records read */
    int rv;

    /* Determine the maximum number of records that will fit into the
     * buffer if it grows to the maximum size */
    buffer_max_recs = buffer_size / NODE_SIZE;
    TRACEMSG((("buffer_size = %" PRIu64
               "\nnode_size = %" PRIu32
               "\nbuffer_max_recs = %" PRIu32),
              buffer_size, NODE_SIZE, buffer_max_recs));

    /* We will grow to the maximum size in chunks */
    num_chunks = NUM_CHUNKS;
    if (num_chunks <= 0) {
        num_chunks = 1;
    }

    /* Attempt to allocate the initial chunk.  If we fail, increment
     * the number of chunks---which will decrease the amount we
     * attempt to allocate at once---and try again. */
    for (;;) {
        buffer_chunk_recs = buffer_max_recs / num_chunks;
        TRACEMSG((("num_chunks = %" PRIu32
                   "\nbuffer_chunk_recs = %" PRIu32),
                  num_chunks, buffer_chunk_recs));

        record_buffer = (uint8_t*)malloc(NODE_SIZE * buffer_chunk_recs);
        if (record_buffer) {
            /* malloc was successful */
            break;
        } else if (buffer_chunk_recs < MIN_IN_CORE_RECORDS) {
            /* give up at this point */
            skAppPrintErr("Error allocating space for %d records",
                          MIN_IN_CORE_RECORDS);
            appExit(EXIT_FAILURE);
        } else {
            /* reduce the amount we allocate at once by increasing the
             * number of chunks and try again */
            TRACEMSG(("malloc() failed"));
            ++num_chunks;
        }
    }

    buffer_recs = buffer_chunk_recs;
    TRACEMSG((("buffer_recs = %" PRIu32), buffer_recs));

    /* open first file */
    rv = appNextInput(&input_rwios);
    if (rv < 0) {
        free(record_buffer);
        appExit(EXIT_FAILURE);
    }

    record_count = 0;
    cur_node = record_buffer;
    while (input_rwios != NULL) {
        /* read record */
        if ((rv = skStreamReadRecord(input_rwios, (rwRec*)cur_node))
            != SKSTREAM_OK)
        {
            if (rv != SKSTREAM_ERR_EOF) {
                skStreamPrintLastErr(input_rwios, rv, &skAppPrintErr);
            }
            /* end of file: close current and open next */
            skStreamDestroy(&input_rwios);
            rv = appNextInput(&input_rwios);
            if (rv < 0) {
                free(record_buffer);
                appExit(EXIT_FAILURE);
            }
            continue;
        }

        ++record_count;
        cur_node += NODE_SIZE;

        if (record_count == buffer_recs) {
            /* Filled the current buffer */

            /* If buffer not at max size, see if we can grow it */
            if (buffer_recs < buffer_max_recs) {
                uint8_t *old_buf = record_buffer;

                /* add a chunk of records.  if we are near the max,
                 * set the size to the max */
                buffer_recs += buffer_chunk_recs;
                if (buffer_recs + buffer_chunk_recs > buffer_max_recs) {
                    buffer_recs = buffer_max_recs;
                }
                TRACEMSG((("Buffer full---attempt to grow to %" PRIu32
                           " records, %" PRIu32 " bytes"),
                          buffer_recs, NODE_SIZE * buffer_recs));

                /* attempt to grow */
                record_buffer = (uint8_t*)realloc(record_buffer,
                                                  NODE_SIZE * buffer_recs);
                if (record_buffer) {
                    /* Success, make certain cur_node points into the
                     * new buffer */
                    cur_node = (record_buffer + (record_count * NODE_SIZE));
                } else {
                    /* Unable to grow it */
                    TRACEMSG(("realloc() failed"));
                    record_buffer = old_buf;
                    buffer_max_recs = buffer_recs = record_count;
                }
            }

            /* Either buffer at maximum size or attempt to grow it
             * failed. */
            if (record_count == buffer_max_recs) {
                /* Sort */
                skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare);

                /* Write to temp file */
                if (skTempFileWriteBufferStream(
                        tmpctx, &temp_file_idx,
                        record_buffer, NODE_SIZE, record_count))
                {
                    skAppPrintSyserror(
                        "Error writing sorted buffer to temporary file");
                    free(record_buffer);
                    appExit(EXIT_FAILURE);
                }

                /* Reset record buffer to 'empty' */
                record_count = 0;
                cur_node = record_buffer;
            }
        }
    }

    /* Sort (and maybe store) last batch of records */
    if (record_count > 0) {
        skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare);

        if (temp_file_idx >= 0) {
            /* Write last batch to temp file */
            if (skTempFileWriteBufferStream(
                    tmpctx, &temp_file_idx,
                    record_buffer, NODE_SIZE, record_count))
            {
                skAppPrintSyserror(
                    "Error writing sorted buffer to temporary file");
                free(record_buffer);
                appExit(EXIT_FAILURE);
            }
        }
    }

    /* Generate the output */
    if (record_count == 0 && temp_file_idx == -1) {
        /* No records were read at all; write the header to the output
         * file */
        rv = skStreamWriteSilkHeader(out_rwios);
        if (0 != rv) {
            skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
        }
    } else if (temp_file_idx == -1) {
        /* No temp files written, just output batch of records */
        uint32_t c;

        TRACEMSG((("Writing %" PRIu32 " records to '%s'"),
                  record_count, skStreamGetPathname(out_rwios)));

        /* get first two records from the sorted buffer */
        cur_node = record_buffer;
        next_node = record_buffer + NODE_SIZE;

        for (c = 1; c < record_count; ++c, next_node += NODE_SIZE) {
            if (0 != rwrecCompare(cur_node, next_node)) {
                /* records differ. print earlier record */
                rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node);
                if (0 != rv) {
                    skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
                    if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                        free(record_buffer);
                        appExit(EXIT_FAILURE);
                    }
                }
                cur_node = next_node;
            }
            /* else records are duplicates: ignore latter record */
        }

        /* print remaining record */
        rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node);
        if (0 != rv) {
            skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
            if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                free(record_buffer);
                appExit(EXIT_FAILURE);
            }
        }
    } else {
        /* no longer have a need for the record buffer */
        free(record_buffer);
        record_buffer = NULL;

        /* now merge all the temp files */
        mergeFiles(temp_file_idx);
    }

    if (record_buffer) {
        free(record_buffer);
    }
}
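sortRandom() finishes by handing the temp files to mergeFiles(temp_file_idx), whose body is not shown here. A generic k-way merge over sorted runs, using a min-heap on the head record of each run, illustrates the step; this is a sketch, not SiLK's implementation, and it omits SiLK's duplicate-dropping (all names below are stand-ins).

// Generic k-way merge sketch: each input file is a sorted run of
// newline-delimited records; a min-heap over the current head of every
// run yields records in globally sorted order.
#include <fstream>
#include <queue>
#include <string>
#include <vector>

struct Head {
    std::string record; // current front record of one run
    size_t run;         // which input run it came from
};
struct HeadGreater {
    bool operator()(const Head &a, const Head &b) const {
        return a.record > b.record; // orders the heap as a min-heap
    }
};

void mergeRuns(const std::vector<std::string> &runFiles,
               const std::string &outFile)
{
    std::vector<std::ifstream> runs(runFiles.size());
    std::priority_queue<Head, std::vector<Head>, HeadGreater> heap;

    // Prime the heap with the first record of each run.
    for (size_t i = 0; i < runFiles.size(); ++i) {
        runs[i].open(runFiles[i]);
        Head h{ "", i };
        if (std::getline(runs[i], h.record)) heap.push(h);
    }

    std::ofstream out(outFile);
    while (!heap.empty()) {
        Head h = heap.top();
        heap.pop();
        out << h.record << '\n';  // emit the globally smallest head
        if (std::getline(runs[h.run], h.record)) heap.push(h); // refill
    }
}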
void fullCombination(analysisType ana)
{
  TString type("");
  bool electrons = false;
  if     ( ana==_2eTrack )            { electrons = true; type = "2eTrack"; }
  else if( ana==_2globalOrTrackerMu ) type = "2globalOrTrackerMu";
  else if( ana==_2muTrack )           type = "2muTrack";
  else if( ana==_2globalMu )          type = "2globalMu";
  else if( ana==_2trackerMu )         type = "2trackerMu";
  else if( ana==_2saMu )              type = "2saMu";

  std::map<TString, TList*> categories;

  // Loop over all files
  std::map<TString, double> fw(filesAndWeightsMap( electrons ));
  std::map<TString, double>::const_iterator it = fw.begin();
  for( ; it != fw.end(); ++it ) {
    TString category(getCategory(it->first));
    // Skip unknown categories and signal MC
    if( category == "" ) {
      std::cout << "Skipping file " << (it->first).Data() << std::endl;
      continue;
    }
    else std::cout << "Got sample " << it->first.Data() << " for category " << category.Data() << std::endl;
    if( categories[category] == 0 ) categories[category] = new TList();
    TString fileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+".root");
    categories[category]->Add( TFile::Open(fileName) );

    // Check for the tau version
    if( category != "Data" && !category.BeginsWith("HTo2LongLived") && !category.BeginsWith("Chi0ToNuLL") ) {
      TString tauFileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tau.root");
      if( categories["Tau"] == 0 ) categories["Tau"] = new TList();
      std::cout << "Adding tau version : " << tauFileName << std::endl;
      categories["Tau"]->Add( TFile::Open(tauFileName) );
    }

    // Check for d0-corrected files
    if( category == "Data" ) {
      TString correctedFileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tipLipCorrected.root");
      if( categories["Data_tipLipCorrected"] == 0 ) categories["Data_tipLipCorrected"] = new TList();
      std::cout << "Adding tip lip corrected version : " << correctedFileName << std::endl;
      categories["Data_tipLipCorrected"]->Add( TFile::Open(correctedFileName) );
    }
    else if( category == "Data22Jan" ) {
      TString correctedFileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tipLipCorrected.root");
      if( categories["Data22Jan_tipLipCorrected"] == 0 ) categories["Data22Jan_tipLipCorrected"] = new TList();
      std::cout << "Adding tip lip corrected version : " << correctedFileName << std::endl;
      categories["Data22Jan_tipLipCorrected"]->Add( TFile::Open(correctedFileName) );
    }

    // if( category=="DYJets" ) {
    //   if( categories["DYJetsTau"] == 0 ) categories["DYJetsTau"] = new TList();
    //   categories["DYJetsTau"]->Add( TFile::Open("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tau.root") );
    // }
  }

  // Merge each category's files into one combined ROOT file
  TList * combinationsList = new TList();
  std::map<TString, TList*>::const_iterator it2 = categories.begin();
  for( ; it2 != categories.end(); ++it2 ) {
    std::cout << "Doing category : " << it2->first << std::endl;
    TFile *Target = TFile::Open( "CombinedFiles/"+it2->first+"_combined_"+type+".root", "RECREATE" );
    combinationsList->Add( Target );
    mergeFiles( Target, it2->second );
  }
}
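The filename expression above is repeated five times with only the suffix varying. A small helper (hypothetical; not part of the original code) would make the pattern explicit:

// Hypothetical helper factoring out the repeated filename construction in
// fullCombination(): strip the sample's directory prefix and append the
// weighting suffix plus an optional variant ("_tau", "_tipLipCorrected", ...).
#include <string>
#include "TString.h"

TString weightedFileName(const TString &sample, const TString &type,
                         const TString &variant = "")
{
    std::string s(sample.Data());
    std::string base = s.substr(s.find_last_of('/') + 1); // file name only
    return TString("WeightedFiles/") + base.c_str()
           + "_weighted_" + type + variant + ".root";
}

// usage:
//   categories[category]->Add( TFile::Open(weightedFileName(it->first, type)) );
//   categories["Tau"]->Add( TFile::Open(weightedFileName(it->first, type, "_tau")) );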
// handles the training
void SVMProcessing::train()
{
   char prefix[BUFF_SIZE],
        lastCummulativeTrainingFile[BUFF_SIZE],
        actualCummulativeTrainingFile[BUFF_SIZE],
        cummulativeModelFile[BUFF_SIZE],
        cummulativeTestingFile[BUFF_SIZE];

   vector<int> classes, indices;
   std::stringstream CmdLn;

   vdbPtr->getFinalVolumeFormat(prefix);

   // calculating the sliding window and saving a 4D volume
   estimateActivation(1, vdbPtr->interval.maxIndex(), vdbPtr->slidingWindowSize, prefix, vdbPtr->train4DFile);

   // getting the volume indexes, excluding the baseline and the first ones
   // discarded for haemodynamic stabilization
   // vdbPtr->interval.getVolumeIndices(vdbPtr->offset, 1, vdbPtr->interval.maxIndex(), indices);
   // added on 02AUG2016
   fillVolumeIndexes(vdbPtr, indices);

   // getting a vector containing the classes for each volume
   vdbPtr->interval.getClassArray(classes);

   char svmMask[BUFF_SIZE], svmTestingFile[BUFF_SIZE];
   sprintf(svmMask, "%s%s", vdbPtr->featuresSuffix, vdbPtr->trainFeatureSuffix);
   if (cummulativeTraining)
   {
      if (fileExists(featuresTestMask))
      {
         fprintf(stderr, "################## Updating %s to %s due to cumulative train option ########################## \n", svmMask, featuresTestMask);
         CmdLn.str("");
         CmdLn << "fslmaths " << featuresTestMask << " " << svmMask;
         fslmaths((char *)CmdLn.str().c_str());
      }
   }

   // transforms the 4D volume into an svm-like input file
   saveSVMFile(vdbPtr->train4DFile, svmMask, svmTrainingFile, 0, indices, classes);

   CmdLn.str("");
   SVMObj svmObject;

   // training
   CmdLn << "svmtrain -t 0 " << svmTrainingFile << " " << svmModelFile;
   svmObject.train(CmdLn.str().c_str());

   CmdLn.str("");
   sprintf(svmTestingFile, "%s%s%s%s", svmDir, "training", vdbPtr->trainFeatureSuffix, ".tst");

   // testing the training data
   CmdLn << "svmpredict " << svmTrainingFile << " " << svmModelFile << " " << svmTestingFile;
   svmObject.predict(CmdLn.str().c_str());
   generateProjetionsGraph(svmTestingFile);

   // testing the training data with the prediction model
   if (fileExists(svmModelPredictFile))
   {
      CmdLn.str("");
      sprintf(svmTestingFile, "%s%s%s%s", svmDir, "testing", vdbPtr->trainFeatureSuffix, ".tst");
      CmdLn << "svmpredict " << svmTrainingFile << " " << svmModelPredictFile << " " << svmTestingFile;
      svmObject.predict(CmdLn.str().c_str());
      generateProjetionsGraph(svmTestingFile);
   }

   if (cummulativeTraining)
   {
      sprintf(lastCummulativeTrainingFile, "%s%s%s%s", svmDir, "cummulative_training", vdbPtr->testFeatureSuffix, ".txt");
      sprintf(actualCummulativeTrainingFile, "%s%s%s%s", svmDir, "cummulative_training", vdbPtr->trainFeatureSuffix, ".txt");

      // append the previous cumulative training set, if any, to the new one
      if (fileExists(lastCummulativeTrainingFile))
      {
         mergeFiles(svmTrainingFile, lastCummulativeTrainingFile, actualCummulativeTrainingFile);
      }
      else copyFile(svmTrainingFile, actualCummulativeTrainingFile);

      sprintf(cummulativeModelFile, "%s%s%s%s%s", svmDir, "cummulative_", vdbPtr->subject, vdbPtr->trainFeatureSuffix, ".model");

      // training
      CmdLn.str("");
      CmdLn << "svmtrain -t 0 " << actualCummulativeTrainingFile << " " << cummulativeModelFile;
      svmObject.train(CmdLn.str().c_str());

      CmdLn.str("");
      sprintf(cummulativeTestingFile, "%s%s%s%s", svmDir, "cummulative_training", vdbPtr->trainFeatureSuffix, ".tst");

      // testing the cumulative training data
      CmdLn << "svmpredict " << actualCummulativeTrainingFile << " " << cummulativeModelFile << " " << cummulativeTestingFile;
      svmObject.predict(CmdLn.str().c_str());
      generateProjetionsGraph(cummulativeTestingFile);

      /*
      if (fileExists(svmModelPredictFile))
      {
         CmdLn.str("");
         sprintf(cummulativeTestingFile, "%s%s%s%s", svmDir, "cummulative_testing", vdbPtr->trainFeatureSuffix, ".tst");
         CmdLn << "svmpredict " << actualCummulativeTrainingFile << " " << svmModelPredictFile << " " << cummulativeTestingFile;
         svmObject.predict(CmdLn.str().c_str());
         generateProjetionsGraph(cummulativeTestingFile);
      }
      */
   }

   // Generating weight map volumes
   fprintf(stderr, "Generating weight map volumes\n");
   sprintf(svmMask, "%s.nii", vdbPtr->featuresTrainSuffix);
   fileExists(svmMask);  // note: return value is unused in the original code
   fprintf(stderr, "using %s \n", svmMask);
   model = svm_load_model(svmModelFile);
   if (model != NULL)
   {
      generateWeightVolume(model, svmMask, 1, svmWeightNormFile);
      generateWeightVolume(model, svmMask, 0, svmWeightFile);
      unloadModel(model);
   }

   if (cummulativeTraining)
   {
      char cummulativeSvmWeightNormFile[BUFF_SIZE], cummulativeSvmWeightFile[BUFF_SIZE];
      sprintf(cummulativeSvmWeightNormFile, "%s%s%s%s", svmDir, "cummulative_weights_norm", vdbPtr->trainFeatureSuffix, ".nii");
      sprintf(cummulativeSvmWeightFile, "%s%s%s%s", svmDir, "cummulative_weights", vdbPtr->trainFeatureSuffix, ".nii");

      model = svm_load_model(cummulativeModelFile);
      if (model != NULL)
      {
         generateWeightVolume(model, svmMask, 1, cummulativeSvmWeightNormFile);
         generateWeightVolume(model, svmMask, 0, cummulativeSvmWeightFile);
         unloadModel(model);
      }
   }
}
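In the cumulative-training path, mergeFiles(a, b, out) only needs to append the previous cumulative libsvm training file to the current one. The project's implementation is not shown; a plain concatenation sketch under that assumption:

// Hypothetical sketch of the three-argument mergeFiles() used above:
// concatenate two line-oriented libsvm training files into a third.
#include <fstream>
#include <string>

bool mergeFiles(const char *fileA, const char *fileB, const char *outFile)
{
    std::ifstream a(fileA), b(fileB);
    std::ofstream out(outFile);
    if (!a || !b || !out) return false;

    std::string line;
    while (std::getline(a, line)) out << line << '\n'; // current training set
    while (std::getline(b, line)) out << line << '\n'; // previous cumulative set
    return true;
}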