Ejemplo n.º 1
0
//splits the database into temp files and then merges the files in pairs
//takes optional arguments for primary field delimiter and secondary field list separators
//returns true on success; false when no temporary files were produced or the
//final rename of the merged output fails
bool sortDatabase(string database, const char delim, const char conn)
{
    const int tempFileCount = splitDatabase(database,delim,conn);

    //guard: with zero temp files, mergeCount would stay 0 and the final
    //rename below would target the non-existent "merge-1.db"
    if (tempFileCount <= 0)
    {
        return false;
    }

    int mergeCount = 0;

    //first pass: merge consecutive pairs of temp files into merge files,
    //deleting each temp file once it has been consumed
    for (int i = 0; i < tempFileCount-1; i+=2)
    {
        mergeFiles(string("temp")+to_string(i)+".db",string("temp")+to_string(i+1)+".db",delim,conn,string("merge")+to_string(mergeCount++)+".db");
        std::remove((string("temp")+to_string(i)+".db").c_str());
        std::remove((string("temp")+to_string(i+1)+".db").c_str());
    }

    //if there are an odd number of temporary files
    //rename the final temporary file to a merge file for automated merging later
    //this merge file will always have the name merge[tempFileCount/2].db
    //this results from every pair of temp files being merged (tempFileCount/2 merge files)
    //and file names starting at merge0.db
    if(tempFileCount%2)
    {
        std::rename((string("temp")+to_string(tempFileCount-1)+".db").c_str(),(string("merge")+to_string(mergeCount++)+".db").c_str());
    }

    //second pass: cascade-merge the merge files; mergeCount grows inside the
    //loop body, so the condition is re-evaluated each iteration and pairs
    //keep merging until only the last file, merge[mergeCount-1].db, remains
    for (int i = 0; i < mergeCount-1; i+=2)
    {
        mergeFiles(string("merge")+to_string(i)+".db",string("merge")+to_string(i+1)+".db",delim,conn,string("merge")+to_string(mergeCount++)+".db");
        std::remove((string("merge")+to_string(i)+".db").c_str());
        std::remove((string("merge")+to_string(i+1)+".db").c_str());
    }

    //name the sorted output after the database with its extension stripped,
    //e.g. "data.db" -> "datasorted.db"; propagate failure of the rename itself
    return std::rename((string("merge")+to_string(mergeCount-1)+".db").c_str(),(database.substr(0,database.find('.')) + "sorted.db").c_str()) == 0;
}
Ejemplo n.º 2
0
// Combines the per-sample weighted ROOT files of one lepton flavour into one
// merged output file per category.
// @param electrons when true use the "_electrons" file-name suffix, otherwise "_muons"
void fullCombination(const bool electrons = false)
{
  // suffix appended to every weighted input and combined output file name
  TString type("_muons");
  if( electrons ) type = "_electrons";

  // NOTE(review): "cathegories"/"cathegory" are typos for "categories"/"category";
  // identifiers kept as-is here since this edit changes comments only
  std::map<TString, TList*> cathegories;
  // Loop over all files
  std::map<TString, double> fw(filesAndWeightsMap( electrons ));
  std::map<TString, double>::const_iterator it = fw.begin();
  for( ; it != fw.end(); ++it ) {
    TString cathegory(getCategory(it->first));
    // operator[] default-inserts a null TList* for a category seen for the
    // first time; allocate its list lazily
    if( cathegories[cathegory] == 0 ) cathegories[cathegory] = new TList();
    // strip the directory part of the input path and open the corresponding
    // "<basename>weighted<type>.root" file in the current directory
    std::cout << "fileName = " << std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"weighted"+type+".root" << std::endl;
    cathegories[cathegory]->Add( TFile::Open(std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"weighted"+type+".root") );
  }

  // merge each category's file list into "<category>_combined<type>.root"
  TList * combinationsList = new TList();
  std::map<TString, TList*>::const_iterator it2 = cathegories.begin();
  for( ; it2 != cathegories.end(); ++it2 ) {
    TFile *Target = TFile::Open( it2->first+"_combined"+type+".root", "RECREATE" );
    std::cout << "fileName = " << it2->first << std::endl;
    combinationsList->Add( Target );
    mergeFiles( Target, it2->second );
  }
}
Ejemplo n.º 3
0
// Reduce phase entry point: delegates all work to mergeFiles().
// @param reduceTask identifier of this reduce task, forwarded to mergeFiles()
void reduce(char *reduceTask) {
    // implement the reduce operation here ...
    mergeFiles(reduceTask);
}
Ejemplo n.º 4
0
/*
 *  sortRandom();
 *
 *    Don't make any assumptions about the input.  Store the input
 *    records in a large buffer, and sort those in-core records once
 *    all records are processed or the buffer is full.  If the buffer
 *    fills up, store the sorted records into temporary files.  Once
 *    all records are read, use mergeFiles() above to merge-sort the
 *    temporary files.
 *
 *    Exits the application if an error occurs.
 */
static void
sortRandom(
    void)
{
    int temp_file_idx = -1;         /* index of newest temp file; -1 => none yet */
    skstream_t *input_rwios = NULL; /* input stream */
    uint8_t *record_buffer = NULL;  /* Region of memory for records */
    uint8_t *cur_node = NULL;       /* Ptr into record_buffer */
    uint8_t *next_node = NULL;      /* Ptr into record_buffer */
    uint32_t buffer_max_recs;       /* max buffer size (in number of recs) */
    uint32_t buffer_recs;           /* current buffer size (# records) */
    uint32_t buffer_chunk_recs;     /* how to grow from current to max buf */
    uint32_t num_chunks;            /* how quickly to grow buffer */
    uint32_t record_count = 0;      /* Number of records read */
    int rv;

    /* Determine the maximum number of records that will fit into the
     * buffer if it grows the maximum size */
    buffer_max_recs = buffer_size / NODE_SIZE;
    TRACEMSG((("buffer_size = %" PRIu64
               "\nnode_size = %" PRIu32
               "\nbuffer_max_recs = %" PRIu32),
              buffer_size, NODE_SIZE, buffer_max_recs));

    /* We will grow to the maximum size in chunks */
    num_chunks = NUM_CHUNKS;
    if (num_chunks <= 0) {
        num_chunks = 1;
    }

    /* Attempt to allocate the initial chunk.  If we fail, increment
     * the number of chunks---which will decrease the amount we
     * attempt to allocate at once---and try again. */
    for (;;) {
        buffer_chunk_recs = buffer_max_recs / num_chunks;
        TRACEMSG((("num_chunks = %" PRIu32
                   "\nbuffer_chunk_recs = %" PRIu32),
                  num_chunks, buffer_chunk_recs));

        record_buffer = (uint8_t*)malloc(NODE_SIZE * buffer_chunk_recs);
        if (record_buffer) {
            /* malloc was successful */
            break;
        } else if (buffer_chunk_recs < MIN_IN_CORE_RECORDS) {
            /* give up at this point */
            skAppPrintErr("Error allocating space for %d records",
                          MIN_IN_CORE_RECORDS);
            appExit(EXIT_FAILURE);
        } else {
            /* reduce the amount we allocate at once by increasing the
             * number of chunks and try again */
            TRACEMSG(("malloc() failed"));
            ++num_chunks;
        }
    }

    buffer_recs = buffer_chunk_recs;
    TRACEMSG((("buffer_recs = %" PRIu32), buffer_recs));

    /* open first file */
    rv = appNextInput(&input_rwios);
    if (rv < 0) {
        free(record_buffer);
        appExit(EXIT_FAILURE);
    }

    /* Fill the buffer with records drawn from every input stream in turn */
    record_count = 0;
    cur_node = record_buffer;
    while (input_rwios != NULL) {
        /* read record */
        if ((rv = skStreamReadRecord(input_rwios, (rwRec*)cur_node))
            != SKSTREAM_OK)
        {
            if (rv != SKSTREAM_ERR_EOF) {
                skStreamPrintLastErr(input_rwios, rv, &skAppPrintErr);
            }
            /* end of file: close current and open next */
            skStreamDestroy(&input_rwios);
            rv = appNextInput(&input_rwios);
            if (rv < 0) {
                free(record_buffer);
                appExit(EXIT_FAILURE);
            }
            continue;
        }

        ++record_count;
        cur_node += NODE_SIZE;

        if (record_count == buffer_recs) {
            /* Filled the current buffer */

            /* If buffer not at max size, see if we can grow it */
            if (buffer_recs < buffer_max_recs) {
                uint8_t *old_buf = record_buffer;

                /* add a chunk of records.  if we are near the max,
                 * set the size to the max */
                buffer_recs += buffer_chunk_recs;
                if (buffer_recs + buffer_chunk_recs > buffer_max_recs) {
                    buffer_recs = buffer_max_recs;
                }
                TRACEMSG((("Buffer full---attempt to grow to %" PRIu32
                           " records, %" PRIu32 " bytes"),
                          buffer_recs, NODE_SIZE * buffer_recs));

                /* attempt to grow */
                record_buffer = (uint8_t*)realloc(record_buffer,
                                                  NODE_SIZE * buffer_recs);
                if (record_buffer) {
                    /* Success, make certain cur_node points into the
                     * new buffer */
                    cur_node = (record_buffer + (record_count * NODE_SIZE));
                } else {
                    /* Unable to grow it */
                    TRACEMSG(("realloc() failed"));
                    record_buffer = old_buf;
                    /* cap both limits at the current fill level so no
                     * further growth (or filling) is attempted */
                    buffer_max_recs = buffer_recs = record_count;
                }
            }

            /* Either buffer at maximum size or attempt to grow it
             * failed. */
            if (record_count == buffer_max_recs) {
                /* Sort */
                skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare);

                /* Write to temp file */
                if (skTempFileWriteBufferStream(
                        tmpctx, &temp_file_idx,
                        record_buffer, NODE_SIZE, record_count))
                {
                    skAppPrintSyserror(
                        "Error writing sorted buffer to temporary file");
                    free(record_buffer);
                    appExit(EXIT_FAILURE);
                }

                /* Reset record buffer to 'empty' */
                record_count = 0;
                cur_node = record_buffer;
            }
        }
    }

    /* Sort (and maybe store) last batch of records */
    if (record_count > 0) {
        skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare);

        if (temp_file_idx >= 0) {
            /* Write last batch to temp file */
            if (skTempFileWriteBufferStream(
                    tmpctx, &temp_file_idx,
                    record_buffer, NODE_SIZE, record_count))
            {
                skAppPrintSyserror(
                    "Error writing sorted buffer to temporary file");
                free(record_buffer);
                appExit(EXIT_FAILURE);
            }
        }
    }

    /* Generate the output */

    if (record_count == 0 && temp_file_idx == -1) {
        /* No records were read at all; write the header to the output
         * file */
        rv = skStreamWriteSilkHeader(out_rwios);
        if (0 != rv) {
            skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
        }
    } else if (temp_file_idx == -1) {
        /* No temp files written, just output batch of records */
        uint32_t c;

        TRACEMSG((("Writing %" PRIu32 " records to '%s'"),
                  record_count, skStreamGetPathname(out_rwios)));
        /* get first two records from the sorted buffer */
        cur_node = record_buffer;
        next_node = record_buffer + NODE_SIZE;
        /* walk adjacent pairs; only the last record of each run of
         * duplicates is skipped, so each distinct record is written once */
        for (c = 1; c < record_count; ++c, next_node += NODE_SIZE) {
            if (0 != rwrecCompare(cur_node, next_node)) {
                /* records differ. print earlier record */
                rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node);
                if (0 != rv) {
                    skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
                    if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                        free(record_buffer);
                        appExit(EXIT_FAILURE);
                    }
                }
                cur_node = next_node;
            }
            /* else records are duplicates: ignore latter record */
        }
        /* print remaining record */
        rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node);
        if (0 != rv) {
            skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
            if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                free(record_buffer);
                appExit(EXIT_FAILURE);
            }
        }
    } else {
        /* no longer have a need for the record buffer */
        free(record_buffer);
        record_buffer = NULL;

        /* now merge all the temp files */
        mergeFiles(temp_file_idx);
    }

    /* record_buffer is still live unless the merge path above freed it */
    if (record_buffer) {
        free(record_buffer);
    }
}
Ejemplo n.º 5
0
// Combines the per-sample weighted ROOT files of the given analysis type into
// one merged output file per category (plus derived "Tau" and
// "*_tipLipCorrected" categories), written under CombinedFiles/.
// @param ana analysis type; selects the file-name suffix and, for _2eTrack,
//            the electron file set
void fullCombination(analysisType ana)
{
  // map the analysis type to the suffix used in the weighted file names
  TString type("");
  bool electrons=false;
  if ( ana==_2eTrack) {
    electrons=true;
    type = "2eTrack";
  }
  else if ( ana==_2globalOrTrackerMu ) type = "2globalOrTrackerMu";
  else if ( ana==_2muTrack ) type = "2muTrack";
  else if ( ana==_2globalMu ) type = "2globalMu";
  else if ( ana==_2trackerMu ) type = "2trackerMu";
  else if ( ana==_2saMu ) type = "2saMu";

  std::map<TString, TList*> categories;
  // Loop over all files
  std::map<TString, double> fw(filesAndWeightsMap( electrons ));
  std::map<TString, double>::const_iterator it = fw.begin();
  for( ; it != fw.end(); ++it ) {
    TString category(getCategory(it->first));

    // Skip unknown categories and signal MC
    if ( category=="" ) {
      std::cout << "Skipping file " << (it->first).Data() << std::endl;
      continue;
    }
    else std::cout << "Got sample " << it->first.Data() << " for category " << category.Data() << std::endl;

    // operator[] default-inserts a null TList* for an unseen category
    if( categories[category] == 0 ) categories[category] = new TList();

    // weighted file lives in WeightedFiles/ named after the input's basename
    TString fileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+".root");
    categories[category]->Add( TFile::Open(fileName) );

    // Check for tau version
    if ( category != "Data" && !category.BeginsWith("HTo2LongLived") && !category.BeginsWith("Chi0ToNuLL") ) {
      TString tauFileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tau.root");
      if ( categories["Tau"] == 0 ) categories["Tau"] = new TList();
      std::cout << "Adding tau version : " << tauFileName << std::endl;
      categories["Tau"]->Add( TFile::Open(tauFileName) );
    }
    // Check for d0 corrected files
    if ( category == "Data" ) {
      TString correctedFileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tipLipCorrected.root");
      if ( categories["Data_tipLipCorrected"] == 0 ) categories["Data_tipLipCorrected"] = new TList();
      std::cout << "Adding tip lip corrected version : " << correctedFileName << std::endl;
      categories["Data_tipLipCorrected"]->Add( TFile::Open(correctedFileName) );
    }
    else if ( category == "Data22Jan") {
      TString correctedFileName("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tipLipCorrected.root");
      if ( categories["Data22Jan_tipLipCorrected"] == 0 ) categories["Data22Jan_tipLipCorrected"] = new TList();
      std::cout << "Adding tip lip corrected version : " << correctedFileName << std::endl;
      categories["Data22Jan_tipLipCorrected"]->Add( TFile::Open(correctedFileName) );    }

    //    if ( category=="DYJets" ) {
    //      if ( categories["DYJetsTau"] == 0 ) categories["DYJetsTau"] = new TList();
    //      categories["DYJetsTau"]->Add( TFile::Open("WeightedFiles/"+std::string(it->first).substr(std::string(it->first).find_last_of("/")+1)+"_weighted_"+type+"_tau.root") );
    //    }
  }

  // merge each category's file list into CombinedFiles/<category>_combined_<type>.root
  TList * combinationsList = new TList();
  std::map<TString, TList*>::const_iterator it2 = categories.begin();
  for( ; it2 != categories.end(); ++it2 ) {
    std::cout << "Doing category : " << it2->first << std::endl;
    TFile *Target = TFile::Open( "CombinedFiles/"+it2->first+"_combined_"+type+".root", "RECREATE" );
    combinationsList->Add( Target );
    mergeFiles( Target, it2->second );
  }
}
Ejemplo n.º 6
0
// Handles the training phase: builds a sliding-window 4D volume, converts it
// to an SVM input file, trains and self-tests a model, optionally maintains a
// cumulative training set/model across runs, and generates weight-map volumes
// from the trained model(s).
void SVMProcessing::train()
{
	char prefix[BUFF_SIZE], lastCummulativeTrainingFile[BUFF_SIZE], actualCummulativeTrainingFile[BUFF_SIZE], 
		cummulativeModelFile[BUFF_SIZE], cummulativeTestingFile[BUFF_SIZE];
   vector <int> classes, indices;
   
   std::stringstream CmdLn;
   
   vdbPtr->getFinalVolumeFormat(prefix);
   
   // calculating the sliding window and saving a 4D volume
   estimateActivation(1, vdbPtr->interval.maxIndex(), vdbPtr->slidingWindowSize, prefix, vdbPtr->train4DFile);
   
   // getting the volume indexes, excluding the baseline and the first ones discarded by haemodynamic stabilization
   // vdbPtr->interval.getVolumeIndices(vdbPtr->offset, 1, vdbPtr->interval.maxIndex(), indices);
    
   // added on 02AUG2016
   fillVolumeIndexes(vdbPtr,indices);
    
   // getting a vector containing the classes for each volume
   vdbPtr->interval.getClassArray(classes);
   
   char svmMask[BUFF_SIZE], svmTestingFile[BUFF_SIZE];
   sprintf(svmMask, "%s%s", vdbPtr->featuresSuffix, vdbPtr->trainFeatureSuffix);

   if (cummulativeTraining)
   {
	   // reuse the previous run's test mask so the cumulative training set is
	   // built on a consistent feature mask
	   if (fileExists(featuresTestMask))
	   {
		   fprintf(stderr, "################## Updating %s to %s due to cummulative train option ########################## \n", svmMask, featuresTestMask);
		   CmdLn.str("");
		   CmdLn << "fslmaths " << featuresTestMask << " " << svmMask;
		   fslmaths((char *)CmdLn.str().c_str());
	   }
   }
   
   // transforms the 4D volume in a svm like input file
   saveSVMFile(vdbPtr->train4DFile, svmMask, svmTrainingFile, 0, indices, classes);
   
   CmdLn.str("");
   SVMObj svmObject;
   
   // training
   CmdLn << "svmtrain -t 0 " << svmTrainingFile << " " << svmModelFile;
   svmObject.train(CmdLn.str().c_str());

   CmdLn.str("");

   sprintf(svmTestingFile, "%s%s%s%s", svmDir, "training", vdbPtr->trainFeatureSuffix, ".tst");

   // testing the training data
   CmdLn << "svmpredict " << svmTrainingFile << " " << svmModelFile << " " << svmTestingFile;
   svmObject.predict(CmdLn.str().c_str());
   generateProjetionsGraph(svmTestingFile);

   // testing the training data with the prediction model
   if (fileExists(svmModelPredictFile))
   {
	   CmdLn.str("");
	   sprintf(svmTestingFile, "%s%s%s%s", svmDir, "testing", vdbPtr->trainFeatureSuffix, ".tst");
	   CmdLn << "svmpredict " << svmTrainingFile << " " << svmModelPredictFile << " " << svmTestingFile;
	   svmObject.predict(CmdLn.str().c_str());
	   generateProjetionsGraph(svmTestingFile);
   }

   if (cummulativeTraining)
   {
	   // accumulate this run's training data onto the previous run's file
	   // (testFeatureSuffix names the previous run, trainFeatureSuffix this one)
	   sprintf(lastCummulativeTrainingFile, "%s%s%s%s", svmDir, "cummulative_training", vdbPtr->testFeatureSuffix, ".txt");
	   sprintf(actualCummulativeTrainingFile, "%s%s%s%s", svmDir, "cummulative_training", vdbPtr->trainFeatureSuffix, ".txt");

	   if (fileExists(lastCummulativeTrainingFile))
	   {
		   mergeFiles(svmTrainingFile, lastCummulativeTrainingFile, actualCummulativeTrainingFile);
	   }
	   else copyFile(svmTrainingFile, actualCummulativeTrainingFile);

	   sprintf(cummulativeModelFile, "%s%s%s%s%s", svmDir, "cummulative_", vdbPtr->subject, vdbPtr->trainFeatureSuffix, ".model");

	   // training
	   CmdLn.str("");
	   CmdLn << "svmtrain -t 0 " << actualCummulativeTrainingFile << " " << cummulativeModelFile;
	   svmObject.train(CmdLn.str().c_str());

	   CmdLn.str("");

	   sprintf(cummulativeTestingFile, "%s%s%s%s", svmDir, "cummulative_training", vdbPtr->trainFeatureSuffix, ".tst");


	   // testing the cummulative training data
	   CmdLn << "svmpredict " << actualCummulativeTrainingFile << " " << cummulativeModelFile << " " << cummulativeTestingFile;
	   svmObject.predict(CmdLn.str().c_str());
	   generateProjetionsGraph(cummulativeTestingFile);

	   /*
	   if (fileExists(svmModelPredictFile))
	   {
	   CmdLn.str("");
	   sprintf(cummulativeTestingFile, "%s%s%s%s", svmDir, "cummulative_testing", vdbPtr->trainFeatureSuffix, ".tst");
	   CmdLn << "svmpredict " << actualCummulativeTrainingFile << " " << svmModelPredictFile << " " << cummulativeTestingFile;
	   svmObject.predict(CmdLn.str().c_str());
	   generateProjetionsGraph(cummulativeTestingFile);
	   }
	   */
   }

   
   // Generating weight map volume
   fprintf(stderr, "Generating weight map volumes\n");
   
   sprintf(svmMask, "%s.nii", vdbPtr->featuresTrainSuffix);

   // NOTE(review): return value of fileExists() is discarded here — looks like
   // either leftover debugging or an intended existence probe; confirm intent
   fileExists(svmMask);

   fprintf(stderr, "using %s \n", svmMask);
   model=svm_load_model(svmModelFile);
   if (model != NULL)
   {
	  generateWeightVolume(model, svmMask, 1, svmWeightNormFile);
	  generateWeightVolume(model, svmMask, 0, svmWeightFile);
	  unloadModel(model);
   }

   if (cummulativeTraining)
   {
	   // same weight-map generation for the cumulative model
	   char cummulativeSvmWeightNormFile[BUFF_SIZE], cummulativeSvmWeightFile[BUFF_SIZE];
	   sprintf(cummulativeSvmWeightNormFile, "%s%s%s%s", svmDir, "cummulative_weights_norm", vdbPtr->trainFeatureSuffix, ".nii");
	   sprintf(cummulativeSvmWeightFile, "%s%s%s%s", svmDir, "cummulative_weights", vdbPtr->trainFeatureSuffix, ".nii");

	   model = svm_load_model(cummulativeModelFile);
	   if (model != NULL)
	   {
		   generateWeightVolume(model, svmMask, 1, cummulativeSvmWeightNormFile);
		   generateWeightVolume(model, svmMask, 0, cummulativeSvmWeightFile);
		   unloadModel(model);
	   }
   }
}