Ejemplo n.º 1
0
  /**
    @brief Predicts peptide detectabilities with the SVM model stored in dt_model_file_.

    Steps performed:
    - load the SVM model from dt_model_file_
    - for an OLIGO kernel, read "border_length", "k_mer_length" and "sigma" from
      the parameter file "<dt_model_file_>_additional_parameters"
    - load the training samples from "<dt_model_file_>_samples"
    - encode the peptides as oligo border vectors and query the SVM via
      SVMWrapper::getSVCProbabilities

    @param peptides_vector Peptide sequences that are encoded for prediction
    @param labels Forwarded to SVMWrapper::getSVCProbabilities as its third argument
                  (presumably receives the predicted class labels - confirm against SVMWrapper)
    @param detectabilities Forwarded to SVMWrapper::getSVCProbabilities as its second argument
                           (presumably receives the predicted probabilities - confirm against SVMWrapper)

    @throws Exception::InvalidParameter if the model file, the additional parameter file
            (OLIGO kernel only) or the sample file is not readable, or if one of the
            required additional parameters compares equal to DataValue::EMPTY
  */
  void DetectabilitySimulation::predictDetectabilities(vector<String>& peptides_vector, vector<DoubleReal>& labels,
                                                       vector<DoubleReal>& detectabilities)
  {
    // The support vector machine and the encoder used for sample I/O and peptide encoding
    SVMWrapper svm;
    LibSVMEncoder encoder;

    // Kernel settings; they keep these defaults unless an OLIGO kernel overrides them below
    UInt k_mer_length = 0;
    DoubleReal sigma = 0.0;
    UInt border_length = 0;

    // Without a readable model there is nothing to predict with
    if (!File::readable(dt_model_file_))
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "DetectibilitySimulation got invalid parameter. 'dt_model_file' " + dt_model_file_ + " is not readable");
    }
    svm.loadModel(dt_model_file_);

    // The OLIGO kernel needs extra settings that are stored next to the model file
    if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
    {
      const String add_paramfile = dt_model_file_ + "_additional_parameters";
      if (!File::readable(add_paramfile))
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "DetectibilitySimulation: SVM parameter file " + add_paramfile + " is not readable");
      }

      Param additional_parameters;
      ParamXMLFile paramFile;
      paramFile.load(add_paramfile, additional_parameters);

      // Each value is validated right before it is converted, in the order
      // border_length, k_mer_length, sigma, so the first problem found is the one reported.
      if (additional_parameters.getValue("border_length") == DataValue::EMPTY)
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "DetectibilitySimulation: No border length defined in additional parameters file.");
      }
      border_length = static_cast<String>(additional_parameters.getValue("border_length")).toInt();

      if (additional_parameters.getValue("k_mer_length") == DataValue::EMPTY)
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "DetectibilitySimulation: No k-mer length defined in additional parameters file.");
      }
      k_mer_length = static_cast<String>(additional_parameters.getValue("k_mer_length")).toInt();

      if (additional_parameters.getValue("sigma") == DataValue::EMPTY)
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "DetectibilitySimulation: No sigma defined in additional parameters file.");
      }
      sigma = static_cast<String>(additional_parameters.getValue("sigma")).toFloat();
    }

    // The model is known to be loaded at this point, so the parameters are set
    // unconditionally (for non-OLIGO kernels border length and sigma stay 0).
    svm.setParameter(SVMWrapper::BORDER_LENGTH, static_cast<Int>(border_length));
    svm.setParameter(SVMWrapper::SIGMA, sigma);
    // to obtain probabilities
    svm.setParameter(SVMWrapper::PROBABILITY, 1);

    // loading training data
    const String sample_file = dt_model_file_ + "_samples";
    if (!File::readable(sample_file))
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "DetectibilitySimulation: SVM sample file " + sample_file + " is not readable");
    }
    svm_problem* training_data = encoder.loadLibSVMProblem(sample_file);
    svm.setTrainingSample(training_data);

    LOG_INFO << "Predicting peptide detectabilities..    " << endl;

    String allowed_amino_acid_characters = "ACDEFGHIKLMNPQRSTVWY";

    // Encoding test data; the encoder is handed an all-zero vector with one entry per peptide
    vector<DoubleReal> probs;
    probs.resize(peptides_vector.size(), 0);

    svm_problem* prediction_data = encoder.encodeLibSVMProblemWithOligoBorderVectors(peptides_vector, probs,
                                                                                     k_mer_length,
                                                                                     allowed_amino_acid_characters,
                                                                                     svm.getIntParameter(SVMWrapper::BORDER_LENGTH));

    svm.getSVCProbabilities(prediction_data, detectabilities, labels);

    // clean up when finished with prediction
    // NOTE(review): libsvm's svm_problem carries pointers to separately allocated label/node
    // arrays; a plain delete releases only the struct itself. Confirm whether LibSVMEncoder
    // provides a dedicated routine for destroying problems and use it here if so.
    // NOTE(review): both problems are also leaked if one of the calls above throws.
    delete prediction_data;
    delete training_data;
  }