Beispiel #1
0
/**
 * Transfers the cluster assignment held by the MUSTER clustering object to a
 * partition, collecting at the same time the set of distinct identifiers seen
 *
 * \param muster_algorithm Clustering object whose 'cluster_ids' are read
 * \param DataPartition    Partition whose assignment vector and ID set are rebuilt
 * \param DataSize         Number of leading entries of 'cluster_ids' to transfer
 *
 */
void MUSTER_XCLARA::ProcessClusterAssignment(cluster::kmedoids &muster_algorithm,
                                             Partition         &DataPartition,
                                             size_t             DataSize)
{
  vector<cluster_id_t>& Assignment = DataPartition.GetAssignmentVector();
  set<cluster_id_t>&    UsedIDs    = DataPartition.GetIDs();

  /* Start from scratch: drop whatever the partition contained before */
  Assignment.clear();
  UsedIDs.clear();

  /* The identifiers are copied verbatim, no translation is applied */
  size_t Point = 0;
  while (Point < DataSize)
  {
    const cluster_id_t CurrentID = muster_algorithm.cluster_ids[Point];

    Assignment.push_back(CurrentID);
    UsedIDs.insert(CurrentID);

    ++Point;
  }

  /* The number of clusters and the noise flag of the partition are not
     modified here */
}
/**
 * Generate the nodes that describe a given partition. It generates the cluster
 * statistics, rename the clusters depending on their duration, and also compute
 * the sequence score of each cluster
 *
 * \Bursts    Set of bursts used in the cluster analysis
 * \Partition Partition obtained using the clustering algorithm
 * \Node      The set of nodes that describe the partition
 *
 * \return True if the the node were generated correctly, false otherwise
 *
 */
bool ClusteringRefinementDivisive::GenerateNodes(const vector<CPUBurst*>&     Bursts,
                                                 Partition&                   CurrentPartition,
                                                 vector<ClusterInformation*>& Nodes)
{
  SequenceScore                                    Scoring;
  vector<SequenceScoreValue>                       CurrentClustersScores;
  double                                           GlobalScore;
  map<cluster_id_t, percentage_t>                  PercentageDurations;
  map<cluster_id_t, vector<instance_t> >           BurstsPerNode;
  map<cluster_id_t, vector<instance_t> >::iterator BurstPerNodeIt;

  bool NoNoise = false;

  ostringstream  Messages;

  ClusteringStatistics Statistics;

  vector<cluster_id_t>& AssignmentVector = CurrentPartition.GetAssignmentVector();
  if (Bursts.size() != AssignmentVector.size())
  {
    ostringstream ErrorMessage;

    ErrorMessage << "number of points (" << Bursts.size();
    ErrorMessage << ") different from number of IDs (" << AssignmentVector.size() << ")";
    ErrorMessage << " when generating partition points";

    SetErrorMessage(ErrorMessage.str());
    SetError(true);
    return false;
  }

  /* Update Statistics */
  Statistics.InitStatistics(CurrentPartition.GetIDs());

  //  Messages.str("");
  // Messages << "|---> Computing statistics" << endl;
  // system_messages::information(Messages.str());

  if (!Statistics.ComputeStatistics(Bursts,
                                    CurrentPartition.GetAssignmentVector()))
  {
    SetErrorMessage(Statistics.GetLastError());
    return false;
  }

  Statistics.TranslatedIDs(CurrentPartition.GetAssignmentVector());

  PercentageDurations = Statistics.GetPercentageDurations();

  Messages.str("");
  Messages << "|-----> Computing score" << endl;
  system_messages::information(Messages.str());

  if (!Scoring.ComputeScore(Bursts,
                            CurrentPartition.GetAssignmentVector(),
                            PercentageDurations,
                            CurrentClustersScores,
                            GlobalScore,
                            false,
                            string(""),
                            true))
  {
    SetErrorMessage("unable to generate nodes", Scoring.GetLastError());
    SetError(true);
    return false;
  }

  /* Generate current level hierarchy */
  map<cluster_id_t, double> CurrentClustersDurations   = Statistics.GetDurationSums();
  map<cluster_id_t, size_t> CurrentClustersIndividuals = Statistics.GetIndividuals();

  Nodes.clear();

  for (size_t i = 0; i < CurrentClustersScores.size(); i++)
  {
    cluster_id_t CurrentID = CurrentClustersScores[i].GetID();

    ClusterInformation* NewNode =
      new ClusterInformation(CurrentClustersScores[i].GetID(),
                             CurrentClustersScores[i].GetClusterScore(),
                             CurrentClustersScores[i].GetOccurrences(),
                             CurrentClustersDurations[CurrentID],
                             CurrentClustersIndividuals[CurrentID]);

    Nodes.push_back(NewNode);

    /* DEBUG
    cout << "Subcluster ID = " << CurrentID << " Score = " << CurrentClustersScores[i].GetClusterScore();
    cout << " Individuals = " << CurrentClustersIndividuals[CurrentID] << endl; */


    BurstsPerNode[CurrentID] = vector<instance_t> (0);
  }


  /* Fill each node with the instances */
  for (size_t i = 0; i < Bursts.size(); i++)
  {
    BurstsPerNode[AssignmentVector[i]].push_back(Bursts[i]->GetInstance());
    /* DEBUG
    cout << "NODE ID = " << AssignmentVector[i] << " INSTANCE = " << Bursts[i]->GetInstance() << endl; */
  }

  for (size_t i = 0; i < Nodes.size(); i++)
  {
    /* DEBUG
    cout << "NODE with ID = " << Nodes[i]->GetID() << " has " << BurstsPerNode[i].size() << " instances" << endl; */
    Nodes[i]->SetInstances(BurstsPerNode[Nodes[i]->GetID()]);
  }

  return true;
}
/**
 * Generates the DATA files and the GNUPlot scripts for a given cluster results
 *
 * \param Bursts Vector containing the bursts of the current step
 * \param CurrentPartition Partition object of the burst vector
 * \param Step Depth of the current step
 *
 * \return True if plots were printed correctly, false otherwise
 *
 */
bool ClusteringRefinementDivisive::PrintPlots(const vector<CPUBurst*>& Bursts,
                                              Partition&               CurrentPartition,
                                              size_t                   Step)
{
  ostringstream         CurrentDataFileName;
  ostringstream         CurrentPlotFileNamePrefix;
  ostringstream         PlotTitle;
  ofstream              CurrentDataStream;
  vector<cluster_id_t>& IDs = CurrentPartition.GetAssignmentVector();

  CurrentPlotFileNamePrefix << OutputFilePrefix << ".STEP" << (Step+1);
  CurrentDataFileName << OutputFilePrefix << ".STEP" << (Step+1) << ".DATA.csv";

  if (Bursts.size() != IDs.size())
  {
    ostringstream ErrorMessage;

    ErrorMessage << "number of points (" << Bursts.size();
    ErrorMessage << ") different from number of IDs (" << IDs.size() << ")";
    ErrorMessage << " when printing plots";

    SetErrorMessage(ErrorMessage.str());
    SetError(true);
    return false;
  }

  CurrentDataStream.open(CurrentDataFileName.str().c_str(), ios_base::trunc);

  if (CurrentDataStream.fail())
  {
    ostringstream ErrorMessage;

    ErrorMessage << "unable to open data output file for step " << Step;
    SetError(true);
    SetErrorMessage(ErrorMessage.str().c_str(), strerror(errno));

    return false;

  }

  /* Flush points */
  ParametersManager *Parameters = ParametersManager::GetInstance();

  vector<string> ClusteringParametersNames;
  vector<string> ExtrapolationParametersNames;

  vector<bool>   ClusteringParametersPrecision;
  vector<bool>   ExtrapolationParametersPrecision;

  ClusteringParametersNames        = Parameters->GetClusteringParametersNames();
  ExtrapolationParametersNames     = Parameters->GetExtrapolationParametersNames();

  ClusteringParametersPrecision    = Parameters->GetClusteringParametersPrecision();
  ExtrapolationParametersPrecision = Parameters->GetExtrapolationParametersPrecision();


  CurrentDataStream << "# Instance,TaskId,ThreadId,Begin_Time,End_Time,Duration, Line";
  for (size_t i = 0; i < ClusteringParametersNames.size(); i++)
  {
    CurrentDataStream << "," << ClusteringParametersNames[i];
  }

  for (size_t i = 0; i < ClusteringParametersNames.size(); i++)
  {
    CurrentDataStream << "," << ClusteringParametersNames[i] << "_Norm";
  }

  for (size_t i = 0; i < ExtrapolationParametersNames.size(); i++)
  {
    CurrentDataStream << "," << ExtrapolationParametersNames[i];
  }

  CurrentDataStream << ",ClusterID" << endl;

  for (size_t i = 0; i < Bursts.size(); i++)
  {
    Bursts[i]->Print(CurrentDataStream,
                     ClusteringParametersPrecision,
                     ExtrapolationParametersPrecision,
                     IDs[i]+PARAVER_OFFSET);
  }

  /* Create plots */
  PlottingManager *Plots;

  Plots = PlottingManager::GetInstance();  // No Data Extraction

  /* DEBUG
  cout << __FUNCTION__ << "Algorithm name = " << ClusteringCore->GetClusteringAlgorithmName() << endl;
  cout << "Current partition has " << CurrentPartition.NumberOfClusters() << " clusters" << endl; */

  PlotTitle << "REFINEMENT STEP " << Step+1 << " - ";
  PlotTitle << ClusteringCore->GetClusteringAlgorithmName();

  bool verbose_state = system_messages::verbose;
  system_messages::verbose = false;

  if (!Plots->PrintPlots(CurrentDataFileName.str(),
                         CurrentPlotFileNamePrefix.str(),
                         PlotTitle.str(),
                         CurrentPartition.GetIDs()))
  {
    system_messages::verbose = verbose_state;
    SetError(true);
    SetErrorMessage(Plots->GetLastError());
    return false;
  }

  system_messages::verbose = verbose_state;


  return true;
}