/**
 * Transfers the per-point cluster identifiers held by the clustering
 * algorithm object into a partition.
 *
 * Both the partition's assignment vector and its set of distinct IDs are
 * emptied first and then refilled from the first DataSize entries of
 * muster_algorithm.cluster_ids. The identifiers are stored as they are: no
 * renumbering is applied, and neither the number of clusters nor the noise
 * flag of the partition is touched here.
 *
 * \param muster_algorithm Algorithm object whose cluster_ids are read
 * \param DataPartition    Partition whose assignment vector and ID set are
 *                         rebuilt
 * \param DataSize         Number of entries of cluster_ids to copy
 */
void MUSTER_XCLARA::ProcessClusterAssignment(cluster::kmedoids &muster_algorithm,
                                             Partition         &DataPartition,
                                             size_t             DataSize)
{
  vector<cluster_id_t>& Assignment = DataPartition.GetAssignmentVector();
  set<cluster_id_t>&    UsedIDs    = DataPartition.GetIDs();

  /* Start from a clean partition */
  Assignment.clear();
  UsedIDs.clear();

  /* One assignment per point; the set keeps track of the distinct IDs seen */
  size_t Point = 0;
  while (Point < DataSize)
  {
    Assignment.push_back(muster_algorithm.cluster_ids[Point]);
    UsedIDs.insert(muster_algorithm.cluster_ids[Point]);
    ++Point;
  }
}
/** * Generate the nodes that describe a given partition. It generates the cluster * statistics, rename the clusters depending on their duration, and also compute * the sequence score of each cluster * * \Bursts Set of bursts used in the cluster analysis * \Partition Partition obtained using the clustering algorithm * \Node The set of nodes that describe the partition * * \return True if the the node were generated correctly, false otherwise * */ bool ClusteringRefinementDivisive::GenerateNodes(const vector<CPUBurst*>& Bursts, Partition& CurrentPartition, vector<ClusterInformation*>& Nodes) { SequenceScore Scoring; vector<SequenceScoreValue> CurrentClustersScores; double GlobalScore; map<cluster_id_t, percentage_t> PercentageDurations; map<cluster_id_t, vector<instance_t> > BurstsPerNode; map<cluster_id_t, vector<instance_t> >::iterator BurstPerNodeIt; bool NoNoise = false; ostringstream Messages; ClusteringStatistics Statistics; vector<cluster_id_t>& AssignmentVector = CurrentPartition.GetAssignmentVector(); if (Bursts.size() != AssignmentVector.size()) { ostringstream ErrorMessage; ErrorMessage << "number of points (" << Bursts.size(); ErrorMessage << ") different from number of IDs (" << AssignmentVector.size() << ")"; ErrorMessage << " when generating partition points"; SetErrorMessage(ErrorMessage.str()); SetError(true); return false; } /* Update Statistics */ Statistics.InitStatistics(CurrentPartition.GetIDs()); // Messages.str(""); // Messages << "|---> Computing statistics" << endl; // system_messages::information(Messages.str()); if (!Statistics.ComputeStatistics(Bursts, CurrentPartition.GetAssignmentVector())) { SetErrorMessage(Statistics.GetLastError()); return false; } Statistics.TranslatedIDs(CurrentPartition.GetAssignmentVector()); PercentageDurations = Statistics.GetPercentageDurations(); Messages.str(""); Messages << "|-----> Computing score" << endl; system_messages::information(Messages.str()); if (!Scoring.ComputeScore(Bursts, 
CurrentPartition.GetAssignmentVector(), PercentageDurations, CurrentClustersScores, GlobalScore, false, string(""), true)) { SetErrorMessage("unable to generate nodes", Scoring.GetLastError()); SetError(true); return false; } /* Generate current level hierarchy */ map<cluster_id_t, double> CurrentClustersDurations = Statistics.GetDurationSums(); map<cluster_id_t, size_t> CurrentClustersIndividuals = Statistics.GetIndividuals(); Nodes.clear(); for (size_t i = 0; i < CurrentClustersScores.size(); i++) { cluster_id_t CurrentID = CurrentClustersScores[i].GetID(); ClusterInformation* NewNode = new ClusterInformation(CurrentClustersScores[i].GetID(), CurrentClustersScores[i].GetClusterScore(), CurrentClustersScores[i].GetOccurrences(), CurrentClustersDurations[CurrentID], CurrentClustersIndividuals[CurrentID]); Nodes.push_back(NewNode); /* DEBUG cout << "Subcluster ID = " << CurrentID << " Score = " << CurrentClustersScores[i].GetClusterScore(); cout << " Individuals = " << CurrentClustersIndividuals[CurrentID] << endl; */ BurstsPerNode[CurrentID] = vector<instance_t> (0); } /* Fill each node with the instances */ for (size_t i = 0; i < Bursts.size(); i++) { BurstsPerNode[AssignmentVector[i]].push_back(Bursts[i]->GetInstance()); /* DEBUG cout << "NODE ID = " << AssignmentVector[i] << " INSTANCE = " << Bursts[i]->GetInstance() << endl; */ } for (size_t i = 0; i < Nodes.size(); i++) { /* DEBUG cout << "NODE with ID = " << Nodes[i]->GetID() << " has " << BurstsPerNode[i].size() << " instances" << endl; */ Nodes[i]->SetInstances(BurstsPerNode[Nodes[i]->GetID()]); } return true; }
/** * Generates the DATA files and the GNUPlot scripts for a given cluster results * * \param Bursts Vector containing the bursts of the current step * \param CurrentPartition Partition object of the burst vector * \param Step Depth of the current step * * \return True if plots were printed correctly, false otherwise * */ bool ClusteringRefinementDivisive::PrintPlots(const vector<CPUBurst*>& Bursts, Partition& CurrentPartition, size_t Step) { ostringstream CurrentDataFileName; ostringstream CurrentPlotFileNamePrefix; ostringstream PlotTitle; ofstream CurrentDataStream; vector<cluster_id_t>& IDs = CurrentPartition.GetAssignmentVector(); CurrentPlotFileNamePrefix << OutputFilePrefix << ".STEP" << (Step+1); CurrentDataFileName << OutputFilePrefix << ".STEP" << (Step+1) << ".DATA.csv"; if (Bursts.size() != IDs.size()) { ostringstream ErrorMessage; ErrorMessage << "number of points (" << Bursts.size(); ErrorMessage << ") different from number of IDs (" << IDs.size() << ")"; ErrorMessage << " when printing plots"; SetErrorMessage(ErrorMessage.str()); SetError(true); return false; } CurrentDataStream.open(CurrentDataFileName.str().c_str(), ios_base::trunc); if (CurrentDataStream.fail()) { ostringstream ErrorMessage; ErrorMessage << "unable to open data output file for step " << Step; SetError(true); SetErrorMessage(ErrorMessage.str().c_str(), strerror(errno)); return false; } /* Flush points */ ParametersManager *Parameters = ParametersManager::GetInstance(); vector<string> ClusteringParametersNames; vector<string> ExtrapolationParametersNames; vector<bool> ClusteringParametersPrecision; vector<bool> ExtrapolationParametersPrecision; ClusteringParametersNames = Parameters->GetClusteringParametersNames(); ExtrapolationParametersNames = Parameters->GetExtrapolationParametersNames(); ClusteringParametersPrecision = Parameters->GetClusteringParametersPrecision(); ExtrapolationParametersPrecision = Parameters->GetExtrapolationParametersPrecision(); CurrentDataStream << "# 
Instance,TaskId,ThreadId,Begin_Time,End_Time,Duration, Line"; for (size_t i = 0; i < ClusteringParametersNames.size(); i++) { CurrentDataStream << "," << ClusteringParametersNames[i]; } for (size_t i = 0; i < ClusteringParametersNames.size(); i++) { CurrentDataStream << "," << ClusteringParametersNames[i] << "_Norm"; } for (size_t i = 0; i < ExtrapolationParametersNames.size(); i++) { CurrentDataStream << "," << ExtrapolationParametersNames[i]; } CurrentDataStream << ",ClusterID" << endl; for (size_t i = 0; i < Bursts.size(); i++) { Bursts[i]->Print(CurrentDataStream, ClusteringParametersPrecision, ExtrapolationParametersPrecision, IDs[i]+PARAVER_OFFSET); } /* Create plots */ PlottingManager *Plots; Plots = PlottingManager::GetInstance(); // No Data Extraction /* DEBUG cout << __FUNCTION__ << "Algorithm name = " << ClusteringCore->GetClusteringAlgorithmName() << endl; cout << "Current partition has " << CurrentPartition.NumberOfClusters() << " clusters" << endl; */ PlotTitle << "REFINEMENT STEP " << Step+1 << " - "; PlotTitle << ClusteringCore->GetClusteringAlgorithmName(); bool verbose_state = system_messages::verbose; system_messages::verbose = false; if (!Plots->PrintPlots(CurrentDataFileName.str(), CurrentPlotFileNamePrefix.str(), PlotTitle.str(), CurrentPartition.GetIDs())) { system_messages::verbose = verbose_state; SetError(true); SetErrorMessage(Plots->GetLastError()); return false; } system_messages::verbose = verbose_state; return true; }