/**
 * Build the cluster tree for a dataset, gather the clusters selected by
 * CollectClustersOnContour() and publish them in data_array,
 * entries_array and data_num.
 *
 * The cluster file name is derived from okcdata->filepath by replacing its
 * last four characters (assumed to be ".okc") with ".cg".
 *
 * @param okcdata  dataset descriptor; its filepath locates the data file and
 *                 it is forwarded to the cluster tree reader.
 * @return true once the arrays have been filled; false when okcdata is null
 *         or its path is too short to carry a 4-character extension or too
 *         long for the local file name buffer.
 */
bool DimReducer::GetDataFromClusters(OkcData* okcdata, const char*)
{
    const double contour_radius = 0.02;

    if (!okcdata) {
        return false;
    }

    // Prepare the file name for clustering results.
    // Assume the file name extension is ".okc".
    char cgfilename[FILENAME_MAXLEN];
    const size_t N = strlen(okcdata->filepath);
    // The path must be long enough to hold the extension being replaced and
    // short enough (terminator included) to fit in cgfilename; otherwise
    // the copies below would write outside the buffer.
    if (N < 4 || N >= sizeof(cgfilename)) {
        return false;
    }
    strcpy(cgfilename, okcdata->filepath);
    strcpy(&cgfilename[N - 4], ".cg");

    // First generate a cluster tree. A plain new-expression throws on
    // failure instead of returning null, so no null check is needed here.
    ClusterTree* btree = new ClusterTree();
    // NOTE(review): the outcome of ReadClusterTreeFile is not inspected;
    // if it reports failure, that should be handled here -- confirm its
    // return type and whether nodes[0] is valid after a failed read.
    btree->ReadClusterTreeFile(okcdata->filepath, cgfilename, okcdata);

    // Allocate the collectors only after every early return, so that no
    // exit path leaks them.
    LinkList* data_linkList = new LinkList();
    LinkList* entries_linkList = new LinkList();

    CollectClustersOnContour(okcdata, btree, contour_radius, btree->nodes[0],
                             data_linkList, entries_linkList);

    data_array = (double**) data_linkList->TransToArray();
    entries_array = (int**) entries_linkList->TransToArray();
    data_num = data_linkList->GetLength();

    delete data_linkList;
    delete entries_linkList;
    delete btree;

    return true;
}
bool SBBOperator::doOperator() { //Verify that we have a correct data type for input assert(typeid(*m_input)==typeid(ClusterTree)); ClusterTree* input = dynamic_cast<ClusterTree*>(m_input); // If the non-brushed cluster radius has been changed, // re-mark the contour of non-brushed nodes if (m_changeFlag&DIRTY_CLUSTER_RADIUS) { ClusterTree::hierMarkContourNodes(input, m_hierInfo->root_radius * m_hierInfo->cluster_radius, IS_CONTOUR0); } // If the brushed cluster radius has been changed, // re-mark the contour of brushed nodes if (m_changeFlag&DIRTY_BRUSHED_RADIUS) { ClusterTree::hierMarkContourNodes( input, m_hierInfo->root_radius * m_hierInfo->brushed_radius, IS_CONTOUR1 ); } // If the handle position has been changed,re-mark the node color if ( m_changeFlag&DIRTY_HANDLE_POSITION) { ClusterTree::SetupBrushParameters(input, m_hierInfo); } OkcData* output = new OkcData(); input->generateSBBResult(output); SAFE_DELETE(m_output); m_output = (Data*)output; return true; }
/**
 * Fill an info structure from the cluster tree behind an opaque handle.
 *
 * Writes three fields of info:
 *  - spatial_dimension: dimension() of the tree's coordinates,
 *  - dimension:         size() of the tree's coordinates,
 *  - nr_tree_nodes:     nodesCount() of the tree.
 *
 * @param tree  opaque handle, reinterpreted as a ClusterTree.
 * @param info  structure receiving the values.
 * @return always 0.
 */
int hmat_cluster_get_info(hmat_cluster_tree_t *tree, hmat_cluster_info_t* info)
{
    // Go through void* to recover the C++ object from the C handle.
    void* opaque = tree;
    ClusterTree* cluster = static_cast<ClusterTree*>(opaque);

    info->spatial_dimension = cluster->data.coordinates()->dimension();
    info->dimension = cluster->data.coordinates()->size();
    info->nr_tree_nodes = cluster->nodesCount();

    return 0;
}
void MSA::SetGSCWeights() const { ClusterTree CT; CalcBLOSUMWeights(CT); // Calculate weights and store in tree. ClusterNode *ptrRoot = CT.GetRoot(); ptrRoot->SetWeight2(1.0); SetSubtreeGSCWeight(ptrRoot->GetLeft()); SetSubtreeGSCWeight(ptrRoot->GetRight()); // Copy weights from tree to MSA. SetSubtreeWeight2(ptrRoot); }
/**
 * Recursively duplicate this node and all of its children.
 *
 * @param copyFather  tree from which the copy of this node is sliced. When
 *                    it is null, this node builds a new ClusterTree from a
 *                    copy of its own dof data, and that new tree is the one
 *                    passed down to the children.
 * @return the newly created node, with copies of all children inserted at
 *         the same indices.
 */
ClusterTree* ClusterTree::copy(const ClusterTree* copyFather) const
{
    ClusterTree* result = NULL;

    if (copyFather) {
        result = copyFather->slice(data.offset(), data.size());
    } else {
        // The root is responsible for the points array and the mapping.
        result = new ClusterTree(data.dofData_->copy());
        copyFather = result;
    }

    if (isLeaf()) {
        return result;
    }

    for (int childIndex = 0; childIndex < nrChild(); ++childIndex) {
        ClusterTree* child = (ClusterTree*) getChild(childIndex);
        result->insertChild(childIndex, child->copy(copyFather));
    }
    return result;
}
int main (int argc, const char * argv[]) { InfoLog log("ClusterTreeExample"); //Load some training data to train the ClusterTree model MatrixDouble trainingData; if( !trainingData.loadFromCSVFile("ClusterTreeData.csv") ){ log << "Failed to load training data!" << endl; return EXIT_FAILURE; } //Create a new ClusterTree instance ClusterTree ctree; //Set the number of steps that will be used to choose the best splitting values //More steps will give you a better model, but will take longer to train ctree.setNumSplittingSteps( 100 ); //Set the maximum depth of the tree ctree.setMaxDepth( 10 ); //Set the minimum number of samples allowed per node ctree.setMinNumSamplesPerNode( 10 ); //Set the minimum RMS error allowed per node ctree.setMinRMSErrorPerNode( 0.1 ); //Train a cluster tree model if( !ctree.train( trainingData ) ){ log << "Failed to train model!" << endl; return EXIT_FAILURE; } if( !ctree.saveModelToFile("Model.grt") ){ log << "Failed to train model!" << endl; return EXIT_FAILURE; } if( !ctree.loadModelFromFile("Model.grt") ){ log << "Failed to train model!" << endl; return EXIT_FAILURE; } //Print the tree ctree.print(); return EXIT_SUCCESS; }
// Return value is the group count, i.e. the effective number // of distinctly different sequences. unsigned MSA::CalcBLOSUMWeights(ClusterTree &BlosumCluster) const { // Build distance matrix DistFunc DF; unsigned uSeqCount = GetSeqCount(); DF.SetCount(uSeqCount); for (unsigned i = 0; i < uSeqCount; ++i) for (unsigned j = i+1; j < uSeqCount; ++j) { double dDist = GetPctIdentityPair(i, j); assert(dDist >= 0.0 && dDist <= 1.0); DF.SetDist(i, j, (float) (1.0 - dDist)); } // Cluster based on the distance function BlosumCluster.Create(DF); // Return value is HMMer's "effective sequence count". return SetBLOSUMNodeWeight(BlosumCluster.GetRoot(), 1.0 - BLOSUM_DIST); }
bool StandardAdmissibilityCondition::isAdmissible(const ClusterTree& rows, const ClusterTree& cols) { CompressionMethod m = HMatSettings::getInstance().compressionMethod; bool isFullAlgo = !(m == AcaPartial || m == AcaPlus); size_t elements = ((size_t) rows.data.size()) * cols.data.size(); if(always_ && (rows.isLeaf() || cols.isLeaf())) return true; if(isFullAlgo && elements > maxElementsPerBlock) return false; if(!isFullAlgo && elements > maxElementsPerBlockAca_) return false; // a one element cluster would have a 0 diameter so we stop // before it happen. if(rows.data.size() < 2 || cols.data.size() < 2) return false; if(always_) return true; AxisAlignedBoundingBox* rows_bbox = static_cast<AxisAlignedBoundingBox*>(rows.admissibilityAlgoData_); if (rows_bbox == NULL) { rows_bbox = new AxisAlignedBoundingBox(rows.data); rows.admissibilityAlgoData_ = rows_bbox; } AxisAlignedBoundingBox* cols_bbox = static_cast<AxisAlignedBoundingBox*>(cols.admissibilityAlgoData_); if (cols_bbox == NULL) { cols_bbox = new AxisAlignedBoundingBox(cols.data); cols.admissibilityAlgoData_ = cols_bbox; } return std::min(rows_bbox->diameter(), cols_bbox->diameter()) <= eta_ * rows_bbox->distanceTo(*cols_bbox); }
/*! \brief Return the number of children in the column dimension. */ inline int nrChildCol() const { // if cols admissible, only one child = itself return colsAdmissible ? 1 : cols_->nrChild() ; }
/*! \brief Return the number of children in the row dimension. */ inline int nrChildRow() const { // if rows admissible, only one child = itself return rowsAdmissible ? 1 : rows_->nrChild() ; }