//! メンバ追加のテスト(複数) TEST_F( ClusterInfoTest, test_addMembers ) { ClusterInfo ci; const vector< DataIndex >* pMembers; vector< DataIndex > addedData; addedData.push_back( 5 ); addedData.push_back( 7 ); addedData.push_back( 9 ); ci.addMembers( addedData ); pMembers = ci.getMembers(); ASSERT_EQ( 3, pMembers->size() ); EXPECT_EQ( 5, pMembers->at( 0 ) ); EXPECT_EQ( 7, pMembers->at( 1 ) ); EXPECT_EQ( 9, pMembers->at( 2 ) ); addedData.clear(); addedData.push_back( 4 ); addedData.push_back( 6 ); addedData.push_back( 8 ); addedData.push_back( 10 ); ci.addMembers( addedData ); pMembers = ci.getMembers(); ASSERT_EQ( 7, pMembers->size() ); EXPECT_EQ( 5, pMembers->at( 0 ) ); // ※ソートしない EXPECT_EQ( 7, pMembers->at( 1 ) ); EXPECT_EQ( 9, pMembers->at( 2 ) ); EXPECT_EQ( 4, pMembers->at( 3 ) ); EXPECT_EQ( 6, pMembers->at( 4 ) ); EXPECT_EQ( 8, pMembers->at( 5 ) ); EXPECT_EQ( 10, pMembers->at( 6 ) ); }
// Resolve a collection name (or stringified numeric id) to a collection id,
// using the cluster-wide registry when running in a cluster; falls back to
// the local resolver otherwise. Returns 0 when the collection is unknown.
TRI_voc_cid_t CollectionNameResolver::getCollectionIdCluster(
    std::string const& name) const {
  if (!ServerState::isRunningInCluster(_serverRole)) {
    return getCollectionIdLocal(name);
  }

  bool const looksNumeric = (name[0] >= '0' && name[0] <= '9');
  if (looksNumeric) {
    // the name already is a numeric id; validate it by resolving it back
    // to a name and checking the resulting collection type
    auto cid = static_cast<TRI_voc_cid_t>(
        arangodb::basics::StringUtils::uint64(name));
    if (getCollectionTypeCluster(getCollectionNameCluster(cid)) ==
        TRI_COL_TYPE_UNKNOWN) {
      return 0;
    }
    return cid;
  }

  try {
    // look the collection info up in the cluster registry
    auto collection =
        ClusterInfo::instance()->getCollection(_vocbase->name(), name);
    TRI_ASSERT(collection != nullptr);
    return collection->cid();
  } catch (...) {
  }
  return 0;
}
/* * Aim: to create binary tree of unique record id -> unique inventor id, * also to allow fast search and insertion/deletion. * the unique inventor id is also a const Record pointer, * meaning that different unique record ids may be associated with a same * const Record pointer that represents them. * * Algorithm: clean the uinv2count and uid2uinv tree first. * For any cluster in the cCluser_Info object: * For any const Record pointer p in the cluster member list: * create a std::pair of (p, d), where d is the delegate * (or representative) of the cluster * insert the pair into uid2uinv map. * End for * End for * * uinv2count is updated in the same way. */ void cBlocking_Operation_By_Coauthors::build_uid2uinv_tree(const ClusterInfo & cluster) { uinv2count_tree.clear(); uid2uinv_tree.clear(); uint32_t count = 0; //typedef list<Cluster> cRecGroup; // Maybe should be RecordGroup typedef list<Cluster> ClusterList; std::cout << "Building trees: 1. Unique Record ID to Unique Inventer ID. "; std::cout << "2 Unique Inventer ID to Number of holding patents ........"; std::cout << std::endl; map<string, ClusterList>::const_iterator p = cluster.get_cluster_map().begin(); for (; p != cluster.get_cluster_map().end(); ++p) { ClusterList::const_iterator q = p->second.begin(); for (; q != p->second.end(); ++q) { const Record * value = q->get_cluster_head().m_delegate; map<const Record *, uint32_t>::iterator pcount = uinv2count_tree.find(value); if (pcount == uinv2count_tree.end()) pcount = uinv2count_tree.insert(std::pair<const Record *, uint32_t>(value, 0)).first; for (RecordPList::const_iterator r = q->get_fellows().begin(); r != q->get_fellows().end(); ++r) { const Record * key = *r; uid2uinv_tree.insert(std::pair<const Record * , const Record *>(key, value )); ++(pcount->second); ++count; } } } std::cout << count << " nodes has been created inside the tree." << std::endl; }
//! コンストラクタ(マージ)のテスト TEST_F( ClusterInfoTest, test_mergeConstructor ) { ClusterInfo *pC1 = new ClusterInfo(); pC1->addMember( 3 ); pC1->addMember( 5 ); pC1->addMember( 1 ); ClusterInfo *pC2 = new ClusterInfo(); pC2->addMember( 2 ); pC2->addMember( 4 ); pC2->addMember( 6 ); const vector< DataIndex >* pMembers = NULL; ClusterInfo *pCm = new ClusterInfo( pC1, pC2, 100.0f, 123 ); pMembers = pCm->getMembers(); ASSERT_EQ( 6, pMembers->size() ); EXPECT_EQ( 1, pMembers->at( 0 ) ); // ※ソートされる EXPECT_EQ( 2, pMembers->at( 1 ) ); EXPECT_EQ( 3, pMembers->at( 2 ) ); EXPECT_EQ( 4, pMembers->at( 3 ) ); EXPECT_EQ( 5, pMembers->at( 4 ) ); EXPECT_EQ( 6, pMembers->at( 5 ) ); ASSERT_EQ( 100.0f, pCm->getMergeCost() ); ASSERT_EQ( 123, pCm->getId() ); delete pCm; delete pC1; delete pC2; return; }
//! メンバ追加のテスト(複数) TEST_F( ClusterInfoTest, test_setAndGetmergeCost ) { ClusterInfo ci; ci.setMergeCost( 0.5f ); EXPECT_EQ( 0.5f, ci.getMergeCost() ); ci.setMergeCost( 1234.567f ); EXPECT_EQ( 1234.567f, ci.getMergeCost() ); }
// Computes a spread measure for one cluster: the per-dimension sample
// standard deviation of the cluster's samples, averaged over the N
// dimensions. (Despite the name, the returned quantity is an averaged
// std-dev, not a variance — kept for interface compatibility.)
//
// @param cluster the cluster whose member sample indices are evaluated
// @param data    the full training matrix the indices refer to
// @return averaged per-dimension std-dev; 0 for clusters with < 2 samples
Float HierarchicalClustering::computeClusterVariance( const ClusterInfo &cluster, const MatrixFloat &data ){

    const UINT numSamples = cluster.getNumSamplesInCluster();

    // Guard: the sample std-dev divides by (numSamples-1); the original
    // code produced inf/NaN for clusters of size 0 or 1. A single sample
    // has zero spread, so report 0 instead.
    if( numSamples < 2 ) return 0;

    VectorFloat mean(N,0);
    VectorFloat std(N,0);

    // Compute the per-dimension mean over the samples in this cluster
    for(UINT j=0; j<N; j++){
        for(UINT i=0; i<numSamples; i++){
            UINT index = cluster[i];
            mean[j] += data[ index ][j];
        }
        mean[j] /= Float( numSamples );
    }

    // Compute the per-dimension sample standard deviation
    for(UINT j=0; j<N; j++){
        for(UINT i=0; i<numSamples; i++){
            std[j] += grt_sqr( data[ cluster[i] ][j] - mean[j] );
        }
        std[j] = grt_sqrt( std[j] / Float( numSamples-1 ) );
    }

    // Average the std-dev across all N dimensions
    Float variance = 0;
    for(UINT j=0; j<N; j++){
        variance += std[j];
    }
    return variance/N;
}
//! メンバ追加のテスト TEST_F( ClusterInfoTest, test_addMember ) { ClusterInfo ci; const vector< DataIndex >* pMembers; ci.addMember( 1 ); pMembers = ci.getMembers(); ASSERT_EQ( 1, pMembers->size() ); EXPECT_EQ( 1, pMembers->at( 0 ) ); ci.addMember( 100 ); pMembers = ci.getMembers(); ASSERT_EQ( 2, pMembers->size() ); EXPECT_EQ( 1, pMembers->at( 0 ) ); EXPECT_EQ( 100, pMembers->at( 1 ) ); }
// Single-linkage distance between two clusters: the smallest pairwise
// distance (taken from the precomputed distanceMatrix) between any sample
// in clusterA and any sample in clusterB.
Float HierarchicalClustering::computeClusterDistance( const ClusterInfo &clusterA, const ClusterInfo &clusterB ){

    const UINT sizeA = clusterA.getNumSamplesInCluster();
    const UINT sizeB = clusterB.getNumSamplesInCluster();
    Float best = grt_numeric_limits< Float >::max();

    for(UINT a=0; a<sizeA; a++){
        const UINT rowIndex = clusterA[a];
        for(UINT b=0; b<sizeB; b++){
            // Look each pair distance up once instead of twice.
            const Float d = distanceMatrix[ rowIndex ][ clusterB[b] ];
            if( d < best ){
                best = d;
            }
        }
    }

    return best;
}
bool GPFSHandler::setBasicInfo() { // do an initial polling here to get some cluster info if(pollinghandler == NULL) return false; pollinghandler->refreshClusterRecipe(); MErrno err = M_OK; ClusterInfo* clusterInfo = new ClusterInfo(&err); pollinghandler->updateClusterInfo(clusterInfo); char cname[30] = {0}; strcpy(cname,clusterInfo->getName()); cluster = cname; delete clusterInfo; procname = DAEMON_NAME; char hname[50] = {0}; if(gethostname(hname,50) < 0) return false; hostname = hname; return true; }
// Resolve a collection name (or stringified numeric id) to its collection
// type via the cluster-wide registry. Returns TRI_COL_TYPE_UNKNOWN when
// the collection cannot be found.
TRI_col_type_e CollectionNameResolver::getCollectionTypeCluster(
    std::string const& name) const {
  // Non-coordinator (this also covers the single-server case): use the
  // local lookup.
  if (!ServerState::isCoordinator(_serverRole)) {
    return getCollectionType(name);
  }

  bool const looksNumeric = (name[0] >= '0' && name[0] <= '9');
  if (looksNumeric) {
    // the name is a numeric id: resolve it to a name first, then recurse
    auto cid = static_cast<TRI_voc_cid_t>(
        arangodb::basics::StringUtils::uint64(name));
    return getCollectionTypeCluster(getCollectionName(cid));
  }

  try {
    // look the collection info up in the cluster registry
    auto collection =
        ClusterInfo::instance()->getCollection(_vocbase->name(), name);
    TRI_ASSERT(collection != nullptr);
    return collection->type();
  } catch (...) {
  }
  return TRI_COL_TYPE_UNKNOWN;
}
// Prepare the cluster subsystem: propagate configured paths/configs into
// ServerState, create the agency callback registry and the ClusterInfo /
// ClusterComm singletons, and — when clustering is enabled — connect to the
// agency, determine this server's role and address, and (for coordinators)
// wait until at least one DBServer is visible. Any unrecoverable
// misconfiguration terminates the process via FATAL_ERROR_EXIT().
void ClusterFeature::prepare() {
  // push configured paths/configs into the ServerState singleton
  ServerState::instance()->setDataPath(_dataPath);
  ServerState::instance()->setLogPath(_logPath);
  ServerState::instance()->setArangodPath(_arangodPath);
  ServerState::instance()->setDBserverConfig(_dbserverConfig);
  ServerState::instance()->setCoordinatorConfig(_coordinatorConfig);

  // expose the default replication factor to JavaScript
  V8DealerFeature* v8Dealer =
      ApplicationServer::getFeature<V8DealerFeature>("V8Dealer");
  v8Dealer->defineDouble("SYS_DEFAULT_REPLICATION_FACTOR_SYSTEM",
                         _systemReplicationFactor);

  // create the callback registry
  _agencyCallbackRegistry.reset(
      new AgencyCallbackRegistry(agencyCallbacksPath()));

  // Initialize ClusterInfo library:
  ClusterInfo::createInstance(_agencyCallbackRegistry.get());

  // initialize ConnectionManager library
  httpclient::ConnectionManager::initialize();

  // create an instance (this will not yet create a thread)
  ClusterComm::instance();

  AgencyFeature* agency =
      application_features::ApplicationServer::getFeature<AgencyFeature>(
          "Agency");

  if (agency->isEnabled() || _enableCluster) {
    // initialize ClusterComm library, must call initialize only once
    ClusterComm::initialize();

    // cluster authentication requires a JWT secret to be configured
    auto authenticationFeature =
        application_features::ApplicationServer::getFeature<AuthenticationFeature>(
            "Authentication");
    if (authenticationFeature->isEnabled() &&
        !authenticationFeature->hasUserdefinedJwt()) {
      LOG(FATAL) << "Cluster authentication enabled but jwt not set via command line. Please"
                 << " provide --server.jwt-secret which is used throughout the cluster.";
      FATAL_ERROR_EXIT();
    }
  }

  // return if cluster is disabled
  if (!_enableCluster) {
    return;
  }

  ServerState::instance()->setClusterEnabled();

  // register the prefix with the communicator
  AgencyComm::setPrefix(_agencyPrefix);

  // validate and register every configured agency endpoint
  for (size_t i = 0; i < _agencyEndpoints.size(); ++i) {
    std::string const unified = Endpoint::unifiedForm(_agencyEndpoints[i]);

    if (unified.empty()) {
      LOG(FATAL) << "invalid endpoint '" << _agencyEndpoints[i]
                 << "' specified for --cluster.agency-endpoint";
      FATAL_ERROR_EXIT();
    }

    AgencyComm::addEndpoint(unified);
  }

  // Now either _myId is set properly or _myId is empty and _myLocalInfo and
  // _myAddress are set.
  if (!_myAddress.empty()) {
    ServerState::instance()->setAddress(_myAddress);
  }

  // disable error logging for a while
  ClusterComm::instance()->enableConnectionErrorLogging(false);

  // perform an initial connect to the agency
  std::string const endpoints = AgencyComm::getEndpointsString();

  if (!AgencyComm::initialize()) {
    LOG(FATAL) << "Could not connect to agency endpoints (" << endpoints
               << ")";
    FATAL_ERROR_EXIT();
  }

  ServerState::instance()->setLocalInfo(_myLocalInfo);

  if (!_myId.empty()) {
    ServerState::instance()->setId(_myId);
  }

  // if a role was given explicitly, validate it and register it in the agency
  if (!_myRole.empty()) {
    ServerState::RoleEnum role = ServerState::stringToRole(_myRole);
    if (role == ServerState::ROLE_SINGLE ||
        role == ServerState::ROLE_UNDEFINED) {
      LOG(FATAL) << "Invalid role provided. Possible values: PRIMARY, "
                    "SECONDARY, COORDINATOR";
      FATAL_ERROR_EXIT();
    }
    if (!ServerState::instance()->registerWithRole(role)) {
      LOG(FATAL) << "Couldn't register at agency.";
      FATAL_ERROR_EXIT();
    }
  }

  // the effective role as known to the agency
  ServerState::RoleEnum role = ServerState::instance()->getRole();

  if (role == ServerState::ROLE_UNDEFINED) {
    // no role found
    LOG(FATAL) << "unable to determine unambiguous role for server '" << _myId
               << "'. No role configured in agency (" << endpoints << ")";
    FATAL_ERROR_EXIT();
  }

  if (role == ServerState::ROLE_SINGLE) {
    LOG(FATAL) << "determined single-server role for server '" << _myId
               << "'. Please check the configurarion in the agency ("
               << endpoints << ")";
    FATAL_ERROR_EXIT();
  }

  if (_myId.empty()) {
    _myId = ServerState::instance()->getId();  // has been set by getRole!
  }

  // check if my-address is set
  if (_myAddress.empty()) {
    // no address given, now ask the agency for our address
    _myAddress = ServerState::instance()->getAddress();
  }
  // if nonempty, it has already been set above

  // If we are a coordinator, we wait until at least one DBServer is there,
  // otherwise we can do very little, in particular, we cannot create
  // any collection:
  if (role == ServerState::ROLE_COORDINATOR) {
    ClusterInfo* ci = ClusterInfo::instance();
    double start = TRI_microtime();

    while (true) {
      LOG(INFO) << "Waiting for DBservers to show up...";
      ci->loadCurrentDBServers();
      std::vector<ServerID> DBServers = ci->getCurrentDBServers();
      // proceed with one DBServer only after a 15s grace period, so a
      // second DBServer gets a chance to show up first
      if (DBServers.size() >= 1 &&
          (DBServers.size() > 1 || TRI_microtime() - start > 15.0)) {
        LOG(INFO) << "Found " << DBServers.size() << " DBservers.";
        break;
      }
      sleep(1);
    };
  }

  if (_myAddress.empty()) {
    LOG(FATAL) << "unable to determine internal address for server '" << _myId
               << "'. Please specify --cluster.my-address or configure the "
                  "address for this server in the agency.";
    FATAL_ERROR_EXIT();
  }

  // now we can validate --cluster.my-address
  std::string const unified = Endpoint::unifiedForm(_myAddress);
  if (unified.empty()) {
    LOG(FATAL) << "invalid endpoint '" << _myAddress
               << "' specified for --cluster.my-address";
    FATAL_ERROR_EXIT();
  }
}
// Trains an agglomerative (bottom-up) hierarchical clustering model on the
// given data matrix. Starts with one cluster per sample, then repeatedly
// merges the closest pair of clusters (single-linkage, via
// computeClusterDistance) into a new cluster, recording one ClusterLevel
// per merge step in the 'clusters' member.
//
// @param data training matrix; one sample per row, one dimension per column
// @return true on success, false if the data is empty or no mergeable
//         cluster pair could be found at some level
bool HierarchicalClustering::train_(MatrixFloat &data){

    trained = false;
    clusters.clear();
    distanceMatrix.clear();

    // reject empty input
    if( data.getNumRows() == 0 || data.getNumCols() == 0 ){
        return false;
    }

    //Set the rows and columns
    M = data.getNumRows();
    N = data.getNumCols();

    //Build the distance matrix
    distanceMatrix.resize(M,M);

    //Build the distance matrix
    // Diagonal entries get max() so a sample is never "closest" to itself.
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<M; j++){
            if( i== j ) distanceMatrix[i][j] = grt_numeric_limits< Float >::max();
            else{
                distanceMatrix[i][j] = squaredEuclideanDistance(data[i], data[j]);
            }
        }
    }

    //Build the initial clusters, at the start each sample gets its own cluster
    UINT uniqueClusterID = 0;
    Vector< ClusterInfo > clusterData(M);
    for(UINT i=0; i<M; i++){
        clusterData[i].uniqueClusterID = uniqueClusterID++;
        clusterData[i].addSampleToCluster(i);
    }

    trainingLog << "Starting clustering..." << std::endl;

    //Create the first cluster level, each sample is it's own cluster
    UINT level = 0;
    ClusterLevel newLevel;
    newLevel.level = level;
    for(UINT i=0; i<M; i++){
        newLevel.clusters.push_back( clusterData[i] );
    }
    clusters.push_back( newLevel );

    //Move to level 1 and start the search
    level++;
    bool keepClustering = true;

    while( keepClustering ){

        //Find the closest two clusters within the cluster data
        // Only a strictly smaller distance replaces the current best, so
        // clusterPairs ends up holding exactly one (first-found) closest pair.
        Float minDist = grt_numeric_limits< Float >::max();
        Vector< Vector< UINT > > clusterPairs;
        UINT K = (UINT)clusterData.size();
        for(UINT i=0; i<K; i++){
            for(UINT j=0; j<K; j++){
                if( i != j ){
                    Float dist = computeClusterDistance( clusterData[i], clusterData[j] );
                    if( dist < minDist ){
                        minDist = dist;
                        Vector< UINT > clusterPair(2);
                        clusterPair[0] = i;
                        clusterPair[1] = j;
                        clusterPairs.clear();
                        clusterPairs.push_back( clusterPair );
                    }
                }
            }
        }

        // No pair found at all (e.g. only one cluster left): abort training.
        if( minDist == grt_numeric_limits< Float >::max() ){
            keepClustering = false;
            warningLog << "train_(MatrixFloat &data) - Failed to find any cluster at level: " << level << std::endl;
            return false;
        }else{
            //Merge the two closest clusters together and create a new level
            ClusterLevel newLevel;
            newLevel.level = level;

            //Create the new cluster
            ClusterInfo newCluster;
            newCluster.uniqueClusterID = uniqueClusterID++;

            const UINT numClusterPairs = clusterPairs.getSize();

            for(UINT k=0; k<numClusterPairs; k++){
                //Add all the samples in the first cluster to the new cluster
                UINT numSamplesInClusterA = clusterData[ clusterPairs[k][0] ].getNumSamplesInCluster();
                for(UINT i=0; i<numSamplesInClusterA; i++){
                    UINT index = clusterData[ clusterPairs[k][0] ][ i ];
                    newCluster.addSampleToCluster( index );
                }

                //Add all the samples in the second cluster to the new cluster
                UINT numSamplesInClusterB = clusterData[ clusterPairs[k][1] ].getNumSamplesInCluster();
                for(UINT i=0; i<numSamplesInClusterB; i++){
                    UINT index = clusterData[ clusterPairs[k][1] ][ i ];
                    newCluster.addSampleToCluster( index );
                }

                //Compute the cluster variance
                newCluster.clusterVariance = computeClusterVariance( newCluster, data );

                //Remove the two cluster pairs (so they will not be used in the next search
                // Erase by uniqueClusterID, not by index: erasing shifts the
                // vector, so the stored pair indices would be stale.
                UINT idA = clusterData[ clusterPairs[k][0] ].getUniqueClusterID();
                UINT idB = clusterData[ clusterPairs[k][1] ].getUniqueClusterID();
                UINT numRemoved = 0;
                Vector< ClusterInfo >::iterator iter = clusterData.begin();
                while( iter != clusterData.end() ){
                    if( iter->getUniqueClusterID() == idA || iter->getUniqueClusterID() == idB ){
                        iter = clusterData.erase( iter );
                        if( ++numRemoved >= 2 ) break;
                    }else iter++;
                }
            }

            //Add the merged cluster to the clusterData
            clusterData.push_back( newCluster );

            //Add the new level and cluster data to the main cluster buffer
            newLevel.clusters.push_back( newCluster );
            clusters.push_back( newLevel );

            //Update the level
            level++;
        }

        //Check to see if we should stop clustering
        if( level >= M ){
            keepClustering = false;
        }

        if( clusterData.size() == 0 ){
            keepClustering = false;
        }

        trainingLog << "Cluster level: " << level << " Number of clusters: " << clusters.back().getNumClusters() << std::endl;
    }

    //Flag that the model is trained
    trained = true;

    //Setup the cluster labels
    // NOTE(review): numClusters is a member presumably set via configuration
    // elsewhere (it is not derived from the merge result here) — confirm.
    clusterLabels.resize(numClusters);
    for(UINT i=0; i<numClusters; i++){
        clusterLabels[i] = i+1;
    }
    clusterLikelihoods.resize(numClusters,0);
    clusterDistances.resize(numClusters,0);

    return true;
}
//! 子クラスタの取得、設定のテスト TEST_F( ClusterInfoTest, test_setAndGetChildren ) { ClusterInfo c; ClusterInfo c1; c1.addMember( 1 ); c1.addMember( 100 ); ClusterInfo c2; c2.addMember( 10 ); c2.addMember( 11 ); c2.addMember( 12 ); // 初期状態 ASSERT_EQ( NULL, c.getChild1() ); ASSERT_EQ( NULL, c.getChild2() ); ASSERT_EQ( 0, c.getMembers()->size() ); // 子クラスタを設定 c.setChildren( &c1, &c2 ); ASSERT_EQ( &c1, c.getChild1() ); ASSERT_EQ( &c2, c.getChild2() ); ASSERT_EQ( 5, c.getMembers()->size() ); ASSERT_EQ( 1, c.getMembers()->at( 0 ) ); ASSERT_EQ( 10, c.getMembers()->at( 1 ) ); ASSERT_EQ( 11, c.getMembers()->at( 2 ) ); ASSERT_EQ( 12, c.getMembers()->at( 3 ) ); ASSERT_EQ( 100, c.getMembers()->at( 4 ) ); // 両方NULLを設定(メンバは変わらない) c.setChildren( NULL, NULL ); ASSERT_EQ( NULL, c.getChild1() ); ASSERT_EQ( NULL, c.getChild2() ); ASSERT_EQ( 5, c.getMembers()->size() ); ASSERT_EQ( 1, c.getMembers()->at( 0 ) ); ASSERT_EQ( 10, c.getMembers()->at( 1 ) ); ASSERT_EQ( 11, c.getMembers()->at( 2 ) ); ASSERT_EQ( 12, c.getMembers()->at( 3 ) ); ASSERT_EQ( 100, c.getMembers()->at( 4 ) ); // 片方NULLのみを設定(メンバは変わる) c.setChildren( NULL, &c1 ); ASSERT_EQ( NULL, c.getChild1() ); ASSERT_EQ( &c1, c.getChild2() ); ASSERT_EQ( 2, c.getMembers()->size() ); ASSERT_EQ( 1, c.getMembers()->at( 0 ) ); ASSERT_EQ( 100, c.getMembers()->at( 1 ) ); return; }
// Draw a cluster-wide unique id from ClusterInfo and render it as a
// decimal string to be used as a document key. The VPackSlice argument
// is unused here (kept for interface compatibility).
std::string DistributeBlock::createKey(VPackSlice) const {
  return std::to_string(ClusterInfo::instance()->uniqid());
}
void GPFSConfigHandler::task() { int nFSs = 0; int nPools = 0; int nDisks = 0; int nFsets = 0; int nNodes = 0; TEAL_ERR_T ret; string msg; char tmp[10]; string fsName; string stgName; string diskName; string fsetName; string nodeName; string clusterName; FilesystemInfo* fsInfo = NULL; StoragePoolInfo* stgInfo = NULL; DiskInfo* diskInfo = NULL; FileSet* fsetInfo = NULL; FileSet* fileSetList = NULL; MErrno err = M_OK; log_info("########################Start refreshing all entities#########################################"); err = GPFSHandler::getPollHandler()->getDaemonState(); if(err != M_OK) { msg = "daemon is down on local node "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->refreshClusterRecipe(); if(err != M_OK) { msg = "refresh cluster failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } ClusterInfo* clusterInfo = new ClusterInfo(&err); //update cluster info err = GPFSHandler::getPollHandler()->updateClusterInfo(clusterInfo); if(err != M_OK) { msg = "update cluster info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } //update all nodes info err = GPFSHandler::getPollHandler()->updateNodeInfo(clusterInfo); if(err != M_OK) { msg = "update node failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->getClusterInfo(clusterInfo); //this maybe not needed if(err != M_OK) { msg = "get cluster info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... 
log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateDiskSDRInfo(); if(err != M_OK) { /*TODO: This API invokes "mmsdrquery 30 3001:3004:3005:3006:3007:3008:3002:3003" under the cover. Need to check if it is a real error or an expected configuration to determin whether to ignore it or not.*/ msg = "update disk SDR info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); msg += ", ignore it..."; log_warn(msg); // return; // simply ignore it since there a configuration of two clusters and NSD may not be seen from the FS cluster. } err = GPFSHandler::getPollHandler()->updateFilesystemInfo(clusterInfo, 1);// to get perfermance statics even if not used. if(err != M_OK) { msg = "update file system failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateMountedNodeInfo(clusterInfo); // to get mounted node info if(err != M_OK) { /*TODO: This API invokes "mmlsmount all_local -Y" under the cover. Need to check if it is a real error or an expected configuration to determin whether to ignore it or not.*/ msg = "update mounted node info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); msg += ", ignore it..."; log_warn(msg); // return; // simply ignore it since there maybe no local file system configured } err = GPFSHandler::getPollHandler()->updateVfsStatsInfo(clusterInfo); // to get vfs info if(err != M_OK) { msg = "update vfs info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateThreadUtilInfo(clusterInfo); // to get thread util info if(err != M_OK) { msg = "update thread util info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... 
log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateIocStatsInfo(clusterInfo); // to get ioc statics info if(err != M_OK) { msg = "update ioc statics info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateCacheStatsInfo(clusterInfo); // to get cache statics info if(err != M_OK) { msg = "update cache statics info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updatePCacheStatsInfo(clusterInfo); // to get pcache statics info if(err != M_OK) { msg = "update pcache statics info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateFilesystemManagerInfo(clusterInfo);// update fs manager if(err != M_OK) { msg = "update file system manager failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updatePolicyInfo(clusterInfo); // to get policy info if(err != M_OK) { msg = "update policy info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } err = GPFSHandler::getPollHandler()->updateFilesystemConfigInfo(clusterInfo);// update fs config if(err != M_OK) { msg = "update file system config failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... 
log_error(msg); return; */ } ClusterStatus* clusterStatus = new ClusterStatus(); err = GPFSHandler::getPollHandler()->getClusterStatus(clusterStatus); if(err != M_OK) { msg = "get cluster status failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... log_error(msg); return; */ } clusterName = clusterInfo->getName(); int i = 0; string clusterid = clusterInfo->getId(); nFSs = clusterInfo->getNumFilesystems(); //log fs one by one for( i = 0 ; i < nFSs; i++) { fsInfo = clusterInfo->getFilesystem(i); if (fsInfo == NULL) { msg = "NULL filesystem "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&i); log_error(msg); continue; } fsName = fsInfo->getName(); err = GPFSHandler::getPollHandler()->updateStoragePoolInfo(clusterInfo, (char*)fsName.c_str()); if(err != M_OK) { msg = "update storage pool info for file system: "; msg += fsName; msg += " failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); continue; } msg = "Refresh file system: "; msg += fsName; log_debug(msg); ret = refreshFS(fsInfo, clusterid); if(ret != TEAL_SUCCESS) { msg = "Refresh file system: "; msg += fsName; msg += " failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } nPools = fsInfo->getNumStoragePools(); int j = 0; //log stg one by one for(; j < nPools; j++ ) { stgInfo = fsInfo->getStoragePool(j); if(stgInfo == NULL) { msg = "ERR stgInfo for storage pool: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&j); msg += " in (fs: "; msg += fsName; msg += ") is NULL"; log_error(msg); continue; } stgName = stgInfo->getName(); err = GPFSHandler::getPollHandler()->updateDiskInfo(clusterInfo, (char*)fsName.c_str(), (char*)stgName.c_str(),1); if(err != M_OK) { msg = "update disk info in (file system: "; msg += fsName; msg += ", storage pool: "; msg += stgName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); continue; } msg = 
"Refresh storage pool: "; msg += stgName; msg += " in (fs: "; msg += fsName; msg += ")"; log_debug(msg); ret = refreshStgPool(stgInfo, clusterid, fsName); if(ret != TEAL_SUCCESS) { msg = "Refresh storage pool: "; msg += stgName; msg += " in (fs: "; msg += fsName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } int k = 0; nDisks = stgInfo->getNumDisks(); //log disk one by one for(; k < nDisks ; k++ ) { diskInfo = stgInfo->getDisk(k); if(diskInfo == NULL) { msg = "diskInfo for disk: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&k); msg += " in (storage pool: "; msg += stgName; msg += ", fs: "; msg += fsName; msg += ") is NULL"; log_error(msg); continue; } diskName = diskInfo->getName(); msg = "Refresh disk: "; msg += diskName; msg += " in (storage pool: "; msg += stgName; msg += ", fs: "; msg += fsName; msg += ")"; log_debug(msg); ret = refreshDisk(diskInfo, clusterid); if(ret != TEAL_SUCCESS) { msg = "Refresh disk: "; msg += diskName; msg += " in (storage pool: "; msg += stgName; msg += ", fs: "; msg += fsName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } }//end of refresh disks }//end of refresh stgpool /* core dump in GPFS 3.4, only effective in 3.5 err = GPFSHandler::getPollHandler()->getFileSets((char*)fsName.c_str(), &fileSetList); if(err != M_OK) { msg = "update fileset info in (fs: "; msg += fsName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_error(msg); nFsets = 0; fileSetList = NULL; continue; } //at first time to get nFsets but will not return M_OK */ err = GPFSHandler::getPollHandler()->getFileSets1((char*)fsName.c_str(), fileSetList, &nFsets); if(nFsets <= 0) { msg = "no fileset found in (fs: "; msg += fsName; msg += ")"; log_warn(msg); nFsets = 0; fileSetList = NULL; continue; } fileSetList = new FileSet[nFsets]; err = GPFSHandler::getPollHandler()->getFileSets1((char*)fsName.c_str(), fileSetList, &nFsets); 
if(err != M_OK) { msg = "update fileset info in (fs: "; msg += fsName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); nFsets = 0; fileSetList = NULL; continue; } int l = 0; //log fileset one by one for(; l < nFsets; l++ ) { fsetInfo = &fileSetList[l]; if(fsetInfo == NULL) { msg = "fsetInfo for fset: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&i); msg += " in (fs: "; msg += fsName; msg += ") is NULL"; log_error(msg); continue; } fsetName = fsetInfo->getName(); msg = "Refresh fileset: "; msg += fsetName; msg += " in (fs: "; msg += fsName; msg += ")"; log_debug(msg); ret = refreshFset(fsetInfo, clusterid); if(ret != TEAL_SUCCESS) { msg = "Refresh file set: "; msg += fsetName; msg += " in (fs: "; msg += fsName; msg += ") failed with"; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } }//end of refresh fset if(fileSetList) { delete []fileSetList; fileSetList = NULL; nFsets = 0; fsetInfo = NULL; } }//end of refresh fs nNodes = clusterInfo->getNumNodes(); // to get disk access info, place this here to update num_access_disk in nodeinfo and need to invoke updateStoragePool() prior to this API err = GPFSHandler::getPollHandler()->updateDiskAccessInfo(clusterInfo); if(err != M_OK) { msg = "update disk access info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_warn(msg); /* Simply ignore this error to continue.... 
log_error(msg); return; */ } NodeInfo* nodeInfo = NULL; //log node one by one for( i = 0 ; i < nNodes; i++) { nodeInfo = clusterInfo->getNode(i); if (nodeInfo == NULL) { msg = "nodeInfo for node "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&i); msg += "is NULL"; log_error(msg); continue; } nodeName = nodeInfo->getName(); msg = "Refresh node: "; msg += nodeName; log_debug(msg); ret = refreshNode(nodeInfo, clusterid); if(ret != TEAL_SUCCESS) { msg = "Refresh node: "; msg += nodeName; msg += " failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); continue; } }//end of refresh node //refresh free disks here since free disk number/info can only be got after invoking updateDiskInfo() to all fs/stgpool err = GPFSHandler::getPollHandler()->updateFreeDiskInfo(clusterInfo); if(err != M_OK) { msg = "update free disk info failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); msg += ", ignore it..."; log_warn(msg); } nDisks = clusterInfo->getNumFreeDisks(); int k = 0; for(; k < nDisks ; k++ ) { diskInfo = clusterInfo->getFreeDisk(k); if(diskInfo == NULL) { msg = "diskInfo for free disk: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&i); msg += " is NULL"; log_error(msg); continue; } diskName = diskInfo->getName(); int s; int nServers = diskInfo->getNumServerItems(); int nBacks = diskInfo->getNumBackupServerItems(); string node_name; for(s = 0; s < nServers; s++) { DiskServerInfo *ds = diskInfo->getServer(s); node_name += string(ds->getName()) + string(" "); } for(s = 0; s < nBacks; s++) { DiskServerInfo *ds = diskInfo->getBackupServer(s); node_name += string(ds->getName()) + string(" "); } msg = "Refresh free disk: "; msg += "("; msg += diskName; msg += ")"; log_debug(msg); char svrList[NAME_STRING_LEN] = {0}; strcpy(svrList,node_name.c_str()); ret = refreshDisk(diskInfo, clusterid, svrList); if(ret != TEAL_SUCCESS) { msg = "Refresh free disk: "; msg += "("; msg += diskName; msg += ") failed with "; msg += 
Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } }//end of refresh free disks //refresh cluster here since free disk number/info can only be got after invoking updateDiskInfo() to all fs/stgpool msg = "Refresh cluster: "; msg += clusterName; log_debug(msg); ret = refreshCluster(clusterInfo,clusterStatus); if(ret != TEAL_SUCCESS) { msg = "Refresh cluster failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&err); log_error(msg); } log_info("##################Start to refresh perseus configuration###################"); int nRgAllocated = 6; /* number of rg slots allocated in the buffer in advance*/ char *bufP = NULL; int bufLen = 0; int rc = 0; int nPdisk = 0; int nVdisk = 0; int nRg = 0; int nDa = 0; string pdiskName; string vdiskName; string rgName; string daName; gpfsRecoveryGroupSdrInfo *rgSdrTableP = NULL; gpfsRecoveryGroupSdrInfo *rgSdrP = NULL; gpfsRecoveryGroup *rgTableP = NULL; gpfsRecoveryGroup *rgP = NULL; gpfsRecoveryGroupDeclusteredArray* daP = NULL; gpfsDeclusteredArrayPdisk* pdiskP = NULL; gpfsDeclusteredArrayVdisk* vdiskP = NULL; rgSdrTableP = new gpfsRecoveryGroupSdrInfo[nRgAllocated]; nRg = nRgAllocated; /* get initial info from SDR (all RG names) */ rc = getNsdRAIDSdrInfo(rgSdrTableP, &nRg); // retry if failed with ENOMEM if(rc == ENOMEM) { log_debug("Not enough memory allocated, reallocate..."); nRgAllocated = nRg > nRgAllocated ? 
nRg : nRgAllocated; delete[] rgSdrTableP; rgSdrTableP = NULL; rgSdrTableP = new gpfsRecoveryGroupSdrInfo[nRgAllocated]; nRg = nRgAllocated; rc = getNsdRAIDSdrInfo(rgSdrTableP, &nRg); } if (rc == M_OK) { if (nRg >= 1) { rgTableP = new gpfsRecoveryGroup[nRg]; if (rgTableP == NULL) { msg = "Initial RG table failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc); log_error(msg); return; } for (i = 0, rgSdrP = rgSdrTableP; i < nRg && i < nRgAllocated; i++, rgSdrP++) { rgP = rgTableP + i; rgP->updateRgSdrInfo(rgSdrP->getRecoveryGroupName(),rgSdrP->getRecoveryGroupServerList(),rgSdrP->getRecoveryGroupId()); rc = getRecoveryGroupSummary(rgP); //refresh rg info if (rc == 0) { rgName = rgP->getRecoveryGroupName(); rc = getRecoveryGroupDeclusteredArrays(rgP); // refresh da info if (rc == 0) { int l = 0; int nDa = rgP->getRecoveryGroupDeclusterArrays(); bool allDaOK = true; // is all DA ok? for(; l < nDa; l++) { daP = rgP->getDeclusteredArrayP(l); if(daP == NULL) { msg = "da: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&l); msg += "in (rg: "; msg += rgName; msg += ") is NULL"; log_error(msg); continue; } daName = daP->getDeclusteredArrayName(); msg = "Refresh da: "; msg += daName; msg += " in rg: "; msg += rgName; log_debug(msg); ret = refreshDa(daP, clusterid, rgName); if(ret != TEAL_SUCCESS) { msg = "Refresh declustered array: "; msg += daName; msg += " in (rg: "; msg += rgName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } int j = 0; int k = 0; nPdisk = daP->getDeclusteredArrayPdisks(); nVdisk = daP->getDeclusteredArrayVdisks(); for(; j < nPdisk; j++) { pdiskP = daP->getDeclusteredArrayPdiskP(j); if(pdiskP == NULL) { msg = "pdisk: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&j); msg += " in (rg: "; msg += rgName; msg += ", da: "; msg += daName; msg += ") is NULL"; log_error(msg); continue; } pdiskName = pdiskP->getPdiskName(); msg = "Refresh pdisk: "; msg += pdiskName; msg += " in (rg: "; msg += 
rgName; msg += ", da: "; msg += daName; msg += ")"; log_debug(msg); ret = refreshPdisk(pdiskP,clusterid,rgName,daName); if(ret != TEAL_SUCCESS) { msg = "Refresh pdisk: "; msg += pdiskName; msg += " in (rg: "; msg += rgName; msg += ", da: "; msg += daName; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } } for(; k < nVdisk; k++) { vdiskP = daP->getDeclusteredArrayVdiskP(k); if(vdiskP == NULL) { msg = "vdisk: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&k); msg += " in (rg: "; msg += rgName; msg += ", da: "; msg += daName; msg += ") is NULL"; log_error(msg); continue; } vdiskName = vdiskP->getVdiskName(); msg = "Refresh vdisk: "; msg += vdiskName; msg += " in (rg: "; msg += rgName; msg += ", da: "; msg += daName; log_debug(msg); ret = refreshVdisk(vdiskP,clusterid,rgName,daName); if(ret != TEAL_SUCCESS) { msg = "Refresh vdisk: "; msg += vdiskName; msg += " in (rg: "; msg += rgName; msg += ", da: "; msg += daName; msg += ") failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } } allDaOK &= strcmp(daP->getDeclusteredNeedsService(),"yes"); // check all DA's status } msg = "Refresh rg: "; msg += rgName; log_debug(msg); ret = refreshRg(rgP, clusterid,allDaOK); if(ret != TEAL_SUCCESS) { msg = "Refresh recovery group: "; msg += rgName; msg += " failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&ret); log_error(msg); } } else { msg = "get DA to refresh DA in RG: "; msg += rgName; msg += " failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc); log_warn(msg); continue; } } else { msg = "get RG summary to refresh RG: "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&i); msg += " failed with "; msg += Utils::int_to_char(tmp,10,(unsigned int*)&rc); log_warn(msg); continue; } } } else { log_warn("No recovery group found!"); } } else if(rc == ENODEV) { msg = "No perseus configuration.."; log_info(msg); } else { msg = "Failed to getNsdRAIDSdrInfo with "; msg += 
Utils::int_to_char(tmp,10,(unsigned int*)&rc); log_warn(msg); } log_info("########################End of refresh all entities#########################################"); return; }