Example #1
0
   /*
    * get file info from every replica
    * return value don't needed, we just need finfos
    */
   int read_file_info(const VUINT64& replicas,
       const uint64_t block_id, const uint64_t file_id, map<uint64_t, FileInfo>& finfos)
   {
     finfos.clear();
     int ret = TFS_SUCCESS;
     VUINT64::const_iterator iter = replicas.begin();
     for ( ; iter != replicas.end(); iter++)
     {
       int32_t retry = 2;
       while (retry--)
       {
         FileInfo info;
         ret = ToolUtil::read_file_info(*iter, block_id, file_id, FORCE_STAT, info);
         if (TFS_SUCCESS == ret)
         {
           finfos.insert(make_pair(*iter, info));
           break;
         }
         else if (EXIT_META_NOT_FOUND_ERROR == ret) // file not exist, just ignore
         {
           break;
         }
       }
     }

     return (finfos.size() > 0) ? TFS_SUCCESS : TFS_ERROR;
   }
Example #2
0
void print_result(const VUINT64& need_sync_block_list, const VUINT64& lost_block_list)
{
  if (lost_block_list.size() > 0)
  {
    fprintf(stdout, "BOTH LOST BLOCK COUNT: %zd\n", lost_block_list.size());
    for (uint32_t i = 0; i < lost_block_list.size(); i++)
    {
      fprintf(stdout, "%" PRI64_PREFIX "u\n", lost_block_list.at(i));
    }
  }

  int32_t real_need_sync_block_size = need_sync_block_list.size() - lost_block_list.size();
  if (real_need_sync_block_size > 0)
  {
    fprintf(stdout, "NEED SYNC BLOCK COUNT: %d\n", real_need_sync_block_size);
    set<uint64_t> lost_block_set;
    lost_block_set.insert(lost_block_list.begin(), lost_block_list.end());
    for (uint32_t i = 0; i < need_sync_block_list.size(); i++)
    {
      uint64_t block_id = need_sync_block_list.at(i);
      if (lost_block_set.find(block_id) == lost_block_set.end())
      {
        fprintf(stdout, "%" PRI64_PREFIX "u\n", block_id);// can sync from slave cluster
      }
    }
  }
}
void recover_block_from_slave_cluster(const char* ns_addr, const char* ns_slave_addr,
    const VUINT64& tmp_fail_block_list, VUINT64& success_block_list,
    VUINT64& fail_block_list, multimap<uint64_t, uint64_t>& fail_block_file_list)
{
  int ret = TFS_SUCCESS;
  VUINT64::const_iterator vit = tmp_fail_block_list.begin();
  multimap<uint64_t, uint64_t> tmp_fail_block_file_list;
  vector<FileInfoV2> finfos;
  for (; vit != tmp_fail_block_list.end(); vit++)
  {
    uint64_t block_id = (*vit);
    finfos.clear();
    ret = ToolUtil::read_file_infos_v2(Func::get_host_ip(ns_slave_addr), block_id, finfos);
    if (ret == TFS_SUCCESS)
    {
      bool all_success = true;
      int32_t copy_file_succ_count = 0;
      tmp_fail_block_file_list.clear();
      vector<FileInfoV2>::const_iterator v_file_iter = finfos.begin();
      for (; v_file_iter != finfos.end(); v_file_iter++)
      {
        if ((v_file_iter->status_ & FILE_STATUS_DELETE) != 0)
          continue;

        uint64_t file_id = v_file_iter->id_;
        ret = copy_file_from_slave_cluster(ns_slave_addr, ns_addr, block_id, (*v_file_iter));
        if (TFS_SUCCESS == ret)
        {
          ++copy_file_succ_count;
          TBSYS_LOG(DEBUG, "recover block_id: %" PRI64_PREFIX "u, file_id: %" PRI64_PREFIX "u successful from slave cluster!", block_id, file_id);
        }
        else
        {
          TBSYS_LOG(WARN, "recover block_id: %" PRI64_PREFIX "u, file_id: %" PRI64_PREFIX "u failed from slave cluster, ret: %d", block_id, file_id, ret);
          all_success = false;
          tmp_fail_block_file_list.insert(pair<uint64_t, uint64_t>(block_id, file_id));
        }
      }

      if (all_success)
      {
        success_block_list.push_back(block_id);
      }
      else if (0 == copy_file_succ_count)
      {
        fail_block_list.push_back(block_id);
      }
      else
      {
        fail_block_file_list.insert(tmp_fail_block_file_list.begin(), tmp_fail_block_file_list.end());
      }
    }
    else
    {
      fail_block_list.push_back(block_id);
    }
  }
}
Example #4
0
    int32_t elect_ds_exclude_group(const DS_WEIGHT& weights, const int32_t elect_count, int64_t& elect_seq,
        VUINT64& elect_ds_list)
    {
      if (elect_count == 0)
      {
        TBSYS_LOG(DEBUG, "current elect count(%d) <= 0, must be return", elect_count);
        return 0;
      }

      std::set < uint32_t > existlan;
      for (uint32_t i = 0; i < elect_ds_list.size(); ++i)
      {
        uint32_t lan = Func::get_lan(elect_ds_list[i], SYSPARAM_NAMESERVER.group_mask_);
        existlan.insert(lan);
      }

      //dump_weigths(weights);

      DS_WEIGHT::const_iterator iter = weights.begin();
      int32_t need_elect_count = elect_count;
      TBSYS_LOG(DEBUG, "weights.size(%u), need_elect_count(%d)", weights.size(), need_elect_count);
      DataServerStatInfo* ds_stat_info = NULL;
      while (iter != weights.end() && need_elect_count > 0)
      {
        ds_stat_info = iter->second->get_ds();
        uint32_t dlan = Func::get_lan(ds_stat_info->id_, SYSPARAM_NAMESERVER.group_mask_);
        if (existlan.find(dlan) == existlan.end())
        {
          existlan.insert(dlan);
          elect_ds_list.push_back(ds_stat_info->id_);
          if (elect_seq > 0)
            iter->second->elect(++elect_seq);
          --need_elect_count;
        }
        ++iter;
      }
      TBSYS_LOG(DEBUG, "current elect_count(%d)", elect_count - need_elect_count);
      return elect_count - need_elect_count;
    }
Example #5
0
 /**
  * Reset this operation for a new set of target servers: zero the done
  * counter, record the server count and the current monotonic time, and put
  * one member entry per server into its initial state (invalid block id and
  * version, status EXIT_TIMEOUT_ERROR).
  */
 void OpMeta::set_members(const VUINT64& servers)
 {
   done_server_size_ = 0;
   server_size_ = servers.size();
   start_time_ = Func::get_monotonic_time_us();

   int32_t slot = 0;
   while (slot < server_size_)
   {
     members_[slot].server_ = servers[slot];
     members_[slot].info_.block_id_ = INVALID_BLOCK_ID;
     members_[slot].info_.version_= INVALID_VERSION;
     // every member starts out as "timed out" until something overwrites its status
     members_[slot].status_ = EXIT_TIMEOUT_ERROR;
     ++slot;
   }
 }
Example #6
0
    /**
     * Read a block index from one randomly chosen replica of block_id.
     *
     * @param ns_id            nameserver asked for the replica list
     * @param block_id         block whose replicas are looked up
     * @param attach_block_id  forwarded unchanged to DsRequester::read_block_index
     * @param index_data       filled by DsRequester::read_block_index
     * @return the error from NsRequester::get_block_replicas if that fails,
     *         EXIT_NO_DATASERVER if the replica list is empty, otherwise the
     *         result of DsRequester::read_block_index
     */
    int MiscRequester::read_block_index(const uint64_t ns_id,
        const uint64_t block_id, const uint64_t attach_block_id,
        IndexDataV2& index_data)
    {
      VUINT64 replicas;
      const int ret = NsRequester::get_block_replicas(ns_id, block_id, replicas);
      if (TFS_SUCCESS != ret)
      {
        return ret;
      }

      if (replicas.empty())
      {
        return EXIT_NO_DATASERVER;
      }

      const int32_t chosen = random() % replicas.size();
      return DsRequester::read_block_index(replicas[chosen],
          block_id, attach_block_id, index_data);
    }
Example #7
0
    int32_t elect_ds_normal(const DS_WEIGHT& weights, const int32_t elect_count, int64_t& elect_seq,
        VUINT64& elect_ds_list)
    {
      if (elect_count == 0)
        return 0;

      int32_t need_elect_count = elect_count;

      DS_WEIGHT::const_iterator iter = weights.begin();
      while (iter != weights.end() && need_elect_count > 0)
      {
        elect_ds_list.push_back(iter->second->get_ds()->id_);
        if (elect_seq > 0)
          iter->second->elect(++elect_seq);
        --need_elect_count;
        ++iter;
      }
      return elect_count - need_elect_count;
    }
Example #8
0
// if parameter: ds_id != INVALID_SERVER_ID, check from current cluster, need exclude repliates in this ds;
// if parameter: ds_id == INVALID_SERVER_ID, check from slave cluster
int get_and_check_all_blocks_copy(const uint64_t ns_id, const VUINT64& blocks, VUINT64& no_copy_blocks, const uint64_t ds_id = INVALID_SERVER_ID)
{
  no_copy_blocks.clear();
  vector<BlockMeta> blocks_meta;
  int ret = ToolUtil::get_all_blocks_meta(ns_id, blocks, blocks_meta, true);
  if (TFS_SUCCESS == ret)
  {
    VUINT64 lost_in_family;
    for (uint32_t block_index = 0; block_index < blocks_meta.size(); ++block_index)
    {
      // blk_meta.result_ just be used in trunk(stable2.7)
      BlockMeta blk_meta = blocks_meta.at(block_index);
      uint64_t block_id = blk_meta.block_id_;
      FamilyInfoExt family_info = blk_meta.family_info_;
      if (INVALID_FAMILY_ID == family_info.family_id_)
      {
        // if family can't reinstall, family_id here is also invalid
        if (INVALID_BLOCK_ID == block_id || IS_VERFIFY_BLOCK(block_id))
          continue;
        // else family data block regard as normal block

        if (0 == blk_meta.size_)
        {
          // maybe not exist the block
          no_copy_blocks.push_back(block_id);
        }
        else if (INVALID_SERVER_ID != ds_id && 1 == blk_meta.size_ && ds_id == blk_meta.ds_[0])
        {
          // only exist one copy and be positioned in this ds
          no_copy_blocks.push_back(block_id);
        }
      }
      else // block is marshalling
      {
        int data_num = GET_DATA_MEMBER_NUM(family_info.family_aid_info_);
        int member_num = data_num + GET_CHECK_MEMBER_NUM(family_info.family_aid_info_);
        int32_t alive_num = 0;
        lost_in_family.clear();
        for (int i = 0; i < member_num; ++i)
        {
          // pair is <blockid, ds_id>
          pair<uint64_t, uint64_t>& item = family_info.members_[i];
          if (INVALID_SERVER_ID != item.second && item.second != ds_id) // exclude current ds
          {
            ++alive_num;
          }
          else
          {
            // keep all lost(and will lost) data blockid
            if (!IS_VERFIFY_BLOCK(item.first) && INVALID_BLOCK_ID != item.first)
            {
              lost_in_family.push_back(item.first);
            }
          }
        }
        if (alive_num < data_num) // can't reinstate
        {
          no_copy_blocks.insert(no_copy_blocks.end(), lost_in_family.begin(), lost_in_family.end());
        }
      }
    }
  }
  else
  {
    // this error need script-calller care
    TBSYS_LOG(ERROR, "get blockis ds_list fail from ns: %s, ret:%d",
        tbsys::CNetUtil::addrToString(ns_id).c_str(), ret);
  }
  return ret;
}
Example #9
0
int check_all_block_in_disk(const char* ns_addr, const char* ns_slave_addr, const uint64_t ds_id,
    const VUINT64& blocks, VUINT64& need_sync_block_list, VUINT64& lost_block_list)
{
  int ret = TFS_SUCCESS;
  uint64_t ns_id = Func::get_host_ip(ns_addr);
  uint64_t slave_ns_id = Func::get_host_ip(ns_slave_addr);
  need_sync_block_list.clear();
  lost_block_list.clear();

  // check current cluster's block in blocks array
  ret = get_and_check_all_blocks_copy(ns_id, blocks, need_sync_block_list, ds_id);
  if (TFS_SUCCESS == ret && need_sync_block_list.size() > 0)
  {
    // check slave cluster's block in need_sync_block_list array
    VUINT64 raw_lost_blocks;
    ret = get_and_check_all_blocks_copy(slave_ns_id, need_sync_block_list, raw_lost_blocks);

    // raw_lost_blocks need unique, then remove all blocks which not in need_sync_block_list
    set<uint64_t> need_sync_block_set, lost_block_set;
    need_sync_block_set.insert(need_sync_block_list.begin(), need_sync_block_list.end()); // to sort
    lost_block_set.insert(raw_lost_blocks.begin(), raw_lost_blocks.end()); // to sort & unique

    lost_block_list.resize(raw_lost_blocks.size());
    VUINT64::iterator it = set_intersection(raw_lost_blocks.begin(), raw_lost_blocks.end(),
        need_sync_block_list.begin(), need_sync_block_list.end(), lost_block_list.begin());
    lost_block_list.resize(it - lost_block_list.begin());// set real size
  }
  return ret;
}
/**
 * Recover blocks from the data found on this dataserver's disk.
 *
 * Every block id reported by block_manager is looked up on the nameserver
 * (ns_addr). Blocks that still have a replica, or that belong to a family,
 * are skipped. For each remaining block the nameserver entry is removed, the
 * local file index is traversed and every non-deleted file is copied with
 * copy_file(); when that fails the file is copied from the slave cluster
 * (ns_slave_addr) instead.
 *
 * Output lists (all are appended to, none is cleared here):
 *  - no_need_recover_block_list: blocks skipped as described above
 *  - tmp_fail_block_list: blocks whose local file index could not be read
 *  - success_block_list: blocks where no file copy failed
 *  - fail_block_file_list: <block_id, file_id> of failed files for blocks
 *    where some copies succeeded and some failed
 *  - fail_block_list: blocks that could not be removed from the nameserver,
 *    or where every attempted file copy failed
 *
 * @return result of ToolUtil::get_all_blocks_meta; per-block errors are only
 *         reported through the lists and the log
 */
int recover_block_from_disk_data(const char* ns_addr, const char* ns_slave_addr, BlockManager& block_manager,
    VUINT64& tmp_fail_block_list, VUINT64& no_need_recover_block_list, VUINT64& success_block_list,
    multimap<uint64_t, uint64_t>& fail_block_file_list, VUINT64& fail_block_list)
{
  int ret = TFS_SUCCESS;
  VUINT64 blocks;
  vector<FileInfoV2> finfos;
  multimap<uint64_t, uint64_t> tmp_fail_block_file_list;
  uint64_t ns_id = Func::get_host_ip(ns_addr);

  //get all blocks from ds disk
  block_manager.get_all_block_ids(blocks);// always returns success

  vector<BlockMeta> blocks_meta;
  ret = ToolUtil::get_all_blocks_meta(ns_id, blocks, blocks_meta, false);// no need get check block
  TBSYS_LOG(DEBUG , "all logic blocks count: %zd, data block count: %zd", blocks.size(), blocks_meta.size());
  if (TFS_SUCCESS == ret)
  {
    // bret carries per-block / per-file results; it never changes the function's return value
    int32_t bret = TFS_SUCCESS;
    for (uint32_t block_index = 0; block_index < blocks_meta.size(); ++block_index)
    {
      BlockMeta blk_meta = blocks_meta.at(block_index);
      uint64_t block_id = blk_meta.block_id_;
      if (blk_meta.size_ > 0 || INVALID_FAMILY_ID != blk_meta.family_info_.family_id_)
      {
        TBSYS_LOG(DEBUG , "blockid: %" PRI64_PREFIX "u  no need recover, ds_size:%d, family_id: %" PRI64_PREFIX "u",
            block_id, blk_meta.size_, blk_meta.family_info_.family_id_);
        // as long as a replica still exists, or the replicas are lost but the block belongs to
        // a family (degraded-read recovery is not considered), this tool does not recover it
        no_need_recover_block_list.push_back(block_id);
      }
      else
      {
        bret = rm_no_replicate_block_from_ns(ns_addr, block_id);//now T_NEWBLK will can not remove empty ds_list's block
        if (TFS_SUCCESS != bret)
        {
          fail_block_list.push_back(block_id);
          TBSYS_LOG(WARN , "remove block %" PRI64_PREFIX "u from ns: %s failed, ret: %d", block_id, ns_addr, bret);
        }
        else
        {
          IndexHeaderV2 header;
          finfos.clear();
          bret = block_manager.traverse(header, finfos, block_id, block_id);
          if (TFS_SUCCESS != bret)
          {
            TBSYS_LOG(WARN , "block %" PRI64_PREFIX "u get local file infos failed, ret: %d", block_id, bret);
            // only when reading the block's file index from the local disk fails does the
            // whole block need to be recovered from the slave cluster
            tmp_fail_block_list.push_back(block_id);
          }
          else
          {
            bool all_success = true;
            int32_t copy_file_succ_count = 0;
            tmp_fail_block_file_list.clear();
            for (uint32_t file_index = 0; file_index < finfos.size(); ++file_index)
            {
              // skip deleted file
              if ((finfos.at(file_index).status_ & FILE_STATUS_DELETE) != 0)
                continue;

              uint64_t file_id = finfos.at(file_index).id_;
              bret = copy_file(block_manager, block_id, finfos.at(file_index));
              if (TFS_SUCCESS == bret)
              {
                TBSYS_LOG(DEBUG, "recover block_id: %" PRI64_PREFIX "u, file_id: %" PRI64_PREFIX "u successful!", block_id, file_id);
              }
              else
              {// if the file data on disk is already corrupted (e.g. crc error), copy it from the peer cluster (ns_slave_addr)
                bret = copy_file_from_slave_cluster(ns_slave_addr, ns_addr, block_id, finfos.at(file_index));
                if (TFS_SUCCESS == bret)
                {
                  TBSYS_LOG(DEBUG, "recover block_id: %" PRI64_PREFIX "u, file_id: %" PRI64_PREFIX "u successful from slave cluster!", block_id, file_id);
                }
                else
                {
                  TBSYS_LOG(WARN, "recover block_id: %" PRI64_PREFIX "u, file_id: %" PRI64_PREFIX "u failed from slave cluster, ret: %d!", block_id, file_id, bret);
                  all_success = false;
                  tmp_fail_block_file_list.insert(pair<uint64_t, uint64_t>(block_id, file_id));
                }
              }
              // bret is the result of whichever copy ran last for this file (local or slave)
              if (TFS_SUCCESS == bret)
              {
                ++copy_file_succ_count;
              }
            }

            if (all_success)
            {
              success_block_list.push_back(block_id);
              if (0 == copy_file_succ_count)
              {
                TBSYS_LOG(DEBUG, "recover block_id: %" PRI64_PREFIX "u need to do nothing,"
                    " because the count of files who need to recover is ZERO except DELETE status files!", block_id);
              }
            }
            else if (0 == copy_file_succ_count)// all file(exclude DELETE) fail
            {
              TBSYS_LOG(WARN, "recover block_id: %" PRI64_PREFIX "u's files failed, copy_file_succ_count is Zero !", block_id);
              fail_block_list.push_back(block_id);// for print fail block to out log file at end
            }
            else
            {
              // partial failure: report only the files that could not be copied
              fail_block_file_list.insert(tmp_fail_block_file_list.begin(), tmp_fail_block_file_list.end());
            }
          }
        }
      }
    }//end for blocks loop
  }
  else
  {
    TBSYS_LOG(WARN, "get blockis ds_list error, ret:%d", ret);
  }
  TBSYS_LOG(INFO, "success_block_list size: %zd, tmp_fail_block_list size: %zd, fail_block_list size: %zd",
      success_block_list.size(), tmp_fail_block_list.size(), fail_block_list.size());
  return ret;
}
Example #11
0
    /**
     * Forward a write / close / unlink request to every server in servers
     * except this dataserver itself.
     *
     * Before each post the message is adjusted for its target: a write
     * request carries the local block version, and when family_id is valid
     * the block id is replaced by the one the family info maps to the target
     * server and the family id inside the message is invalidated.
     *
     * Forwarding stops at the first failure, including a failure to read the
     * local block info (in which case nothing is sent).
     *
     * @return TFS_SUCCESS, or the first error encountered
     */
    int OpManager::forward_op(tbnet::Packet* message,
        const uint64_t block_id, const int64_t family_id, const VUINT64& servers)
    {
      DsRuntimeGlobalInformation& ds_info = DsRuntimeGlobalInformation::instance();

      // the master's version travels to the slaves
      BlockInfoV2 block_info;
      int ret = get_block_manager().get_block_info(block_info, block_id);

      const bool in_family = (INVALID_FAMILY_ID != family_id);
      for (VUINT64::const_iterator target = servers.begin();
          TFS_SUCCESS == ret && target != servers.end(); ++target)
      {
        const uint64_t server = *target;
        if (server == ds_info.information_.id_)
        {
          continue;  // never forward to ourselves
        }

        const int pcode = message->getPCode();
        if (WRITE_FILE_MESSAGE_V2 == pcode)
        {
          WriteFileMessageV2* write_msg = dynamic_cast<WriteFileMessageV2*>(message);
          write_msg->set_version(block_info.version_);  // version will take to slave
          if (in_family)
          {
            FamilyInfoExt& family = write_msg->get_family_info();
            write_msg->set_block_id(family.get_block(server));
            family.family_id_ = INVALID_FAMILY_ID;  // family will not take to slave
          }
        }
        else if (CLOSE_FILE_MESSAGE_V2 == pcode)
        {
          CloseFileMessageV2* close_msg = dynamic_cast<CloseFileMessageV2*>(message);
          if (in_family)
          {
            FamilyInfoExt& family = close_msg->get_family_info();
            close_msg->set_block_id(family.get_block(server));
            family.family_id_ = INVALID_FAMILY_ID;
          }
        }
        else if (UNLINK_FILE_MESSAGE_V2 == pcode)
        {
          UnlinkFileMessageV2* unlink_msg = dynamic_cast<UnlinkFileMessageV2*>(message);
          if (in_family)
          {
            FamilyInfoExt& family = unlink_msg->get_family_info();
            unlink_msg->set_block_id(family.get_block(server));
            family.family_id_ = INVALID_FAMILY_ID;
          }
        }
        else
        {
          // only the three packet types above are expected here
          assert(false);
        }

        // forward will clone source msg
        ret = post_msg_to_server(server, message, ds_async_callback, true);
        if (TFS_SUCCESS != ret)
        {
          TBSYS_LOG(WARN, "forward request to slave fail, ret : %d", ret);
        }
      }

      return ret;
    }
Example #12
0
    bool elect_move_dest_ds(const vector<ServerCollect*>& ds_list,
        const ReplicateDestStrategy::counter_type& dest_counter, const VUINT64& elect_ds_list, const uint64_t src_ds,
        uint64_t & dest_ds)
    {
      vector<ServerCollect*>::const_iterator maxit = std::max_element(ds_list.begin(), ds_list.end(), CompareLoad());

      int32_t max_load = 1;
      if (maxit != ds_list.end())
        max_load = (*maxit)->get_ds()->current_load_;

      NsGlobalInfo ginfo;
      ginfo.max_load_ = max_load; // only max_load & alive_server_count could be useful, calc.
      ginfo.alive_server_count_ = ds_list.size();
      // elect seq not used in this case;
      ReplicateSourceStrategy strategy(1, ginfo, dest_counter);

      DS_WEIGHT weights;
      StoreWeight < ReplicateSourceStrategy > store(strategy, weights);
      std::for_each(ds_list.begin(), ds_list.end(), store);

      std::set < uint32_t > existlan;
      uint32_t elect_ds_list_size = elect_ds_list.size();
      for (uint32_t i = 0; i < elect_ds_list_size; ++i)
      {
        uint32_t lan = Func::get_lan(elect_ds_list[i], SYSPARAM_NAMESERVER.group_mask_);
        existlan.insert(lan);
      }

      dest_ds = 0;
      uint64_t first_elect_ds = 0;
      uint32_t dlan = 0;
      DataServerStatInfo* ds_stat_info = NULL;
      DS_WEIGHT::const_iterator iter = weights.begin();
      while (iter != weights.end())
      {
        ds_stat_info = iter->second->get_ds();

        dlan = Func::get_lan(ds_stat_info->id_, SYSPARAM_NAMESERVER.group_mask_);

        if ((first_elect_ds == 0) && (existlan.find(dlan) == existlan.end()))
        {
          first_elect_ds = ds_stat_info->id_;
        }

        if ((dest_ds == 0) && (existlan.find(dlan) == existlan.end()) && (ReplicateStrategy::get_ds_ip(src_ds)
            == ReplicateStrategy::get_ds_ip(ds_stat_info->id_)))
        {
          dest_ds = ds_stat_info->id_;
        }

        if ((first_elect_ds != 0) && (dest_ds != 0))
        {
          break;
        }
        ++iter;
      }

      if (dest_ds == 0)
      {
        dest_ds = first_elect_ds;
      }
      return (dest_ds != 0);
    }