Пример #1
0
/**
 * Record the outcome of a task that was previously handed out.
 *
 * The server that ran the task (task.get_location()[task.get_index()]) gets
 * its working count decremented. On success the task is moved from
 * doing_queue_ to complete_queue_ and contributes to the average finish time.
 * On failure the task is left in doing_queue_; it is not put back into
 * wait_queue_ here, so it stays in doing_queue_, which fetch_task()
 * re-scans for timed-out tasks.
 *
 * @param result  true if the task completed successfully.
 * @param task    the task being reported, carrying the token, id, index and
 *                start timestamp it was dispatched with.
 * @return OB_SUCCESS, or OB_ERROR when the task token does not match
 *         task_token_ or the task is in neither doing_queue_ nor
 *         complete_queue_.
 */
int TaskManager::finish_task(const bool result, const TaskInfo & task)
{
  int ret = OB_SUCCESS;
  // Sampled before taking the lock so time spent waiting for the lock is not
  // counted as task run time.
  const int64_t timestamp = tbsys::CTimeUtil::getTime();
  // Held for the whole call: the summary log at the end reads the queues and
  // the finish counters, which fetch_task() modifies under the same lock.
  tbsys::CThreadGuard lock(&lock_);
  if (task.get_token() != task_token_)
  {
    TBSYS_LOG(ERROR, "check task token failed:token[%ld], task[%ld]", 
        task_token_, task.get_token());
    ret = OB_ERROR;
  }
  else
  {
    int64_t task_count = get_server_task_count(task.get_location()[task.get_index()].chunkserver_);
    if (task_count < 1)
    {
      // Nothing to decrement; keep going so the queues are still updated.
      TBSYS_LOG(WARN, "check server task count failed:task[%lu], count[%ld]", task.get_id(), task_count);
    }
    else
    {
      TBSYS_LOG(DEBUG, "server ip = %ld, task_count = %ld", 
                task.get_location()[task.get_index()].chunkserver_.get_ipv4(),
                task_count);
      // release one slot on the server that ran this task
      working_queue_[task.get_location()[task.get_index()].chunkserver_] = task_count - 1;
    }

    map<uint64_t, TaskInfo>::iterator it = doing_queue_.find(task.get_id());
    if (it != doing_queue_.end())
    {
      if (true == result)
      {
        ++total_finish_count_;
        total_finish_time_ += timestamp - task.get_timestamp();
        complete_queue_.insert(pair<uint64_t, TaskInfo>(task.get_id(), task));
        doing_queue_.erase(it);
      }
      // WARN: not insert into wait queue for timeout if result != true
    }
    else if (complete_queue_.find(task.get_id()) != complete_queue_.end())
    {
      // Duplicate report for an already completed task (e.g. it was
      // dispatched more than once after a timeout).
      if (true == result)
      {
        // still counted for the average finish time
        ++total_finish_count_;
        total_finish_time_ += timestamp - task.get_timestamp();
      }
      TBSYS_LOG(WARN, "find the task already finished:task[%lu]", task.get_id());
    }
    else
    {
      TBSYS_LOG(ERROR, "not find this task in doing and complete queue:task[%lu]", task.get_id());
      ret = OB_ERROR;
    }
  }

  // total_finish_count_ is still 0 until the first successful task; report an
  // average of 0 instead of dividing by zero.
  int64_t avg_time = 0;
  if (total_finish_count_ != 0)
  {
    avg_time = total_finish_time_ / total_finish_count_;
  }
  TBSYS_LOG(INFO, "finish monitor task [id=%lu] stat:wait[%lu], doing[%lu], finish[%lu], avg_time[%ld]", task.get_id(),
      wait_queue_.size(), doing_queue_.size(), complete_queue_.size(), avg_time);
  return ret;
}
Пример #2
0
/**
 * Select the next task to dispatch and report the current queue counters.
 *
 * Step 1 walks wait_queue_ and, for each task, its candidate locations. A
 * location is eligible when its tablet version equals tablet_version_ or
 * tablet_version_ + 1 and its server currently runs fewer than max_count_
 * tasks. The first eligible task is moved to doing_queue_.
 *
 * Step 2 runs only when step 1 found nothing and at least one task has
 * finished. It looks in doing_queue_ for a task that has been running longer
 * than avg_times_ times the average finish time and hands it out again on a
 * different location than the one it was last dispatched to.
 *
 * @param counter  filled with total/wait/doing/finish counts on every call.
 * @param task     receives the selected task with its index set to the chosen
 *                 location. Its timestamp is overwritten even when no task is
 *                 found.
 * @return OB_SUCCESS when a task was selected, OB_ERROR otherwise.
 */
int TaskManager::fetch_task(TaskCounter & counter, TaskInfo & task)
{
  int ret = OB_SUCCESS;
  int64_t task_count = 0;
  bool find_task = false;
  map<uint64_t, TaskInfo>::iterator it;
  tbsys::CThreadGuard lock(&lock_);
  // step 1. check waiting task
  for (it = wait_queue_.begin(); it != wait_queue_.end(); ++it)
  {
    for (int64_t i = 0; i < it->second.get_location().size(); ++i)
    {

      if (it->second.get_location()[i].tablet_version_ != tablet_version_ &&
          it->second.get_location()[i].tablet_version_ != (tablet_version_ + 1))
      {
#if 1
        it->second.get_location()[i].dump(it->second.get_location()[i]);
        TBSYS_LOG(DEBUG, "skip task[%ld], due to version compatiablility", it->first);
#endif
        continue;
      }

      // Fetch the count first so the log below shows this server's load
      // rather than the value left over from the previous candidate.
      task_count = get_server_task_count(it->second.get_location()[i].chunkserver_);
      TBSYS_LOG(DEBUG, "server:%ld is selected, task_count = %ld", 
                it->second.get_location()[i].chunkserver_.get_ipv4(),
                task_count);

      if (task_count >= max_count_)
      {
        // server is saturated, try the next location
        continue;
      }
      else
      {
        task = it->second;
        task.set_index(i);
        find_task = true;
        task.set_timestamp(tbsys::CTimeUtil::getTime());
        // Move the item to doing_queue_. 'it' is invalid after the erase;
        // both loops are left immediately without touching it again.
        wait_queue_.erase(it);
        doing_queue_.insert(pair<uint64_t, TaskInfo>(task.get_id(), task));
        break;
      }
    }
    if (find_task)
    {
      break;
    }
  }
  
  // step 2. check doing timeout task
  // (skipped while total_finish_count_ is 0: no average is available yet)
  if ((false == find_task) && (total_finish_count_ != 0))
  {
    int64_t timestamp = tbsys::CTimeUtil::getTime();
    int64_t avg_finish_time = total_finish_time_/total_finish_count_;
    for (it = doing_queue_.begin(); it != doing_queue_.end(); ++it)
    {
      if ((timestamp - it->second.get_timestamp()) > (avg_times_ * avg_finish_time))
      {
        // Index this queued task was last dispatched with. It must come from
        // the queue entry itself; the caller's 'task' is an output parameter
        // and holds an unrelated value at this point.
        int64_t last_index = it->second.get_index();

        // timeout so reset the visit count 
        for (int64_t i = 0; i < it->second.get_location().size(); ++i)
        {
          if (i == last_index) {
            continue;                           /* do not allocate same task to same server */
          }

          task_count = get_server_task_count(it->second.get_location()[i].chunkserver_);
          // must > not include equal with
          if (task_count > max_count_)
          {
            continue;
          }
          else
          {
            TBSYS_LOG(INFO, "check task timeout:task[%lu], avg_time[%ld], timeout_times[%ld], "
                "total_time[%ld], total_finish[%ld], finish[%ld], now[%ld], add_time[%ld], old_idx=%lu, new_idx=%lu",
                it->second.get_id(), avg_finish_time, avg_times_, total_finish_time_, total_finish_count_,
                complete_queue_.size(), timestamp, it->second.get_timestamp(),
                last_index, i);
            // Update timestamp and remember the new location in the queue
            // entry, so a later timeout of this task skips the server that
            // was picked here.
            it->second.set_timestamp(timestamp);
            it->second.set_index(i);
            task = it->second;
            find_task = true;
            break;
          }
        }

        if (find_task)
        {
          break;
        }
      }
    }
  }
  
  // set task start timestamp
  task.set_timestamp(tbsys::CTimeUtil::getTime());
  counter.total_count_ = total_task_count_;
  counter.wait_count_ = wait_queue_.size();
  counter.doing_count_ = doing_queue_.size();
  counter.finish_count_ = complete_queue_.size();
  if (false == find_task)
  {
    TBSYS_LOG(DEBUG, "not find suitable task");
    ret = OB_ERROR;
  }
  else
  {
    // task_count still holds the load of the server chosen above (both steps
    // break right after the lookup), so this records one more running task.
    working_queue_[task.get_location()[task.get_index()].chunkserver_] = ++task_count;
    //print_access_server();
  }
  return ret;
}