// Select the next task to hand out and report the current queue sizes.
//
// A guard is taken on lock_ for the whole call. Selection is done in two
// passes:
//   1. Walk wait_queue_ and take the first task that has a location whose
//      tablet_version_ equals tablet_version_ or tablet_version_ + 1 and
//      whose chunkserver currently has fewer than max_count_ tasks. The task
//      is moved from wait_queue_ to doing_queue_.
//   2. If nothing was taken and at least one task has finished, walk
//      doing_queue_ for a task that has been running longer than
//      avg_times_ * (total_finish_time_ / total_finish_count_) and hand it
//      out again on a different location whose chunkserver has at most
//      max_count_ tasks.
//
// @param counter  [out] filled with total/wait/doing/finish counts; written
//                 whether or not a task is found.
// @param task     [out] receives a copy of the selected task with its index
//                 set to the chosen location. Its timestamp is overwritten
//                 even when no task is found.
// @return OB_SUCCESS when a task was selected, OB_ERROR otherwise.
int TaskManager::fetch_task(TaskCounter & counter, TaskInfo & task)
{
  int ret = OB_SUCCESS;
  int64_t task_count = 0;
  bool find_task = false;
  map<uint64_t, TaskInfo>::iterator it;
  tbsys::CThreadGuard lock(&lock_);

  // step 1. check waiting task
  for (it = wait_queue_.begin(); it != wait_queue_.end(); ++it)
  {
    for (int64_t i = 0; i < it->second.get_location().size(); ++i)
    {
      // only locations at the current version or the next one are usable
      if (it->second.get_location()[i].tablet_version_ != tablet_version_
          && it->second.get_location()[i].tablet_version_ != (tablet_version_ + 1))
      {
#if 1
        it->second.get_location()[i].dump(it->second.get_location()[i]);
        TBSYS_LOG(DEBUG, "skip task[%ld], due to version compatiablility", it->first);
#endif
        continue;
      }

      // Fetch the count before logging it, so the message reports the load
      // of the server named in it and not a value left over from a previous
      // iteration.
      task_count = get_server_task_count(it->second.get_location()[i].chunkserver_);
      TBSYS_LOG(DEBUG, "server:%ld is selected, task_count = %ld",
          it->second.get_location()[i].chunkserver_.get_ipv4(), task_count);

      if (task_count >= max_count_)
      {
        // this server is already at its limit, try the next location
        continue;
      }

      task = it->second;
      task.set_index(i);
      find_task = true;
      task.set_timestamp(tbsys::CTimeUtil::getTime());
      // Move the item from wait_queue_ to doing_queue_. `it` is invalid after
      // the erase; both loops are left through `break`, so it is never
      // incremented or dereferenced again.
      wait_queue_.erase(it);
      doing_queue_.insert(pair<uint64_t, TaskInfo>(task.get_id(), task));
      break;
    }
    if (find_task)
    {
      break;
    }
  }

  // step 2. check doing timeout task
  if ((false == find_task) && (total_finish_count_ != 0))
  {
    int64_t timestamp = tbsys::CTimeUtil::getTime();
    int64_t avg_finish_time = total_finish_time_ / total_finish_count_;
    for (it = doing_queue_.begin(); it != doing_queue_.end(); ++it)
    {
      if ((timestamp - it->second.get_timestamp()) <= (avg_times_ * avg_finish_time))
      {
        // not timed out yet
        continue;
      }

      // Index of the location stored with the doing-queue entry. It must be
      // read from the entry itself: `task` is the caller's output object and
      // has not been assigned in this pass, so its index says nothing about
      // this entry.
      int64_t last_index = it->second.get_index();
      for (int64_t i = 0; i < it->second.get_location().size(); ++i)
      {
        if (i == last_index)
        {
          continue; /* do not allocate same task to same server */
        }
        task_count = get_server_task_count(it->second.get_location()[i].chunkserver_);
        // strictly greater: unlike step 1, a server exactly at max_count_ is
        // still accepted here
        if (task_count > max_count_)
        {
          continue;
        }

        TBSYS_LOG(INFO, "check task timeout:task[%lu], avg_time[%ld], timeout_times[%ld], "
            "total_time[%ld], total_finish[%ld], finish[%ld], now[%ld], add_time[%ld], old_idx=%lu, new_idx=%lu",
            it->second.get_id(), avg_finish_time, avg_times_, total_finish_time_,
            total_finish_count_, complete_queue_.size(), timestamp,
            it->second.get_timestamp(), last_index, i);
        // Restart the timeout clock of the queued entry. Only the copy handed
        // back to the caller gets the new index; the queued entry keeps the
        // index it already had.
        it->second.set_timestamp(timestamp);
        task = it->second;
        task.set_index(i);
        find_task = true;
        break;
      }
      if (find_task)
      {
        break;
      }
    }
  }

  // set task start timestamp (done unconditionally, also when nothing was found)
  task.set_timestamp(tbsys::CTimeUtil::getTime());
  counter.total_count_ = total_task_count_;
  counter.wait_count_ = wait_queue_.size();
  counter.doing_count_ = doing_queue_.size();
  counter.finish_count_ = complete_queue_.size();

  if (false == find_task)
  {
    TBSYS_LOG(DEBUG, "not find suitable task");
    ret = OB_ERROR;
  }
  else
  {
    // Update the working queue for the selected server. task_count still
    // holds the count fetched for that server, because both passes break
    // right after the selection.
    working_queue_[task.get_location()[task.get_index()].chunkserver_] = ++task_count;
    //print_access_server();
  }
  return ret;
}