int TaskFactory::insert_new_task(const TabletLocation & list, TaskInfo & task)
{
  int ret = OB_SUCCESS;
  // alarm instantly if the tablet has no replica to dispatch to
  if (0 == list.size())
  {
    TBSYS_LOG(ERROR, "check task server list count failed:task[%lu], count[%ld]",
        task.get_id(), list.size());
  }
  // still insert the task so it is not lost
  ret = task_manager_->insert_task(list, task);
  if (ret != OB_SUCCESS)
  {
    TBSYS_LOG(ERROR, "insert a new task failed:ret[%d]", ret);
  }
  else
  {
    TBSYS_LOG(TRACE, "insert a new task succ:task[%lu], table_name[%.*s], tablet[%s]",
        task.get_id(), task.get_param().get_table_name().length(),
        task.get_param().get_table_name().ptr(),
        to_cstring(*task.get_param().get_range()));
  }
  return ret;
}
// add a new task
int TaskManager::insert_task(const TabletLocation & location, TaskInfo & task)
{
  int ret = OB_SUCCESS;
  task.set_location(location);
  task.set_token(task_token_);
  int64_t timestamp = tbsys::CTimeUtil::getTime();
  task.set_timestamp(timestamp);
  std::map<ObServer, int64_t>::const_iterator it;
  tbsys::CThreadGuard lock(&lock_);
  task.set_id(++task_id_alloc_);
  ++total_task_count_;
  wait_queue_.insert(std::pair<uint64_t, TaskInfo>(task.get_id(), task));
  for (int64_t i = 0; i < location.size(); ++i)
  {
    // TODO: per-server counter consulted later when selecting a server for a task
    it = server_manager_.find(location[i].chunkserver_);
    if (it != server_manager_.end())
    {
      server_manager_[location[i].chunkserver_] = it->second + 1;
    }
    else
    {
      server_manager_.insert(std::pair<ObServer, int64_t>(location[i].chunkserver_, 1));
    }
  }
  TBSYS_LOG(DEBUG, "insert task succ:id[%lu], table_name[%.*s], range[%s], count[%lu]",
      task_id_alloc_, task.get_param().get_table_name().length(),
      task.get_param().get_table_name().ptr(),
      to_cstring(*task.get_param().get_range()), total_task_count_);
#if 0
  // debug only: dump every waiting task range
  std::map<uint64_t, TaskInfo>::iterator temp_it;
  for (temp_it = wait_queue_.begin(); temp_it != wait_queue_.end(); ++temp_it)
  {
    TBSYS_LOG(TRACE, "dump task range:task[%lu], range[%p:%s], key_obj[%p:%p]",
        temp_it->first, temp_it->second.get_param().get_range(),
        to_cstring(*temp_it->second.get_param().get_range()),
        temp_it->second.get_param().get_range()->start_key_.ptr(),
        temp_it->second.get_param().get_range()->end_key_.ptr());
  }
#endif
  return ret;
}
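Every queue operation above hinges on one piece of bookkeeping: a per-server counter that insert_task increments for each replica location, fetch_task checks against max_count_ before dispatching, and finish_task decrements when a task completes. The following is a minimal, self-contained sketch of that accounting only, with plain int server ids standing in for ObServer; the ServerLoad class and its method names are hypothetical, not the OceanBase API.

#include <cstdint>
#include <map>

// Sketch of TaskManager's per-server counters (hypothetical names).
class ServerLoad
{
public:
  explicit ServerLoad(int64_t max_count) : max_count_(max_count) {}

  // insert_task path: record one candidate replica per server.
  void on_insert(int server) { ++counters_[server]; }

  // fetch_task path: a server is eligible only while it runs
  // fewer than max_count_ tasks.
  bool try_dispatch(int server)
  {
    int64_t & count = working_[server]; // default-constructs to 0
    if (count >= max_count_)
    {
      return false; // skip an overloaded server
    }
    ++count;
    return true;
  }

  // finish_task path: release one slot on the server that ran the task.
  void on_finish(int server)
  {
    std::map<int, int64_t>::iterator it = working_.find(server);
    if (it != working_.end() && it->second > 0)
    {
      --it->second;
    }
  }

private:
  int64_t max_count_;
  std::map<int, int64_t> counters_; // mirrors server_manager_
  std::map<int, int64_t> working_;  // mirrors working_queue_
};

int main()
{
  ServerLoad load(2);            // at most 2 concurrent tasks per server
  load.on_insert(1);
  bool a = load.try_dispatch(1); // true: 0 -> 1 running
  bool b = load.try_dispatch(1); // true: 1 -> 2 running
  bool c = load.try_dispatch(1); // false: server 1 is at max_count_
  load.on_finish(1);             // 2 -> 1 running, slot freed
  return (a && b && !c) ? 0 : 1;
}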
int TaskManager::finish_task(const bool result, const TaskInfo & task)
{
  int ret = OB_SUCCESS;
  if (task.get_token() != task_token_)
  {
    TBSYS_LOG(ERROR, "check task token failed:token[%ld], task[%ld]",
        task_token_, task.get_token());
    ret = OB_ERROR;
  }
  else
  {
    std::map<uint64_t, TaskInfo>::iterator it;
    int64_t timestamp = tbsys::CTimeUtil::getTime();
    tbsys::CThreadGuard lock(&lock_);
    int64_t task_count = get_server_task_count(task.get_location()[task.get_index()].chunkserver_);
    if (task_count < 1)
    {
      TBSYS_LOG(WARN, "check server task count failed:task[%lu], count[%ld]",
          task.get_id(), task_count);
    }
    else
    {
      TBSYS_LOG(DEBUG, "server ip = %d, task_count = %ld",
          task.get_location()[task.get_index()].chunkserver_.get_ipv4(), task_count);
      // release one slot on this server so it can take the next dispatch
      working_queue_[task.get_location()[task.get_index()].chunkserver_] = --task_count;
      // print_access_server();
    }
    it = doing_queue_.find(task.get_id());
    if (it != doing_queue_.end())
    {
      if (true == result)
      {
        ++total_finish_count_;
        total_finish_time_ += timestamp - task.get_timestamp();
        complete_queue_.insert(std::pair<uint64_t, TaskInfo>(task.get_id(), task));
        doing_queue_.erase(it);
      }
      // WARN: a failed task is not moved back to the wait queue; it stays
      // in doing_queue_ until the timeout redispatch in fetch_task picks it up
    }
    else
    {
      it = complete_queue_.find(task.get_id());
      if (it != complete_queue_.end())
      {
        if (true == result)
        {
          // count it anyway so the average finish time stays meaningful
          ++total_finish_count_;
          total_finish_time_ += timestamp - task.get_timestamp();
        }
        TBSYS_LOG(WARN, "find the task already finished:task[%lu]", task.get_id());
      }
      else
      {
        TBSYS_LOG(ERROR, "not find this task in doing and complete queue:task[%lu]",
            task.get_id());
        ret = OB_ERROR;
      }
    }
  }
  // guard against division by zero before any task has finished
  TBSYS_LOG(INFO, "finish monitor task [id=%lu] stat:wait[%lu], doing[%lu], finish[%lu], avg_time[%ld]",
      task.get_id(), wait_queue_.size(), doing_queue_.size(), complete_queue_.size(),
      (total_finish_count_ > 0) ? (total_finish_time_ / total_finish_count_) : 0);
  return ret;
}
int TaskManager::fetch_task(TaskCounter & counter, TaskInfo & task)
{
  int ret = OB_SUCCESS;
  int64_t task_count = 0;
  bool find_task = false;
  std::map<uint64_t, TaskInfo>::iterator it;
  tbsys::CThreadGuard lock(&lock_);
  // step 1. pick a waiting task whose server still has spare capacity
  for (it = wait_queue_.begin(); it != wait_queue_.end(); ++it)
  {
    for (int64_t i = 0; i < it->second.get_location().size(); ++i)
    {
      if (it->second.get_location()[i].tablet_version_ != tablet_version_
          && it->second.get_location()[i].tablet_version_ != (tablet_version_ + 1))
      {
#if 1
        it->second.get_location()[i].dump(it->second.get_location()[i]);
        TBSYS_LOG(DEBUG, "skip task[%lu], due to version compatibility", it->first);
#endif
        continue;
      }
      task_count = get_server_task_count(it->second.get_location()[i].chunkserver_);
      TBSYS_LOG(DEBUG, "server:%d is selected, task_count = %ld",
          it->second.get_location()[i].chunkserver_.get_ipv4(), task_count);
      if (task_count >= max_count_)
      {
        continue;
      }
      else
      {
        task = it->second;
        task.set_index(i);
        find_task = true;
        task.set_timestamp(tbsys::CTimeUtil::getTime());
        // move this task from wait_queue_ to doing_queue_
        wait_queue_.erase(it);
        doing_queue_.insert(std::pair<uint64_t, TaskInfo>(task.get_id(), task));
        break;
      }
    }
    if (find_task)
    {
      break;
    }
  }
  // step 2. redispatch a doing task that has run past the timeout
  if ((false == find_task) && (total_finish_count_ != 0))
  {
    int64_t timestamp = tbsys::CTimeUtil::getTime();
    int64_t avg_finish_time = total_finish_time_ / total_finish_count_;
    for (it = doing_queue_.begin(); it != doing_queue_.end(); ++it)
    {
      if ((timestamp - it->second.get_timestamp()) > (avg_times_ * avg_finish_time))
      {
        // index of the server the task was last dispatched to; read it from
        // the stored copy, not from the uninitialized out-parameter
        int64_t last_index = it->second.get_index();
        for (int64_t i = 0; i < it->second.get_location().size(); ++i)
        {
          if (i == last_index)
          {
            // do not hand the task back to the same server
            continue;
          }
          task_count = get_server_task_count(it->second.get_location()[i].chunkserver_);
          // strictly greater: a redispatch may still go to a server at max_count_
          if (task_count > max_count_)
          {
            continue;
          }
          else
          {
            TBSYS_LOG(INFO, "check task timeout:task[%lu], avg_time[%ld], timeout_times[%ld], "
                "total_time[%ld], total_finish[%ld], finish[%lu], now[%ld], add_time[%ld], "
                "old_idx=%ld, new_idx=%ld",
                it->second.get_id(), avg_finish_time, avg_times_, total_finish_time_,
                total_finish_count_, complete_queue_.size(), timestamp,
                it->second.get_timestamp(), last_index, i);
            // refresh the stored copy so it is not picked again immediately
            // and last_index stays accurate on the next redispatch
            it->second.set_timestamp(timestamp);
            it->second.set_index(i);
            task = it->second;
            task.set_index(i);
            find_task = true;
            break;
          }
        }
        if (find_task)
        {
          break;
        }
      }
    }
  }
  // set the task start timestamp
  task.set_timestamp(tbsys::CTimeUtil::getTime());
  counter.total_count_ = total_task_count_;
  counter.wait_count_ = wait_queue_.size();
  counter.doing_count_ = doing_queue_.size();
  counter.finish_count_ = complete_queue_.size();
  if (false == find_task)
  {
    TBSYS_LOG(DEBUG, "not find suitable task");
    ret = OB_ERROR;
  }
  else
  {
    // account one more running task on the selected server
    working_queue_[task.get_location()[task.get_index()].chunkserver_] = ++task_count;
    // print_access_server();
  }
  return ret;
}
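The step-2 redispatch rule is purely statistical: a doing task becomes eligible for a second server once it has been running longer than avg_times_ times the observed mean finish time, and no redispatch happens before at least one task has finished. A self-contained sketch of just that predicate; the function name and parameters are illustrative, not the OceanBase API, and plain numbers stand in for the microsecond timestamps that tbsys::CTimeUtil::getTime() returns (the units cancel in the comparison).

#include <cassert>
#include <cstdint>

// Returns true when a running task should be handed to another server.
// avg_times is the multiplier (TaskManager::avg_times_); the mean finish
// time is derived from the same two counters finish_task maintains.
bool should_redispatch(int64_t now, int64_t task_start,
                       int64_t total_finish_time, int64_t total_finish_count,
                       int64_t avg_times)
{
  if (total_finish_count == 0)
  {
    return false; // no statistics yet, as in fetch_task's step-2 guard
  }
  int64_t avg_finish_time = total_finish_time / total_finish_count;
  return (now - task_start) > avg_times * avg_finish_time;
}

int main()
{
  // 4 finished tasks totalling 40 time units -> mean of 10; with
  // avg_times = 3 the threshold is 30, so a task running for 130 is
  // redispatched while one running for 20 is not.
  assert(should_redispatch(130, 0, 40, 4, 3));
  assert(!should_redispatch(20, 0, 40, 4, 3));
  assert(!should_redispatch(130, 0, 0, 0, 3)); // no statistics yet
  return 0;
}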