void* scan_nodes(void* threadid) { int i, j, k; long tid; unsigned int min_nbr_height; Edge nbr_edge, min_height_edge; long d; long local_e; long local_c; int max_flow; int isInQ; int height_before_lift; int node_push_times = 0, node_lift_times = 0; int op_times = 0; tid = (long)threadid; Node* cur_node; ThreadEnv* thisThread; thisThread = threadEnv + tid; #if 0 if (tid == 0) global_relabel(tid); #endif pthread_barrier_wait(&start_barrier); while (!flow_done()) { cur_node = thisThread->Q[0]; while (cur_node != NoNode) { #ifdef GR if (op_times > gr_threshold) { op_times = 0; if (pthread_mutex_trylock(&gr_mutex) == 0) { global_relabel(tid); pthread_mutex_unlock(&gr_mutex); } } #endif while (cur_node->excess > 0) { #ifdef DEBUG fprintf(stderr, "%d h=%d, e=%ld\n", cur_node - g_node, cur_node->height, cur_node->excess); fflush(stderr); #endif min_nbr_height = UINT_MAX; for (nbr_edge = cur_node->adj_list; nbr_edge < (cur_node + 1)->adj_list; nbr_edge++) { if (nbr_edge->capacity > 0 && (nbr_edge->endpoint)->height < min_nbr_height) { min_nbr_height = (nbr_edge->endpoint)->height; min_height_edge = nbr_edge; } } #ifdef DEBUG fprintf(stderr, "work on %d\n", cur_node - g_node); fflush(stderr); #endif if (cur_node->height > min_nbr_height) { local_e = cur_node->excess; local_c = min_height_edge->capacity; d = MIN(local_e, local_c); if (min_height_edge->endpoint->wave == cur_node->wave && cur_node->height > min_height_edge->endpoint->height) { node_push_times++; op_times++; atomic_add(d, &(min_height_edge->mateedge->capacity)); atomic_sub(d, &(min_height_edge->capacity)); atomic_add(d, &((min_height_edge->endpoint)->excess)); atomic_sub(d, &(cur_node->excess)); #if defined(PUSH) || defined(DEBUG) fprintf(stderr, "[%ld] %ld(%ld) -> %ld -> %ld(%ld) \n", tid, cur_node - g_node, cur_node->excess, d, min_height_edge->endpoint - g_node, (min_height_edge->endpoint)->excess); fflush(stderr); #endif // add min_nbr to local queue isInQ = cmpxchg(&(min_height_edge->endpoint->inQ), 0, tid + 1); if (isInQ == 0) enQ(thisThread, min_height_edge->endpoint); } } else { // if we cannot push to any nodes, then we must be able to lift node_lift_times++; op_times++; pthread_mutex_lock(&(node_mutex[cur_node - g_node])); if (cur_node->height < min_nbr_height + 1) cur_node->height = min_nbr_height + 1; pthread_mutex_unlock(&(node_mutex[cur_node - g_node])); #if defined(LIFT) || defined(DEBUG) fprintf(stderr, "%ld ^ %d, ref %ld(%d)\n", cur_node - g_node, cur_node->height, min_height_edge->endpoint - g_node, min_height_edge->endpoint->height); fflush(stderr); #endif } } // while( g_node[i].excess > 0 ) set0(&(cur_node->inQ)); if (cur_node->excess > 0) { isInQ = cmpxchg(&(cur_node->inQ), 0, tid + 1); if (isInQ == 0) { reenQ(thisThread, cur_node); } else { deQ(thisThread); } } else { deQ(thisThread); } #ifdef HELP if (thisThread->request < MAX_THRD) send_work(tid); #endif cur_node = thisThread->Q[thisThread->head]; } // while (i != -1) #ifdef HELP // Q is empty, find something to do; request_work(tid); #else break; #endif } // while(!flow_done()) atomic_add(node_push_times, &(totalPushes)); atomic_add(node_lift_times, &(totalLifts)); } // scan_node
/*===========================================================================* * get_work * *===========================================================================*/ static void get_work() { /* Normally wait for new input. However, if 'reviving' is * nonzero, a suspended process must be awakened. */ int r, found_one, proc_p; register struct fproc *rp; while (reviving != 0) { found_one = FALSE; /* Find a suspended process. */ for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) { found_one = TRUE; /* Found a suspended process */ if (unblock(rp)) return; /* So main loop can process job */ send_work(); } if (!found_one) /* Consistency error */ panic("VFS: get_work couldn't revive anyone"); } for(;;) { /* Normal case. No one to revive. Get a useful request. */ if ((r = sef_receive(receive_from, &m_in)) != OK) { panic("VFS: sef_receive error: %d", r); } proc_p = _ENDPOINT_P(m_in.m_source); if (proc_p < 0) fp = NULL; else fp = &fproc[proc_p]; if (m_in.m_type == EDEADSRCDST) return; /* Failed 'sendrec' */ /* Negative who_p is never used to access the fproc array. Negative * numbers (kernel tasks) are treated in a special way. */ if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc))) panic("receive process out of range: %d", who_p); if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) { printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n", m_in.m_source, who_p, m_in.m_type); continue; } /* Internal consistency check; our mental image of process numbers and * endpoints must match with how the rest of the system thinks of them. */ if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) { if (fproc[who_p].fp_endpoint == NONE) printf("slot unknown even\n"); printf("VFS: receive endpoint inconsistent (source %d, who_p " "%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p, fproc[who_p].fp_endpoint, who_e); panic("VFS: inconsistent endpoint "); } return; } }
void process_request(char* code_sign_key) { PLATFORM* platform; int retval; double last_rpc_time, x; struct tm *rpc_time_tm; bool ok_to_send_work = !config.dont_send_jobs; bool have_no_work = false; char buf[256]; HOST initial_host; unsigned int i; time_t t; memset(&g_reply->wreq, 0, sizeof(g_reply->wreq)); // if client has sticky files we don't need any more, tell it // do_file_delete_regex(); // if different major version of BOINC, just send a message // if (wrong_core_client_version() || unacceptable_os() || unacceptable_cpu() ) { ok_to_send_work = false; } // if no jobs reported and none to send, return without accessing DB // if (!ok_to_send_work && !g_request->results.size()) { return; } warn_user_if_core_client_upgrade_scheduled(); if (requesting_work()) { if (config.locality_scheduling || config.locality_scheduler_fraction || config.enable_assignment) { have_no_work = false; } else { lock_sema(); have_no_work = ssp->no_work(g_pid); if (have_no_work) { g_wreq->no_jobs_available = true; } unlock_sema(); } } // If: // - there's no work, // - a config flag is set, // - client isn't returning results, // - this isn't an initial RPC, // - client is requesting work // then return without accessing the DB. // This is an efficiency hack for when servers are overloaded // if ( have_no_work && config.nowork_skip && requesting_work() && (g_request->results.size() == 0) && (g_request->hostid != 0) ) { g_reply->insert_message("No work available", "low"); g_reply->set_delay(DELAY_NO_WORK_SKIP); if (!config.msg_to_host && !config.enable_vda) { log_messages.printf(MSG_NORMAL, "No work - skipping DB access\n"); return; } } // FROM HERE ON DON'T RETURN; "goto leave" instead // (because ssp->no_work() may have tagged an entry in the work array // with our process ID) retval = open_database(); if (retval) { send_error_message("Server can't open database", 3600); g_reply->project_is_down = true; goto leave; } retval = authenticate_user(); if (retval) goto leave; if (g_reply->user.id == 0) { log_messages.printf(MSG_CRITICAL, "No user ID!\n"); } initial_host = g_reply->host; g_reply->host.rpc_seqno = g_request->rpc_seqno; g_reply->nucleus_only = false; log_request(); // is host blacklisted? // if (g_reply->host._max_results_day == -1) { send_error_message("Not accepting requests from this host", 86400); goto leave; } if (strlen(config.sched_lockfile_dir)) { int pid_with_lock = lock_sched(); if (pid_with_lock > 0) { log_messages.printf(MSG_CRITICAL, "Another scheduler instance [PID=%d] is running for this host\n", pid_with_lock ); } else if (pid_with_lock) { log_messages.printf(MSG_CRITICAL, "Error acquiring lock for [HOST#%d]\n", g_reply->host.id ); } if (pid_with_lock) { send_error_message( "Another scheduler instance is running for this host", 60 ); goto leave; } } // in deciding whether it's a new day, // add a random factor (based on host ID) // to smooth out network traffic over the day // retval = rand(); srand(g_reply->host.id); x = drand()*86400; srand(retval); last_rpc_time = g_reply->host.rpc_time; t = (time_t)(g_reply->host.rpc_time + x); rpc_time_tm = localtime(&t); g_request->last_rpc_dayofyear = rpc_time_tm->tm_yday; t = time(0); g_reply->host.rpc_time = t; t += (time_t)x; rpc_time_tm = localtime(&t); g_request->current_rpc_dayofyear = rpc_time_tm->tm_yday; retval = modify_host_struct(g_reply->host); // write time stats to disk if present // if (g_request->have_time_stats_log) { write_time_stats_log(); } // look up the client's platform(s) in the DB // platform = ssp->lookup_platform(g_request->platform.name); if (platform) g_request->platforms.list.push_back(platform); // if primary platform is anonymous, ignore alternate platforms // if (strcmp(g_request->platform.name, "anonymous")) { for (i=0; i<g_request->alt_platforms.size(); i++) { platform = ssp->lookup_platform(g_request->alt_platforms[i].name); if (platform) g_request->platforms.list.push_back(platform); } } if (g_request->platforms.list.size() == 0) { sprintf(buf, "%s %s", _("This project doesn't support computers of type"), g_request->platform.name ); g_reply->insert_message(buf, "notice"); log_messages.printf(MSG_CRITICAL, "[HOST#%d] platform '%s' not found\n", g_reply->host.id, g_request->platform.name ); g_reply->set_delay(DELAY_PLATFORM_UNSUPPORTED); goto leave; } handle_global_prefs(); read_host_app_versions(); update_n_jobs_today(); handle_results(); handle_file_xfer_results(); if (config.enable_vda) { handle_vda(); } // Do this before resending lost jobs // if (bad_install_type()) { ok_to_send_work = false; } if (!requesting_work()) { ok_to_send_work = false; } send_work_setup(); if (g_request->have_other_results_list) { if (ok_to_send_work && (config.resend_lost_results || g_wreq->resend_lost_results) && !g_request->results_truncated ) { if (resend_lost_work()) { ok_to_send_work = false; } } if (config.send_result_abort) { send_result_abort(); } } if (requesting_work()) { if (!send_code_sign_key(code_sign_key)) { ok_to_send_work = false; } if (have_no_work) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] No jobs in shmem cache\n" ); } } // if last RPC was within config.min_sendwork_interval, don't send work // if (!have_no_work && ok_to_send_work) { if (config.min_sendwork_interval) { double diff = dtime() - last_rpc_time; if (diff < config.min_sendwork_interval) { ok_to_send_work = false; log_messages.printf(MSG_NORMAL, "Not sending work - last request too recent: %f\n", diff ); sprintf(buf, "Not sending work - last request too recent: %d sec", (int)diff ); g_reply->insert_message(buf, "low"); // the 1.01 is in case client's clock // is slightly faster than ours // g_reply->set_delay(1.01*config.min_sendwork_interval); } } if (ok_to_send_work) { send_work(); } } if (g_wreq->no_jobs_available) { g_reply->insert_message("Project has no tasks available", "low"); } } handle_msgs_from_host(); if (config.msg_to_host) { handle_msgs_to_host(); } update_host_record(initial_host, g_reply->host, g_reply->user); write_host_app_versions(); leave: if (!have_no_work) { ssp->restore_work(g_pid); } }
/*===========================================================================* * main * *===========================================================================*/ int main(void) { /* This is the main program of the file system. The main loop consists of * three major activities: getting new work, processing the work, and sending * the reply. This loop never terminates as long as the file system runs. */ int transid; struct job *job; /* SEF local startup. */ sef_local_startup(); printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS); /* This is the main loop that gets work, processes it, and sends replies. */ while (TRUE) { yield_all(); /* let other threads run */ self = NULL; job = NULL; send_work(); get_work(); transid = TRNS_GET_ID(m_in.m_type); if (IS_VFS_FS_TRANSID(transid)) { job = worker_getjob( (thread_t) transid - VFS_TRANSID); if (job == NULL) { printf("VFS: spurious message %d from endpoint %d\n", m_in.m_type, m_in.m_source); continue; } m_in.m_type = TRNS_DEL_ID(m_in.m_type); } if (job != NULL) { do_fs_reply(job); continue; } else if (who_e == PM_PROC_NR) { /* Calls from PM */ /* Special control messages from PM */ sys_worker_start(do_pm); continue; } else if (is_notify(call_nr)) { /* A task notify()ed us */ sys_worker_start(do_control_msgs); continue; } else if (who_p < 0) { /* i.e., message comes from a task */ /* We're going to ignore this message. Tasks should * send notify()s only. */ printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr); continue; } /* At this point we either have results from an asynchronous device * or a new system call. In both cases a new worker thread has to be * started and there might not be one available from the pool. This is * not a problem (requests/replies are simply queued), except when * they're from an FS endpoint, because these can cause a deadlock. * handle_work() takes care of the details. */ if (IS_DEV_RS(call_nr)) { /* We've got results for a device request */ handle_work(do_async_dev_result); continue; } else { /* Normal syscall. */ handle_work(do_work); } } return(OK); /* shouldn't come here */ }
/*===========================================================================* * main * *===========================================================================*/ int main(void) { /* This is the main program of the file system. The main loop consists of * three major activities: getting new work, processing the work, and sending * the reply. This loop never terminates as long as the file system runs. */ int transid; struct worker_thread *wp; /* SEF local startup. */ sef_local_startup(); printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS); /* This is the main loop that gets work, processes it, and sends replies. */ while (TRUE) { worker_yield(); /* let other threads run */ send_work(); /* The get_work() function returns TRUE if we have a new message to * process. It returns FALSE if it spawned other thread activities. */ if (!get_work()) continue; transid = TRNS_GET_ID(m_in.m_type); if (IS_VFS_FS_TRANSID(transid)) { wp = worker_get((thread_t) transid - VFS_TRANSID); if (wp == NULL || wp->w_fp == NULL) { printf("VFS: spurious message %d from endpoint %d\n", m_in.m_type, m_in.m_source); continue; } m_in.m_type = TRNS_DEL_ID(m_in.m_type); do_reply(wp); continue; } else if (who_e == PM_PROC_NR) { /* Calls from PM */ /* Special control messages from PM */ service_pm(); continue; } else if (is_notify(call_nr)) { /* A task ipc_notify()ed us */ switch (who_e) { case DS_PROC_NR: /* Start a thread to handle DS events, if no thread * is pending or active for it already. DS is not * supposed to issue calls to VFS or be the subject of * postponed PM requests, so this should be no problem. */ if (worker_can_start(fp)) handle_work(ds_event); break; case KERNEL: mthread_stacktraces(); break; case CLOCK: /* Timer expired. Used only for select(). Check it. */ expire_timers(m_in.m_notify.timestamp); break; default: printf("VFS: ignoring notification from %d\n", who_e); } continue; } else if (who_p < 0) { /* i.e., message comes from a task */ /* We're going to ignore this message. Tasks should * send ipc_notify()s only. */ printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr); continue; } if (IS_BDEV_RS(call_nr)) { /* We've got results for a block device request. */ bdev_reply(); } else if (IS_CDEV_RS(call_nr)) { /* We've got results for a character device request. */ cdev_reply(); } else { /* Normal syscall. This spawns a new thread. */ handle_work(do_work); } } return(OK); /* shouldn't come here */ }