/*
 * Worker-thread entry point for LLW (Lee-Lin-Wahba) multi-class SVM training.
 *
 * th_data: pointer to a struct ThreadData prepared by the spawning code and
 * protected by the global thread_data_mutex; the fields are copied out locally
 * and the mutex released so the next worker can be launched.
 *
 * Each iteration: pick a random chunk, compute its kernel submatrix, then
 * (under model->mutex) solve the chunk LP, take an optimal step, and update
 * the gradient — either serially or split across worker threads once the
 * kernel cache fills up. Exits via pthread_exit(); the status is 0 when the
 * requested accuracy was reached (or no further improvement), -1 otherwise.
 */
void *LLW_train_thread(void *th_data) {
	// Recover data from the shared ThreadData slot
	struct ThreadData *data = (struct ThreadData *)th_data;
	const int thread_id = data->thread_id;
	const int nprocs = data->nprocs;
	struct Model *model = data->model;
	struct KernelCache *kernelcache = data->kernelcache;
	long chunk_size = data->chunk_size;
	const double accuracy = data->accuracy;
	double **gradient = data->gradient;
	double **H_alpha = data->H_alpha;
	double *best_primal_upper_bound = data->best_primal_upper_bound;
	int *activeset = data->activeset;
	long *nb_SV = data->nb_SV;
	double *lp_rhs = data->lp_rhs;
	FILE *fp = data->logfile_ptr;

	pthread_mutex_unlock(&thread_data_mutex);	// Release thread_data for next thread

	// Local variables
	int do_eval;
	char yesno;
	long long return_status = -1;	// -1 = stopped without reaching accuracy

	// Prepare the per-thread training cache (chunk workspace + kernel cache refs)
	struct TrainingCache cache;
	cache.chunk_size = chunk_size;
	LLW_alloc_memory(&cache, model->Q, model->nb_data, chunk_size);
	cache.kc = kernelcache;
	cache.activeset = activeset;
	cache.lp_rhs = lp_rhs;

	// NOTE(review): matrix() presumably returns 1-based (Numerical-Recipes style)
	// storage — see the matching free(delta[1]); free(delta); below. Confirm.
	double **delta = matrix(chunk_size, model->Q);
	double previous_ratio = 0.0;
	double improvement = 1.0;
	double theta_opt;
	int jump = false;

	// accuracy == 0 means "train until max iterations", skipping evaluations
	if(accuracy == 0)
		do_eval = 0;
	else
		do_eval = 1;

	/*
	Prepare parallel gradient computations:
	- the gradient vector is split into NUMTHREADS_GRAD parts (along i)
	- each part is updated by a different thread
	*/
	// max number of threads for gradient updates is nprocs
	// NOTE(review): malloc results are not checked before use — TODO confirm policy.
	pthread_t *grad_threads = (pthread_t *)malloc(sizeof(pthread_t) * nprocs);
	// start with 1 thread (main load on kernel evaluations)
	int numthreads_grad = 1;
	void *status;
	int rc;	// NOTE(review): pthread_create/join return codes stored but never checked
	long k;
	struct ThreadGradient_data *grad_data = (struct ThreadGradient_data *)malloc(sizeof(struct ThreadGradient_data) * nprocs);

	// Disable parallel gradient computation for small data sets
	int parallel_gradient_update = 1;
	if(model->nb_data < 5000 || nprocs == 1)
		parallel_gradient_update = 0;

	if(parallel_gradient_update) {
		// All slots share the same pointers; only [start_i, end_i] differs.
		for(k=0;k<nprocs;k++) {
			grad_data[k].gradient = gradient;
			grad_data[k].H_alpha = H_alpha;
			grad_data[k].cache = &cache;
			grad_data[k].model = model;
		}
		// Partition data indices 1..nb_data (inclusive, 1-based) across threads;
		// the last thread absorbs the remainder.
		grad_data[0].start_i = 1;
		grad_data[0].end_i = model->nb_data / numthreads_grad;
		for(k=1;k<numthreads_grad-1;k++) {
			grad_data[k].start_i = grad_data[k-1].end_i + 1;
			grad_data[k].end_i = grad_data[k].start_i + model->nb_data / numthreads_grad -1;
		}
		if(numthreads_grad>1) {
			grad_data[numthreads_grad-1].start_i = grad_data[numthreads_grad-2].end_i + 1;
			grad_data[numthreads_grad-1].end_i = model->nb_data;
		}
	}

#ifdef _WIN32
	// Init POOL: Windows uses a thread-pool work item per gradient slice
	// instead of creating/joining pthreads each iteration.
	TP_WORK ** work;
	if(parallel_gradient_update) {
		work = malloc(sizeof(TP_WORK *) * nprocs);
		for(k=0;k<nprocs;k++)
			work[k] = CreateThreadpoolWork(LLW_update_gradient_thread2, (void *) &grad_data[k], NULL);
	}
#endif

	// Switch to nprocs/4 threads for gradient update when 25% of the kernel matrix is cached
	int percentage_step = 1;
	long percentage = model->nb_data / 4;
	int next_numthreads_grad = nprocs/4;
	if(next_numthreads_grad == 0)
		next_numthreads_grad = 1;

	// Main loop
	int thread_stop = 0;
	do {
		// Progress dots between full evaluations
		if((TRAIN_SMALL_STEP < TRAIN_STEP) && (model->iter%TRAIN_SMALL_STEP) == 0) {
			printf(".");
			fflush(stdout);
		}

		// Select a random chunk of data to optimize
		select_random_chunk(&cache,model);

		// Compute the kernel submatrix for this chunk (outside the model lock)
		compute_K(&cache,model);

		// Enter Critical Section (using and modifying the model)
		pthread_mutex_lock(&(model->mutex));

		// jump == true means "skip the update step" (LP failed or chunk already optimal)
		jump = LLW_solve_lp(gradient, &cache, model);

		if(jump == false)
			jump = LLW_check_opt_sol(gradient,&cache,model);

		if(jump == false) {
			// Compute the descent direction and optimal step size for this chunk
			LLW_compute_delta(delta,&cache,model);
			theta_opt = LLW_compute_theta_opt(delta, &cache, model);
			if (theta_opt > 0.0) {
				*nb_SV += LLW_compute_new_alpha(theta_opt,&cache,model);
				if(parallel_gradient_update) {
					// Update gradient in parallel (one worker per index slice)
					for(k=0;k<numthreads_grad;k++) {
#ifdef _WIN32
						SubmitThreadpoolWork(work[k]);
#else
						rc = pthread_create(&grad_threads[k], NULL, LLW_update_gradient_thread, (void *) &grad_data[k]);
#endif
					}
					// Wait for gradient computations to terminate
					for(k=0;k<numthreads_grad;k++) {
#ifdef _WIN32
						WaitForThreadpoolWorkCallbacks(work[k], FALSE);
#else
						rc = pthread_join(grad_threads[k],&status);
#endif
					}
				} else {
					// old-style non-threaded gradient update (for small data sets)
					LLW_update_gradient(gradient,H_alpha, &cache,model);
				}
			}
		}

		// Periodic / user-triggered (EVAL) / stop-triggered (STOP) evaluation
		if((do_eval && (model->iter%TRAIN_STEP) == 0) || EVAL || STOP || (do_eval && model->ratio >= accuracy) ) {
			if(fp != NULL)
				fprintf(fp,"%ld ",model->iter);
			if(EVAL)
				printf("\n\n*** Evaluating the model at iteration %ld...\n",model->iter);

			// Evaluate how far we are in the optimization
			// (prints more info if interrupted by user)
			previous_ratio = model->ratio;
			model->ratio = MSVM_eval(best_primal_upper_bound, gradient, H_alpha, NULL, model, EVAL, fp);
			print_training_info(*nb_SV, model);
			improvement = model->ratio - previous_ratio;

			if(EVAL) // if interrupted by user (otherwise let the ratio decide if we go on training)
			{
				printf("\n *** Do you want to continue training ([y]/n)? ");
				yesno = getchar();
				if(yesno=='n') {
					STOP = 1;
				}
				EVAL = 0; // reset interruption trigger
			}
		}

		// Release kernel submatrix in cache
		release_K(&cache);

		// Check if a sufficient % of the kernel matrix is cached
		if( parallel_gradient_update && cache.kc->max_idx >= percentage ) {
			// and switch thread to compute gradient updates instead of kernel rows if it is
			thread_stop = switch_thread(nprocs, &numthreads_grad, &next_numthreads_grad, &percentage, &percentage_step, grad_data, thread_id, model->nb_data);
			// (threads are actually stopped to leave the CPUs
			// to other threads that will compute gradient updates)
		}

		model->iter++;

		// Release mutex: End of critical section
		pthread_mutex_unlock(&(model->mutex));

	} while(model->iter <= MSVM_TRAIN_MAXIT && (!do_eval || (model->ratio < accuracy && improvement != 0.0)) && !STOP && !thread_stop);

	// Release mutex: End of critical section (see below)
	// NOTE(review): the mutex was already unlocked at the end of the last loop
	// iteration; unlocking a non-locked default mutex is undefined behavior
	// (POSIX). Confirm whether this extra unlock can ever pair with a held lock,
	// otherwise it should be removed.
	pthread_mutex_unlock(&(model->mutex));

#ifdef _WIN32
	if(parallel_gradient_update){
		for(k=0;k<numthreads_grad;k++)
			CloseThreadpoolWork(work[k]);
	}
#endif

	// compute return_status
	if(do_eval && (model->ratio >= accuracy || improvement==0.0))
		return_status = 0; // optimum reached or no more improvement.

	// Free memory
	LLW_free_memory(&cache);
	free(delta[1]);free(delta);	// 1-based matrix storage — see note at allocation
	free(grad_threads);
	free(grad_data);

	// NOTE(review): casting long long to void* narrows on ILP32 targets — confirm
	// joiners only test for 0 / nonzero.
	pthread_exit((void*)return_status);
}
size_t vt_fread(void *buf, size_t size, size_t count, struct vt_file *vt_file) { struct vt *vtptr = vt_file->vtptr; unsigned int i = 0; char *b = ((char*)buf); switch(vtptr->mode){ case VT_MODE_OLD: for(; i<size*count/4;){ int ch; if(vtptr->block) ch = vt_kb_get(vt_file); else ch = vt_kb_peek(vt_file); if(ch < 0) return i/size; ((int*)buf)[i] = ch; i++; } break; case VT_MODE_NORMAL: if(vtptr->kb_queue_count){ //kprintf("(%d in queue)", vtptr->kb_queue_count); spinl_lock(&vtptr->queuelock); for(; i<size*count && vtptr->kb_queue_count;){ b[i] = vtptr->kb_queue[vtptr->kb_queue_start]; i++; vtptr->kb_queue_count--; vtptr->kb_queue_start++; vtptr->kb_queue_start %= VT_KB_QUEUE_SIZE; } spinl_unlock(&vtptr->queuelock); } for(; i<size*count;){ int ch; if(vtptr->block) ch = vt_kb_get(vt_file); else ch = vt_kb_peek(vt_file); if(ch < 0) return i/size; if(ch<=0xff && ch>=0){ b[i] = (char)ch; i++; } else{ char *code = NULL; //char temp[10]; switch(ch){ case KEY_UP: code = "\x1b[A"; break; case KEY_DOWN: code = "\x1b[B"; break; case KEY_RIGHT: code = "\x1b[C"; break; case KEY_LEFT: code = "\x1b[D"; break; case KEY_HOME: code = "\x1b[1~"; break; case KEY_INS: code = "\x1b[2~"; break; case KEY_DEL: code = "\x1b[3~"; break; case KEY_END: code = "\x1b[4~"; break; case KEY_PGUP: code = "\x1b[5~"; break; case KEY_PGDOWN: code = "\x1b[6~"; break; case KEY_F1: code = "\x1b[[A"; break; case KEY_F2: code = "\x1b[[B"; break; case KEY_F3: code = "\x1b[[C"; break; case KEY_F4: code = "\x1b[[D"; break; case KEY_F5: code = "\x1b[[E"; break; case KEY_F6: code = "\x1b[[17~"; break; case KEY_F7: code = "\x1b[[18~"; break; case KEY_F8: code = "\x1b[[19~"; break; case KEY_F9: code = "\x1b[[20~"; break; case KEY_F10: code = "\x1b[[21~"; break; case KEY_F11: code = "\x1b[[23~"; break; case KEY_F12: code = "\x1b[[24~"; break; } if(code){ //kprintf("CODE(%s)", code); unsigned int len = strlen(code); unsigned int a; for(a=0; a<len && a<size*count-i; a++){ //kprintf("(%c->b)", code[a]); b[i] = code[a]; 
i++; } if(a!=len){ for(;a<len;a++){ spinl_lock(&vtptr->queuelock); //kprintf("(%c->q)", code[a]); if(vtptr->kb_queue_count >= VT_KB_QUEUE_SIZE){ kprintf("vt_fread(): warning: code doesn't fit in queue!"); } vtptr->kb_queue[vtptr->kb_queue_end] = code[a]; vtptr->kb_queue_count++; vtptr->kb_queue_end++; vtptr->kb_queue_end %= VT_KB_QUEUE_SIZE; spinl_unlock(&vtptr->queuelock); } } } else{ /*b[i] = 'E'; i++;*/ } } } break; case VT_MODE_RAWEVENTS: //kprintf("vt_fread(): rawevents\n"); for(; i < size*count/4;){ if(vt_file->vtptr->block){ while(!vt_file->vtptr->kb_buf_count) switch_thread(); } int event = vt_get_and_parse_next_key_event(vt_file); //kprintf("event=%d\n", event); if(event < -1){ kprintf("vt_fread(): error in vt_get_and_parse_next_key_event\n"); return i*sizeof(uint_t)/size; } if(event == -1){ if(vt_file->vtptr->block){ continue; } else{ //kprintf("vt_thread(): not blocking -> returning\n"); return i*sizeof(uint_t)/size; } } ((uint_t*)buf)[i] = event; i++; } break; default: panic("vt_fread(): ei näin"); } return count; }