void _calculate_parameters(double h, my_point p[], double w[], int num)
{
    double H, I, J, K, L, A0, A1;
    double x, y, d;

    if (num > MAX_POINTS_NUM) {
        fprintf(stderr, "Point number is larger than the previously set maximum!\n");
        return;
    }

    is_set_ret = false;

    compute_Aj(h, w, num);
    H = compute_H(p, num);
    I = compute_I(p, num);
    J = compute_J(p, num);
    K = compute_K(p, num);
    L = compute_L(p, num);

    A0 = H - h*h*J*J - K + h*h*L*L;
    A1 = 2 * (I - h*h*J*L);

    // printf("H=%.3lf I=%.3lf J=%.3lf K=%.3lf L=%.3lf A0=%.3lf A1=%.3lf\n",
    //        H, I, J, K, L, A0, A1);
    // printf("Calculated as follows:\n");

    if (0 == A0) {
        if (0 == A1) {
            // A0 and A1 are both 0: x and y could be any value
            printf("The distribution of the given points is a circle.\n");
            x = y = sqrt(2.0) / 2;
            d = -(h*h*(J*x + L*y));
            compute_error(d, x, y, p, num);
        } else {
            // A0 is 0, A1 is not 0: x^2 = 1/2 with x^2 + y^2 = 1
            double ar[2] = {sqrt(2.0)/2, -sqrt(2.0)/2}; // possible values of x and y
            int i, j;
            for (i = 0; i < 2; i++) {
                x = ar[i];
                for (j = 0; j < 2; j++) {
                    y = ar[j];
                    d = -(h*h*(J*x + L*y));
                    compute_error(d, x, y, p, num);
                }
            }
        }
    } else if (0 == A1) {
        double x_ar[4] = {0, 0, 1, -1};
        double y_ar[4] = {1, -1, 0, 0}; // possible values of x and y
        int i;
        for (i = 0; i < 4; i++) {
            x = x_ar[i];
            y = y_ar[i];
            d = -(h*h*(J*x + L*y));
            compute_error(d, x, y, p, num);
        }
    } else {
        // A0 != 0 and A1 != 0
        double t = A0 / sqrt(A1*A1 + A0*A0); // 0 < |t| < 1 since A0 != 0 and A1 != 0
        double x_ar[4] = { sqrt(0.5*(1+t)),  sqrt(0.5*(1-t)),
                          -sqrt(0.5*(1+t)), -sqrt(0.5*(1-t))}; // possible values of x, with x^2 != 0 and x^2 != 1
        int i;
        for (i = 0; i < 4; i++) {
            x = x_ar[i];
            y = (A1/A0) * (x - 0.5/x);
            d = -(h*h*(J*x + L*y));
            compute_error(d, x, y, p, num);
        }
    }
}
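/*
 * Hedged usage sketch (not from the original source). It assumes that
 * my_point describes a 2-D sample point, that MAX_POINTS_NUM, is_set_ret
 * and the compute_* helpers are defined in this translation unit, and that
 * the caller fills p[] and the weights w[] beforehand. The #if 0 guard keeps
 * it out of the build; it only shows how the routine is driven.
 */
#if 0
static void example_calculate_parameters(my_point p[], double w[], int num)
{
    double h = 0.5;  /* bandwidth-like parameter; 0.5 is an arbitrary choice */

    /* p[0..num-1] and w[0..num-1] are assumed to be filled by the caller,
       with num <= MAX_POINTS_NUM. Candidate (x, y, d) solutions are scored
       internally through compute_error(). */
    _calculate_parameters(h, p, w, num);
}
#endif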
void *LLW_train_thread(void *th_data)
{
    // Recover data
    struct ThreadData *data = (struct ThreadData *)th_data;
    const int thread_id = data->thread_id;
    const int nprocs = data->nprocs;
    struct Model *model = data->model;
    struct KernelCache *kernelcache = data->kernelcache;
    long chunk_size = data->chunk_size;
    const double accuracy = data->accuracy;
    double **gradient = data->gradient;
    double **H_alpha = data->H_alpha;
    double *best_primal_upper_bound = data->best_primal_upper_bound;
    int *activeset = data->activeset;
    long *nb_SV = data->nb_SV;
    double *lp_rhs = data->lp_rhs;
    FILE *fp = data->logfile_ptr;

    pthread_mutex_unlock(&thread_data_mutex); // Release thread_data for next thread

    // Local variables
    int do_eval;
    char yesno;
    long long return_status = -1;

    // Prepare the cache
    struct TrainingCache cache;
    cache.chunk_size = chunk_size;
    LLW_alloc_memory(&cache, model->Q, model->nb_data, chunk_size);
    cache.kc = kernelcache;
    cache.activeset = activeset;
    cache.lp_rhs = lp_rhs;

    double **delta = matrix(chunk_size, model->Q);

    double previous_ratio = 0.0;
    double improvement = 1.0;
    double theta_opt;
    int jump = false;

    if (accuracy == 0)
        do_eval = 0;
    else
        do_eval = 1;

    /*
       Prepare parallel gradient computations:
       - the gradient vector is split into NUMTHREADS_GRAD parts (along i)
       - each part is updated by a different thread
    */
    // max number of threads for gradient updates is nprocs
    pthread_t *grad_threads = (pthread_t *)malloc(sizeof(pthread_t) * nprocs);
    // start with 1 thread (main load on kernel evaluations)
    int numthreads_grad = 1;

    void *status;
    int rc;
    long k;

    struct ThreadGradient_data *grad_data = (struct ThreadGradient_data *)malloc(sizeof(struct ThreadGradient_data) * nprocs);

    // Disable parallel gradient computation for small data sets
    int parallel_gradient_update = 1;
    if (model->nb_data < 5000 || nprocs == 1)
        parallel_gradient_update = 0;

    if (parallel_gradient_update) {
        for (k = 0; k < nprocs; k++) {
            grad_data[k].gradient = gradient;
            grad_data[k].H_alpha = H_alpha;
            grad_data[k].cache = &cache;
            grad_data[k].model = model;
        }
        grad_data[0].start_i = 1;
        grad_data[0].end_i = model->nb_data / numthreads_grad;
        for (k = 1; k < numthreads_grad - 1; k++) {
            grad_data[k].start_i = grad_data[k-1].end_i + 1;
            grad_data[k].end_i = grad_data[k].start_i + model->nb_data / numthreads_grad - 1;
        }
        if (numthreads_grad > 1) {
            grad_data[numthreads_grad-1].start_i = grad_data[numthreads_grad-2].end_i + 1;
            grad_data[numthreads_grad-1].end_i = model->nb_data;
        }
    }

#ifdef _WIN32
    // Init POOL
    TP_WORK **work;
    if (parallel_gradient_update) {
        work = malloc(sizeof(TP_WORK *) * nprocs);
        for (k = 0; k < nprocs; k++)
            work[k] = CreateThreadpoolWork(LLW_update_gradient_thread2, (void *) &grad_data[k], NULL);
    }
#endif

    // Switch to nprocs/4 threads for gradient updates when 25% of the kernel matrix is cached
    int percentage_step = 1;
    long percentage = model->nb_data / 4;
    int next_numthreads_grad = nprocs / 4;
    if (next_numthreads_grad == 0)
        next_numthreads_grad = 1;

    // Main loop
    int thread_stop = 0;
    do {
        if ((TRAIN_SMALL_STEP < TRAIN_STEP) && (model->iter % TRAIN_SMALL_STEP) == 0) {
            printf(".");
            fflush(stdout);
        }

        // Select a random chunk of data to optimize
        select_random_chunk(&cache, model);

        // Compute the kernel submatrix for this chunk
        compute_K(&cache, model);

        // Enter critical section (using and modifying the model)
        pthread_mutex_lock(&(model->mutex));

        jump = LLW_solve_lp(gradient, &cache, model);
        if (jump == false)
            jump = LLW_check_opt_sol(gradient, &cache, model);

        if (jump == false) {
            LLW_compute_delta(delta, &cache, model);
            theta_opt = LLW_compute_theta_opt(delta, &cache, model);

            if (theta_opt > 0.0) {
                *nb_SV += LLW_compute_new_alpha(theta_opt, &cache, model);

                if (parallel_gradient_update) {
                    // Update gradient in parallel
                    for (k = 0; k < numthreads_grad; k++) {
#ifdef _WIN32
                        SubmitThreadpoolWork(work[k]);
#else
                        rc = pthread_create(&grad_threads[k], NULL, LLW_update_gradient_thread, (void *) &grad_data[k]);
#endif
                    }
                    // Wait for gradient computations to terminate
                    for (k = 0; k < numthreads_grad; k++) {
#ifdef _WIN32
                        WaitForThreadpoolWorkCallbacks(work[k], FALSE);
#else
                        rc = pthread_join(grad_threads[k], &status);
#endif
                    }
                } else {
                    // old-style non-threaded gradient update (for small data sets)
                    LLW_update_gradient(gradient, H_alpha, &cache, model);
                }
            }
        }

        if ((do_eval && (model->iter % TRAIN_STEP) == 0) || EVAL || STOP
            || (do_eval && model->ratio >= accuracy)) {

            if (fp != NULL)
                fprintf(fp, "%ld ", model->iter);

            if (EVAL)
                printf("\n\n*** Evaluating the model at iteration %ld...\n", model->iter);

            // Evaluate how far we are in the optimization
            // (prints more info if interrupted by the user)
            previous_ratio = model->ratio;
            model->ratio = MSVM_eval(best_primal_upper_bound, gradient, H_alpha, NULL, model, EVAL, fp);
            print_training_info(*nb_SV, model);
            improvement = model->ratio - previous_ratio;

            if (EVAL) { // if interrupted by the user (otherwise let the ratio decide if we go on training)
                printf("\n *** Do you want to continue training ([y]/n)? ");
                yesno = getchar();
                if (yesno == 'n') {
                    STOP = 1;
                }
                EVAL = 0; // reset interruption trigger
            }
        }

        // Release kernel submatrix in cache
        release_K(&cache);

        // Check if a sufficient % of the kernel matrix is cached
        if (parallel_gradient_update && cache.kc->max_idx >= percentage) {
            // and switch this thread to computing gradient updates instead of kernel rows if it is
            thread_stop = switch_thread(nprocs, &numthreads_grad, &next_numthreads_grad,
                                        &percentage, &percentage_step, grad_data,
                                        thread_id, model->nb_data);
            // (threads are actually stopped to leave the CPUs
            //  to other threads that will compute gradient updates)
        }

        model->iter++;

        // Release mutex: End of critical section
        pthread_mutex_unlock(&(model->mutex));

    } while (model->iter <= MSVM_TRAIN_MAXIT
             && (!do_eval || (model->ratio < accuracy && improvement != 0.0))
             && !STOP && !thread_stop);

    // Release mutex: End of critical section
    pthread_mutex_unlock(&(model->mutex));

#ifdef _WIN32
    if (parallel_gradient_update) {
        for (k = 0; k < numthreads_grad; k++)
            CloseThreadpoolWork(work[k]);
    }
#endif

    // compute return_status
    if (do_eval && (model->ratio >= accuracy || improvement == 0.0))
        return_status = 0; // optimum reached or no more improvement

    // Free memory
    LLW_free_memory(&cache);
    free(delta[1]);
    free(delta);
    free(grad_threads);
    free(grad_data);

    pthread_exit((void *)return_status);
}
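/*
 * Hedged launch sketch (not part of the original source): it mirrors the
 * hand-off protocol implied above, where LLW_train_thread() copies its
 * arguments and then unlocks thread_data_mutex. All struct ThreadData field
 * names are taken from the reads at the top of the worker; the shared buffers
 * (gradient, H_alpha, ...) are assumed to be allocated by the caller, and the
 * surrounding file is assumed to provide <pthread.h> and <stdlib.h>.
 * The #if 0 guard keeps this illustration out of the build.
 */
#if 0
static void launch_LLW_workers(struct Model *model, struct KernelCache *kc,
                               int nprocs, long chunk_size, double accuracy,
                               double **gradient, double **H_alpha,
                               double *best_primal_upper_bound,
                               int *activeset, long *nb_SV, double *lp_rhs,
                               FILE *logfile)
{
    pthread_t *tid = (pthread_t *)malloc(sizeof(pthread_t) * nprocs);
    struct ThreadData td;
    long t;

    for (t = 0; t < nprocs; t++) {
        // The worker releases this mutex once it has copied its arguments,
        // so a single ThreadData structure can be reused for every thread.
        pthread_mutex_lock(&thread_data_mutex);
        td.thread_id = t;
        td.nprocs = nprocs;
        td.model = model;
        td.kernelcache = kc;
        td.chunk_size = chunk_size;
        td.accuracy = accuracy;
        td.gradient = gradient;
        td.H_alpha = H_alpha;
        td.best_primal_upper_bound = best_primal_upper_bound;
        td.activeset = activeset;
        td.nb_SV = nb_SV;
        td.lp_rhs = lp_rhs;
        td.logfile_ptr = logfile;
        pthread_create(&tid[t], NULL, LLW_train_thread, (void *)&td);
    }
    for (t = 0; t < nprocs; t++)
        pthread_join(tid[t], NULL);

    free(tid);
}
#endif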
bool extract_clips(const char *input_path, const char *cluster_path, const char *output_path, const char *index_out_path, int clip_size)
{
    DiskReadMda X(input_path);
    DiskReadMda C(cluster_path);

    if (X.totalSize() <= 1) {
        printf("Problem reading input file: %s\n", input_path);
        return false;
    }
    if (C.totalSize() <= 1) {
        printf("Problem reading cluster file: %s\n", cluster_path);
        return false;
    }

    int M = X.N1();          // number of channels
    int T = clip_size;       // clip length in samples
    int num_clips = C.N2();  // number of events in the cluster file
    int K = compute_K(C);    // number of clusters
    printf("K=%d\n", K);

    Mda index_out;
    index_out.allocate(1, K);

    MDAIO_HEADER H_out;
    H_out.data_type = MDAIO_TYPE_FLOAT32;
    H_out.num_bytes_per_entry = 4;
    H_out.num_dims = 3;
    H_out.dims[0] = M;
    H_out.dims[1] = T;
    H_out.dims[2] = num_clips;

    FILE *outf = fopen(output_path, "wb");
    if (!outf) {
        printf("Unable to open output file: %s\n", output_path);
        return false;
    }
    mda_write_header(&H_out, outf);

    float *buf = (float *)malloc(sizeof(float)*M*T);
    int jj = 0;
    for (int k = 1; k <= K; k++) {
        index_out.setValue(jj, 0, k-1); // starting clip index for cluster k
        for (int i = 0; i < num_clips; i++) {
            int ii = 0;
            int time0 = (int)C.value(1, i);
            int k0 = (int)C.value(2, i);
            if (k0 == k) {
                // Copy the M x T window centered on time0 into the buffer
                for (int t = 0; t < T; t++) {
                    for (int m = 0; m < M; m++) {
                        buf[ii] = X.value(m, t + time0 - T/2);
                        ii++;
                    }
                }
                mda_write_float32(buf, &H_out, M*T, outf);
                jj++;
            }
        }
    }
    free(buf);
    fclose(outf);

    if (!index_out.write(index_out_path)) {
        printf("Unable to write output file: %s\n", index_out_path);
        return false;
    }

    return true;
}
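/*
 * Hedged usage sketch: the file names and clip size below are illustrative
 * only. It assumes the .mda layout read by extract_clips() above: an M x N
 * timeseries, and a cluster array whose rows at index 1 and 2 hold event
 * times and cluster labels respectively. The #if 0 guard keeps it out of
 * the build.
 */
#if 0
static void example_extract_clips()
{
    bool ok = extract_clips("raw.mda",         /* hypothetical M x N timeseries */
                            "firings.mda",     /* hypothetical times/labels array */
                            "clips.mda",       /* output: M x T x num_clips clips */
                            "clips_index.mda", /* output: 1 x K starting indices */
                            100);              /* clip_size T, in samples */
    if (!ok)
        printf("extract_clips failed.\n");
}
#endif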