// return the app version with greatest projected FLOPS // for the given job and host, or NULL if none is available // // NOTE: the BEST_APP_VERSION structure returned by this // must not be modified or reused; // a pointer to it is stored in APP_VERSION. // // check_req: if set, return only app versions that use resources // for which the work request is nonzero. // This check is not done for: // - assigned jobs // - resent jobs // reliable_only: use only versions for which this host is "reliable" // // We "memoize" the results, maintaining an array g_wreq->best_app_versions // that maps app ID to the best app version (or NULL). // BEST_APP_VERSION* get_app_version( WORKUNIT& wu, bool check_req, bool reliable_only ) { unsigned int i; int j; BEST_APP_VERSION* bavp; char buf[256]; bool job_needs_64b = (wu.rsc_memory_bound > max_32b_address_space()); if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] get_app_version(): getting app version for WU#%lu (%s) appid:%lu\n", wu.id, wu.name, wu.appid ); if (job_needs_64b) { log_messages.printf(MSG_NORMAL, "[version] job needs 64-bit app version: mem bnd %f\n", wu.rsc_memory_bound ); } } APP* app = ssp->lookup_app(wu.appid); if (!app) { log_messages.printf(MSG_CRITICAL, "WU refers to nonexistent app: %lu\n", wu.appid ); return NULL; } // if the app uses homogeneous app version, // don't send to anonymous platform client. // Then check if the WU is already committed to an app version // if (app->homogeneous_app_version) { if (g_wreq->anonymous_platform) { return NULL; } if ( wu.app_version_id) { return check_homogeneous_app_version(wu, reliable_only); } } // see if app is already in memoized array // std::vector<BEST_APP_VERSION*>::iterator bavi; bavi = g_wreq->best_app_versions.begin(); while (bavi != g_wreq->best_app_versions.end()) { bavp = *bavi; if (bavp->appid == wu.appid && (job_needs_64b == bavp->for_64b_jobs)) { if (!bavp->present) { #if 0 if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] returning cached NULL\n" ); } #endif return NULL; } // if we're at the jobs-in-progress limit for this // app and resource type, fall through and find another version // if (config.max_jobs_in_progress.exceeded( app, bavp->host_usage.proc_type )) { if (config.debug_version_select) { app_version_desc(*bavp, buf); log_messages.printf(MSG_NORMAL, "[version] %s: max jobs in progress exceeded\n", buf ); } g_wreq->best_app_versions.erase(bavi); break; } // if we previously chose an app version but don't need more work // for that processor type, fall through and find another version // if (check_req && g_wreq->rsc_spec_request) { int pt = bavp->host_usage.proc_type; if (!g_wreq->need_proc_type(pt)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] have %s version but no more %s work needed\n", proc_type_name(pt), proc_type_name(pt) ); } g_wreq->best_app_versions.erase(bavi); break; } } if (config.debug_version_select) { app_version_desc(*bavp, buf); log_messages.printf(MSG_NORMAL, "[version] returning cached version: %s\n", buf ); } return bavp; } ++bavi; } // here if app was not in memoized array, // or we couldn't use the app version there. 
if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] looking for version of %s\n", app->name ); } bavp = new BEST_APP_VERSION; bavp->appid = wu.appid; bavp->for_64b_jobs = job_needs_64b; if (g_wreq->anonymous_platform) { CLIENT_APP_VERSION* cavp = get_app_version_anonymous( *app, job_needs_64b, reliable_only ); if (!cavp) { bavp->present = false; } else { bavp->present = true; bavp->host_usage = cavp->host_usage; bavp->cavp = cavp; int gavid = host_usage_to_gavid(cavp->host_usage, *app); bavp->reliable = app_version_is_reliable(gavid); bavp->trusted = app_version_is_trusted(gavid); if (config.debug_version_select) { app_version_desc(*bavp, buf); log_messages.printf(MSG_NORMAL, "[version] using %s\n", buf); } } g_wreq->best_app_versions.push_back(bavp); if (!bavp->present) return NULL; return bavp; } // Go through the client's platforms, // and scan the app versions for each platform. // Pick the one with highest expected FLOPS // // if config.prefer_primary_platform is set: // stop scanning platforms once we find a feasible version bavp->host_usage.projected_flops = 0; bavp->avp = NULL; for (i=0; i<g_request->platforms.list.size(); i++) { bool found_feasible_version = false; PLATFORM* p = g_request->platforms.list[i]; if (job_needs_64b && !is_64b_platform(p->name)) { continue; } for (j=0; j<ssp->napp_versions; j++) { HOST_USAGE host_usage; APP_VERSION& av = ssp->app_versions[j]; if (av.appid != wu.appid) continue; if (av.platformid != p->id) continue; if (av.beta) { if (!g_wreq->allow_beta_work) { continue; } } if (strlen(av.plan_class)) { if (!app_plan(*g_request, av.plan_class, host_usage)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] app_plan() returned false\n", av.id ); } continue; } if (!g_request->client_cap_plan_class) { if (!host_usage.is_sequential_app()) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] client %d lacks plan class capability\n", av.id, g_request->core_client_version ); } continue; } } } else { host_usage.sequential_app(g_reply->host.p_fpops); } // skip versions that go against resource prefs // int pt = host_usage.proc_type; if (g_wreq->dont_use_proc_type[pt]) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] Skipping %s version - user prefs say no %s\n", av.id, proc_type_name(pt), proc_type_name(pt) ); } continue; } if (reliable_only && !app_version_is_reliable(av.id)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] not reliable\n", av.id ); } continue; } if (daily_quota_exceeded(av.id, host_usage)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] daily quota exceeded\n", av.id ); } continue; } // skip versions for which we're at the jobs-in-progress limit // if (config.max_jobs_in_progress.exceeded(app, host_usage.proc_type)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] jobs in progress limit exceeded\n", av.id ); config.max_jobs_in_progress.print_log(); } continue; } // skip versions for resources we don't need // if (check_req && !need_this_resource(host_usage, &av, NULL)) { continue; } // skip versions which require a newer core client // if (g_request->core_client_version < av.min_core_version) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] client version %d < min core version %d\n", av.id, g_request->core_client_version, av.min_core_version ); } // 
Do not tell the user he needs to update the client // just because the client is too old for a particular app version // g_wreq->outdated_client = true; continue; } if (av.max_core_version && g_request->core_client_version > av.max_core_version) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] client version %d > max core version %d\n", av.id, g_request->core_client_version, av.max_core_version ); } continue; } // at this point we know the version is feasible, // so if config.prefer_primary_platform is set // we won't look any further. // found_feasible_version = true; // pick the fastest version. // Throw in a random factor in case the estimates are off. // DB_HOST_APP_VERSION* havp = gavid_to_havp(av.id); double r = 1; long n = 1; if (havp) { // slowly move from raw calc to measured performance as number // of results increases // n = std::max((long)havp->pfc.n, (long)n); double old_projected_flops = host_usage.projected_flops; estimate_flops(host_usage, av); host_usage.projected_flops = (host_usage.projected_flops*(n-1) + old_projected_flops)/n; // special case for versions that don't work on a given host. // This is defined as: // 1. pfc.n is 0 // 2. The max_jobs_per_day is 1 // 3. Consecutive valid is 0. // In that case, heavily penalize this app_version most of the // time. // if ((havp->pfc.n==0) && (havp->max_jobs_per_day==1) && (havp->consecutive_valid==0)) { if (drand() > 0.01) { host_usage.projected_flops *= 0.01; if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] App version AV#%lu is failing on HOST#%lu\n", havp->app_version_id, havp->host_id ); } } } } if (config.version_select_random_factor) { r += config.version_select_random_factor*rand_normal()/n; if (r <= .1) { r = .1; } } if (config.debug_version_select && bavp && bavp->avp) { log_messages.printf(MSG_NORMAL, "[version] Comparing AV#%lu (%.2f GFLOP) against AV#%lu (%.2f GFLOP)\n", av.id, host_usage.projected_flops/1e+9, bavp->avp->id, bavp->host_usage.projected_flops/1e+9 ); } if (r*host_usage.projected_flops > bavp->host_usage.projected_flops) { if (config.debug_version_select && (host_usage.projected_flops <= bavp->host_usage.projected_flops)) { log_messages.printf(MSG_NORMAL, "[version] [AV#%lu] Random factor wins. r=%f n=%ld\n", av.id, r, n ); } host_usage.projected_flops*=r; bavp->host_usage = host_usage; bavp->avp = &av; bavp->reliable = app_version_is_reliable(av.id); bavp->trusted = app_version_is_trusted(av.id); if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] Best app version is now AV%lu (%.2f GFLOP)\n", bavp->avp->id, bavp->host_usage.projected_flops/1e+9 ); } } else { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] Not selected, AV#%lu r*%.2f GFLOP <= Best AV %.2f GFLOP (r=%f, n=%ld)\n", av.id, host_usage.projected_flops/1e+9, bavp->host_usage.projected_flops/1e+9, r, n ); } } } // loop over app versions if (config.prefer_primary_platform && found_feasible_version) { break; } } // loop over client platforms if (bavp->avp) { estimate_flops(bavp->host_usage, *bavp->avp); if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] Best version of app %s is [AV#%lu] (%.2f GFLOPS)\n", app->name, bavp->avp->id, bavp->host_usage.projected_flops/1e9 ); } bavp->present = true; g_wreq->best_app_versions.push_back(bavp); } else { // Here if there's no app version we can use. 
// if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] returning NULL; platforms:\n" ); for (i=0; i<g_request->platforms.list.size(); i++) { PLATFORM* p = g_request->platforms.list[i]; log_messages.printf(MSG_NORMAL, "[version] %s\n", p->name ); } } g_wreq->best_app_versions.push_back(bavp); return NULL; } return bavp; }
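Several snippets in this collection lean on rand_normal() producing a standard normal deviate (the version_select_random_factor jitter above, for example). As a point of reference, here is a minimal sketch of one common way to implement such a sampler, the Box-Muller transform; uniform01() is a hypothetical helper, and the projects quoted here may implement rand_normal() differently.

#include <math.h>
#include <stdlib.h>

// Hypothetical helper: uniform double strictly inside (0, 1), so log(u1) stays finite.
static double uniform01(void)
{
    return (rand() + 1.0) / ((double)RAND_MAX + 2.0);
}

// Sketch of a Box-Muller rand_normal(): two independent uniforms mapped to
// one standard normal deviate. Not necessarily the implementation used above.
double rand_normal(void)
{
    double u1 = uniform01();
    double u2 = uniform01();
    return sqrt(-2.0 * log(u1)) * cos(2.0 * 3.14159265358979323846 * u2);
}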
// Return a random real number in the range [a, b]
double rand_range(double a, double b)
{
    return a + (b - a) * rand_normal();
}
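Because rand_normal() is an unbounded standard-normal draw, rand_range() as written is centered at a with standard deviation (b - a); it is not confined to [a, b]. For comparison, a sketch of a uniform variant, assuming a drand()-style source of uniform doubles in [0, 1) is available (rand_range_uniform is a hypothetical name):

double drand(void);   // assumed: uniform double in [0, 1)

// Hypothetical uniform counterpart: the result always lies inside [a, b].
double rand_range_uniform(double a, double b)
{
    return a + (b - a) * drand();
}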
void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) { #ifdef GPU char *backup_directory = "/home/kunle12/backup/"; srand(time(0)); char *base = basecfg(cfg); char *abase = basecfg(acfg); printf("%s\n", base); network *gnet = load_network(cfg, weight, clear); network *anet = load_network(acfg, aweight, clear); //float orig_rate = anet->learning_rate; int i, j, k; layer imlayer = {0}; for (i = 0; i < gnet->n; ++i) { if (gnet->layers[i].out_c == 3) { imlayer = gnet->layers[i]; break; } } printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); int imgs = gnet->batch*gnet->subdivisions; i = *gnet->seen/imgs; data train, buffer; list *plist = get_paths(train_images); //int N = plist->size; char **paths = (char **)list_to_array(plist); load_args args= get_base_args(anet); args.paths = paths; args.n = imgs; args.m = plist->size; args.d = &buffer; args.type = CLASSIFICATION_DATA; args.threads=16; args.classes = 1; char *ls[2] = {"imagenet", "zzzzzzzz"}; args.labels = ls; pthread_t load_thread = load_data_in_thread(args); clock_t time; gnet->train = 1; anet->train = 1; int x_size = gnet->inputs*gnet->batch; int y_size = gnet->truths*gnet->batch; float *imerror = cuda_make_array(0, y_size); //int ay_size = anet->truths*anet->batch; float aloss_avg = -1; //data generated = copy_data(train); if (maxbatch == 0) maxbatch = gnet->max_batches; while (get_current_batch(gnet) < maxbatch) { i += 1; time=clock(); pthread_join(load_thread, 0); train = buffer; //translate_data_rows(train, -.5); //scale_data_rows(train, 2); load_thread = load_data_in_thread(args); printf("Loaded: %lf seconds\n", sec(clock()-time)); data gen = copy_data(train); for (j = 0; j < imgs; ++j) { train.y.vals[j][0] = 1; gen.y.vals[j][0] = 0; } time=clock(); for(j = 0; j < gnet->subdivisions; ++j){ get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); int z; for(z = 0; z < x_size; ++z){ gnet->input[z] = rand_normal(); } for(z = 0; z < gnet->batch; ++z){ float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); } /* for(z = 0; z < 100; ++z){ printf("%f, ", gnet->input[z]); } printf("\n"); printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); */ //cuda_push_array(gnet->input_gpu, gnet->input, x_size); //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); *gnet->seen += gnet->batch; forward_network(gnet); fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); anet->delta_gpu = imerror; forward_network(anet); backward_network(anet); //float genaloss = *anet->cost / anet->batch; //printf("%f\n", genaloss); scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); backward_network(gnet); /* for(k = 0; k < gnet->n; ++k){ layer l = gnet->layers[k]; cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, 
l.outputs*l.batch)); } */ for(k = 0; k < gnet->batch; ++k){ int index = j*gnet->batch + k; copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); } } harmless_update_network_gpu(anet); data merge = concat_data(train, gen); //randomize_data(merge); float aloss = train_network(anet, merge); //translate_image(im, 1); //scale_image(im, .5); //translate_image(im2, 1); //scale_image(im2, .5); #ifdef OPENCV if(display){ image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); show_image(im, "gen", 1); show_image(im2, "train", 1); save_image(im, "gen"); save_image(im2, "train"); } #endif /* if(aloss < .1){ anet->learning_rate = 0; } else if (aloss > .3){ anet->learning_rate = orig_rate; } */ update_network_gpu(gnet); free_data(merge); free_data(train); free_data(gen); if (aloss_avg < 0) aloss_avg = aloss; aloss_avg = aloss_avg*.9 + aloss*.1; printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); if(i%10000==0){ char buff[256]; sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); save_weights(gnet, buff); sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); save_weights(anet, buff); } if(i%1000==0){ char buff[256]; sprintf(buff, "%s/%s.backup", backup_directory, base); save_weights(gnet, buff); sprintf(buff, "%s/%s.backup", backup_directory, abase); save_weights(anet, buff); } } char buff[256]; sprintf(buff, "%s/%s_final.weights", backup_directory, base); save_weights(gnet, buff); #endif free_network(gnet); free_network(anet); }
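In train_dcgan() the generator input is filled with standard-normal noise and each batch row is then rescaled to unit L2 magnitude before the forward pass (the loop above uses mag_array() and scale_array()). A self-contained sketch of that per-row step; fill_unit_gaussian is a hypothetical helper name.

#include <math.h>

float rand_normal(void);   // assumed: standard normal deviate, as above

// Fill one latent vector with Gaussian noise, then rescale it to unit L2 norm.
void fill_unit_gaussian(float *z, int n)
{
    int i;
    float mag = 0;
    for (i = 0; i < n; ++i) {
        z[i] = rand_normal();
        mag += z[i] * z[i];
    }
    mag = sqrtf(mag);
    if (mag > 0) {
        for (i = 0; i < n; ++i) z[i] /= mag;
    }
}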
void FC_nonp_variance_varselection::update(void) { unsigned i; // updating psi2 double r_delta; if (FC_delta.beta(0,0) == 0) r_delta = r; else r_delta = 1; FC_psi2.beta(0,0) = rand_invgamma(v+0.5,Q+0.5*beta(0,0)*r_delta); FC_psi2.update(); // end: updating psi2 // updating delta double u = uniform(); double L = 1/sqrt(r)*exp(- beta(0,0)/(2*FC_psi2.beta(0,0))*(1/r-1)); double pr1 = 1/(1+ ((1-FC_omega.beta(0,0))/FC_omega.beta(0,0))*L); if (u <=pr1) { FC_delta.beta(0,0) = 1; r_delta = 1; } else { FC_delta.beta(0,0) = 0; r_delta = r; } FC_delta.update(); // end: updating delta // updating w FC_omega.beta(0,0) = randnumbers::rand_beta(a_omega+FC_delta.beta(0,0), b_omega+1-FC_delta.beta(0,0)); FC_omega.update(); // end: updating w // updating tau2 FCnonpp->designp->compute_effect(X,FCnonpp->beta); double * worklin; if (likep->linpred_current==1) worklin = likep->linearpred1.getV(); else worklin = likep->linearpred2.getV(); double Sigmatau; double mutau = 0; double * Xp = X.getV(); double * responsep = likep->workingresponse.getV(); double varinv = 1/(likep->get_scale()*beta(0,0)); double xtx=0; for (i=0;i<X.rows();i++,Xp++,responsep++,worklin++) { xtx += pow(*Xp,2); mutau += (*Xp) * ((*responsep) - (*worklin)+(*Xp)); } Sigmatau = 1/(varinv*xtx + 1/(r_delta*FC_psi2.beta(0,0))); mutau *= Sigmatau/(likep->get_scale()*sqrt(beta(0,0))); double tau = mutau + sqrt(Sigmatau) * rand_normal(); double tau2 = tau*tau; if (tau2 < 0.000000001) tau2 = 0.000000001; beta(0,0) = tau2; beta(0,1) = likep->get_scale()/beta(0,0); FCnonpp->tau2 = beta(0,0); // end: updating tau2 acceptance++; FC::update(); }
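The tau update above is a draw from a univariate normal full conditional, tau = mutau + sqrt(Sigmatau) * rand_normal(). A minimal sketch of that location-scale pattern, assuming rand_normal() returns an N(0,1) deviate (draw_normal is a hypothetical helper):

#include <cmath>

double rand_normal();   // assumed: N(0,1) sampler, as used above

// Draw from N(mean, variance) via a location-scale transform of a standard normal.
double draw_normal(double mean, double variance)
{
    return mean + std::sqrt(variance) * rand_normal();
}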
void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) { #ifdef GPU char *backup_directory = "/home/kunle12/backup/"; srand(time(0)); char *base = basecfg(cfg); char *abase = basecfg(acfg); printf("%s\n", base); network *gnet = load_network(cfg, weight, clear); network *anet = load_network(acfg, aweight, clear); int i, j, k; layer imlayer = gnet->layers[gnet->n-1]; printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); int imgs = gnet->batch*gnet->subdivisions; i = *gnet->seen/imgs; data train, buffer; list *plist = get_paths(train_images); char **paths = (char **)list_to_array(plist); load_args args= get_base_args(anet); args.paths = paths; args.n = imgs; args.m = plist->size; args.d = &buffer; args.type = CLASSIFICATION_DATA; args.threads=16; args.classes = 1; char *ls[2] = {"imagenet", "zzzzzzzz"}; args.labels = ls; pthread_t load_thread = load_data_in_thread(args); clock_t time; gnet->train = 1; anet->train = 1; int x_size = gnet->inputs*gnet->batch; int y_size = gnet->truths*gnet->batch; float *imerror = cuda_make_array(0, y_size); float aloss_avg = -1; if (maxbatch == 0) maxbatch = gnet->max_batches; while (get_current_batch(gnet) < maxbatch) { { int cb = get_current_batch(gnet); float alpha = (float) cb / (maxbatch/2); if(alpha > 1) alpha = 1; float beta = 1 - alpha; printf("%f %f\n", alpha, beta); set_network_alpha_beta(gnet, alpha, beta); set_network_alpha_beta(anet, beta, alpha); } i += 1; time=clock(); pthread_join(load_thread, 0); train = buffer; load_thread = load_data_in_thread(args); printf("Loaded: %lf seconds\n", sec(clock()-time)); data gen = copy_data(train); for (j = 0; j < imgs; ++j) { train.y.vals[j][0] = 1; gen.y.vals[j][0] = 0; } time=clock(); for (j = 0; j < gnet->subdivisions; ++j) { get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); int z; for(z = 0; z < x_size; ++z){ gnet->input[z] = rand_normal(); } /* for(z = 0; z < gnet->batch; ++z){ float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); } */ *gnet->seen += gnet->batch; forward_network(gnet); fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); anet->delta_gpu = imerror; forward_network(anet); backward_network(anet); //float genaloss = *anet->cost / anet->batch; scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); backward_network(gnet); for(k = 0; k < gnet->batch; ++k){ int index = j*gnet->batch + k; copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); } } harmless_update_network_gpu(anet); data merge = concat_data(train, gen); float aloss = train_network(anet, merge); #ifdef OPENCV if(display){ image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); show_image(im, "gen", 1); show_image(im2, "train", 1); save_image(im, "gen"); save_image(im2, "train"); } #endif update_network_gpu(gnet); free_data(merge); free_data(train); free_data(gen); if (aloss_avg < 0) aloss_avg = aloss; aloss_avg = aloss_avg*.9 + aloss*.1; printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, 
aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); if(i%10000==0){ char buff[256]; sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); save_weights(gnet, buff); sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); save_weights(anet, buff); } if(i%1000==0){ char buff[256]; sprintf(buff, "%s/%s.backup", backup_directory, base); save_weights(gnet, buff); sprintf(buff, "%s/%s.backup", backup_directory, abase); save_weights(anet, buff); } } char buff[256]; sprintf(buff, "%s/%s_final.weights", backup_directory, base); save_weights(gnet, buff); #endif free_network( gnet ); free_network( anet ); }
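train_prog() blends between network configurations with a linear fade-in: alpha grows from 0 to 1 over the first half of training, beta = 1 - alpha, and the pair is passed to set_network_alpha_beta() (with the arguments swapped for the adversarial network). A sketch of that schedule in isolation; fade_schedule is a hypothetical helper.

// alpha ramps linearly to 1 over the first max_batch/2 batches, then saturates.
void fade_schedule(int current_batch, int max_batch, float *alpha, float *beta)
{
    float a = (float)current_batch / (max_batch / 2);
    if (a > 1) a = 1;
    *alpha = a;
    *beta = 1 - a;
}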
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize) { int i; layer l = {0}; l.type = DECONVOLUTIONAL; l.h = h; l.w = w; l.c = c; l.n = n; l.batch = batch; l.stride = stride; l.size = size; l.weights = calloc(c*n*size*size, sizeof(float)); l.weight_updates = calloc(c*n*size*size, sizeof(float)); l.biases = calloc(n, sizeof(float)); l.bias_updates = calloc(n, sizeof(float)); float scale = 1./sqrt(size*size*c); for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); for(i = 0; i < n; ++i){ l.biases[i] = scale; } l.pad = l.size/2; l.out_h = (l.h) * l.stride + l.size/2 - l.pad; l.out_w = (l.w) * l.stride + l.size/2 - l.pad; l.out_c = n; l.outputs = l.out_w * l.out_h * l.out_c; l.inputs = l.w * l.h * l.c; l.output = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float)); l.delta = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float)); l.forward = forward_deconvolutional_layer; l.backward = backward_deconvolutional_layer; l.update = update_deconvolutional_layer; l.batch_normalize = batch_normalize; if(batch_normalize){ l.scales = calloc(n, sizeof(float)); l.scale_updates = calloc(n, sizeof(float)); for(i = 0; i < n; ++i){ l.scales[i] = 1; } l.mean = calloc(n, sizeof(float)); l.variance = calloc(n, sizeof(float)); l.mean_delta = calloc(n, sizeof(float)); l.variance_delta = calloc(n, sizeof(float)); l.rolling_mean = calloc(n, sizeof(float)); l.rolling_variance = calloc(n, sizeof(float)); l.x = calloc(l.batch*l.outputs, sizeof(float)); l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); } #ifdef GPU l.forward_gpu = forward_deconvolutional_layer_gpu; l.backward_gpu = backward_deconvolutional_layer_gpu; l.update_gpu = update_deconvolutional_layer_gpu; if(gpu_index >= 0){ l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); l.biases_gpu = cuda_make_array(l.biases, n); l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); if(batch_normalize){ l.mean_gpu = cuda_make_array(l.mean, n); l.variance_gpu = cuda_make_array(l.variance, n); l.rolling_mean_gpu = cuda_make_array(l.mean, n); l.rolling_variance_gpu = cuda_make_array(l.variance, n); l.mean_delta_gpu = cuda_make_array(l.mean, n); l.variance_delta_gpu = cuda_make_array(l.variance, n); l.scales_gpu = cuda_make_array(l.scales, n); l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); l.x_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); } } #ifdef CUDNN cudnnCreateTensorDescriptor(&l.dstTensorDesc); cudnnCreateTensorDescriptor(&l.normTensorDesc); cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); #endif #endif l.activation = activation; l.workspace_size = get_workspace_size(l); fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); return l; }
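In make_deconvolutional_layer() the padding is fixed at size/2, so the size/2 - pad term in the output-size formula cancels and the spatial output is simply the input dimension times the stride. A small sketch of that arithmetic (deconv_out_dim is a hypothetical helper):

// out = in*stride + size/2 - pad, and pad == size/2, so out == in*stride.
int deconv_out_dim(int in_dim, int stride, int size)
{
    int pad = size / 2;
    return in_dim * stride + size / 2 - pad;
}
// e.g. in_dim = 7, stride = 2, size = 4  ->  deconv_out_dim() returns 14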
double NORMAL_DIST::sample() {
    if (!std_dev) return mean;
    return (mean + std_dev * rand_normal());
}
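A hedged usage sketch, assuming NORMAL_DIST exposes public mean and std_dev fields as the method body implies; when std_dev is zero, sample() degenerates to returning the mean exactly.

NORMAL_DIST latency;
latency.mean = 10.0;
latency.std_dev = 2.5;
double t = latency.sample();        // t ~ N(10, 2.5^2)

latency.std_dev = 0;
double fixed = latency.sample();    // degenerate case: always returns 10.0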
// return BEST_APP_VERSION for the given job and host, or NULL if none // // check_req: check whether we still need work for the resource // This check is not done for: // - assigned jobs // - resent jobs // reliable_only: use only versions for which this host is "reliable" // // We "memoize" the results, maintaining an array g_wreq->best_app_versions // that maps app ID to the best app version (or NULL). // BEST_APP_VERSION* get_app_version( WORKUNIT& wu, bool check_req, bool reliable_only ) { unsigned int i; int j; BEST_APP_VERSION* bavp; char buf[256]; bool job_needs_64b = (wu.rsc_memory_bound > max_32b_address_space()); if (config.debug_version_select) { if (job_needs_64b) { log_messages.printf(MSG_NORMAL, "[version] job needs 64-bit app version: mem bnd %f\n", wu.rsc_memory_bound ); } } APP* app = ssp->lookup_app(wu.appid); if (!app) { log_messages.printf(MSG_CRITICAL, "WU refers to nonexistent app: %d\n", wu.appid ); return NULL; } // handle the case where we're using homogeneous app version // and the WU is already committed to an app version // if (app->homogeneous_app_version && wu.app_version_id) { return check_homogeneous_app_version(wu, reliable_only); } // see if app is already in memoized array // std::vector<BEST_APP_VERSION*>::iterator bavi; bavi = g_wreq->best_app_versions.begin(); while (bavi != g_wreq->best_app_versions.end()) { bavp = *bavi; if (bavp->appid == wu.appid && (job_needs_64b == bavp->for_64b_jobs)) { if (!bavp->present) { #if 0 if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] returning cached NULL\n" ); } #endif return NULL; } // if we're at the jobs-in-progress limit for this // app and resource type, fall through and find another version // if (config.max_jobs_in_progress.exceeded( app, bavp->host_usage.uses_gpu()) ) { if (config.debug_version_select) { app_version_desc(*bavp, buf); log_messages.printf(MSG_NORMAL, "[version] %s: max jobs in progress exceeded\n", buf ); } g_wreq->best_app_versions.erase(bavi); break; } // if we previously chose a CUDA app but don't need more CUDA work, // fall through and find another version // if (check_req && g_wreq->rsc_spec_request && bavp->host_usage.ncudas > 0 && !g_wreq->need_cuda() ) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] have CUDA version but no more CUDA work needed\n" ); } g_wreq->best_app_versions.erase(bavi); break; } // same, ATI // if (check_req && g_wreq->rsc_spec_request && bavp->host_usage.natis > 0 && !g_wreq->need_ati() ) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] have ATI version but no more ATI work needed\n" ); } g_wreq->best_app_versions.erase(bavi); break; } // same, CPU // if (check_req && g_wreq->rsc_spec_request && !bavp->host_usage.ncudas && !bavp->host_usage.natis && !g_wreq->need_cpu() ) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] have CPU version but no more CPU work needed\n" ); } g_wreq->best_app_versions.erase(bavi); break; } if (config.debug_version_select) { app_version_desc(*bavp, buf); log_messages.printf(MSG_NORMAL, "[version] returning cached version: %s\n", buf ); } return bavp; } bavi++; } // here if app was not in memoized array, // or we couldn't use the app version there. 
if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] looking for version of %s\n", app->name ); } bavp = new BEST_APP_VERSION; bavp->appid = wu.appid; bavp->for_64b_jobs = job_needs_64b; if (g_wreq->anonymous_platform) { CLIENT_APP_VERSION* cavp = get_app_version_anonymous( *app, job_needs_64b, reliable_only ); if (!cavp) { bavp->present = false; } else { bavp->present = true; bavp->host_usage = cavp->host_usage; bavp->cavp = cavp; int gavid = host_usage_to_gavid(cavp->host_usage, *app); bavp->reliable = app_version_is_reliable(gavid); bavp->trusted = app_version_is_trusted(gavid); if (config.debug_version_select) { app_version_desc(*bavp, buf); log_messages.printf(MSG_NORMAL, "[version] using %s\n", buf); } } g_wreq->best_app_versions.push_back(bavp); if (!bavp->present) return NULL; return bavp; } // Go through the client's platforms, // and scan the app versions for each platform. // Pick the one with highest expected FLOPS // // if config.prefer_primary_platform is set: // stop scanning platforms once we find a feasible version bavp->host_usage.projected_flops = 0; bavp->avp = NULL; for (i=0; i<g_request->platforms.list.size(); i++) { bool found_feasible_version = false; PLATFORM* p = g_request->platforms.list[i]; if (job_needs_64b && !is_64b_platform(p->name)) { continue; } for (j=0; j<ssp->napp_versions; j++) { HOST_USAGE host_usage; APP_VERSION& av = ssp->app_versions[j]; if (av.appid != wu.appid) continue; if (av.platformid != p->id) continue; if (g_request->core_client_version < av.min_core_version) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] client version %d < min core version %d\n", av.id, g_request->core_client_version, av.min_core_version ); } g_wreq->outdated_client = true; continue; } if (strlen(av.plan_class)) { if (!app_plan(*g_request, av.plan_class, host_usage)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] app_plan() returned false\n", av.id ); } continue; } if (!g_request->client_cap_plan_class) { if (!host_usage.is_sequential_app()) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] client %d lacks plan class capability\n", av.id, g_request->core_client_version ); } continue; } } } else { host_usage.sequential_app(g_reply->host.p_fpops); } // skip versions that go against resource prefs // if (host_usage.ncudas && g_wreq->no_cuda) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] Skipping CUDA version - user prefs say no CUDA\n", av.id ); g_wreq->no_cuda_prefs = true; } continue; } if (host_usage.natis && g_wreq->no_ati) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] Skipping ATI version - user prefs say no ATI\n", av.id ); g_wreq->no_ati_prefs = true; } continue; } if (!(host_usage.uses_gpu()) && g_wreq->no_cpu) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] Skipping CPU version - user prefs say no CPUs\n", av.id ); g_wreq->no_cpu_prefs = true; } continue; } if (reliable_only && !app_version_is_reliable(av.id)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] not reliable\n", av.id ); } continue; } if (daily_quota_exceeded(av.id, host_usage)) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] daily quota exceeded\n", av.id ); } continue; } // skip versions for which we're at the jobs-in-progress limit // if 
(config.max_jobs_in_progress.exceeded(app, host_usage.uses_gpu())) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] [AV#%d] jobs in progress limit exceeded\n", av.id ); config.max_jobs_in_progress.print_log(); } continue; } // skip versions for resources we don't need // if (!need_this_resource(host_usage, &av, NULL)) { continue; } // at this point we know the version is feasible, // so if config.prefer_primary_platform is set // we won't look any further. // found_feasible_version = true; // pick the fastest version. // Throw in a random factor in case the estimates are off. // double r = 1 + .1*rand_normal(); if (r*host_usage.projected_flops > bavp->host_usage.projected_flops) { bavp->host_usage = host_usage; bavp->avp = &av; bavp->reliable = app_version_is_reliable(av.id); bavp->trusted = app_version_is_trusted(av.id); } } // loop over app versions if (config.prefer_primary_platform && found_feasible_version) { break; } } // loop over client platforms if (bavp->avp) { estimate_flops(bavp->host_usage, *bavp->avp); if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] Best version of app %s is [AV#%d] (%.2f GFLOPS)\n", app->name, bavp->avp->id, bavp->host_usage.projected_flops/1e9 ); } bavp->present = true; g_wreq->best_app_versions.push_back(bavp); } else { // Here if there's no app version we can use. // if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] returning NULL; platforms:\n" ); for (i=0; i<g_request->platforms.list.size(); i++) { PLATFORM* p = g_request->platforms.list[i]; log_messages.printf(MSG_NORMAL, "[version] %s\n", p->name ); } } g_wreq->best_app_versions.push_back(bavp); return NULL; } return bavp; }
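The selection rule in this older version of get_app_version() jitters each candidate's projected FLOPS by a factor r = 1 + .1*rand_normal() before comparing it with the current best, so near-equal estimates are not always resolved the same way. A self-contained sketch of that pattern (pick_with_jitter is a hypothetical helper; note that, as above, the unscaled estimate is what gets kept):

double rand_normal();   // assumed: standard normal deviate

// Return the index of the estimate chosen under ~N(1, 0.1^2) jitter, or -1.
int pick_with_jitter(const double *projected_flops, int n)
{
    int best = -1;
    double best_flops = 0;
    for (int i = 0; i < n; i++) {
        double r = 1 + 0.1 * rand_normal();
        if (r * projected_flops[i] > best_flops) {
            best = i;
            best_flops = projected_flops[i];   // keep the unscaled value, as above
        }
    }
    return best;
}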
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) { int i; convolutional_layer l = {0}; l.type = CONVOLUTIONAL; l.groups = groups; l.h = h; l.w = w; l.c = c; l.n = n; l.binary = binary; l.xnor = xnor; l.batch = batch; l.stride = stride; l.size = size; l.pad = padding; l.batch_normalize = batch_normalize; l.weights = calloc(c/groups*n*size*size, sizeof(float)); l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); l.biases = calloc(n, sizeof(float)); l.bias_updates = calloc(n, sizeof(float)); l.nweights = c/groups*n*size*size; l.nbiases = n; // float scale = 1./sqrt(size*size*c); float scale = sqrt(2./(size*size*c/l.groups)); //scale = .02; //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); int out_w = convolutional_out_width(l); int out_h = convolutional_out_height(l); l.out_h = out_h; l.out_w = out_w; l.out_c = n; l.outputs = l.out_h * l.out_w * l.out_c; l.inputs = l.w * l.h * l.c; l.output = calloc(l.batch*l.outputs, sizeof(float)); l.delta = calloc(l.batch*l.outputs, sizeof(float)); l.forward = forward_convolutional_layer; l.backward = backward_convolutional_layer; l.update = update_convolutional_layer; if(binary){ l.binary_weights = calloc(l.nweights, sizeof(float)); l.cweights = calloc(l.nweights, sizeof(char)); l.scales = calloc(n, sizeof(float)); } if(xnor){ l.binary_weights = calloc(l.nweights, sizeof(float)); l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); } if(batch_normalize){ l.scales = calloc(n, sizeof(float)); l.scale_updates = calloc(n, sizeof(float)); for(i = 0; i < n; ++i){ l.scales[i] = 1; } l.mean = calloc(n, sizeof(float)); l.variance = calloc(n, sizeof(float)); l.mean_delta = calloc(n, sizeof(float)); l.variance_delta = calloc(n, sizeof(float)); l.rolling_mean = calloc(n, sizeof(float)); l.rolling_variance = calloc(n, sizeof(float)); l.x = calloc(l.batch*l.outputs, sizeof(float)); l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); } if(adam){ l.m = calloc(l.nweights, sizeof(float)); l.v = calloc(l.nweights, sizeof(float)); l.bias_m = calloc(n, sizeof(float)); l.scale_m = calloc(n, sizeof(float)); l.bias_v = calloc(n, sizeof(float)); l.scale_v = calloc(n, sizeof(float)); } #ifdef GPU l.forward_gpu = forward_convolutional_layer_gpu; l.backward_gpu = backward_convolutional_layer_gpu; l.update_gpu = update_convolutional_layer_gpu; if(gpu_index >= 0){ if (adam) { l.m_gpu = cuda_make_array(l.m, l.nweights); l.v_gpu = cuda_make_array(l.v, l.nweights); l.bias_m_gpu = cuda_make_array(l.bias_m, n); l.bias_v_gpu = cuda_make_array(l.bias_v, n); l.scale_m_gpu = cuda_make_array(l.scale_m, n); l.scale_v_gpu = cuda_make_array(l.scale_v, n); } l.weights_gpu = cuda_make_array(l.weights, l.nweights); l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); l.biases_gpu = cuda_make_array(l.biases, n); l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); if(binary){ l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); } if(xnor){ l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); } if(batch_normalize){ l.mean_gpu = cuda_make_array(l.mean, n); l.variance_gpu = cuda_make_array(l.variance, n); 
l.rolling_mean_gpu = cuda_make_array(l.mean, n); l.rolling_variance_gpu = cuda_make_array(l.variance, n); l.mean_delta_gpu = cuda_make_array(l.mean, n); l.variance_delta_gpu = cuda_make_array(l.variance, n); l.scales_gpu = cuda_make_array(l.scales, n); l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); } #ifdef CUDNN cudnnCreateTensorDescriptor(&l.normTensorDesc); cudnnCreateTensorDescriptor(&l.srcTensorDesc); cudnnCreateTensorDescriptor(&l.dstTensorDesc); cudnnCreateFilterDescriptor(&l.weightDesc); cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); cudnnCreateTensorDescriptor(&l.ddstTensorDesc); cudnnCreateFilterDescriptor(&l.dweightDesc); cudnnCreateConvolutionDescriptor(&l.convDesc); cudnn_convolutional_setup(&l); #endif } #endif l.workspace_size = get_workspace_size(l); l.activation = activation; //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); return l; }
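The weight initialization in make_convolutional_layer() draws each weight from a zero-mean Gaussian with scale sqrt(2 / fan_in), where fan_in = size*size*c/groups (He-style initialization); the commented-out alternative used 1/sqrt(size*size*c). A sketch of just that step, assuming rand_normal() returns a standard normal deviate (init_conv_weights is a hypothetical helper):

#include <math.h>
#include <stdlib.h>

float rand_normal(void);   // assumed standard-normal sampler

// Allocate and initialize conv weights as scale * N(0,1), scale = sqrt(2/fan_in).
float *init_conv_weights(int c, int n, int groups, int size)
{
    int i;
    int nweights = c / groups * n * size * size;
    float scale = sqrtf(2.f / (size * size * c / groups));
    float *w = calloc(nweights, sizeof(float));
    for (i = 0; i < nweights; ++i) w[i] = scale * rand_normal();
    return w;
}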