enum piglit_result piglit_display(void) { bool pass = true; float green[4] = {0.0, 1.0, 0.0, 0.0}; GLuint q; int iters; int num_results = 5; float cpu_time[num_results]; float gpu_time[num_results]; float delta[num_results]; float cpu_time_mean; float delta_mean, delta_stddev; float cpu_overhead; float t, t_cutoff; int i; glColor4f(0.0, 1.0, 0.0, 0.0); glGenQueries(1, &q); /* Prime the drawing pipe before we start measuring time, * since the first draw call is likely to be slower than all * others. */ draw(q, 1); /* Figure out some baseline difference between GPU time * elapsed and CPU time elapsed for a single draw call (CPU * overhead of timer query and glFinish()). * * Note that this doesn't take into account any extra CPU time * elapsed from start to finish if multiple batchbuffers are * accumulated by the driver in getting to our 1/10th of a * second elapsed time goal, and some other client sneaks * rendering in in between those batches. * * Part of the rendering size being relatively large is to * hopefully avoid that, though it might be better to have * some time-consuming shader with a single draw call instead. */ cpu_overhead = 0; for (i = 0; i < num_results; i++) { cpu_time[i] = draw(q, 1); gpu_time[i] = get_gpu_time(q); cpu_overhead += cpu_time[i] - gpu_time[i]; } cpu_overhead /= num_results; /* Find a number of draw calls that takes about 1/10th of a * second. */ retry: for (iters = 1; ; iters *= 2) { if (draw(q, iters) > 0.1) break; } /* Now, do several runs like this so we can determine if the * timer matches up with wall time. */ for (i = 0; i < num_results; i++) { cpu_time[i] = draw(q, iters); gpu_time[i] = get_gpu_time(q); } cpu_time_mean = 0; delta_mean = 0; for (i = 0; i < num_results; i++) { delta[i] = cpu_time[i] - cpu_overhead - gpu_time[i]; cpu_time_mean += cpu_time[i]; delta_mean += delta[i]; } cpu_time_mean /= num_results; delta_mean /= num_results; /* There's some risk of our "get to 0.1 seconds" loop deciding * that a small number of iters was sufficient if we got * scheduled out for a while. Re-run if so. * * We wouldn't have that problem if we could rely on the GPU * time elapsed query, but that's the thing we're testing. */ if (cpu_time_mean < 0.05) goto retry; /* Calculate stddevs. */ delta_stddev = 0; for (i = 0; i < num_results; i++) { float d = delta[i] - delta_mean; delta_stddev += d * d / (num_results - 1); } delta_stddev = sqrt(delta_stddev); /* Dependent t-test for paired samples. * * This is a good test, because we expect the two times (cpu * and gpu) of the samples to be correlated, and we expect the * stddev to match (since time it should arise from system * variables like scheduling of other tasks and state of the * caches). Unless maybe the variance of cpu time is greater * than gpu time, because we may see scheduling accounted for * in our CPU (wall) time, while scheduling other tasks * doesn't end up counted toward our GPU time. */ t = delta_mean / (delta_stddev / sqrt(num_results)); /* Integral of Student's t distribution for 4 degrees of * freedom (num_results = 5), two-tailed (we care about * difference above or below 0, not just one direction), at * p = .05. */ t_cutoff = 2.776; /* Now test that our sampled distribution (rate of clock * advance between CPU and GPU) was within expectations for a * delta of 0. I actually want to be testing the likelihood * that the real difference is enough that we actually care. * I didn't find an easy way to account for that after a bunch * of wikipedia browsing, so I'll punt on proper analysis for * now and just check that the sampled delta isn't too small * to care about. */ if (t > t_cutoff && fabs(delta_mean) > .05 * cpu_time_mean) { fprintf(stderr, "GPU time didn't match CPU time\n"); printf("Estimated CPU overhead: %f\n", cpu_overhead); printf("Difference: %f secs (+/- %f secs)\n", delta_mean, delta_stddev); printf("t = %f\n", t); printf("%20s %20s %20s\n", "gpu_time", "cpu_time", "delta"); for (i = 0; i < num_results; i++) { printf("%20f %20f %20f\n", gpu_time[i], cpu_time[i], delta[i]); } pass = false; } pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height, green) && pass; piglit_present_results(); glDeleteQueries(1, &q); return pass ? PIGLIT_PASS : PIGLIT_FAIL; }
void *ref1(void*){ int target_fps = 0; int target_Ug = 0; int target_Uc = 0; start_sampling = 1; int fps; int Ug; static unsigned long long Ug_bp,Ug_tp; get_gpu_time(&Ug_bp,&Ug_tp); int Uc; static unsigned long long Uc_bp,Uc_tp; get_cpu_time(4,&Uc_bp,&Uc_tp); int Fc,Fg; while(true){ if(game == -1){sleep(1);start_sampling=1;continue;} Fc=get_cpu_freq(0); Fg=get_gpu_freq(); //printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,get_cpu_freq(0),get_gpu_freq(),Uc,Ug); if(start_sampling == 1){ fps = get_fps(binder); Uc = get_cpu_util(4,&Uc_bp,&Uc_tp); Ug = get_gpu_util(&Ug_bp,&Ug_tp); printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,Fc,Fg,Uc,Ug); if(target_fps < fps)target_fps = fps; if(target_Uc < Uc)target_Uc = Uc; if(target_Ug < Ug)target_Ug = Ug; set_cpu_freq(0,FL[3]); set_gpu_freq(GFL[4]); start_sampling=2; sleep(1);continue; }else if(start_sampling == 2){ fps = get_fps(binder); Uc = get_cpu_util(4,&Uc_bp,&Uc_tp); Ug = get_gpu_util(&Ug_bp,&Ug_tp); printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,Fc,Fg,Uc,Ug); if(target_fps < fps)target_fps = fps; if(target_Uc < Uc)target_Uc = Uc; if(target_Ug < Ug)target_Ug = Ug; set_cpu_freq(0,FL[10]); set_gpu_freq(GFL[0]); start_sampling=3; sleep(1);continue; }else if(start_sampling == 3){ fps = get_fps(binder); Uc = get_cpu_util(4,&Uc_bp,&Uc_tp); Ug = get_gpu_util(&Ug_bp,&Ug_tp); printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,Fc,Fg,Uc,Ug); if(target_fps < fps)target_fps = fps; if(target_Uc < Uc)target_Uc = Uc; if(target_Ug < Ug)target_Ug = Ug; set_cpu_freq(0,FL[10]); set_gpu_freq(GFL[4]); start_sampling=4; sleep(1);continue; }else if(start_sampling == 4){ printf("Sample%d Fc=%d,Fg=%d\n",start_sampling,get_cpu_freq(0),get_gpu_freq()); printf("Q : %d , Ug : %d , Uc : %d\n",target_fps,target_Ug,target_Uc); start_sampling=5; } fps = get_fps(binder); int fps_dev = (target_fps - fps)*100/target_fps; static int FL_point = freq_level-2; static int pre_FL = FL_point; static int GFL_point = gpu_freq_level-1; static int pre_GFL = GFL_point; int CPU_Sensitive = 1; int Fc_adj = 0; int Fg_adj = 0; int Ug_adj = 0; if(fps_dev > 10){ //printf("UP! %d %d %d\n",fps,target_fps,fps_dev); Uc = get_cpu_util(4,&Uc_bp,&Uc_tp); Ug = get_gpu_util(&Ug_bp,&Ug_tp); int fps_dif = (target_fps - fps); int Uc_dev = (target_Uc - Uc)*100/target_Uc; int Ug_dev = (target_Ug - Ug)*100/target_Ug; if(CPU_Sensitive){ //CPU Sensitive Fc_adj = (double)fps_dif * reciprocal(Coef_Fc2Q[game]) + FL[FL_point]; Ug_adj = Ug + (double)fps_dif * reciprocal(Coef_Ug2Q[game]); if(Ug_adj > target_Ug){ Fg_adj = (double)(Ug_adj - target_Ug) * -reciprocal(Coef_Fg2Ug[game]) + GFL[GFL_point]; } }else{ //GPU Sensitive Fg_adj = (double)fps_dif * reciprocal(Coef_Fg2Q[game]) + GFL[GFL_point]; Ug_adj = Ug + (double)fps_dif * reciprocal(Coef_Uc2Q[game]); if(Ug_adj > target_Ug){ Fc_adj = (double)(Ug_adj - target_Ug) * -reciprocal(Coef_Fc2Uc[game]) + FL[FL_point]; } } while(FL_point < freq_level && FL[FL_point] < Fc_adj)FL_point++; while(GFL_point < gpu_freq_level && GFL[GFL_point] < Fg_adj)GFL_point++; if(FL_point != pre_FL){ if(FL_point > freq_level-2){ FL_point = freq_level-2; }else if(FL_point < 2){ FL_point = 2; } pre_FL = FL_point; set_cpu_freq(0,FL[FL_point]); } if(GFL_point != pre_GFL){ if(GFL_point > gpu_freq_level-1){ GFL_point = gpu_freq_level-1; }else if(GFL_point < 0){ GFL_point = 0; } pre_GFL = GFL_point; set_gpu_freq(GFL[GFL_point]); } }else if(fps_dev <= 10){ //printf("DOWN! POWER SAVE %d %d %d\n",fps,target_fps,fps_dev); Uc = get_cpu_util(4,&Uc_bp,&Uc_tp); Ug = get_gpu_util(&Ug_bp,&Ug_tp); int Uc_dev = target_Uc - Uc; int Ug_dev = target_Ug - Ug; if(Uc_dev*100/Uc > 10) Fc_adj = (double)(Uc_dev) * reciprocal(Coef_Fc2Uc[game]) + FL[FL_point]; if(Ug_dev*100/Ug > 10) Fg_adj = (double)(Ug_dev) * reciprocal(Coef_Fg2Ug[game]) + GFL[GFL_point]; while(FL_point > 0 && FL[FL_point] > Fc_adj)FL_point--; while(GFL_point > 0 && GFL[GFL_point] > Fg_adj)GFL_point--; if(FL_point != pre_FL){ if(FL_point > freq_level-2){ FL_point = freq_level-2; }else if(FL_point < 2){ FL_point = 2; } pre_FL = FL_point; set_cpu_freq(0,FL[FL_point]); } if(GFL_point != pre_GFL){ if(GFL_point > gpu_freq_level-1){ GFL_point = gpu_freq_level-1; }else if(GFL_point < 0){ GFL_point = 0; } pre_GFL = GFL_point; set_gpu_freq(GFL[GFL_point]); } } sleep(1); } return 0; }