예제 #1
0
/*
 * Check that the time reported by a GPU timer query tracks CPU wall time.
 *
 * A batch of draw calls is timed repeatedly, once on the CPU side (the value
 * returned by draw()) and once via the query object (get_gpu_time()).  The
 * per-sample differences, corrected for an estimated fixed CPU overhead, are
 * fed to a paired two-tailed t-test.  The test fails when the difference is
 * both statistically significant and larger than 5% of the mean CPU time.
 * The window contents are also probed for the expected green colour.
 *
 * Returns PIGLIT_PASS when both the timing check and the colour probe
 * succeed, PIGLIT_FAIL otherwise.
 */
enum piglit_result
piglit_display(void)
{
	bool pass = true;
	float green[4] = {0.0, 1.0, 0.0, 0.0};
	GLuint q;
	int iters;
	/* t_cutoff below is the table value for exactly this sample count. */
	int num_results = 5;
	float cpu_time[num_results];
	float gpu_time[num_results];
	float delta[num_results];
	float cpu_time_mean;
	float delta_mean, delta_stddev;
	float cpu_overhead;
	float t, t_cutoff;
	int i;

	glColor4f(0.0, 1.0, 0.0, 0.0);
	glGenQueries(1, &q);

	/* Prime the drawing pipe before we start measuring time,
	 * since the first draw call is likely to be slower than all
	 * others.
	 */
	draw(q, 1);

	/* Figure out some baseline difference between GPU time
	 * elapsed and CPU time elapsed for a single draw call (CPU
	 * overhead of timer query and glFinish()).
	 *
	 * Note that this doesn't take into account any extra CPU time
	 * elapsed from start to finish if multiple batchbuffers are
	 * accumulated by the driver in getting to our 1/10th of a
	 * second elapsed time goal, and some other client sneaks
	 * rendering in between those batches.
	 *
	 * Part of the rendering size being relatively large is to
	 * hopefully avoid that, though it might be better to have
	 * some time-consuming shader with a single draw call instead.
	 */
	cpu_overhead = 0;
	for (i = 0; i < num_results; i++) {
		cpu_time[i] = draw(q, 1);
		gpu_time[i] = get_gpu_time(q);

		cpu_overhead += cpu_time[i] - gpu_time[i];
	}
	cpu_overhead /= num_results;

	/* Find a number of draw calls that takes about 1/10th of a
	 * second.  The iteration count doubles until the CPU-side
	 * time crosses the threshold.
	 */
retry:
	for (iters = 1; ; iters *= 2) {
		if (draw(q, iters) > 0.1)
			break;
	}

	/* Now, do several runs like this so we can determine if the
	 * timer matches up with wall time.
	 */
	for (i = 0; i < num_results; i++) {
		cpu_time[i] = draw(q, iters);
		gpu_time[i] = get_gpu_time(q);
	}

	cpu_time_mean = 0;
	delta_mean = 0;
	for (i = 0; i < num_results; i++) {
		delta[i] = cpu_time[i] - cpu_overhead - gpu_time[i];
		cpu_time_mean += cpu_time[i];
		delta_mean += delta[i];
	}
	cpu_time_mean /= num_results;
	delta_mean /= num_results;

	/* There's some risk of our "get to 0.1 seconds" loop deciding
	 * that a small number of iters was sufficient if we got
	 * scheduled out for a while.  Re-run if so.
	 *
	 * We wouldn't have that problem if we could rely on the GPU
	 * time elapsed query, but that's the thing we're testing.
	 */
	if (cpu_time_mean < 0.05)
		goto retry;

	/* Sample standard deviation of the deltas (n - 1 divisor). */
	delta_stddev = 0;
	for (i = 0; i < num_results; i++) {
		float d = delta[i] - delta_mean;
		delta_stddev += d * d / (num_results - 1);
	}
	delta_stddev = sqrt(delta_stddev);

	/* Dependent t-test for paired samples.
	 *
	 * This is a good test, because we expect the two times (cpu
	 * and gpu) of the samples to be correlated, and we expect the
	 * stddev to match (since it should arise from system
	 * variables like scheduling of other tasks and state of the
	 * caches).  Unless maybe the variance of cpu time is greater
	 * than gpu time, because we may see scheduling accounted for
	 * in our CPU (wall) time, while scheduling other tasks
	 * doesn't end up counted toward our GPU time.
	 */
	t = delta_mean / (delta_stddev / sqrt(num_results));

	/* Integral of Student's t distribution for 4 degrees of
	 * freedom (num_results = 5), two-tailed (we care about
	 * difference above or below 0, not just one direction), at
	 * p = .05.
	 */
	t_cutoff = 2.776;

	/* Now test that our sampled distribution (rate of clock
	 * advance between CPU and GPU) was within expectations for a
	 * delta of 0.  I actually want to be testing the likelihood
	 * that the real difference is enough that we actually care.
	 * I didn't find an easy way to account for that after a bunch
	 * of wikipedia browsing, so I'll punt on proper analysis for
	 * now and just check that the sampled delta isn't too small
	 * to care about.
	 *
	 * The statistic is compared by magnitude: the cutoff is the
	 * two-tailed value, so a GPU clock that runs fast (negative
	 * t) must be rejected just like one that runs slow.
	 */
	if (fabs(t) > t_cutoff && fabs(delta_mean) > .05 * cpu_time_mean) {
		fprintf(stderr, "GPU time didn't match CPU time\n");
		printf("Estimated CPU overhead: %f\n", cpu_overhead);
		printf("Difference: %f secs (+/- %f secs)\n",
		       delta_mean, delta_stddev);
		printf("t = %f\n", t);

		printf("%20s %20s %20s\n",
		       "gpu_time", "cpu_time", "delta");
		for (i = 0; i < num_results; i++) {
			printf("%20f %20f %20f\n",
			       gpu_time[i], cpu_time[i], delta[i]);
		}

		pass = false;
	}

	/* The probe runs unconditionally so a rendering failure is
	 * reported even when the timing check already failed.
	 */
	pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height,
				      green) && pass;

	piglit_present_results();

	glDeleteQueries(1, &q);

	return pass ? PIGLIT_PASS : PIGLIT_FAIL;
}
예제 #2
0
void *ref1(void*){
	int target_fps = 0;
	int target_Ug = 0;
	int target_Uc = 0;
	
	start_sampling = 1;
	int fps;
	
	int Ug;
	static unsigned long long Ug_bp,Ug_tp;
	get_gpu_time(&Ug_bp,&Ug_tp);
	
	int Uc;
	static unsigned long long Uc_bp,Uc_tp;
	get_cpu_time(4,&Uc_bp,&Uc_tp);
	
	int Fc,Fg;
	
	while(true){
		if(game == -1){sleep(1);start_sampling=1;continue;}
		Fc=get_cpu_freq(0);
		Fg=get_gpu_freq();
		//printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,get_cpu_freq(0),get_gpu_freq(),Uc,Ug);
		if(start_sampling == 1){			
			fps = get_fps(binder);			
			Uc = get_cpu_util(4,&Uc_bp,&Uc_tp);
			Ug = get_gpu_util(&Ug_bp,&Ug_tp);			
			printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,Fc,Fg,Uc,Ug);			
			if(target_fps < fps)target_fps = fps;
			if(target_Uc < Uc)target_Uc = Uc;
			if(target_Ug < Ug)target_Ug = Ug;		
			
			set_cpu_freq(0,FL[3]);
			set_gpu_freq(GFL[4]);
			start_sampling=2;
			sleep(1);continue;
		}else if(start_sampling == 2){
			fps = get_fps(binder);			
			Uc = get_cpu_util(4,&Uc_bp,&Uc_tp);
			Ug = get_gpu_util(&Ug_bp,&Ug_tp);			
			printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,Fc,Fg,Uc,Ug);			
			if(target_fps < fps)target_fps = fps;
			if(target_Uc < Uc)target_Uc = Uc;
			if(target_Ug < Ug)target_Ug = Ug;
			
			set_cpu_freq(0,FL[10]);
			set_gpu_freq(GFL[0]);
			start_sampling=3;
			sleep(1);continue;
		}else if(start_sampling == 3){
			fps = get_fps(binder);			
			Uc = get_cpu_util(4,&Uc_bp,&Uc_tp);
			Ug = get_gpu_util(&Ug_bp,&Ug_tp);			
			printf("Sample%d Fc=%d,Fg=%d,Uc=%d,Ug=%d\n",start_sampling,Fc,Fg,Uc,Ug);			
			if(target_fps < fps)target_fps = fps;
			if(target_Uc < Uc)target_Uc = Uc;
			if(target_Ug < Ug)target_Ug = Ug;
			
			set_cpu_freq(0,FL[10]);
			set_gpu_freq(GFL[4]);
			start_sampling=4;
			sleep(1);continue;
		}else if(start_sampling == 4){
			printf("Sample%d Fc=%d,Fg=%d\n",start_sampling,get_cpu_freq(0),get_gpu_freq());
			printf("Q : %d , Ug : %d , Uc : %d\n",target_fps,target_Ug,target_Uc);			
			start_sampling=5;
		}
		
		fps = get_fps(binder);
		int fps_dev = (target_fps - fps)*100/target_fps;
		
		static int FL_point = freq_level-2;
		static int pre_FL = FL_point;
		static int GFL_point = gpu_freq_level-1;
		static int pre_GFL = GFL_point;
		
		int CPU_Sensitive = 1;
		
		int Fc_adj = 0;
		int Fg_adj = 0;
		int Ug_adj = 0;
		if(fps_dev > 10){
			//printf("UP! %d %d %d\n",fps,target_fps,fps_dev);
			Uc = get_cpu_util(4,&Uc_bp,&Uc_tp);
			Ug = get_gpu_util(&Ug_bp,&Ug_tp);
			
			int fps_dif = (target_fps - fps);
			int Uc_dev = (target_Uc - Uc)*100/target_Uc;
			int Ug_dev = (target_Ug - Ug)*100/target_Ug;
			
			
			if(CPU_Sensitive){
				//CPU Sensitive			
				Fc_adj = (double)fps_dif * reciprocal(Coef_Fc2Q[game]) + FL[FL_point];
				Ug_adj = Ug + (double)fps_dif * reciprocal(Coef_Ug2Q[game]);
				if(Ug_adj > target_Ug){
					Fg_adj = (double)(Ug_adj - target_Ug) * -reciprocal(Coef_Fg2Ug[game]) + GFL[GFL_point];
				}
			}else{
				//GPU Sensitive
				Fg_adj = (double)fps_dif * reciprocal(Coef_Fg2Q[game]) + GFL[GFL_point];
				Ug_adj = Ug + (double)fps_dif * reciprocal(Coef_Uc2Q[game]);
				if(Ug_adj > target_Ug){
					Fc_adj = (double)(Ug_adj - target_Ug) * -reciprocal(Coef_Fc2Uc[game]) + FL[FL_point];
				}
			}

			while(FL_point < freq_level && FL[FL_point] < Fc_adj)FL_point++;
			while(GFL_point < gpu_freq_level && GFL[GFL_point] < Fg_adj)GFL_point++;	
				
			
			
			if(FL_point != pre_FL){		
				if(FL_point > freq_level-2){
					FL_point = freq_level-2;
				}else if(FL_point < 2){
					FL_point = 2;
				}		
				pre_FL = FL_point;
				set_cpu_freq(0,FL[FL_point]);			
			}
			if(GFL_point != pre_GFL){			
				if(GFL_point > gpu_freq_level-1){
					GFL_point = gpu_freq_level-1;
				}else if(GFL_point < 0){
					GFL_point = 0;
				}		
				pre_GFL = GFL_point;
				set_gpu_freq(GFL[GFL_point]);			
			}
			
			
		}else if(fps_dev <= 10){			
			//printf("DOWN! POWER SAVE %d %d %d\n",fps,target_fps,fps_dev);
			Uc = get_cpu_util(4,&Uc_bp,&Uc_tp);
			Ug = get_gpu_util(&Ug_bp,&Ug_tp);
			
			int Uc_dev = target_Uc - Uc;
			int Ug_dev = target_Ug - Ug;
			if(Uc_dev*100/Uc > 10)
				Fc_adj = (double)(Uc_dev) * reciprocal(Coef_Fc2Uc[game]) + FL[FL_point];
			if(Ug_dev*100/Ug > 10)
				Fg_adj = (double)(Ug_dev) * reciprocal(Coef_Fg2Ug[game]) + GFL[GFL_point];
			
			while(FL_point > 0 && FL[FL_point] > Fc_adj)FL_point--;
			while(GFL_point > 0 && GFL[GFL_point] > Fg_adj)GFL_point--;
			
			if(FL_point != pre_FL){		
				if(FL_point > freq_level-2){
					FL_point = freq_level-2;
				}else if(FL_point < 2){
					FL_point = 2;
				}		
				pre_FL = FL_point;
				set_cpu_freq(0,FL[FL_point]);			
			}
			
			
			if(GFL_point != pre_GFL){			
				if(GFL_point > gpu_freq_level-1){
					GFL_point = gpu_freq_level-1;
				}else if(GFL_point < 0){
					GFL_point = 0;
				}		
				pre_GFL = GFL_point;
				set_gpu_freq(GFL[GFL_point]);			
			}	
			
		}		
		sleep(1);
	}	
	return 0;
}