/*
 * Measure and print the rate of the two PWM register pairs located at
 * 0xe114/0xe118 and 0xe11c/0xe120.
 *
 * For each pair the first register is loaded with 0x200000 and the second
 * with 0x80080000; the code then polls the second register until bit 31
 * reads back clear. That write-and-poll round is executed twice untimed
 * and a third time bracketed by gettimeofday(). The printed value is
 * 0x200000 divided by the elapsed wall time of that third round, in Hz.
 * Afterwards the first register gets its saved value back and the second
 * gets its saved value with bit 31 forced on.
 *
 * cnum: card index passed through to nva_rd32()/nva_wr32().
 */
void time_pwm_nv50(unsigned int cnum)
{
    const uint32_t count = 0x200000;
    const uint32_t trigger = 0x80080000;
    const uint32_t bit31 = 0x80000000;
    int i;

    for (i = 0; i < 2; i++) {
        uint32_t reg0 = 0xe114 + i * 8;
        uint32_t reg1 = reg0 + 4;
        uint32_t saved0 = nva_rd32(cnum, reg0);
        uint32_t saved1 = nva_rd32(cnum, reg1);
        struct timeval t_begin, t_finish;
        int round;

        nva_wr32(cnum, reg0, count);

        /* Two untimed rounds before the measured one. */
        for (round = 0; round < 2; round++) {
            nva_wr32(cnum, reg1, trigger);
            while (nva_rd32(cnum, reg1) & bit31) {
                /* spin until bit 31 clears */
            }
        }

        /* Timed round. */
        gettimeofday(&t_begin, NULL);
        nva_wr32(cnum, reg1, trigger);
        while (nva_rd32(cnum, reg1) & bit31) {
            /* spin until bit 31 clears */
        }
        gettimeofday(&t_finish, NULL);

        uint64_t td = time_diff_us(t_begin, t_finish);
        printf("PWM %d: %dHz\n", i, (int)(1000000ll * count / td));

        /* Put the registers back the way they were found. */
        nva_wr32(cnum, reg0, saved0);
        nva_wr32(cnum, reg1, bit31 | saved1);
    }
}
/*
 * Sample a per-generation PGRAPH register before and after a busy-wait of
 * at least 1,000,000 us of wall time and print the difference both as a
 * raw count and divided by 1e6 (labelled MHz in the output).
 *
 * The register is 0x4008f8 when card_type is 0x50 and 0x4040f4 when it is
 * 0xc0; any other card type prints a notice and returns without measuring.
 *
 * card: index into nva_cards[] and the card handle given to nva_rd32().
 */
void time_pgraph_dispatch_clock(unsigned int card)
{
    struct timeval start, end;
    ptime_t t_start, t_end, cycles;
    u32 reg;

    switch (nva_cards[card]->chipset.card_type) {
    case 0x50:
        reg = 0x4008f8;
        break;
    case 0xc0:
        reg = 0x4040f4;
        break;
    default:
        printf("pgraph_dispatch_clock is only available on nv50+ chipsets\n");
        return;
    }

    gettimeofday(&start, NULL);
    t_start = nva_rd32(card, reg);

    /* Busy-wait until the wall clock has advanced by one second. */
    for (;;) {
        gettimeofday(&end, NULL);
        if (!(time_diff_us(start, end) < 1000000))
            break;
    }

    /* Extra timestamp kept from the original sequence; it sits between the
     * end of the wait and the closing register read. */
    gettimeofday(&end, NULL);
    t_end = nva_rd32(card, reg);

    cycles = t_end - t_start;
    printf("PGRAPH.DISPATCH's clock: 1s = %llu cycles --> frequency = %f MHz\n",
           cycles, cycles / 1000000.0);
}
/*
 * Return how far get_time(card) advances while the host wall clock
 * (gettimeofday) advances by at least 1,000,000 us.
 *
 * card: card index passed to get_time().
 * Returns the difference between the get_time() readings taken after and
 * before the busy-wait.
 */
ptime_t time_ptimer_clock(unsigned int card)
{
    struct timeval wall_begin, wall_now;
    ptime_t ticks_before, ticks_after;

    gettimeofday(&wall_begin, NULL);
    ticks_before = get_time(card);

    /* Spin on the wall clock for one second. */
    for (;;) {
        gettimeofday(&wall_now, NULL);
        if (!(time_diff_us(wall_begin, wall_now) < 1000000))
            break;
    }

    /* One more timestamp, as in the original sequence, before the final
     * get_time() reading. */
    gettimeofday(&wall_now, NULL);
    ticks_after = get_time(card);

    return ticks_after - ticks_before;
}
/*
 * Upload a PMS microcode image to the card, start it, busy-wait for it to
 * finish and report how long it ran.
 *
 * cnum:      card index used for all register accesses and get_time().
 * pms:       microcode to upload; pms->len bytes are written as 32-bit
 *            words taken from pms->ptr.u32[].
 * wall_time: optional (may be NULL). When non-NULL it receives
 *            time_diff_us(wall_start, wall_end) for the run.
 *
 * Returns the get_time() delta across the run, minus the delta of one
 * back-to-back pair of get_time() calls taken afterwards as an estimate of
 * the measurement overhead.
 */
static ptime_t pms_launch(int cnum, struct pms_ucode* pms, ptime_t *wall_time)
{
    u32 pbus1098;
    u32 pms_data, pms_kick;
    ptime_t ptimer_start, ptimer_end;
    ptime_t overhead_start, overhead_end;
    struct timeval wall_start, wall_end;
    int i;

    /* Upload window and kick value differ for chipsets below 0x90. */
    if (nva_cards[cnum].chipset < 0x90) {
        pms_data = 0x001400;
        pms_kick = 0x00000003;
    } else {
        pms_data = 0x080000;
        pms_kick = 0x00000001;
    }

    /* upload ucode */
    /* NOTE(review): pbus1098 is later OR-ed with 0x18 and written back, so
     * nva_mask() presumably returns the previous register value - confirm. */
    pbus1098 = nva_mask(cnum, 0x001098, 0x00000008, 0x00000000);
    nva_wr32(cnum, 0x001304, 0x00000000);
    for (i = 0; i < pms->len / 4; i++)
        nva_wr32(cnum, pms_data + (i * 4), pms->ptr.u32[i]);
    nva_wr32(cnum, 0x001098, pbus1098 | 0x18);

    /* and run it! */
    gettimeofday(&wall_start, NULL);
    ptimer_start = get_time(cnum);

    nva_wr32(cnum, 0x00130c, pms_kick);

    /* Wait for completion */
    while (nva_rd32(cnum, 0x001308) & 0x100) {
        /* spin */
    }

    ptimer_end = get_time(cnum);
    gettimeofday(&wall_end, NULL);

    if (wall_time)
        *wall_time = time_diff_us(wall_start, wall_end);

    /*
     * Estimate the cost of a get_time() call with two explicitly ordered
     * reads. Writing this as "get_time() - get_time()" in one expression
     * leaves the order of the two calls unspecified in C, so the correction
     * could come out with either sign depending on the compiler.
     */
    overhead_start = get_time(cnum);
    overhead_end = get_time(cnum);

    return (ptimer_end - ptimer_start) - (overhead_end - overhead_start);
}
/*------------------------------------------------------------ * 時間から作ってもいいかどうかチェックする。 * 引数 * cindex int インデックス * 返り値 * BOOL 作っていい場合 TRUE(1) * BOOL 作っていけない場合 FALSE(0) ------------------------------------------------------------*/ BOOL NPC_createCheckGenerateFromTime( int cindex ) { struct timeval old; if( !NPC_CHECKCREATEINDEX(cindex) ) return FALSE; if( NPC_create[cindex].workdata[NPC_CREATEWORKNEVERMAKE] ) return FALSE; if( NPC_create[cindex].intdata[NPC_CREATEBORNNUM] <= NPC_create[cindex].workdata[NPC_CREATEWORKENEMYNUM] ) return FALSE; if( NPC_create[cindex].intdata[NPC_CREATETIME] < 0 )return FALSE; old.tv_sec = NPC_create[cindex].workdata[NPC_CREATEWORKMAKESTARTSEC]; old.tv_usec= NPC_create[cindex].workdata[NPC_CREATEWORKMAKESTARTUSEC]; if( time_diff_us( NowTime , old ) <= NPC_create[cindex].intdata[NPC_CREATETIME]*1000 ) return FALSE; return TRUE; }
int main(int argc, char** argv){ if(argc < 3){ std::cout << "usage:" << argv[0] << " src.png dst.png" << std::endl; return 1; } int mode = 0; // for cuda if(argc == 4){ // this is silly implementation char* m = argv[3]; mode = m[0] - '0'; } const char* input_file = argv[1]; const char* output_file = argv[2]; std::vector<unsigned char> in_image; unsigned int width, height; const unsigned input_error = lodepng::decode(in_image, width, height, input_file); if(input_error){ std::cout << "decoder error " << input_error << ": " << lodepng_error_text(input_error) << std::endl; return 1; } unsigned char* input_image = new unsigned char[in_image.size()]; unsigned char* output_image = new unsigned char[in_image.size()]; std::copy(in_image.begin(), in_image.end(), input_image); // first CUDA call takes a whlie. // For benchmark, I do not take account of first call. const int N = 5; long sum_of_n_minus_one = 0; for(int i=0;i<N;i++){ struct timeval st; struct timeval et; gettimeofday(&st, NULL); if(mode == 0){ rgb_invert_in_cuda(output_image, input_image, width, height); }else if(mode == 1){ rgb_invert_in_cpu(output_image, input_image, width, height); }else if(mode == 2){ rgb_invert_in_cuda_uchar_array(output_image, input_image, width, height); }else{ std::cout << "ERROR: unknown mode: " << mode << std::endl; return 1; } gettimeofday(&et, NULL); long us = time_diff_us(st, et); if(i != 0){ sum_of_n_minus_one += us; } } printf("%lu\n",sum_of_n_minus_one/(N-1)); std::vector<unsigned char> out_image(output_image, output_image+in_image.size()); unsigned output_error = lodepng::encode(output_file, out_image, width, height); if(output_error){ std::cout << "encoder error " << output_error << ": "<< lodepng_error_text(output_error) << std::endl; return 1; } return 0; }