int main(int argc,char *argv[]) { double *Anext; double *A0; int nx,ny,nz,tx,ty,tz,timesteps; int i; ticks t1, t2; double spt; /* parse command line options */ if (argc < 8) { printf("\nUSAGE:\n%s <grid x> <grid y> <grid z> <block x> <block y> <block z> <timesteps>\n", argv[0]); printf("\nTIME SKEWING CONSTRAINTS:\nIn each dimension, <grid size - 2> should be a multiple of <block size>.\n"); printf("\nCIRCULAR QUEUE CONSTRAINTS:\n<grid y - 2> should be a multiple of <block y>. The block sizes in the other dimensions are ignored.\n\n"); return EXIT_FAILURE; } nx = atoi(argv[1]); ny = atoi(argv[2]); nz = atoi(argv[3]); tx = atoi(argv[4]); ty = atoi(argv[5]); tz = atoi(argv[6]); timesteps = atoi(argv[7]); printf("%dx%dx%d, blocking: %dx%dx%d, timesteps: %d\n", nx,ny,nz,tx,ty,tz,timesteps); #ifdef HAVE_PAPI PAPI_library_init(PAPI_VER_CURRENT); #endif /* find conversion factor from ticks to seconds */ spt = seconds_per_tick(); /* allocate arrays */ Anext=(double*)malloc(sizeof(double)*nx*ny*nz); A0=(double*)malloc(sizeof(double)*nx*ny*nz); printf("USING TIMER: %s \t SECONDS PER TICK:%g \n", TIMER_DESC, spt); for (i=0;i<NUM_TRIALS;i++) { /* initialize arrays to all ones */ StencilInit(nx,ny,nz,Anext); StencilInit(nx,ny,nz,A0); #ifdef CIRCULARQUEUEPROBE if (timesteps > 1) { CircularQueueInit(nx, ty, timesteps); } #endif // clear_cache(); t1 = getticks(); /* stencil function */ StencilProbe(A0, Anext, nx, ny, nz, tx, ty, tz, timesteps); t2 = getticks(); printf("elapsed ticks: %g time:%g \n", elapsed(t2, t1), spt * elapsed(t2,t1)); #ifdef COMPUTECHECKSUM extern double CheckSum(int, int, int, double*); double r1=CheckSum(nx, ny, nz, A0); double r2=CheckSum(nx, ny, nz, Anext); printf("Checksums: A0=%g, Anext=%g\n", r1, r2); #endif } /* free arrays */ free(Anext); free(A0); return EXIT_SUCCESS; }
void send_traffic() { ticks hz, tick_start = 0, tick_delta = 0; u_int32_t buffer_id = 0; int sent_bytes; #ifdef BURST_API int i, sent_packets; #endif if (bind_core >= 0) bind2core(bind_core); if(pps > 0) { /* cumputing usleep delay */ tick_start = getticks(); usleep(1); tick_delta = getticks() - tick_start; /* cumputing CPU freq */ tick_start = getticks(); usleep(1001); hz = (getticks() - tick_start - tick_delta) * 1000 /*kHz -> Hz*/; printf("Estimated CPU freq: %lu Hz\n", (long unsigned int) hz); tick_delta = (double) (hz / pps); tick_start = getticks(); } #ifdef BURST_API /****** Burst API ******/ if (use_pkt_burst_api) { while (likely(!do_shutdown && (!num_to_send || numPkts < num_to_send))) { if (numPkts < num_queue_buffers + NBUFF || num_ips > 1) { /* forge all buffers 1 time */ for (i = 0; i < BURSTLEN; i++) { buffers[buffer_id + i]->len = packet_len; if (stdin_packet_len > 0) memcpy(pfring_zc_pkt_buff_data(buffers[buffer_id + i], zq), stdin_packet, stdin_packet_len); else forge_udp_packet(pfring_zc_pkt_buff_data(buffers[buffer_id + i], zq), numPkts + i); } } /* TODO send unsent packets when a burst is partially sent */ while (unlikely((sent_packets = pfring_zc_send_pkt_burst(zq, &buffers[buffer_id], BURSTLEN, flush_packet)) <= 0)) { if (unlikely(do_shutdown)) break; if (!active) usleep(1); } numPkts += sent_packets; numBytes += ((packet_len + 24 /* 8 Preamble + 4 CRC + 12 IFG */ ) * sent_packets); buffer_id += BURSTLEN; buffer_id &= NBUFFMASK; if(pps > 0) { u_int8_t synced = 0; while((getticks() - tick_start) < (numPkts * tick_delta)) if (!synced) pfring_zc_sync_queue(zq, tx_only), synced = 1; } } } else { #endif /****** Packet API ******/ while (likely(!do_shutdown && (!num_to_send || numPkts < num_to_send))) { buffers[buffer_id]->len = packet_len; #if 1 if (numPkts < num_queue_buffers + NBUFF || num_ips > 1) { /* forge all buffers 1 time */ if (stdin_packet_len > 0) memcpy(pfring_zc_pkt_buff_data(buffers[buffer_id], zq), stdin_packet, stdin_packet_len); 
else forge_udp_packet(pfring_zc_pkt_buff_data(buffers[buffer_id], zq), numPkts); } #else { u_char *pkt_data = pfring_zc_pkt_buff_data(buffers[buffer_id], zq); int k; u_int8_t j = numPkts; for(k = 0; k < buffers[buffer_id]->len; k++) pkt_data[k] = j++; pkt_data[k-1] = cluster_id; } #endif while (unlikely((sent_bytes = pfring_zc_send_pkt(zq, &buffers[buffer_id], flush_packet)) < 0)) { if (unlikely(do_shutdown)) break; if (!active) usleep(1); } numPkts++; numBytes += sent_bytes + 24; /* 8 Preamble + 4 CRC + 12 IFG */ buffer_id++; buffer_id &= NBUFFMASK; if(pps > 0) { u_int8_t synced = 0; while((getticks() - tick_start) < (numPkts * tick_delta)) if (!synced) pfring_zc_sync_queue(zq, tx_only), synced = 1; } } #ifdef BURST_API } #endif if (!flush_packet) pfring_zc_sync_queue(zq, tx_only); }
uint32_t load_rom(uint8_t* filename, uint32_t base_addr, uint8_t flags) { UINT bytes_read; DWORD filesize; UINT count=0; tick_t ticksstart, ticks_total=0; ticksstart=getticks(); printf("%s\n", filename); file_open(filename, FA_READ); if(file_res) { uart_putc('?'); uart_putc(0x30+file_res); return 0; } filesize = file_handle.fsize; smc_id(&romprops); file_close(); /* reconfigure FPGA if necessary */ if(romprops.fpga_conf) { printf("reconfigure FPGA with %s...\n", romprops.fpga_conf); fpga_pgm((uint8_t*)romprops.fpga_conf); } set_mcu_addr(base_addr + romprops.load_address); file_open(filename, FA_READ); ff_sd_offload=1; sd_offload_tgt=0; f_lseek(&file_handle, romprops.offset); for(;;) { ff_sd_offload=1; sd_offload_tgt=0; bytes_read = file_read(); if (file_res || !bytes_read) break; if(!(count++ % 512)) { uart_putc('.'); } } file_close(); set_mapper(romprops.mapper_id); printf("rom header map: %02x; mapper id: %d\n", romprops.header.map, romprops.mapper_id); ticks_total=getticks()-ticksstart; printf("%u ticks total\n", ticks_total); if(romprops.mapper_id==3) { printf("BSX Flash cart image\n"); printf("attempting to load BSX BIOS /sd2snes/bsxbios.bin...\n"); load_sram_offload((uint8_t*)"/sd2snes/bsxbios.bin", 0x800000); printf("attempting to load BS data file /sd2snes/bsxpage.bin...\n"); load_sram_offload((uint8_t*)"/sd2snes/bsxpage.bin", 0x900000); printf("Type: %02x\n", romprops.header.destcode); set_bsx_regs(0xc0, 0x3f); uint16_t rombase; if(romprops.header.ramsize & 1) { rombase = romprops.load_address + 0xff00; // set_bsx_regs(0x36, 0xc9); } else { rombase = romprops.load_address + 0x7f00; // set_bsx_regs(0x34, 0xcb); } sram_writebyte(0x33, rombase+0xda); sram_writebyte(0x00, rombase+0xd4); sram_writebyte(0xfc, rombase+0xd5); set_fpga_time(0x0220110301180530LL); } if(romprops.has_dspx || romprops.has_cx4) { printf("DSPx game. 
Loading firmware image %s...\n", romprops.dsp_fw); load_dspx(romprops.dsp_fw, romprops.fpga_features); /* fallback to DSP1B firmware if DSP1.bin is not present */ if(file_res && romprops.dsp_fw == DSPFW_1) { load_dspx(DSPFW_1B, romprops.fpga_features); } if(file_res) { snes_menu_errmsg(MENU_ERR_NODSP, (void*)romprops.dsp_fw); } } uint32_t rammask; uint32_t rommask; while(filesize > (romprops.romsize_bytes + romprops.offset)) { romprops.romsize_bytes <<= 1; } if(romprops.header.ramsize == 0) { rammask = 0; } else { rammask = romprops.ramsize_bytes - 1; } rommask = romprops.romsize_bytes - 1; printf("ramsize=%x rammask=%lx\nromsize=%x rommask=%lx\n", romprops.header.ramsize, rammask, romprops.header.romsize, rommask); set_saveram_mask(rammask); set_rom_mask(rommask); readled(0); if(flags & LOADROM_WITH_SRAM) { if(romprops.ramsize_bytes) { sram_memset(SRAM_SAVE_ADDR, romprops.ramsize_bytes, 0); strcpy(strrchr((char*)filename, (int)'.'), ".srm"); printf("SRM file: %s\n", filename); load_sram(filename, SRAM_SAVE_ADDR); saveram_crc_old = calc_sram_crc(SRAM_SAVE_ADDR, romprops.ramsize_bytes); } else { printf("No SRAM\n"); } } printf("check MSU..."); if(msu1_check(filename)) { romprops.fpga_features |= FEAT_MSU1; romprops.has_msu1 = 1; } else { romprops.has_msu1 = 0; } printf("done\n"); romprops.fpga_features |= FEAT_SRTC; romprops.fpga_features |= FEAT_213F; fpga_set_213f(romprops.region); fpga_set_features(romprops.fpga_features); if(flags & LOADROM_WITH_RESET) { fpga_dspx_reset(1); snes_reset_pulse(); fpga_dspx_reset(0); } return (uint32_t)filesize; }
int main(int argc, char* argv[]) { unsigned long long t0, t1; unsigned long n = (argc>1) ? atol(argv[1]) : 1000000; const unsigned int warmup = 10; unsigned int repetitions = (argc>2) ? atol(argv[2]) : 100; double scaling = 7.33; printf("vec_scale test for %lu elements \n", n); REALTYPE * x = safemalloc(n*sizeof(REALTYPE)); REALTYPE * y = safemalloc(n*sizeof(REALTYPE)); for (unsigned int i=0;i<n;i++) x[i] = (double) i; for (unsigned int i=0;i<n;i++) y[i] = (double) -i; /* basic */ for (unsigned int r=0;r<warmup;r++) vec_scale_c_basic(n,x,y,scaling); t0 = getticks(); for (unsigned int r=0;r<repetitions;r++) vec_scale_c_basic(n,x,y,scaling); t1 = getticks(); printf("%20s of %lu %s took %12llu cycles (%6.2lf cycles/element) \n", "vec_scale_c_basic", n, REALNAME , (t1-t0)/repetitions, ((double)t1-t0)/(repetitions*n) ); /* unroll2 */ for (unsigned int r=0;r<warmup;r++) vec_scale_c_unroll2(n,x,y,scaling); t0 = getticks(); for (unsigned int r=0;r<repetitions;r++) vec_scale_c_unroll2(n,x,y,scaling); t1 = getticks(); printf("%20s of %lu %s took %12llu cycles (%6.2lf cycles/element) \n", "vec_scale_c_unroll2", n, REALNAME , (t1-t0)/repetitions, ((double)t1-t0)/(repetitions*n) ); /* unroll4 */ for (unsigned int r=0;r<warmup;r++) vec_scale_c_unroll4(n,x,y,scaling); t0 = getticks(); for (unsigned int r=0;r<repetitions;r++) vec_scale_c_unroll4(n,x,y,scaling); t1 = getticks(); printf("%20s of %lu %s took %12llu cycles (%6.2lf cycles/element) \n", "vec_scale_c_unroll4", n, REALNAME , (t1-t0)/repetitions, ((double)t1-t0)/(repetitions*n) ); /* unroll4 */ for (unsigned int r=0;r<warmup;r++) vec_scale_c_unroll8(n,x,y,scaling); t0 = getticks(); for (unsigned int r=0;r<repetitions;r++) vec_scale_c_unroll8(n,x,y,scaling); t1 = getticks(); printf("%20s of %lu %s took %12llu cycles (%6.2lf cycles/element) \n", "vec_scale_c_unroll8", n, REALNAME , (t1-t0)/repetitions, ((double)t1-t0)/(repetitions*n) ); /**********/ free(y); free(x); return 0; }
int main(int argc, char *argv[]){ int x, y, limit, fd; char* buf; unsigned int* pages; int seq = atoi(argv[1]); ticks size; char buf[40]; ticks start, end; FILE* myFile; buf = (char*)malloc((2 * 4096) * sizeof(char)); if ((unsigned long)buf % 4096) { buf += 4096 - (unsigned long)buf % 4096; } for(size = CACHE_LOW ; size <= CACHE_HIGH ; size += (128*1024)){ myFile = fopen("tmp.txt", "w"); limit = (size / 4096); for(x = 0 ; x < limit ; x++){ fwrite (buf, 1, 4096, myFile); } fclose(myFile); fd = open("tmp.txt", O_DIRECT); pages = permutation(limit, 4096); ticks sum = 0; for(x = 0 ; x < 100 ; x++){ start = getticks(); if(seq == 1) lseek(fd, 0, SEEK_SET); for(y = 0 ; y < limit ; y++){ if(seq == 0) lseek(fd, pages[y], SEEK_SET); read(fd, buf, 4096); } end = getticks(); sum += (end-start); } ticks avg = sum/100; double speed = (3.5e9 * limit * 4096) / avg; speed /= (1024 * 1024); printf("%lldK: %.3lf\n", (size/1024), speed); close(fd); unlink("tmp.txt"); } }
// 2 pinned comms threads, processing 2out and 4in buffers. int io_func(void *arg) { struct rte_ring *ring1, *ring2; static struct message *out_msg1, *out_msg2; static struct message msg1, msg2; #ifndef __baremetal__ cpu_set_t cpuset; CPU_ZERO(&cpuset); pthread_t current_thread = pthread_self(); #endif msg1.payload1 = 0; msg2.payload1 = 0; if (*(int*)arg == 0) { msg1.message_id = 1; ring1 = out1; #ifndef __baremetal__ CPU_SET(2, &cpuset); pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); #endif } else { msg2.message_id = 3; ring1 = out2; #ifndef __baremetal__ CPU_SET(3, &cpuset); pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); #endif } // Simple round robin message sender/receiver while (1) { int received1 = 0, received2 = 0; unsigned long long t1 = getticks(); // Try sending msg1.payload1++; int sent = rte_ring_mp_enqueue(app1, &msg1); if (-1 == sent) {// FIXME to handle the error } msg2.payload1++; sent = rte_ring_mp_enqueue(app2, &msg2); // receive output, barrier while (!received1) { /*#ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("io spin %d %d \n", received1, received2); #endif*/ if(!received1) { if(!rte_ring_mc_dequeue(ring1, (void **)&out_msg1)) { #ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("packet from app ring 1 %d %d\n", out_msg1->message_id, out_msg1->payload1); #endif if (out_msg1->payload1 == N) return 0; received1 = -1; } } } } return 0; }
// // processData: process channel data // // Notes: // Perform a DFB and subchannelization of the specified channel. // Data is placed in a half-frame buffer, which is then passed // to the spectrometry task for spectrum analysis. // void WorkerTask::processData(Msg *msg) { // get the channel HalfFrameInfo *hfInfo = static_cast<HalfFrameInfo *> (msg->getData()); Assert(hfInfo); // if not collecting data, just flush the data without doing a DFB, // to ensure that the input buffer does not fill up. if (!activity) { channel->dfbFlush(hfInfo->sample); return; } #if WORKER_TIMING uint64_t t0 = getticks(); #endif // find a pair of half frame buffers BufPair *hfBuf = channel->allocHfBuf(); if (!hfBuf) Fatal(ERR_NBA); ComplexFloat32 *rBuf = static_cast<ComplexFloat32 *> (hfBuf->getBufData(ATADataPacketHeader::RCIRC)); ComplexFloat32 *lBuf = static_cast<ComplexFloat32 *> (hfBuf->getBufData(ATADataPacketHeader::LCIRC)); // build the array of output pointers buildOutputArray(rOut, rBuf, channel->getSamplesPerSubchannelHalfFrame(), channel->getTotalSubchannels(), channel->getUsableSubchannels()); buildOutputArray(lOut, lBuf, channel->getSamplesPerSubchannelHalfFrame(), channel->getTotalSubchannels(), channel->getUsableSubchannels()); #if WORKER_TIMING uint64_t t1 = getticks(); #endif // do the DFB and count the half frame channel->dfbProcess(hfInfo->sample, sampleBuf, rOut, lOut); #if WORKER_TIMING uint64_t t2 = getticks(); #endif // call the spectrometer to compute the spectra SpecState s = spectrometer->processHalfFrame(channel->getHalfFrame(), hfBuf); // handle state changes switch (s) { case BLStarted: startBaseline(); break; case DCStarted: startDataCollection(); break; case DCComplete: completeDataCollection(); break; default: break; } #if WORKER_TIMING uint64_t t3 = getticks(); ++timing.iterations; timing.buildArrays += elapsed(t1, t0); timing.dfb += elapsed(t2, t1); timing.spectrometer += elapsed(t3, t2); timing.total += elapsed(t3, t0); #endif }
int main(int argc, char* argv[]) { char c, *pcap_in = NULL, mac_address[6]; int promisc, i, verbose = 0, active_poll = 0, reforge_mac = 0; u_int mac_a, mac_b, mac_c, mac_d, mac_e, mac_f; char buffer[1500]; int send_len = 60; u_int32_t num = 1; int bind_core = -1; u_int16_t cpu_percentage = 0; double gbit_s = 0, td, pps; ticks tick_start = 0, tick_delta = 0; ticks hz = 0; struct packet *tosend; while((c = getopt(argc,argv,"hi:n:g:l:af:r:vm:" #if 0 "b:" #endif )) != -1) { switch(c) { case 'h': printHelp(); break; case 'i': in_dev = strdup(optarg); break; case 'f': pcap_in = strdup(optarg); break; case 'n': num = atoi(optarg); break; case 'g': bind_core = atoi(optarg); break; case 'l': send_len = atoi(optarg); break; case 'v': verbose = 1; break; case 'a': active_poll = 1; break; case 'r': sscanf(optarg, "%lf", &gbit_s); break; #if 0 case 'b': cpu_percentage = atoi(optarg); #endif break; case 'm': if(sscanf(optarg, "%02X:%02X:%02X:%02X:%02X:%02X", &mac_a, &mac_b, &mac_c, &mac_d, &mac_e, &mac_f) != 6) { printf("Invalid MAC address format (XX:XX:XX:XX:XX:XX)\n"); return(0); } else { reforge_mac = 1; mac_address[0] = mac_a, mac_address[1] = mac_b, mac_address[2] = mac_c; mac_address[3] = mac_d, mac_address[4] = mac_e, mac_address[5] = mac_f; } break; } } if(in_dev == NULL) printHelp(); printf("Sending packets on %s\n", in_dev); /* hardcode: promisc=1, to_ms=500 */ promisc = 1; pd = pfring_open(in_dev, promisc, 1500, 0); if(pd == NULL) { printf("pfring_open %s error\n", in_dev); return(-1); } else { u_int32_t version; pfring_set_application_name(pd, "pfdnasend"); pfring_version(pd, &version); printf("Using PF_RING v.%d.%d.%d\n", (version & 0xFFFF0000) >> 16, (version & 0x0000FF00) >> 8, version & 0x000000FF); } signal(SIGINT, sigproc); signal(SIGTERM, sigproc); signal(SIGINT, sigproc); if(send_len < 60) send_len = 60; if(gbit_s > 0) { /* cumputing usleep delay */ tick_start = getticks(); usleep(1); tick_delta = getticks() - tick_start; /* cumputing CPU freq */ tick_start 
= getticks(); usleep(1001); hz = (getticks() - tick_start - tick_delta) * 1000 /*kHz -> Hz*/; printf("Estimated CPU freq: %llu Hz\n", hz); /* computing max rate */ pps = ((gbit_s * 1000000000) / 8 /*byte*/) / (8 /*Preamble*/ + send_len + 4 /*CRC*/ + 12 /*IFG*/); td = (double)(hz / pps); tick_delta = (ticks)td; printf("Number of %d-byte Packet Per Second at %.2f Gbit/s: %.2f\n", (send_len + 4 /*CRC*/), gbit_s, pps); } if(pcap_in) { char ebuf[256]; u_char *pkt; struct pcap_pkthdr *h; pcap_t *pt = pcap_open_offline(pcap_in, ebuf); u_int num_pcap_pkts = 0; if(pt) { struct packet *last = NULL; while(1) { struct packet *p; int rc = pcap_next_ex(pt, &h, (const u_char**)&pkt); if(rc <= 0) break; p = (struct packet*)malloc(sizeof(struct packet)); if(p) { p->len = h->caplen; p->next = NULL; p->pkt = (char*)malloc(p->len); if(p->pkt == NULL) { printf("Not enough memory\n"); break; } else { memcpy(p->pkt, pkt, p->len); if(reforge_mac) memcpy(p->pkt, mac_address, 6); } if(last) { last->next = p; last = p; } else pkt_head = p, last = p; } else { printf("Not enough memory\n"); break; } if(verbose) printf("Read %d bytes packet from pcap file %s\n", p->len, pcap_in); num_pcap_pkts++; } /* while */ pcap_close(pt); printf("Read %d packets from pcap file %s\n", num_pcap_pkts, pcap_in); last->next = pkt_head; /* Loop */ num *= num_pcap_pkts; } else { printf("Unable to open file %s\n", pcap_in); pfring_close(pd); return(-1); } } else { struct packet *p; for(i=0; i<send_len; i++) buffer[i] = i; if(reforge_mac) memcpy(buffer, mac_address, 6); p = (struct packet*)malloc(sizeof(struct packet)); if(p) { p->len = send_len; p->next = p; /* Loop */ p->pkt = (char*)malloc(p->len); memcpy(p->pkt, buffer, send_len); pkt_head = p; } } if(bind_core >= 0) bind2core(bind_core); if(wait_for_packet && (cpu_percentage > 0)) { if(cpu_percentage > 99) cpu_percentage = 99; pfring_config(cpu_percentage); } if(!verbose) { signal(SIGALRM, my_sigalarm); alarm(1); } gettimeofday(&startTime, NULL); 
memcpy(&lastTime, &startTime, sizeof(startTime)); if(gbit_s > 0) tick_start = getticks(); tosend = pkt_head; i = 0; pfring_set_direction(pd, tx_only_direction); if(pfring_enable_ring(pd) != 0) { printf("Unable to enable ring :-(\n"); pfring_close(pd); return(-1); } while(!num || i < num) { int rc; redo: rc = pfring_send(pd, tosend->pkt, tosend->len, 0 /* Don't flush (it does PF_RING automatically) */); if(verbose) printf("[%d] pfring_send(%d) returned %d\n", i, tosend->len, rc); if(rc == -1) { /* Not enough space in buffer */ if(gbit_s == 0) { if(!active_poll) { if(bind_core >= 0) usleep(1); else pfring_poll(pd, 0); } } else { /* Just waste some time */ while((getticks() - tick_start) < (num_pkt_good_sent * tick_delta)) ; } goto redo; } else num_pkt_good_sent++, num_bytes_good_sent += tosend->len+24 /* 8 Preamble + 4 CRC + 12 IFG */, tosend = tosend->next; if(num > 0) i++; } /* for */ print_stats(0); pfring_close(pd); return(0); }
int main(int argc, char **argv) { int j, k; /**< indices */ int d; /**< number of dimensions */ int N; /**< number of source nodes */ int M; /**< number of target nodes */ int n; /**< expansion degree */ int m; /**< cut-off parameter */ int p; /**< degree of smoothness */ const char *s; /**< name of kernel */ C (*kernel)(R, int, const R *); /**< kernel function */ R c; /**< parameter for kernel */ fastsum_plan my_fastsum_plan; /**< plan for fast summation */ C *direct; /**< array for direct computation */ ticks t0, t1; /**< for time measurement */ R time; /**< for time measurement */ R error = K(0.0); /**< for error computation */ R eps_I; /**< inner boundary */ R eps_B; /**< outer boundary */ if (argc != 11) { printf("\nfastsum_test d N M n m p kernel c eps_I eps_B\n\n"); printf(" d dimension \n"); printf(" N number of source nodes \n"); printf(" M number of target nodes \n"); printf(" n expansion degree \n"); printf(" m cut-off parameter \n"); printf(" p degree of smoothness \n"); printf(" kernel kernel function (e.g., gaussian)\n"); printf(" c kernel parameter \n"); printf(" eps_I inner boundary \n"); printf(" eps_B outer boundary \n\n"); exit(EXIT_FAILURE); } else { d = atoi(argv[1]); N = atoi(argv[2]); c = K(1.0) / POW((R)(N), K(1.0) / ((R)(d))); M = atoi(argv[3]); n = atoi(argv[4]); m = atoi(argv[5]); p = atoi(argv[6]); s = argv[7]; c = (R)(atof(argv[8])); eps_I = (R)(atof(argv[9])); eps_B = (R)(atof(argv[10])); if (strcmp(s, "gaussian") == 0) kernel = gaussian; else if (strcmp(s, "multiquadric") == 0) kernel = multiquadric; else if (strcmp(s, "inverse_multiquadric") == 0) kernel = inverse_multiquadric; else if (strcmp(s, "logarithm") == 0) kernel = logarithm; else if (strcmp(s, "thinplate_spline") == 0) kernel = thinplate_spline; else if (strcmp(s, "one_over_square") == 0) kernel = one_over_square; else if (strcmp(s, "one_over_modulus") == 0) kernel = one_over_modulus; else if (strcmp(s, "one_over_x") == 0) kernel = one_over_x; else if (strcmp(s, 
"inverse_multiquadric3") == 0) kernel = inverse_multiquadric3; else if (strcmp(s, "sinc_kernel") == 0) kernel = sinc_kernel; else if (strcmp(s, "cosc") == 0) kernel = cosc; else if (strcmp(s, "cot") == 0) kernel = kcot; else { s = "multiquadric"; kernel = multiquadric; } } printf( "d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%" __FGS__ ", eps_I=%" __FGS__ ", eps_B=%" __FGS__ " \n", d, N, M, n, m, p, s, c, eps_I, eps_B); #ifdef NF_KUB printf("nearfield correction using piecewise cubic Lagrange interpolation\n"); #elif defined(NF_QUADR) printf("nearfield correction using piecewise quadratic Lagrange interpolation\n"); #elif defined(NF_LIN) printf("nearfield correction using piecewise linear Lagrange interpolation\n"); #endif #ifdef _OPENMP #pragma omp parallel { #pragma omp single { printf("nthreads=%d\n", omp_get_max_threads()); } } FFTW(init_threads)(); #endif /** init d-dimensional fastsum plan */ fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B); //fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B); if (my_fastsum_plan.flags & NEARFIELD_BOXES) printf( "determination of nearfield candidates based on partitioning into boxes\n"); else printf("determination of nearfield candidates based on search tree\n"); /** init source knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < N) { R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0); R r2 = K(0.0); for (j = 0; j < d; j++) my_fastsum_plan.x[k * d + j] = K(2.0) * r_max * NFFT(drand48)() - r_max; for (j = 0; j < d; j++) r2 += my_fastsum_plan.x[k * d + j] * my_fastsum_plan.x[k * d + j]; if (r2 >= r_max * r_max) continue; k++; } for (k = 0; k < N; k++) { /* R r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((R)rand()/(R)RAND_MAX,1.0/d); my_fastsum_plan.x[k*d+0] = r; for (j=1; j<d; j++) { R phi=2.0*KPI*(R)rand()/(R)RAND_MAX; my_fastsum_plan.x[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.x[k*d+t] *= cos(phi); } my_fastsum_plan.x[k*d+j] *= 
sin(phi); } */ my_fastsum_plan.alpha[k] = NFFT(drand48)() + II * NFFT(drand48)(); } /** init target knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < M) { R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0); R r2 = K(0.0); for (j = 0; j < d; j++) my_fastsum_plan.y[k * d + j] = K(2.0) * r_max * NFFT(drand48)() - r_max; for (j = 0; j < d; j++) r2 += my_fastsum_plan.y[k * d + j] * my_fastsum_plan.y[k * d + j]; if (r2 >= r_max * r_max) continue; k++; } /* for (k=0; k<M; k++) { R r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((R)rand()/(R)RAND_MAX,1.0/d); my_fastsum_plan.y[k*d+0] = r; for (j=1; j<d; j++) { R phi=2.0*KPI*(R)rand()/(R)RAND_MAX; my_fastsum_plan.y[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.y[k*d+t] *= cos(phi); } my_fastsum_plan.y[k*d+j] *= sin(phi); } } */ /** direct computation */ printf("direct computation: "); fflush(NULL); t0 = getticks(); fastsum_exact(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** copy result */ direct = (C *) NFFT(malloc)((size_t)(my_fastsum_plan.M_total) * (sizeof(C))); for (j = 0; j < my_fastsum_plan.M_total; j++) direct[j] = my_fastsum_plan.f[j]; /** precomputation */ printf("pre-computation: "); fflush(NULL); t0 = getticks(); fastsum_precompute(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** fast computation */ printf("fast computation: "); fflush(NULL); t0 = getticks(); fastsum_trafo(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** compute max error */ error = K(0.0); for (j = 0; j < my_fastsum_plan.M_total; j++) { if (CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]) > error) error = CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]); } printf("max relative error: %" __FES__ "\n", error); /** finalise the plan */ fastsum_finalize(&my_fastsum_plan); return EXIT_SUCCESS; }
/**
 * inverse NFFT-based mpolar FFT
 *
 * Reconstructs the NN x NN Fourier coefficients f_hat from the samples f
 * given on the mpolar grid (T, R) by running max_i CGNR iterations of the
 * inverse NFFT with cut-off m.  For max_i < 1 the initial residual p_hat is
 * returned instead.  The time spent in the solver is stored in
 * GLOBAL_elapsed_time.
 *
 * Returns EXIT_SUCCESS, or -1 when the node/weight arrays cannot be
 * allocated.
 */
static int inverse_mpolar_fft(fftw_complex *f, int T, int R, fftw_complex *f_hat, int NN, int max_i, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and frequencies  */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */
  solver_plan_complex my_infft_plan;    /**< plan for the inverse nfft        */

  double *x, *w;                        /**< knots and associated weights     */
  int l;                                /**< index for iterations             */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*T*R/2*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*T*R/4*(sizeof(double)));
  if (w==NULL)
  {
    nfft_free(x);                       /* do not leak the knots on failure  */
    return -1;
  }

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                  FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init two dimensional infft plan */
  solver_init_advanced_complex(&my_infft_plan,(nfft_mv_plan_complex*)(&my_nfft_plan), CGNR | PRECOMPUTE_WEIGHT );

  /** init nodes, given samples and weights */
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
    my_infft_plan.y[j]    = f[j];
    my_infft_plan.w[j]    = w[j];
  }

  /** precompute psi, the entries of the matrix B */
  if(my_nfft_plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&my_nfft_plan);

  /** initialise damping factors: 1 inside the disc of radius N[0]/2, else 0 */
  if(my_infft_plan.flags & PRECOMPUTE_DAMP)
    for(j=0;j<my_nfft_plan.N[0];j++)
      for(k=0;k<my_nfft_plan.N[1];k++)
      {
        my_infft_plan.w_hat[j*my_nfft_plan.N[1]+k]=
          (sqrt(pow(j-my_nfft_plan.N[0]/2,2)+pow(k-my_nfft_plan.N[1]/2,2))>(my_nfft_plan.N[0]/2)?0:1);
      }

  /** initialise some guess f_hat_0 */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_infft_plan.f_hat_iter[k] = 0.0 + _Complex_I*0.0;

  t0 = getticks();

  /** solve the system */
  solver_before_loop_complex(&my_infft_plan);

  if (max_i<1)
  {
    l=1;
    for(k=0;k<my_nfft_plan.N_total;k++)
      my_infft_plan.f_hat_iter[k] = my_infft_plan.p_hat_iter[k];
  }
  else
  {
    for(l=1;l<=max_i;l++)
    {
      solver_loop_one_step_complex(&my_infft_plan);
    }
  }

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(k=0;k<my_nfft_plan.N_total;k++)
    f_hat[k] = my_infft_plan.f_hat_iter[k];

  /** finalise the plans and free the variables */
  solver_finalize_complex(&my_infft_plan);
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
/** Comparison of the FFTW, mpolar FFT, and inverse mpolar FFT */ static int comparison_fft(FILE *fp, int N, int T, int R) { ticks t0, t1; fftw_plan my_fftw_plan; fftw_complex *f_hat,*f; int m,k; double t_fft, t_dft_mpolar; f_hat = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*N*N); f = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*(T*R/4)*5); my_fftw_plan = fftw_plan_dft_2d(N,N,f_hat,f,FFTW_BACKWARD,FFTW_MEASURE); for(k=0; k<N*N; k++) f_hat[k] = (((double)rand())/RAND_MAX) + _Complex_I* (((double)rand())/RAND_MAX); t0 = getticks(); for(m=0;m<65536/N;m++) { fftw_execute(my_fftw_plan); /* touch */ f_hat[2]=2*f_hat[0]; } t1 = getticks(); GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0); t_fft=N*GLOBAL_elapsed_time/65536; if(N<256) { mpolar_dft(f_hat,N,f,T,R,1); t_dft_mpolar=GLOBAL_elapsed_time; } for (m=3; m<=9; m+=3) { if((m==3)&&(N<256)) fprintf(fp,"%d\t&\t&\t%1.1e&\t%1.1e&\t%d\t",N,t_fft,t_dft_mpolar,m); else if(m==3) fprintf(fp,"%d\t&\t&\t%1.1e&\t &\t%d\t",N,t_fft,m); else fprintf(fp," \t&\t&\t &\t &\t%d\t",m); printf("N=%d\tt_fft=%1.1e\tt_dft_mpolar=%1.1e\tm=%d\t",N,t_fft,t_dft_mpolar,m); mpolar_fft(f_hat,N,f,T,R,m); fprintf(fp,"%1.1e&\t",GLOBAL_elapsed_time); printf("t_mpolar=%1.1e\t",GLOBAL_elapsed_time); inverse_mpolar_fft(f,T,R,f_hat,N,2*m,m); if(m==9) fprintf(fp,"%1.1e\\\\\\hline\n",GLOBAL_elapsed_time); else fprintf(fp,"%1.1e\\\\\n",GLOBAL_elapsed_time); printf("t_impolar=%1.1e\n",GLOBAL_elapsed_time); } fflush(fp); nfft_free(f); nfft_free(f_hat); return EXIT_SUCCESS; }
/**
 * NFFT-based mpolar FFT
 *
 * Evaluates the NN x NN Fourier coefficients f_hat at the nodes of the
 * mpolar grid (T, R) with a 2D NFFT of cut-off m and stores the samples in
 * f.  The time of the transform itself is stored in GLOBAL_elapsed_time.
 *
 * Returns EXIT_SUCCESS, or -1 when the node/weight arrays cannot be
 * allocated.
 */
static int mpolar_fft(fftw_complex *f_hat, int NN, fftw_complex *f, int T, int R, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and frequencies  */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */

  double *x, *w;                        /**< knots and associated weights     */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*T*R/2*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*T*R/4*(sizeof(double)));
  if (w==NULL)
  {
    nfft_free(x);                       /* do not leak the knots on failure  */
    return -1;
  }

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                  FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init nodes from mpolar grid*/
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
  }

  /** precompute psi, the entries of the matrix B */
  if(my_nfft_plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&my_nfft_plan);

  /** init Fourier coefficients from given image */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_nfft_plan.f_hat[k] = f_hat[k];

  t0 = getticks();

  /** NFFT-2D */
  nfft_trafo(&my_nfft_plan);

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(j=0;j<my_nfft_plan.M_total;j++)
    f[j] = my_nfft_plan.f[j];

  /** finalise the plans and free the variables */
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
/*
 * Start a timing interval: store the current gettime() value in
 * g_fTimeStart and the current getticks() value in g_tickStart.
 * NOTE(review): presumably a matching "toc" routine reads both globals to
 * report elapsed time and ticks - it is not visible here.
 */
void tic () {
  g_fTimeStart = gettime ();   /* clock reading at the start of the interval */
  g_tickStart = getticks ();   /* tick counter reading, taken right after    */
}
/*
 * Arm the LCD timer for the splash screen: set lcd_timeout to the current
 * tick count plus 5000 ms worth of ticks, then raise the lcd_timer flag.
 * NOTE(review): presumably other code compares getticks() against
 * lcd_timeout while lcd_timer is set - that consumer is not visible here.
 */
void lcd_splashscreen(void) {
  /* the deadline is written before the flag is raised */
  lcd_timeout = getticks() + MS_TO_TICKS(1000 * 5);
  lcd_timer = true;
}
/*
 * Run one NFSFT benchmark and print its timings to stdout.
 *
 * trafo_adjoint  0 runs nfsft_trafo(), anything else runs nfsft_adjoint()
 * N, M           bandwidth and number of nodes handed to nfsft_init_guru()
 * x              node coordinates, two doubles per node
 * f_hat          spherical Fourier coefficients (read when trafo_adjoint == 0)
 * f              samples (read when trafo_adjoint != 0)
 * m              cut-off parameter handed to nfsft_init_guru()
 * nfsft_flags    extra NFSFT flags, OR-ed with the ones fixed below
 * psi_flags      extra NFFT flags, OR-ed with the ones fixed below
 *
 * Printed columns: node-dependent precomputation time, the three
 * plan.MEASURE_TIME_t entries, the remainder, and the total (precomputation
 * plus transform).
 */
void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
{
  nfsft_plan plan;
  ticks tick_begin, tick_end;
  double sec_pre, sec_total;
  int node, deg, ord;

  /* All plan arrays are allocated by the guru initialiser. */
  nfsft_init_guru(&plan, N, M,
    nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
    PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE,
    m);

  /* Copy both coordinates of every node into the plan. */
  for (node = 0; node < plan.M_total; node++)
  {
    plan.x[2*node+0] = x[2*node+0];
    plan.x[2*node+1] = x[2*node+1];
  }

  if (trafo_adjoint != 0)
  {
    /* adjoint: samples are the input, coefficients start at zero */
    for (node = 0; node < plan.M_total; node++)
      plan.f[node] = f[node];

    memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
  }
  else
  {
    /* forward: clear everything, then copy the coefficients (k,n), |n| <= k */
    memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));

    for (deg = 0; deg <= plan.N; deg++)
    {
      for (ord = -deg; ord <= deg; ord++)
        plan.f_hat[NFSFT_INDEX(deg,ord,&plan)] = f_hat[NFSFT_INDEX(deg,ord,&plan)];
    }
  }

  /* node-dependent precomputation, timed on its own */
  tick_begin = getticks();
  nfsft_precompute_x(&plan);
  tick_end = getticks();
  sec_pre = nfft_elapsed_seconds(tick_end,tick_begin);

  if (trafo_adjoint != 0)
    nfsft_adjoint(&plan);
  else
    nfsft_trafo(&plan);

  /* the total is measured from the same start tick as the precomputation */
  tick_end = getticks();
  sec_total = nfft_elapsed_seconds(tick_end,tick_begin);

  /* without the measurement macros the per-stage timers are reported as 0 */
#ifndef MEASURE_TIME
  plan.MEASURE_TIME_t[0] = 0.0;
  plan.MEASURE_TIME_t[2] = 0.0;
#endif

#ifndef MEASURE_TIME_FFTW
  plan.MEASURE_TIME_t[1] = 0.0;
#endif

  printf("%.6e %.6e %6e %.6e %.6e %.6e\n",
         sec_pre,
         plan.MEASURE_TIME_t[0],
         plan.MEASURE_TIME_t[1],
         plan.MEASURE_TIME_t[2],
         sec_total-sec_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2],
         sec_total);

  /* release the plan */
  nfsft_finalize(&plan);
}
/** * @brief Cool down */ void Cool () { printf (" Cooling down ... \n"); ticks cgstart = getticks(); double len, rndn, dl, prob; size_t i = 0, j = 0; while (i < m_maxit && m_ct > m_ft) { // Swap two sites SwapRange (m_sol, m_nsol); len = Lengthiness (m_k, m_nsol); // Shorter than before: keep if (len <= m_len) { m_sol = m_nsol; m_len = len; if (m_verb) { if (j % 5 == 0 && j > 0) printf ("\n"); printf (" %04zu: %02.4f", i, m_len); j++; } } else if (m_accworse) { // accept worse solution with some probability dl = len - m_len; rndn = gsl_rng_uniform (m_rng); prob = exp (- std::abs(dl) / m_ct); if (prob > rndn) { m_sol = m_nsol; m_len = len; if (m_verb) { if (j % 5 == 0 && j > 0) printf ("\n"); printf (" %04zu: %02.4f", i, m_len); j++; } } } if (m_bestlen > m_len) { m_best = m_sol; m_bestlen = m_len; m_bestn = i; } // decrease temperature m_ct -= m_cr; i++; } printf ("\n ... done. Best trip length (" JL_SIZE_T_SPECIFIER "): %.4f, WTime: %.4f seconds.\n\n", m_bestn, m_bestlen, elapsed(getticks(), cgstart) / Toolbox::Instance()->ClockRate()); }
/* Set every element of v[0..dim) to 0.0. */
static void dp_acc_clear(double *v, int dim)
{
    int i;
    for (i=0;i<dim;i++) v[i] = 0.0;
}

/* One pass of b += scale*a in plain scalar code (reference version). */
static void dp_acc_basic(double *b, double *a, double scale, int dim)
{
    int i;
    for (i=0;i<dim;i++) b[i] += scale*a[i];
}

/* One pass of c += scale*a, one paired load/FMA/store per iteration. */
static void dp_acc_hummer1(double *c, double *a, double scale, int dim)
{
    int i;
    for (i=0;i<dim;i+=2)
    {
        __stfpd(&c[i], __fxcpmadd( __lfpd(&c[i]), __lfpd(&a[i]), scale) );
    }
}

/* One pass of c += scale*a, two paired operations per iteration. */
static void dp_acc_hummer2(double *c, double *a, double scale, int dim)
{
    int i;
    for (i=0;i<dim;i+=4)
    {
        double _Complex a0, a2, c0, c2;
        a0 = __lfpd(&a[i  ]);
        a2 = __lfpd(&a[i+2]);
        c0 = __lfpd(&c[i  ]);
        c2 = __lfpd(&c[i+2]);
        c0 = __fxcpmadd(c0,a0,scale);
        c2 = __fxcpmadd(c2,a2,scale);
        __stfpd(&c[i  ],c0);
        __stfpd(&c[i+2],c2);
    }
}

/* One pass of c += scale*a, four paired operations per iteration. */
static void dp_acc_hummer4(double *c, double *a, double scale, int dim)
{
    int i;
    for (i=0;i<dim;i+=8)
    {
        double _Complex a0, a2, a4, a6;
        double _Complex c0, c2, c4, c6;
        a0 = __lfpd(&a[i  ]);
        a2 = __lfpd(&a[i+2]);
        a4 = __lfpd(&a[i+4]);
        a6 = __lfpd(&a[i+6]);
        c0 = __lfpd(&c[i  ]);
        c2 = __lfpd(&c[i+2]);
        c4 = __lfpd(&c[i+4]);
        c6 = __lfpd(&c[i+6]);
        c0 = __fxcpmadd(c0,a0,scale);
        c2 = __fxcpmadd(c2,a2,scale);
        c4 = __fxcpmadd(c4,a4,scale);
        c6 = __fxcpmadd(c6,a6,scale);
        __stfpd(&c[i  ],c0);
        __stfpd(&c[i+2],c2);
        __stfpd(&c[i+4],c4);
        __stfpd(&c[i+6],c6);
    }
}

/* One pass of c += scale*a, eight paired operations per iteration. */
static void dp_acc_hummer8(double *c, double *a, double scale, int dim)
{
    int i;
    for (i=0;i<dim;i+=16)
    {
        double _Complex a0, a2, a4, a6, a8, a10, a12, a14;
        double _Complex c0, c2, c4, c6, c8, c10, c12, c14;
        a0  = __lfpd(&a[i   ]);
        a2  = __lfpd(&a[i+ 2]);
        a4  = __lfpd(&a[i+ 4]);
        a6  = __lfpd(&a[i+ 6]);
        a8  = __lfpd(&a[i+ 8]);
        a10 = __lfpd(&a[i+10]);
        a12 = __lfpd(&a[i+12]);
        a14 = __lfpd(&a[i+14]);
        c0  = __lfpd(&c[i   ]);
        c2  = __lfpd(&c[i+ 2]);
        c4  = __lfpd(&c[i+ 4]);
        c6  = __lfpd(&c[i+ 6]);
        c8  = __lfpd(&c[i+ 8]);
        c10 = __lfpd(&c[i+10]);
        c12 = __lfpd(&c[i+12]);
        c14 = __lfpd(&c[i+14]);
        c0  = __fxcpmadd( c0, a0,scale);
        c2  = __fxcpmadd( c2, a2,scale);
        c4  = __fxcpmadd( c4, a4,scale);
        c6  = __fxcpmadd( c6, a6,scale);
        c8  = __fxcpmadd( c8, a8,scale);
        c10 = __fxcpmadd(c10,a10,scale);
        c12 = __fxcpmadd(c12,a12,scale);
        c14 = __fxcpmadd(c14,a14,scale);
        __stfpd(&c[i   ], c0);
        __stfpd(&c[i+ 2], c2);
        __stfpd(&c[i+ 4], c4);
        __stfpd(&c[i+ 6], c6);
        __stfpd(&c[i+ 8], c8);
        __stfpd(&c[i+10],c10);
        __stfpd(&c[i+12],c12);
        __stfpd(&c[i+14],c14);
    }
}

/* Print every index at which the reference b and the result c differ. */
static void dp_acc_report_mismatches(double *b, double *c, int dim)
{
    int i;
    for (i=0;i<dim;i++)
    {
        if (b[i] != c[i])
        {
            printf("%4d %30.15f %30.15f\n",i,b[i],c[i]);
        }
    }
}

/*
 * Benchmark "c += scale*a" for vector lengths 2^6 .. 2^19.
 *
 * For every length the scalar loop and four unrolled variants built on the
 * __lfpd / __fxcpmadd / __stfpd intrinsics are each warmed up once, then
 * timed over `count` passes with getticks().  Every intrinsic variant is
 * compared element-wise against the scalar result; mismatches are printed.
 * One row of tick counts per length goes to stdout, progress to stderr.
 *
 * The warm-up and the timed run of a variant call the same helper.  The
 * previous hand-copied 16-wide warm-up loaded into a4/a6/c4/c6 repeatedly
 * and then used a8..a14 / c8..c14 uninitialized.
 *
 * Returns 0 on success, 1 if a work vector cannot be allocated.
 */
int main(int argc, char* argv[])
{
    int k;

    fprintf(stderr,"BEGIN TESTING OF DP ACCUMULATE\n");

    printf("%18s %18s %18s %18s %18s %18s\n","dim","basic","1-hummer","2-hummers","4-hummers","8-hummers");

    for (k=6;k<20;k++)
    {
        /* powers of two >= 64, so every unroll width divides dim exactly */
        int dim = 1 << k;
        int count = 10;
        int j;

        unsigned long long t0, t1;
        unsigned long long dt0, dt1, dt2, dt3, dt4;

        double* a = NULL;
        double* b = NULL;
        double* c = NULL;

        double scale = 0.1;

        int i;

        /* posix_memalign reports failure through its return value only */
        if (posix_memalign((void**)&a, 16*sizeof(double), dim*sizeof(double)) != 0) a = NULL;
        if (posix_memalign((void**)&b, 16*sizeof(double), dim*sizeof(double)) != 0) b = NULL;
        if (posix_memalign((void**)&c, 16*sizeof(double), dim*sizeof(double)) != 0) c = NULL;
        if (a == NULL || b == NULL || c == NULL)
        {
            fprintf(stderr,"ERROR: cannot allocate vectors of length %d\n",dim);
            free(a);
            free(b);
            free(c);
            return(1);
        }

        for (i=0;i<dim;i++) a[i] = 1.0 - 2*(double)rand()/(double)RAND_MAX;

        fprintf(stderr,"BASIC VERSION\n");

        /* WARM-UP */
        dp_acc_clear(b, dim);
        dp_acc_basic(b, a, scale, dim);

        /* TIMING */
        dp_acc_clear(b, dim);
        t0 = getticks();
        for (j=0;j<count;j++) dp_acc_basic(b, a, scale, dim);
        t1 = getticks();
        dt0 = t1 - t0;

        fprintf(stderr,"INTRINSICS VERSION 1\n");

        /* WARM-UP */
        dp_acc_clear(c, dim);
        dp_acc_hummer1(c, a, scale, dim);

        /* TIMING */
        dp_acc_clear(c, dim);
        t0 = getticks();
        for (j=0;j<count;j++) dp_acc_hummer1(c, a, scale, dim);
        t1 = getticks();
        dt1 = t1 - t0;

        /* VERIFICATION */
        dp_acc_report_mismatches(b, c, dim);

        fprintf(stderr,"INTRINSICS VERSION 2\n");

        /* WARM-UP */
        dp_acc_clear(c, dim);
        dp_acc_hummer2(c, a, scale, dim);

        /* TIMING */
        dp_acc_clear(c, dim);
        t0 = getticks();
        for (j=0;j<count;j++) dp_acc_hummer2(c, a, scale, dim);
        t1 = getticks();
        dt2 = t1 - t0;

        /* VERIFICATION */
        dp_acc_report_mismatches(b, c, dim);

        fprintf(stderr,"INTRINSICS VERSION 3\n");

        /* WARM-UP */
        dp_acc_clear(c, dim);
        dp_acc_hummer4(c, a, scale, dim);

        /* TIMING */
        dp_acc_clear(c, dim);
        t0 = getticks();
        for (j=0;j<count;j++) dp_acc_hummer4(c, a, scale, dim);
        t1 = getticks();
        dt3 = t1 - t0;

        /* VERIFICATION */
        dp_acc_report_mismatches(b, c, dim);

        fprintf(stderr,"INTRINSICS VERSION 4\n");

        /* WARM-UP */
        dp_acc_clear(c, dim);
        dp_acc_hummer8(c, a, scale, dim);

        /* TIMING */
        dp_acc_clear(c, dim);
        t0 = getticks();
        for (j=0;j<count;j++) dp_acc_hummer8(c, a, scale, dim);
        t1 = getticks();
        dt4 = t1 - t0;

        /* VERIFICATION */
        dp_acc_report_mismatches(b, c, dim);

        printf("%18d %18llu %18llu %18llu %18llu %18llu\n",dim,dt0,dt1,dt2,dt3,dt4);

        free(a);
        free(b);
        free(c);
    }

    fprintf(stderr,"ALL DONE\n");

    return(0);
}
/*===========================================================================*
 *				sef_cb_init_fresh                            *
 *===========================================================================*/
/*
 * SEF fresh-start callback of the reincarnation server (RS).
 *
 * Builds the system process table from the boot image tables in four steps
 * (see the step comments below): fill in privileges and properties for every
 * boot-image system service, let each service run and send it its init
 * request, collect the outstanding init-ready replies, and finally obtain
 * each service's pid.  It then arms the periodic status alarm and, when
 * USE_LIVEUPDATE is enabled, forks a replica of RS and live-updates into it.
 *
 * Both parameters are unused.  Returns OK; every failure along the way ends
 * in panic().
 */
static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *UNUSED(info))
{
/* Initialize the reincarnation server. */
  struct boot_image *ip;
  int s,i;
  int nr_image_srvs, nr_image_priv_srvs, nr_uncaught_init_srvs;
  struct rproc *rp;
  struct rproc *replica_rp;
  struct rprocpub *rpub;
  struct boot_image image[NR_BOOT_PROCS];
  struct boot_image_priv *boot_image_priv;
  struct boot_image_sys *boot_image_sys;
  struct boot_image_dev *boot_image_dev;
  int pid, replica_pid;
  endpoint_t replica_endpoint;
  int ipc_to;
  int *calls;
  /* Call lists handed to fill_call_mask(): "all calls" and "no calls". */
  int all_c[] = { ALL_C, NULL_C };
  int no_c[] = { NULL_C };

  /* See if we run in verbose mode. */
  env_parse("rs_verbose", "d", 0, &rs_verbose, 0, 1);

  if ((s = sys_getinfo(GET_HZ, &system_hz, sizeof(system_hz), 0, 0)) != OK)
	  panic("Cannot get system timer frequency\n");

  /* Initialize the global init descriptor. */
  rinit.rproctab_gid = cpf_grant_direct(ANY, (vir_bytes) rprocpub,
      sizeof(rprocpub), CPF_READ);
  if(!GRANT_VALID(rinit.rproctab_gid)) {
      panic("unable to create rprocpub table grant: %d", rinit.rproctab_gid);
  }

  /* Initialize some global variables. */
  RUPDATE_INIT();
  shutting_down = FALSE;

  /* Get a copy of the boot image table. */
  if ((s = sys_getimage(image)) != OK) {
      panic("unable to get copy of boot image table: %d", s);
  }

  /* Determine the number of system services in the boot image table. */
  nr_image_srvs = 0;
  for(i=0;i<NR_BOOT_PROCS;i++) {
      ip = &image[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(ip->endpoint))) {
          continue;
      }
      nr_image_srvs++;
  }

  /* Determine the number of entries in the boot image priv table and make sure
   * it matches the number of system services in the boot image table.
   */
  nr_image_priv_srvs = 0;
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }
      nr_image_priv_srvs++;
  }
  if(nr_image_srvs != nr_image_priv_srvs) {
	panic("boot image table and boot image priv table mismatch");
  }

  /* Reset the system process table. */
  for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
      rp->r_flags = 0;
      rp->r_init_err = ERESTART;
      /* Each private slot points at the public slot with the same index. */
      rp->r_pub = &rprocpub[rp - rproc];
      rp->r_pub->in_use = FALSE;
      rp->r_pub->old_endpoint = NONE;
      rp->r_pub->new_endpoint = NONE;
  }

  /* Initialize the system process table in 4 steps, each of them following
   * the appearance of system services in the boot image priv table.
   * - Step 1: set privileges, sys properties, and dev properties (if any)
   * for every system service.
   */
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }

      /* Lookup the corresponding entries in other tables. */
      boot_image_info_lookup(boot_image_priv->endpoint, image,
          &ip, NULL, &boot_image_sys, &boot_image_dev);
      /* The slot index equals the entry's index in the priv table. */
      rp = &rproc[boot_image_priv - boot_image_priv_table];
      rpub = rp->r_pub;

      /*
       * Set privileges.
       */
      /* Get label. */
      strcpy(rpub->label, boot_image_priv->label);

      /* Force a static priv id for system services in the boot image. */
      rp->r_priv.s_id = static_priv_id(
          _ENDPOINT_P(boot_image_priv->endpoint));

      /* Initialize privilege bitmaps and signal manager. */
      rp->r_priv.s_flags = boot_image_priv->flags;          /* priv flags */
      rp->r_priv.s_init_flags = SRV_OR_USR(rp, SRV_I, USR_I); /* init flags */
      rp->r_priv.s_trap_mask= SRV_OR_USR(rp, SRV_T, USR_T);  /* traps */
      ipc_to = SRV_OR_USR(rp, SRV_M, USR_M);                /* targets */
      fill_send_mask(&rp->r_priv.s_ipc_to, ipc_to == ALL_M);
      rp->r_priv.s_sig_mgr= SRV_OR_USR(rp, SRV_SM, USR_SM);  /* sig mgr */
      rp->r_priv.s_bak_sig_mgr = NONE;                      /* backup sig mgr */

      /* Initialize kernel call mask bitmap. */
      calls = SRV_OR_USR(rp, SRV_KC, USR_KC) == ALL_C ? all_c : no_c;
      fill_call_mask(calls, NR_SYS_CALLS,
          rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE);

      /* Set the privilege structure. RS and VM are exceptions and are already
       * running.
       */
      if(boot_image_priv->endpoint != RS_PROC_NR &&
         boot_image_priv->endpoint != VM_PROC_NR) {
          if ((s = sys_privctl(ip->endpoint, SYS_PRIV_SET_SYS, &(rp->r_priv)))
              != OK) {
              panic("unable to set privilege structure: %d", s);
          }
      }

      /* Synch the privilege structure with the kernel. */
      if ((s = sys_getpriv(&(rp->r_priv), ip->endpoint)) != OK) {
          panic("unable to synch privilege structure: %d", s);
      }

      /*
       * Set sys properties.
       */
      rpub->sys_flags = boot_image_sys->flags;        /* sys flags */

      /*
       * Set dev properties.
       */
      rpub->dev_nr = boot_image_dev->dev_nr;          /* major device number */

      /* Build command settings. Also set the process name. */
      strlcpy(rp->r_cmd, ip->proc_name, sizeof(rp->r_cmd));
      rp->r_script[0]= '\0';
      build_cmd_dep(rp);

      strlcpy(rpub->proc_name, ip->proc_name, sizeof(rpub->proc_name));

      /* Initialize vm call mask bitmap. */
      calls = SRV_OR_USR(rp, SRV_VC, USR_VC) == ALL_C ? all_c : no_c;
      fill_call_mask(calls, NR_VM_CALLS, rpub->vm_call_mask, VM_RQ_BASE, TRUE);

      /* Scheduling parameters. */
      rp->r_scheduler = SRV_OR_USR(rp, SRV_SCH, USR_SCH);
      rp->r_priority = SRV_OR_USR(rp, SRV_Q, USR_Q);
      rp->r_quantum = SRV_OR_USR(rp, SRV_QT, USR_QT);

      /* Get some settings from the boot image table. */
      rpub->endpoint = ip->endpoint;

      /* Set some defaults. */
      rp->r_old_rp = NULL;                     /* no old version yet */
      rp->r_new_rp = NULL;                     /* no new version yet */
      rp->r_prev_rp = NULL;                    /* no prev replica yet */
      rp->r_next_rp = NULL;                    /* no next replica yet */
      rp->r_uid = 0;                           /* root */
      rp->r_check_tm = 0;                      /* not checked yet */
      rp->r_alive_tm = getticks();             /* currently alive */
      rp->r_stop_tm = 0;                       /* not exiting yet */
      rp->r_asr_count = 0;                     /* no ASR updates yet */
      rp->r_restarts = 0;                      /* no restarts so far */
      rp->r_period = 0;                        /* no period yet */
      rp->r_exec = NULL;                       /* no in-memory copy yet */
      rp->r_exec_len = 0;

      /* Mark as in use and active. */
      rp->r_flags = RS_IN_USE | RS_ACTIVE;
      rproc_ptr[_ENDPOINT_P(rpub->endpoint)]= rp;
      rpub->in_use = TRUE;
  }

  /* - Step 2: allow every system service in the boot image to run.
   */
  nr_uncaught_init_srvs = 0;
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }

      /* Lookup the corresponding slot in the system process table. */
      rp = &rproc[boot_image_priv - boot_image_priv_table];
      rpub = rp->r_pub;

      /* RS/VM are already running as we speak. */
      if(boot_image_priv->endpoint == RS_PROC_NR ||
         boot_image_priv->endpoint == VM_PROC_NR) {
          if ((s = init_service(rp, SEF_INIT_FRESH, rp->r_priv.s_init_flags)) != OK) {
              panic("unable to initialize %d: %d", boot_image_priv->endpoint, s);
          }
          /* VM will still send an RS_INIT message, though. */
          if (boot_image_priv->endpoint != RS_PROC_NR) {
              nr_uncaught_init_srvs++;
          }
          continue;
      }

      /* Allow the service to run. */
      if ((s = sched_init_proc(rp)) != OK) {
          panic("unable to initialize scheduling: %d", s);
      }
      if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
          panic("unable to initialize privileges: %d", s);
      }

      /* Initialize service. We assume every service will always get
       * back to us here at boot time.
       */
      if(boot_image_priv->flags & SYS_PROC) {
          if ((s = init_service(rp, SEF_INIT_FRESH, rp->r_priv.s_init_flags)) != OK) {
              panic("unable to initialize service: %d", s);
          }
          if(rpub->sys_flags & SF_SYNCH_BOOT) {
              /* Catch init ready message now to synchronize. */
              catch_boot_init_ready(rpub->endpoint);
          }
          else {
              /* Catch init ready message later. */
              nr_uncaught_init_srvs++;
          }
      }
  }

  /* - Step 3: let every system service complete initialization by
   * catching all the init ready messages left.
   */
  while(nr_uncaught_init_srvs) {
      catch_boot_init_ready(ANY);
      nr_uncaught_init_srvs--;
  }

  /* - Step 4: all the system services in the boot image are now running.
   * Complete the initialization of the system process table in collaboration
   * with other system services.
   */
  for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
      boot_image_priv = &boot_image_priv_table[i];

      /* System services only. */
      if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
          continue;
      }

      /* Lookup the corresponding slot in the system process table. */
      rp = &rproc[boot_image_priv - boot_image_priv_table];
      rpub = rp->r_pub;

      /* Get pid from PM. */
      rp->r_pid = getnpid(rpub->endpoint);
      if(rp->r_pid < 0) {
          panic("unable to get pid: %d", rp->r_pid);
      }
  }

  /* Set alarm to periodically check service status. */
  if (OK != (s=sys_setalarm(RS_DELTA_T, 0)))
      panic("couldn't set alarm: %d", s);

#if USE_LIVEUPDATE
  /* Now create a new RS instance and let the current
   * instance live update into the replica. Clone RS' own slot first.
   */
  rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
  if((s = clone_slot(rp, &replica_rp)) != OK) {
      panic("unable to clone current RS instance: %d", s);
  }

  /* Fork a new RS instance with root:wheel. */
  pid = srv_fork(0, 0);
  if(pid < 0) {
      panic("unable to fork a new RS instance: %d", pid);
  }
  /* Parent: the replica is the child pid.  Child: the replica is ourselves. */
  replica_pid = pid ? pid : getpid();
  if ((s = getprocnr(replica_pid, &replica_endpoint)) != 0)
	panic("unable to get replica endpoint: %d", s);
  replica_rp->r_pid = replica_pid;
  replica_rp->r_pub->endpoint = replica_endpoint;

  if(pid == 0) {
      /* New RS instance running. */

      /* Live update the old instance into the new one. */
      s = update_service(&rp, &replica_rp, RS_SWAP, 0);
      if(s != OK) {
          panic("unable to live update RS: %d", s);
      }
      cpf_reload();

      /* Clean up the old RS instance, the new instance will take over. */
      cleanup_service(rp);

      /* Ask VM to pin memory for the new RS instance. */
      if((s = vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0)) != OK) {
          panic("unable to pin memory for the new RS instance: %d", s);
      }
  }
  else {
      /* Old RS instance running. */

      /* Set up privileges for the new instance and let it run. */
      s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv));
      if(s != OK) {
          panic("unable to set privileges for the new RS instance: %d", s);
      }
      if ((s = sched_init_proc(replica_rp)) != OK) {
          panic("unable to initialize RS replica scheduling: %d", s);
      }
      s = sys_privctl(replica_endpoint, SYS_PRIV_YIELD, NULL);
      if(s != OK) {
          panic("unable to yield control to the new RS instance: %d", s);
      }
      NOT_REACHABLE;
  }
#endif /* USE_LIVEUPDATE */

  return(OK);
}
int app_func(void *arg) { struct rte_ring *ring1, *ring2; static struct message *msg1, *msg2; static struct message *msg3, *msg4; static struct message out_msg1, out_msg2; unsigned long long int t1 = 0, t2 = 0; int i = 0; out_msg1.message_id = 2; out_msg1.payload1 = 0; out_msg2.message_id = 2; out_msg2.payload1 = 0; #ifndef __baremetal__ cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(1, &cpuset); pthread_t current_thread = pthread_self(); pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); #endif if (*(int*)arg == 0) { ring1 = app1; ring2 = app2; // Only this branch is used for now, for 3 threads } else { ring1 = app3; ring2 = app3; } // Simple round robin message parser/scheduler for (i = 0; i < N; i++) { int received1 = 0, received2 = 0; int received3 = 0, received4 = 0; // Scan the command queue, barrier while (!(received1 && received2 && received3 && received4)) { /*#ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("app spin %d %d %d %d\n", received1, received2, received3, received4); #endif*/ if(!received1) { if(!rte_ring_mc_dequeue(ring1, (void **)&msg1)) { #ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("packet from io ring 1 %d %d\n", msg1->message_id, msg1->payload1); #endif received1 = -1; } } if(!received2) { if(!rte_ring_mc_dequeue(ring2, (void **)&msg2)) { received2 = -1; #ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("packet from io ring 2 %d %d\n", msg2->message_id, msg2->payload1); #endif } } if(!received3) { if(!rte_ring_mc_dequeue(ring1, (void **)&msg3)) { received3 = -1; #ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("packet from io ring 12 %d %d\n", msg3->message_id, msg3->payload1); #endif } } if(!received4) { if(!rte_ring_mc_dequeue(ring2, (void **)&msg4)) { received4 = -1; #ifdef DEBUG #ifdef __baremetal__ kprintf #else printf #endif ("packet from io ring 22 %d %d\n", msg4->message_id, msg4->payload1); #endif } } } if (t1 == 0) t1 = getticks(); else { #ifdef DEBUG 
#ifdef __baremetal__ kprintf #else printf #endif ("next iteration %d\n", (unsigned int)(getticks() - t1)); #endif } // Send stuff back, use mp function in case out_msg1.payload1++; int sent = rte_ring_mp_enqueue(out1, &out_msg1); out_msg2.payload1++; sent = rte_ring_mp_enqueue(out2, &out_msg2); } t2 = getticks(); #ifndef __baremetal__ printf #else kprintf #endif ("%d\n", ((unsigned int)(t2 - t1))/N); #ifndef __baremetal__ return 0; #else return ((unsigned int)(t2 - t1))/N; #endif }
/** * Convert the packet data to floating point and store in the buffers. * * Description:\n * Accepts a synchronized pair of packets (right + left or x + y), * converts them to single-precision floating point complex and stores * them in the input buffers. When there is not enough space in the * buffers to store the data, completed samples are flushed to make * room.\n * Notes:\n * No polarization conversion is performed, so linear and circular * polarization are maintained. */ void Channel::addData(ChannelPacket *rp, ChannelPacket *lp) { #if CHANNEL_TIMING uint64_t t0 = getticks(); #endif Assert(rp); Assert(lp); Assert(right); Assert(left); const ATADataPacketHeader& rpHdr = rp->getHeader(); const ATADataPacketHeader& lpHdr = lp->getHeader(); ASSERT(rpHdr.len == lpHdr.len); // complexInt16 *xData = reinterpret_cast<complexInt16 *> (rp->getData()); // complexInt16 *yData = reinterpret_cast<complexInt16 *> (lp->getData()); // complexFloat32 j(0, 1); // left->flush(); // right->flush(); // flush only when necessary if (right->getFree() < rpHdr.len || left->getFree() < lpHdr.len) { lock(); // flush as many iterations as possible uint64_t rDone = right->getDone(); uint64_t lDone = left->getDone(); uint64_t rNext = right->getNext(); uint64_t lNext = left->getNext(); while (!pendingList.empty()) { PendingList::iterator p = pendingList.begin(); // flush as many iterations as possible; an iteration can be // flushed when the DFB using its data has been completed. 
if (p->second) { Assert(lDone <= p->first); Assert(rDone <= p->first); if (p->first > lNext) { // uint64_t pFirst = p->first; Assert(p->first <= lNext); } Assert(p->first <= rNext); left->setDone(lDone = p->first); right->setDone(rDone = p->first); pendingList.erase(p); } else break; } unlock(); ++flushes; // there'd better be room now if (right->getFree() < rpHdr.len || left->getFree() < lpHdr.len) Fatal(ERR_IBO); } #if CHANNEL_TIMING uint64_t t1 = getticks(); #endif // copy the sample data into the input buffers. The data is simply // copiedThe data is converted // to single-precision floating point prior to storage. Since the // buffer length // is a multiple of the packet sample length, there should always // be enough room to do the copy without wrapping. ComplexInt16 *rData = static_cast<ComplexInt16 *> (right->getWrite(lpHdr.len)); Assert(rData); ComplexInt16 *lData = static_cast<ComplexInt16 *> (left->getWrite(lpHdr.len)); Assert(lData); memcpy(rData, rp->getSamples(), rp->getDataSize()); memcpy(lData, lp->getSamples(), lp->getDataSize()); #if CHANNEL_TIMING uint64_t t2 = getticks(); #endif #ifdef notdef for (int32_t i = 0; i < rpHdr.len; ++i) { rData[i] = rp->getSample(i); lData[i] = lp->getSample(i); } #endif sampleCnt += rpHdr.len; right->setLast(lpHdr.len); left->setLast(lpHdr.len); ++curSeq; #if CHANNEL_TIMING uint64_t t3 = getticks(); ++timing.packets; float t = elapsed(t1, t0); if (t > timing.maxFlush) timing.maxFlush = t; timing.flush += t; timing.store += elapsed(t2, t1); timing.set += elapsed(t3, t2); timing.total += elapsed(t3, t0); #endif }
int main(int argc, char *argv[]) { int size; int curarg; uint8 *buf; Interface = SexyAL_Init(0); DriverTypes = Interface->EnumerateTypes(Interface); puts("\n*** Festalon v"FESTALON_VERSION); if(argc<=1) { printf("\tUsage: %s [options] file1.nsf file2.nsf ... fileN.nsf\n\n",argv[0]); puts("\tExample Options:"); puts("\t -rate 44100\t\tSet playback rate to 44100Hz(default: 48000Hz)."); puts("\t -quality 1\t\tSet quality to 1, the highest(the lowestis 0; default: 0)."); puts("\t -volume 200\t\tSet volume to 200%(default: 100%)."); puts("\t -buffering 100\t\tSet desired buffering level to 100 milliseconds(default: 100)."); puts("\t -lowpass 1\t\tTurn on lowpass filter(default: 0)."); puts("\t -lowpasscorner 8000\tSet lowpass corner frequency(at which the response is about -3dB) to 8000Hz(default: 10000Hz)."); puts("\t -lowpassorder 2\tSet lowpass filter order to 2(default: 2)."); puts("\t -record filename.wav\tRecords audio in MS PCM format. An existing file won't be overwritten."); puts("\t\t\t\tWhen playing multiple files, only the first opened file will have its music recorded."); puts("\t -ao x\t\t\tSelect output type/driver. Valid choices are:"); { int x = 0; while(DriverTypes[x].name) { printf("\t\t\t\t %s - %s",DriverTypes[x].short_name,DriverTypes[x].name); if(!x) printf("\t(*Default*)"); puts(""); x++; } } puts("\t -aodevice id\t\tSelect output device by id. 
The default is \"NULL\", which opens the default/preferred device."); puts("\t\t\t\t Try \"-aodevice help\" to see the list of devices."); Interface->Destroy(Interface); return(-1); } curarg = ParseArguments(argc, argv, TODArgs); if(-1 == (CurDriverIndex = FindCurDriver(DriverTypes, Config.ao))) return(-1); if(Config.aodevice) { if(!strcmp(Config.aodevice,"help")) { SexyAL_enumdevice *devices = Interface->EnumerateDevices(Interface, DriverTypes[CurDriverIndex].type); if(!devices) { printf("\tNo predefined output devices are available with this driver type.\n"); return(-1); } printf("\tOutput devices(ID, name) for \"%s\":\n",DriverTypes[CurDriverIndex].name); while(devices->next) { printf("\t %s, %s\n",devices->id,devices->name); devices = devices->next; } return(-1); } } if(Config.quality != 0 && Config.quality != 1) Config.quality = 0; tcgetattr(0,&oldtio); newtio=oldtio; newtio.c_lflag&=~(ICANON|ECHO); signal(SIGTERM,siggo); signal(SIGINT,siggo); tcsetattr(0,TCSANOW,&newtio); sa = fcntl(fileno(stdin), F_GETFL); fcntl(fileno(stdin), F_SETFL, O_NONBLOCK); for(;curarg < argc && !eexit; curarg++) //while(argc-->1 && !eexit) { FILE *fp; printf("\nLoading %s... ",argv[curarg]); if(!(fp=fopen(argv[curarg],"rb"))) {printf("Error opening file: %s\n",strerror(errno));continue;} fseek(fp,0,SEEK_END); size=ftell(fp); fseek(fp,0,SEEK_SET); buf=malloc(size); fread(buf,1,size,fp); fclose(fp); if(!(Player=FESTAI_Load(buf,size) )) { puts("Error loading file!"); free(buf); continue; } free(buf); puts(""); memset(&format, 0, sizeof(format)); memset(&buffering, 0, sizeof(buffering)); format.sampformat = SEXYAL_FMT_PCMFLOAT; format.channels = Player->OutChannels; format.rate = Config.rate; buffering.fragsizems = 10; // Granularity. 
buffering.ms = Config.buffering; printf(" Using \"%s\" audio driver:",DriverTypes[CurDriverIndex].name); if(!(Output=Interface->Open(Interface,Config.aodevice,&format,&buffering, DriverTypes[CurDriverIndex].type))) { puts("Error opening sound device."); continue; //return(-1); } putchar('\n'); printf(" Playback Rate:\t%dHz\n",format.rate); printf(" Output Channels:\t%d\n",format.channels); printf(" Output Format:\t"); if(format.sampformat == SEXYAL_FMT_PCMFLOAT) puts("\t32-bit Floating Point"); else { if(format.sampformat&0x1) printf("Signed, "); else printf("Unsigned, "); printf("%d bits\n",(format.sampformat >> 4) << 3); } printf(" Estimated Latency:\t%d ms\n",buffering.latency * 1000 / format.rate); FESTAI_SetSound(Player, format.rate, Config.quality); if(WRFilename) { if(!FCEUI_BeginWaveRecord(format.rate,Player->OutChannels,WRFilename)) { free(WRFilename); WRFilename = 0; } } //printf("%d:%d\n",buffering.fragsize, buffering.fragcount); format.sampformat=SEXYAL_FMT_PCMFLOAT; format.channels=Player->OutChannels; format.byteorder=0; Output->SetConvert(Output,&format); Config.volume = FESTAI_SetVolume(Player, Config.volume); FESTAI_Disable(Player, disabled); poosh(); puts(""); ShowInfo(); if(!FESTAI_SetLowpass(Player, Config.lowpass, Config.lowpasscorner, Config.lowpassorder)) { puts(" Error activating lowpass filter!"); } else if(Config.lowpass) printf(" Lowpass filter on. 
Corner frequency: %dHz, Order: %d\n",Config.lowpasscorner,Config.lowpassorder); current=FESTAI_SongControl(Player,0,0); lastms = ~0; frames_written = 0; puts("\n\n"); ShowStatus(1,1); //#define TICKEROO #ifdef TICKEROO static uint64 last_tick,total_ticks,total; total = 0; total_ticks = 0; #endif while(!eexit) { static char t[3]={0,0,0}; int len; float *buf; ShowStatus(0,0); if(paused) { usleep(10); } else { #ifdef TICKEROO last_tick=getticks(); #endif buf=FESTAI_Emulate(Player, &len); #ifdef TICKEROO total_ticks += getticks() - last_tick; total += len; #endif if(WRFilename) FCEU_WriteWaveData(buf, len); frames_written += len; Output->Write(Output,buf,len); #ifdef TICKEROO if(total >= 100000/2) { printf("%8f\n", (double)total*1000000/total_ticks); rmode(); exit(1); } #endif } while(read(fileno(stdin),&t[0],1)>=0) { static char kcc[20] = { '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')'}; int x; for(x=0; x<20; x++) if(kcc[x] == t[0]) { disabled^=1<<x; FESTAI_Disable(Player,disabled); poosh(); ShowStatus(0,1); break; } if(t[2]==27 && t[1]==91) switch(t[0]) { case 65:current=FESTAI_SongControl(Player,10,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; case 66:current=FESTAI_SongControl(Player,-10,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; case 67:current=FESTAI_SongControl(Player,1,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; case 68:current=FESTAI_SongControl(Player,-1,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; } else switch(tolower(t[0])) { case 10: lastms = ~0;frames_written=0;poosh();current=FESTAI_SongControl(Player, 0,0);ShowStatus(1,1);break; case '`': poosh();break; case 'a': Config.volume-=25; Config.volume = FESTAI_SetVolume(Player, Config.volume);poosh();ShowStatus(0,1);break; case 's': Config.volume+=25; Config.volume = FESTAI_SetVolume(Player, Config.volume);poosh();ShowStatus(0,1);break; case '_': case '-': Config.volume--; 
FESTAI_SetVolume(Player,Config.volume);poosh();ShowStatus(0,1);break; case '+': case '=': Config.volume++; FESTAI_SetVolume(Player,Config.volume);poosh();ShowStatus(0,1);break; case 'p': TogglePause();break; case 'q': goto exito; /* Alternate song selection keys. Especially needed for DOS port. */ case '\'':current=FESTAI_SongControl(Player,10,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; case ';':current=FESTAI_SongControl(Player,-10,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; case '.':current=FESTAI_SongControl(Player,1,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; case ',':current=FESTAI_SongControl(Player,-1,0);poosh(); lastms = ~0;frames_written=0;ShowStatus(1,1);break; } t[2]=t[1]; t[1]=t[0]; } } exito: if(WRFilename) { FCEUI_EndWaveRecord(); free(WRFilename); WRFilename = 0; } poosh(); FESTAI_Close(Player); if(Output) Output->Close(Output); Output = 0; } rmode(); free(DriverTypes); if(Interface) Interface->Destroy(Interface); return(0); }
/**
 * Entry point for all mapping strategies.
 *
 * Computes one partition index per actor with the strategy selected in
 * opt->mapping_strategy, converts the partition into `mapping` and, at
 * verbosity level 1 or higher, prints the result together with the time
 * spent in the strategy.
 *
 * With a single processor, or with a strategy value that is not handled
 * here, every actor stays on partition 0.
 *
 * @param network  network to map (must not be NULL)
 * @param opt      mapping options (must not be NULL)
 * @param mapping  receives the resulting mapping (must not be NULL)
 * @return ORCC_OK, or the value returned by the selected strategy
 */
int do_mapping(network_t *network, options_t *opt, mapping_t *mapping) {
    int ret = ORCC_OK;
    idx_t *part;
    ticks startTime, endTime;
    assert(network != NULL);
    assert(opt != NULL);
    assert(mapping != NULL);

    /* Zero-initialised: a strategy that is not handled below used to leave
     * this array indeterminate before set_mapping_from_partition() read it. */
    part = (idx_t*) calloc(network->nb_actors, sizeof(idx_t));
    assert(part != NULL || network->nb_actors == 0);

    if(check_verbosity(ORCC_VL_VERBOSE_2)) {
        print_network(network);
    }

    startTime = getticks();

    if (opt->nb_processors != 1) {
        switch (opt->mapping_strategy) {
#ifdef METIS_ENABLE
        case ORCC_MS_METIS_REC:
            ret = do_metis_recursive_partition(network, opt, part);
            break;
        case ORCC_MS_METIS_KWAY_CV:
            ret = do_metis_kway_partition(network, opt, part, METIS_OBJTYPE_CUT); /*TODO : should be METIS_OBJTYPE_VOL : Metis seem's to invert its options */
            break;
        case ORCC_MS_METIS_KWAY_EC:
            ret = do_metis_kway_partition(network, opt, part, METIS_OBJTYPE_VOL); /*TODO : should be METIS_OBJTYPE_CUT : Metis seem's to invert its options */
            break;
#endif
        case ORCC_MS_ROUND_ROBIN:
            ret = do_round_robbin_mapping(network, opt, part);
            break;
        case ORCC_MS_QM:
            ret = do_quick_mapping(network, opt, part);
            break;
        case ORCC_MS_WLB:
            ret = do_weighted_round_robin_mapping(network, opt, part);
            break;
        case ORCC_MS_COWLB:
            ret = do_weighted_round_robin_comm_mapping(network, opt, part);
            break;
        case ORCC_MS_KRWLB:
            ret = do_KLR_mapping(network, opt, part);
            break;
        default:
            /* unhandled strategy: keep the all-zero partition */
            break;
        }
    } else {
        /* single processor: every actor goes to partition 0 */
        int i;
        for (i = 0; i < network->nb_actors; i++) {
            part[i] = 0;
        }
    }

    endTime = getticks();

    set_mapping_from_partition(network, part, mapping);

    if(check_verbosity(ORCC_VL_VERBOSE_1)) {
        print_mapping(mapping);
        print_load_balancing(mapping);
        print_edge_cut(network);
        print_orcc_trace(ORCC_VL_VERBOSE_2, "Mapping time : %2.lf", elapsed(endTime, startTime));
    }

    free(part);
    return ret;
}
int main(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr,"ERROR: no port provided\n"); exit(1); } // create a new socket int my_socket; my_socket = create_new_socket(); if (my_socket < 0) error("ERROR: failed to create new socket"); int portno; portno = atoi(argv[1]); struct sockaddr_in serv_addr; sockaddr_generator(&serv_addr, NULL, portno); //bind if (bind(my_socket, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) error("ERROR: binding to socket failed"); // listen: on my_socket with queue size 5 listen(my_socket,100); printf("===tcp setup time experiment===\n"); while(1) { int kk; int loop_times = 100; int newsockfd; char* ack = "a"; char buffer[256]; int read_n, write_n; printf("ready and wait to run tcp setup and tear down for %d times.\n", loop_times); double sum = 0.0; double sum2 = 0.0; double sum3 = 0.0; for( kk = 0 ; kk < loop_times; kk ++ ) { bzero(buffer,256); // accept will block until some client connect to the port newsockfd = accept_incoming_connection(my_socket); /*if (newsockfd < 0) */ /*error("ERROR: error when accepting connection");*/ // read and print the message sent from client // read should block too, not sure ?? /*read_n = read(newsockfd, buffer, 255);*/ // write ack back to client /*write_n = write(newsockfd, ack, strlen(ack));*/ /*ticks t0 = getticks();*/ read_n = read(newsockfd, buffer, 255); ticks t1 = getticks(); /*if (read_n < 0) error("ERROR: reading from socket failed");*/ /*if (write_n <= 0) error("ERROR: writing to socket failed");*/ close(newsockfd); ticks t2 = getticks(); double etime = elapsed(t2, t1); /*double etime2 = elapsed(t1, t0);*/ /*double etime3 = elapsed(t2, t0);*/ sum += etime; /*sum2 += etime2;*/ /*sum3 += etime3;*/ } printf("%f\n", (sum / loop_times) * 0.417 / 1e6); /*printf("%f\n", (sum2 / loop_times) * 0.417 / 1e6);*/ /*printf("%f\n", (sum3 / loop_times) * 0.417 / 1e6);*/ // close connection printf("Log: client left!\n\n"); } close(my_socket); return 0; }
/**
 * Iteratively reconstructs an N x N image from sampled data using the
 * mri_inh_2d1d transform and the complex solver.
 *
 * Input files read: "readout_time.dat" (one readout time per sample),
 * "inh.dat" (N*N values), `filename` (per sample: two node coordinates plus
 * the real and imaginary part of the measurement) and, when `weight` is
 * non-zero, "weights.dat". The result is written to "output_real.dat" and
 * "output_imag.dat".
 *
 * NOTE(review): none of the fopen()/fscanf() results are checked; a missing
 * or short input file leads to a NULL stream or uninitialised values.
 *
 * @param filename  file with the sample nodes and measured values
 * @param N         image side length (N*N coefficients are processed)
 * @param M         number of samples
 * @param iteration maximum number of solver iterations
 * @param weight    non-zero to load precomputed weights from "weights.dat"
 */
static void reconstruct(char* filename,int N,int M,int iteration , int weight)
{
  int j,k,l;
  double time,min_time,max_time,min_inh,max_inh;
  ticks t0, t1;
  double t,real,imag;
  double w,epsilon=0.0000003; /* epsilon is the break criterion for the iteration */;
  mri_inh_2d1d_plan my_plan;
  solver_plan_complex my_iplan;
  FILE* fp,*fw,*fout_real,*fout_imag,*finh,*ftime;
  int my_N[3],my_n[3];
  int flags = PRE_PHI_HUT| PRE_PSI |MALLOC_X| MALLOC_F_HAT| MALLOC_F| FFTW_INIT| FFT_OUT_OF_PLACE;
  unsigned infft_flags = CGNR | PRECOMPUTE_DAMP;
  double Ts;
  double W,T;
  int N3;
  int m=2;
  double sigma = 1.25;

  ftime=fopen("readout_time.dat","r");
  finh=fopen("inh.dat","r");

  /* first pass over the readout times: find their minimum and maximum */
  min_time=INT_MAX;
  max_time=INT_MIN;
  for(j=0;j<M;j++) {
    fscanf(ftime,"%le ",&time);
    if(time<min_time) min_time = time;
    if(time>max_time) max_time = time;
  }
  fclose(ftime);

  /* Ts is the midpoint of the readout interval */
  Ts=(min_time+max_time)/2.0;

  /* first pass over inh.dat: find the minimum and maximum value */
  min_inh=INT_MAX;
  max_inh=INT_MIN;
  for(j=0;j<N*N;j++) {
    fscanf(finh,"%le ",&w);
    if(w<min_inh) min_inh = w;
    if(w>max_inh) max_inh = w;
  }
  fclose(finh);

  /* size of the third dimension, derived from the largest |inh| value and
     the length of the readout interval */
  N3=ceil((MAX(fabs(min_inh),fabs(max_inh))*(max_time-min_time)/2.0+(m)/(2*sigma))*4*sigma);
  /* N3 has to be even */
  if(N3%2!=0) N3++;

  /* T and W are the scale factors applied to the times and inh values below */
  T=((max_time-min_time)/2.0)/(0.5-((double) (m))/N3);
  W=N3/T;

  my_N[0]=N; my_n[0]=ceil(N*sigma);
  my_N[1]=N; my_n[1]=ceil(N*sigma);
  my_N[2]=N3; my_n[2]=N3;

  /* initialise nfft */
  mri_inh_2d1d_init_guru(&my_plan, my_N, M, my_n, m, sigma, flags, FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /* precompute lin psi if set */
  if(my_plan.plan.nfft_flags & PRE_LIN_PSI) nfft_precompute_lin_psi(&my_plan.plan);

  if (weight) infft_flags = infft_flags | PRECOMPUTE_WEIGHT;

  /* initialise my_iplan, advanced */
  solver_init_advanced_complex(&my_iplan,(nfft_mv_plan_complex*)(&my_plan), infft_flags );

  /* get the weights */
  if(my_iplan.flags & PRECOMPUTE_WEIGHT) {
    fw=fopen("weights.dat","r");
    for(j=0;j<my_plan.M_total;j++) {
      fscanf(fw,"%le ",&my_iplan.w[j]);
    }
    fclose(fw);
  }

  /* get the damping factors: 1 inside the circle of radius N/2 around the
     image centre, 0 outside */
  if(my_iplan.flags & PRECOMPUTE_DAMP) {
    for(j=0;j<N;j++){
      for(k=0;k<N;k++) {
        int j2= j-N/2;
        int k2= k-N/2;
        double r=sqrt(j2*j2+k2*k2);
        if(r>(double) N/2) my_iplan.w_hat[j*N+k]=0.0;
        else my_iplan.w_hat[j*N+k]=1.0;
      }
    }
  }

  /* read the nodes and measured values; the readout times are read a second
     time, shifted by Ts and scaled by T */
  fp=fopen(filename,"r");
  ftime=fopen("readout_time.dat","r");
  for(j=0;j<my_plan.M_total;j++) {
    fscanf(fp,"%le %le %le %le",&my_plan.plan.x[2*j+0],&my_plan.plan.x[2*j+1],&real,&imag);
    my_iplan.y[j]=real+ _Complex_I*imag;
    fscanf(ftime,"%le ",&my_plan.t[j]);
    my_plan.t[j] = (my_plan.t[j]-Ts)/T;
  }
  fclose(fp);
  fclose(ftime);

  /* second pass over inh.dat: store the values scaled by W */
  finh=fopen("inh.dat","r");
  for(j=0;j<N*N;j++) {
    fscanf(finh,"%le ",&my_plan.w[j]);
    my_plan.w[j]/=W;
  }
  fclose(finh);

  if(my_plan.plan.nfft_flags & PRE_PSI) {
    nfft_precompute_psi(&my_plan.plan);
  }
  if(my_plan.plan.nfft_flags & PRE_FULL_PSI) {
    nfft_precompute_full_psi(&my_plan.plan);
  }

  /* init some guess */
  for(j=0;j<my_plan.N_total;j++) {
    my_iplan.f_hat_iter[j]=0.0;
  }

  t0 = getticks();

  /* inverse trafo */
  solver_before_loop_complex(&my_iplan);
  for(l=0;l<iteration;l++) {
    /* break if dot_r_iter is smaller than epsilon*/
    if(my_iplan.dot_r_iter<epsilon) break;
    fprintf(stderr,"%e, %i of %i\n",sqrt(my_iplan.dot_r_iter), l+1,iteration);
    solver_loop_one_step_complex(&my_iplan);
  }

  t1 = getticks();
  /* NOTE(review): the elapsed time is computed but never used in this function */
  t = nfft_elapsed_seconds(t1,t0);

  fout_real=fopen("output_real.dat","w");
  fout_imag=fopen("output_imag.dat","w");
  for (j=0;j<N*N;j++) {
    /* undo the shift again (the readout times were shifted by Ts above) */
    my_iplan.f_hat_iter[j]*=cexp(-2.0*_Complex_I*KPI*Ts*my_plan.w[j]*W);
    fprintf(fout_real,"%le ",creal(my_iplan.f_hat_iter[j]));
    fprintf(fout_imag,"%le ",cimag(my_iplan.f_hat_iter[j]));
  }
  fclose(fout_real);
  fclose(fout_imag);

  solver_finalize_complex(&my_iplan);
  mri_inh_2d1d_finalize(&my_plan);
}
void process_image(IplImage* frame, int draw) { int i, j; float t; uint8_t* pixels; int nrows, ncols, ldim; #define MAXNDETECTIONS 2048 int ndetections; float rcsq[4*MAXNDETECTIONS]; static IplImage* gray = 0; static IplImage* pyr[5] = {0, 0, 0, 0, 0}; /* ... */ // if(!pyr[0]) { // gray = cvCreateImage(cvSize(frame->width, frame->height), frame->depth, 1); // pyr[0] = gray; pyr[1] = cvCreateImage(cvSize(frame->width/2, frame->height/2), frame->depth, 1); pyr[2] = cvCreateImage(cvSize(frame->width/4, frame->height/4), frame->depth, 1); pyr[3] = cvCreateImage(cvSize(frame->width/8, frame->height/8), frame->depth, 1); pyr[4] = cvCreateImage(cvSize(frame->width/16, frame->height/16), frame->depth, 1); } // get grayscale image if(frame->nChannels == 3) cvCvtColor(frame, gray, CV_RGB2GRAY); else cvCopy(frame, gray, 0); // perform detection with the pico library t = getticks(); if(usepyr) { int nd; // pyr[0] = gray; pixels = (uint8_t*)pyr[0]->imageData; nrows = pyr[0]->height; ncols = pyr[0]->width; ldim = pyr[0]->widthStep; ndetections = find_objects(rcsq, MAXNDETECTIONS, cascade, angle, pixels, nrows, ncols, ldim, scalefactor, stridefactor, MAX(16, minsize), MIN(128, maxsize)); for(i=1; i<5; ++i) { cvResize(pyr[i-1], pyr[i], CV_INTER_LINEAR); pixels = (uint8_t*)pyr[i]->imageData; nrows = pyr[i]->height; ncols = pyr[i]->width; ldim = pyr[i]->widthStep; nd = find_objects(&rcsq[4*ndetections], MAXNDETECTIONS-ndetections, cascade, angle, pixels, nrows, ncols, ldim, scalefactor, stridefactor, MAX(64, minsize>>i), MIN(128, maxsize>>i)); for(j=ndetections; j<ndetections+nd; ++j) { rcsq[4*j+0] = (1<<i)*rcsq[4*j+0]; rcsq[4*j+1] = (1<<i)*rcsq[4*j+1]; rcsq[4*j+2] = (1<<i)*rcsq[4*j+2]; } ndetections = ndetections + nd; } } else {
/*
void bench_dsp(t_bench *x, t_signal **sp, short *count){
    if(x->t_objmode == BENCH_IN){
        if(!count[0])
            dsp_add(bench_perform_in, 4, x, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
        else
            dsp_add(bench_perform_in_connected, 5, x, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
    }
    else
        dsp_add(bench_perform_out, 5, x, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
}
*/

/**
 * Perform routine that stamps the current tick count, converted to double,
 * into the first sample of the second output vector (outs[1][0]).
 *
 * No other sample is written and no input is read; all remaining parameters
 * are unused here.
 */
void bench_perform64_in(t_bench *x, t_object *dsp64, double **ins, long numins, double **outs, long numouts, long sampleframes, long flags, void *userparam)
{
    const ticks now = getticks();
    double *stamp_out = outs[1];

    stamp_out[0] = (double)now;
}
/**
 * Runs one fastsum benchmark on the data set read from `infile` and prints
 * one line of timings to stdout.
 *
 * Input format: the three integers d, L and M, then L*d source coordinates,
 * L complex coefficients (real and imaginary part) and M*d target
 * coordinates. All coordinates are scaled by r_max = 1/4 - eps_B/2.
 *
 * The printed line holds the eight plan timers, the untimed remainder, the
 * total time of precomputation plus transform, and the three timers of each
 * of the two sub-plans mv1 and mv2. Timers that were not compiled in are
 * printed as zero.
 *
 * @param infile  stream holding the test data
 * @param n, m, p, kernel, c, eps_I, eps_B  forwarded to fastsum_init_guru()
 * @return 0 on success, 1 when the input could not be parsed
 */
int bench_openmp(FILE *infile, int n, int m, int p, C (*kernel)(R, int, const R *), R c, R eps_I, R eps_B)
{
  fastsum_plan my_fastsum_plan;
  int d, L, M;
  int t, j;
  R re, im;
  /* Use the eps_B argument: the plan is not initialised yet at this point,
     so reading my_fastsum_plan.eps_B here would be an uninitialised read. */
  const R r_max = K(0.25) - eps_B / K(2.0);
  ticks t0, t1;
  R tt_total;
  int status = 1;

  /* Without a valid header the plan cannot be sized. */
  if (fscanf(infile, "%d %d %d", &d, &L, &M) != 3)
    return status;

#ifdef _OPENMP
  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_threads.plan");
#else
  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_single.plan");
#endif

  fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B);

#ifdef _OPENMP
  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_threads.plan");
#else
  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_single.plan");
#endif

  /* source nodes */
  for (j = 0; j < L; j++)
  {
    for (t = 0; t < d; t++)
    {
      R v;
      if (fscanf(infile, __FR__, &v) != 1)
        goto cleanup;
      my_fastsum_plan.x[d * j + t] = v * r_max;
    }
  }

  /* source coefficients */
  for (j = 0; j < L; j++)
  {
    if (fscanf(infile, __FR__ " " __FR__, &re, &im) != 2)
      goto cleanup;
    my_fastsum_plan.alpha[j] = re + II * im;
  }

  /* target nodes */
  for (j = 0; j < M; j++)
  {
    for (t = 0; t < d; t++)
    {
      R v;
      if (fscanf(infile, __FR__, &v) != 1)
        goto cleanup;
      my_fastsum_plan.y[d * j + t] = v * r_max;
    }
  }

  /** precomputation */
  t0 = getticks();
  fastsum_precompute(&my_fastsum_plan);

  /** fast computation */
  fastsum_trafo(&my_fastsum_plan);
  t1 = getticks();
  tt_total = NFFT(elapsed_seconds)(t1, t0);

  /* Timers that are not compiled in are reported as zero. */
#ifndef MEASURE_TIME
  for (t = 0; t < 8; t++)
    my_fastsum_plan.MEASURE_TIME_t[t] = K(0.0);

  my_fastsum_plan.mv1.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.mv1.MEASURE_TIME_t[2] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[2] = K(0.0);
#endif
#ifndef MEASURE_TIME_FFTW
  my_fastsum_plan.mv1.MEASURE_TIME_t[1] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[1] = K(0.0);
#endif

  printf(
      "%.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ "\n",
      my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1],
      my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3],
      my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5],
      my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7],
      tt_total - my_fastsum_plan.MEASURE_TIME_t[0]
          - my_fastsum_plan.MEASURE_TIME_t[1]
          - my_fastsum_plan.MEASURE_TIME_t[2]
          - my_fastsum_plan.MEASURE_TIME_t[3]
          - my_fastsum_plan.MEASURE_TIME_t[4]
          - my_fastsum_plan.MEASURE_TIME_t[5]
          - my_fastsum_plan.MEASURE_TIME_t[6]
          - my_fastsum_plan.MEASURE_TIME_t[7],
      tt_total,
      my_fastsum_plan.mv1.MEASURE_TIME_t[0], my_fastsum_plan.mv1.MEASURE_TIME_t[1],
      my_fastsum_plan.mv1.MEASURE_TIME_t[2],
      my_fastsum_plan.mv2.MEASURE_TIME_t[0], my_fastsum_plan.mv2.MEASURE_TIME_t[1],
      my_fastsum_plan.mv2.MEASURE_TIME_t[2]);

  status = 0;

cleanup:
  /* The plan was initialised above, so it is released on every path. */
  fastsum_finalize(&my_fastsum_plan);

  return status;
}
/*
t_int *bench_perform_in(t_int *w){
    t_bench *x = (t_bench *)w[1];
    t_float *out1 = (t_float *)w[2];
    t_float *out2 = (t_float *)w[3];
    int n = (int)w[4];
    ticks t = getticks();
    unsigned long l1, l2;
    l1 = (unsigned long)((t & 0xffffffff00000000LL) >> 32);
    l2 = (unsigned long)(t & 0xffffffffLL);
    out2[0] = *((float *)(&l1));
    out2[1] = *((float *)(&l2));
    return (w + 5);
}
*/

/**
 * Perform routine that stamps the current tick count, converted to double,
 * into outs[1][0] and then copies `sampleframes` samples from the first
 * input vector to the first output vector.
 *
 * The remaining parameters are unused here.
 */
void bench_perform64_in_connected(t_bench *x, t_object *dsp64, double **ins, long numins, double **outs, long numouts, long sampleframes, long flags, void *userparam)
{
    const ticks now = getticks();
    double *stamp_out = outs[1];

    stamp_out[0] = (double)now;

    /* pass the signal through unchanged */
    memcpy(outs[0], ins[0], sampleframes * sizeof(double));
}
/*
 * Records the current tick count in the file-level `ticks` variable and then
 * calls loopWork() forever; this function never returns.
 *
 * NOTE(review): the `const` qualifier on the void return type has no effect;
 * it is kept so the definition still matches any existing prototype.
 */
static const void mainLoop()
{
    ticks = getticks();

    for (;;) {
        loopWork();
    }
}
//------------------------------------------------------------------------------------------------ // Main function //------------------------------------------------------------------------------------------------ int main(int argc, char *argv[]){ //Variables //--------- int listen_port; int tries; int msg_size = 1000000; struct sockaddr_in serverAddr; struct sockaddr_in clientAddr; //Check input parameters //---------------------- if (argc!=5){ printf("\nERROR! Invalid number of parameters!\n"); printf("Please use: ./server -port number -samples Num_of_Samples\n"); exit(1); } //Set input port and message size //------------------------------- listen_port = atoi(argv[2]); if(listen_port<1024 || listen_port>32768){ printf("\nERROR! Valid port range is 1000 to 32768!\n"); exit(1); } //Set tries //--------- tries = atoi(argv[4]); if(tries<1){ printf("\nERROR! Samples should be a positive number!\n"); exit(1); } //Variables //--------- int sckTemp; int sckListen; socklen_t sinSize; char input[msg_size]; int i, j, k; int tmp; struct sockaddr_in switchAddr; ticks results[tries]; ticks start, end; //Server: assign socket - Binding //------------------------------- sinSize = sizeof(struct sockaddr_in); sckListen = socket(AF_INET,SOCK_STREAM,0); serverAddr.sin_family = AF_INET; serverAddr.sin_port = htons(listen_port); serverAddr.sin_addr.s_addr = htonl(INADDR_ANY); if(bind(sckListen,(struct sockaddr *)&serverAddr,sizeof(struct sockaddr))<0){ perror("bind"); exit(1); } //Server: listen for incomming connections //---------------------------------------- if(listen(sckListen,BACKLOG)<0){ perror("listen"); exit(1); } //Server: wait for new message - Blocking //--------------------------------------- sckTemp = accept(sckListen,(struct sockaddr *)&clientAddr, &sinSize); for(msg_size = 4096; msg_size<=0.5*1024*1024; msg_size+=16384){ for (i=0; i<tries; i++) results[i]=0; for(i=0; i<tries; i++){ //Server: Initialize buffer - read from socket 
//-------------------------------------------- bzero(input, msg_size); tmp=0; while(tmp<msg_size){ //RECORD START TIME start = getticks(); tmp+=read(sckTemp, input, msg_size-tmp); //RECORD END TIME end = getticks(); results[i] += end-start; } } //Compute avg, stdv and time in ns //------------------------------- double avg =0, bw=0; ticks sum =0; for(i=0; i<tries; i++){ sum += results[i]; } avg = sum / (double)(tries); bw = (msg_size / (double)(1024*1024)) / (double)(avg / (double)(3500000000)); //=> Mbytes / sec printf("Size: %d KB Peak BW: %lf MB/s\n", msg_size / 1024, bw); } /* while (1){ //Server: Initialize buffer - read from and send to socket //-------------------------------------------------------- bzero(input, sizeof (input)); tmp = read(sckTemp, input, sizeof(input), 0); printf("\nRead %d\n", tmp/1024); if(!strcmp(input, "END")) break; //printf("\n>> Server: Received msg \"%s\"", input); //printf("\n>> Server: Sending msg \"%s\"\n", input); //if(write(sckTemp, input, strlen(input)) < 0){ // perror("write socket"); //} } //Compute avg, stdv and time in ns //------------------------------- double avg =0, stdv=0, t; ticks sum =0; for(i=0; i<tries; i++){ avg+= msg_size/((double)results[i]/2.4)/tries; } for(i=0; i<tries; i++){ stdv += (msg_size/((double)results[i]/2.4)-avg)*(msg_size/((double)results[i]/2.4)-avg); results[i]=0; } stdv = stdv/tries; stdv = sqrt(stdv); printf("\nSize: %d KB Peak BW: %lf MB/s\n Standart deviation: %lf\n", msg_size/1024, avg*1000000000/1024/1024, stdv*1000000000/1024/10024); */ //Close socket //------------ close(sckTemp); return 0; }