/*
 * Clear the counters on every tile of a cpu set.
 *
 * The calling task is bound, in rank order, to each CPU found in `cpus`
 * and clear_counters() is invoked while bound there. The binding is not
 * restored afterwards, so the caller ends up on the last CPU visited.
 *
 * Returns 0 after all tiles have been processed; a binding failure is
 * reported through tmc_task_die().
 */
int clear_all_counters(cpu_set_t *cpus)
{
    const int tile_total = tmc_cpus_count(cpus);
    int rank = 0;

    while (rank < tile_total) {
        int target = tmc_cpus_find_nth_cpu(cpus, rank);

        if (tmc_cpus_set_my_cpu(target) < 0) {
            tmc_task_die("failure in 'tmc_set_my_cpu'");
            return -1;
        }

        clear_counters();
        rank++;
    }

    return 0;
}
/*
 * Program the counters on every tile of a cpu set.
 *
 * For each CPU in `cpus` (visited in rank order) the calling task is bound
 * to that CPU, its counters are cleared and then configured to track
 * LOCAL_WR_MISS, LOCAL_WR_CNT, LOCAL_DRD_MISS and LOCAL_DRD_CNT.
 * The binding is not restored on return.
 *
 * Returns 0 after all tiles have been processed; a binding failure is
 * reported through tmc_task_die().
 */
int setup_all_counters(cpu_set_t *cpus)
{
    int remaining = tmc_cpus_count(cpus);
    int rank;

    for (rank = 0; remaining > 0; rank++, remaining--) {
        int target = tmc_cpus_find_nth_cpu(cpus, rank);
        int bound = tmc_cpus_set_my_cpu(target);

        if (bound < 0) {
            tmc_task_die("failure in 'tmc_set_my_cpu'");
            return -1;
        }

        /* Start from a clean state before selecting the events. */
        clear_counters();
        setup_counters(LOCAL_WR_MISS, LOCAL_WR_CNT,
                       LOCAL_DRD_MISS, LOCAL_DRD_CNT);
    }

    return 0;
}
/*
 * Bind the calling task to the cpu-th CPU of the file-level `cpus` set.
 *
 * `cpu` is a rank (index) into `cpus`, not a physical CPU number; it is
 * also recorded in ssmp_my_core. The rank is resolved to a physical CPU
 * with tmc_cpus_find_nth_cpu() and the task is bound to it. A binding
 * failure terminates the task through tmc_task_die().
 *
 * After binding, the CPU reported by tmc_cpus_get_my_cpu() is checked
 * against the resolved target (not against the rank, which only matches
 * when the set happens to start at CPU 0 without gaps), and a warning
 * naming the CPU we expected to be on is printed on mismatch.
 */
void set_cpu_platf(int cpu)
{
    int target_cpu;

    ssmp_my_core = cpu;

    target_cpu = tmc_cpus_find_nth_cpu(&cpus, cpu);
    if (tmc_cpus_set_my_cpu(target_cpu) < 0) {
        tmc_task_die("Failure in 'tmc_cpus_set_my_cpu()'.");
    }

    if (tmc_cpus_get_my_cpu() != target_cpu) {
        /* Report the CPU we failed to land on, not the one we are on. */
        PRINT("******* i am not CPU %d", target_cpu);
    }
}
/*
 * Measure how long it takes to hop across every tile in the affinity set
 * while reading each tile's counters.
 *
 * Steps:
 *   1. Fetch the task's affinity set and print its size.
 *   2. Configure the counters on every tile (setup_all_counters()).
 *   3. Bind to each tile in rank order, read its counters and record the
 *      cycle count right after the read.
 *   4. Print the cycle delta between consecutive tiles, the LOCAL_DRD_CNT
 *      value read on tiles 1..N-1, and the total cycles spent in the loop.
 *
 * Returns 0 on success; failures terminate the task via tmc_task_die().
 */
int main(void)
{
    cpu_set_t cpus;
    int wr_cnt, wr_miss, drd_cnt, drd_miss;

    // Init cpus
    if (tmc_cpus_get_my_affinity(&cpus) != 0) {
        tmc_task_die("Failure in 'tmc_cpus_get_my_affinity()'.");
    }
    int num_cpus = tmc_cpus_count(&cpus);
    printf("cpus_count is: %i\n", num_cpus);

    // The arrays below are variable-length; a non-positive length is
    // undefined behaviour, so refuse to continue without any cpu.
    if (num_cpus < 1) {
        tmc_task_die("No cpus available in the affinity set.");
    }

    // Setup Counters
    if (setup_all_counters(&cpus) != 0) {
        tmc_task_die("Failure in 'setup_all_counters()'.");
    }

    unsigned long start_for = get_cycle_count();
    unsigned long cycles[num_cpus];
    // Same type as drd_cnt and as the %i conversion used when printing.
    int drd_cnts[num_cpus];

    for (int i = 0; i < num_cpus; i++) {
        if (tmc_cpus_set_my_cpu(tmc_cpus_find_nth_cpu(&cpus, i)) < 0) {
            tmc_task_die("failure in 'tmc_set_my_cpu'");
        }
        read_counters(&wr_cnt, &wr_miss, &drd_cnt, &drd_miss);
        drd_cnts[i] = drd_cnt;
        cycles[i] = get_cycle_count();
    }
    unsigned long end_for = get_cycle_count();

    // Deltas are between neighbouring tiles, hence the loop starts at 1.
    for (int i = 1; i < num_cpus; i++) {
        unsigned long temp = cycles[i] - cycles[i-1];
        printf("time between %i and %i is %lu\n", i-1, i, temp);
        printf("drd_cnt for tile %i was %i\n", i, drd_cnts[i]);
    }

    printf("Total cycles for-loop: %lu\n", end_for-start_for);
    return 0;
}
void ssmp_mem_init_platf(int id, int num_ues) { ssmp_id_ = id; ssmp_num_ues_ = num_ues; // Now that we're bound to a core, attach to our UDN rectangle. if (tmc_udn_activate() < 0) tmc_task_die("Failure in 'tmc_udn_activate()'."); udn_header = (DynamicHeader* ) memalign(SSMP_CACHE_LINE_SIZE, num_ues * sizeof (DynamicHeader)); if (udn_header == NULL) { tmc_task_die("Failure in allocating dynamic headers"); } int r; for (r = 0; r < num_ues; r++) { int _cpu = tmc_cpus_find_nth_cpu(&cpus, id_to_core[r]); DynamicHeader header = tmc_udn_header_from_cpu(_cpu); udn_header[r] = header; } }
// The worker function for each thread. The sender injects many items // into the queue, and the receiver consumes them. // void* thread_func(void* arg) { int rank = (intptr_t) arg; int capacity = 1 << LG2_CAPACITY; int count = capacity * g_run_multiplier; // Bind this thread to the rank'th core in the cpu set. if (tmc_cpus_set_my_cpu(tmc_cpus_find_nth_cpu(&cpus, rank)) < 0) tmc_task_die("tmc_cpus_set_my_cpu() failed."); if (rank == 1) { queue = memalign(64, sizeof(*queue)); assert(queue != NULL); my_queue_init(queue); } tmc_spin_barrier_wait(&barrier); // Single obj enqueue. uint64_t cycles; if (rank == 0) cycles = bench_sender(count); else cycles = bench_receiver(count); uint64_t reverse_cycles; if (rank == 1) reverse_cycles = bench_sender(count); else reverse_cycles = bench_receiver(count); if (rank == 0) { printf("One-to-one cycles per transfer: %0.2f\n", (float) cycles / count); printf("One-to-one cycles per transfer (reverse): %0.2f\n", (float) reverse_cycles / count); } // Multiple obj enqueue. if (rank == 0) cycles = bench_sender_multiple(count); else cycles = bench_receiver(count); if (rank == 1) reverse_cycles = bench_sender_multiple(count); else reverse_cycles = bench_receiver(count); if (rank == 0) { printf("Multiple one-to-one cycles per transfer: %0.2f\n", (float) cycles / count); printf("Multiple one-to-one cycles per transfer (reverse): %0.2f\n", (float) reverse_cycles / count); } return NULL; }
int main(int argc, char** argv) { char *link_name= "xgbe1"; size_t num_packets = 1000; int instance; int result; for (int i = 1; i < argc; i++){ char* arg = argv[i]; if (!strcmp(arg, "--link") && i + 1 < argc) { link_name = argv[++i]; } else if (!strcmp(arg, "-n") && i + 1 < argc) { num_packets = atoi(argv[++i]); } else if ((!strcmp(arg,"-s")) || (!strcmp(arg,"-l"))) { server = 1; } else if (!strcmp(arg,"--jumbo")) { jumbo = true; } else if ((!strcmp(arg,"-c"))) { server = 0; } else { tmc_task_die("Unknown option '%s'.", arg); } } printf("\n finished parsing"); if (server) printf("\n link egressing is %s", link_name); else printf("\n link ingressing is %s", link_name); // Get the instance. instance = gxio_mpipe_link_instance(link_name); if (instance < 0) tmc_task_die("Link '%s' does not exist.", link_name); gxio_mpipe_context_t context_body; gxio_mpipe_context_t* const context = &context_body; gxio_mpipe_iqueue_t iqueue_body; gxio_mpipe_iqueue_t* iqueue = &iqueue_body; gxio_mpipe_equeue_t equeue_body; gxio_mpipe_equeue_t* const equeue = &equeue_body; // Bind to a single cpu. cpu_set_t cpus; result = tmc_cpus_get_my_affinity(&cpus); VERIFY(result, "tmc_cpus_get_my_affinity()"); result = tmc_cpus_set_my_cpu(tmc_cpus_find_first_cpu(&cpus)); VERIFY(result, "tmc_cpus_set_my_cpu()"); // Start the driver. result = gxio_mpipe_init(context, instance); VERIFY(result, "gxio_mpipe_init()"); gxio_mpipe_link_t link; if (!server) { result = gxio_mpipe_link_open(&link, context, link_name, 0); } else { result = gxio_mpipe_link_open(&link, context, link_name, GXIO_MPIPE_LINK_WAIT ); } VERIFY(result, "gxio_mpipe_link_open()"); int channel = gxio_mpipe_link_channel(&link); //allow the link to receive jumbo packets if (jumbo) gxio_mpipe_link_set_attr(&link, GXIO_MPIPE_LINK_RECEIVE_JUMBO, 1); // Allocate a NotifRing. 
result = gxio_mpipe_alloc_notif_rings(context, 1, 0, 0); VERIFY(result, "gxio_mpipe_alloc_notif_rings()"); int ring = result; // Allocate one huge page to hold our buffer stack, notif ring, and group tmc_alloc_t alloc = TMC_ALLOC_INIT; tmc_alloc_set_huge(&alloc); tmc_alloc_set_home(&alloc, tmc_cpus_find_nth_cpu(&cpus, 0)); size_t page_size = tmc_alloc_get_huge_pagesize(); void* page = tmc_alloc_map(&alloc, page_size); assert(page!= NULL); void* mem = page; // Init the NotifRing. size_t notif_ring_entries = 128; size_t notif_ring_size = notif_ring_entries * sizeof(gxio_mpipe_idesc_t); result = gxio_mpipe_iqueue_init(iqueue, context, ring, mem, notif_ring_size, 0); VERIFY(result, "gxio_mpipe_iqueue_init()"); mem += notif_ring_size; // Allocate a NotifGroup. result = gxio_mpipe_alloc_notif_groups(context, 1, 0, 0); VERIFY(result, "gxio_mpipe_alloc_notif_groups()"); int group = result; // Allocate a bucket. int num_buckets = 128; result = gxio_mpipe_alloc_buckets(context, num_buckets, 0, 0); VERIFY(result, "gxio_mpipe_alloc_buckets()"); int bucket = result; // Init group and bucket. gxio_mpipe_bucket_mode_t mode = GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY; result = gxio_mpipe_init_notif_group_and_buckets(context, group, ring, 1, bucket, num_buckets, mode); VERIFY(result, "gxio_mpipe_init_notif_group_and_buckets()"); // Alloc edma rings result = gxio_mpipe_alloc_edma_rings(context, 1, 0, 0); VERIFY(result, "gxio_mpipe_alloc_edma_rings"); int edma = result; // Init edma ring. int edma_ring_entries = 512; size_t edma_ring_size = edma_ring_entries * sizeof(gxio_mpipe_edesc_t); result = gxio_mpipe_equeue_init(equeue, context, edma, channel, mem, edma_ring_size, 0); VERIFY(result, "gxio_mpipe_equeue_init()"); mem += edma_ring_size; // Allocate a buffer stack. result = gxio_mpipe_alloc_buffer_stacks(context, 1, 0, 0); VERIFY(result, "gxio_mpipe_alloc_buffer_stacks()"); int stack_idx = result; // Total number of buffers. 
unsigned int num_buffers = (int)(edma_ring_entries + notif_ring_entries); // Initialize the buffer stack. Must be aligned mod 64K. ALIGN(mem, 0x10000); size_t stack_bytes = gxio_mpipe_calc_buffer_stack_bytes(num_buffers); gxio_mpipe_buffer_size_enum_t buf_size = GXIO_MPIPE_BUFFER_SIZE_16384; result = gxio_mpipe_init_buffer_stack(context, stack_idx, buf_size, mem, stack_bytes, 0); VERIFY(result, "gxio_mpipe_init_buffer_stack()"); mem += stack_bytes; ALIGN(mem, 0x10000); // Register the entire huge page of memory which contains all the buffers. result = gxio_mpipe_register_page(context, stack_idx, page, page_size, 0); VERIFY(result, "gxio_mpipe_register_page()"); // Push some buffers onto the stack. for (int i = 0; i < num_buffers; i++) { gxio_mpipe_push_buffer(context, stack_idx, mem); mem += 16384; } // Register for packets. gxio_mpipe_rules_t rules; gxio_mpipe_rules_init(&rules, context); gxio_mpipe_rules_begin(&rules, bucket, num_buckets, NULL); result = gxio_mpipe_rules_commit(&rules); VERIFY(result, "gxio_mpipe_rules_commit()"); double start, end, exec_time, throughput; start = 0.00; uint64_t cpu_speed; cpu_speed = tmc_perf_get_cpu_speed(); /*Server will initiate the egress and ingress the packets and display the round trip time * Client will ingress the packet, copy it to the edesc and egress it */ if (server) { int send_packets = 0; size_t size_e = 0; struct timespec req_start, req_end; while (send_packets < num_packets) { char* buf = gxio_mpipe_pop_buffer(context, stack_idx); if(buf == NULL) tmc_task_die("Could not allocate initial buffer"); memset(buf,'+',PACKET_SIZE); // Prepare to egress the packet. 
gxio_mpipe_edesc_t edesc = {{ .bound = 1, .xfer_size = PACKET_SIZE, .stack_idx = stack_idx, .hwb = 1, .size = GXIO_MPIPE_BUFFER_SIZE_16384 }}; gxio_mpipe_edesc_set_va(&edesc, buf); result = gxio_mpipe_equeue_put(equeue, edesc); VERIFY(result, "gxio_mpipe_equeue_put()"); if (send_packets == 0) clock_gettime(CLOCK_REALTIME, &req_start); gxio_mpipe_idesc_t idesc; result = gxio_mpipe_iqueue_get(iqueue,&idesc); VERIFY(result, "gxio_mpipe_iqueue_get()"); size_e += idesc.l2_size; gxio_mpipe_iqueue_drop(iqueue, &idesc); gxio_mpipe_equeue_flush(equeue); send_packets++; } clock_gettime(CLOCK_REALTIME, &req_end); exec_time = ((req_end.tv_sec - req_start.tv_sec)+(req_end.tv_nsec - req_start.tv_nsec)/1E9); fprintf(stdout,"round trip time = %lf\n", exec_time); fprintf(stdout,"latency is %f\n", exec_time/(2 * num_packets )); fprintf(stdout,"size is %zd b\n", size_e); throughput = size_e * 8 * 2 / exec_time; fprintf(stdout,"throughput = %f Mbps\n",throughput/pow(1000, 2)); gxio_mpipe_edesc_t ns = {{ .ns = 1 }}; result = gxio_mpipe_equeue_put(equeue,ns); VERIFY(result, "gxio_mpipe_equeue_put()"); fprintf(stdout,"completed packets %d\n", send_packets); } else {
/** * \brief RunModeTileMpipeWorkers set up to process all modules in each thread. * * \param iface pointer to the name of the interface from which we will * fetch the packets * \retval 0 if all goes well. (If any problem is detected the engine will * exit()) */ int RunModeTileMpipeWorkers(void) { SCEnter(); char tname[TM_THREAD_NAME_MAX]; char *thread_name; TmModule *tm_module; int pipe; RunModeInitialize(); /* Available cpus */ uint16_t ncpus = UtilCpuGetNumProcessorsOnline(); TimeModeSetLive(); unsigned int pipe_max = 1; if (ncpus > 1) pipe_max = ncpus - 1; intmax_t threads; if (ConfGetInt("mpipe.threads", &threads) == 1) { tile_num_pipelines = threads; } else { tile_num_pipelines = pipe_max; } SCLogInfo("%d Tilera worker threads", tile_num_pipelines); ReceiveMpipeInit(); char *mpipe_dev = NULL; int nlive = LiveGetDeviceCount(); if (nlive > 0) { SCLogInfo("Using %d live device(s).", nlive); /*mpipe_dev = LiveGetDevice(0);*/ } else { /* * Attempt to get interface from config file * overrides -i from command line. 
*/ if (ConfGet("mpipe.interface", &mpipe_dev) == 0) { if (ConfGet("mpipe.single_mpipe_dev", &mpipe_dev) == 0) { SCLogError(SC_ERR_RUNMODE, "Failed retrieving " "mpipe.single_mpipe_dev from Conf"); exit(EXIT_FAILURE); } } } /* Get affinity for worker */ cpu_set_t cpus; //int result = tmc_cpus_get_my_affinity(&cpus); int result = tmc_cpus_get_dataplane_cpus(&cpus); if (result < 0) { SCLogError(SC_ERR_INVALID_ARGUMENT, "tmc_cpus_get_my_affinity() returned=%d", result); SCReturnInt(TM_ECODE_FAILED); } for (pipe = 0; pipe < tile_num_pipelines; pipe++) { char *mpipe_devc; if (nlive > 0) { mpipe_devc = SCStrdup("multi"); } else { mpipe_devc = SCStrdup(mpipe_dev); } if (unlikely(mpipe_devc == NULL)) { printf("ERROR: SCStrdup failed for ReceiveMpipe\n"); exit(EXIT_FAILURE); } snprintf(tname, sizeof(tname), "%s#%02d", thread_name_workers, pipe+1); /* create the threads */ ThreadVars *tv_worker = TmThreadCreatePacketHandler(tname, "packetpool", "packetpool", "packetpool", "packetpool", "pktacqloop"); if (tv_worker == NULL) { printf("ERROR: TmThreadsCreate failed\n"); exit(EXIT_FAILURE); } tm_module = TmModuleGetByName("ReceiveMpipe"); if (tm_module == NULL) { printf("ERROR: TmModuleGetByName failed for ReceiveMpipe\n"); exit(EXIT_FAILURE); } TmSlotSetFuncAppend(tv_worker, tm_module, (void *)mpipe_devc); /* Bind to a single cpu. 
*/ int pipe_cpu = tmc_cpus_find_nth_cpu(&cpus, pipe); tv_worker->rank = pipe; TmThreadSetCPUAffinity(tv_worker, pipe_cpu); tm_module = TmModuleGetByName("DecodeMpipe"); if (tm_module == NULL) { printf("ERROR: TmModuleGetByName DecodeMpipe failed\n"); exit(EXIT_FAILURE); } TmSlotSetFuncAppend(tv_worker, tm_module, NULL); tm_module = TmModuleGetByName("StreamTcp"); if (tm_module == NULL) { printf("ERROR: TmModuleGetByName StreamTcp failed\n"); exit(EXIT_FAILURE); } TmSlotSetFuncAppend(tv_worker, tm_module, NULL); if (DetectEngineEnabled()) { tm_module = TmModuleGetByName("Detect"); if (tm_module == NULL) { printf("ERROR: TmModuleGetByName Detect failed\n"); exit(EXIT_FAILURE); } TmSlotSetFuncAppend(tv_worker, tm_module, NULL); } tm_module = TmModuleGetByName("RespondReject"); if (tm_module == NULL) { printf("ERROR: TmModuleGetByName for RespondReject failed\n"); exit(EXIT_FAILURE); } TmSlotSetFuncAppend(tv_worker, tm_module, NULL); SetupOutputs(tv_worker); if (TmThreadSpawn(tv_worker) != TM_ECODE_OK) { printf("ERROR: TmThreadSpawn failed\n"); exit(EXIT_FAILURE); } } return 0; }
int main(int argc, char **argv) { /* limits */ int niter = 1; int nscambi = 100000; /* threads */ int cpu_white, cpu_black, cpu_main; int white_rank = 0, black_rank = 61; void * ch[2]; /* statistics */ struct timeval start; struct timeval end; /* 0 current, 1 sum, 2 square sum, 3 max value */ uint64_t result_test[4] = { 0, 0, 0, 0 }; double elapsed_test[4] = { 0, 0, 0, 0 }; double avg_Tscambio[4] = { 0 }; double sdev_Tscambio[4] = { 0 }; double max_Tscambio[4] = { 0 }; /* 0 results, 1 elapsed_test, 2 Tscambio, 3 avg_Tscambio, 4 sdev_Tscambio, 5 max_Tscambio */ double avg[6]; double sdev[6]; /* others */ cpu_set_t dp, udn_hardwall; int i; int retval[2]; int opt; int longopt; struct option options[] = { { "niter", required_argument, &longopt, 'n' }, { "nscambi",required_argument, &longopt, 'm' }, { "white", required_argument, &longopt, 'w' }, { "black", required_argument, &longopt, 'b' }, { NULL, 0, NULL, 0 } }; while (longopt || -1 != (opt = getopt_long(argc, argv, "n:m:w:b:", options, NULL))) { switch (opt) { case 'n': niter = atoi(optarg); break; case 'w': white_rank = atoi(optarg); break; case 'b': black_rank = atoi(optarg); break; case 'm': nscambi = atoi(optarg); break; case 0: opt=longopt; continue; } longopt =0; } signal(SIGALRM, sighand_alrm); /* defines cpus */ ERRHAND(tmc_cpus_get_dataplane_cpus(&dp)); if (tmc_cpus_count(&dp) < 3) fprintf(stderr, "[ERROR] numero di cpu dataplane disponibili non sufficiente\n"); //ERRHAND(cpu_white = tmc_cpus_find_first_cpu(&dp)); //ERRHAND(cpu_black = tmc_cpus_find_last_cpu(&dp)); ERRHAND(cpu_white = tmc_cpus_find_nth_cpu(&dp, white_rank)); ERRHAND(cpu_black = tmc_cpus_find_nth_cpu(&dp, black_rank)); ERRHAND(cpu_main = tmc_cpus_find_nth_cpu(&dp, tmc_cpus_count(&dp)-2)); /* bind this process to a dataplane cpu */ ERRHAND(tmc_cpus_set_my_cpu(cpu_main)); #if TEST_VERBOSE >= 1 printf("[INFO] main: cpu %d niter %d\n", tmc_cpus_get_my_cpu(), niter); #endif printf("main on cpu %d, white on cpu %d, black on cpu %d, " "num of test 
iteration %d, num of exchanges %d\n", tmc_cpus_get_my_cpu(), cpu_white, cpu_black, niter, nscambi); /* define ansd initialize udn hardwall */ tmc_cpus_clear(&udn_hardwall); ERRHAND(tmc_cpus_add_cpu(&udn_hardwall, cpu_main)); ERRHAND(tmc_cpus_add_cpu(&udn_hardwall, cpu_white)); ERRHAND(tmc_cpus_add_cpu(&udn_hardwall, cpu_black)); ERRHAND(tmc_udn_init(&dp)); /* init synchronization barriers */ ERRHAND_NZ(pthread_barrier_init(&computation_start, NULL, 2)); ERRHAND_NZ(pthread_barrier_init(&computation_end, NULL, 2)); for (i=0; i<niter; i++) { arg_t arg[2]; Tscambio[1] = 0; Tscambio[2] = 0; Tscambio[3] = 0; /* START TEST i-esimo */ ERRHAND(gettimeofday(&start, NULL)); /* set deadlock alarm */ alarm(deadlock_timeout); /* setup environment */ ERRHAND_NN(ch[0] = ch_create(CH0_IMPL)(cpu_white, cpu_black CH0_CREATE_ARGS)); ERRHAND_NN(ch[1] = ch_create(CH1_IMPL)(cpu_black, cpu_white CH1_CREATE_ARGS)); arg[0].cpu = cpu_white; arg[0].ch[0] = ch[0]; arg[0].ch[1] = ch[1]; arg[0].num_scambi = nscambi; arg[1].cpu = cpu_black; arg[1].ch[0] = ch[0]; arg[1].ch[1] = ch[1]; arg[1].num_scambi = nscambi; /* start computation */ ERRHAND_NZ(pthread_create(&thread_white, NULL, task_pingpong_white, (void *)&arg[0])); ERRHAND_NZ(pthread_create(&thread_black, NULL, task_pingpong_black, (void *)&arg[1])); /* wait end of computation */ ERRHAND_NZ(pthread_join(thread_white, (void *)retval)); ERRHAND_NZ(pthread_join(thread_black, (void *)(retval+1))); /* destroy environment */ ch_destroy(CH0_IMPL)(ch); ch_destroy(CH1_IMPL)(ch+1); /* END TEST i-esimo */ ERRHAND(gettimeofday(&end, NULL)); /* statistiche sugli scambi eseguiti nel test corrente */ calcStatistics(avg[2], sdev[2], Tscambio, nscambi); timersub(&end, &start, &start); prepareStatistics(elapsed_test, start.tv_sec*1000+start.tv_usec/(double)1000); prepareStatistics(result_test, retval[0] + retval[1]); prepareStatistics(avg_Tscambio, avg[2]); prepareStatistics(sdev_Tscambio, sdev[2]); prepareStatistics(max_Tscambio, Tscambio[3]); #if 
TEST_VERBOSE == 0 //fprintf(stderr, "%d:%f:%f:%f:(%f);", i, avg[2]/2, sdev[2], (double)Tscambio[3]/2, avg_Tscambio[2]); #elif TEST_VERBOSE >= 2 fprintf(stderr, printStatistics_format("Tscambio (cycles)", PRIu64) "[STAT] Tsend (cycles):\n[STAT] %f\n", printStatistics_values(avg[2], sdev[2], Tscambio), avg[2]/(double)2 ); #endif /* TEST_VERBOSE == 0 */ deadlock_continue = 0; } /* for (i=0; i<niter; i++) */ calcStatistics(avg[0], sdev[0], result_test, niter); calcStatistics(avg[1], sdev[1], elapsed_test, niter); calcStatistics(avg[3], sdev[3], avg_Tscambio, niter); calcStatistics(avg[4], sdev[4], sdev_Tscambio, niter); calcStatistics(avg[5], sdev[5], max_Tscambio, niter); /* fprintf(stderr, printStatistics_format("Tscambio avg (cycles)", "f") printStatistics_format("Tscambio sdev (cycles)", "f") "[STAT] Tscambio max (cycles):\n[STAT] %f\n", printStatistics_values(avg[3], sdev[3], avg_Tscambio), printStatistics_values(avg[4], sdev[4], sdev_Tscambio), maxmax_Tscambio ); */ /* fprintf(stderr, printStatistics_format2("Tscambio avg (cycles)", "f") printStatistics_format2("Tscambio sdev ", "f") printStatistics_format2("Tscambio max (cycles)", "f"), printStatistics_values2(avg[3], sdev[3], avg_Tscambio), printStatistics_values2(avg[4], sdev[4], sdev_Tscambio), printStatistics_values2(avg[5], sdev[5], max_Tscambio) ); Tscambio avg (cycles): 110.491957 0.258812 111.400840 Tscambio sdev : 118.790573 63.409627 306.372066 Tscambio max (cycles): 34756.240000 18675.977854 80419.000000 */ fprintf(stderr, "%-20s %-20s %-20s\n" "%-20f %-20f %-20f\n", "avg", "sdev", "max", avg[3], avg[4], avg[5]); fprintf(stderr, "\n\n" " %-20s %-20s %-20s\n" "Tscambio-avg: %-20f %-20f %-20f\n" "Tscambio-dev: %-20f %-20f %-20f\n" "Tscambio-max: %-20f %-20f %-20f\n", "avg", "sdev", "max", avg[3], avg[4], avg[5], sdev[3], sdev[4], sdev[5], avg_Tscambio[3], sdev_Tscambio[3], max_Tscambio[3] ); #if TEST_VERBOSE == 0 #else #endif /* TEST_VERBOSE == 0 */ return 0; }
void* net_thread(void* arg) { int iix = (uintptr_t)arg; /*Ingress interface index*/ int eix; /*Egress interface index*/ int i, n; /*Index, Number*/ gxio_mpipe_iqueue_t *iqueue = iqueues[iix]; /*Ingress queue*/ gxio_mpipe_equeue_t *equeue; /*Egress queue*/ gxio_mpipe_idesc_t *idescs; /*Ingress packet descriptors*/ gxio_mpipe_edesc_t edescs[MAXBATCH]; /*Egress descriptors.*/ long slot; /*Setup egress queue.*/ switch (iix) { case 0: eix = 1; break; case 1: eix = 0; break; case 2: eix = 3; break; case 3: eix = 2; break; default: tmc_task_die("Invalid interface index, %d", iix); break; } equeue = &equeues[eix]; /*Egress queue*/ /*Bind to a single CPU.*/ if (tmc_cpus_set_my_cpu(tmc_cpus_find_nth_cpu(&cpus, DTILEBASE + iix)) < 0) { tmc_task_die("Failed to setup CPU affinity\n"); } if (set_dataplane(0) < 0) { tmc_task_die("Failed to setup dataplane\n"); } /*Line up all network threads.*/ tmc_sync_barrier_wait(&syncbar); tmc_spin_barrier_wait(&spinbar); if (iix == 0) { /*Pause briefly, to let everyone finish passing the barrier.*/ for (i = 0; i < 10000; i++) __insn_mfspr(SPR_PASS); /*Allow packets to flow (on all links).*/ sim_enable_mpipe_links(mpipei, -1); } /*-------------------------------------------------------------------------*/ /* Process(forward) packets. */ /*-------------------------------------------------------------------------*/ while (1) { /*Receive packet(s).*/ n = gxio_mpipe_iqueue_peek(iqueue, &idescs); if (n <= 0) continue; else if (n > 16) n = 16; //TODO: Experiment with this number. #if 0 printf("[%d] Get packet(s), n=%d\n", iix, n); #endif /*Prefetch packet descriptors from L3 to L1.*/ tmc_mem_prefetch(idescs, n * sizeof(*idescs)); /*Reserve slots. NOTE: This might spin.*/ slot = gxio_mpipe_equeue_reserve_fast(equeue, n); /*Process packet(s).*/ for (i = 0; i < n; i++) { /*Detect Call(s), clone the packet and pass it to antother Tile, if necessary.*/ //TODO: For now, inspect and record the packet using this Tile. 
if (ccap_detect_call(&idescs[i])) { ccap_trace_add(0, &idescs[i]); //TODO: Use actual link number. } /*Send the packets out on the peer port.*/ gxio_mpipe_edesc_copy_idesc(&edescs[i], &idescs[i]); #if 1 /*Drop "error" packets (but ignore "checksum" problems).*/ if (idescs[i].be || idescs[i].me || idescs[i].tr || idescs[i].ce) { edescs[i].ns = 1; } #endif gxio_mpipe_equeue_put_at(equeue, edescs[i], slot + i); gxio_mpipe_iqueue_consume(iqueue, &idescs[i]); } } /*Make compiler happy.*/ return (void *)NULL; }