ch_asymin_ref_ad1_sm_param_b_t *
ch_asymin_ref_ad1_sm_param_b_create(int cpu_rcv, int cpu_snd[])
{
  ch_asymin_ref_ad1_sm_param_b_t *result;
  tmc_alloc_t alloc_descr = TMC_ALLOC_INIT;
  tmc_alloc_t alloc_snd = TMC_ALLOC_INIT;
  tmc_alloc_t alloc_rcv = TMC_ALLOC_INIT;
  int i;

  // Allocate the channel descriptor itself.
  if (NULL == (result = tmc_alloc_map(&alloc_descr,
                                      sizeof(ch_asymin_ref_ad1_sm_param_b_t))))
    return NULL;

  // Home the receive-side control block on the receiver's cpu.
  tmc_alloc_set_home(&alloc_rcv, cpu_rcv);
  if (NULL == (result->in_ref = tmc_alloc_map(&alloc_rcv, sizeof(in_rw_t))))
    return NULL;

  for (i = 0; i < MAX_CPU; i++)
  {
    result->cpu_snd[i] = -1;
    result->out_ref[i] = NULL;
    result->in_ref->rdy[i] = 0;
  }

  // One send-side control block per sender, homed on that sender's cpu.
  // The cpu_snd[] array must be terminated by -1.
  for (i = 0; i < MAX_CPU && -1 != cpu_snd[i]; i++)
  {
    result->cpu_snd[i] = cpu_snd[i];
    tmc_alloc_set_home(&alloc_snd, cpu_snd[i]);
    if (NULL == (result->out_ref[i] = tmc_alloc_map(&alloc_snd, sizeof(out_rw_t))))
      return NULL;
    result->out_ref[i]->ack = 1;
  }

  // If no -1 terminator was found within MAX_CPU entries, reject the call.
  if (MAX_CPU == i)
  {
    errno = EINVAL;
    return NULL;
  }

  result->cpu_rcv = cpu_rcv;
  result->num_multi = i;
  result->in_ref->last_rcved = i - 1;
  return result;
}
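/*
 * Minimal usage sketch for the create call above; this is not part of the
 * original listing.  It assumes MAX_CPU >= 3 and that MAX_CPU, in_rw_t and
 * out_rw_t are defined in the channel implementation's headers, as implied
 * by the function body.
 */
static ch_asymin_ref_ad1_sm_param_b_t *example_create_channel(void)
{
  int senders[MAX_CPU];
  senders[0] = 1;   // first sender tile
  senders[1] = 2;   // second sender tile
  senders[2] = -1;  // -1 terminator expected by the create function

  // Receiver runs on tile 0; NULL is returned (and errno may be set) on failure.
  return ch_asymin_ref_ad1_sm_param_b_create(0, senders);
}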
int main(int argc, char** argv)
{
  char *link_name = "xgbe1";
  size_t num_packets = 1000;
  int instance;
  int result;

  // Parse command-line arguments.
  for (int i = 1; i < argc; i++)
  {
    char* arg = argv[i];
    if (!strcmp(arg, "--link") && i + 1 < argc)
    {
      link_name = argv[++i];
    }
    else if (!strcmp(arg, "-n") && i + 1 < argc)
    {
      num_packets = atoi(argv[++i]);
    }
    else if (!strcmp(arg, "-s") || !strcmp(arg, "-l"))
    {
      server = 1;
    }
    else if (!strcmp(arg, "--jumbo"))
    {
      jumbo = true;
    }
    else if (!strcmp(arg, "-c"))
    {
      server = 0;
    }
    else
    {
      tmc_task_die("Unknown option '%s'.", arg);
    }
  }

  printf("\nfinished parsing");
  if (server)
    printf("\nlink egressing is %s", link_name);
  else
    printf("\nlink ingressing is %s", link_name);

  // Get the mPIPE instance for the link.
  instance = gxio_mpipe_link_instance(link_name);
  if (instance < 0)
    tmc_task_die("Link '%s' does not exist.", link_name);

  gxio_mpipe_context_t context_body;
  gxio_mpipe_context_t* const context = &context_body;

  gxio_mpipe_iqueue_t iqueue_body;
  gxio_mpipe_iqueue_t* iqueue = &iqueue_body;

  gxio_mpipe_equeue_t equeue_body;
  gxio_mpipe_equeue_t* const equeue = &equeue_body;

  // Bind to a single cpu.
  cpu_set_t cpus;
  result = tmc_cpus_get_my_affinity(&cpus);
  VERIFY(result, "tmc_cpus_get_my_affinity()");
  result = tmc_cpus_set_my_cpu(tmc_cpus_find_first_cpu(&cpus));
  VERIFY(result, "tmc_cpus_set_my_cpu()");

  // Start the driver.
  result = gxio_mpipe_init(context, instance);
  VERIFY(result, "gxio_mpipe_init()");

  // Open the link; the server waits for the link to come up.
  gxio_mpipe_link_t link;
  if (!server)
    result = gxio_mpipe_link_open(&link, context, link_name, 0);
  else
    result = gxio_mpipe_link_open(&link, context, link_name, GXIO_MPIPE_LINK_WAIT);
  VERIFY(result, "gxio_mpipe_link_open()");
  int channel = gxio_mpipe_link_channel(&link);

  // Allow the link to receive jumbo packets.
  if (jumbo)
    gxio_mpipe_link_set_attr(&link, GXIO_MPIPE_LINK_RECEIVE_JUMBO, 1);

  // Allocate a NotifRing.
  result = gxio_mpipe_alloc_notif_rings(context, 1, 0, 0);
  VERIFY(result, "gxio_mpipe_alloc_notif_rings()");
  int ring = result;

  // Allocate one huge page to hold the notif ring, eDMA ring, buffer stack,
  // and packet buffers.
  tmc_alloc_t alloc = TMC_ALLOC_INIT;
  tmc_alloc_set_huge(&alloc);
  tmc_alloc_set_home(&alloc, tmc_cpus_find_nth_cpu(&cpus, 0));
  size_t page_size = tmc_alloc_get_huge_pagesize();
  void* page = tmc_alloc_map(&alloc, page_size);
  assert(page != NULL);
  void* mem = page;

  // Init the NotifRing.
  size_t notif_ring_entries = 128;
  size_t notif_ring_size = notif_ring_entries * sizeof(gxio_mpipe_idesc_t);
  result = gxio_mpipe_iqueue_init(iqueue, context, ring,
                                  mem, notif_ring_size, 0);
  VERIFY(result, "gxio_mpipe_iqueue_init()");
  mem += notif_ring_size;

  // Allocate a NotifGroup.
  result = gxio_mpipe_alloc_notif_groups(context, 1, 0, 0);
  VERIFY(result, "gxio_mpipe_alloc_notif_groups()");
  int group = result;

  // Allocate buckets.
  int num_buckets = 128;
  result = gxio_mpipe_alloc_buckets(context, num_buckets, 0, 0);
  VERIFY(result, "gxio_mpipe_alloc_buckets()");
  int bucket = result;

  // Init group and buckets.
  gxio_mpipe_bucket_mode_t mode = GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY;
  result = gxio_mpipe_init_notif_group_and_buckets(context, group, ring, 1,
                                                   bucket, num_buckets, mode);
  VERIFY(result, "gxio_mpipe_init_notif_group_and_buckets()");

  // Allocate an eDMA ring.
  result = gxio_mpipe_alloc_edma_rings(context, 1, 0, 0);
  VERIFY(result, "gxio_mpipe_alloc_edma_rings()");
  int edma = result;

  // Init the eDMA ring.
  int edma_ring_entries = 512;
  size_t edma_ring_size = edma_ring_entries * sizeof(gxio_mpipe_edesc_t);
  result = gxio_mpipe_equeue_init(equeue, context, edma, channel,
                                  mem, edma_ring_size, 0);
  VERIFY(result, "gxio_mpipe_equeue_init()");
  mem += edma_ring_size;

  // Allocate a buffer stack.
  result = gxio_mpipe_alloc_buffer_stacks(context, 1, 0, 0);
  VERIFY(result, "gxio_mpipe_alloc_buffer_stacks()");
  int stack_idx = result;

  // Total number of buffers.
  unsigned int num_buffers = edma_ring_entries + notif_ring_entries;

  // Initialize the buffer stack.  Must be aligned mod 64K.
  ALIGN(mem, 0x10000);
  size_t stack_bytes = gxio_mpipe_calc_buffer_stack_bytes(num_buffers);
  gxio_mpipe_buffer_size_enum_t buf_size = GXIO_MPIPE_BUFFER_SIZE_16384;
  result = gxio_mpipe_init_buffer_stack(context, stack_idx, buf_size,
                                        mem, stack_bytes, 0);
  VERIFY(result, "gxio_mpipe_init_buffer_stack()");
  mem += stack_bytes;
  ALIGN(mem, 0x10000);

  // Register the entire huge page of memory which contains all the buffers.
  result = gxio_mpipe_register_page(context, stack_idx, page, page_size, 0);
  VERIFY(result, "gxio_mpipe_register_page()");

  // Push some buffers onto the stack.
  for (unsigned int i = 0; i < num_buffers; i++)
  {
    gxio_mpipe_push_buffer(context, stack_idx, mem);
    mem += 16384;
  }

  // Register for packets.
  gxio_mpipe_rules_t rules;
  gxio_mpipe_rules_init(&rules, context);
  gxio_mpipe_rules_begin(&rules, bucket, num_buckets, NULL);
  result = gxio_mpipe_rules_commit(&rules);
  VERIFY(result, "gxio_mpipe_rules_commit()");

  double start, end, exec_time, throughput;
  start = 0.00;
  uint64_t cpu_speed;
  cpu_speed = tmc_perf_get_cpu_speed();

  // The server initiates the egress, ingresses the echoed packets, and
  // reports the round-trip time; the client ingresses each packet, copies
  // it to an edesc, and egresses it back.
  if (server)
  {
    int send_packets = 0;
    size_t size_e = 0;
    struct timespec req_start, req_end;

    while (send_packets < num_packets)
    {
      char* buf = gxio_mpipe_pop_buffer(context, stack_idx);
      if (buf == NULL)
        tmc_task_die("Could not allocate initial buffer");
      memset(buf, '+', PACKET_SIZE);

      // Prepare to egress the packet.
      gxio_mpipe_edesc_t edesc = {{
        .bound = 1,
        .xfer_size = PACKET_SIZE,
        .stack_idx = stack_idx,
        .hwb = 1,
        .size = GXIO_MPIPE_BUFFER_SIZE_16384
      }};
      gxio_mpipe_edesc_set_va(&edesc, buf);
      result = gxio_mpipe_equeue_put(equeue, edesc);
      VERIFY(result, "gxio_mpipe_equeue_put()");

      if (send_packets == 0)
        clock_gettime(CLOCK_REALTIME, &req_start);

      // Wait for the echoed packet and return its buffer to the stack.
      gxio_mpipe_idesc_t idesc;
      result = gxio_mpipe_iqueue_get(iqueue, &idesc);
      VERIFY(result, "gxio_mpipe_iqueue_get()");
      size_e += idesc.l2_size;
      gxio_mpipe_iqueue_drop(iqueue, &idesc);
      gxio_mpipe_equeue_flush(equeue);
      send_packets++;
    }
    clock_gettime(CLOCK_REALTIME, &req_end);

    exec_time = (req_end.tv_sec - req_start.tv_sec)
                + (req_end.tv_nsec - req_start.tv_nsec) / 1E9;
    fprintf(stdout, "round trip time = %lf\n", exec_time);
    fprintf(stdout, "latency is %f\n", exec_time / (2 * num_packets));
    fprintf(stdout, "size is %zu bytes\n", size_e);
    throughput = size_e * 8 * 2 / exec_time;
    fprintf(stdout, "throughput = %f Mbps\n", throughput / pow(1000, 2));

    // Post a final descriptor with the 'ns' bit set.
    gxio_mpipe_edesc_t ns = {{ .ns = 1 }};
    result = gxio_mpipe_equeue_put(equeue, ns);
    VERIFY(result, "gxio_mpipe_equeue_put()");
    fprintf(stdout, "completed packets %d\n", send_packets);
  }
  else
  {
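    /*
     * The original listing is truncated here.  The following is only a
     * sketch of the client loop described in the comment above (ingress a
     * packet, copy it to an edesc, egress it back); it assumes
     * gxio_mpipe_edesc_copy_idesc() is available and is not the original
     * example code.
     */
    for (size_t echoed = 0; echoed < num_packets; echoed++)
    {
      // Wait for a packet from the server.
      gxio_mpipe_idesc_t idesc;
      result = gxio_mpipe_iqueue_get(iqueue, &idesc);
      VERIFY(result, "gxio_mpipe_iqueue_get()");

      // Build an egress descriptor that reuses the ingress buffer, then
      // send the packet straight back.
      gxio_mpipe_edesc_t edesc;
      gxio_mpipe_edesc_copy_idesc(&edesc, &idesc);
      result = gxio_mpipe_equeue_put(equeue, edesc);
      VERIFY(result, "gxio_mpipe_equeue_put()");
      gxio_mpipe_equeue_flush(equeue);
    }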
int main(int argc, char** argv)
{
  // Process arguments.
  int i = 1;
  while (i < argc)
  {
    // Allow "-i FILE" to override STDIN.
    if (i + 2 <= argc && !strcmp(argv[i], "-i"))
    {
      const char* file = argv[i + 1];
      if (dup2(open(file, O_RDONLY), STDIN_FILENO) < 0)
      {
        fprintf(stderr, "Could not open '%s'.\n", file);
        exit(1);
      }
      i += 2;
    }
    // Allow "-o FILE" to override STDOUT.
    else if (i + 2 <= argc && !strcmp(argv[i], "-o"))
    {
      const char* file = argv[i + 1];
      int fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0666);
      if (dup2(fd, STDOUT_FILENO) < 0)
      {
        fprintf(stderr, "Could not open '%s'.\n", file);
        exit(1);
      }
      i += 2;
    }
    else
    {
      break;
    }
  }

  // Get the UDN coordinates of the BME server tile from our arguments.
  int server_x, server_y;
  if (i + 1 != argc ||
      sscanf(argv[i], "%d,%d", &server_x, &server_y) != 2)
  {
    fprintf(stderr, "usage: linux_client [-i IN] [-o OUT] <server_x>,<server_y>\n");
    exit(1);
  }

  // Create a UDN header for the server.
  DynamicHeader bme_server = {
    .bits.dest_x = server_x,
    .bits.dest_y = server_y
  };

  // Bind ourselves to our current CPU, and set up a UDN hardwall
  // which encompasses the entire chip, so that we can communicate
  // with the BME server.
  cpu_set_t cpus;
  tmc_cpus_clear(&cpus);
  tmc_cpus_grid_add_all(&cpus);
  tmc_cpus_set_my_cpu(tmc_cpus_get_my_current_cpu());
  if (tmc_udn_init(&cpus) != 0)
  {
    perror("UDN hardwall create failed");
    exit(1);
  }
  if (tmc_udn_activate() != 0)
  {
    perror("UDN hardwall activate failed");
    exit(1);
  }

  // Get one huge page of memory.
  tmc_alloc_t alloc = TMC_ALLOC_INIT;
  tmc_alloc_set_huge(&alloc);
  tmc_alloc_set_home(&alloc, 0);
  tmc_alloc_set_shared(&alloc);
  int mlength = 1 << 24;
  void* maddr = tmc_alloc_map(&alloc, mlength);
  if (maddr == NULL)
  {
    perror("can't mmap");
    exit(1);
  }

  // Lock down that memory and get its physical address and caching
  // information, using the bme_mem device driver.
  struct bme_user_mem_desc_io user_mem_desc;
  struct bme_phys_mem_desc_io phys_mem_desc;
  int fd = open("/dev/bme/mem", O_RDWR);
  if (fd < 0)
  {
    perror("couldn't open /dev/bme/mem");
    exit(1);
  }

  // First we find out how many pages are in the region to be locked down.
  // (Given our allocation above, we know we must have exactly one large page,
  // but this is an example of what you would do for large regions.)
  user_mem_desc.user.va = (uintptr_t)maddr;
  user_mem_desc.user.len = mlength;
  if (ioctl(fd, BME_IOC_GET_NUM_PAGES, &user_mem_desc) != 0)
  {
    perror("BME_IOC_GET_NUM_PAGES ioctl failed");
    exit(1);
  }

  // Now that we know how many pages there are, we can request that they be
  // locked into physical memory, and retrieve their physical address and
  // cache mapping information.
  phys_mem_desc.user.va = (uintptr_t)maddr;
  phys_mem_desc.user.len = mlength;
  phys_mem_desc.phys =
    (uintptr_t)malloc(sizeof(struct bme_phys_mem_desc) * user_mem_desc.num_pages);
  phys_mem_desc.num_pages = user_mem_desc.num_pages;
  if (ioctl(fd, BME_IOC_LOCK_MEMORY, &phys_mem_desc) != 0)
  {
    perror("BME_IOC_LOCK_MEMORY ioctl failed");
    exit(1);
  }

  // Send the BME application a message telling it about the memory we
  // just locked down.  Since this is an example, we're only sending one
  // message, for one page.
  DynamicHeader my_hdr = tmc_udn_header_from_cpu(tmc_cpus_get_my_cpu());

  struct bme_phys_mem_desc *phys =
    (struct bme_phys_mem_desc *)(uintptr_t)phys_mem_desc.phys;
  tmc_udn_send_6(bme_server, UDN0_DEMUX_TAG,
                 EX_MSG_MAPPING,
                 my_hdr.word,
                 phys->pa,
                 phys->pa >> 32,
                 phys->pte,
                 phys->pte >> 32);

  uint32_t reply = udn0_receive();
  if (reply)
  {
    fprintf(stderr, "client: got bad response %d to MAPPING message\n", reply);
    exit(1);
  }

  // Now read our standard input into a buffer in the shared page; send
  // a request to the BME tile to process that data, putting the output
  // elsewhere in the shared page; and then write it to standard output.
  char* inbuf = maddr;
  char* outbuf = inbuf + PROCESSING_BUFSIZE;
  int len;
  while ((len = read(STDIN_FILENO, inbuf, PROCESSING_BUFSIZE)) > 0)
  {
    // Note that our message gives the server the offsets of the input and
    // output buffers, rather than pointers to them.  This is because the
    // server has not mapped in the data at the same set of virtual addresses
    // we're using.  We could arrange this, if desired, although it would
    // require more coordination between the client and server.
    tmc_udn_send_5(bme_server, UDN0_DEMUX_TAG,
                   EX_MSG_PROCESS,
                   my_hdr.word,
                   0,
                   len,
                   PROCESSING_BUFSIZE);

    reply = udn0_receive();
    if (reply != len)
    {
      fprintf(stderr, "client: got bad response %d to PROCESS "
              "message (expected %d)\n", reply, len);
      exit(1);
    }

    if (write(STDOUT_FILENO, outbuf, len) != len)
    {
      perror("write");
      exit(1);
    }
  }

  return 0;
}
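/*
 * Example invocation (the coordinates and file names are hypothetical): run
 * the client against a BME server already booted on the tile at UDN
 * coordinates 3,4, overriding standard input and output with files:
 *
 *   ./linux_client -i input.dat -o output.dat 3,4
 *
 * Without -i/-o, the client simply filters standard input to standard
 * output through the BME server, one PROCESSING_BUFSIZE chunk at a time.
 */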