ch_asymin_ref_ad1_sm_param_b_t *
ch_asymin_ref_ad1_sm_param_b_create(int cpu_rcv, int cpu_snd[])
{
  ch_asymin_ref_ad1_sm_param_b_t *result;
  tmc_alloc_t alloc_descr = TMC_ALLOC_INIT;
  tmc_alloc_t alloc_snd = TMC_ALLOC_INIT;
  tmc_alloc_t alloc_rcv = TMC_ALLOC_INIT;
  int i;

  // Allocate the channel descriptor itself.
  if (NULL ==
      (result = tmc_alloc_map(&alloc_descr, sizeof(ch_asymin_ref_ad1_sm_param_b_t))))
    return NULL;

  // Home the receive-side state on the receiving CPU.
  tmc_alloc_set_home(&alloc_rcv, cpu_rcv);
  if (NULL ==
      (result->in_ref = tmc_alloc_map(&alloc_rcv, sizeof(in_rw_t))))
    return NULL;

  // Mark every sender slot empty before filling in the real senders.
  for (i=0; i<MAX_CPU; i++) {
    result->cpu_snd[i] = -1;
    result->out_ref[i] = NULL;
    result->in_ref->rdy[i] = 0;
  }
  // Register each sender from the -1-terminated cpu_snd list, homing its
  // out_ref on that sender's CPU.
  for (i=0; i < MAX_CPU && -1 != cpu_snd[i]; i++) {
    result->cpu_snd[i] = cpu_snd[i];
    tmc_alloc_set_home(&alloc_snd, cpu_snd[i]);
    if (NULL == (result->out_ref[i] = tmc_alloc_map(&alloc_snd,
                                                    sizeof(out_rw_t))))
      return NULL;
    result->out_ref[i]->ack = 1;
  }
  // The sender list must be terminated by -1 within MAX_CPU entries.
  if (MAX_CPU == i) { errno = EINVAL; return NULL; }
  result->cpu_rcv = cpu_rcv;
  result->num_multi = i;
  result->in_ref->last_rcved = i-1;

  return result;
}
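
For reference, a minimal usage sketch (hedged: MAX_CPU, the channel types, and the project headers are assumed to come from the original codebase):

int cpu_snd[MAX_CPU] = { 1, 2, -1 };   /* -1-terminated sender list */
ch_asymin_ref_ad1_sm_param_b_t *ch =
    ch_asymin_ref_ad1_sm_param_b_create(0, cpu_snd);  /* receiver homed on CPU 0 */
if (ch == NULL)
    perror("ch_asymin_ref_ad1_sm_param_b_create");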
Example #2
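/* Hedged prelude: the listing begins at main(), so the includes, globals,
 * and helper macros from the top of the original file are missing.  The
 * sketch below follows the pattern of the stock Tilera mPIPE examples;
 * PACKET_SIZE's value is a guess. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>
#include <time.h>
#include <math.h>
#include <gxio/mpipe.h>
#include <tmc/alloc.h>
#include <tmc/cpus.h>
#include <tmc/task.h>

static int server = 0;      /* -s/-l selects server mode, -c client mode */
static bool jumbo = false;  /* --jumbo enables jumbo-frame reception */

#define PACKET_SIZE 1024    /* hypothetical payload size */

// Abort via tmc_task_die() if a gxio call returned a negative error code.
#define VERIFY(VAL, WHAT)                                              \
	do {                                                           \
		long long __val = (VAL);                               \
		if (__val < 0)                                         \
			tmc_task_die("Failure in '%s': %lld: %s.",     \
			             (WHAT), __val,                    \
			             gxio_strerror(__val));            \
	} while (0)

// Round a pointer up to the given power-of-two alignment.
#define ALIGN(p, align) do { (p) += -(long)(p) & ((align) - 1); } while (0)
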
int main(int argc, char** argv)
{
	char *link_name = "xgbe1";
	size_t num_packets = 1000;
	int instance;
	int result;
	for (int i = 1; i < argc; i++){
		char* arg = argv[i];
		if (!strcmp(arg, "--link") && i + 1 < argc) {
			link_name = argv[++i];
		} else if (!strcmp(arg, "-n") && i + 1 < argc) {
			num_packets = atoi(argv[++i]);
		} else if ((!strcmp(arg,"-s")) || (!strcmp(arg,"-l"))) {
			server = 1;
		} else if (!strcmp(arg,"--jumbo")) {
			jumbo = true;
		} else if ((!strcmp(arg,"-c"))) {
			server = 0;
		} else {
			tmc_task_die("Unknown option '%s'.", arg);
		}
	}
	printf("\n finished parsing");	
	if (server) 
		printf("\n link egressing is %s", link_name);
	else	
		printf("\n link ingressing is %s", link_name);

	// Get the instance.
	instance = gxio_mpipe_link_instance(link_name);
	if (instance < 0)  
		tmc_task_die("Link '%s' does not exist.", link_name);

	gxio_mpipe_context_t context_body;
	gxio_mpipe_context_t* const context = &context_body;

	gxio_mpipe_iqueue_t iqueue_body;
	gxio_mpipe_iqueue_t* iqueue = &iqueue_body;

	gxio_mpipe_equeue_t equeue_body;
	gxio_mpipe_equeue_t* const equeue = &equeue_body;

	// Bind to a single cpu.
	cpu_set_t cpus;
	result = tmc_cpus_get_my_affinity(&cpus);
	VERIFY(result, "tmc_cpus_get_my_affinity()");
	result = tmc_cpus_set_my_cpu(tmc_cpus_find_first_cpu(&cpus));
	VERIFY(result, "tmc_cpus_set_my_cpu()");

	// Start the driver.
	result = gxio_mpipe_init(context, instance);
	VERIFY(result, "gxio_mpipe_init()");

	gxio_mpipe_link_t link;
	if (!server) {
		result = gxio_mpipe_link_open(&link, context, link_name, 0);
	} else {
		result = gxio_mpipe_link_open(&link, context, link_name, GXIO_MPIPE_LINK_WAIT );
	}
	VERIFY(result, "gxio_mpipe_link_open()");
	int channel = gxio_mpipe_link_channel(&link);

	// Allow the link to receive jumbo packets.
	if (jumbo)
		gxio_mpipe_link_set_attr(&link, GXIO_MPIPE_LINK_RECEIVE_JUMBO, 1);

	// Allocate a NotifRing.
	result = gxio_mpipe_alloc_notif_rings(context, 1, 0, 0);
	VERIFY(result, "gxio_mpipe_alloc_notif_rings()");
	int ring = result;

	// Allocate one huge page to hold the notif ring, edma ring, buffer
	// stack, and packet buffers.
	tmc_alloc_t alloc = TMC_ALLOC_INIT;
	tmc_alloc_set_huge(&alloc);
	tmc_alloc_set_home(&alloc, tmc_cpus_find_nth_cpu(&cpus, 0));
	size_t page_size = tmc_alloc_get_huge_pagesize();
	void* page = tmc_alloc_map(&alloc, page_size);
	assert(page != NULL);
	void* mem = page;

	// Init the NotifRing.
	size_t notif_ring_entries = 128;
	size_t notif_ring_size = notif_ring_entries * sizeof(gxio_mpipe_idesc_t);
	result = gxio_mpipe_iqueue_init(iqueue, context, ring, mem, notif_ring_size, 0);
	VERIFY(result, "gxio_mpipe_iqueue_init()");
	mem += notif_ring_size;

	// Allocate a NotifGroup.
	result = gxio_mpipe_alloc_notif_groups(context, 1, 0, 0);
	VERIFY(result, "gxio_mpipe_alloc_notif_groups()");
	int group = result;

	// Allocate a bucket.
	int num_buckets = 128;
	result = gxio_mpipe_alloc_buckets(context, num_buckets, 0, 0);
	VERIFY(result, "gxio_mpipe_alloc_buckets()");
	int bucket = result;

	// Init group and bucket.
	gxio_mpipe_bucket_mode_t mode = GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY;
	result = gxio_mpipe_init_notif_group_and_buckets(context, group, ring, 1,  bucket, num_buckets, mode);
	VERIFY(result, "gxio_mpipe_init_notif_group_and_buckets()");

	// Alloc edma rings
	result = gxio_mpipe_alloc_edma_rings(context, 1, 0, 0);
	VERIFY(result, "gxio_mpipe_alloc_edma_rings");
	int edma = result;

	// Init edma ring.
	int edma_ring_entries = 512;
	size_t edma_ring_size = edma_ring_entries * sizeof(gxio_mpipe_edesc_t);
	result = gxio_mpipe_equeue_init(equeue, context, edma, channel, mem, edma_ring_size, 0);
	VERIFY(result, "gxio_mpipe_equeue_init()");
	mem += edma_ring_size;

	// Allocate a buffer stack.
	result = gxio_mpipe_alloc_buffer_stacks(context, 1, 0, 0);
	VERIFY(result, "gxio_mpipe_alloc_buffer_stacks()");
	int stack_idx = result;

	// Total number of buffers.
	unsigned int num_buffers = edma_ring_entries + notif_ring_entries;

	// Initialize the buffer stack.  Must be aligned mod 64K.
	ALIGN(mem, 0x10000);
	size_t stack_bytes = gxio_mpipe_calc_buffer_stack_bytes(num_buffers);	
	gxio_mpipe_buffer_size_enum_t buf_size = GXIO_MPIPE_BUFFER_SIZE_16384;
	result = gxio_mpipe_init_buffer_stack(context, stack_idx, buf_size, mem, stack_bytes, 0);
	VERIFY(result, "gxio_mpipe_init_buffer_stack()");
	mem += stack_bytes;
	ALIGN(mem, 0x10000);

	// Register the entire huge page of memory which contains all the buffers.
	result = gxio_mpipe_register_page(context, stack_idx, page, page_size, 0);
	VERIFY(result, "gxio_mpipe_register_page()");

	// Push some buffers onto the stack.
	for (int i = 0; i < num_buffers; i++) {
		gxio_mpipe_push_buffer(context, stack_idx, mem);
		mem += 16384;
	}

	// Register for packets.
	gxio_mpipe_rules_t rules;
	gxio_mpipe_rules_init(&rules, context);
	gxio_mpipe_rules_begin(&rules, bucket, num_buckets, NULL);
	result = gxio_mpipe_rules_commit(&rules);
	VERIFY(result, "gxio_mpipe_rules_commit()");

	double exec_time, throughput;

	/* The server egresses packets, ingresses the echoes, and reports the
	 * round-trip time.  The client ingresses each packet, copies it into
	 * an edesc, and egresses it back. */
	if (server) {
		int send_packets = 0;
		size_t size_e = 0;
		struct timespec req_start, req_end;
		while (send_packets < num_packets) {
			char* buf = gxio_mpipe_pop_buffer(context, stack_idx);
			if (buf == NULL)
				tmc_task_die("Could not allocate initial buffer");
			memset(buf, '+', PACKET_SIZE);
			// Prepare to egress the packet.
			gxio_mpipe_edesc_t edesc = {{
				.bound = 1,
				.xfer_size = PACKET_SIZE,
				.stack_idx = stack_idx,
				.hwb = 1,
				.size = GXIO_MPIPE_BUFFER_SIZE_16384
			}};
			gxio_mpipe_edesc_set_va(&edesc, buf);
			result = gxio_mpipe_equeue_put(equeue, edesc);
			VERIFY(result, "gxio_mpipe_equeue_put()");
			// Start the clock on the first packet.
			if (send_packets == 0)
				clock_gettime(CLOCK_REALTIME, &req_start);

			// Wait for the echoed packet, then drop it.
			gxio_mpipe_idesc_t idesc;
			result = gxio_mpipe_iqueue_get(iqueue, &idesc);
			VERIFY(result, "gxio_mpipe_iqueue_get()");
			size_e += idesc.l2_size;
			gxio_mpipe_iqueue_drop(iqueue, &idesc);
			gxio_mpipe_equeue_flush(equeue);
			send_packets++;
		}
		clock_gettime(CLOCK_REALTIME, &req_end);
		exec_time = (req_end.tv_sec - req_start.tv_sec) + (req_end.tv_nsec - req_start.tv_nsec) / 1E9;
		fprintf(stdout, "round trip time = %lf s\n", exec_time);
		fprintf(stdout, "latency is %f s\n", exec_time / (2 * num_packets));
		fprintf(stdout, "size is %zu bytes\n", size_e);
		throughput = size_e * 8 * 2 / exec_time;
		fprintf(stdout, "throughput = %f Mbps\n", throughput / pow(1000, 2));
		gxio_mpipe_edesc_t ns = {{ .ns = 1 }};
		result = gxio_mpipe_equeue_put(equeue, ns);
		VERIFY(result, "gxio_mpipe_equeue_put()");
		fprintf(stdout, "completed packets %d\n", send_packets);
	} else {
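		/* Client side.  The original listing is truncated here, so
		 * the following is a hedged sketch that follows the comment
		 * above: ingress each packet, copy it into an edesc, and
		 * egress it back to the sender. */
		int recv_packets = 0;
		while (recv_packets < num_packets) {
			// Block until a packet arrives.
			gxio_mpipe_idesc_t idesc;
			result = gxio_mpipe_iqueue_get(iqueue, &idesc);
			VERIFY(result, "gxio_mpipe_iqueue_get()");

			// Copy the ingress descriptor into an egress
			// descriptor; with hwb set, the hardware returns the
			// buffer to the stack after the packet is sent.
			gxio_mpipe_edesc_t edesc;
			gxio_mpipe_edesc_copy_from_idesc(&edesc, &idesc);
			result = gxio_mpipe_equeue_put(equeue, edesc);
			VERIFY(result, "gxio_mpipe_equeue_put()");
			recv_packets++;
		}
		fprintf(stdout, "reflected packets %d\n", recv_packets);
	}

	return 0;
}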
Example #3
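/* Hedged note: this listing likewise omits its headers.  It relies on the
 * BME example's own definitions (struct bme_user_mem_desc_io,
 * struct bme_phys_mem_desc_io, the BME_IOC_* ioctls, DynamicHeader,
 * EX_MSG_MAPPING/EX_MSG_PROCESS, UDN0_DEMUX_TAG, udn0_receive(), and
 * PROCESSING_BUFSIZE), plus <stdio.h>, <stdlib.h>, <string.h>, <fcntl.h>,
 * <unistd.h>, <sys/ioctl.h>, <tmc/alloc.h>, <tmc/cpus.h>, and <tmc/udn.h>. */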
int
main(int argc, char** argv)
{
  // Process arguments.

  int i = 1;

  while (i < argc)
  {
    // Allow "-i FILE" to override STDIN.
    if (i + 2 <= argc && !strcmp(argv[i], "-i"))
    {
      const char* file = argv[i+1];
      if (dup2(open(file, O_RDONLY), STDIN_FILENO) < 0)
      {
        fprintf(stderr, "Could not open '%s'.\n", file);
        exit(1);
      }
      i += 2;
    }

    // Allow "-o FILE" to override STDOUT.
    else if (i + 2 <= argc && !strcmp(argv[i], "-o"))
    {
      const char* file = argv[i+1];
      int fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0666);
      if (dup2(fd, STDOUT_FILENO) < 0)
      {
        fprintf(stderr, "Could not open '%s'.\n", file);
        exit(1);
      }
      i += 2;
    }

    else
    {
      break;
    }
  }

  // Get the UDN coordinates of the BME server tile from our arguments.
  int server_x, server_y;
  if (i + 1 != argc || sscanf(argv[i], "%d,%d", &server_x, &server_y) != 2)
  {
    fprintf(stderr,
            "usage: linux_client [-i IN] [-o OUT] <server_x>,<server_y>\n");
    exit(1);
  }

  // Create a UDN header for the server.
  DynamicHeader bme_server =
    { .bits.dest_x = server_x, .bits.dest_y = server_y };


  // Bind ourselves to our current CPU, and set up a UDN hardwall
  // which encompasses the entire chip, so that we can communicate
  // with the BME server.

  cpu_set_t cpus;

  tmc_cpus_clear(&cpus);
  tmc_cpus_grid_add_all(&cpus);

  tmc_cpus_set_my_cpu(tmc_cpus_get_my_current_cpu());

  if (tmc_udn_init(&cpus) != 0)
  {
    perror("UDN hardwall create failed");
    exit(1);
  }

  if (tmc_udn_activate() != 0)
  {
    perror("UDN hardwall activate failed");
    exit(1);
  }


  // Get one huge page of memory.
  tmc_alloc_t alloc = TMC_ALLOC_INIT;
  tmc_alloc_set_huge(&alloc);
  tmc_alloc_set_home(&alloc, 0);
  tmc_alloc_set_shared(&alloc);
  int mlength = 1 << 24;
  void* maddr = tmc_alloc_map(&alloc, mlength);
  if (maddr == NULL)
  {
    perror("can't mmap");
    exit(1);
  }


  // Lock down that memory and get its physical address and caching
  // information, using the bme_mem device driver.

  struct bme_user_mem_desc_io user_mem_desc;
  struct bme_phys_mem_desc_io phys_mem_desc;
  int fd = open("/dev/bme/mem", O_RDWR);

  if (fd < 0)
  {
    perror("couldn't open /dev/bme/mem");
    exit(1);
  }


  // First we find out how many pages are in the region to be locked down.
  // (Given our allocation above, we know we must have exactly one large page,
  // but this is an example of what you would do for large regions.)

  user_mem_desc.user.va = (uintptr_t)maddr;
  user_mem_desc.user.len = mlength;

  if (ioctl(fd, BME_IOC_GET_NUM_PAGES, &user_mem_desc) != 0)
  {
    perror("BME_IOC_GET_NUM_PAGES ioctl failed");
    exit(1);
  }


  // Now that we know how many pages are there, we can request that they be
  // locked into physical memory, and retrieve their physical address and
  // cache mapping information.

  phys_mem_desc.user.va = (uintptr_t)maddr;
  phys_mem_desc.user.len = mlength;

  phys_mem_desc.phys =
    (uintptr_t)malloc(sizeof(struct bme_phys_mem_desc) *
                      user_mem_desc.num_pages);

  phys_mem_desc.num_pages = user_mem_desc.num_pages;

  if (ioctl(fd, BME_IOC_LOCK_MEMORY, &phys_mem_desc) != 0)
  {
    perror("BME_IOC_LOCK_MEMORY ioctl failed");
    exit(1);
  }


  // Send the BME application a message telling it about the memory we
  // just locked down.  Since this is an example, we're only sending one
  // message, for one page.

  DynamicHeader my_hdr = tmc_udn_header_from_cpu(tmc_cpus_get_my_cpu());

  struct bme_phys_mem_desc *phys =
    (struct bme_phys_mem_desc *)(uintptr_t)phys_mem_desc.phys;

  tmc_udn_send_6(bme_server, UDN0_DEMUX_TAG,
                 EX_MSG_MAPPING,
                 my_hdr.word,
                 phys->pa,
                 phys->pa >> 32,
                 phys->pte,
                 phys->pte >> 32);

  uint32_t reply = udn0_receive();
  if (reply)
  {
    fprintf(stderr, "client: got bad response %u to MAPPING message\n",
            reply);
    exit(1);
  }


  // Now read our standard input into a buffer in the shared page; send
  // a request to the BME tile to process that data, putting the output
  // elsewhere in the shared page; and then write it to standard output.

  char* inbuf = maddr;
  char* outbuf = inbuf + PROCESSING_BUFSIZE;
  
  int len;
  while ((len = read(STDIN_FILENO, inbuf, PROCESSING_BUFSIZE)) > 0)
  {
    // Note that our message gives the server the offsets of the input and
    // output buffers, rather than pointers to them.  This is because the
    // server has not mapped in the data at the same set of virtual addresses
    // we're using.  We could arrange this, if desired, although it would
    // require more coordination between the client and server.

    tmc_udn_send_5(bme_server, UDN0_DEMUX_TAG,
                   EX_MSG_PROCESS,
                   my_hdr.word,
                   0,
                   len,
                   PROCESSING_BUFSIZE);

    reply = udn0_receive();
    if (reply != len)
    {
      fprintf(stderr, "client: got bad response %u to PROCESS "
              "message (expected %d)\n", reply, len);
      exit(1);
    }

    if (write(STDOUT_FILENO, outbuf, len) != len)
    {
      perror("write");
      exit(1);
    }
  }

  return 0;
}
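
For reference, the client is invoked per the usage string above, e.g. (hypothetical coordinates and file names):

  ./linux_client -i input.dat -o output.dat 3,4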