Exemple #1
0
/*
 * Minimal libcxl skeleton: open the first AFU, hand it a cacheline-aligned
 * work element descriptor (WED), map the AFU MMIO registers, and tear
 * everything down again.
 *
 * Returns 0 on success and -1 if any setup step fails. Every resource
 * acquired before a failure is released on the way out.
 */
int main(int argc, char *argv[])
{
	struct cxl_afu_h *afu_h;
	struct wed *wed = NULL;
	int rc = -1;

	// Open first AFU found
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "\nNo AFU found!\n\n");
		return -1;
	}
	afu_h = cxl_afu_open_h(afu_h, CXL_VIEW_DEDICATED);
	if (!afu_h) {
		perror("cxl_afu_open_h");
		return -1;
	}

	// Prepare WED
	if (posix_memalign((void **)&wed, CACHELINE_BYTES, sizeof(struct wed))) {
		perror("posix_memalign");
		goto out_free_afu;
	}
	printf("Allocated WED memory @ 0x%016" PRIx64 "\n",
	       (uint64_t)(uintptr_t)wed);
	wed->endian_test = 1;
	wed->status = 0;

	// Start AFU; the WED address is what the AFU receives
	if (cxl_afu_attach(afu_h, (uint64_t)(uintptr_t)wed) < 0) {
		perror("cxl_afu_attach");
		goto out_free_afu;
	}

	// Map AFU MMIO registers, if needed
	printf("Mapping AFU registers...\n");
	if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) {
		perror("cxl_mmio_map");
		goto out_free_afu;
	}

  /**************************************************************************

  Do something here and wait for results.

  cxl_mmio_*() functions can only be used here, between cxl_mmio_map and
  cxl_mmio_unmap.

  Presumably your application will monitor and/or update values in the wed
  struct, or in some other place in memory that the AFU was told it may
  access.  For instance, a bit in the example "status" field of the wed
  struct could be set by the AFU to indicate that it has completed a job.
  That is why the status field is made volatile in this example: it keeps
  the compiler from optimizing away the polling of the status field.

  **************************************************************************/

	rc = 0;

	// Unmap AFU MMIO registers, if previously mapped
	cxl_mmio_unmap(afu_h);

out_free_afu:
	// Free AFU
	cxl_afu_free(afu_h);

	// Release the WED only after the AFU handle is gone; free(NULL) is a no-op
	free(wed);

	return rc;
}
Exemple #2
0
/*
 * Close the AFU held by mctx: unmap its MMIO registers, free the AFU
 * handle and free the errinfo buffer, clearing both pointers in mctx.
 *
 * Returns 0 on success, or -1 (without touching errinfo) when mctx has no
 * AFU handle.
 */
static int afu_m_close(struct mdev_ctx *mctx)
{
	VERBOSE3("[%s] Enter\n", __func__);

	if (!mctx->afu_h)
		return -1;

	cxl_mmio_unmap(mctx->afu_h);
	cxl_afu_free(mctx->afu_h);
	mctx->afu_h = NULL;

	/* free(NULL) is a no-op, so no guard is required here */
	free(mctx->errinfo);
	mctx->errinfo = NULL;

	VERBOSE3("[%s] Exit\n", __func__);
	return 0;
}
Exemple #3
0
/*
 * MMIO test: checks that MMIO access is rejected before the AFU is attached
 * and mapped, then verifies WED delivery and mixed 32/64-bit MMIO accesses.
 *
 * Options: -h/--help prints usage, -s/--seed <n> sets the random seed
 * (defaults to the current time).
 *
 * The result is reported on stdout as "PASSED" or a "FAILED:..." line.
 * NOTE(review): the exit status is 0 even when a check fails; it is kept
 * that way here so existing callers that parse the output are unaffected.
 */
int main(int argc, char *argv[])
{
	struct cxl_afu_h *afu_h;
	uint64_t wed, wed_check, rand64, rand64_check;
	uint32_t rand32_upper, rand32_lower;
	unsigned seed;
	int opt, option_index;
	char *name;

	// Program name without any leading directory components
	name = strrchr(argv[0], '/');
	if (name)
		name++;
	else
		name = argv[0];

	static struct option long_options[] = {
		{"help",	no_argument,		0,		'h'},
		{"seed",	required_argument,	0,		's'},
		{NULL, 0, 0, 0}
	};

	option_index = 0;
	seed = time(NULL);
	while ((opt = getopt_long (argc, argv, "hs:",
				   long_options, &option_index)) >= 0) {
		switch (opt)
		{
		case 0:
			break;
		case 's':
			seed = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			usage(name);
			return 0;
		}
	}

	// Seed random number generator
	srand(seed);
	// seed is unsigned, so it must be printed with %u
	printf("%s: seed=%u\n", name, seed);

	// Find first AFU in system
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "FAILED:No AFU found!\n");
		goto done;
	}

	// Open AFU
	afu_h = cxl_afu_open_h(afu_h, CXL_VIEW_DEDICATED);
	if (!afu_h) {
		perror("FAILED:cxl_afu_open_h");
		goto done;
	}

	// Negative test: mapping must NOT succeed before the AFU is attached
	printf("Attempt mapping AFU registers before attach\n");
	if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) == 0) {
		printf("FAILED:cxl_mmio_map");
		goto done;
	}

	// Negative test: reads must NOT succeed before a successful mapping
	printf("Attempt mmio read before successful mapping\n");
	if (cxl_mmio_read64(afu_h, 0x8, &wed_check) == 0) {
		printf("FAILED:cxl_mmio_read64");
		goto done;
	}

	// Generate random 64-bit value for WED
	wed = rand();
	wed <<= 32;
	wed |= rand();

	// Start AFU passing random WED value
	if (cxl_afu_attach(afu_h, wed) < 0) {
		perror("FAILED:cxl_afu_attach");
		goto done;
	}

	// Map AFU MMIO registers
	printf("Mapping AFU registers...\n");
	if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) {
		perror("FAILED:cxl_mmio_map");
		goto done;
	}

	/////////////////////////////////////////////////////
	// CHECK 1 - WED value was passed to AFU correctly //
	/////////////////////////////////////////////////////

	// Read WED from AFU and verify
	if (cxl_mmio_read64(afu_h, 0x8, &wed_check) < 0) {
		perror("FAILED:cxl_mmio_read64");
		goto done;
	}
	if (wed != wed_check) {
		printf("\nFAILED:WED mismatch!\n");
		printf("\tExpected:0x%016"PRIx64"\n", wed);
		printf("\tActual  :0x%016"PRIx64"\n", wed_check);
		goto done;
	}
	printf("WED check complete\n");

	//////////////////////////////////////////////////////////////
	// CHECK 2 - Write 64-bit value and check with 32-bit reads //
	//////////////////////////////////////////////////////////////

	// Write random 64-bit value to MMIO space
	rand64 = rand();
	rand64 <<= 32;
	rand64 |= rand();
	if (cxl_mmio_write64(afu_h, 0x17f0, rand64) < 0) {
		perror("FAILED:cxl_mmio_write64");
		goto done;
	}

	// Use two 32-bit reads to check the 64-bit value written
	if (cxl_mmio_read32(afu_h, 0x17f0, &rand32_upper) < 0) {
		perror("FAILED:cxl_mmio_read32");
		goto done;
	}
	if (cxl_mmio_read32(afu_h, 0x17f4, &rand32_lower) < 0) {
		perror("FAILED:cxl_mmio_read32");
		goto done;
	}
	rand64_check = (uint64_t) rand32_upper;
	rand64_check <<= 32;
	rand64_check |= (uint64_t) rand32_lower;
	if (rand64 != rand64_check) {
		printf("\nFAILED:64-bit write => 32-bit reads mismatch!\n");
		printf("\tExpected:0x%016"PRIx64"\n", rand64);
		printf("\tActual  :0x%016"PRIx64"\n", rand64_check);
		goto done;
	}
	printf("64-bit write => 32-bit reads check complete\n");

	//////////////////////////////////////////////////////////////
	// CHECK 3 - Write 32-bit values and check with 64-bit read //
	//////////////////////////////////////////////////////////////

	// Write two random 32-bit values to a single 64-bit MMIO register
	rand32_upper = rand();
	if (cxl_mmio_write32(afu_h, 0x17f8, rand32_upper) < 0) {
		perror("FAILED:cxl_mmio_write32");
		goto done;
	}
	rand32_lower = rand();
	if (cxl_mmio_write32(afu_h, 0x17fc, rand32_lower) < 0) {
		perror("FAILED:cxl_mmio_write32");
		goto done;
	}

	// Build 64-bit value from two 32-bit values
	rand64 = (uint64_t) rand32_upper;
	rand64 <<= 32;
	rand64 |= (uint64_t) rand32_lower;

	// Check 32-bit writes with one 64-bit read
	if (cxl_mmio_read64(afu_h, 0x17f8, &rand64_check) < 0) {
		perror("FAILED:cxl_mmio_read64");
		goto done;
	}
	if (rand64 != rand64_check) {
		printf("\nFAILED:32-bit writes => 64-bit read mismatch!\n");
		printf("\tExpected:0x%016"PRIx64"\n", rand64);
		printf("\tActual  :0x%016"PRIx64"\n", rand64_check);
		goto done;
	}
	printf("32-bit writes => 64-bit read check complete\n");

	// Report test as passing
	printf("PASSED\n");
done:
	if (afu_h) {
		// Unmap AFU MMIO registers
		cxl_mmio_unmap(afu_h);
		// Free AFU
		cxl_afu_free(afu_h);
	}

	return 0;
}
Exemple #4
0
#ifdef PSL9
/*
 * Print one cacheline as four quadrants of hex bytes. label names the line
 * in the output ("Expected" or "Actual").
 */
static void dump_cacheline_quadrants(const char *label, const char *line)
{
	const int quadrant_bytes = CACHELINE_BYTES / 4;
	int quadrant, byte;

	for (quadrant = 0; quadrant < 4; quadrant++) {
		printf("DEBUG: %s  Q%d 0x", label, quadrant);
		for (byte = 0; byte < quadrant_bytes; byte++) {
			// Cast so bytes >= 0x80 are not sign-extended by %02x
			printf("%02x", (unsigned char)line[byte + quadrant * quadrant_bytes]);
		}
		printf("\n");
	}
}
#endif

/*
 * Master/slave context test (only built when PSL9 is defined; otherwise
 * this main does nothing and returns 0).
 *
 * For a master context and then a slave context, an AFU machine is asked to
 * read one randomly filled cacheline from memory and write it to a second
 * cacheline; the two cachelines are then compared.
 *
 * Options: -h/--help prints usage, -s/--seed <n> sets the random seed
 * (defaults to the current time).
 *
 * Progress and the PASSED/FAILED result are reported on stdout; the exit
 * status is always 0.
 */
int main(int argc, char *argv[])
{
#ifdef PSL9
	MachineConfig machine;
	// NULL-initialized so the cleanup path can free them unconditionally
	char *cacheline0 = NULL, *cacheline1 = NULL;
	char *slave_cacheline0 = NULL, *slave_cacheline1 = NULL;
	char *name;
	uint64_t wed;
	unsigned seed;
	int i, opt, option_index;
	int response;
	int context, machine_number;
	struct cxl_afu_h *afu_h, *afu_m = NULL, *afu_s = NULL;

	// Program name without any leading directory components
	name = strrchr(argv[0], '/');
	if (name)
		name++;
	else
		name = argv[0];

	static struct option long_options[] = {
		{"help",	no_argument,		0,		'h'},
		{"seed",	required_argument,	0,		's'},
		{NULL, 0, 0, 0}
	};

	option_index = 0;
	seed = time(NULL);
	while ((opt = getopt_long (argc, argv, "hs:",
				   long_options, &option_index)) >= 0) {
		switch (opt)
		{
		case 0:
			break;
		case 's':
			seed = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			usage(name);
			return 0;
		}
	}

	// Seed random number generator
	srand(seed);
	// seed is unsigned, so it must be printed with %u
	printf("%s: seed=%u\n", name, seed);

	// Find first AFU
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "\nNo AFU found!\n\n");
		goto done;
	}

	// Open the AFU with the master view
	afu_m = cxl_afu_open_h(afu_h, CXL_VIEW_MASTER);
	if (!afu_m) {
		perror("cxl_afu_open_h for master");
		goto done;
	}

	// Set WED to random value
	wed = rand();
	wed <<= 32;
	wed |= rand();

	// Start AFU for master
	printf("Attach AFU master\n");
	if (cxl_afu_attach(afu_m, wed) < 0) {
		perror("FAILED:cxl_afu_attach for master");
		goto done;
	}

	printf("wed = 0x%"PRIx64"\n", wed);

	// Map AFU MMIO registers
	printf("Mapping AFU master registers...\n");
	if ((cxl_mmio_map(afu_m, CXL_MMIO_BIG_ENDIAN)) < 0) {
		perror("cxl_mmio_map for master");
		goto done;
	}
	printf("End AFU master mmio map\n");

	context = cxl_afu_get_process_element(afu_m);
	printf("Master context = %d\n", context);

	// Allocate aligned memory for two cachelines
	if (posix_memalign((void **)&cacheline0, CACHELINE_BYTES, CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}
	if (posix_memalign((void **)&cacheline1, CACHELINE_BYTES, CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}

	// Pollute first cacheline with random values
	printf("CACHELINE0 = 0x");
	for (i = 0; i < CACHELINE_BYTES; i++)
	{
		cacheline0[i] = rand();
		// Cast so bytes >= 0x80 are not sign-extended by %02x
		printf("%02x", (unsigned char)cacheline0[i]);
	}
	printf("\n");

	// Initialize machine configuration
	printf("initialize machine\n");
	init_machine(&machine);
	printf("End init machine\n");

	// Use AFU Machine 0 to read the first cacheline from memory to AFU
	printf("Configure, enable and run machine\n");
	if ((response = config_enable_and_run_machine(afu_m, &machine, 0, context, PSL_COMMAND_XLAT_RD_P0, CACHELINE_BYTES, 0, 0, (uint64_t)cacheline0, CACHELINE_BYTES, DIRECTED_M)) < 0)
	{
		printf("FAILED:config_enable_and_run_machine for master XLAT_RD response = %d\n", response);
		goto done;
	}
	printf("End configure enable and run machine for XLAT_RD\n");
	// Check for valid response
	if (response != PSL_RESPONSE_DONE)
	{
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}

	printf("Completed cacheline read\n");

	// Use AFU Machine 0 to write the data to the second cacheline
	if ((response = config_enable_and_run_machine(afu_m, &machine, 0, context, PSL_COMMAND_XLAT_WR_P0, CACHELINE_BYTES, 0, 0, (uint64_t)cacheline1, CACHELINE_BYTES, DIRECTED_M)) < 0)
	{
		printf("FAILED:config_enable_and_run_machine for master XLAT_WR response = %d\n", response);
		goto done;
	}
	printf("End configure enable and run machine for XLAT WR\n");
	// Check for valid response
	if (response != PSL_RESPONSE_DONE)
	{
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}

	// Test if copy from cacheline0 to cacheline1 was successful
	if (memcmp(cacheline0, cacheline1, CACHELINE_BYTES) != 0) {
		printf("FAILED:memcmp\n");
		dump_cacheline_quadrants("Expected", cacheline0);
		dump_cacheline_quadrants("Actual", cacheline1);
		goto done;
	}

	printf("Master AFU: PASSED\n");

	// AFU slave
	// NOTE(review): passing NULL restarts the enumeration, so this
	// presumably returns the same first AFU as above rather than a
	// "next" one - confirm that this is the intent.
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "\nNo AFU found!\n\n");
		goto done;
	}
	afu_s = cxl_afu_open_h(afu_h, CXL_VIEW_SLAVE);
	if (!afu_s) {
		perror("cxl_afu_open_h for slave");
		goto done;
	}

	// Set WED to random value
	wed = rand();
	wed <<= 32;
	wed |= rand();
	// Start AFU for slave
	if (cxl_afu_attach(afu_s, wed) < 0) {
		perror("FAILED:cxl_afu_attach for slave");
		goto done;
	}

	// Map AFU MMIO registers
	printf("Mapping AFU slave registers...\n");
	if ((cxl_mmio_map(afu_s, CXL_MMIO_BIG_ENDIAN)) < 0) {
		perror("cxl_mmio_map for slave");
		goto done;
	}
	printf("End AFU slave mmio map\n");

	context = cxl_afu_get_process_element(afu_s);
	printf("Slave context = %d\n", context);

	machine_number = 20;

	// Allocate a fresh pair of aligned cachelines for the slave run; the
	// master pair stays allocated until cleanup so nothing is leaked or
	// freed while an AFU is still open.
	if (posix_memalign((void **)&slave_cacheline0, CACHELINE_BYTES, CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}
	if (posix_memalign((void **)&slave_cacheline1, CACHELINE_BYTES, CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}

	// Pollute first cacheline with random values
	for (i = 0; i < CACHELINE_BYTES; i++)
		slave_cacheline0[i] = rand();

	// The machine configuration set up for the master phase is reused;
	// it is not re-initialized here.

	// Use AFU machine machine_number to read the first cacheline from memory to AFU
	printf("Start config enable and run machine for slave\n");
	if ((response = config_enable_and_run_machine(afu_s, &machine, machine_number, context, PSL_COMMAND_XLAT_RD_P0, CACHELINE_BYTES, 0, 0, (uint64_t)slave_cacheline0, CACHELINE_BYTES, DIRECTED)) < 0)
	{
		printf("FAILED:config_enable_and_run_machine for slave");
		goto done;
	}
	printf("End config enable and run machine for slave\n");
	// Check for valid response
	if (response != PSL_RESPONSE_DONE)
	{
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}

	printf("Completed cacheline read for slave\n");

	// Use AFU machine machine_number to write the data to the second cacheline
	if ((response = config_enable_and_run_machine(afu_s, &machine, machine_number, context, PSL_COMMAND_XLAT_WR_P0, CACHELINE_BYTES, 0, 0, (uint64_t)slave_cacheline1, CACHELINE_BYTES, DIRECTED)) < 0)
	{
		printf("FAILED:config_enable_and_run_machine for slave");
		goto done;
	}

	// Check for valid response
	if (response != PSL_RESPONSE_DONE)
	{
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}

	// Test if copy from the first to the second slave cacheline was successful
	if (memcmp(slave_cacheline0, slave_cacheline1, CACHELINE_BYTES) != 0) {
		printf("FAILED:memcmp\n");
		dump_cacheline_quadrants("Expected", slave_cacheline0);
		dump_cacheline_quadrants("Actual", slave_cacheline1);
		goto done;
	}

	printf("Slave AFU: PASSED\n");

done:
	// Unmap and free slave AFU
	if (afu_s) {
		cxl_mmio_unmap(afu_s);
		cxl_afu_free(afu_s);
	}
	// Unmap and free master AFU
	if (afu_m) {
		// Unmap AFU MMIO registers
		cxl_mmio_unmap(afu_m);

		// Free AFU
		cxl_afu_free(afu_m);
	}

	// Release the cachelines only after both AFUs are gone; free(NULL) is a no-op
	free(slave_cacheline1);
	free(slave_cacheline0);
	free(cacheline1);
	free(cacheline0);

#endif
	return 0;
}
int main (int argc, char **argv)
{
    int ret;
    int i, count, data_size;
    uint64_t mmio_state = 0;

    struct wed * capi_wed = NULL;
    struct wed_tx * p_wed_tx = NULL;
    char cxl_device [64];
 
    __u8 * source_buf, * result_buf;
    int16_t * ptr = NULL;
    struct cxl_afu_h * afu_h = NULL;
    FILE *fp;

    int send_num, received_num, loops;
    struct timeval start;
    struct timeval end;
    double interval;

    if(argc != 2)
    {
        printf("uasge: %s imagepath \n", argv[0]);
        exit(1);
    }
    // Malloc buffer for the work element descriptor
    // posix_memalign is used to keep alignment requirement and make DMA engine simple
    ret = posix_memalign ((void **) &capi_wed, CACHELINE_BYTES, sizeof(struct wed));
    if (ret) {
        printf ("Error. Can not malloc buffer for wed.\n");
        return -1;
    }

    ret = posix_memalign ((void **) &p_wed_tx, CACHELINE_BYTES, sizeof(struct wed_tx));
    if (ret) {
        printf ("Error. Can not malloc buffer for wed.\n");
        return -1;
    }

    // Malloc buffer for the source data
    // posix_memalign is used to keep alignment requirement and make DMA engine simple
    ret = posix_memalign ((void **) &source_buf, CACHELINE_BYTES, DATA_SIZE);
    if (ret) {
        printf ("Error. Can not malloc buffer for source buffer.\n");
        free (capi_wed);
        return -1;
    }

    // Malloc buffer for the result
    // posix_memalign is used to keep alignment requirement and make DMA engine simple
    ret = posix_memalign ((void **) &result_buf, CACHELINE_BYTES, DATA_SIZE);
    if (ret) {
        printf ("Error. Can not malloc buffer for result buffer.\n");
        free (capi_wed);
        free (source_buf);
        return -1;
    }

    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        printf ("Image file can not be opened.\n");
        exit(1);
    }
    
    //Read the input data from the file, the data should be 16bits, 
    //You can modify here to for your own data.
    ptr = (int16_t*) source_buf;
    count = 0;
    while(fscanf(fp, "%hd", ptr++) > 0)
    {
        count ++;
        if (count >= DATA_SIZE / sizeof(int16_t))
        {
            printf ("Buff overflow.\n");
            exit(1);
        }
    }
    
    data_size = count * sizeof(int16_t);
    
    //The input data buffer should be aligined with cache line (128bytes)   
    if(data_size % CACHELINE_BYTES)
    {
        data_size = (data_size / CACHELINE_BYTES + 1) * CACHELINE_BYTES;
    }

    capi_wed->data_size = data_size;
    capi_wed->param_s0 = 0x11223344;
    capi_wed->param_s1 = 0x55667788;
    capi_wed->source = source_buf;
    capi_wed->result = result_buf;
    capi_wed->p_wed_tx = (__u8 *)p_wed_tx;

    //Detect which device is avaliable
    strncpy (cxl_device, DEVICE, 64);
    for (ret = 0; ret < 8; ret ++) {
        if (access(cxl_device, W_OK) == 0)
            break;
        cxl_device [12] ++;
    }

    if (ret == 8) {
        printf ("Can not find available CAPI device.\n");
        return -1;
    }
    //Open CAPI device
    afu_h = cxl_afu_open_dev (cxl_device);
    if (!afu_h) {
        printf ("Error. Can not open CAPI device : %s\n", DEVICE);
        return -1;
    }
    printf ("Open CAPI device %s\n", cxl_device);

    // Create the CAPI hardware thread, AFU is enabled
    // capi_wed pointer is sent to the AFU
    // The DMA dose not run at this moment, it will only read the capi_wed struction
    // to the DAM engine
    cxl_afu_attach (afu_h, (__u64) capi_wed);
    printf ("Attach AFU to current application.\n");

    // Map some register from AFU to user space to do runtime control
    // This is an optinal step for CAPI application development dependent on AFU
    if ((cxl_mmio_map (afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) {
        printf ("Error. Can not map registers\n");
        return -1;
    }

    // We clear the status to Zero
    capi_wed->status = 0;
    capi_wed->jcounter = 0;
    capi_wed->ret_size = 0;
    
    // We use a register to show whether the AFU is ready
    // This is an optinal step for CAPI application development dependent on AFU
    do {
        cxl_mmio_read64 (afu_h, MMIO_TRACE_ADDR, &mmio_state);
    } while ((mmio_state & 0xf) != 0x1);

    gettimeofday(&start, NULL);
    //Start the data receive DMA
    cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0);
    //Start the data send DMA
    cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f);
    
    loops = 100;

    send_num = 1;
    received_num = 0;
    while(1)
    {
        if(capi_wed->status)
        {
            received_num ++;
            if (received_num >= loops)break;
            //Clear the status bit
            capi_wed->status = 0;
            cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0);
        }
        if (p_wed_tx -> status)
        {
            p_wed_tx->status = 0;
            if (send_num < loops)
            {
                //clear the status bit
                p_wed_tx->status = 0;
                cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f);
                send_num ++;
            }
        }
    }
    gettimeofday(&end, NULL);

    interval = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0;

    printf ("The %dst job finish. Return Size = 0x%x\n", capi_wed->jcounter, capi_wed->ret_size);
    printf ("The total time cost (%d loops) is %.3fs, each loop cost is %.3fms.\n", loops, interval, interval/loops * 1000.0);

    ptr = (int16_t*) result_buf;
    printf ("Recognition Results (without softmax):\n");
    for (i = 0; i < 10; i++)
    {
        printf("%d \t", *ptr ++);
    }
    printf ("\n");

    free (capi_wed);
    free (source_buf);
    free (result_buf);

    // We use register write to trigger the DAM to finish all jobs
    // This is an optinal step for CAPI application development dependent on AFU
    cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf1);

    // We do register unmap because we map register above
    cxl_mmio_unmap (afu_h);

    // Close the CAPI device
    cxl_afu_free (afu_h);

    return 0;
}