Beispiel #1
0
/*
 * Write a 64-bit value to a register inside one context's MMIO window.
 *
 * afu_h:  AFU handle, handed unchanged to cxl_mmio_write64()
 * ctx:    context index; multiplied by MMIO_CTX_OFFSET to form the window base
 * offset: register offset added to that base
 * data:   value to write
 *
 * Returns the result of cxl_mmio_write64() unchanged.
 */
static int mmio_write(struct cxl_afu_h *afu_h, int ctx, uint32_t offset,
		      uint64_t data)
{
	/* Absolute register address: per-context base plus caller's offset. */
	const uint32_t reg_addr = (ctx * MMIO_CTX_OFFSET) + offset;
	int status;

	VERBOSE3("[%s] Enter, Offset: 0x%x data: 0x%016llx\n",
		__func__, reg_addr, (long long)data);

	status = cxl_mmio_write64(afu_h, reg_addr, data);

	VERBOSE3("[%s] Exit, rc = %d\n", __func__, status);
	return status;
}
Beispiel #2
0
/*
 * MMIO self-test against the first AFU found in the system.
 *
 * Options:
 *   -h, --help        print usage and exit
 *   -s, --seed <n>    seed for rand(); defaults to the current time
 *
 * Sequence:
 *   - negative tests: mapping MMIO and reading a register before the AFU is
 *     attached must both fail;
 *   - attach with a random WED, map MMIO;
 *   - CHECK 1: the WED read back from offset 0x8 equals the value attached;
 *   - CHECK 2: a 64-bit write at 0x17f0 reads back as two 32-bit halves;
 *   - CHECK 3: two 32-bit writes at 0x17f8/0x17fc read back as one 64-bit
 *     value.
 *
 * The outcome is reported on stdout/stderr as "PASSED" or "FAILED:...".
 * NOTE: the exit status is 0 on every path, including failures, so callers
 * have to inspect the output to determine the result.
 */
int main(int argc, char *argv[])
{
	struct cxl_afu_h *afu_h;
	uint64_t wed, wed_check, rand64, rand64_check;
	uint32_t rand32_upper, rand32_lower;
	unsigned seed;
	int opt, option_index;
	char *name;

	// Strip any leading directory components from the program name
	name = strrchr(argv[0], '/');
	if (name)
		name++;
	else
		name = argv[0];

	static struct option long_options[] = {
		{"help",	no_argument,		0,		'h'},
		{"seed",	required_argument,	0,		's'},
		{NULL, 0, 0, 0}
	};

	option_index = 0;
	seed = (unsigned) time(NULL);
	while ((opt = getopt_long (argc, argv, "hs:",
				   long_options, &option_index)) >= 0) {
		switch (opt)
		{
		case 0:
			break;
		case 's':
			seed = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			usage(name);
			return 0;
		}
	}

	// Seed random number generator
	srand(seed);
	// seed is unsigned, so print it with %u (it can be fed back via --seed)
	printf("%s: seed=%u\n", name, seed);

	// Find first AFU in system
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "FAILED:No AFU found!\n");
		goto done;
	}

	// Open AFU
	afu_h = cxl_afu_open_h(afu_h, CXL_VIEW_DEDICATED);
	if (!afu_h) {
		perror("FAILED:cxl_afu_open_h");
		goto done;
	}

	// Negative test: mapping must NOT succeed before the AFU is attached
	printf("Attempt mapping AFU registers before attach\n");
	if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) == 0) {
		printf("FAILED:cxl_mmio_map");
		goto done;
	}

	// Negative test: reads must NOT succeed without a successful mapping
	printf("Attempt mmio read before successful mapping\n");
	if (cxl_mmio_read64(afu_h, 0x8, &wed_check) == 0) {
		printf("FAILED:cxl_mmio_read64");
		goto done;
	}

	// Generate random 64-bit value for WED
	wed = rand();
	wed <<= 32;
	wed |= rand();

	// Start AFU passing random WED value; report an attach failure here
	// instead of letting it surface later as an unrelated mapping error
	if (cxl_afu_attach(afu_h, wed) < 0) {
		perror("FAILED:cxl_afu_attach");
		goto done;
	}

	// Map AFU MMIO registers
	printf("Mapping AFU registers...\n");
	if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) {
		perror("FAILED:cxl_mmio_map");
		goto done;
	}

	/////////////////////////////////////////////////////
	// CHECK 1 - WED value was passed to AFU correctly //
	/////////////////////////////////////////////////////

	// Read WED from AFU and verify
	if (cxl_mmio_read64(afu_h, 0x8, &wed_check) < 0) {
		perror("FAILED:cxl_mmio_read64");
		goto done;
	}
	if (wed != wed_check) {
		printf("\nFAILED:WED mismatch!\n");
		printf("\tExpected:0x%016"PRIx64"\n", wed);
		printf("\tActual  :0x%016"PRIx64"\n", wed_check);
		goto done;
	}
	printf("WED check complete\n");

	//////////////////////////////////////////////////////////////
	// CHECK 2 - Write 64-bit value and check with 32-bit reads //
	//////////////////////////////////////////////////////////////

	// Write random 64-bit value to MMIO space
	rand64 = rand();
	rand64 <<= 32;
	rand64 |= rand();
	if (cxl_mmio_write64(afu_h, 0x17f0, rand64) < 0) {
		perror("FAILED:cxl_mmio_write64");
		goto done;
	}

	// Use two 32-bit reads to check the 64-bit value written
	if (cxl_mmio_read32(afu_h, 0x17f0, &rand32_upper) < 0) {
		perror("FAILED:cxl_mmio_read32");
		goto done;
	}
	if (cxl_mmio_read32(afu_h, 0x17f4, &rand32_lower) < 0) {
		perror("FAILED:cxl_mmio_read32");
		goto done;
	}
	// The word at the lower offset is treated as the upper 32 bits
	rand64_check = (uint64_t) rand32_upper;
	rand64_check <<= 32;
	rand64_check |= (uint64_t) rand32_lower;
	if (rand64 != rand64_check) {
		printf("\nFAILED:64-bit write => 32-bit reads mismatch!\n");
		printf("\tExpected:0x%016"PRIx64"\n", rand64);
		printf("\tActual  :0x%016"PRIx64"\n", rand64_check);
		goto done;
	}
	printf("64-bit write => 32-bit reads check complete\n");

	//////////////////////////////////////////////////////////////
	// CHECK 3 - Write 32-bit values and check with 64-bit read //
	//////////////////////////////////////////////////////////////

	// Write two random 32-bit values to a single 64-bit MMIO register
	rand32_upper = rand();
	if (cxl_mmio_write32(afu_h, 0x17f8, rand32_upper) < 0) {
		perror("FAILED:cxl_mmio_write32");
		goto done;
	}
	rand32_lower = rand();
	if (cxl_mmio_write32(afu_h, 0x17fc, rand32_lower) < 0) {
		perror("FAILED:cxl_mmio_write32");
		goto done;
	}

	// Build 64-bit value from two 32-bit values
	rand64 = (uint64_t) rand32_upper;
	rand64 <<= 32;
	rand64 |= (uint64_t) rand32_lower;

	// Check 32-bit writes with one 64-bit read
	if (cxl_mmio_read64(afu_h, 0x17f8, &rand64_check) < 0) {
		perror("FAILED:cxl_mmio_read64");
		goto done;
	}
	if (rand64 != rand64_check) {
		printf("\nFAILED:32-bit writes => 64-bit read mismatch!\n");
		printf("\tExpected:0x%016"PRIx64"\n", rand64);
		printf("\tActual  :0x%016"PRIx64"\n", rand64_check);
		goto done;
	}
	printf("32-bit writes => 64-bit read check complete\n");

	// Report test as passing
	printf("PASSED\n");
done:
	if (afu_h) {
		// Unmap AFU MMIO registers
		cxl_mmio_unmap(afu_h);
		// Free AFU
		cxl_afu_free(afu_h);
	}

	return 0;
}
int main (int argc, char **argv)
{
    int ret;
    int i, count, data_size;
    uint64_t mmio_state = 0;

    struct wed * capi_wed = NULL;
    struct wed_tx * p_wed_tx = NULL;
    char cxl_device [64];
 
    __u8 * source_buf, * result_buf;
    int16_t * ptr = NULL;
    struct cxl_afu_h * afu_h = NULL;
    FILE *fp;

    int send_num, received_num, loops;
    struct timeval start;
    struct timeval end;
    double interval;

    if(argc != 2)
    {
        printf("uasge: %s imagepath \n", argv[0]);
        exit(1);
    }
    // Malloc buffer for the work element descriptor
    // posix_memalign is used to keep alignment requirement and make DMA engine simple
    ret = posix_memalign ((void **) &capi_wed, CACHELINE_BYTES, sizeof(struct wed));
    if (ret) {
        printf ("Error. Can not malloc buffer for wed.\n");
        return -1;
    }

    ret = posix_memalign ((void **) &p_wed_tx, CACHELINE_BYTES, sizeof(struct wed_tx));
    if (ret) {
        printf ("Error. Can not malloc buffer for wed.\n");
        return -1;
    }

    // Malloc buffer for the source data
    // posix_memalign is used to keep alignment requirement and make DMA engine simple
    ret = posix_memalign ((void **) &source_buf, CACHELINE_BYTES, DATA_SIZE);
    if (ret) {
        printf ("Error. Can not malloc buffer for source buffer.\n");
        free (capi_wed);
        return -1;
    }

    // Malloc buffer for the result
    // posix_memalign is used to keep alignment requirement and make DMA engine simple
    ret = posix_memalign ((void **) &result_buf, CACHELINE_BYTES, DATA_SIZE);
    if (ret) {
        printf ("Error. Can not malloc buffer for result buffer.\n");
        free (capi_wed);
        free (source_buf);
        return -1;
    }

    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        printf ("Image file can not be opened.\n");
        exit(1);
    }
    
    //Read the input data from the file, the data should be 16bits, 
    //You can modify here to for your own data.
    ptr = (int16_t*) source_buf;
    count = 0;
    while(fscanf(fp, "%hd", ptr++) > 0)
    {
        count ++;
        if (count >= DATA_SIZE / sizeof(int16_t))
        {
            printf ("Buff overflow.\n");
            exit(1);
        }
    }
    
    data_size = count * sizeof(int16_t);
    
    //The input data buffer should be aligined with cache line (128bytes)   
    if(data_size % CACHELINE_BYTES)
    {
        data_size = (data_size / CACHELINE_BYTES + 1) * CACHELINE_BYTES;
    }

    capi_wed->data_size = data_size;
    capi_wed->param_s0 = 0x11223344;
    capi_wed->param_s1 = 0x55667788;
    capi_wed->source = source_buf;
    capi_wed->result = result_buf;
    capi_wed->p_wed_tx = (__u8 *)p_wed_tx;

    //Detect which device is avaliable
    strncpy (cxl_device, DEVICE, 64);
    for (ret = 0; ret < 8; ret ++) {
        if (access(cxl_device, W_OK) == 0)
            break;
        cxl_device [12] ++;
    }

    if (ret == 8) {
        printf ("Can not find available CAPI device.\n");
        return -1;
    }
    //Open CAPI device
    afu_h = cxl_afu_open_dev (cxl_device);
    if (!afu_h) {
        printf ("Error. Can not open CAPI device : %s\n", DEVICE);
        return -1;
    }
    printf ("Open CAPI device %s\n", cxl_device);

    // Create the CAPI hardware thread, AFU is enabled
    // capi_wed pointer is sent to the AFU
    // The DMA dose not run at this moment, it will only read the capi_wed struction
    // to the DAM engine
    cxl_afu_attach (afu_h, (__u64) capi_wed);
    printf ("Attach AFU to current application.\n");

    // Map some register from AFU to user space to do runtime control
    // This is an optinal step for CAPI application development dependent on AFU
    if ((cxl_mmio_map (afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) {
        printf ("Error. Can not map registers\n");
        return -1;
    }

    // We clear the status to Zero
    capi_wed->status = 0;
    capi_wed->jcounter = 0;
    capi_wed->ret_size = 0;
    
    // We use a register to show whether the AFU is ready
    // This is an optinal step for CAPI application development dependent on AFU
    do {
        cxl_mmio_read64 (afu_h, MMIO_TRACE_ADDR, &mmio_state);
    } while ((mmio_state & 0xf) != 0x1);

    gettimeofday(&start, NULL);
    //Start the data receive DMA
    cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0);
    //Start the data send DMA
    cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f);
    
    loops = 100;

    send_num = 1;
    received_num = 0;
    while(1)
    {
        if(capi_wed->status)
        {
            received_num ++;
            if (received_num >= loops)break;
            //Clear the status bit
            capi_wed->status = 0;
            cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0);
        }
        if (p_wed_tx -> status)
        {
            p_wed_tx->status = 0;
            if (send_num < loops)
            {
                //clear the status bit
                p_wed_tx->status = 0;
                cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f);
                send_num ++;
            }
        }
    }
    gettimeofday(&end, NULL);

    interval = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0;

    printf ("The %dst job finish. Return Size = 0x%x\n", capi_wed->jcounter, capi_wed->ret_size);
    printf ("The total time cost (%d loops) is %.3fs, each loop cost is %.3fms.\n", loops, interval, interval/loops * 1000.0);

    ptr = (int16_t*) result_buf;
    printf ("Recognition Results (without softmax):\n");
    for (i = 0; i < 10; i++)
    {
        printf("%d \t", *ptr ++);
    }
    printf ("\n");

    free (capi_wed);
    free (source_buf);
    free (result_buf);

    // We use register write to trigger the DAM to finish all jobs
    // This is an optinal step for CAPI application development dependent on AFU
    cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf1);

    // We do register unmap because we map register above
    cxl_mmio_unmap (afu_h);

    // Close the CAPI device
    cxl_afu_free (afu_h);

    return 0;
}