/*
 * Read one 64-bit MMIO register for a given context.
 *
 * The register address handed to cxl_mmio_read64() is
 * (ctx * MMIO_CTX_OFFSET) + offset. The value read is stored through
 * `data`, and the return code of cxl_mmio_read64() is passed back
 * unchanged. Entry and exit are traced through VERBOSE3.
 */
static int mmio_read(struct cxl_afu_h *afu_h, int ctx, uint32_t offset,
		     uint64_t *data)
{
	uint32_t ctx_offset = (ctx * MMIO_CTX_OFFSET) + offset;
	int status;

	VERBOSE3("[%s] Enter, CTX: %d Offset: 0x%x\n", __func__, ctx, ctx_offset);

	status = cxl_mmio_read64(afu_h, ctx_offset, data);

	VERBOSE3("[%s] Exit, rc = %d data: 0x%016llx\n", __func__, status, (long long)*data);

	return status;
}
int main(int argc, char *argv[]) { struct cxl_afu_h *afu_h; uint64_t wed, wed_check, rand64, rand64_check; uint32_t rand32_upper, rand32_lower; unsigned seed; int opt, option_index; char *name; name = strrchr(argv[0], '/'); if (name) name++; else name = argv[0]; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"seed", required_argument, 0, 's'}, {NULL, 0, 0, 0} }; option_index = 0; seed = time(NULL); while ((opt = getopt_long (argc, argv, "hs:", long_options, &option_index)) >= 0) { switch (opt) { case 0: break; case 's': seed = strtoul(optarg, NULL, 0); break; case 'h': default: usage(name); return 0; } } // Seed random number generator srand(seed); printf("%s: seed=%d\n", name, seed); // Find first AFU in system afu_h = cxl_afu_next(NULL); if (!afu_h) { fprintf(stderr, "FAILED:No AFU found!\n"); goto done; } // Open AFU afu_h = cxl_afu_open_h(afu_h, CXL_VIEW_DEDICATED); if (!afu_h) { perror("FAILED:cxl_afu_open_h"); goto done; } printf("Attempt mapping AFU registers before attach\n"); if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) == 0) { printf("FAILED:cxl_mmio_map"); goto done; } printf("Attempt mmio read before successful mapping\n"); if (cxl_mmio_read64(afu_h, 0x8, &wed_check) == 0) { printf("FAILED:cxl_mmio_read64"); goto done; } // Generate random 64-bit value for WED wed = rand(); wed <<= 32; wed |= rand(); // Start AFU passing random WED value cxl_afu_attach(afu_h, wed); // Map AFU MMIO registers printf("Mapping AFU registers...\n"); if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) { perror("FAILED:cxl_mmio_map"); goto done; } ///////////////////////////////////////////////////// // CHECK 1 - WED value was passed to AFU correctly // ///////////////////////////////////////////////////// // Read WED from AFU and verify if (cxl_mmio_read64(afu_h, 0x8, &wed_check) < 0) { perror("FAILED:cxl_mmio_read64"); goto done; } if (wed != wed_check) { printf("\nFAILED:WED mismatch!\n"); printf("\tExpected:0x%016"PRIx64"\n", wed); 
printf("\tActual :0x%016"PRIx64"\n", wed_check); goto done; } printf("WED check complete\n"); ////////////////////////////////////////////////////////////// // CHECK 2 - Write 64-bit value and check with 32-bit reads // ////////////////////////////////////////////////////////////// // Write random 64-bit value to MMIO space rand64 = rand(); rand64 <<= 32; rand64 |= rand(); if (cxl_mmio_write64(afu_h, 0x17f0, rand64) < 0) { perror("FAILED:cxl_mmio_write64"); goto done; } // Use two 32-bit read to check 64-bit value written if (cxl_mmio_read32(afu_h, 0x17f0, &rand32_upper) < 0) { perror("FAILED:cxl_mmio_read32"); goto done; } if (cxl_mmio_read32(afu_h, 0x17f4, &rand32_lower) < 0) { perror("FAILED:cxl_mmio_read32"); goto done; } rand64_check = (uint64_t) rand32_upper; rand64_check <<= 32; rand64_check |= (uint64_t) rand32_lower; if (rand64 != rand64_check) { printf("\nFAILED:64-bit write => 32-bit reads mismatch!\n"); printf("\tExpected:0x%016"PRIx64"\n", rand64); printf("\tActual :0x%016"PRIx64"\n", rand64_check); goto done; } printf("64-bit write => 32-bit reads check complete\n"); ////////////////////////////////////////////////////////////// // CHECK 3 - Write 32-bit values and check with 64-bit read // ////////////////////////////////////////////////////////////// // Write two random 32-bit values to a single 64-bit MMIO register rand32_upper = rand(); if (cxl_mmio_write32(afu_h, 0x17f8, rand32_upper) < 0) { perror("FAILED:cxl_mmio_write32"); goto done; } rand32_lower = rand(); if (cxl_mmio_write32(afu_h, 0x17fc, rand32_lower) < 0) { perror("FAILED:cxl_mmio_write32"); goto done; } // Build 64-bit value from two 32-bit values rand64 = (uint64_t) rand32_upper; rand64 <<= 32; rand64 |= (uint64_t) rand32_lower; // Check 32-bit writes with one 64-bit read if (cxl_mmio_read64(afu_h, 0x17f8, &rand64_check) < 0) { perror("FAILED:cxl_mmio_read64"); goto done; } if (rand64 != rand64_check) { printf("\nFAILED:32-bit writes => 64-bit read mismatch!\n"); 
printf("\tExpected:0x%016"PRIx64"\n", rand64); printf("\tActual :0x%016"PRIx64"\n", rand64_check); goto done; } printf("32-bit writes => 64-bit read check complete\n"); // Report test as passing printf("PASSED\n"); done: if (afu_h) { // Unmap AFU MMIO registers cxl_mmio_unmap(afu_h); // Free AFU cxl_afu_free(afu_h); } return 0; }
int main (int argc, char **argv) { int ret; int i, count, data_size; uint64_t mmio_state = 0; struct wed * capi_wed = NULL; struct wed_tx * p_wed_tx = NULL; char cxl_device [64]; __u8 * source_buf, * result_buf; int16_t * ptr = NULL; struct cxl_afu_h * afu_h = NULL; FILE *fp; int send_num, received_num, loops; struct timeval start; struct timeval end; double interval; if(argc != 2) { printf("uasge: %s imagepath \n", argv[0]); exit(1); } // Malloc buffer for the work element descriptor // posix_memalign is used to keep alignment requirement and make DMA engine simple ret = posix_memalign ((void **) &capi_wed, CACHELINE_BYTES, sizeof(struct wed)); if (ret) { printf ("Error. Can not malloc buffer for wed.\n"); return -1; } ret = posix_memalign ((void **) &p_wed_tx, CACHELINE_BYTES, sizeof(struct wed_tx)); if (ret) { printf ("Error. Can not malloc buffer for wed.\n"); return -1; } // Malloc buffer for the source data // posix_memalign is used to keep alignment requirement and make DMA engine simple ret = posix_memalign ((void **) &source_buf, CACHELINE_BYTES, DATA_SIZE); if (ret) { printf ("Error. Can not malloc buffer for source buffer.\n"); free (capi_wed); return -1; } // Malloc buffer for the result // posix_memalign is used to keep alignment requirement and make DMA engine simple ret = posix_memalign ((void **) &result_buf, CACHELINE_BYTES, DATA_SIZE); if (ret) { printf ("Error. Can not malloc buffer for result buffer.\n"); free (capi_wed); free (source_buf); return -1; } if ((fp = fopen(argv[1], "r")) == NULL) { printf ("Image file can not be opened.\n"); exit(1); } //Read the input data from the file, the data should be 16bits, //You can modify here to for your own data. 
ptr = (int16_t*) source_buf; count = 0; while(fscanf(fp, "%hd", ptr++) > 0) { count ++; if (count >= DATA_SIZE / sizeof(int16_t)) { printf ("Buff overflow.\n"); exit(1); } } data_size = count * sizeof(int16_t); //The input data buffer should be aligined with cache line (128bytes) if(data_size % CACHELINE_BYTES) { data_size = (data_size / CACHELINE_BYTES + 1) * CACHELINE_BYTES; } capi_wed->data_size = data_size; capi_wed->param_s0 = 0x11223344; capi_wed->param_s1 = 0x55667788; capi_wed->source = source_buf; capi_wed->result = result_buf; capi_wed->p_wed_tx = (__u8 *)p_wed_tx; //Detect which device is avaliable strncpy (cxl_device, DEVICE, 64); for (ret = 0; ret < 8; ret ++) { if (access(cxl_device, W_OK) == 0) break; cxl_device [12] ++; } if (ret == 8) { printf ("Can not find available CAPI device.\n"); return -1; } //Open CAPI device afu_h = cxl_afu_open_dev (cxl_device); if (!afu_h) { printf ("Error. Can not open CAPI device : %s\n", DEVICE); return -1; } printf ("Open CAPI device %s\n", cxl_device); // Create the CAPI hardware thread, AFU is enabled // capi_wed pointer is sent to the AFU // The DMA dose not run at this moment, it will only read the capi_wed struction // to the DAM engine cxl_afu_attach (afu_h, (__u64) capi_wed); printf ("Attach AFU to current application.\n"); // Map some register from AFU to user space to do runtime control // This is an optinal step for CAPI application development dependent on AFU if ((cxl_mmio_map (afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) { printf ("Error. 
Can not map registers\n"); return -1; } // We clear the status to Zero capi_wed->status = 0; capi_wed->jcounter = 0; capi_wed->ret_size = 0; // We use a register to show whether the AFU is ready // This is an optinal step for CAPI application development dependent on AFU do { cxl_mmio_read64 (afu_h, MMIO_TRACE_ADDR, &mmio_state); } while ((mmio_state & 0xf) != 0x1); gettimeofday(&start, NULL); //Start the data receive DMA cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0); //Start the data send DMA cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f); loops = 100; send_num = 1; received_num = 0; while(1) { if(capi_wed->status) { received_num ++; if (received_num >= loops)break; //Clear the status bit capi_wed->status = 0; cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0); } if (p_wed_tx -> status) { p_wed_tx->status = 0; if (send_num < loops) { //clear the status bit p_wed_tx->status = 0; cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f); send_num ++; } } } gettimeofday(&end, NULL); interval = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0; printf ("The %dst job finish. Return Size = 0x%x\n", capi_wed->jcounter, capi_wed->ret_size); printf ("The total time cost (%d loops) is %.3fs, each loop cost is %.3fms.\n", loops, interval, interval/loops * 1000.0); ptr = (int16_t*) result_buf; printf ("Recognition Results (without softmax):\n"); for (i = 0; i < 10; i++) { printf("%d \t", *ptr ++); } printf ("\n"); free (capi_wed); free (source_buf); free (result_buf); // We use register write to trigger the DAM to finish all jobs // This is an optinal step for CAPI application development dependent on AFU cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf1); // We do register unmap because we map register above cxl_mmio_unmap (afu_h); // Close the CAPI device cxl_afu_free (afu_h); return 0; }