int main(int argc, char *argv[]) { // Open first AFU found struct cxl_afu_h *afu_h; afu_h = cxl_afu_next(NULL); if (!afu_h) { fprintf(stderr, "\nNo AFU found!\n\n"); return -1; } afu_h = cxl_afu_open_h(afu_h, CXL_VIEW_DEDICATED); if (!afu_h) { perror("cxl_afu_open_h"); return -1; } // Prepare WED struct wed *wed = NULL; if (posix_memalign((void **)&wed, CACHELINE_BYTES, sizeof(struct wed))) { perror("posix_memalign"); return -1; } printf("Allocated WED memory @ 0x%016" PRIx64 "\n", (uint64_t) wed); wed->endian_test = 1; wed->status = 0; // Start AFU cxl_afu_attach(afu_h, (uint64_t) wed); // Map AFU MMIO registers, if needed printf("Mapping AFU registers...\n"); if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) { perror("cxl_mmio_map"); return -1; } /************************************************************************** Do something here and wait for results. cxl_mmio_*() functions can only be used here between cxl_mmio_map and cxl_mmio_unmap. Presumably your application will possibly monitor and/or possibly update values in the wed struct or some other place in memory that AFU was informed that it could access. Maybe a bit in the wed struct like those in the example "status" field could be updated by the AFU to indicate that it has completed a job. In this example that is why the status field is made volatile. This prevents the compiler from optimization polling of the status field. **************************************************************************/ // Unmap AFU MMIO registers, if previously mapped cxl_mmio_unmap(afu_h); // Free AFU cxl_afu_free(afu_h); return 0; }
/*
 * Release the AFU resources held by a master device context.
 *
 * Unmaps the MMIO space, frees the AFU handle and releases the error-info
 * buffer, resetting both pointers in the context to NULL.
 *
 * Returns 0 on success, or -1 when there is no context or no open AFU handle
 * (in which case nothing is released).
 */
static int afu_m_close(struct mdev_ctx *mctx)
{
	VERBOSE3("[%s] Enter\n", __func__);

	if (NULL == mctx || NULL == mctx->afu_h)
		return -1;

	cxl_mmio_unmap(mctx->afu_h);
	cxl_afu_free(mctx->afu_h);
	mctx->afu_h = NULL;

	/* free(NULL) is a no-op, so no guard is needed */
	free(mctx->errinfo);
	mctx->errinfo = NULL;

	VERBOSE3("[%s] Exit\n", __func__);
	return 0;
}
int main(int argc, char *argv[]) { struct cxl_afu_h *afu_h; uint64_t wed, wed_check, rand64, rand64_check; uint32_t rand32_upper, rand32_lower; unsigned seed; int opt, option_index; char *name; name = strrchr(argv[0], '/'); if (name) name++; else name = argv[0]; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"seed", required_argument, 0, 's'}, {NULL, 0, 0, 0} }; option_index = 0; seed = time(NULL); while ((opt = getopt_long (argc, argv, "hs:", long_options, &option_index)) >= 0) { switch (opt) { case 0: break; case 's': seed = strtoul(optarg, NULL, 0); break; case 'h': default: usage(name); return 0; } } // Seed random number generator srand(seed); printf("%s: seed=%d\n", name, seed); // Find first AFU in system afu_h = cxl_afu_next(NULL); if (!afu_h) { fprintf(stderr, "FAILED:No AFU found!\n"); goto done; } // Open AFU afu_h = cxl_afu_open_h(afu_h, CXL_VIEW_DEDICATED); if (!afu_h) { perror("FAILED:cxl_afu_open_h"); goto done; } printf("Attempt mapping AFU registers before attach\n"); if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) == 0) { printf("FAILED:cxl_mmio_map"); goto done; } printf("Attempt mmio read before successful mapping\n"); if (cxl_mmio_read64(afu_h, 0x8, &wed_check) == 0) { printf("FAILED:cxl_mmio_read64"); goto done; } // Generate random 64-bit value for WED wed = rand(); wed <<= 32; wed |= rand(); // Start AFU passing random WED value cxl_afu_attach(afu_h, wed); // Map AFU MMIO registers printf("Mapping AFU registers...\n"); if ((cxl_mmio_map(afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) { perror("FAILED:cxl_mmio_map"); goto done; } ///////////////////////////////////////////////////// // CHECK 1 - WED value was passed to AFU correctly // ///////////////////////////////////////////////////// // Read WED from AFU and verify if (cxl_mmio_read64(afu_h, 0x8, &wed_check) < 0) { perror("FAILED:cxl_mmio_read64"); goto done; } if (wed != wed_check) { printf("\nFAILED:WED mismatch!\n"); printf("\tExpected:0x%016"PRIx64"\n", wed); 
printf("\tActual :0x%016"PRIx64"\n", wed_check); goto done; } printf("WED check complete\n"); ////////////////////////////////////////////////////////////// // CHECK 2 - Write 64-bit value and check with 32-bit reads // ////////////////////////////////////////////////////////////// // Write random 64-bit value to MMIO space rand64 = rand(); rand64 <<= 32; rand64 |= rand(); if (cxl_mmio_write64(afu_h, 0x17f0, rand64) < 0) { perror("FAILED:cxl_mmio_write64"); goto done; } // Use two 32-bit read to check 64-bit value written if (cxl_mmio_read32(afu_h, 0x17f0, &rand32_upper) < 0) { perror("FAILED:cxl_mmio_read32"); goto done; } if (cxl_mmio_read32(afu_h, 0x17f4, &rand32_lower) < 0) { perror("FAILED:cxl_mmio_read32"); goto done; } rand64_check = (uint64_t) rand32_upper; rand64_check <<= 32; rand64_check |= (uint64_t) rand32_lower; if (rand64 != rand64_check) { printf("\nFAILED:64-bit write => 32-bit reads mismatch!\n"); printf("\tExpected:0x%016"PRIx64"\n", rand64); printf("\tActual :0x%016"PRIx64"\n", rand64_check); goto done; } printf("64-bit write => 32-bit reads check complete\n"); ////////////////////////////////////////////////////////////// // CHECK 3 - Write 32-bit values and check with 64-bit read // ////////////////////////////////////////////////////////////// // Write two random 32-bit values to a single 64-bit MMIO register rand32_upper = rand(); if (cxl_mmio_write32(afu_h, 0x17f8, rand32_upper) < 0) { perror("FAILED:cxl_mmio_write32"); goto done; } rand32_lower = rand(); if (cxl_mmio_write32(afu_h, 0x17fc, rand32_lower) < 0) { perror("FAILED:cxl_mmio_write32"); goto done; } // Build 64-bit value from two 32-bit values rand64 = (uint64_t) rand32_upper; rand64 <<= 32; rand64 |= (uint64_t) rand32_lower; // Check 32-bit writes with one 64-bit read if (cxl_mmio_read64(afu_h, 0x17f8, &rand64_check) < 0) { perror("FAILED:cxl_mmio_read64"); goto done; } if (rand64 != rand64_check) { printf("\nFAILED:32-bit writes => 64-bit read mismatch!\n"); 
printf("\tExpected:0x%016"PRIx64"\n", rand64); printf("\tActual :0x%016"PRIx64"\n", rand64_check); goto done; } printf("32-bit writes => 64-bit read check complete\n"); // Report test as passing printf("PASSED\n"); done: if (afu_h) { // Unmap AFU MMIO registers cxl_mmio_unmap(afu_h); // Free AFU cxl_afu_free(afu_h); } return 0; }
#ifdef PSL9
/*
 * Print one cacheline as four "DEBUG: <label> Q<n> 0x..." rows of hex bytes,
 * one row per quarter of the cacheline.
 */
static void dump_cacheline_quadrants(const char *label, const char *line)
{
	const int quadrant_bytes = CACHELINE_BYTES / 4;
	int quadrant, byte;

	for (quadrant = 0; quadrant < 4; quadrant++) {
		printf("DEBUG: %s Q%d 0x", label, quadrant);
		for (byte = 0; byte < quadrant_bytes; byte++) {
			// Cast so a signed char is not sign-extended by %02x
			printf("%02x", (unsigned char)
			       line[byte + quadrant * quadrant_bytes]);
		}
		printf("\n");
	}
}
#endif

/*
 * PSL9 master/slave context test (compiled to a no-op unless PSL9 is defined).
 *
 * Opens the first AFU once with the master view and once with the slave
 * view.  For each context it fills one cacheline with random bytes, has an
 * AFU machine read it (XLAT_RD) and write it to a second cacheline (XLAT_WR),
 * and compares the two cachelines.
 *
 * Options: -s/--seed <value> fixes the random seed, -h/--help prints usage.
 *
 * Results are reported on stdout ("... PASSED" / "FAILED:..."); the process
 * exit status is always 0.
 */
int main(int argc, char *argv[])
{
#ifdef PSL9
	static struct option long_options[] = {
		{"help", no_argument, 0, 'h'},
		{"seed", required_argument, 0, 's'},
		{NULL, 0, 0, 0}
	};
	MachineConfig machine;
	struct cxl_afu_h *afu_h, *afu_m = NULL, *afu_s = NULL;
	char *cacheline0 = NULL, *cacheline1 = NULL;
	char *master_cacheline0 = NULL, *master_cacheline1 = NULL;
	char *name;
	uint64_t wed;
	unsigned seed;
	int i, opt, option_index;
	int response;
	int context, machine_number;

	// Strip any leading directory from the program name
	name = strrchr(argv[0], '/');
	if (name)
		name++;
	else
		name = argv[0];

	option_index = 0;
	seed = time(NULL);
	while ((opt = getopt_long(argc, argv, "hs:", long_options,
				  &option_index)) >= 0) {
		switch (opt) {
		case 0:
			break;
		case 's':
			seed = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			usage(name);
			return 0;
		}
	}

	// Seed random number generator; seed is unsigned, so print it with %u
	srand(seed);
	printf("%s: seed=%u\n", name, seed);

	// find first AFU found
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "\nNo AFU found!\n\n");
		goto done;
	}

	// afu master
	afu_m = cxl_afu_open_h(afu_h, CXL_VIEW_MASTER);
	if (!afu_m) {
		perror("cxl_afu_open_h for master");
		goto done;
	}

	// Set WED to random value
	wed = rand();
	wed <<= 32;
	wed |= rand();

	// Start AFU for master
	printf("Attach AFU master\n");
	if (cxl_afu_attach(afu_m, wed) < 0) {
		perror("FAILED:cxl_afu_attach for master");
		goto done;
	}

	printf("wed = 0x%" PRIx64 "\n", wed);

	// Map AFU MMIO registers
	printf("Mapping AFU master registers...\n");
	if (cxl_mmio_map(afu_m, CXL_MMIO_BIG_ENDIAN) < 0) {
		perror("cxl_mmio_map for master");
		goto done;
	}
	printf("End AFU master mmio map\n");

	context = cxl_afu_get_process_element(afu_m);
	printf("Master context = %d\n", context);

	// Allocate aligned memory for two cachelines
	if (posix_memalign((void **)&cacheline0, CACHELINE_BYTES,
			   CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}
	if (posix_memalign((void **)&cacheline1, CACHELINE_BYTES,
			   CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}

	// Pollute first cacheline with random values
	printf("CACHELINE0 = 0x");
	for (i = 0; i < CACHELINE_BYTES; i++) {
		cacheline0[i] = rand();
		printf("%02x", (unsigned char)cacheline0[i]);
	}
	printf("\n");

	// Initialize machine configuration
	printf("initialize machine\n");
	init_machine(&machine);
	printf("End init machine\n");

	// Use AFU Machine 0 to read the first cacheline from memory to AFU
	printf("Configure, enable and run machine\n");
	if ((response = config_enable_and_run_machine(afu_m, &machine, 0,
			context, PSL_COMMAND_XLAT_RD_P0, CACHELINE_BYTES, 0, 0,
			(uint64_t)cacheline0, CACHELINE_BYTES,
			DIRECTED_M)) < 0) {
		printf("FAILED:config_enable_and_run_machine for master XLAT_RD response = %d\n", response);
		goto done;
	}
	printf("End configure enable and run machine for XLAT_RD\n");

	// Check for valid response
	if (response != PSL_RESPONSE_DONE) {
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}
	printf("Completed cacheline read\n");

	// Use AFU Machine 0 to write the data to the second cacheline
	if ((response = config_enable_and_run_machine(afu_m, &machine, 0,
			context, PSL_COMMAND_XLAT_WR_P0, CACHELINE_BYTES, 0, 0,
			(uint64_t)cacheline1, CACHELINE_BYTES,
			DIRECTED_M)) < 0) {
		printf("FAILED:config_enable_and_run_machine for master XLAT_WR response = %d\n", response);
		goto done;
	}
	printf("End configure enable and run machine for XLAT WR\n");

	// Check for valid response
	if (response != PSL_RESPONSE_DONE) {
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}

	// Test if copy from cacheline0 to cacheline1 was successful
	if (memcmp(cacheline0, cacheline1, CACHELINE_BYTES) != 0) {
		printf("FAILED:memcmp\n");
		dump_cacheline_quadrants("Expected", cacheline0);
		dump_cacheline_quadrants("Actual", cacheline1);
		goto done;
	}

	printf("Master AFU: PASSED\n");

	// Park the master buffers so they are released at "done" (after the
	// AFUs are freed) instead of being overwritten by the slave allocation.
	master_cacheline0 = cacheline0;
	master_cacheline1 = cacheline1;
	cacheline0 = NULL;
	cacheline1 = NULL;

	// afu slave
	// find next afu
	// NOTE(review): NULL is passed, exactly as in the first lookup above, so
	// this does not advance from the master's handle -- confirm intent.
	afu_h = cxl_afu_next(NULL);
	if (!afu_h) {
		fprintf(stderr, "\nNo AFU found!\n\n");
		goto done;
	}
	afu_s = cxl_afu_open_h(afu_h, CXL_VIEW_SLAVE);
	if (!afu_s) {
		perror("cxl_afu_open_h for slave");
		goto done;
	}

	// Set WED to random value
	wed = rand();
	wed <<= 32;
	wed |= rand();

	// Start AFU for slave
	if (cxl_afu_attach(afu_s, wed) < 0) {
		perror("FAILED:cxl_afu_attach for slave");
		goto done;
	}

	// Map AFU MMIO registers
	printf("Mapping AFU slave registers...\n");
	if (cxl_mmio_map(afu_s, CXL_MMIO_BIG_ENDIAN) < 0) {
		perror("cxl_mmio_map for slave");
		goto done;
	}
	printf("End AFU slave mmio map\n");

	context = cxl_afu_get_process_element(afu_s);
	printf("Slave context = %d\n", context);
	machine_number = 20;

	// Allocate aligned memory for two cachelines
	if (posix_memalign((void **)&cacheline0, CACHELINE_BYTES,
			   CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}
	if (posix_memalign((void **)&cacheline1, CACHELINE_BYTES,
			   CACHELINE_BYTES) != 0) {
		perror("FAILED:posix_memalign");
		goto done;
	}

	// Pollute first cacheline with random values
	for (i = 0; i < CACHELINE_BYTES; i++)
		cacheline0[i] = rand();

	// The machine configuration initialised for the master is reused here.

	// Use the AFU machine selected above to read the first cacheline
	printf("Start config enable and run machine for slave\n");
	if ((response = config_enable_and_run_machine(afu_s, &machine,
			machine_number, context, PSL_COMMAND_XLAT_RD_P0,
			CACHELINE_BYTES, 0, 0, (uint64_t)cacheline0,
			CACHELINE_BYTES, DIRECTED)) < 0) {
		printf("FAILED:config_enable_and_run_machine for slave");
		goto done;
	}
	printf("End config enable and run machine for slave\n");

	// Check for valid response
	if (response != PSL_RESPONSE_DONE) {
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}
	printf("Completed cacheline read for slave\n");

	// Use the same AFU machine to write the data to the second cacheline
	if ((response = config_enable_and_run_machine(afu_s, &machine,
			machine_number, context, PSL_COMMAND_XLAT_WR_P0,
			CACHELINE_BYTES, 0, 0, (uint64_t)cacheline1,
			CACHELINE_BYTES, DIRECTED)) < 0) {
		printf("FAILED:config_enable_and_run_machine for slave");
		goto done;
	}

	// Check for valid response
	if (response != PSL_RESPONSE_DONE) {
		printf("FAILED: Unexpected response code 0x%x\n", response);
		goto done;
	}

	// Test if copy from cacheline0 to cacheline1 was successful
	if (memcmp(cacheline0, cacheline1, CACHELINE_BYTES) != 0) {
		printf("FAILED:memcmp\n");
		dump_cacheline_quadrants("Expected", cacheline0);
		dump_cacheline_quadrants("Actual", cacheline1);
		goto done;
	}

	printf("Slave AFU: PASSED\n");

done:
	// unmap and free slave afu
	if (afu_s) {
		cxl_mmio_unmap(afu_s);
		cxl_afu_free(afu_s);
	}
	// unmap and free master afu
	if (afu_m) {
		// Unmap AFU MMIO registers
		cxl_mmio_unmap(afu_m);

		// Free AFU
		cxl_afu_free(afu_m);
	}

	// Release the cachelines only after both AFU contexts are gone
	free(cacheline0);
	free(cacheline1);
	free(master_cacheline0);
	free(master_cacheline1);
#else
	(void)argc;
	(void)argv;
#endif
	return 0;
}
int main (int argc, char **argv) { int ret; int i, count, data_size; uint64_t mmio_state = 0; struct wed * capi_wed = NULL; struct wed_tx * p_wed_tx = NULL; char cxl_device [64]; __u8 * source_buf, * result_buf; int16_t * ptr = NULL; struct cxl_afu_h * afu_h = NULL; FILE *fp; int send_num, received_num, loops; struct timeval start; struct timeval end; double interval; if(argc != 2) { printf("uasge: %s imagepath \n", argv[0]); exit(1); } // Malloc buffer for the work element descriptor // posix_memalign is used to keep alignment requirement and make DMA engine simple ret = posix_memalign ((void **) &capi_wed, CACHELINE_BYTES, sizeof(struct wed)); if (ret) { printf ("Error. Can not malloc buffer for wed.\n"); return -1; } ret = posix_memalign ((void **) &p_wed_tx, CACHELINE_BYTES, sizeof(struct wed_tx)); if (ret) { printf ("Error. Can not malloc buffer for wed.\n"); return -1; } // Malloc buffer for the source data // posix_memalign is used to keep alignment requirement and make DMA engine simple ret = posix_memalign ((void **) &source_buf, CACHELINE_BYTES, DATA_SIZE); if (ret) { printf ("Error. Can not malloc buffer for source buffer.\n"); free (capi_wed); return -1; } // Malloc buffer for the result // posix_memalign is used to keep alignment requirement and make DMA engine simple ret = posix_memalign ((void **) &result_buf, CACHELINE_BYTES, DATA_SIZE); if (ret) { printf ("Error. Can not malloc buffer for result buffer.\n"); free (capi_wed); free (source_buf); return -1; } if ((fp = fopen(argv[1], "r")) == NULL) { printf ("Image file can not be opened.\n"); exit(1); } //Read the input data from the file, the data should be 16bits, //You can modify here to for your own data. 
ptr = (int16_t*) source_buf; count = 0; while(fscanf(fp, "%hd", ptr++) > 0) { count ++; if (count >= DATA_SIZE / sizeof(int16_t)) { printf ("Buff overflow.\n"); exit(1); } } data_size = count * sizeof(int16_t); //The input data buffer should be aligined with cache line (128bytes) if(data_size % CACHELINE_BYTES) { data_size = (data_size / CACHELINE_BYTES + 1) * CACHELINE_BYTES; } capi_wed->data_size = data_size; capi_wed->param_s0 = 0x11223344; capi_wed->param_s1 = 0x55667788; capi_wed->source = source_buf; capi_wed->result = result_buf; capi_wed->p_wed_tx = (__u8 *)p_wed_tx; //Detect which device is avaliable strncpy (cxl_device, DEVICE, 64); for (ret = 0; ret < 8; ret ++) { if (access(cxl_device, W_OK) == 0) break; cxl_device [12] ++; } if (ret == 8) { printf ("Can not find available CAPI device.\n"); return -1; } //Open CAPI device afu_h = cxl_afu_open_dev (cxl_device); if (!afu_h) { printf ("Error. Can not open CAPI device : %s\n", DEVICE); return -1; } printf ("Open CAPI device %s\n", cxl_device); // Create the CAPI hardware thread, AFU is enabled // capi_wed pointer is sent to the AFU // The DMA dose not run at this moment, it will only read the capi_wed struction // to the DAM engine cxl_afu_attach (afu_h, (__u64) capi_wed); printf ("Attach AFU to current application.\n"); // Map some register from AFU to user space to do runtime control // This is an optinal step for CAPI application development dependent on AFU if ((cxl_mmio_map (afu_h, CXL_MMIO_BIG_ENDIAN)) < 0) { printf ("Error. 
Can not map registers\n"); return -1; } // We clear the status to Zero capi_wed->status = 0; capi_wed->jcounter = 0; capi_wed->ret_size = 0; // We use a register to show whether the AFU is ready // This is an optinal step for CAPI application development dependent on AFU do { cxl_mmio_read64 (afu_h, MMIO_TRACE_ADDR, &mmio_state); } while ((mmio_state & 0xf) != 0x1); gettimeofday(&start, NULL); //Start the data receive DMA cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0); //Start the data send DMA cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f); loops = 100; send_num = 1; received_num = 0; while(1) { if(capi_wed->status) { received_num ++; if (received_num >= loops)break; //Clear the status bit capi_wed->status = 0; cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf0); } if (p_wed_tx -> status) { p_wed_tx->status = 0; if (send_num < loops) { //clear the status bit p_wed_tx->status = 0; cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0x0f); send_num ++; } } } gettimeofday(&end, NULL); interval = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0; printf ("The %dst job finish. Return Size = 0x%x\n", capi_wed->jcounter, capi_wed->ret_size); printf ("The total time cost (%d loops) is %.3fs, each loop cost is %.3fms.\n", loops, interval, interval/loops * 1000.0); ptr = (int16_t*) result_buf; printf ("Recognition Results (without softmax):\n"); for (i = 0; i < 10; i++) { printf("%d \t", *ptr ++); } printf ("\n"); free (capi_wed); free (source_buf); free (result_buf); // We use register write to trigger the DAM to finish all jobs // This is an optinal step for CAPI application development dependent on AFU cxl_mmio_write64 (afu_h, MMIO_TRACE_ADDR, 0xf1); // We do register unmap because we map register above cxl_mmio_unmap (afu_h); // Close the CAPI device cxl_afu_free (afu_h); return 0; }