Ejemplo n.º 1
0
 // Returns accTime() divided by m_Count, or 0.0 when m_Count is zero
 // (which avoids dividing by zero when nothing has been counted).
 double avgTime() const
 {
     if (m_Count == 0) {
         return 0.0;
     }
     return accTime() / m_Count;
 }
Ejemplo n.º 2
0
// Entry point of the FPGA agent.
//
// Usage: <program> <inputfile>, where <inputfile> is the kernel binary that is
// loaded with load_file_to_memory() and handed to clCreateProgramWithBinary().
//
// Start-up: picks the first OpenCL platform, one device (accelerator when
// FPGA_DEVICE is defined, CPU otherwise), creates a context, a command queue,
// the "mmult" kernel and one input / one output device buffer.
//
// Serving: listens on TCP port 7000 and handles one connection at a time.
// Each request starts with packet_buf[0]:
//   * -1       -> collect_timer_stats() is called and the loop continues;
//   * otherwise packet_buf[0] is a shared-memory id and packet_buf[1] is the
//     number of ints to send to the device. The shared segment is attached,
//     the agent waits until the first int of the segment becomes non-zero,
//     runs the kernel, reads the results back into the same shared region and
//     stores DONE in the second int of the segment.
//
// Returns EXIT_FAILURE on any set-up or OpenCL error. The serving loop has no
// break, so the process normally ends through one of the error paths.
int main(int argc, char** argv)
{
    int err;                            // error code returned from api calls
    int* a = NULL;                      // input pointer (into shared memory)
    int* results = NULL;                // output pointer (into shared memory)

    cl_platform_id platform_id;         // platform id
    cl_device_id device_id;             // compute device id
    cl_context context;                 // compute context
    cl_command_queue commands;          // compute command queue
    cl_program program;                 // compute program
    cl_kernel kernel;                   // compute kernel

    char cl_platform_vendor[1001];
    char cl_platform_name[1001];

    cl_mem input_a;                     // device memory used for the input array
    cl_mem output;                      // device memory used for the output array

    if (argc != 2) {
        printf("%s <inputfile>\n", argv[0]);
        return EXIT_FAILURE;
    }

    // Connect to first platform
    //
    err = clGetPlatformIDs(1,&platform_id,NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to find an OpenCL platform!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    err = clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    printf("CL_PLATFORM_VENDOR %s\n",cl_platform_vendor);
    err = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: clGetPlatformInfo(CL_PLATFORM_NAME) failed!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    printf("CL_PLATFORM_NAME %s\n",cl_platform_name);

    // Connect to a compute device
    //
    int fpga = 0;
#if defined (FPGA_DEVICE)
    fpga = 1;
#endif
    err = clGetDeviceIDs(platform_id, fpga ? CL_DEVICE_TYPE_ACCELERATOR : CL_DEVICE_TYPE_CPU,
                         1, &device_id, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to create a device group!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }

    // Create a compute context
    //
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    if (!context)
    {
        printf("Error: Failed to create a compute context!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }

    // Create a command queue
    //
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    if (!commands)
    {
        printf("Error: Failed to create a command commands!\n");
        printf("Error: code %i\n",err);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }

    int status;

    // Create Program Objects
    //

    // Load binary from disk
    unsigned char *kernelbinary;
    char *xclbin=argv[1];
    printf("loading %s\n", xclbin);
    int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary);
    if (n_i < 0) {
        printf("failed to load kernel from xclbin: %s\n", xclbin);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to load kernel from xclbin: %s\n", xclbin);
    }
    size_t n = n_i;
    // Create the compute program from offline
    program = clCreateProgramWithBinary(context, 1, &device_id, &n,
                                        (const unsigned char **) &kernelbinary, &status, &err);
    if ((!program) || (err!=CL_SUCCESS)) {
        printf("Error: Failed to create compute program from binary %d!\n", err);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to create compute program from binary %d!\n", err);
    }

    // Build the program executable
    //
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        // Start with an empty string so that a failing clGetProgramBuildInfo
        // call cannot make printf read uninitialized stack memory.
        char buffer[2048] = "";

        printf("Error: Failed to build program executable!\n");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to build program executable!\n");
    }

    // Create the compute kernel in the program we wish to run
    //
    kernel = clCreateKernel(program, "mmult", &err);
    if (!kernel || err != CL_SUCCESS)
    {
        printf("Error: Failed to create compute kernel!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to create compute kernel!\n");
    }

    // Create the input and output arrays in device memory for our calculation
    //
    input_a = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(int) * DATA_SIZE, NULL, NULL);
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * RESULT_SIZE, NULL, NULL);
    if (!input_a || !output)
    {
        printf("Error: Failed to allocate device memory!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to allocate device memory!\n");
    }

    // set up socket
    printf("\n************* Welcome to UCLA FPGA agent! **********\n");
    struct sockaddr_in stSockAddr;
    int SocketFD = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);

    if(-1 == SocketFD) {
        perror("can not create socket");
        exit(EXIT_FAILURE);
    }

    memset(&stSockAddr, 0, sizeof(stSockAddr));

    stSockAddr.sin_family = AF_INET;
    stSockAddr.sin_port = htons(7000);
    stSockAddr.sin_addr.s_addr = htonl(INADDR_ANY);

    if(-1 == bind(SocketFD,(struct sockaddr *)&stSockAddr, sizeof(stSockAddr))) {
        perror("error bind failed");
        close(SocketFD);
        exit(EXIT_FAILURE);
    }

    if(-1 == listen(SocketFD, 10)) {
        perror("error listen failed");
        close(SocketFD);
        exit(EXIT_FAILURE);
    }


    int taskNum = -1;

    // polling setting: sleep interval between two checks of the ready flag
    timespec deadline;
    deadline.tv_sec = 0;
    deadline.tv_nsec = 100;

    // Get the start time; diff(timer, timer) is used to initialize the accumulators
    timespec timer = tic( );
    timespec socListenTime = diff(timer, timer);
    timespec socSendTime = diff(timer, timer);
    timespec socRecvTime = diff(timer, timer);
    timespec exeTime = diff(timer, timer);

    bool broadcastFlag = false;

    int packet_buf[PACKET_SIZE];

    while (true) {
        //printf("\n************* Got a new task! *************\n");
        timer = tic();

        int ConnectFD = accept(SocketFD, NULL, NULL);
        if (!broadcastFlag) {
            // First connection after a stats request: restart the timer so the
            // idle wait in accept() is not charged to socListenTime.
            broadcastFlag = true;
            timer = tic();
        }

        accTime (&socListenTime, &timer);

        if(0 > ConnectFD) {
            perror("error accept failed");
            close(SocketFD);
            exit(EXIT_FAILURE);
        }

        // A failed or too-short read would leave packet_buf uninitialized, and
        // its contents are used below as a shared-memory id and a transfer
        // size, so such a request is dropped instead of being processed.
        ssize_t received = read(ConnectFD, &packet_buf, PACKET_SIZE * sizeof(int));
        if (received < (ssize_t) sizeof(int)) {
            if (received < 0) {
                perror("error read failed");
            }
            else {
                printf("Error: request too short (%ld bytes)\n", (long) received);
            }
            close(ConnectFD);
            continue;
        }

        // send FPGA stats back to java application
        if(packet_buf[0] == -1) {
            // for profiling use
            // NOTE(review): ConnectFD is not closed on this path; presumably
            // collect_timer_stats() closes it - confirm.
            collect_timer_stats(ConnectFD, &socListenTime, &socSendTime, &socRecvTime, &exeTime, &timer);
            broadcastFlag = false;
            continue;
        }

        // A task request needs both the shmid and the data size.
        if (received < (ssize_t) (2 * sizeof(int))) {
            printf("Error: request header truncated (%ld bytes)\n", (long) received);
            close(ConnectFD);
            continue;
        }

        char* shm_addr;
        int shmid = packet_buf[0];
        int data_size = packet_buf[1];  // data sent to FPGA (unit: int)
        printf("Shmid: %d, Data size (# of int): %d\n", shmid, data_size);

        // data_size comes from the peer and becomes the size of the host->device
        // copy into input_a, which holds DATA_SIZE ints. Reject values that are
        // negative (they would turn into a huge size_t) or exceed that buffer.
        if (data_size < 0 || (size_t) data_size > (size_t) (DATA_SIZE)) {
            printf("Error: invalid data size %d\n", data_size);
            close(ConnectFD);
            continue;
        }

        // shared memory
        if((shm_addr = (char *) shmat(shmid, NULL, 0)) == (char *) -1) {
            perror("Server: shmat failed.");
            exit(1);
        }

        // Wait until the first int of the segment becomes non-zero. The flag is
        // written by another process, so it is read through a volatile pointer.
        volatile int* ready_flag = (volatile int*) shm_addr;
        int done = 0;
        while(done == 0) {
            done = *ready_flag;
            clock_nanosleep(CLOCK_REALTIME, 0, &deadline, NULL);
        }

        // Input and output share the same region right after the flag words.
        a = (int *)(shm_addr + FLAG_NUM * sizeof(int));
        results = (int *)(shm_addr + FLAG_NUM * sizeof(int));

        accTime (&socSendTime, &timer);

        // taskNum is read from shared memory and drives both the loop below and
        // the size of the read-back from `output` (RESULT_SIZE ints), so it is
        // validated before use.
        taskNum = a[2];
        if (taskNum < 0 ||
            (size_t) (FPGA_RET_PARAM_NUM) * (size_t) taskNum > (size_t) (RESULT_SIZE)) {
            printf("Error: invalid task number %d\n", taskNum);
            shmdt(shm_addr);
            close(ConnectFD);
            continue;
        }
        for (int i=0; i<taskNum; i++) {
            // Sanity check on the 8th int of each 8-int record that follows
            // the first 8 ints of the payload.
            int tmp = *(a+8+i*8+7);
            assert(tmp >=0 && tmp < TOTAL_TASK_NUMS);
        }
        printf("Task Num: %d\n", taskNum);

        // Write our data set into the input array in device memory
        err = clEnqueueWriteBuffer(commands, input_a, CL_TRUE, 0, sizeof(int) * data_size, a, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to write to source array a!\n");
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        // Set the arguments to our compute kernel
        //
        err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_a);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
        err |= clSetKernelArg(kernel, 2, sizeof(int), &taskNum);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to set kernel arguments! %d\n", err);
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        // Execute the kernel as a single task
        //
        err = clEnqueueTask(commands, kernel, 0, NULL, NULL);
        if (err)
        {
            printf("Error: Failed to execute kernel! %d\n", err);
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        // Read back the results from the device into the shared memory region
        //
        cl_event readevent;
        err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(int) * FPGA_RET_PARAM_NUM * taskNum, results, 0, NULL, &readevent );
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to read output array! %d\n", err);
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        clWaitForEvents(1, &readevent);
        accTime(&exeTime, &timer);
        // The event returned by clEnqueueReadBuffer is owned by the caller;
        // release it so that one event object is not leaked per request.
        clReleaseEvent(readevent);

        // Tell the client that the results are in shared memory: DONE is
        // stored in the second int of the segment.
        *((int*)(shm_addr + sizeof(int))) = DONE;

        //printf("\n************* Task finished! *************\n");

        if (-1 == shutdown(ConnectFD, SHUT_RDWR)) {
            perror("can not shutdown socket");
            close(ConnectFD);
            close(SocketFD);
            exit(EXIT_FAILURE);
        }
        close(ConnectFD);

        // detach the shared memory
        shmdt(shm_addr);
        //shmctl(shmid, IPC_RMID, 0);

        accTime(&socRecvTime, &timer);

        printf("**********timing begin**********\n");
        printTimeSpec(socListenTime);
        printTimeSpec(socSendTime);
        printTimeSpec(socRecvTime);
        printTimeSpec(exeTime);
        printf("**********timing end**********\n\n");
    }

    // The loop above has no break, so the code below is not reached as
    // written; it is kept so that a future exit condition cleans up properly.
    close(SocketFD);

    // Shutdown and cleanup
    //
    clReleaseMemObject(input_a);
    clReleaseMemObject(output);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    return EXIT_SUCCESS;

}