Ejemplo n.º 1
0
/* This method gathers all of the traceback data from a single stream.  All of the data is
 * put into an array in the stream info struct.  The argument passed in is expected to be
 * of type args_t.  This function is indended to be the start point of a new thread and
 * not called directly.  All data is returned through the structs contained in the argument.
 */
void * ReceiveTracebackData(void* arg) {

    args_t *a = (args_t *) arg;
    StreamInfo_t *info = a->streaminfo;
    int index = a->thread_index;
    printf("Reading the resulting traceback from the FPGA\n");

    int         err;
    kseq_t*     query           = info->start_info.query_seq;
    kseq_t*     db              = info->start_info.db_seq;
    uint64_t*    rx_buf;
    int         buf_size;
    int         buf_len;
    char            ibuf    [1024];

    // first we create a buffer which we are going to use for receiving our data
    //  buf_len         = a->buffer_length;
    buf_len = (query->seq.l + db->seq.l - 1) *2;
    buf_size        = sizeof(uint64_t) * buf_len;
    if (VERBOSE) printf("Thread %d, Creating uint64_t buffer w/ %i entries, %i B per entry, %i B total\n", index, buf_len, (int) sizeof(uint64_t), buf_len*((int)sizeof(uint64_t)));
    rx_buf          = (uint64_t*) calloc(buf_len, sizeof(uint64_t));
    // receive the contents of buffer from the FPGA
    if (VERBOSE) printf("Thread %d, Reading %i B from stream handle %i\n", index, buf_size, info->traceback_stream[index]);
    if ((err = info->pico->ReadStream(info->traceback_stream[index], rx_buf, buf_size)) < 0) {
        fprintf(stderr, "RunBitFile error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        exit(EXIT_FAILURE);
    }

    if (VERBOSE)
        for (int i=0; i < buf_len; i++)
            printf("%lx\n", rx_buf[i]);

    info->traceback_buffer[index] = rx_buf;
}
int main(int argc, char* argv[])
{
    int         err, num_engines;
    int*        stream;
    uint32_t    cell_score_threshold;
    uint32_t**  query_ids;
    char *  ref_buf;
    char *** query_buf;
    int **       query_len;
    char        ibuf[1024];
    uint32_t **  results_buf;
    PicoDrv     *pico;
    const char* bitFileName;
    const char* ref_filename;
    int num_queries;
    pthread_t* read_thread;
    pthread_t* write_thread;
    read_thread_args* rta;
    write_thread_args* wta;

    // specify the .bit file name on the command line
    if (argc < 6) {
        fprintf(stderr, "Usage: ./SmithWatermanAccelerator <BIT FILE> <NUM_ENGINES> <CELL SCORE THRESHOLD> <REF SEQ FILE> <QUERY SEQ FILE 1> [<QUERY SEQ FILE 2> ...]");
        exit(1);
    }
    bitFileName = argv[1];
    num_engines = atoi(argv[2]);
    cell_score_threshold = (uint32_t) atoi(argv[3]);
    ref_filename = argv[4];
    num_queries = argc - 5;
    const char* query_filenames[num_queries];
    for (int i = 0; i < num_queries; i++) {
        query_filenames[i] = argv[i+5];
    }
    
    // Read ref seq file into memory
    std::ifstream ref_file;
    std::ifstream::pos_type ref_size;
    ref_file.open(ref_filename, std::ios::in | std::ios::binary | std::ios::ate);
    if (ref_file.is_open()) {
        ref_size = ref_file.tellg();
        ref_buf = new char[ref_size];
        ref_file.seekg(0, std::ios::beg);
        ref_file.read(ref_buf, ref_size);
        ref_file.close();
        printf("Read ref seq file '%s' of length %iB\n", ref_filename, (int) ref_size); 
    } else {
        fprintf(stderr, "Unable to open ref seq file '%s'", ref_filename);
        exit(1);
    }
    
    // Read query seq files into memory
    query_buf = new char** [num_engines];
    query_len = new int* [num_engines];
    query_ids = new uint32_t* [num_engines];
    int num_queries_per_engine[num_engines];
    for (int i = 0; i < num_engines; i++) {
        if (i < num_queries % num_engines) {
            num_queries_per_engine[i] = (num_queries / num_engines) + 1;
        } else {
            num_queries_per_engine[i] = num_queries / num_engines;
        }
    }
    for (int i = 0; i < num_engines; i++) {
        query_buf[i] = new char* [num_queries_per_engine[i]];
        query_len[i] = new int [num_queries_per_engine[i]];
        query_ids[i] = new uint32_t [num_queries_per_engine[i]];
    }
    int cur_engine = 0;
    for (int i = 0; i < num_queries; i++) {
        std::ifstream query_file;
        std::ifstream::pos_type query_size;
        query_file.open(query_filenames[i], std::ios::in | std::ios::binary | std::ios::ate);
        if (query_file.is_open()) {
            query_size = query_file.tellg();
            query_len[cur_engine][i/num_engines] = (int) query_size;
            query_buf[cur_engine][i/num_engines] = new char[query_size];
            query_ids[cur_engine][i/num_engines] = i;
            query_file.seekg(0, std::ios::beg);
            query_file.read(query_buf[cur_engine][i/num_engines], query_size);
            query_file.close();
            printf("Read query seq file '%s' of length %iB for engine %i\n", query_filenames[i], (int) query_size, cur_engine);
            cur_engine++;
            cur_engine %= num_engines;
        } else {
            fprintf(stderr, "Unable to open query seq file '%s'", query_filenames[i]);
            exit(1);
        }
    }

/*
    query_buf = new char* [num_queries];
    query_len = new int [num_queries];
    for (int i = 0; i < num_queries; i++) {
        std::ifstream query_file;
        std::ifstream::pos_type query_size;
        query_file.open(query_filenames[i], std::ios::in | std::ios::binary | std::ios::ate);
        if (query_file.is_open()) {
            query_size = query_file.tellg();
            query_len[i] = (int) query_size;
            query_buf[i] = new char[query_size];
            query_file.seekg(0, std::ios::beg);
            query_file.read(query_buf[i], query_size);
            query_file.close();
            printf("Read query seq file '%s' of length %iB\n", query_filenames[i], (int) query_size);
        } else {
            fprintf(stderr, "Unable to open query seq file '%s'", query_filenames[i]);
            exit(1);
        }
    }*/
    
    // The RunBitFile function will locate a Pico card that can run the given bit file, and is not already
    //   opened in exclusive-access mode by another program. It requests exclusive access to the Pico card
    //   so no other programs will try to reuse the card and interfere with us.
    printf("Loading FPGA with '%s' ...\n", bitFileName);
    err = RunBitFile(bitFileName, &pico);
    if (err < 0) {
        // We use the PicoErrors_FullError function to decipher error codes from RunBitFile.
        // This is more informative than just printing the numeric code, since it can report the name of a
        //   file that wasn't found, for example.
        fprintf(stderr, "RunBitFile error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        exit(1);
    }
    
    // Open streams to engines
    printf("Opening streams\n");
    stream = new int[num_engines];
    for (int i = 0; i < num_engines; i++) {
        stream[i] = pico->CreateStream(i+1);
        if (stream[i] < 0) {
            fprintf(stderr, "couldn't open stream %i! (return code: %i)\n", i+1, stream[i]);
            exit(1);
        }
    }
    
    // Write reference sequence to the DRAM
    printf("Writing ref seq to DRAM\n");
    err = pico->WriteRam(0, ref_buf, ref_size, PICO_DDR3_0);
    if (err < 0) {
        fprintf(stderr, "WriteRam error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        exit(1);
    } else if (err != ref_size) {
        fprintf(stderr, "WriteRam wrote %i bytes instead of the desire %i bytes\n", err, (int) ref_size);
        exit(1);
    }
    sleep(1);
  
    // Start read/write threads 
    printf("Starting Smith Waterman tests\n");
    results_buf = new uint32_t* [num_engines];
    for (int i = 0; i < num_engines; i++) {
        results_buf[i] = new uint32_t[RESULT_BUF_SIZE];
        for (int j = 0; j < RESULT_BUF_SIZE; j++) {
            results_buf[i][j] = 0;
        }
    }
    wta = new write_thread_args[num_engines];
    rta = new read_thread_args[num_engines];
    for (int i = 0; i < num_engines; i++) {
        wta[i].pico = pico;
        wta[i].stream = stream[i];
        wta[i].num_queries = num_queries_per_engine[i];
        wta[i].query_len_bytes = query_len[i];
        wta[i].query_buf = query_buf[i];
        wta[i].ref_len_bytes = (int) ref_size;
        wta[i].cell_score_threshold = cell_score_threshold;
        wta[i].engine_id = i;
        wta[i].query_ids = query_ids[i];
        rta[i].pico = pico;
        rta[i].stream = stream[i];
        rta[i].results_buf = results_buf[i];
        rta[i].num_queries = num_queries_per_engine[i];
        rta[i].engine_id = i;
        rta[i].query_ids = query_ids[i];
    }

    read_thread = new pthread_t[num_engines];
    write_thread = new pthread_t[num_engines];
    for (int i = 0; i < num_engines; i++) {
        pthread_create(&(read_thread[i]), NULL, &stream_read_thread, (void*) &(rta[i]));
        pthread_create(&(write_thread[i]), NULL, &stream_write_thread, (void*) &(wta[i]));
    }
    for (int i = 0; i < num_engines; i++) {
        pthread_join(read_thread[i], NULL);
        pthread_join(write_thread[i], NULL);
    }

    for (int i = 0; i < num_engines; i++) {
        for (int j = 0; j < RESULT_BUF_SIZE; j++) {
            printf("%i\t", results_buf[i][j]);
        }
        printf("\n\n");
    }

    // streams are automatically closed when the PicoDrv object is destroyed, or on program termination, but
    //   we can also close a stream manually.
    for (int i = 0; i < num_engines; i++) {
        pico->CloseStream(stream[i]);
    }
    return 0;
}
Ejemplo n.º 3
0
/* Test driver for the vector-add accelerators behind command stream 50.
 *
 * Command line:
 *   (no argument)  - obtain the card with FindPico(0x505, &pico);
 *   <BIT FILE>     - load the given bit file with RunBitFile().
 * Any other argument count is rejected, because no PicoDrv would be available.
 *
 * The program opens stream 50, times a CPU vector addition C = A + B over SIZE
 * elements, and - only when built with ACC1 defined - configures accelerator 1,
 * streams A and B to it over streams 11 and 12, reads SIZE 32-bit words back from
 * stream 13 and prints them.
 *
 * Earlier revisions of this function also carried commented-out copies of the
 * same sequence for accelerators 2 to 8 (data streams N1/N2, result stream N3,
 * command words 0xCN100000, 0xCN200000 | SIZE, 0xCN300001, 0xBN000000, an optional
 * 0xAN000000 launch command, and GetBytesAvailable() to size the read).  That dead
 * code has been removed; the ACC1 section below is the template to follow.
 *
 * Returns 0 on success, the FindPico error code if no card is found, and exits
 * with status 1 on every other failure.
 */
int main(int argc, char* argv[])
{
  int         err, i, stream;
  char        ibuf[1024];
  PicoDrv     *pico = NULL;
  const char* bitFileName;
  printf("Start\r\n");

  // Obtain the PicoDrv, either from FindPico or by loading the bit file named
  // on the command line.
  switch (argc) {
    case 1:
      if ((err = FindPico(0x505, &pico)) < 0) {
        printf("FindPico Error\r\n");
        return err;
      }
      break;

    case 2:
      bitFileName = argv[1];
      printf("Loading FPGA with '%s' ...\n", bitFileName);
      err = RunBitFile(bitFileName, &pico);
      if (err < 0) {
        fprintf(stderr, "RunBitFile error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        exit(1);
      }
      break;

    default:
      // Without this branch pico would be used uninitialised below.
      fprintf(stderr, "Usage: %s [<BIT FILE>]\n", (argc > 0) ? argv[0] : "program");
      exit(1);
  }

  printf("Opening stream50 (CMD)\r\n");
  stream = pico->CreateStream(50);
  if (stream < 0) {
    fprintf(stderr, "CreateStream error: %s\n", PicoErrors_FullError(stream, ibuf, sizeof(ibuf)));
    exit(1);
  }

  // Operands and the CPU reference result.
  int *A = new int[SIZE];
  int *B = new int[SIZE];
  int *C = new int[SIZE];
  for (i = 0; i < SIZE; i++) {
    A[i] = i + 1;
    B[i] = i + 1;
    C[i] = 0;
  }

  // Time the single-threaded CPU version of the vector addition.
  struct timeval start, end;
  gettimeofday(&start, NULL);
  for (i = 0; i < SIZE; i++) {
    C[i] = A[i] + B[i];
  }
  gettimeofday(&end, NULL);

  int timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
  printf("CPU %4d threads :\t%9d us\r\n", 1, timeuse);

#ifdef ACC1
  // Accelerator 1: streams 11 and 12 carry the operands, stream 13 the result.
  const int acc1_ids[3] = {11, 12, 13};
  int       acc1_stream[3];
  for (int s = 0; s < 3; s++) {
    printf("Opening stream%d (CMD)\r\n", acc1_ids[s]);
    acc1_stream[s] = pico->CreateStream(acc1_ids[s]);
    if (acc1_stream[s] < 0) {
      fprintf(stderr, "CreateStream error: %s\n", PicoErrors_FullError(acc1_stream[s], ibuf, sizeof(ibuf)));
      exit(1);
    }
  }

  // Four 32-bit command words (16 bytes) sent over the command stream; the
  // second word carries SIZE in its low bits.
  uint32_t cmd[4];
  cmd[0] = 0xC1100000;
  cmd[1] = 0xC1200000 | SIZE;
  cmd[2] = 0xC1300001;
  cmd[3] = 0xB1000000;
  err = pico->WriteStream(stream, cmd, 16);
  if (err < 0) {
    fprintf(stderr, "WriteStream stream50 error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
    exit(1);
  }

  // Each element is transferred as 4 bytes.
  const int nbytes = SIZE * 4;
  err = pico->WriteStream(acc1_stream[0], A, nbytes);
  if (err < 0) {
    fprintf(stderr, "WriteStream stream11 error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
    exit(1);
  }
  err = pico->WriteStream(acc1_stream[1], B, nbytes);
  if (err < 0) {
    fprintf(stderr, "WriteStream stream12 error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
    exit(1);
  }

  // Read the result words back and print them.
  const int room = nbytes / 4;
  int *tmp = new int[room];
  err = pico->ReadStream(acc1_stream[2], tmp, nbytes);
  if (err < 0) {
    fprintf(stderr, "ReadStream stream13 error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
    exit(1);
  }
  printf("%d\r\n", room);
  for (i = 0; i < room; i++) {
    printf("[%d] 0x%08x\r\n", i, (unsigned int) tmp[i]);
  }
  delete[] tmp;
#endif

  printf("Closing stream50 (CMD)\r\n");
  pico->CloseStream(stream);
  printf("Done\r\n");
  delete[] A;
  delete[] B;
  delete[] C;
  return 0;
}
int main(int argc, char* argv[])
{
    int         err, num_engines_per_fpga, num_fpgas;
    int**       stream;
    uint32_t    cell_score_threshold;
    uint32_t***  query_ids;
    char *  ref_buf;
    char * query_buf;
    int        query_len;
    char        ibuf[1024];
    uint32_t ***  results_buf;
    PicoDrv     **pico;
    const char* bitFileName;
    const char* ref_filename;
    int num_queries;
    pthread_t** read_thread;
    pthread_t** write_thread;
    read_thread_args** rta;
    write_thread_args** wta;

    // specify the .bit file name on the command line
    if (argc < 7) {
        fprintf(stderr, "Usage: ./SmithWatermanAcceleratorMultiFPGABenchmark <BIT FILE> <NUM FPGAS> <NUM ENGINES PER FPGA> <CELL SCORE THRESHOLD> <REF SEQ FILE> <QUERY SEQ FILE>\n");
        exit(1);
    }
    bitFileName = argv[1];
    num_fpgas = atoi(argv[2]);
    num_engines_per_fpga = atoi(argv[3]);
    cell_score_threshold = (uint32_t) atoi(argv[4]);
    ref_filename = argv[5];
    num_queries = argc - 6;
    const char* query_filename = argv[6];
    
    printf("Configuration: %i FPGAs, %i Engines per FPGA\n", num_fpgas, num_engines_per_fpga);

    // Read ref seq file into memory
    std::ifstream ref_file;
    std::ifstream::pos_type ref_size;
    ref_file.open(ref_filename, std::ios::in | std::ios::binary | std::ios::ate);
    if (ref_file.is_open()) {
        ref_size = ref_file.tellg();
        ref_buf = new char[ref_size];
        ref_file.seekg(0, std::ios::beg);
        ref_file.read(ref_buf, ref_size);
        ref_file.close();
        printf("Read ref seq file '%s' of length %iB\n", ref_filename, (int) ref_size); 
    } else {
        fprintf(stderr, "Unable to open ref seq file '%s'", ref_filename);
        exit(1);
    }
    
    // Read query seq files into memory
    std::ifstream query_file;
    std::ifstream::pos_type query_size;
    query_file.open(query_filename, std::ios::in | std::ios::binary | std::ios::ate);
    if (query_file.is_open()) {
        query_size = query_file.tellg();
        query_len = (int) query_size;
        query_buf = new char[query_size];
        query_file.seekg(0, std::ios::beg);
        query_file.read(query_buf, query_size);
        query_file.close();
        printf("Read query seq file '%s' of length %iB\n", query_filename, (int) query_size);
    } else {
        fprintf(stderr, "Unable to open query seq file '%s'", query_filename);
        exit(1);
    }

    // Load FPGAs with bitfile
    pico = new PicoDrv*[num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        printf("Loading FPGA %i with '%s' ...\n", i, bitFileName);
        err = RunBitFile(bitFileName, &(pico[i]));
        if (err < 0) {
            fprintf(stderr, "RunBitFile error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
            exit(1);
        }
    }

    // Open streams to engines
    printf("Opening streams\n");
    stream = new int*[num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        stream[i] = new int[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            stream[i][j] = pico[i]->CreateStream(j+1);
            if (stream[i][j] < 0) {
                fprintf(stderr, "couldn't open stream %i on FPGA %i! (return code: %i)\n", j+1, i, stream[i][j]);
                exit(1);
            }
        }
    }
    
    // Write reference sequence to the DRAMs
    printf("Writing ref seq to DRAMs\n");
    for (int i = 0; i < num_fpgas; i++) {
        err = pico[i]->WriteRam(0, ref_buf, ref_size, PICO_DDR3_0);
        if (err < 0) {
            fprintf(stderr, "WriteRam error on FPGA %i: %s\n", i, PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
            exit(1);
        } else if (err != ref_size) {
            fprintf(stderr, "WriteRam on FPGA %i wrote %i bytes instead of the desire %i bytes\n", i, err, (int) ref_size);
            exit(1);
        }
    }
    sleep(1);
  
    // Start read/write threads 
    printf("Starting Smith Waterman tests\n");
    results_buf = new uint32_t** [num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        results_buf[i] = new uint32_t* [num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            results_buf[i][j] = new uint32_t[RESULT_BUF_SIZE];
            for (int k = 0; k < RESULT_BUF_SIZE; k++) {
                results_buf[i][j][k] = 0;
            }
        }
    }
    uint32_t** num_bytes_returned = new uint32_t* [num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        num_bytes_returned[i] = new uint32_t[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            num_bytes_returned[j] = new uint32_t;
        }
    }
    wta = new write_thread_args* [num_fpgas];
    rta = new read_thread_args* [num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        wta[i] = new write_thread_args[num_engines_per_fpga];
        rta[i] = new read_thread_args[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            wta[i][j].pico = pico[i];
            wta[i][j].stream = stream[i][j];
            wta[i][j].num_queries = 1;
            wta[i][j].query_len_bytes = query_len;
            wta[i][j].query_buf = query_buf;
            wta[i][j].ref_len_bytes = (1 << 30) / (num_fpgas * num_engines_per_fpga);
            wta[i][j].cell_score_threshold = cell_score_threshold;
            wta[i][j].engine_id = i*num_engines_per_fpga + j;
            //wta[i][j].ref_addr = (i*num_engines_per_fpga + j) * ((1 << 25) / (num_fpgas * num_engines_per_fpga));
            wta[i][j].ref_addr = 0;
            rta[i][j].pico = pico[i];
            rta[i][j].stream = stream[i][j];
            rta[i][j].results_buf = results_buf[i][j];
            rta[i][j].num_queries = 1;
            rta[i][j].engine_id = i*num_engines_per_fpga + j;
            rta[i][j].num_bytes_returned = &(num_bytes_returned[i][j]);
        }
    }
    read_thread = new pthread_t*[num_fpgas];
    write_thread = new pthread_t*[num_fpgas];
    printf("ref_len_bytes: %i\n", ((1 << 30) / (num_fpgas * num_engines_per_fpga)));


    // TIMED CODE
    struct timespec start, finish;
    clock_gettime(CLOCK_MONOTONIC, &start);
    for (int i = 0; i < num_fpgas; i++) {
        read_thread[i] = new pthread_t[num_engines_per_fpga];
        write_thread[i] = new pthread_t[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            pthread_create(&(read_thread[i][j]), NULL, &stream_read_thread, (void*) &(rta[i][j]));
            pthread_create(&(write_thread[i][j]), NULL, &stream_write_thread, (void*) &(wta[i][j]));
        }
    }

    // Join read/write threads
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            pthread_join(read_thread[i][j], NULL);
            pthread_join(write_thread[i][j], NULL);
        }
    }
    clock_gettime(CLOCK_MONOTONIC, &finish);
    double elapsed = (finish.tv_sec - start.tv_sec);
    elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
    double numCells = query_len * 4 * 4294967296;
    printf("Computation took %f seconds\n", elapsed);
    printf("Total cells computed (billions): %f\n", numCells/1000000000.0);
    printf("Performance: %f GCUPS\n", numCells/elapsed/1000000000.0);
    uint32_t total_num_bytes_returned = 0;
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            total_num_bytes_returned += num_bytes_returned[i][j];
        }
    }
    printf("Num bytes returned: %i\n", total_num_bytes_returned);
    printf("Effective PCIe bandwidth: %f MB/sec\n", total_num_bytes_returned/elapsed/1000000.0);
    // Print results
    /*for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            printf("FPGA: %i Engine: %i\n", i, j);
            for (int k = 0; k < RESULT_BUF_SIZE; k++) {
                printf("%i\t", results_buf[i][j][k]);
            }
            printf("\n\n");
        }
    }*/

    // streams are automatically closed when the PicoDrv object is destroyed, or on program termination, but
    //   we can also close a stream manually.
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            pico[i]->CloseStream(stream[i][j]);
        }
    }
    return 0;
}
Ejemplo n.º 5
0
//////////
// MAIN //
//////////
int main(int argc, char* argv[]) {

    // command-line arguments
    CParams         args;

    // handles to the Smith-Waterman FPGA systems
    PicoDrv*        aligner;
    fpga_cfg_t      cfg;

    // query and db FASTA file handlers
    gzFile          queryFile;
    gzFile          dbFile;

    // kseq pointers for parsing FASTA files
    kseq_t*         querySeq;
    kseq_t*         dbSeq;

    // alignment score
    int             score;

    // info that we need to create different threads
    // 1 thread used to send the query to the FPGA
    // 1 thread used to send the db to the FPGA
    // the main thread used to read results from the FPGA
    StreamInfo_t*   query_db_info;

    // temporary variables
    int             err;
    char            ibuf    [1024];

    ////////////////////////
    // PARSE COMMAND LINE //
    ////////////////////////

    if (!args.parseParams(argc, argv)) {
        return EXIT_FAILURE;
    }

    // select a bitfile based upon query length
    args.setActiveBitfile(args.getMaxQueryLength(), args.getBitfile().c_str());

    //////////////////
    // PROGRAM FPGA //
    //////////////////

    // program the device
    printf("Programming FPGA with %s\n", args.getActiveBitfile().c_str());
    if ((err = RunBitFile(args.getActiveBitfile().c_str(), &aligner)) < 0) {
        fprintf(stderr, "RunBitFile error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        return EXIT_FAILURE;
    }

    // read the FPGA configuration info
    // here we read some config info about the programming file that we just used, including the max query length, the number of aligners, etc.
    if (VERBOSE) printf("Reading the configuration info from the FPGA\n");
    if ((err = ReadConfig(aligner, &cfg)) < 0) {
        fprintf(stderr, "ReadConfig error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        return EXIT_FAILURE;
    }

    // create the right number of StreamInfo structs
    query_db_info = new StreamInfo_t[cfg.info[7]];

    // create the streams to talk to the separate alignment units
    // also, populate the StreamInfo structs w/ the info that we know so far
    for (int i=1; i<=cfg.info[8]; ++i) {
        if (VERBOSE) printf("Creating stream %i\n", i);
        if ((err = aligner->CreateStream(i)) < 0) {
            fprintf(stderr, "CreateStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
            return EXIT_FAILURE;
        }
        if (VERBOSE) printf("Stream handle = %i\n", err);
        query_db_info[i-1].stream   = err;
        query_db_info[i-1].cfg      = &cfg;
        query_db_info[i-1].pico     = aligner;
    }

    ////////////////////
    // SCORING MATRIX //
    ////////////////////

    if ((err = WriteScoringMatrix(aligner, args.getScoreMatrix(), &cfg)) < 0) {
        fprintf(stderr, "WriteScoringMatrix error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        return EXIT_FAILURE;
    }

    ///////////////////////////
    // PARSE QUERY FROM FILE //
    ///////////////////////////

    printf("Reading in the query from %s\n", args.getQueryFile().c_str());

    // open the query file
    queryFile   = gzopen(args.getQueryFile().c_str(), "r");
    if (queryFile == Z_NULL) {
        fprintf(stderr, "Unable to open query file: %s\n", args.getQueryFile().c_str());
        return EXIT_FAILURE;
    }
    querySeq    = kseq_init(queryFile);

    // read the query in from the file
    err                                     = kseq_read(querySeq);
    query_db_info[0].start_info.query_seq   = querySeq;

    ////////////////////////
    // PARSE DB FROM FILE //
    ////////////////////////

    // Open the files for the database sequence
    dbFile      = gzopen(args.getDbFile().c_str(), "r");
    if (dbFile == Z_NULL) {
        fprintf(stderr, "Unable to open db file: %s\n", args.getQueryFile().c_str());
        return EXIT_FAILURE;
    }
    dbSeq       = kseq_init(dbFile);

    // read the db in from the file
    err                                     = kseq_read(dbSeq);
    query_db_info[0].start_info.db_seq      = dbSeq;

    ////////////////////////////////
    // SEND THE QUERY/DB TO INPUT //
    ////////////////////////////////

    // send the query and the DB to the FPGA for alignment
    printf("Sending the query and the db to the FPGA for alignment\n");
    if ((err = AlignQueryToDB(&query_db_info[0])) < 0) {
        fprintf(stderr, "AlignQueryToDB error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        return EXIT_FAILURE;
    }

    ////////////////////////////////
    // RECEIVE RESULT FROM OUTPUT //
    ////////////////////////////////

    // Create traceback stream(s)
    pthread_t traceback_thread[8];
    int stream_count = get_size(query_db_info[0].start_info.query_seq->seq.l);
    int buffer_length = (query_db_info[0].start_info.query_seq->seq.l + query_db_info[0].start_info.db_seq->seq.l - 1) *2;
    args_t *a = new args_t[stream_count];
    for (int n=0; n<stream_count; n++) {
        if ((err = aligner->CreateStream(12+n)) < 0) {
            fprintf(stderr, "CreateStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
            return EXIT_FAILURE;
        }
        query_db_info[0].traceback_stream[n] = err;

        // Create thread to read traceback stream into buffer
        a[n].streaminfo = &query_db_info[0];
        a[n].thread_index = n;
        if (buffer_length <= (64 + query_db_info[0].start_info.db_seq->seq.l - 1)*2)
            a[n].buffer_length = buffer_length;
        else
            a[n].buffer_length = (64 + query_db_info[0].start_info.db_seq->seq.l - 1)*2;
        buffer_length -= (64 + query_db_info[0].start_info.db_seq->seq.l - 1)*2;
        err = pthread_create(&traceback_thread[n], NULL, ReceiveTracebackData, (void *) &a[n]);
        if (err != 0) {
            fprintf(stderr, "Thread creation error: %d\n", err);
            return EXIT_FAILURE;
        }
    }
    // this just returns the score
    // we can add a LOT more functionality here if we want
    printf("Reading the resulting score from the FPGA\n");
    if ((err = ReceiveScore(&query_db_info[0])) < 0) {
        fprintf(stderr, "ReceiveScore error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
        return EXIT_FAILURE;
    }
    printf("Alignment score = %i at base %i\n",
           query_db_info[0].end_info.globalScore,
           query_db_info[0].end_info.globalTargetBase);

    // Wait for traceback thread to finish reading
    for (int n=0; n<stream_count; n++) {
        if ((err = pthread_join(traceback_thread[n], NULL)) != 0) {
            fprintf(stderr, "Thread join error: %d\n", err);
            return EXIT_FAILURE;
        }
    }

    // Combine data from multiple streams into single traceback data array.
    if (VERBOSE)
        printf("Combining stream data.\n");
    int traceback_size = query_db_info[0].start_info.query_seq->seq.l + query_db_info[0].start_info.db_seq->seq.l - 1;
    uint64_t *buffer = (uint64_t *)calloc(traceback_size*2*stream_count, sizeof(uint64_t));
    combineStreamData(&(query_db_info[0].traceback_buffer[0]), stream_count, traceback_size, buffer);

    // print out the data that we just received
    if (VERBOSE)
        for (int i=0; i < traceback_size*2*stream_count; i++)
            printf("index: %d, %lx\n", i, buffer[i]);

    printf("Starting traceback calculation.\n");
    int * traceback = new int[traceback_size];

    // Perform traceback calculations
    err = trace_matrix_generate(traceback, buffer,
                                query_db_info[0].start_info.query_seq->seq.l,
                                query_db_info[0].start_info.db_seq->seq.l);

    if (VERBOSE) {
        if(err==0) {
            printf("Traceback calculation failed\n");
            printf("The trace matrix data received is invalid\n");
        }
        else {
            printf("Traceback calculation successfull\n");
            printf("Traceback matrix is:\n");
            for(int i=0; i<err; i++)
                printf("%d\t",traceback[i]);
            printf("\n");
        }
    }


    /////////////
    // CLEANUP //
    /////////////

    // close the input and output files for sequences
    kseq_destroy(querySeq);
    gzclose(queryFile);
    kseq_destroy(dbSeq);
    gzclose(dbFile);

    // free up allocated memory
    for (int n=0; n<get_size(query_db_info[0].start_info.query_seq->seq.l); n++) {
        free(query_db_info[0].traceback_buffer[n]);
    }
    delete      aligner;
    delete []   query_db_info;

    return EXIT_SUCCESS;
}
int main(int argc, char* argv[])
{
    int         err, num_engines_per_fpga, num_fpgas;
    int**       stream;
    uint32_t    cell_score_threshold;
    uint32_t***  query_ids;
    char *  ref_buf;
    char **** query_buf;
    int ***       query_len;
    char        ibuf[1024];
    uint32_t ***  results_buf;
    PicoDrv     **pico;
    const char* bitFileName;
    const char* ref_filename;
    int num_queries;
    pthread_t** read_thread;
    pthread_t** write_thread;
    read_thread_args** rta;
    write_thread_args** wta;

    // specify the .bit file name on the command line
    if (argc < 7) {
        fprintf(stderr, "Usage: ./SmithWatermanAccelerator <BIT FILE> <NUM FPGAS> <NUM ENGINES PER FPGA> <CELL SCORE THRESHOLD> <REF SEQ FILE> <QUERY SEQ FILE 1> [<QUERY SEQ FILE 2> ...]");
        exit(1);
    }
    bitFileName = argv[1];
    num_fpgas = atoi(argv[2]);
    num_engines_per_fpga = atoi(argv[3]);
    cell_score_threshold = (uint32_t) atoi(argv[4]);
    ref_filename = argv[5];
    num_queries = argc - 6;
    const char* query_filenames[num_queries];
    for (int i = 0; i < num_queries; i++) {
        query_filenames[i] = argv[i+6];
    }
    
    // Read ref seq file into memory
    std::ifstream ref_file;
    std::ifstream::pos_type ref_size;
    ref_file.open(ref_filename, std::ios::in | std::ios::binary | std::ios::ate);
    if (ref_file.is_open()) {
        ref_size = ref_file.tellg();
        ref_buf = new char[ref_size];
        ref_file.seekg(0, std::ios::beg);
        ref_file.read(ref_buf, ref_size);
        ref_file.close();
        printf("Read ref seq file '%s' of length %iB\n", ref_filename, (int) ref_size); 
    } else {
        fprintf(stderr, "Unable to open ref seq file '%s'", ref_filename);
        exit(1);
    }
    
    // Read query seq files into memory
    query_buf = new char*** [num_fpgas];
    query_len = new int** [num_fpgas];
    query_ids = new uint32_t** [num_fpgas];
    int num_queries_per_engine[num_fpgas][num_engines_per_fpga];
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            num_queries_per_engine[i][j] = 0;
        }
    }
    for (int i = 0; i < num_fpgas; i++) {
        query_buf[i] = new char** [num_engines_per_fpga];
        query_len[i] = new int* [num_engines_per_fpga];
        query_ids[i] = new uint32_t* [num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            query_buf[i][j] = new char* [num_queries_per_engine[i][j]];
            query_len[i][j] = new int [num_queries_per_engine[i][j]];
            query_ids[i][j] = new uint32_t [num_queries_per_engine[i][j]];
        }
    }
    int cur_fpga = 0;
    int cur_engine = 0;
    for (int i = 0; i < num_queries; i++) {
        std::ifstream query_file;
        std::ifstream::pos_type query_size;
        query_file.open(query_filenames[i], std::ios::in | std::ios::binary | std::ios::ate);
        if (query_file.is_open()) {
            query_size = query_file.tellg();
            query_len[cur_fpga][cur_engine][i/(num_fpgas * num_engines_per_fpga)] = (int) query_size;
            query_buf[cur_fpga][cur_engine][i/(num_fpgas * num_engines_per_fpga)] = new char[query_size];
            query_ids[cur_fpga][cur_engine][i/(num_fpgas * num_engines_per_fpga)] = i;
            num_queries_per_engine[cur_fpga][cur_engine]++;
            query_file.seekg(0, std::ios::beg);
            query_file.read(query_buf[cur_fpga][cur_engine][i/(num_fpgas * num_engines_per_fpga)], query_size);
            query_file.close();
            printf("Read query seq file '%s' of length %iB for FPGA %i, engine %i\n", query_filenames[i], (int) query_size, cur_fpga, cur_engine);
            cur_fpga++;
            if (cur_fpga == num_fpgas) {
                cur_engine++;
                cur_engine %= num_engines_per_fpga;
                cur_fpga = 0;
            }
        } else {
            fprintf(stderr, "Unable to open query seq file '%s'", query_filenames[i]);
            exit(1);
        }
    }
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            printf("FPGA: %i, Engine: %i, Queries: %i\n", i, j, num_queries_per_engine[i][j]);
        }
    }

    // Load FPGAs with bifile
    pico = new PicoDrv*[num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        printf("Loading FPGA %i with '%s' ...\n", i, bitFileName);
        err = RunBitFile(bitFileName, &(pico[i]));
        if (err < 0) {
            fprintf(stderr, "RunBitFile error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
            exit(1);
        }
    }

    // Open streams to engines
    printf("Opening streams\n");
    stream = new int*[num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        stream[i] = new int[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            stream[i][j] = pico[i]->CreateStream(j+1);
            if (stream[i][j] < 0) {
                fprintf(stderr, "couldn't open stream %i on FPGA %i! (return code: %i)\n", j+1, i, stream[i][j]);
                exit(1);
            }
        }
    }
    
    // Write reference sequence to the DRAMs
    printf("Writing ref seq to DRAMs\n");
    for (int i = 0; i < num_fpgas; i++) {
        err = pico[i]->WriteRam(0, ref_buf, ref_size, PICO_DDR3_0);
        if (err < 0) {
            fprintf(stderr, "WriteRam error on FPGA %i: %s\n", i, PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
            exit(1);
        } else if (err != ref_size) {
            fprintf(stderr, "WriteRam on FPGA %i wrote %i bytes instead of the desire %i bytes\n", i, err, (int) ref_size);
            exit(1);
        }
    }
    sleep(1);
  
    // Start read/write threads 
    printf("Starting Smith Waterman tests\n");
    results_buf = new uint32_t** [num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        results_buf[i] = new uint32_t* [num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            results_buf[i][j] = new uint32_t[RESULT_BUF_SIZE];
            for (int k = 0; k < RESULT_BUF_SIZE; k++) {
                results_buf[i][j][k] = 0;
            }
        }
    }
    wta = new write_thread_args* [num_fpgas];
    rta = new read_thread_args* [num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        wta[i] = new write_thread_args[num_engines_per_fpga];
        rta[i] = new read_thread_args[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            wta[i][j].pico = pico[i];
            wta[i][j].stream = stream[i][j];
            wta[i][j].num_queries = num_queries_per_engine[i][j];
            wta[i][j].query_len_bytes = query_len[i][j];
            wta[i][j].query_buf = query_buf[i][j];
            wta[i][j].ref_len_bytes = (int) ref_size;
            wta[i][j].cell_score_threshold = cell_score_threshold;
            wta[i][j].engine_id = i*num_engines_per_fpga + j;
            wta[i][j].query_ids = query_ids[i][j];
            rta[i][j].pico = pico[i];
            rta[i][j].stream = stream[i][j];
            rta[i][j].results_buf = results_buf[i][j];
            rta[i][j].num_queries = num_queries_per_engine[i][j];
            rta[i][j].engine_id = i*num_engines_per_fpga + j;
            rta[i][j].query_ids = query_ids[i][j];
        }
    }
    read_thread = new pthread_t*[num_fpgas];
    write_thread = new pthread_t*[num_fpgas];
    for (int i = 0; i < num_fpgas; i++) {
        read_thread[i] = new pthread_t[num_engines_per_fpga];
        write_thread[i] = new pthread_t[num_engines_per_fpga];
        for (int j = 0; j < num_engines_per_fpga; j++) {
            pthread_create(&(read_thread[i][j]), NULL, &stream_read_thread, (void*) &(rta[i][j]));
            pthread_create(&(write_thread[i][j]), NULL, &stream_write_thread, (void*) &(wta[i][j]));
        }
    }

    // Join read/write threads
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            pthread_join(read_thread[i][j], NULL);
            pthread_join(write_thread[i][j], NULL);
        }
    }

    // Print results
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            printf("FPGA: %i Engine: %i\n", i, j);
            for (int k = 0; k < RESULT_BUF_SIZE; k++) {
                printf("%i\t", results_buf[i][j][k]);
            }
            printf("\n\n");
        }
    }

    // streams are automatically closed when the PicoDrv object is destroyed, or on program termination, but
    //   we can also close a stream manually.
    for (int i = 0; i < num_fpgas; i++) {
        for (int j = 0; j < num_engines_per_fpga; j++) {
            pico[i]->CloseStream(stream[i][j]);
        }
    }
    return 0;
}