/**
 * Feeds every character of `str` to consume_input(), one call per character,
 * and then feeds a single trailing space.
 *
 * The index passed to consume_input() starts at `start_idx` and grows by one
 * per character; the trailing space is reported at the index that follows the
 * last character of `str`. `file` and `line` are forwarded unchanged on
 * every call.
 */
void Lexer::consume_token_block(string str, int start_idx, const char *file, int line) {
    int column = start_idx;

    for (string::const_iterator it = str.begin(); it != str.end(); ++it) {
        consume_input(*it, file, line, column);
        ++column;
    }

    // Trailing blank fed after the last character of the block.
    consume_input(' ', file, line, column);
}
/*
 * Copies the first src_len bytes of src into dst as a NUL-terminated string,
 * truncating so that at most dst_capacity bytes (terminator included) are
 * written. Keeps over-long input lines from overflowing the fixed-size
 * per-record buffers.
 */
static void fastq_gzread_se_copy_line(char *dst, size_t dst_capacity, const char *src, size_t src_len) {
    if (dst_capacity == 0) {
        return;
    }
    if (src_len >= dst_capacity) {
        src_len = dst_capacity - 1;
    }
    memcpy(dst, src, src_len);
    dst[src_len] = '\0';
}

/**
 * Inflates data from fq_gzfile and appends up to num_reads FASTQ records
 * (4 text lines each) to `reads`.
 *
 * Bytes left over from a previous call (fq_gzfile->data / data_size) are
 * placed in front of the newly inflated bytes. Input is inflated CHUNK bytes
 * at a time until 4 * num_reads newlines have been seen in the new data, the
 * file has no more input, or inflate() reports Z_STREAM_END. The buffer is
 * then split on '\n': line 0 of every group of four is the id, line 1 the
 * sequence, line 2 is ignored and line 3 the qualities. Each complete group
 * is turned into a read with fastq_read_new() and inserted into `reads`.
 * Lines longer than MAX_READ_SEQUENCE_LENGTH - 1 characters are truncated.
 * Bytes that were not parsed are saved back into fq_gzfile for the next call.
 *
 * @param reads      list that receives the new reads
 * @param num_reads  maximum number of records to parse in this call
 * @param fq_gzfile  stream state (file handle, z_stream, carried-over bytes)
 * @return reads->size after parsing. On failure a zlib status (Z_ERRNO,
 *         Z_DATA_ERROR or Z_MEM_ERROR) converted to size_t is returned.
 *         NOTE(review): those codes are negative in zlib, so callers see them
 *         as very large values - kept as-is for compatibility.
 *
 * NOTE(review): a final line that is not terminated by '\n' is never emitted
 * as part of a record; behavior unchanged from the previous implementation.
 */
size_t fastq_gzread_se(array_list_t *reads, size_t num_reads, fastq_gzfile_t *fq_gzfile) {
    const size_t num_lines_to_read = 4 * num_reads;   /* each read consists of 4 lines */
    const size_t max_read_len = MAX_READ_SEQUENCE_LENGTH;
    int max_data_len = CHUNK;   /* current capacity of data, maintained by consume_input() */
    int i = 0;                  /* number of valid bytes stored in data */
    int c;
    size_t lines = 0;           /* newlines seen in the bytes inflated by this call */
    size_t parsed_chars = 0;
    size_t parsed_lines = 0;
    size_t line_start = 0;
    size_t data_size;
    size_t result;
    unsigned have;
    unsigned j;
    fastq_read_t *read;
    char *data = NULL;
    char *id = NULL;
    char *seq = NULL;
    char *qual = NULL;

    /* zlib staging buffers */
    unsigned char in[CHUNK];
    unsigned char out[CHUNK];

    id = (char*) calloc(max_read_len, sizeof(char));
    seq = (char*) calloc(max_read_len, sizeof(char));
    qual = (char*) calloc(max_read_len, sizeof(char));

    /* Make room for the bytes carried over from the previous call, if any. */
    if (fq_gzfile->data != NULL && fq_gzfile->data_size > (size_t) max_data_len) {
        max_data_len = fq_gzfile->data_size + max_data_len;
    }
    data = (char*) calloc(max_data_len, sizeof(char));

    if (id == NULL || seq == NULL || qual == NULL || data == NULL) {
        result = (size_t) Z_MEM_ERROR;
        goto cleanup;
    }

    if (fq_gzfile->data != NULL) {
        memcpy(data, fq_gzfile->data, fq_gzfile->data_size);
        i = fq_gzfile->data_size;
    }

    do {
        fq_gzfile->strm.avail_in = fread(in, 1, CHUNK, fq_gzfile->fd);
        if (ferror(fq_gzfile->fd)) {
            (void)inflateEnd(&fq_gzfile->strm);
            result = (size_t) Z_ERRNO;
            goto cleanup;
        }
        if (fq_gzfile->strm.avail_in == 0) {
            break;
        }
        fq_gzfile->strm.next_in = in;

        /* run inflate() on input until output buffer not full */
        do {
            fq_gzfile->strm.avail_out = CHUNK;
            fq_gzfile->strm.next_out = out;
            fq_gzfile->ret = inflate(&fq_gzfile->strm, Z_NO_FLUSH);
            assert(fq_gzfile->ret != Z_STREAM_ERROR);   /* state not clobbered */
            switch (fq_gzfile->ret) {
                case Z_NEED_DICT:
                    fq_gzfile->ret = Z_DATA_ERROR;
                    /* fall through */
                case Z_DATA_ERROR:
                case Z_MEM_ERROR:
                    (void)inflateEnd(&fq_gzfile->strm);
                    result = (size_t) fq_gzfile->ret;
                    goto cleanup;
            }

            have = CHUNK - fq_gzfile->strm.avail_out;
            for (j = 0; j < have; j++) {
                /* out[] holds unsigned bytes, so the value can never equal EOF. */
                c = out[j];
                max_data_len = consume_input(c, &data, max_data_len, i);
                if (c == '\n') {
                    lines++;
                }
                i++;
            }
        } while (fq_gzfile->strm.avail_out == 0);

        /* done when inflate() says it's done */
    } while (lines < num_lines_to_read && fq_gzfile->ret != Z_STREAM_END);

    /* Split the buffer into lines and assemble one read per group of four. */
    for (parsed_chars = 0; parsed_chars < (size_t) i && parsed_lines < num_lines_to_read; parsed_chars++) {
        const char *line;
        size_t line_len;

        if (data[parsed_chars] != '\n') {
            continue;
        }
        line = data + line_start;
        line_len = parsed_chars - line_start;

        switch (parsed_lines % 4) {
            case 0:
                fastq_gzread_se_copy_line(id, max_read_len, line, line_len);
                break;
            case 1:
                fastq_gzread_se_copy_line(seq, max_read_len, line, line_len);
                break;
            case 2:
                /* third line of a record is not stored */
                break;
            default:
                fastq_gzread_se_copy_line(qual, max_read_len, line, line_len);
                read = fastq_read_new(id, seq, qual);
                array_list_insert(read, reads);
                break;
        }

        line_start = parsed_chars + 1;
        parsed_lines++;
    }

    /* Save whatever was not parsed so the next call can continue from it. */
    data_size = (size_t) i - parsed_chars;
    if (data_size > 0) {
        /* Grow whenever there is no buffer yet or the recorded size is smaller
         * than what must be stored. This also covers a previously stored size
         * of 0, which used to skip the resize and overflow the old buffer. */
        if (fq_gzfile->data == NULL || fq_gzfile->data_size < data_size) {
            char *grown = (char*) realloc(fq_gzfile->data, data_size);
            if (grown == NULL) {
                result = (size_t) Z_MEM_ERROR;
                goto cleanup;
            }
            fq_gzfile->data = grown;
        }
        memcpy(fq_gzfile->data, data + parsed_chars, data_size);
    }
    fq_gzfile->data_size = data_size;

    result = reads->size;

cleanup:
    free(data);
    free(id);
    free(seq);
    free(qual);
    return result;
}
/*
 * Copies the first src_len bytes of src into dst as a NUL-terminated string,
 * truncating so that at most dst_capacity bytes (terminator included) are
 * written. Keeps over-long input lines from overflowing the fixed-size
 * per-record buffers.
 */
static void fastq_gzread_bytes_se_copy_line(char *dst, size_t dst_capacity, const char *src, size_t src_len) {
    if (dst_capacity == 0) {
        return;
    }
    if (src_len >= dst_capacity) {
        src_len = dst_capacity - 1;
    }
    memcpy(dst, src, src_len);
    dst[src_len] = '\0';
}

/**
 * Inflates data from fq_gzfile until at least bytes_to_read bytes are
 * buffered, then appends the FASTQ records (4 text lines each) found in the
 * buffer to `reads`.
 *
 * Bytes left over from a previous call (fq_gzfile->data / data_size) are
 * placed in front of the newly inflated bytes. Input is inflated CHUNK bytes
 * at a time until the buffer holds bytes_to_read bytes, the file has no more
 * input, or inflate() reports Z_STREAM_END. The buffer is split on '\n':
 * line 0 of every group of four is the id, line 1 the sequence, line 2 is
 * ignored and line 3 the qualities. Parsing stops after the first record
 * that ends beyond offset bytes_to_read, or when the buffer is exhausted.
 * Lines longer than MAX_READ_SEQUENCE_LENGTH_GZ - 1 characters are truncated.
 *
 * Everything after the last complete record is stored back into fq_gzfile.
 * This includes an unfinished record at the end of the buffer, so the next
 * call resumes on a record boundary instead of in the middle of a record.
 *
 * @param reads          list that receives the new reads
 * @param bytes_to_read  approximate number of buffered bytes to parse
 * @param fq_gzfile      stream state (file handle, z_stream, carried-over bytes)
 * @return number of buffered bytes consumed, i.e. the offset just past the
 *         last complete record. On failure a zlib status (Z_ERRNO,
 *         Z_DATA_ERROR or Z_MEM_ERROR) converted to size_t is returned.
 *         NOTE(review): those codes are negative in zlib, so callers see them
 *         as very large values - kept as-is for compatibility.
 */
size_t fastq_gzread_bytes_se(array_list_t *reads, size_t bytes_to_read, fastq_gzfile_t *fq_gzfile) {
    const size_t max_read_len = MAX_READ_SEQUENCE_LENGTH_GZ;
    int max_data_len = CHUNK;   /* current capacity of data, maintained by consume_input() */
    int i = 0;                  /* number of valid bytes stored in data */
    int c;
    size_t pos;
    size_t line_start = 0;
    size_t line_count = 0;
    size_t record_end = 0;      /* offset just past the last complete record */
    size_t data_size;
    size_t result;
    unsigned have;
    unsigned j;
    fastq_read_t *read;
    char *data = NULL;
    char *id = NULL;
    char *seq = NULL;
    char *qual = NULL;

    /* zlib staging buffers */
    unsigned char in[CHUNK];
    unsigned char out[CHUNK];

    id = (char*) calloc(max_read_len, sizeof(char));
    seq = (char*) calloc(max_read_len, sizeof(char));
    qual = (char*) calloc(max_read_len, sizeof(char));

    /* Make room for the bytes carried over from the previous call, if any. */
    if (fq_gzfile->data != NULL && fq_gzfile->data_size > (size_t) max_data_len) {
        max_data_len = fq_gzfile->data_size + max_data_len;
    }
    data = (char*) calloc(max_data_len, sizeof(char));

    if (id == NULL || seq == NULL || qual == NULL || data == NULL) {
        result = (size_t) Z_MEM_ERROR;
        goto cleanup;
    }

    if (fq_gzfile->data != NULL) {
        memcpy(data, fq_gzfile->data, fq_gzfile->data_size);
        i = fq_gzfile->data_size;
    }

    do {
        fq_gzfile->strm.avail_in = fread(in, 1, CHUNK, fq_gzfile->fd);
        if (ferror(fq_gzfile->fd)) {
            (void)inflateEnd(&fq_gzfile->strm);
            result = (size_t) Z_ERRNO;
            goto cleanup;
        }
        if (fq_gzfile->strm.avail_in == 0) {
            break;
        }
        fq_gzfile->strm.next_in = in;

        /* run inflate() on input until output buffer not full */
        do {
            fq_gzfile->strm.avail_out = CHUNK;
            fq_gzfile->strm.next_out = out;
            fq_gzfile->ret = inflate(&fq_gzfile->strm, Z_NO_FLUSH);
            assert(fq_gzfile->ret != Z_STREAM_ERROR);   /* state not clobbered */
            switch (fq_gzfile->ret) {
                case Z_NEED_DICT:
                    fq_gzfile->ret = Z_DATA_ERROR;
                    /* fall through */
                case Z_DATA_ERROR:
                case Z_MEM_ERROR:
                    (void)inflateEnd(&fq_gzfile->strm);
                    result = (size_t) fq_gzfile->ret;
                    goto cleanup;
            }

            have = CHUNK - fq_gzfile->strm.avail_out;
            for (j = 0; j < have; j++) {
                /* out[] holds unsigned bytes, so the value can never equal EOF. */
                c = out[j];
                max_data_len = consume_input(c, &data, max_data_len, i);
                i++;
            }
        } while (fq_gzfile->strm.avail_out == 0);

        /* done when inflate() says it's done */
    } while ((size_t) i < bytes_to_read && fq_gzfile->ret != Z_STREAM_END);

    /* Split the buffer into lines and assemble one read per group of four.
     * The loop ends once a record finishes beyond bytes_to_read. */
    for (pos = 0; pos < (size_t) i && record_end <= bytes_to_read; pos++) {
        const char *line;
        size_t line_len;

        if (data[pos] != '\n') {
            continue;
        }
        line = data + line_start;
        line_len = pos - line_start;

        switch (line_count % 4) {
            case 0:
                fastq_gzread_bytes_se_copy_line(id, max_read_len, line, line_len);
                break;
            case 1:
                fastq_gzread_bytes_se_copy_line(seq, max_read_len, line, line_len);
                break;
            case 2:
                /* third line of a record is not stored */
                break;
            default:
                fastq_gzread_bytes_se_copy_line(qual, max_read_len, line, line_len);
                read = fastq_read_new(id, seq, qual);
                array_list_insert(read, reads);
                record_end = pos + 1;
                break;
        }

        line_start = pos + 1;
        line_count++;
    }

    /* Carry over everything after the last complete record. Previously, when
     * the buffer ended in the middle of a record, those bytes were dropped and
     * the next call started parsing out of phase. */
    data_size = (size_t) i - record_end;
    if (data_size > 0) {
        /* Grow whenever there is no buffer yet or the recorded size is smaller
         * than what must be stored. This also covers a previously stored size
         * of 0, which used to skip the resize and overflow the old buffer. */
        if (fq_gzfile->data == NULL || fq_gzfile->data_size < data_size) {
            char *grown = (char*) realloc(fq_gzfile->data, data_size);
            if (grown == NULL) {
                result = (size_t) Z_MEM_ERROR;
                goto cleanup;
            }
            fq_gzfile->data = grown;
        }
        memcpy(fq_gzfile->data, data + record_end, data_size);
    }
    fq_gzfile->data_size = data_size;

    result = record_end;

cleanup:
    free(data);
    free(id);
    free(seq);
    free(qual);
    return result;
}
int main(int argc, char **argv) { struct timespec ts_a, ts_b; double elapsed; char *file_path; char *queue_model_str; unsigned num_messages; unsigned num_threads; fstrm_iothr_queue_model queue_model; if (argc != 5) { fprintf(stderr, "Usage: %s <FILE> <QUEUE MODEL> <NUM THREADS> <NUM MESSAGES>\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "FILE is a filesystem path.\n"); fprintf(stderr, "QUEUE MODEL is the string 'SPSC' or 'MPSC'.\n"); fprintf(stderr, "NUM THREADS is an integer.\n"); fprintf(stderr, "NUM MESSAGES is an integer.\n"); return EXIT_FAILURE; } file_path = argv[1]; queue_model_str = argv[2]; num_threads = atoi(argv[3]); num_messages = atoi(argv[4]); if (num_threads < 1) { fprintf(stderr, "%s: Error: invalid number of threads\n", argv[0]); return EXIT_FAILURE; } if (num_messages < 1) { fprintf(stderr, "%s: Error: invalid number of messages\n", argv[0]); return EXIT_FAILURE; } if (strcasecmp(queue_model_str, "SPSC") == 0) { queue_model = FSTRM_IOTHR_QUEUE_MODEL_SPSC; } else if (strcasecmp(queue_model_str, "MPSC") == 0) { queue_model = FSTRM_IOTHR_QUEUE_MODEL_MPSC; } else { fprintf(stderr, "%s: Error: invalid queue model\n", argv[0]); return EXIT_FAILURE; } printf("testing fstrm_iothr with file= %s " "queue_model= %s " "num_threads= %u " "num_messages= %u\n", file_path, queue_model_str, num_threads, num_messages); struct fstrm_file_options *fopt; fopt = fstrm_file_options_init(); fstrm_file_options_set_file_path(fopt, file_path); struct fstrm_writer *w = fstrm_file_writer_init(fopt, NULL); assert(w != NULL); fstrm_file_options_destroy(&fopt); struct fstrm_iothr_options *iothr_opt; iothr_opt = fstrm_iothr_options_init(); if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_SPSC) { fstrm_iothr_options_set_num_input_queues(iothr_opt, num_threads); } else if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_MPSC) { fstrm_iothr_options_set_num_input_queues(iothr_opt, 1); } else { assert(0); /* not reached */ } fstrm_iothr_options_set_queue_model(iothr_opt, queue_model); 
struct fstrm_iothr *iothr = fstrm_iothr_init(iothr_opt, &w); assert(iothr != NULL); fstrm_iothr_options_destroy(&iothr_opt); struct consumer test_consumer; struct producer test_producers[num_threads]; for (unsigned i = 0; i < num_threads; i++) { test_producers[i].iothr = iothr; test_producers[i].num_messages = num_messages; } if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_SPSC) { for (unsigned i = 0; i < num_threads; i++) { test_producers[i].ioq = fstrm_iothr_get_input_queue(iothr); assert(test_producers[i].ioq != NULL); } } else if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_MPSC) { struct fstrm_iothr_queue *ioq = fstrm_iothr_get_input_queue(iothr); assert(ioq != NULL); for (unsigned i = 0; i < num_threads; i++) test_producers[i].ioq = ioq; } else { assert(0); /* not reached */ } my_gettime(CLOCK_MONOTONIC, &ts_a); printf("creating %u producer threads\n", num_threads); for (unsigned i = 0; i < num_threads; i++) pthread_create(&test_producers[i].thr, NULL, thr_producer, &test_producers[i]); printf("joining %u producer threads\n", num_threads); for (unsigned i = 0; i < num_threads; i++) pthread_join(test_producers[i].thr, (void **) NULL); printf("destroying fstrm_iothr object\n"); fstrm_iothr_destroy(&iothr); my_gettime(CLOCK_MONOTONIC, &ts_b); my_timespec_sub(&ts_a, &ts_b); elapsed = my_timespec_to_double(&ts_b); printf("completed in %.2f seconds\n", elapsed); int res = consume_input(&test_consumer, file_path); if (res != EXIT_SUCCESS) return res; struct producer_stats pstat_sum; memset(&pstat_sum, 0, sizeof(pstat_sum)); for (unsigned i = 0; i < num_threads; i++) { pstat_sum.count_generated += test_producers[i].pstat.count_generated; pstat_sum.count_submitted += test_producers[i].pstat.count_submitted; pstat_sum.bytes_generated += test_producers[i].pstat.bytes_generated; pstat_sum.bytes_submitted += test_producers[i].pstat.bytes_submitted; } printf("count_generated= %" PRIu64 "\n", pstat_sum.count_generated); printf("bytes_generated= %" PRIu64 "\n", 
pstat_sum.bytes_generated); printf("count_submitted= %" PRIu64 "\n", pstat_sum.count_submitted); printf("bytes_submitted= %" PRIu64 "\n", pstat_sum.bytes_submitted); printf("count_received= %" PRIu64 " (%.3f)\n", test_consumer.cstat.count_received, (test_consumer.cstat.count_received + 0.0) / (pstat_sum.count_generated + 0.0) ); printf("bytes_received= %" PRIu64 " (%.3f)\n", test_consumer.cstat.bytes_received, (test_consumer.cstat.bytes_received + 0.0) / (pstat_sum.bytes_generated + 0.0) ); assert(pstat_sum.count_submitted == test_consumer.cstat.count_received); assert(pstat_sum.bytes_submitted == test_consumer.cstat.bytes_received); putchar('\n'); return EXIT_SUCCESS; }