Ejemplo n.º 1
0
/*
 * Pass a whole token to the lexer one character at a time.
 *
 * Each character of `str` is handed to consume_input() together with `file`,
 * `line` and a column index that starts at `start_idx` and grows by one per
 * character. After the last character a single blank (' ') is submitted at
 * the next column index.
 */
void Lexer::consume_token_block(string str, int start_idx, const char *file, int line)
{
  const int count = str.length();
  int column = start_idx;
  int pos = 0;

  while (pos < count) {
    consume_input(str[pos], file, line, column);
    ++pos;
    ++column;
  }

  // Trailing blank, submitted one column past the last character.
  consume_input(' ', file, line, column);
}
Ejemplo n.º 2
0
/**
 * Read single-end FASTQ records from a gzip-compressed file.
 *
 * Compressed input is read from fq_gzfile->fd in CHUNK-sized pieces and
 * inflated through fq_gzfile->strm. The decompressed bytes are appended to a
 * working buffer that starts with the bytes left over from the previous call
 * (fq_gzfile->data / fq_gzfile->data_size). Reading stops once at least
 * 4 * num_reads newlines have been decompressed in this call, the input is
 * exhausted, or inflate() reports Z_STREAM_END.
 *
 * The buffer is then split into lines. Lines are grouped four at a time
 * (id, sequence, separator, qualities); for every complete group a read is
 * built with fastq_read_new() and inserted into reads. At most 4 * num_reads
 * lines are parsed. Bytes that were not parsed are stored back into
 * fq_gzfile->data for the next call.
 *
 * @param reads      list that receives the newly created reads
 * @param num_reads  maximum number of records to parse in this call
 * @param fq_gzfile  gzip FASTQ handle: input file, zlib stream and carry-over buffer
 * @return reads->size on success. On failure a zlib status code (Z_ERRNO,
 *         Z_DATA_ERROR or Z_MEM_ERROR) converted to size_t is returned, so
 *         callers have to compare against those values explicitly.
 */
size_t fastq_gzread_se(array_list_t *reads, size_t num_reads, fastq_gzfile_t *fq_gzfile) {
	size_t count = 0;
	fastq_read_t *read;

	size_t num_lines_to_read = 4 * num_reads;	/* Each read consists of 4 lines */

	int max_data_len = CHUNK;	/* capacity of the working buffer */
	int c = 0;
	int i = 0;			/* number of valid bytes in the working buffer */
	size_t lines = 0;
	char *aux;
	char *data;

	/*
	 * Start of the id / sequence / quality line of the record being
	 * assembled. They point into the working buffer instead of being copied
	 * into fixed-size scratch buffers, so a line of any length is safe.
	 * fastq_read_new() must not keep these pointers; the previous
	 * implementation already relied on that by reusing and freeing its
	 * scratch buffers.
	 */
	char *id = NULL;
	char *seq = NULL;
	char *qual = NULL;

	// ZLIB variables
	unsigned have;
	unsigned char in[CHUNK];
	unsigned char out[CHUNK];

	// Make room for the bytes carried over from the previous call, if any
	if (fq_gzfile->data != NULL && fq_gzfile->data_size > max_data_len) {
		max_data_len = fq_gzfile->data_size + max_data_len;
	}
	data = calloc(max_data_len, sizeof(char));
	if (data == NULL) {
		return Z_MEM_ERROR;
	}
	if (fq_gzfile->data != NULL) {
		/* Raw bytes, not a C string: memcpy does not stop at an embedded NUL */
		memcpy(data, fq_gzfile->data, fq_gzfile->data_size);
		i = fq_gzfile->data_size;
	}

	do {
		fq_gzfile->strm.avail_in = fread(in, 1, CHUNK, fq_gzfile->fd);
		if (ferror(fq_gzfile->fd)) {
			(void)inflateEnd(&fq_gzfile->strm);
			free(data);
			return Z_ERRNO;
		}
		if (fq_gzfile->strm.avail_in == 0)
			break;
		fq_gzfile->strm.next_in = in;

		/* run inflate() on input until output buffer not full */
		do {
			fq_gzfile->strm.avail_out = CHUNK;
			fq_gzfile->strm.next_out = out;
			fq_gzfile->ret = inflate(&fq_gzfile->strm, Z_NO_FLUSH);
			assert(fq_gzfile->ret != Z_STREAM_ERROR);  /* state not clobbered */
			switch (fq_gzfile->ret) {
			case Z_NEED_DICT:
				fq_gzfile->ret = Z_DATA_ERROR;
				/* fall through */
			case Z_DATA_ERROR:
			case Z_MEM_ERROR:
				(void)inflateEnd(&fq_gzfile->strm);
				free(data);
				return fq_gzfile->ret;
			default:
				break;
			}
			have = CHUNK - fq_gzfile->strm.avail_out;

			/*
			 * out[] holds unsigned bytes, which can never compare equal to
			 * the negative EOF, so every decompressed byte is appended.
			 */
			for (unsigned j = 0; j < have; j++) {
				c = out[j];

				/*
				 * consume_input() receives the buffer by address and returns
				 * the capacity to use from now on; presumably it grows the
				 * buffer when index i does not fit -- TODO confirm.
				 */
				max_data_len = consume_input(c, &data, max_data_len, i);
				if (c == '\n') {
					lines++;
				}
				i++;
			}
		} while (fq_gzfile->strm.avail_out == 0);

		/* done when inflate() says it's done */
	} while (lines < num_lines_to_read && fq_gzfile->ret != Z_STREAM_END);

	// Split the buffer into lines and build one read per group of four
	size_t parsed_chars;
	size_t parsed_lines = 0;
	size_t data_size;

	aux = data;
	for (parsed_chars = 0; parsed_chars < (size_t) i && parsed_lines < num_lines_to_read; parsed_chars++) {
		if (data[parsed_chars] != '\n') {
			continue;
		}

		/* Terminate the line in place so aux is a C string */
		data[parsed_chars] = '\0';
		switch (count % 4) {
		case 0:
			id = aux;
			break;
		case 1:
			seq = aux;
			break;
		case 2:
			/* separator line, ignored */
			break;
		default:
			qual = aux;
			read = fastq_read_new(id, seq, qual);
			array_list_insert(read, reads);
			break;
		}
		count++;
		aux = data + parsed_chars + 1;
		parsed_lines++;
	}

	// Keep the unparsed tail for the next call
	data_size = (size_t) i - parsed_chars;
	if (data_size > 0) {
		/*
		 * Always size the carry-over buffer for the new tail. The stored
		 * data_size says nothing reliable about the current allocation
		 * (it may be 0 for a zero-byte allocation).
		 */
		char *saved = realloc(fq_gzfile->data, data_size);
		if (saved == NULL) {
			free(data);
			return Z_MEM_ERROR;
		}
		fq_gzfile->data = saved;
		memcpy(fq_gzfile->data, data + parsed_chars, data_size);
	}
	fq_gzfile->data_size = data_size;

	free(data);

	return reads->size;
}
Ejemplo n.º 3
0
/**
 * Read roughly bytes_to_read decompressed bytes of single-end FASTQ data
 * from a gzip-compressed file and turn them into reads.
 *
 * Compressed input is read from fq_gzfile->fd in CHUNK-sized pieces and
 * inflated through fq_gzfile->strm. The decompressed bytes are appended to a
 * working buffer that starts with the bytes left over from the previous call
 * (fq_gzfile->data / fq_gzfile->data_size). Reading stops once the buffer
 * holds at least bytes_to_read bytes, the input is exhausted, or inflate()
 * reports Z_STREAM_END.
 *
 * The buffer is then split into lines, grouped four at a time (id, sequence,
 * separator, qualities); for every complete group a read is built with
 * fastq_read_new() and inserted into reads. Parsing stops after the first
 * record that ends beyond bytes_to_read, or at the end of the buffer. Bytes
 * that were not parsed are stored back into fq_gzfile->data.
 *
 * NOTE(review): when the buffer ends in the middle of a record, the scan runs
 * to the end of the buffer, so the lines of that partial record are counted
 * as parsed and are not carried over. This matches the previous behaviour;
 * confirm whether callers depend on it.
 *
 * @param reads          list that receives the newly created reads
 * @param bytes_to_read  amount of decompressed data to aim for in this call
 * @param fq_gzfile      gzip FASTQ handle: input file, zlib stream and carry-over buffer
 * @return number of buffer bytes consumed by the parser. On failure a zlib
 *         status code (Z_ERRNO, Z_DATA_ERROR or Z_MEM_ERROR) converted to
 *         size_t is returned, so callers have to compare against those
 *         values explicitly.
 */
size_t fastq_gzread_bytes_se(array_list_t *reads, size_t bytes_to_read, fastq_gzfile_t *fq_gzfile) {
  size_t count = 0;
  fastq_read_t *read;

  int max_data_len = CHUNK;   /* capacity of the working buffer */
  int c = 0;
  int i = 0;                  /* number of valid bytes in the working buffer */
  char *aux;
  char *data;

  /*
   * Start of the id / sequence / quality line of the record being assembled.
   * They point into the working buffer instead of being copied into
   * fixed-size scratch buffers, so a line of any length is safe.
   * fastq_read_new() must not keep these pointers; the previous
   * implementation already relied on that by reusing and freeing its
   * scratch buffers.
   */
  char *id = NULL;
  char *seq = NULL;
  char *qual = NULL;

  // ZLIB variables
  unsigned have;
  unsigned char in[CHUNK];
  unsigned char out[CHUNK];

  // Make room for the bytes carried over from the previous call, if any
  if (fq_gzfile->data != NULL && fq_gzfile->data_size > max_data_len) {
    max_data_len = fq_gzfile->data_size + max_data_len;
  }
  data = calloc(max_data_len, sizeof(char));
  if (data == NULL) {
    return Z_MEM_ERROR;
  }
  if (fq_gzfile->data != NULL) {
    /* Raw bytes, not a C string: memcpy does not stop at an embedded NUL */
    memcpy(data, fq_gzfile->data, fq_gzfile->data_size);
    i = fq_gzfile->data_size;
  }

  do {
    fq_gzfile->strm.avail_in = fread(in, 1, CHUNK, fq_gzfile->fd);
    if (ferror(fq_gzfile->fd)) {
      (void)inflateEnd(&fq_gzfile->strm);
      free(data);
      return Z_ERRNO;
    }
    if (fq_gzfile->strm.avail_in == 0)
      break;
    fq_gzfile->strm.next_in = in;

    /* run inflate() on input until output buffer not full */
    do {
      fq_gzfile->strm.avail_out = CHUNK;
      fq_gzfile->strm.next_out = out;
      fq_gzfile->ret = inflate(&fq_gzfile->strm, Z_NO_FLUSH);
      assert(fq_gzfile->ret != Z_STREAM_ERROR);  /* state not clobbered */
      switch (fq_gzfile->ret) {
      case Z_NEED_DICT:
        fq_gzfile->ret = Z_DATA_ERROR;
        /* fall through */
      case Z_DATA_ERROR:
      case Z_MEM_ERROR:
        (void)inflateEnd(&fq_gzfile->strm);
        free(data);
        return fq_gzfile->ret;
      default:
        break;
      }
      have = CHUNK - fq_gzfile->strm.avail_out;

      /*
       * out[] holds unsigned bytes, which can never compare equal to the
       * negative EOF, so every decompressed byte is appended.
       */
      for (unsigned j = 0; j < have; j++) {
        c = out[j];

        /*
         * consume_input() receives the buffer by address and returns the
         * capacity to use from now on; presumably it grows the buffer when
         * index i does not fit -- TODO confirm.
         */
        max_data_len = consume_input(c, &data, max_data_len, i);
        i++;
      }
    } while (fq_gzfile->strm.avail_out == 0);

    /* done when inflate() says it's done */
  } while ((size_t) i < bytes_to_read && fq_gzfile->ret != Z_STREAM_END);

  // Split the buffer into lines and build one read per group of four
  size_t parsed_chars;
  size_t data_size;

  aux = data;
  for (parsed_chars = 0; parsed_chars < (size_t) i; parsed_chars++) {
    if (data[parsed_chars] != '\n') {
      continue;
    }

    /* Terminate the line in place so aux is a C string */
    data[parsed_chars] = '\0';
    if (count % 4 == 0) {
      id = aux;
    } else if (count % 4 == 1) {
      seq = aux;
    } else if (count % 4 == 3) {
      qual = aux;
      read = fastq_read_new(id, seq, qual);
      array_list_insert(read, reads);
      if (parsed_chars + 1 > bytes_to_read) {
        /* Requested amount covered: stop right after this record */
        parsed_chars++;
        break;
      }
    }
    /* count % 4 == 2 is the separator line and is ignored */
    count++;
    aux = data + parsed_chars + 1;
  }

  // Keep the unparsed tail for the next call
  data_size = (size_t) i - parsed_chars;
  if (data_size > 0) {
    /*
     * Always size the carry-over buffer for the new tail. The stored
     * data_size says nothing reliable about the current allocation (it may
     * be 0 for a zero-byte allocation).
     */
    char *saved = realloc(fq_gzfile->data, data_size);
    if (saved == NULL) {
      free(data);
      return Z_MEM_ERROR;
    }
    fq_gzfile->data = saved;
    memcpy(fq_gzfile->data, data + parsed_chars, data_size);
  }
  fq_gzfile->data_size = data_size;

  free(data);

  return parsed_chars;
}
Ejemplo n.º 4
0
int
main(int argc, char **argv)
{
	struct timespec ts_a, ts_b;
	double elapsed;
	char *file_path;
	char *queue_model_str;
	unsigned num_messages;
	unsigned num_threads;
	fstrm_iothr_queue_model queue_model;

	if (argc != 5) {
		fprintf(stderr, "Usage: %s <FILE> <QUEUE MODEL> <NUM THREADS> <NUM MESSAGES>\n", argv[0]);
		fprintf(stderr, "\n");
		fprintf(stderr, "FILE is a filesystem path.\n");
		fprintf(stderr, "QUEUE MODEL is the string 'SPSC' or 'MPSC'.\n");
		fprintf(stderr, "NUM THREADS is an integer.\n");
		fprintf(stderr, "NUM MESSAGES is an integer.\n");
		return EXIT_FAILURE;
	}
	file_path = argv[1];
	queue_model_str = argv[2];
	num_threads = atoi(argv[3]);
	num_messages = atoi(argv[4]);
	if (num_threads < 1) {
		fprintf(stderr, "%s: Error: invalid number of threads\n", argv[0]);
		return EXIT_FAILURE;
	}
	if (num_messages < 1) {
		fprintf(stderr, "%s: Error: invalid number of messages\n", argv[0]);
		return EXIT_FAILURE;
	}

	if (strcasecmp(queue_model_str, "SPSC") == 0) {
		queue_model = FSTRM_IOTHR_QUEUE_MODEL_SPSC;
	} else if (strcasecmp(queue_model_str, "MPSC") == 0) {
		queue_model = FSTRM_IOTHR_QUEUE_MODEL_MPSC;
	} else {
		fprintf(stderr, "%s: Error: invalid queue model\n", argv[0]);
		return EXIT_FAILURE;
	}

	printf("testing fstrm_iothr with file= %s "
	       "queue_model= %s "
	       "num_threads= %u "
	       "num_messages= %u\n",
	       file_path, queue_model_str, num_threads, num_messages);

	struct fstrm_file_options *fopt;
	fopt = fstrm_file_options_init();
	fstrm_file_options_set_file_path(fopt, file_path);
	struct fstrm_writer *w = fstrm_file_writer_init(fopt, NULL);
	assert(w != NULL);
	fstrm_file_options_destroy(&fopt);

	struct fstrm_iothr_options *iothr_opt;
	iothr_opt = fstrm_iothr_options_init();

	if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_SPSC) {
		fstrm_iothr_options_set_num_input_queues(iothr_opt, num_threads);
	} else if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_MPSC) {
		fstrm_iothr_options_set_num_input_queues(iothr_opt, 1);
	} else {
		assert(0); /* not reached */
	}
	fstrm_iothr_options_set_queue_model(iothr_opt, queue_model);

	struct fstrm_iothr *iothr = fstrm_iothr_init(iothr_opt, &w);
	assert(iothr != NULL);
	fstrm_iothr_options_destroy(&iothr_opt);

	struct consumer test_consumer;
	struct producer test_producers[num_threads];

	for (unsigned i = 0; i < num_threads; i++) {
		test_producers[i].iothr = iothr;
		test_producers[i].num_messages = num_messages;
	}

	if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_SPSC) {
		for (unsigned i = 0; i < num_threads; i++) {
			test_producers[i].ioq = fstrm_iothr_get_input_queue(iothr);
			assert(test_producers[i].ioq != NULL);
		}
	} else if (queue_model == FSTRM_IOTHR_QUEUE_MODEL_MPSC) {
		struct fstrm_iothr_queue *ioq = fstrm_iothr_get_input_queue(iothr);
		assert(ioq != NULL);
		for (unsigned i = 0; i < num_threads; i++)
			test_producers[i].ioq = ioq;
	} else {
		assert(0); /* not reached */
	}

	my_gettime(CLOCK_MONOTONIC, &ts_a);

	printf("creating %u producer threads\n", num_threads);
	for (unsigned i = 0; i < num_threads; i++)
		pthread_create(&test_producers[i].thr, NULL, thr_producer, &test_producers[i]);

	printf("joining %u producer threads\n", num_threads);
	for (unsigned i = 0; i < num_threads; i++)
		pthread_join(test_producers[i].thr, (void **) NULL);

	printf("destroying fstrm_iothr object\n");
	fstrm_iothr_destroy(&iothr);

	my_gettime(CLOCK_MONOTONIC, &ts_b);
	my_timespec_sub(&ts_a, &ts_b);
	elapsed = my_timespec_to_double(&ts_b);
	printf("completed in %.2f seconds\n", elapsed);

	int res = consume_input(&test_consumer, file_path);
	if (res != EXIT_SUCCESS)
		return res;

	struct producer_stats pstat_sum;
	memset(&pstat_sum, 0, sizeof(pstat_sum));
	for (unsigned i = 0; i < num_threads; i++) {
		pstat_sum.count_generated += test_producers[i].pstat.count_generated;
		pstat_sum.count_submitted += test_producers[i].pstat.count_submitted;
		pstat_sum.bytes_generated += test_producers[i].pstat.bytes_generated;
		pstat_sum.bytes_submitted += test_producers[i].pstat.bytes_submitted;
	}
	printf("count_generated= %" PRIu64 "\n", pstat_sum.count_generated);
	printf("bytes_generated= %" PRIu64 "\n", pstat_sum.bytes_generated);
	printf("count_submitted= %" PRIu64 "\n", pstat_sum.count_submitted);
	printf("bytes_submitted= %" PRIu64 "\n", pstat_sum.bytes_submitted);

	printf("count_received= %" PRIu64 " (%.3f)\n",
	       test_consumer.cstat.count_received,
	       (test_consumer.cstat.count_received + 0.0) / (pstat_sum.count_generated + 0.0)
	);
	printf("bytes_received= %" PRIu64 " (%.3f)\n",
	       test_consumer.cstat.bytes_received,
	       (test_consumer.cstat.bytes_received + 0.0) / (pstat_sum.bytes_generated + 0.0)
	);

	assert(pstat_sum.count_submitted == test_consumer.cstat.count_received);
	assert(pstat_sum.bytes_submitted == test_consumer.cstat.bytes_received);

	putchar('\n');

	return EXIT_SUCCESS;
}