void *fastq_reader(void *input) {
     struct timeval start, end;
     double time;
     extern size_t fd_read_bytes;
     size_t read_bytes;
     //if (time_on) { start_timer(start); }

     wf_input_t *wf_input = (wf_input_t *) input;
     batch_t *new_batch = NULL;
     batch_t *batch = wf_input->batch;
     fastq_batch_reader_input_t *fq_reader_input = wf_input->fq_reader_input;
     array_list_t *reads = array_list_new(10000, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

     if (fq_reader_input->gzip) {
       //Gzip fastq file
       if (fq_reader_input->flags == SINGLE_END_MODE) {
	 fastq_gzread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1);
       } else {
	 //printf("Gzip Reader for pair-end not implemented\n");;
	 fastq_gzread_bytes_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2);
	 //fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, 
	 //		      fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2);
       }
     } else {
       //Fastq file
       if (fq_reader_input->flags == SINGLE_END_MODE) {
	 read_bytes = fastq_fread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1);
       } else {
	 read_bytes = fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, 
				      fq_reader_input->fq_file1, fq_reader_input->fq_file2);
       }
       fd_read_bytes += read_bytes;
     }

     size_t num_reads = array_list_size(reads);

     if (num_reads == 0) {
	  array_list_free(reads, (void *)fastq_read_free);
     } else {
	  mapping_batch_t *mapping_batch = mapping_batch_new(reads, 
							     batch->pair_input->pair_mng);

	  new_batch = batch_new(batch->bwt_input, batch->region_input, batch->cal_input, 
				batch->pair_input, batch->preprocess_rna, batch->sw_input, batch->writer_input, 
				batch->mapping_mode, mapping_batch);
     }

     //if (time_on) { stop_timer(start, end, time); timing_add(time, FASTQ_READER, timing); }
     //printf("Read batch %i\n", num_reads);
     
     return new_batch;
}
示例#2
0
int
main() {
  void *batch = batch_new(4);
  assert(batch);

  for (int i = 0; i < 20; ++i) {
    void *(*function)(void *) = is_even(i)
      ? function_even
      : function_odd;
    batch_push(batch, function, (void *) i);
  }

  batch_wait(batch);
  batch_end(batch);

  assert(20 == count);

  return 0;
}
示例#3
0
/*
 * Calls batch_new() and returns its result reinterpreted as a
 * struct ydb_batch *.
 */
struct ydb_batch *ydb_batch()
{
	struct ydb_batch *handle = (struct ydb_batch *)batch_new();

	return handle;
}
void *file_reader_2(void *input) {
  wf_input_file_t *wf_input = (wf_input_file_t *) input;
  FILE *fd = wf_input->file;
  batch_t *batch = wf_input->batch;
  int pair_mode = batch->pair_input->pair_mng->pair_mode;
  
  const int MAX_READS = 100;
  int num_reads = 0;
  batch_t *new_batch = NULL;

  size_t sizes_to_read[3], head_len, seq_len, num_items;
  size_t tot_size;
  char *buffer, *id, *sequence, *quality;
  size_t bytes;
  unsigned char type;
  array_list_t *reads = array_list_new(MAX_READS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
  mapping_batch_t *mapping_batch = mapping_batch_new_2(MAX_READS, 
						       reads,
						       batch->pair_input->pair_mng);
  
  while (1) {
    //[size head][size seq][num items]
    bytes = fread(&type, sizeof(unsigned char), 1, fd);
    if (!bytes) { break; }
 
    //fastq_read_t *fq_read = file_fastq_read_new(&num_items, fd);
    fastq_read_t *fq_read = file_read_fastq_reads(&num_items, fd);
    if (fq_read == NULL) { /*printf("fq NULL\n");*/ break; }
    //printf("(num items %i)\nID : %s\nSEQ: %s\nQUA: %s\n", num_items, fq_read->id, fq_read->sequence, fq_read->quality);

    array_list_insert(fq_read, reads);
    
    mapping_batch->mapping_lists[num_reads] = array_list_new(50,
							     1.25f, 
							     COLLECTION_MODE_ASYNCHRONIZED);
    if (type == CAL_TYPE) {
      //exit(-1);
      //printf("\tCal Report\n");
      file_read_cals(num_items, mapping_batch->mapping_lists[num_reads], 
		     fq_read, fd);      
    } else if (type == META_ALIGNMENT_TYPE) {
      //printf("\tMeta Alignments Report\n");
      file_read_meta_alignments(num_items, mapping_batch->mapping_lists[num_reads], 
				fq_read, fd);            
      array_list_set_flag(BITEM_META_ALIGNMENTS,
			  mapping_batch->mapping_lists[num_reads]);    
    } else {
      //exit(-1);
      //printf("\tAlignments Report\n");
      file_read_alignments(num_items, mapping_batch->mapping_lists[num_reads], 
			   fq_read, fd);
    }

    //printf("W3 file read %i\n", array_list_size(mapping_batch->mapping_lists[num_reads]));
    num_reads++;
    if (num_reads >= MAX_READS) { break; }

  }

  tot_reads2 += num_reads;
  //printf("W3 Reads: %i | %i\n", tot_reads2, num_reads);
  //w3_r += num_reads;
  //printf("W3 Reads: %i\n", w3_r);

  if (num_reads) {
    mapping_batch->num_allocated_targets = num_reads;
    new_batch = batch_new(batch->bwt_input, batch->region_input, batch->cal_input, 
			  batch->pair_input, batch->preprocess_rna, batch->sw_input,
			  batch->writer_input, batch->mapping_mode, mapping_batch); 
  } else {
    mapping_batch_free(mapping_batch);
  }

  extern size_t reads_w3;
  reads_w3 += num_reads;

  return new_batch;

}
/*
fastq_read_t *file_fastq_read_new(size_t *num_items, FILE *fd) {

  size_t sizes_to_read[3], head_len, seq_len;

  head_len  = sizes_to_read[0];
  seq_len   = sizes_to_read[1];
  *num_items = sizes_to_read[2];
  
  int bytes = fread(sizes_to_read, sizeof(size_t), 3, fd);
  if (!bytes) { return NULL; }
  
  int tot_size = head_len + 2*seq_len;
  buffer = (char *)calloc(tot_size + 1, sizeof(char));
  bytes = fread(buffer, sizeof(char), tot_size, fd);
  if (!bytes) { 
    free(buffer);    
    return NULL; 
  }

  char *id = (char *)calloc(head_len + 1, sizeof(char));
  memcpy(id, buffer, head_len);
  //printf("ID : %s\n", id);

  char *sequence = (char *)calloc(seq_len + 1, sizeof(char));  
  memcpy(sequence, &buffer[head_len], seq_len);
  //printf("SEQ: %s\n", sequence);

  char *quality = (char *)calloc(seq_len + 1, sizeof(char));  
  memcpy(quality, &buffer[head_len + seq_len], seq_len);
  //printf("QUA: %s\n", quality);
  
  fastq_read_t *fq_read = fastq_read_new(id, sequence, quality);

  free(buffer);
  free(id);
  free(sequence);
  free(quality);


  return fq_read;

}

int file_cal_fill(size_t num_items, array_list_t *list, FILE *fd) {

  if (!num_items) { return 0; }
  
  bwt_anchor_t bwt_anchors[num_items];
  bytes = fread(bwt_anchors, sizeof(bwt_anchor_t), num_items, fd);
  if (!bytes) { LOG_FATAL("Corrupt file\n"); }
  
  for (int i = 0; i < num_items; i++) {
    //printf("[%i:%lu-%lu]\n", bwt_anchors[i].chromosome, bwt_anchors[i].start, bwt_anchors[i].end);
    size_t seed_size = bwt_anchors[i].end - bwt_anchors[i].start;
    cal_t *cal;
    if (bwt_anchors[i].type == FORWARD_ANCHOR) {
      cal = convert_bwt_anchor_to_CAL(&bwt_anchors[i], 0, seed_size);
    } else {
      cal = convert_bwt_anchor_to_CAL(&bwt_anchors[i], fq_read->length - seed_size - 1, fq_read->length - 1);
    }
    array_list_insert(cal, list); 
  }  
  
  return 0;

}

int file_meta_alignment_fill(size_t num_items, array_list_t *list, FILE *fd) {

  if (!num_items) { return 0; }

  simple_alignment_t simple_alignment[num_items];
  simple_alignment_t *simple_a;
  
  bytes = fread(simple_alignment, sizeof(simple_alignment_t), num_items, fd);
  if (!bytes) { LOG_FATAL("Corrupt file\n"); }
  
  size_t cigar_tot_len = 0;
  for (int i = 0; i < num_items; i++) {
    simple_a = &simple_alignment[i];
    //printf("ITEM %i: (%i)[%i:%lu] [%i-%i]\n", i, simple_a->map_strand, simple_a->map_chromosome,
    //     simple_a->map_start, simple_a->gap_start, simple_a->gap_end);
    cigar_tot_len += simple_a->cigar_len;
  }
    
  char cigar_buffer[cigar_tot_len];
  bytes = fread(cigar_buffer, sizeof(char), cigar_tot_len, fd);
  if (!bytes) { LOG_FATAL("Corrupt file\n"); }

  char cigars_test[num_items][1024];
  size_t actual_read = 0;
  for (int i = 0; i < num_items; i++) {
    simple_a = &simple_alignment[i];
    memcpy(&cigars_test[i], &cigar_buffer[actual_read], simple_a->cigar_len);
    cigars_test[i][simple_a->cigar_len] = '\0';
    actual_read += simple_a->cigar_len;
    //printf("CIGAR %i: %s\n", i, cigars_test[i]);
    size_t map_len = fq_read->length - simple_a->gap_start - simple_a->gap_end;
    //printf("SEED := len_read:%i - gap_read:%i - gap_end:%i = %i, SEED-END = %i\n", fq_read->length, 
    //     simple_a->gap_start, 
    //     simple_a->gap_end, 
    //     map_len, simple_a->gap_start + map_len);
    seed_region_t *s_region = seed_region_new(simple_a->gap_start, 
					      simple_a->gap_start + map_len - 1,
					      simple_a->map_start, 
					      simple_a->map_start + map_len,
					      0);
    
    //printf("Exit with seed [%i:%i]\n", s_region->read_start, s_region->read_end);
    
    linked_list_t *sr_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);
    //s_region->info = cigar_code_new_by_string(cigars_test[i]);
    linked_list_insert(s_region, sr_list);
    
    cal_t *cal = cal_new(simple_a->map_chromosome, 
			 simple_a->map_strand,
			 simple_a->map_start,
			 simple_a->map_start + map_len,
			 1,
			 sr_list,
			 linked_list_new(COLLECTION_MODE_ASYNCHRONIZED));
    cal->info = cigar_code_new_by_string(cigars_test[i]);
    
    meta_alignment_t *meta_alignment = meta_alignment_new();
    array_list_insert(cal, meta_alignment->cals_list);
    array_list_insert(meta_alignment, list);
  }

  return 0;
}

int file_alignment_fill(size_t num_items, array_list_t *list, 
			fastq_read_t *fq_read, FILE *fd) {
  if (!num_items) { return 0; }
  
  alignment_aux_t alignments_aux[num_items];
  alignment_aux_t *alignment_a;
  
  bytes = fread(alignments_aux, sizeof(alignment_aux_t), num_items, fd);
  if (!bytes) { LOG_FATAL("Corrupt file\n"); }

  size_t cigar_tot_len = 0;
  for (int i = 0; i < num_items; i++) {
    alignment_a = &simple_alignment[i];
    //printf("ITEM %i: (%i)[%i:%lu] [%i-%i]\n", i, simple_a->map_strand, simple_a->map_chromosome,
    //     simple_a->map_start, simple_a->gap_start, simple_a->gap_end);
    cigar_tot_len += alignmment_a->cigar_len + alignment_a->optional_field_length;
  }

  char cigars_test[num_items][1024];
  char optional_fields[num_items][1024];
  size_t actual_read = 0;
  for (int i = 0; i < num_items; i++) {
    alignment_a = &alignments_aux[i];
    memcpy(&cigars_test[i], &cigar_buffer[actual_read], alignment_a->cigar_len);
    cigars_test[i][alignment_a->cigar_len] = '\0';
    actual_read += simple_a->cigar_len;
    
    char op;
    char op_value[1024];
    int c = 0;
    int hc_start = 0, hc_end;
    for (int j = 0; j < alignment_a->cigar_len; j++) {
      op = cigars_test[j];
      if (op < 58) {
	op_value[c++] = op;
      } else {
	op_value[c] = '\0';
	if (op == 'H') {
	  hc_start = atoi(op_value);
	}
	break;
      }
    }

    if (cigars_test[alignment_a->cigar_len - 1] == 'H') {
      for (int j = alignment_a->cigar_len - 2; j >= 0; j--) {
	op = cigars_test[j];
	if (op < 58) {
	  op_value[c++] = op;
	} else {
	  op_value[c] = '\0';
	  int len = strlen(op_value);
	  char op_val_aux[len];
	  int pos = len - 1;
	  for (int j = 0; j < len; j++) {	    
	    op_val_aux[j] = op_value[pos - j];
	  } 
	  hc_end = atoi(op_val_aux);
	  break;
	}
      }
    }

    memcpy(&optional_fields[i], &cigar_buffer[actual_read], alignment_a->optional_fields_length);
    optional_fields[i][alignment_a->optional_fields_length] = '0';
    actual_read += alignment_a->optional_fields_length;

    int header_len = strlen(fq_read->id);
    char header_id[header_len + 1];
    get_to_first_blank(fq_read->id, header_len, header_id);
    //char *header_match = (char *)malloc(sizeof(char)*header_len);
    //memcpy(header_match, header_id, header_len);

    int len_read = fq_read->length - (hc_start + hc_end);
    char *quality = (char *) calloc (len_read + 1, sizeof(char));
    strncpy(quality, fq_read->quality + hc_start, len_read);
    char *query = (char *) calloc (len_read + 1, sizeof(char));
    strncpy(query, fq_read->query + hc_start, len_read);

    //Revisar rna_Server get_to_first_blank header copy
    alignment_t *alignment = alignment_new();
    alignment_init_single_end(strdup(header_id),
			      query,
			      quality,
			      alignment_a->seq_strand, 
			      alignment_a->chromosome, 
			      alignment_a->position,
			      strdup(cigars_test[i]),
			      alignment_a->num_cigar_operations,
			      alignment_a->map_quality, 
			      1, 
			      num_items < 1,
			      alignment_a->optional_fields_length,
			      strdup(optional_fields[i]), 
			      alignment);
    
    array_list_insert(alignment, list);
  }  

  return 0;

}
*/
void *file_reader(void *input) {
  wf_input_file_t *wf_input = (wf_input_file_t *) input;
  FILE *fd = wf_input->file;
  batch_t *batch = wf_input->batch;
  int pair_mode = batch->pair_input->pair_mng->pair_mode;

  const int MAX_READS = 100;
  int num_reads = 0;
  batch_t *new_batch = NULL;

  size_t tot_size;
  size_t num_items;
  char *buffer, *id, *sequence, *quality;
  size_t bytes;
  unsigned char type;
  array_list_t *reads = array_list_new(MAX_READS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
  mapping_batch_t *mapping_batch = mapping_batch_new_2(MAX_READS, 
						       reads,
						       batch->pair_input->pair_mng);  
  while (1) {
    //[type][size head][size seq][num items]
    bytes = fread(&type, sizeof(unsigned char), 1, fd);
    if (!bytes) { break; }
 
    fastq_read_t *fq_read = file_read_fastq_reads(&num_items, fd);
    if (fq_read == NULL) { break; }
    
    mapping_batch->mapping_lists[num_reads] = array_list_new(50,
							     1.25f, 
							     COLLECTION_MODE_ASYNCHRONIZED);
    //printf("(num items %i)\nID : %s\nSEQ: %s\nQUA: %s\n", num_items, fq_read->id, fq_read->sequence, fq_read->quality);

    array_list_insert(fq_read, reads);
    
    if (type == CAL_TYPE) {
      //printf("\tCal Report\n");
      file_read_cals(num_items, mapping_batch->mapping_lists[num_reads], 
		     fq_read, fd);      
      array_list_set_flag(BITEM_SINGLE_ANCHORS, 
			  mapping_batch->mapping_lists[num_reads]);
    } else if (type == META_ALIGNMENT_TYPE) {
      //printf("\tMeta Alignments Report\n");
      array_list_set_flag(BITEM_META_ALIGNMENTS, 
			  mapping_batch->mapping_lists[num_reads]);
      file_read_meta_alignments(num_items, mapping_batch->mapping_lists[num_reads], 
				fq_read, fd);            
    } else {
      //printf("\tAlignments Report\n");
      file_read_alignments(num_items, mapping_batch->mapping_lists[num_reads], 
			   fq_read, fd);
    }

    /*if (strcmp("@ENST00000496771@ENSG00000000003@processed_transcript@X@99887538@99891686@-1@KNOWN_518_447_1_0_0_0_4:0:0_3:0:0_3/1",
	       fq_read->id) == 0) {
      exit(-1);
      }*/
            
    num_reads++;
    if (num_reads >= MAX_READS) { break; }

  }

  //w2_r += num_reads;
  //printf("W2 Reads: %i\n", w2_r);

  if (num_reads) {
    mapping_batch->num_allocated_targets = num_reads;
    new_batch = batch_new(batch->bwt_input, batch->region_input, batch->cal_input, 
			  batch->pair_input, batch->preprocess_rna, batch->sw_input,
			  batch->writer_input, batch->mapping_mode, mapping_batch); 
  } else {
    //array_list_free(reads, NULL);
    mapping_batch_free(mapping_batch);
  }

  extern size_t reads_w2;
  reads_w2 += num_reads;


  return new_batch;

}