Example #1
0
// Loads a CSV of comma-separated boolean rows into a freshly allocated table.
//
// The first line of the file is consumed as a header and is only used to
// count commas; that comma count is the number of types. Each of the next
// num_rows lines is parsed by load_comma_bool_line() into a row of num_types
// chars, followed by a terminating '\0'.
//
//   path       file to read; die()s if it cannot be opened or has no first line
//   bools_ptr  out: array of num_rows row pointers. All rows live in a single
//              contiguous allocation that begins at (*bools_ptr)[0].
//   num_rows   number of data rows to read (asserted > 0); die()s if the file
//              holds fewer. Lines beyond num_rows are left unread.
//
// Returns number of types.
int load_hla_csv(const char *path, char ***bools_ptr, int num_rows)
{
  assert(num_rows > 0);

  StrBuf line;
  strbuf_alloc(&line, 1024);

  FILE *fh = fopen(path, "r");
  if(fh == NULL) die("Cannot open file: %s.", path);

  // Header line: its comma count fixes the width of every data row.
  if(strbuf_readline(&line, fh) == 0) die("Empty CSV file: %s.", path);
  int num_types = count_char(line.b, ',');
  int row_width = num_types + 1; // one extra char per row for the '\0'

  // One array of row pointers plus one flat block holding all row contents.
  char **rows = my_malloc(sizeof(char*) * num_rows, __FILE__, __LINE__);
  char *block = my_malloc(sizeof(char) * num_rows * row_width, __FILE__, __LINE__);
  printf("Number of rows: %i.\n",num_rows);

  int rows_read = 0;
  while(rows_read < num_rows && strbuf_reset_readline(&line, fh))
  {
    strbuf_chomp(&line);

    char *row = block + rows_read * row_width;
    rows[rows_read] = row;
    load_comma_bool_line(line.b, row, num_types);
    row[num_types] = '\0';

    rows_read++;
  }

  // The loop stops early only when the file ran out of lines.
  if(rows_read < num_rows) die("Not enough rows in CSV file: %s.", path);

  fclose(fh);
  strbuf_dealloc(&line);

  *bools_ptr = rows;
  return num_types;
}
Example #2
0
// Error-corrects every FASTQ file named in a list file.
//
// list_fastq->buff is the path of a text file that is read line by line; each
// non-empty line (after chomping) is taken as the path of one input file. For
// each input the output path is built as
//     outdir + basename(input_path + suffix->buff)
// and the pair is handed to error_correct_file_against_graph() together with
// the remaining arguments, which are forwarded unchanged.
//
// Notes:
//  - outdir and the basename are concatenated directly, so outdir must
//    already end with a path separator.
//  - Two uint64_t statistics arrays of max_read_len+2 entries are allocated
//    zeroed, passed to every per-file call, and freed on return; this
//    function does not itself report their contents.
//  - die() is called if the statistics arrays cannot be allocated or if the
//    list file cannot be opened.
void error_correct_list_of_files(StrBuf* list_fastq,char quality_cutoff, char ascii_qual_offset,
                                 dBGraphEc *db_graph, HandleLowQualUncorrectable policy,
                                 int max_read_len, int min_read_len, StrBuf* suffix, char* outdir,
                                 boolean add_greedy_bases_for_better_bwt_compression,
                                 int num_greedy_bases, boolean rev_comp_read_if_on_reverse_strand)

{
    printf("error correct list of files\n");
    fflush(stdout);

    // calloc zero-fills both arrays and rejects a size computation that overflows.
    int len = max_read_len+2;
    uint64_t* distrib_num_bases_corrected      = calloc((size_t) len, sizeof(uint64_t));
    uint64_t* distrib_position_bases_corrected = calloc((size_t) len, sizeof(uint64_t));
    if ( (distrib_num_bases_corrected==NULL)|| (distrib_position_bases_corrected==NULL))
    {
        die("Unable to alloc arrays for keeping stats. Your machine must have hardly any spare memory\n");
    }

    FILE* list_fastq_fp = fopen(list_fastq->buff, "r");
    if (list_fastq_fp==NULL)
    {
        // Abort here: continuing would pass a NULL stream to the line reader
        // and to fclose() below.
        die("Cannot open file %s\n", list_fastq->buff);
    }

    StrBuf *next_fastq     = strbuf_new();
    StrBuf* corrected_file = strbuf_new();
    StrBuf* corrected_file_newpath = strbuf_new();

    while(strbuf_reset_readline(next_fastq, list_fastq_fp))
    {
        strbuf_chomp(next_fastq);
        if(strbuf_len(next_fastq) > 0)
        {
            // corrected_file = <input path><suffix>; it is a scratch copy, so
            // it does not matter that POSIX basename() is allowed to modify
            // the string it is given.
            strbuf_reset(corrected_file);
            strbuf_reset(corrected_file_newpath);
            strbuf_copy(corrected_file, 0,//dest
                        next_fastq,0,strbuf_len(next_fastq));
            strbuf_append_str(corrected_file, suffix->buff);
            char* corrected_file_basename = basename(corrected_file->buff);

            // corrected_file_newpath = <outdir><basename of corrected_file>
            strbuf_append_str(corrected_file_newpath, outdir);
            strbuf_append_str(corrected_file_newpath, corrected_file_basename);

            error_correct_file_against_graph(next_fastq->buff, quality_cutoff, ascii_qual_offset,
                                             db_graph, corrected_file_newpath->buff,
                                             distrib_num_bases_corrected,
                                             distrib_position_bases_corrected,
                                             len,
                                             min_read_len,
                                             policy,
                                             add_greedy_bases_for_better_bwt_compression,
                                             num_greedy_bases, rev_comp_read_if_on_reverse_strand);
        }
    }

    fclose(list_fastq_fp);
    strbuf_free(next_fastq);
    strbuf_free(corrected_file);
    strbuf_free(corrected_file_newpath);
    free(distrib_num_bases_corrected);
    free(distrib_position_bases_corrected);
}