Ejemplo n.º 1
0
//virtual
std::istream &csv_selector::input(std::istream &iss) {
    // reuse the input from csv_tool
    if (!iss) {
        return iss;
    }
    csv_data->buffer.clear();
    while (iss) {
        std::string tmp = read_block(iss);
        // std::cerr << "tmp:" << std::endl << tmp << std::endl;
        if (tmp.size() < 1) {
            break;
        }
        
        std::string block_type = get_block_type(tmp);
        if (block_type == "csv_analyzer_header") {
            csv_data->mcolumns = parse_csv_header(tmp);
            this->update_selector();
        } else if (block_type == "line_t" ) {
            line_t line = parse_csv_line(tmp);
            csv_data->buffer.push_back(line);
        } else {
            throw std::runtime_error("unknown block type in csv_selector::input: \"" + block_type + "\". block = \"" + tmp + "\".");
        }
    }
    return iss;
}
Ejemplo n.º 2
0
/*
 * Converts a CSV file into a binary file of raw `struct info` records.
 *
 * Usage: <prog> <csv file path> <binary file path>
 *
 * Each input line is handed to parse_csv_line() and the resulting struct is
 * written verbatim to the output file. A short or failed write only produces
 * a warning on stderr; processing continues with the next line.
 *
 * Returns 0 after the input has been consumed; exits with status 1 when the
 * argument count is wrong or either file cannot be opened.
 */
int main(int argc, char** argv) {
  int ifd;
  int ofd;
  size_t size = 0;      /* capacity of `line`, maintained by getline() */
  struct info info_ins;
  char* line = NULL;    /* getline() allocates and grows this buffer */
  if (argc != 3) {
    fprintf(stderr, "Need 2 argument: csv file path and binary file path.\n");
    exit(1);
  }

  if ((ifd = open(argv[1], O_RDONLY)) == -1) {
    perror("Open csv file fail");
    exit(1);
  }

  if ((ofd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0640)) == -1) {
    perror("Open binary file fail");
    exit(1);
  }

  FILE* stream = fdopen(ifd, "r");
  if (stream == NULL) {
    /* Without a stream getline() would dereference NULL. */
    perror("Open csv stream fail");
    close(ifd);
    close(ofd);
    exit(1);
  }

  /* Reuse one buffer across iterations instead of freeing it per line. */
  while (getline(&line, &size, stream) != -1) {
    info_ins = parse_csv_line(line);
    if (write(ofd, &info_ins, sizeof(struct info)) != (ssize_t)sizeof(struct info)) {
      fprintf(stderr, "Warning: write to binary fail. The binary may corrupt!\n");
    }
  }

  /* getline() may leave an allocated buffer behind even when it fails. */
  free(line);

  /* fclose() releases the FILE object and closes the underlying ifd. */
  fclose(stream);
  close(ofd);
  return 0;
}
Ejemplo n.º 3
0
/* parse_csv_file()
 *
 * Reads a CSV-style configuration file line by line and builds one conf
 * item per line, according to conf_type.
 *
 * inputs	- FBFILE pointer to read from (read with fbgets())
 * 		- type of conf to parse; only KLINE, RKLINE, DLINE, XLINE,
 * 		  RXLINE, CRESV and NRESV lines are processed, every other
 * 		  listed type is accepted and ignored
 * output	- none
 * side effects	- creates conf items via make_conf_item(),
 * 		  create_channel_resv() or create_nick_resv() and registers
 * 		  them through add_conf_by_address(), add_temp_line() or
 * 		  conf_add_d_conf(); regex compile failures are reported
 * 		  with sendto_realops_flags()
 *
 * Notes:
 *  - Empty lines and lines starting with '#' are skipped.
 *  - parse_csv_line() is called with a NULL-terminated list of char **
 *    outputs; &temp is passed for columns whose value is never read here.
 *  - Only duration_field is reset to NULL for each line; the other field
 *    pointers keep whatever the previous iteration left in them unless
 *    parse_csv_line() overwrites them.
 *    NOTE(review): presumably parse_csv_line() leaves outputs untouched
 *    when a line has fewer fields - confirm, since stale pointers would
 *    then refer to the reused `line` buffer.
 *  - A non-NULL duration_field marks the entry as temporary: its value is
 *    converted with atoi() into the item's `hold` member and the item is
 *    passed to add_temp_line().
 */
void
parse_csv_file(FBFILE *file, ConfType conf_type)
{
  struct ConfItem *conf;
  struct AccessItem *aconf;
  struct MatchItem *match_item;
  struct MatchItem *nresv;
  struct ResvChannel *cresv;
  char  *name_field=NULL;
  char  *user_field=NULL;
  char  *reason_field=NULL;
  char  *oper_reason=NULL;
  char  *host_field=NULL;
  char  *duration_field=NULL;
  char  *temp=NULL;   /* sink for columns this function does not use */
  char  line[IRCD_BUFSIZE];
  char  *p;

  while (fbgets(line, sizeof(line), file) != NULL)
  {
    /* Reset per line so "has a duration" can be tested with != NULL. */
    duration_field = NULL;

    /* Strip the trailing newline, if any. */
    if ((p = strchr(line, '\n')) != NULL)
      *p = '\0';

    /* Skip blank lines and comments. */
    if ((line[0] == '\0') || (line[0] == '#'))
      continue;

    switch(conf_type)
    {
      case KLINE_TYPE:
        /* Columns: user, host, reason, oper reason, 3 ignored, duration. */
        parse_csv_line(line, &user_field, &host_field, &reason_field,
            &oper_reason, &temp, &temp, &temp, &duration_field, NULL);
        conf = make_conf_item(KLINE_TYPE);
        aconf = map_to_conf(conf);

        if (host_field != NULL)
          DupString(aconf->host, host_field);
        if (reason_field != NULL)
          DupString(aconf->reason, reason_field);
        if (oper_reason != NULL)
          DupString(aconf->oper_reason, oper_reason);
        if (user_field != NULL)
          DupString(aconf->user, user_field);
        if (duration_field != NULL)
          aconf->hold = atoi(duration_field);
        /* The item is only registered when it has a host; permanent
         * entries go in by address, temporary ones via add_temp_line().
         * NOTE(review): when aconf->host is NULL the freshly made conf is
         * not registered or released here - confirm this is intended. */
        if (aconf->host != NULL)
        {
          if(duration_field == NULL)
            add_conf_by_address(CONF_KILL, aconf);
          else
            add_temp_line(conf);
        }
        break;

      case RKLINE_TYPE:
        {
          const char *errptr = NULL;
          pcre *exp_user = NULL, *exp_host = NULL;

          /* Same column layout as KLINE_TYPE. */
          parse_csv_line(line, &user_field, &host_field, &reason_field,
              &oper_reason, &temp, &temp, &temp, &duration_field, NULL);

          /* Both patterns are required before anything is created. */
          if (host_field == NULL || user_field == NULL)
            break;

          /* Compile user then host pattern; stop at the first failure.
           * NOTE(review): if the host pattern fails after the user
           * pattern compiled, exp_user is not released here - confirm. */
          if (!(exp_user = ircd_pcre_compile(user_field, &errptr)) ||
              !(exp_host = ircd_pcre_compile(host_field, &errptr)))
          {
            sendto_realops_flags(UMODE_ALL, L_ALL,
                "Failed to add regular expression based K-Line: %s", errptr);
            break;
          }

          conf = make_conf_item(RKLINE_TYPE);
          aconf = map_to_conf(conf);

          aconf->regexuser = exp_user;
          aconf->regexhost = exp_host;

          DupString(aconf->user, user_field);
          DupString(aconf->host, host_field);

          /* Unlike KLINE_TYPE, a missing reason gets a default text. */
          if (reason_field != NULL)
            DupString(aconf->reason, reason_field);
          else
            DupString(aconf->reason, "No reason");

          if (oper_reason != NULL)
            DupString(aconf->oper_reason, oper_reason);

          /* NOTE(review): no explicit registration for the permanent
           * case - presumably make_conf_item() handles it; confirm. */
          if(duration_field != NULL)
          {
            aconf->hold = atoi(duration_field);
            add_temp_line(conf);
          }
        }
        break;

      case DLINE_TYPE:
        /* Columns: host, reason, 4 ignored, duration. */
        parse_csv_line(line, &host_field, &reason_field, &temp, &temp, &temp, 
            &temp, &duration_field, NULL);
        conf = make_conf_item(DLINE_TYPE);
        aconf = (struct AccessItem *)map_to_conf(conf);
        if (host_field != NULL)
          DupString(aconf->host, host_field);
        if (reason_field != NULL)
          DupString(aconf->reason, reason_field);
        /* Temporary entries go to add_temp_line(), permanent ones to
         * conf_add_d_conf(). */
        if(duration_field != NULL)
        {
          aconf->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        else
          conf_add_d_conf(aconf);
        break;

      case XLINE_TYPE:
        /* Columns: name, reason, oper reason, 4 ignored, duration.
         * oper_reason is parsed but not stored for this type. */
        parse_csv_line(line, &name_field, &reason_field, &oper_reason, &temp,
            &temp, &temp, &temp, &duration_field, NULL);
        conf = make_conf_item(XLINE_TYPE);
        match_item = (struct MatchItem *)map_to_conf(conf);
        if (name_field != NULL)
          DupString(conf->name, name_field);
        if (reason_field != NULL)
          DupString(match_item->reason, reason_field);

        /* NOTE(review): permanent entries are not registered explicitly
         * here - presumably done by make_conf_item(); confirm. */
        if(duration_field != NULL)
        {
          match_item->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        break;

      case RXLINE_TYPE:
        {
          const char *errptr = NULL;
          pcre *exp_p = NULL;

          /* Same column layout as XLINE_TYPE. */
          parse_csv_line(line, &name_field, &reason_field, &oper_reason, &temp,
              &temp, &temp, &temp, &duration_field, NULL);

          /* The name is the regex; without it there is nothing to add. */
          if (name_field == NULL)
            break;

          if (!(exp_p = ircd_pcre_compile(name_field, &errptr)))
          {
            sendto_realops_flags(UMODE_ALL, L_ALL,
                "Failed to add regular expression based X-Line: %s", errptr);
            break;
          }

          conf = make_conf_item(RXLINE_TYPE);
          conf->regexpname = exp_p;
          match_item = map_to_conf(conf);
          DupString(conf->name, name_field);

          /* A missing reason gets a default text. */
          if (reason_field != NULL)
            DupString(match_item->reason, reason_field);
          else
            DupString(match_item->reason, "No reason");

          if(duration_field != NULL)
          {
            match_item->hold = atoi(duration_field);
            add_temp_line(conf);
          }
        }
        break;

      case CRESV_TYPE:
        /* Columns: channel name, reason, duration. */
        parse_csv_line(line, &name_field, &reason_field, &duration_field, NULL);
        conf = create_channel_resv(name_field, reason_field, 0);
        /* NOTE(review): conf is used without a NULL check - confirm
         * create_channel_resv() cannot return NULL on this path. */
        if(duration_field != NULL)
        {
          cresv = map_to_conf(conf);
          cresv->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        break;

      case NRESV_TYPE:
        /* Columns: nick name, reason, duration. */
        parse_csv_line(line, &name_field, &reason_field, &duration_field, NULL);
        conf = create_nick_resv(name_field, reason_field, 0);
        /* NOTE(review): conf is used without a NULL check - confirm
         * create_nick_resv() cannot return NULL on this path. */
        if(duration_field != NULL)
        {
          nresv = map_to_conf(conf);
          nresv->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        break;

      /* These conf types are not handled by this function; their lines
       * are read and discarded. */
      case GLINE_TYPE:
      case GDENY_TYPE:
      case CONF_TYPE:
      case OPER_TYPE:
      case CLIENT_TYPE:
      case SERVER_TYPE:
      case CLUSTER_TYPE:
      case HUB_TYPE:
      case LEAF_TYPE:
      case ULINE_TYPE:
      case EXEMPTDLINE_TYPE:
      case CLASS_TYPE:
        break;
    }
  }
}
Ejemplo n.º 4
0
/*
 * Entry point of the "dist" sub-command.
 *
 * Parses command-line options into the global `opts`, expects exactly three
 * positional arguments (samples file, sample-pairs file, fasta file), opens
 * the requested output files, initialises the locus-diff thread queue, runs
 * it, optionally prints pair statistics, and releases resources.
 *
 * argc, argv - argument vector as received by the sub-command.
 *
 * Returns 0 on success, or the result of dist_usage() when an unknown option
 * is seen or the positional argument count is wrong. Calls exit(5) when none
 * of -d, -c or -i was given.
 */
int
main_dist(int argc, char **argv)
{
    timer_init();
    
    int n_max_reading_set = 0;

    const char *quantiles_string = "0.005,0.05,0.5,0.95,0.995";

    const char *dist_file = NULL;
    const char *comp_file = NULL;
    const char *indel_dist_file = NULL;
    const char *summary_stats_file = NULL;
    const char *query_range_file = NULL;
    const char *readgroup_file = NULL;

#define SQRT2 1.41421356237309504880

    /* getopt() returns int and signals the end of options with -1.  Storing
       the result in a plain char breaks on platforms where char is unsigned
       (-1 becomes 255, which is never negative), so keep it as int. */
    int c;
    while ((c = getopt(argc, argv, "d:c:i:l:x:y:X:Z:P:S:p:C:Q:q:f:F:R:t:r:m:g")) != -1) {
        switch(c) {
            /* files */
        case 'd': dist_file = optarg; break;
        case 'c': comp_file = optarg; break;
        case 'i': indel_dist_file = optarg; break;
        case 'l': query_range_file = optarg; break;
        case 'x': summary_stats_file = optarg; break;

            /* statistical parameters */
        case 'y': opts.be_par.min_dirichlet_dist = 
                SQRT2 * strtod_errmsg(optarg, "-y (min_dirichlet_dist)"); break;
        case 'X': opts.be_par.post_confidence = strtod_errmsg(optarg, "-X (post_confidence)"); break;
        case 'Z': opts.be_par.beta_confidence = strtod_errmsg(optarg, "-Z (beta_confidence)"); break;
        case 'P': opts.be_par.max_sample_points = 
                (unsigned)strtod_errmsg(optarg, "-P (max_sample_points)"); 
            break;
        case 'p': opts.dc_par.prior_alpha = strtod_errmsg(optarg, "-p (prior_alpha)"); break;
        case 'C': quantiles_string = optarg; break;
        case 'S': opts.dc_par.n_max_survey_loci = strtod_errmsg(optarg, "-S (n_max_survey_loci)"); break;

            /* read-level filtering */
        case 'Q': opts.bf_par.min_base_quality = strtol_errmsg(optarg, "-Q (min_base_quality)"); break;
        case 'q': opts.bf_par.min_map_quality = strtol_errmsg(optarg, "-q (min_map_quality)"); break;
        case 'f': opts.bf_par.rflag_require = strtol_errmsg(optarg, "-f (rflag_require)"); break;
        case 'F': opts.bf_par.rflag_filter = strtol_errmsg(optarg, "-F (rflag_filter)"); break;
        case 'R': readgroup_file = optarg; break;

            /* general */
        case 't': opts.n_threads = strtol_errmsg(optarg, "-t (n_threads)"); break;
        case 'r': 
            opts.n_max_reading = strtol_errmsg(optarg, "-r (n_max_reading)"); 
            n_max_reading_set = 1;
            break;
        case 'm': opts.max_mem = (size_t)strtod_errmsg(optarg, "-m (max_mem)"); break;
        case 'g': opts.ld_par.do_print_pileup = 1; break;

        default: return dist_usage();
        }
    }
    if (argc - optind != 3) return dist_usage();

    /* Unless -r was given, allow as many concurrent readers as threads. */
    if (! n_max_reading_set)
        opts.n_max_reading = opts.n_threads;

    /* This adjustment makes max_sample_points a multiple of GEN_POINTS_BATCH.
       Note that a value that already is a multiple is still raised by one
       full batch, so the result is never zero. */
    opts.be_par.max_sample_points += 
        GEN_POINTS_BATCH - (opts.be_par.max_sample_points % GEN_POINTS_BATCH);

    const char *samples_file = argv[optind];
    const char *sample_pairs_file = argv[optind + 1];
    const char *fasta_file = argv[optind + 2];

    setvbuf(stdout, NULL, _IONBF, 0);
    printf("\n"); /* So progress messages don't interfere with shell prompt. */

    if (! dist_file && ! comp_file && ! indel_dist_file) {
        fprintf(stderr, "Error: You must provide at least one of -d or -c or -i.  "
                "Otherwise, there is nothing to calculate\n");
        exit(5);
    }

    /* parse readgroups file */
    FILE *readgroup_fh = open_if_present(readgroup_file, "r");
    if (readgroup_fh) {
        opts.bf_par.readgroup_include_hash = 
            init_readgroup_file(readgroup_fh);
        fclose(readgroup_fh);
    }

    gsl_set_error_handler_off();

    FILE *dist_fh = open_if_present(dist_file, "w");
    FILE *comp_fh = open_if_present(comp_file, "w");
    FILE *indel_fh = open_if_present(indel_dist_file, "w");

    /* Each kind of output is computed only if its file was opened. */
    opts.ld_par.do_dist = (dist_fh != NULL);
    opts.ld_par.do_comp = (comp_fh != NULL);
    opts.ld_par.do_indel = (indel_fh != NULL);

    /* resolve overlap between parameter sets */
    opts.ld_par.max_sample_points = opts.be_par.max_sample_points;
    opts.ld_par.post_confidence = opts.be_par.post_confidence;
    opts.ld_par.min_dirichlet_dist = opts.be_par.min_dirichlet_dist;
    opts.dc_par.fasta_file = fasta_file;
    opts.dc_par.max_sample_points = opts.be_par.max_sample_points;

    /* allot fractions of main memory to points and input buffers.
       bounds and output buffers will be negligible */
#define FRAC_MEM_POINTSETS 0.9
    size_t max_point_sets = opts.max_mem 
        / (sizeof(POINT) * opts.be_par.max_sample_points);
    opts.dc_par.n_point_sets = max_point_sets * FRAC_MEM_POINTSETS;

#define INPUT_MEM_FRACTION 0.05

    /* allot a fraction of total memory to input buffers. */
    unsigned long max_input_mem = opts.max_mem * INPUT_MEM_FRACTION;

    /* Split the comma-separated quantiles string into opts.ld_par. */
    parse_csv_line(quantiles_string, 
                   opts.ld_par.quantiles, 
                   &opts.ld_par.n_quantiles, 
                   MAX_NUM_QUANTILES);

    struct thread_queue *tqueue =
        locus_diff_init(samples_file, sample_pairs_file, 
                        query_range_file, fasta_file,
                        opts.n_threads, opts.n_max_reading, max_input_mem,
                        opts.ld_par, opts.be_par, opts.dc_par, opts.bf_par,
                        opts.bp_par, dist_fh, comp_fh, indel_fh);

    printf("Starting input processing.\n");
    thread_queue_run(tqueue);

    if (summary_stats_file)
        print_pair_stats(summary_stats_file);

    if (dist_fh) fclose(dist_fh);
    if (comp_fh) fclose(comp_fh);
    if (indel_fh) fclose(indel_fh);

    locus_diff_free(tqueue);

    if (opts.bf_par.readgroup_include_hash)
        free_readgroup_hash(opts.bf_par.readgroup_include_hash);
    
    printf("Finished.\n");

    return 0;
}