Exemple #1
0
static int ingestion_init_s5p_l1b_ra(const harp_ingestion_module *module, coda_product *product,
                                     const harp_ingestion_options *options, harp_product_definition **definition,
                                     void **user_data)
{
    ingest_info *info;

    /* The radiance ingestion takes no ingestion options. */
    (void)options;

    info = malloc(sizeof(ingest_info));
    if (info == NULL)
    {
        harp_set_error(HARP_ERROR_OUT_OF_MEMORY, "out of memory (could not allocate %lu bytes) (%s:%u)",
                       sizeof(ingest_info), __FILE__, __LINE__);
        return -1;
    }
    info->product = product;
    info->band = -1;

    /* Locate the product groups and determine the dimension sizes; all
     * failures below funnel through a single cleanup path. */
    if (init_cursors(info, NULL) != 0)
    {
        goto error;
    }
    if (init_dimensions(info, info->observation_cursor, "radiance") != 0)
    {
        goto error;
    }

    /* Initialize cursors and fill values for datasets which will be read using partial reads. */
    if (init_dataset(info->instrument_cursor, "nominal_wavelength", info->num_pixels * info->num_channels,
                     &info->wavelength_cursor, &info->wavelength_fill_value) != 0)
    {
        goto error;
    }
    if (init_dataset(info->observation_cursor, "radiance",
                     info->num_scanlines * info->num_pixels * info->num_channels, &info->observable_cursor,
                     &info->observable_fill_value) != 0)
    {
        goto error;
    }

    *definition = *module->product_definition;
    *user_data = info;

    return 0;

error:
    ingestion_done(info);
    return -1;
}
Exemple #2
0
/*
 * load_mnist_dataset: read the MNIST training images and labels from disk
 * and split the records into a 50000-sample training set followed by a
 * 10000-sample validation set.
 *
 * Both output structs are allocated here via init_dataset().  On any I/O
 * or format error the process exits with status 1.
 *
 * Fixes over the previous version: the image-file header values were
 * overwritten by the label-file header before the DEBUG print (so it showed
 * the wrong magic number), and neither magic numbers nor record counts were
 * validated before allocating.
 */
void load_mnist_dataset(dataset *train_set, dataset *validate_set){
    uint32_t N_images, N_labels, nrow, ncol, magic_images, magic_labels;
    rio_t rio_train_x, rio_train_y;
    int train_x_fd, train_y_fd;
    int train_set_size = 50000, validate_set_size = 10000;

    train_x_fd = open("../data/train-images-idx3-ubyte", O_RDONLY);
    train_y_fd = open("../data/train-labels-idx1-ubyte", O_RDONLY);

    if(train_x_fd == -1){
        fprintf(stderr, "cannot open train-images-idx3-ubyte\n");
        exit(1);
    }
    if(train_y_fd == -1){
        fprintf(stderr, "cannot open train-labels-idx1-ubyte\n");
        exit(1);
    }

    rio_readinitb(&rio_train_x, train_x_fd, 0);
    rio_readinitb(&rio_train_y, train_y_fd, 0);

    /* IDX headers: magic number, item count, then (images only) row/column
     * sizes.  read_uint32() is assumed to deliver host-order values from the
     * big-endian file - TODO confirm against its implementation. */
    read_uint32(&rio_train_x, &magic_images);
    read_uint32(&rio_train_x, &N_images);
    read_uint32(&rio_train_x, &nrow);
    read_uint32(&rio_train_x, &ncol);

    read_uint32(&rio_train_y, &magic_labels);
    read_uint32(&rio_train_y, &N_labels);

    /* Validate the headers before allocating: 2051/2049 are the IDX magic
     * numbers for image and label files respectively. */
    if(magic_images != 2051 || magic_labels != 2049){
        fprintf(stderr, "bad magic number in MNIST files\n");
        exit(1);
    }
    if(N_images != N_labels || N_images < (uint32_t)(train_set_size + validate_set_size)){
        fprintf(stderr, "unexpected MNIST record count\n");
        exit(1);
    }
#ifdef DEBUG
    printf("magic (images): %u\nmagic (labels): %u\nN: %u\nnrow: %u\nncol: %u\n",
           magic_images, magic_labels, N_images, nrow, ncol);
    fflush(stdout);
#endif

    init_dataset(train_set, train_set_size, nrow, ncol);
    init_dataset(validate_set, validate_set_size, nrow, ncol);

    /* The loads consume the streams sequentially: the first 50000 records go
     * to the training set, the next 10000 to the validation set. */
    load_dataset_input(&rio_train_x, train_set);
    load_dataset_output(&rio_train_y, train_set);

    load_dataset_input(&rio_train_x, validate_set);
    load_dataset_output(&rio_train_y, validate_set);

    //print_dataset(&validate_set);

    close(train_x_fd);
    close(train_y_fd);
}
int main() {
    /* Read the run configuration (cluster count k, point count, I/O info). */
    config_t *cfg = read_config();
    int num_clusters = cfg->k;
    /* Build the point array from the configured input. */
    point_t *points = init_dataset(cfg);
    int total_points = cfg->count;
    /* Seed the initial centroids from the data, then run k-means. */
    point_t *means = init_centroid(points, num_clusters, total_points);
    kmeans(means, points, num_clusters, total_points);
    /* Report the final centroids on stdout and persist them. */
    print_result(means, num_clusters);
    write_result(means, num_clusters, cfg);
    return 0;
}
Exemple #4
0
void
Options::set(const options_data& opts)
{
    // Replace the current contents with a copy of `opts`.
    simulations = opts.simulations;
    places = opts.places;
    departments = opts.departments;
    years = opts.years;
    observed = opts.observed;

    const auto row_count = opts.options.rows();
    const auto col_count = opts.options.columns();
    options.init(row_count, col_count);

    // NOTE(review): the source matrix is indexed (c, r) - i.e. transposed -
    // while the destination is sized (rows, columns).  This is only in
    // bounds when the matrix is square or opts.options stores its data
    // transposed; worth confirming against options_data's layout.
    for (size_t row = 0; row != row_count; ++row)
        for (size_t col = 0; col != col_count; ++col)
            options(row, col) = opts.options(col, row);

    // Rebuild derived state and run the consistency checks.
    init_dataset();
    check();
}
Exemple #5
0
/**
 * Parse an options CSV file (';'-separated) into this Options instance.
 *
 * Expected row layout: simulation;[place;]department;year;<attributes...>;observed
 * The optional place column is detected from the header width:
 * atts.size() + 4 columns means no place column (id = 3), + 5 means it is
 * present (id = 4), where `id` is the index of the first attribute column.
 *
 * @param context logging context
 * @param is      open CSV stream
 * @param model   model providing the attribute scales
 * @return empty optional on success, otherwise a csv_parser_status with the
 *         first fatal error.  Malformed data lines are logged and skipped.
 */
eastl::optional<csv_parser_status>
Options::read(eastl::shared_ptr<context> context, FILE* is, const Model& model)
{
    clear();

    eastl::vector<const attribute*> atts = get_basic_attribute(model);
    eastl::vector<int> convertheader(atts.size(), 0);
    eastl::vector<eastl::string> columns;
    eastl::string line;
    int id = -1; /* index of the first attribute column in each row */

    line_reader ls(is);

    /* Read and validate the header line. */
    {
        auto opt_line = ls.getline();
        if (!opt_line) {
            info(context, "Fail to read header\n");
            return eastl::make_optional<csv_parser_status>(
              csv_parser_status::tag::file_error, size_t(0), columns.size());
        }

        line = *opt_line;

        tokenize(line, columns, ";", false);

        if (columns.size() == atts.size() + 4)
            id = 3;
        else if (columns.size() == atts.size() + 5)
            id = 4;
        else
            return eastl::make_optional<csv_parser_status>(
              csv_parser_status::tag::column_number_incorrect,
              size_t(0),
              columns.size());
    }

    /* NOTE(review): this logs the first atts.size() header columns, not the
       attribute columns starting at `id` - presumably intentional debug
       output; confirm if the attribute names were meant instead. */
    for (size_t i = 0, e = atts.size(); i != e; ++i)
        info(context, "column {} {}\n", i, columns[i].c_str());

    /* Map each attribute header column onto the model's attribute ids. */
    for (size_t i = id, e = id + atts.size(); i != e; ++i) {
        info(context,
             "try to get_basic_atribute_id {} : {}\n",
             i,
             columns[i].c_str());

        auto opt_att_id = get_basic_attribute_id(atts, columns[i]);
        if (!opt_att_id) {
            return eastl::make_optional<csv_parser_status>(
              csv_parser_status::tag::basic_attribute_unknown,
              size_t(0),
              columns.size());
        }

        convertheader[i - id] = *opt_att_id;
    }

    info(context, "Starts to read data (atts.size() = {}\n", atts.size());

    options.init(atts.size());
    options.push_line();
    int line_number = -1;

    while (true) {
        auto opt_line = ls.getline();
        if (!opt_line)
            break;

        line = *opt_line;
        line_number++;

        tokenize(line, columns, ";", false);
        if (columns.size() != atts.size() + id + 1) {
            error(context,
                  "Options: error in csv file line {}:"
                  " not correct number of column {}"
                  " (expected: {})\n",
                  line_number,
                  columns.size(),
                  atts.size() + id + 1);
            continue;
        }

        /* The last column is the observed scale value. */
        auto opt_obs =
          model.attributes[0].scale.find_scale_value(columns.back());

        if (not opt_obs) {
            return eastl::make_optional<csv_parser_status>(
              csv_parser_status::tag::scale_value_unknown,
              static_cast<size_t>(line_number),
              static_cast<size_t>(columns.size()));
        }

        int obs = *opt_obs;
        int department, year;

        {
            /* year is the column just before the attributes, department the
               one before that.  (Previously both sscanf calls read
               columns[id - 1], so department silently received the year.) */
            auto len1 = sscanf(columns[id - 1].c_str(), "%d", &year);
            auto len2 = sscanf(columns[id - 2].c_str(), "%d", &department);

            if (len1 != 1 or len2 != 1) {
                error(context,
                      "Options: error in csv file line {}."
                      " Malformed year or department\n",
                      line_number);
                continue;
            }
        }

        simulations.push_back(columns[0]);
        if (id == 4)
            places.push_back(columns[1]);

        departments.push_back(department);
        years.push_back(year);
        observed.push_back(obs);

        for (size_t i = id, e = id + atts.size(); i != e; ++i) {
            size_t attid = convertheader[i - id];

            auto opt_option = atts[attid]->scale.find_scale_value(columns[i]);
            if (!opt_option) {
                error(context,
                      "Options: error in csv file line {}: "
                      "unknown scale value `{}' for attribute `{}'\n",
                      line_number,
                      columns[i].c_str(),
                      atts[attid]->name.c_str());
                simulations.pop_back();
                if (id == 4)
                    places.pop_back();
                departments.pop_back();
                years.pop_back();
                observed.pop_back();

                options.pop_line();
                /* The record has been discarded; stop scanning its remaining
                   attributes, otherwise the next unknown value would pop the
                   previous record and the next valid one would write into it. */
                break;
            } else {
                options(options.rows() - 1, attid) = *opt_option;
            }
        }

        options.push_line();
    }

    /* Drop the spare line pushed ahead of the last (non-existent) record. */
    options.pop_line();

    init_dataset();
    check();

    return {};
}
Exemple #6
0
/* Ingestion initialization for S5P L1B irradiance products.
 *
 * Allocates the ingest_info state, resolves the band to ingest from the
 * ingestion options, verifies that the corresponding BAND<n>_IRRADIANCE
 * product group exists, and prepares the cursors, dimensions and fill
 * values needed for (partial) dataset reads.
 *
 * Returns 0 on success; on failure sets a HARP error, releases all state
 * via ingestion_done() and returns -1.
 */
static int ingestion_init_s5p_l1b_ir(const harp_ingestion_module *module, coda_product *product,
                                     const harp_ingestion_options *options, harp_product_definition **definition,
                                     void **user_data)
{
    ingest_info *info;
    char product_group_name[17];    /* "BAND%d_IRRADIANCE" with a single-digit band is 16 chars + NUL */
    int band_available;

    info = malloc(sizeof(ingest_info));
    if (info == NULL)
    {
        harp_set_error(HARP_ERROR_OUT_OF_MEMORY, "out of memory (could not allocate %lu bytes) (%s:%u)",
                       sizeof(ingest_info), __FILE__, __LINE__);
        return -1;
    }
    info->product = product;
    info->band = -1;

    /* Determine which spectral band to ingest from the 'band' ingestion option. */
    if (parse_option_band(info, options) != 0)
    {
        ingestion_done(info);
        return -1;
    }

    /* Check that the product actually contains data for the requested band. */
    snprintf(product_group_name, ARRAY_SIZE(product_group_name), "BAND%d_IRRADIANCE", info->band);
    if (get_product_group_availability(info->product, product_group_name, &band_available) != 0)
    {
        ingestion_done(info);
        return -1;
    }
    if (!band_available)
    {
        harp_set_error(HARP_ERROR_INGESTION, "no data for band '%d'", info->band);
        ingestion_done(info);
        return -1;
    }

    if (init_cursors(info, product_group_name) != 0)
    {
        ingestion_done(info);
        return -1;
    }
    if (init_dimensions(info, info->observation_cursor, "irradiance") != 0)
    {
        ingestion_done(info);
        return -1;
    }

    /* Initialize cursors and fill values for datasets which will be read using partial reads. */
    if (init_dataset
        (info->instrument_cursor, "calibrated_wavelength", info->num_pixels * info->num_channels,
         &info->wavelength_cursor, &info->wavelength_fill_value) != 0)
    {
        ingestion_done(info);
        return -1;
    }

    if (init_dataset
        (info->observation_cursor, "irradiance", info->num_scanlines * info->num_pixels * info->num_channels,
         &info->observable_cursor, &info->observable_fill_value) != 0)
    {
        ingestion_done(info);
        return -1;
    }

    /* Select the product definition for this band.  NOTE(review): bands 1-6
     * map to definition indices 0-5 while bands 7-8 map to indices 0-1;
     * presumably this function serves two separately registered modules (one
     * per detector group) so the indices do not collide - confirm against
     * the module registration code, which is outside this file view. */
    assert(info->band >= 1 && info->band <= 8);
    if (info->band < 7)
    {
        *definition = module->product_definition[info->band - 1];
    }
    else
    {
        *definition = module->product_definition[info->band - 7];
    }
    *user_data = info;

    return 0;
}