Exemple #1
0
/**
 * Builds a report from database.
 */
struct lmgr_report_t *ListMgr_Report(lmgr_t *p_mgr,
                                     const report_field_descr_t *
                                     report_desc_array,
                                     unsigned int report_descr_count,
                                     const profile_field_descr_t *
                                     profile_descr,
                                     const lmgr_filter_t *p_filter,
                                     const lmgr_iter_opt_t *p_opt)
{
    unsigned int i;
    char attrname[128];
    lmgr_report_t *p_report;
    int rc;
    table_enum query_tab;
    /* supported report fields: ENTRIES, ANNEX_INFO or ACCT */
    bool use_acct_table = false;
    lmgr_iter_opt_t opt = { 0 };
    unsigned int profile_len = 0;
    unsigned int ratio = 0;
    struct field_count fcnt = { 0 };
    GString *req = NULL;
    GString *fields = NULL;
    GString *where = NULL;
    GString *having = NULL;
    GString *group_by = NULL;
    GString *order_by = NULL;
    GString *filter_name = NULL;

    /* check profile argument and increase output array if needed */
    if (profile_descr != NULL) {
        if (profile_descr->attr_index != ATTR_INDEX_size) {
            DisplayLog(LVL_CRIT, LISTMGR_TAG,
                       "Profile on attribute '%s' (index=%u) is not supported",
                       field_name(profile_descr->attr_index),
                       profile_descr->attr_index);
            return NULL;
        }
        profile_len = SZ_PROFIL_COUNT;
        if (profile_descr->range_ratio_len > 0)
            ratio = 1;
    }

    /* allocate a new report structure */
    p_report = (lmgr_report_t *) MemAlloc(sizeof(lmgr_report_t));
    if (!p_report)
        return NULL;

    p_report->p_mgr = p_mgr;

    p_report->result = (struct result *)MemCalloc(report_descr_count
                                                  + profile_len + ratio,
                                                  sizeof(struct result));
    if (!p_report->result)
        goto free_report;

    p_report->result_count = report_descr_count + profile_len + ratio;
    p_report->profile_count = profile_len;
    p_report->ratio_count = ratio;
    if (profile_descr != NULL)
        p_report->profile_attr = ATTR_INDEX_size;

    /* initially, no char * tab allocated */
    p_report->str_tab = NULL;

    if (p_opt)
        opt = *p_opt;

    fields = g_string_new(NULL);
    group_by = g_string_new(NULL);
    order_by = g_string_new(NULL);
    having = g_string_new(NULL);
    where = g_string_new(NULL);

    if (full_acct(report_desc_array, report_descr_count, p_filter)
        && !opt.force_no_acct) {
        listmgr_optimizedstat(p_report, p_mgr, report_descr_count,
                              report_desc_array, profile_descr, fields,
                              group_by, order_by, having, where);
        use_acct_table = true;
    } else {    /* not only ACCT table */

        /* sorting by ratio first */
        if (profile_descr && profile_descr->range_ratio_len > 0) {
            if (profile_descr->attr_index == ATTR_INDEX_size) {
                coma_if_needed(order_by);
                if (profile_descr->range_ratio_sort == SORT_ASC)
                    g_string_append(order_by, "sizeratio ASC");
                else
                    g_string_append(order_by, "sizeratio DESC");
            }
        }

        for (i = 0; i < report_descr_count; i++) {
            /* no field for count or distinct count */
            if (report_desc_array[i].report_type != REPORT_COUNT &&
                report_desc_array[i].report_type != REPORT_COUNT_DISTINCT) {
                /* in what table is this field ? */
                if (is_main_field(report_desc_array[i].attr_index))
                    fcnt.nb_main++;
                else if (is_annex_field(report_desc_array[i].attr_index))
                    fcnt.nb_annex++;
                else {
                    /* Not supported yet */
                    DisplayLog(LVL_CRIT, LISTMGR_TAG,
                               "Error: report on attribute '%s' (index=%u) is not supported (report item #%u).",
                               field_name(report_desc_array[i].attr_index),
                               report_desc_array[i].attr_index, i);
                    rc = DB_NOT_SUPPORTED;
                    goto free_str;
                }
            }

            sprintf(attrname, "attr%u", i);

            /* what kind of stat on this field ? */
            switch (report_desc_array[i].report_type) {
            case REPORT_MIN:
                coma_if_needed(fields);
                g_string_append_printf(fields, "MIN(%s) as %s",
                                       field_str(report_desc_array[i].
                                                 attr_index), attrname);
                p_report->result[i].type =
                    field_type(report_desc_array[i].attr_index);
                break;

            case REPORT_MAX:
                coma_if_needed(fields);
                g_string_append_printf(fields, "MAX(%s) as %s",
                                       field_str(report_desc_array[i].
                                                 attr_index), attrname);
                p_report->result[i].type =
                    field_type(report_desc_array[i].attr_index);
                break;

            case REPORT_AVG:
                coma_if_needed(fields);
                g_string_append_printf(fields, "ROUND(AVG(%s)) as %s",
                                       field_str(report_desc_array[i].
                                                 attr_index), attrname);
                p_report->result[i].type =
                    field_type(report_desc_array[i].attr_index);
                break;

            case REPORT_SUM:
                coma_if_needed(fields);
                g_string_append_printf(fields, "SUM(%s) as %s",
                                       field_str(report_desc_array[i].
                                                 attr_index), attrname);
                p_report->result[i].type =
                    field_type(report_desc_array[i].attr_index);
                break;

            case REPORT_COUNT:
                coma_if_needed(fields);
                g_string_append_printf(fields, "COUNT(*) as %s", attrname);
                p_report->result[i].type = DB_BIGUINT;
                break;

            case REPORT_COUNT_DISTINCT:
                coma_if_needed(fields);
                g_string_append_printf(fields, "COUNT(DISTINCT(%s)) as %s",
                                       field_str(report_desc_array[i].
                                                 attr_index), attrname);
                p_report->result[i].type = DB_BIGUINT;
                break;

            case REPORT_GROUP_BY:
                coma_if_needed(fields);
                g_string_append_printf(fields, "%s as %s",
                                       field_str(report_desc_array[i].
                                                 attr_index), attrname);
                coma_if_needed(group_by);
                g_string_append(group_by, attrname);
                p_report->result[i].type =
                    field_type(report_desc_array[i].attr_index);
                break;
            }

            /* is this field sorted ? */
            append_sort_order(order_by, attrname,
                              report_desc_array[i].sort_flag);

            /* is this field filtered ? */
            listmgr_fieldfilter(p_report, p_mgr, report_desc_array, attrname,
                                having, where, i);

            p_report->result[i].flags =
                field_flag(report_desc_array[i].attr_index);
        }

        /* generate size profile */
        if (profile_descr != NULL) {
            if (profile_descr->attr_index == ATTR_INDEX_size) {
                coma_if_needed(fields);
                g_string_append(fields, "SUM(size=0)");

                for (i = 1; i < SZ_PROFIL_COUNT - 1; i++)
                    g_string_append_printf(fields,
                                           ",SUM(" SZRANGE_FUNC "(size)=%u)",
                                           i - 1);

                g_string_append_printf(fields,
                                       ",SUM(" SZRANGE_FUNC "(size)>=%u)",
                                       SZ_PROFIL_COUNT - 1);

                for (i = 0; i < SZ_PROFIL_COUNT; i++)
                    p_report->result[i + report_descr_count].type = DB_BIGUINT;

                if (profile_descr->range_ratio_len > 0) {
                    /* add ratio field and sort it */
                    coma_if_needed(fields);
                    g_string_append_printf(fields, "SUM(size>=%llu",
                                           SZ_MIN_BY_INDEX(profile_descr->
                                                           range_ratio_start));

                    /* is the last range = 1T->inf ? */
                    if (profile_descr->range_ratio_start +
                        profile_descr->range_ratio_len >= SZ_PROFIL_COUNT)
                        g_string_append(fields, ")");
                    else
                        g_string_append_printf(fields, " and size<%llu)",
                            SZ_MIN_BY_INDEX(profile_descr->range_ratio_start
                                            + profile_descr->range_ratio_len));

                    g_string_append(fields, "/COUNT(*) as sizeratio");
                }
            }
        }
    }

    /* process filter */
    if (!(no_filter(p_filter))) {
        if (full_acct(report_desc_array, report_descr_count, p_filter)
            && !opt.force_no_acct) {
            int filter_acct;

            /* filter on acct fields only */
            filter_acct = filter2str(p_mgr, where, p_filter, T_ACCT,
                               (!GSTRING_EMPTY(where) ? AOF_LEADING_SEP : 0)
                               | AOF_PREFIX);
            if (filter_acct > 0)
                use_acct_table = true;
        } else {
            /* process NAMES filters apart, as with must then join with
             * DISTINCT(id) */
            filter_where(p_mgr, p_filter, &fcnt, where,
                         (!GSTRING_EMPTY(where) ? AOF_LEADING_SEP : 0)
                         | AOF_SKIP_NAME);

            filter_name = g_string_new(NULL);
            fcnt.nb_names =
                filter2str(p_mgr, filter_name, p_filter, T_DNAMES, 0);
        }
    }

    /* start building the whole request */
    req = g_string_new("SELECT ");
    g_string_append_printf(req, "%s FROM ", fields->str);

    /* FROM clause */
    if (use_acct_table) {
        g_string_append(req, ACCT_TABLE);
        query_tab = T_ACCT;
    } else {
        bool distinct;

        filter_from(p_mgr, &fcnt, req, &query_tab, &distinct, AOF_SKIP_NAME);

        if (filter_name != NULL && !GSTRING_EMPTY(filter_name)) {
            g_string_append_printf(req, " INNER JOIN (SELECT DISTINCT(id)"
                                   " FROM " DNAMES_TABLE " WHERE %s) N"
                                   " ON %s.id=N.id", filter_name->str,
                                   table2name(query_tab));
            /* FIXME: what if NAMES is the query tab? */
        }
        /* FIXME: do the same for stripe items */
    }

    /* Build the request */
    if (!GSTRING_EMPTY(where))
        g_string_append_printf(req, " WHERE %s", where->str);

    if (!GSTRING_EMPTY(group_by))
        g_string_append_printf(req, " GROUP BY %s", group_by->str);

    if (!GSTRING_EMPTY(having))
        g_string_append_printf(req, " HAVING %s", having->str);

    if (!GSTRING_EMPTY(order_by))
        g_string_append_printf(req, " ORDER BY %s", order_by->str);

    /* iterator opt */
    if (opt.list_count_max > 0)
        g_string_append_printf(req, " LIMIT %u", opt.list_count_max);

 retry:
    /* execute request (expect that ACCT table does not exists) */
    if (use_acct_table)
        rc = db_exec_sql_quiet(&p_mgr->conn, req->str,
                               &p_report->select_result);
    else
        rc = db_exec_sql(&p_mgr->conn, req->str, &p_report->select_result);

    if (lmgr_delayed_retry(p_mgr, rc))
        goto retry;

    /* if the ACCT table does exist, switch to standard mode */
    if (use_acct_table && (rc == DB_NOT_EXISTS)) {
        lmgr_iter_opt_t new_opt;

        if (p_opt != NULL)
            new_opt = *p_opt;
        else
            new_opt.list_count_max = 0;

        new_opt.force_no_acct = true;

        DisplayLog(LVL_EVENT, LISTMGR_TAG,
                   "No accounting info: switching to standard query mode");

        g_string_free(req, TRUE);
        g_string_free(fields, TRUE);
        g_string_free(group_by, TRUE);
        g_string_free(order_by, TRUE);
        g_string_free(having, TRUE);
        g_string_free(where, TRUE);
        if (filter_name != NULL)
            g_string_free(filter_name, TRUE);

        return ListMgr_Report(p_mgr, report_desc_array, report_descr_count,
                              profile_descr, p_filter, &new_opt);
    }

 free_str:
    /* these are always allocated */
    g_string_free(fields, TRUE);
    g_string_free(group_by, TRUE);
    g_string_free(order_by, TRUE);
    g_string_free(having, TRUE);
    g_string_free(where, TRUE);
    /* these may not be allocated */
    if (req != NULL)
        g_string_free(req, TRUE);
    if (filter_name != NULL)
        g_string_free(filter_name, TRUE);

    if (rc == DB_SUCCESS)
        return p_report;

/* error */
    MemFree(p_report->result);

 free_report:
    MemFree(p_report);
    return NULL;
}   /* ListMgr_Report */
Exemple #2
0
static struct decision*
dt_parse_samples(const struct sample *samples, int max, struct where *where)
{
    bool ambiguous = is_set_ambiguous(samples, max);
    int best_field = best_field_where(samples, max, where);

    if (best_field < 0 || !ambiguous)  {
        if (!ambiguous)
            printf("Non-ambiguous set:\n");
        else
            printf("No best field:\n");
        print_set_info(samples, max, where);

        struct decision *d = majority_result_node(samples, max);
        printf("\tLeaf with majority value %i -> %i\n", d->field, d->value);
        return d;
    }


    // The first call has no defined where, and it must be explicitly
    // deleted. Other calls only need append a new where-clause and
    // give it proper filters.
    struct where *w = where_alloc();
    if (where)	 where_append(where, w);
    else		 where = w;
    w->field = best_field;

    // Get all the unique values from the set
    int unique = 0;
    int *vals = unique_values(samples, max, &unique, best_field);

    // The decision tree we are returning
    struct decision *dec = NULL;

    for (int i=0; i<unique; i++) {
        // Create a subset filtered for s->{best_field} = V[i]
        w->value = vals[i];
        int wmax = 0;
        struct sample *wsamples = filter_where(samples, max, where, &wmax);

        // If the filtered subset is equal to the superset, the training
        // data is ambiguous. Return a leaf node with the majority result
        if (wmax == max) {
            printf("Ambiguity in training set:\n\t");
            print_set_info(samples, max, where);
            dec = majority_result_node(samples, max);

            printf("\tassigning majority value %i=%i\n\n",
                   dec->field, dec->value);
            goto dt_parse_samples_cleanup;
        }

        // Create a branch-node
        struct decision *d = dt_alloc();
        d->field = best_field;
        d->value = vals[i];

        // Append the branch to the tree
        if (!dec) 	dec = d;
        else 		dt_append_next(dec, d);

        // Create a subtree
        struct decision *sub = dt_parse_samples(wsamples, wmax, where);
        d->dest = sub;

        // Reference "dec" from all sibling nodes of sub
        while (sub) {
            sub->parent = dec;
            sub = sub->next;
        }

        free(wsamples);
    }

dt_parse_samples_cleanup:
    if (where != w)
        where_destroy(where_pop(where));
    else
        where_destroy(w);
    free(vals);
    return dec;
}
Exemple #3
0
/**
 * Get the list of children of a given parent (or list of parents).
 * \param parent_list       [in]  list of parents to get the child of
 * \param parent_count      [in]  number of ids in parent list
 * \param attr_mask         [in]  required attributes for children
 * \param child_id_list     [out] ptr to array of child ids
 * \param child_attr_list   [out] ptr to array of child attrs
 * \param child_count       [out] number of returned children
 */
int ListMgr_GetChild(lmgr_t *p_mgr, const lmgr_filter_t *p_filter,
                     const wagon_t *parent_list, unsigned int parent_count,
                     attr_mask_t attr_mask,
                     wagon_t **child_id_list, attr_set_t **child_attr_list,
                     unsigned int *child_count)
{
    result_handle_t result;
    char *path = NULL;
    int path_len;
    int                rc, i;
    GString           *req = NULL;
    GString           *fields = NULL;
    GString           *from = NULL;
    GString           *where = NULL;
    struct field_count field_cnt = {0};
    struct field_count filter_cnt = {0};
    table_enum         query_tab = T_DNAMES;
    bool               distinct = false;
    int                retry_status;

    /* XXX: querying children from several parent cannot work, since
     * we need to get the paths of the children. Or we could do a
     * lookup into parent_list to find the right one. In the meantime,
     * try not to mess up the code. */
    if (unlikely(parent_count != 1))
        RBH_BUG("cannot get children for several parent simultaneously");

    /* always request for name to build fullpath in wagon */
    attr_mask_set_index(&attr_mask, ATTR_INDEX_name);

    fields = g_string_new(NULL);

    /* append fields for all tables */
    if (!attr_mask_is_null(attr_mask))
    {
        /* retrieve source info for generated fields */
        add_source_fields_for_gen(&attr_mask.std);

        field_cnt.nb_names = attrmask2fieldlist(fields, attr_mask, T_DNAMES,
                                                DNAMES_TABLE".", "",
                                                AOF_LEADING_SEP);

        field_cnt.nb_main = attrmask2fieldlist(fields, attr_mask, T_MAIN,
                                               MAIN_TABLE".", "",
                                               AOF_LEADING_SEP);

        field_cnt.nb_annex = attrmask2fieldlist(fields, attr_mask, T_ANNEX,
                                                ANNEX_TABLE".", "",
                                                AOF_LEADING_SEP);
    }
    else
    {
        /* no returned attrs */
        if (child_attr_list != NULL)
            *child_attr_list = NULL;
    }

    where = g_string_new(NULL);

    /* starts with condition on parent */
    rc = append_parent_cond(p_mgr, where, parent_list, parent_count, DNAMES_TABLE".");
    if (rc != DB_SUCCESS)
        goto free_str;

    /* check filters on other tables */
    if (!no_filter(p_filter))
    {
        if (unlikely(dir_filter(p_mgr, NULL, p_filter, NULL, NULL) != FILTERDIR_NONE))
        {
            DisplayLog(LVL_MAJOR, LISTMGR_TAG, "Directory filter not supported in %s()", __func__);
            rc = DB_NOT_SUPPORTED;
            goto free_str;
        }
        else if (unlikely(func_filter(p_mgr, NULL, p_filter, T_MAIN, 0)))
        {
            DisplayLog(LVL_MAJOR, LISTMGR_TAG, "Function filter not supported in %s()", __func__);
            rc = DB_NOT_SUPPORTED;
            goto free_str;
        }

        /* There is always a filter on T_DNAMES, which is the parent condition.
         * Look for optional filters.
         */
        filter_where(p_mgr, p_filter, &filter_cnt, where,
                     AOF_LEADING_SEP | AOF_SKIP_NAME);
        /** @FIXME process other filters on NAMES */
    }

    from = g_string_new(DNAMES_TABLE);

    /* add filter_count + field_count to build the FROM clause.
     * Preserve field count which is needed to interpret the result.
     */
    filter_cnt.nb_main += field_cnt.nb_main;
    filter_cnt.nb_annex += field_cnt.nb_annex;
    filter_cnt.nb_names += field_cnt.nb_names;
    /* query tab is DNAMES, skip_name=true, is_first_tab=T_DNAMES */
    filter_from(p_mgr, &filter_cnt, from, &query_tab, &distinct,
                AOF_LEADING_SEP | AOF_SKIP_NAME);

    /* request is always on the DNAMES table (which contains [parent_id, id] relationship */
    if (distinct)
        req = g_string_new("SELECT DISTINCT("DNAMES_TABLE".id) as id");
    else
        req = g_string_new("SELECT "DNAMES_TABLE".id as id");

    /* build the whole request */
    g_string_append_printf(req, "%s FROM %s WHERE %s", fields->str, from->str, where->str);

retry:
    rc = db_exec_sql(&p_mgr->conn, req->str, &result);
    retry_status = lmgr_delayed_retry(p_mgr, rc);
    if (retry_status == 1)
        goto retry;
    else if (retry_status == 2) {
        rc = DB_RBH_SIG_SHUTDOWN;
        goto free_str;
    } else if (rc)
        goto free_str;

    /* copy result to output structures */
    *child_count = db_result_nb_records(&p_mgr->conn, &result);

    /* allocate entry_id array */
    *child_id_list = MemCalloc(*child_count, sizeof(wagon_t));
    if (*child_id_list == NULL)
    {
        rc = DB_NO_MEMORY;
        goto free_str;
    }

    if (child_attr_list)
    {
        *child_attr_list = MemCalloc(*child_count, sizeof(attr_set_t));
        if (*child_attr_list == NULL)
        {
            rc = DB_NO_MEMORY;
            goto array_free;
        }
    }

    /* Allocate a string long enough to contain the parent path and a
     * child name. */
    path_len = strlen(parent_list[0].fullname) + RBH_NAME_MAX + 2;
    path = malloc(path_len);
    if (!path) {
        DisplayLog(LVL_MAJOR, LISTMGR_TAG, "Can't alloc enough memory (%d bytes)",
                    path_len);
        rc = DB_NO_MEMORY;
        goto array_free;
    }

    for (i = 0; i < *child_count; i++)
    {
        char *res[128]; /* 128 fields per record is large enough */

        rc = db_next_record(&p_mgr->conn, &result, res, sizeof(res)/sizeof(*res));
        if (rc)
            goto array_free;

        /* copy id to array */
        pk2entry_id(p_mgr, res[0], &((*child_id_list)[i].id));

        /* copy attributes to array */
        if (child_attr_list)
        {
            unsigned int shift = 1; /* first was NAMES.id */

            (*child_attr_list)[i].attr_mask = attr_mask;

            /* first id, then dnames attrs, then main attrs, then annex attrs */
            if (field_cnt.nb_names > 0)
            {
                /* shift of 1 for id */
                rc = result2attrset(T_DNAMES, res + shift, field_cnt.nb_names, &((*child_attr_list)[i]));
                if (rc)
                    goto array_free;
                shift += field_cnt.nb_names;
            }

            if (field_cnt.nb_main > 0)
            {
                /* first id, then main attrs, then annex attrs */
                /* shift of 1 for id */
                rc = result2attrset(T_MAIN, res + shift, field_cnt.nb_main, &((*child_attr_list)[i]));
                if (rc)
                    goto array_free;
                shift += field_cnt.nb_main;
            }

            if (field_cnt.nb_annex > 0)
            {
                /* shift of main_attrs count */
                rc = result2attrset(T_ANNEX, res + shift, field_cnt.nb_annex,
                                     &((*child_attr_list)[i]));
                if (rc)
                    goto array_free;
                shift += field_cnt.nb_annex;
            }

#ifdef _LUSTRE
            if (stripe_fields(attr_mask))
            {
                if (get_stripe_info(p_mgr, res[0], &ATTR(&(*child_attr_list)[i], stripe_info),
                                     &ATTR(&(*child_attr_list)[i], stripe_items)))
                {
                    ATTR_MASK_UNSET(&(*child_attr_list)[i], stripe_info);
                    ATTR_MASK_UNSET(&(*child_attr_list)[i], stripe_items);
                }
            }
#endif

            generate_fields(&((*child_attr_list)[i]));

            /* Note: path is properly sized already to not overflow. */
            snprintf(path, path_len, "%s/%s", parent_list[0].fullname,
                     (*child_attr_list)[i].attr_values.name);
            (*child_id_list)[i].fullname = strdup(path);
        }
    }

    if (path)
        free(path);

    db_result_free(&p_mgr->conn, &result);
    g_string_free(req, TRUE);
    g_string_free(fields, TRUE);
    g_string_free(from, TRUE);
    g_string_free(where, TRUE);
    return 0;

array_free:
    if (path)
        free(path);
    if (child_attr_list && *child_attr_list)
    {
        MemFree(*child_attr_list);
        *child_attr_list = NULL;
    }
    MemFree(*child_id_list);
    *child_id_list = NULL;
free_str:
    g_string_free(req, TRUE);
    g_string_free(fields, TRUE);
    g_string_free(from, TRUE);
    g_string_free(where, TRUE);
    return rc;
}