/** * Builds a report from database. */ struct lmgr_report_t *ListMgr_Report(lmgr_t *p_mgr, const report_field_descr_t * report_desc_array, unsigned int report_descr_count, const profile_field_descr_t * profile_descr, const lmgr_filter_t *p_filter, const lmgr_iter_opt_t *p_opt) { unsigned int i; char attrname[128]; lmgr_report_t *p_report; int rc; table_enum query_tab; /* supported report fields: ENTRIES, ANNEX_INFO or ACCT */ bool use_acct_table = false; lmgr_iter_opt_t opt = { 0 }; unsigned int profile_len = 0; unsigned int ratio = 0; struct field_count fcnt = { 0 }; GString *req = NULL; GString *fields = NULL; GString *where = NULL; GString *having = NULL; GString *group_by = NULL; GString *order_by = NULL; GString *filter_name = NULL; /* check profile argument and increase output array if needed */ if (profile_descr != NULL) { if (profile_descr->attr_index != ATTR_INDEX_size) { DisplayLog(LVL_CRIT, LISTMGR_TAG, "Profile on attribute '%s' (index=%u) is not supported", field_name(profile_descr->attr_index), profile_descr->attr_index); return NULL; } profile_len = SZ_PROFIL_COUNT; if (profile_descr->range_ratio_len > 0) ratio = 1; } /* allocate a new report structure */ p_report = (lmgr_report_t *) MemAlloc(sizeof(lmgr_report_t)); if (!p_report) return NULL; p_report->p_mgr = p_mgr; p_report->result = (struct result *)MemCalloc(report_descr_count + profile_len + ratio, sizeof(struct result)); if (!p_report->result) goto free_report; p_report->result_count = report_descr_count + profile_len + ratio; p_report->profile_count = profile_len; p_report->ratio_count = ratio; if (profile_descr != NULL) p_report->profile_attr = ATTR_INDEX_size; /* initially, no char * tab allocated */ p_report->str_tab = NULL; if (p_opt) opt = *p_opt; fields = g_string_new(NULL); group_by = g_string_new(NULL); order_by = g_string_new(NULL); having = g_string_new(NULL); where = g_string_new(NULL); if (full_acct(report_desc_array, report_descr_count, p_filter) && !opt.force_no_acct) { listmgr_optimizedstat(p_report, p_mgr, report_descr_count, report_desc_array, profile_descr, fields, group_by, order_by, having, where); use_acct_table = true; } else { /* not only ACCT table */ /* sorting by ratio first */ if (profile_descr && profile_descr->range_ratio_len > 0) { if (profile_descr->attr_index == ATTR_INDEX_size) { coma_if_needed(order_by); if (profile_descr->range_ratio_sort == SORT_ASC) g_string_append(order_by, "sizeratio ASC"); else g_string_append(order_by, "sizeratio DESC"); } } for (i = 0; i < report_descr_count; i++) { /* no field for count or distinct count */ if (report_desc_array[i].report_type != REPORT_COUNT && report_desc_array[i].report_type != REPORT_COUNT_DISTINCT) { /* in what table is this field ? */ if (is_main_field(report_desc_array[i].attr_index)) fcnt.nb_main++; else if (is_annex_field(report_desc_array[i].attr_index)) fcnt.nb_annex++; else { /* Not supported yet */ DisplayLog(LVL_CRIT, LISTMGR_TAG, "Error: report on attribute '%s' (index=%u) is not supported (report item #%u).", field_name(report_desc_array[i].attr_index), report_desc_array[i].attr_index, i); rc = DB_NOT_SUPPORTED; goto free_str; } } sprintf(attrname, "attr%u", i); /* what kind of stat on this field ? */ switch (report_desc_array[i].report_type) { case REPORT_MIN: coma_if_needed(fields); g_string_append_printf(fields, "MIN(%s) as %s", field_str(report_desc_array[i]. attr_index), attrname); p_report->result[i].type = field_type(report_desc_array[i].attr_index); break; case REPORT_MAX: coma_if_needed(fields); g_string_append_printf(fields, "MAX(%s) as %s", field_str(report_desc_array[i]. attr_index), attrname); p_report->result[i].type = field_type(report_desc_array[i].attr_index); break; case REPORT_AVG: coma_if_needed(fields); g_string_append_printf(fields, "ROUND(AVG(%s)) as %s", field_str(report_desc_array[i]. attr_index), attrname); p_report->result[i].type = field_type(report_desc_array[i].attr_index); break; case REPORT_SUM: coma_if_needed(fields); g_string_append_printf(fields, "SUM(%s) as %s", field_str(report_desc_array[i]. attr_index), attrname); p_report->result[i].type = field_type(report_desc_array[i].attr_index); break; case REPORT_COUNT: coma_if_needed(fields); g_string_append_printf(fields, "COUNT(*) as %s", attrname); p_report->result[i].type = DB_BIGUINT; break; case REPORT_COUNT_DISTINCT: coma_if_needed(fields); g_string_append_printf(fields, "COUNT(DISTINCT(%s)) as %s", field_str(report_desc_array[i]. attr_index), attrname); p_report->result[i].type = DB_BIGUINT; break; case REPORT_GROUP_BY: coma_if_needed(fields); g_string_append_printf(fields, "%s as %s", field_str(report_desc_array[i]. attr_index), attrname); coma_if_needed(group_by); g_string_append(group_by, attrname); p_report->result[i].type = field_type(report_desc_array[i].attr_index); break; } /* is this field sorted ? */ append_sort_order(order_by, attrname, report_desc_array[i].sort_flag); /* is this field filtered ? */ listmgr_fieldfilter(p_report, p_mgr, report_desc_array, attrname, having, where, i); p_report->result[i].flags = field_flag(report_desc_array[i].attr_index); } /* generate size profile */ if (profile_descr != NULL) { if (profile_descr->attr_index == ATTR_INDEX_size) { coma_if_needed(fields); g_string_append(fields, "SUM(size=0)"); for (i = 1; i < SZ_PROFIL_COUNT - 1; i++) g_string_append_printf(fields, ",SUM(" SZRANGE_FUNC "(size)=%u)", i - 1); g_string_append_printf(fields, ",SUM(" SZRANGE_FUNC "(size)>=%u)", SZ_PROFIL_COUNT - 1); for (i = 0; i < SZ_PROFIL_COUNT; i++) p_report->result[i + report_descr_count].type = DB_BIGUINT; if (profile_descr->range_ratio_len > 0) { /* add ratio field and sort it */ coma_if_needed(fields); g_string_append_printf(fields, "SUM(size>=%llu", SZ_MIN_BY_INDEX(profile_descr-> range_ratio_start)); /* is the last range = 1T->inf ? */ if (profile_descr->range_ratio_start + profile_descr->range_ratio_len >= SZ_PROFIL_COUNT) g_string_append(fields, ")"); else g_string_append_printf(fields, " and size<%llu)", SZ_MIN_BY_INDEX(profile_descr->range_ratio_start + profile_descr->range_ratio_len)); g_string_append(fields, "/COUNT(*) as sizeratio"); } } } } /* process filter */ if (!(no_filter(p_filter))) { if (full_acct(report_desc_array, report_descr_count, p_filter) && !opt.force_no_acct) { int filter_acct; /* filter on acct fields only */ filter_acct = filter2str(p_mgr, where, p_filter, T_ACCT, (!GSTRING_EMPTY(where) ? AOF_LEADING_SEP : 0) | AOF_PREFIX); if (filter_acct > 0) use_acct_table = true; } else { /* process NAMES filters apart, as with must then join with * DISTINCT(id) */ filter_where(p_mgr, p_filter, &fcnt, where, (!GSTRING_EMPTY(where) ? AOF_LEADING_SEP : 0) | AOF_SKIP_NAME); filter_name = g_string_new(NULL); fcnt.nb_names = filter2str(p_mgr, filter_name, p_filter, T_DNAMES, 0); } } /* start building the whole request */ req = g_string_new("SELECT "); g_string_append_printf(req, "%s FROM ", fields->str); /* FROM clause */ if (use_acct_table) { g_string_append(req, ACCT_TABLE); query_tab = T_ACCT; } else { bool distinct; filter_from(p_mgr, &fcnt, req, &query_tab, &distinct, AOF_SKIP_NAME); if (filter_name != NULL && !GSTRING_EMPTY(filter_name)) { g_string_append_printf(req, " INNER JOIN (SELECT DISTINCT(id)" " FROM " DNAMES_TABLE " WHERE %s) N" " ON %s.id=N.id", filter_name->str, table2name(query_tab)); /* FIXME: what if NAMES is the query tab? */ } /* FIXME: do the same for stripe items */ } /* Build the request */ if (!GSTRING_EMPTY(where)) g_string_append_printf(req, " WHERE %s", where->str); if (!GSTRING_EMPTY(group_by)) g_string_append_printf(req, " GROUP BY %s", group_by->str); if (!GSTRING_EMPTY(having)) g_string_append_printf(req, " HAVING %s", having->str); if (!GSTRING_EMPTY(order_by)) g_string_append_printf(req, " ORDER BY %s", order_by->str); /* iterator opt */ if (opt.list_count_max > 0) g_string_append_printf(req, " LIMIT %u", opt.list_count_max); retry: /* execute request (expect that ACCT table does not exists) */ if (use_acct_table) rc = db_exec_sql_quiet(&p_mgr->conn, req->str, &p_report->select_result); else rc = db_exec_sql(&p_mgr->conn, req->str, &p_report->select_result); if (lmgr_delayed_retry(p_mgr, rc)) goto retry; /* if the ACCT table does exist, switch to standard mode */ if (use_acct_table && (rc == DB_NOT_EXISTS)) { lmgr_iter_opt_t new_opt; if (p_opt != NULL) new_opt = *p_opt; else new_opt.list_count_max = 0; new_opt.force_no_acct = true; DisplayLog(LVL_EVENT, LISTMGR_TAG, "No accounting info: switching to standard query mode"); g_string_free(req, TRUE); g_string_free(fields, TRUE); g_string_free(group_by, TRUE); g_string_free(order_by, TRUE); g_string_free(having, TRUE); g_string_free(where, TRUE); if (filter_name != NULL) g_string_free(filter_name, TRUE); return ListMgr_Report(p_mgr, report_desc_array, report_descr_count, profile_descr, p_filter, &new_opt); } free_str: /* these are always allocated */ g_string_free(fields, TRUE); g_string_free(group_by, TRUE); g_string_free(order_by, TRUE); g_string_free(having, TRUE); g_string_free(where, TRUE); /* these may not be allocated */ if (req != NULL) g_string_free(req, TRUE); if (filter_name != NULL) g_string_free(filter_name, TRUE); if (rc == DB_SUCCESS) return p_report; /* error */ MemFree(p_report->result); free_report: MemFree(p_report); return NULL; } /* ListMgr_Report */
static struct decision* dt_parse_samples(const struct sample *samples, int max, struct where *where) { bool ambiguous = is_set_ambiguous(samples, max); int best_field = best_field_where(samples, max, where); if (best_field < 0 || !ambiguous) { if (!ambiguous) printf("Non-ambiguous set:\n"); else printf("No best field:\n"); print_set_info(samples, max, where); struct decision *d = majority_result_node(samples, max); printf("\tLeaf with majority value %i -> %i\n", d->field, d->value); return d; } // The first call has no defined where, and it must be explicitly // deleted. Other calls only need append a new where-clause and // give it proper filters. struct where *w = where_alloc(); if (where) where_append(where, w); else where = w; w->field = best_field; // Get all the unique values from the set int unique = 0; int *vals = unique_values(samples, max, &unique, best_field); // The decision tree we are returning struct decision *dec = NULL; for (int i=0; i<unique; i++) { // Create a subset filtered for s->{best_field} = V[i] w->value = vals[i]; int wmax = 0; struct sample *wsamples = filter_where(samples, max, where, &wmax); // If the filtered subset is equal to the superset, the training // data is ambiguous. Return a leaf node with the majority result if (wmax == max) { printf("Ambiguity in training set:\n\t"); print_set_info(samples, max, where); dec = majority_result_node(samples, max); printf("\tassigning majority value %i=%i\n\n", dec->field, dec->value); goto dt_parse_samples_cleanup; } // Create a branch-node struct decision *d = dt_alloc(); d->field = best_field; d->value = vals[i]; // Append the branch to the tree if (!dec) dec = d; else dt_append_next(dec, d); // Create a subtree struct decision *sub = dt_parse_samples(wsamples, wmax, where); d->dest = sub; // Reference "dec" from all sibling nodes of sub while (sub) { sub->parent = dec; sub = sub->next; } free(wsamples); } dt_parse_samples_cleanup: if (where != w) where_destroy(where_pop(where)); else where_destroy(w); free(vals); return dec; }
/** * Get the list of children of a given parent (or list of parents). * \param parent_list [in] list of parents to get the child of * \param parent_count [in] number of ids in parent list * \param attr_mask [in] required attributes for children * \param child_id_list [out] ptr to array of child ids * \param child_attr_list [out] ptr to array of child attrs * \param child_count [out] number of returned children */ int ListMgr_GetChild(lmgr_t *p_mgr, const lmgr_filter_t *p_filter, const wagon_t *parent_list, unsigned int parent_count, attr_mask_t attr_mask, wagon_t **child_id_list, attr_set_t **child_attr_list, unsigned int *child_count) { result_handle_t result; char *path = NULL; int path_len; int rc, i; GString *req = NULL; GString *fields = NULL; GString *from = NULL; GString *where = NULL; struct field_count field_cnt = {0}; struct field_count filter_cnt = {0}; table_enum query_tab = T_DNAMES; bool distinct = false; int retry_status; /* XXX: querying children from several parent cannot work, since * we need to get the paths of the children. Or we could do a * lookup into parent_list to find the right one. In the meantime, * try not to mess up the code. */ if (unlikely(parent_count != 1)) RBH_BUG("cannot get children for several parent simultaneously"); /* always request for name to build fullpath in wagon */ attr_mask_set_index(&attr_mask, ATTR_INDEX_name); fields = g_string_new(NULL); /* append fields for all tables */ if (!attr_mask_is_null(attr_mask)) { /* retrieve source info for generated fields */ add_source_fields_for_gen(&attr_mask.std); field_cnt.nb_names = attrmask2fieldlist(fields, attr_mask, T_DNAMES, DNAMES_TABLE".", "", AOF_LEADING_SEP); field_cnt.nb_main = attrmask2fieldlist(fields, attr_mask, T_MAIN, MAIN_TABLE".", "", AOF_LEADING_SEP); field_cnt.nb_annex = attrmask2fieldlist(fields, attr_mask, T_ANNEX, ANNEX_TABLE".", "", AOF_LEADING_SEP); } else { /* no returned attrs */ if (child_attr_list != NULL) *child_attr_list = NULL; } where = g_string_new(NULL); /* starts with condition on parent */ rc = append_parent_cond(p_mgr, where, parent_list, parent_count, DNAMES_TABLE"."); if (rc != DB_SUCCESS) goto free_str; /* check filters on other tables */ if (!no_filter(p_filter)) { if (unlikely(dir_filter(p_mgr, NULL, p_filter, NULL, NULL) != FILTERDIR_NONE)) { DisplayLog(LVL_MAJOR, LISTMGR_TAG, "Directory filter not supported in %s()", __func__); rc = DB_NOT_SUPPORTED; goto free_str; } else if (unlikely(func_filter(p_mgr, NULL, p_filter, T_MAIN, 0))) { DisplayLog(LVL_MAJOR, LISTMGR_TAG, "Function filter not supported in %s()", __func__); rc = DB_NOT_SUPPORTED; goto free_str; } /* There is always a filter on T_DNAMES, which is the parent condition. * Look for optional filters. */ filter_where(p_mgr, p_filter, &filter_cnt, where, AOF_LEADING_SEP | AOF_SKIP_NAME); /** @FIXME process other filters on NAMES */ } from = g_string_new(DNAMES_TABLE); /* add filter_count + field_count to build the FROM clause. * Preserve field count which is needed to interpret the result. */ filter_cnt.nb_main += field_cnt.nb_main; filter_cnt.nb_annex += field_cnt.nb_annex; filter_cnt.nb_names += field_cnt.nb_names; /* query tab is DNAMES, skip_name=true, is_first_tab=T_DNAMES */ filter_from(p_mgr, &filter_cnt, from, &query_tab, &distinct, AOF_LEADING_SEP | AOF_SKIP_NAME); /* request is always on the DNAMES table (which contains [parent_id, id] relationship */ if (distinct) req = g_string_new("SELECT DISTINCT("DNAMES_TABLE".id) as id"); else req = g_string_new("SELECT "DNAMES_TABLE".id as id"); /* build the whole request */ g_string_append_printf(req, "%s FROM %s WHERE %s", fields->str, from->str, where->str); retry: rc = db_exec_sql(&p_mgr->conn, req->str, &result); retry_status = lmgr_delayed_retry(p_mgr, rc); if (retry_status == 1) goto retry; else if (retry_status == 2) { rc = DB_RBH_SIG_SHUTDOWN; goto free_str; } else if (rc) goto free_str; /* copy result to output structures */ *child_count = db_result_nb_records(&p_mgr->conn, &result); /* allocate entry_id array */ *child_id_list = MemCalloc(*child_count, sizeof(wagon_t)); if (*child_id_list == NULL) { rc = DB_NO_MEMORY; goto free_str; } if (child_attr_list) { *child_attr_list = MemCalloc(*child_count, sizeof(attr_set_t)); if (*child_attr_list == NULL) { rc = DB_NO_MEMORY; goto array_free; } } /* Allocate a string long enough to contain the parent path and a * child name. */ path_len = strlen(parent_list[0].fullname) + RBH_NAME_MAX + 2; path = malloc(path_len); if (!path) { DisplayLog(LVL_MAJOR, LISTMGR_TAG, "Can't alloc enough memory (%d bytes)", path_len); rc = DB_NO_MEMORY; goto array_free; } for (i = 0; i < *child_count; i++) { char *res[128]; /* 128 fields per record is large enough */ rc = db_next_record(&p_mgr->conn, &result, res, sizeof(res)/sizeof(*res)); if (rc) goto array_free; /* copy id to array */ pk2entry_id(p_mgr, res[0], &((*child_id_list)[i].id)); /* copy attributes to array */ if (child_attr_list) { unsigned int shift = 1; /* first was NAMES.id */ (*child_attr_list)[i].attr_mask = attr_mask; /* first id, then dnames attrs, then main attrs, then annex attrs */ if (field_cnt.nb_names > 0) { /* shift of 1 for id */ rc = result2attrset(T_DNAMES, res + shift, field_cnt.nb_names, &((*child_attr_list)[i])); if (rc) goto array_free; shift += field_cnt.nb_names; } if (field_cnt.nb_main > 0) { /* first id, then main attrs, then annex attrs */ /* shift of 1 for id */ rc = result2attrset(T_MAIN, res + shift, field_cnt.nb_main, &((*child_attr_list)[i])); if (rc) goto array_free; shift += field_cnt.nb_main; } if (field_cnt.nb_annex > 0) { /* shift of main_attrs count */ rc = result2attrset(T_ANNEX, res + shift, field_cnt.nb_annex, &((*child_attr_list)[i])); if (rc) goto array_free; shift += field_cnt.nb_annex; } #ifdef _LUSTRE if (stripe_fields(attr_mask)) { if (get_stripe_info(p_mgr, res[0], &ATTR(&(*child_attr_list)[i], stripe_info), &ATTR(&(*child_attr_list)[i], stripe_items))) { ATTR_MASK_UNSET(&(*child_attr_list)[i], stripe_info); ATTR_MASK_UNSET(&(*child_attr_list)[i], stripe_items); } } #endif generate_fields(&((*child_attr_list)[i])); /* Note: path is properly sized already to not overflow. */ snprintf(path, path_len, "%s/%s", parent_list[0].fullname, (*child_attr_list)[i].attr_values.name); (*child_id_list)[i].fullname = strdup(path); } } if (path) free(path); db_result_free(&p_mgr->conn, &result); g_string_free(req, TRUE); g_string_free(fields, TRUE); g_string_free(from, TRUE); g_string_free(where, TRUE); return 0; array_free: if (path) free(path); if (child_attr_list && *child_attr_list) { MemFree(*child_attr_list); *child_attr_list = NULL; } MemFree(*child_id_list); *child_id_list = NULL; free_str: g_string_free(req, TRUE); g_string_free(fields, TRUE); g_string_free(from, TRUE); g_string_free(where, TRUE); return rc; }