//Assign true labels to all clusters, and find members of each cluster. void Clustering::finalize_labeling() { assert(!done_labeling); std::vector<int> valid_labels; std::vector<bool> flag(N_,false); for (int i=0; i<N_; ++i) { cluster_alias_[i] = get_true_label(i); if (!flag[cluster_alias_[i]]) { flag[cluster_alias_[i]] = true; valid_labels.push_back(cluster_alias_[i]); } } const int num_valid_labels = valid_labels.size(); //std::cout << "# of valid labels " << num_valid_labels << std::endl; cluster_members.resize(num_valid_labels); cluster_labels_.resize(N_); std::vector<int> label_map(N_); for (int label=0; label<num_valid_labels; ++label) { label_map[valid_labels[label]] = label; } for (int i=0; i<N_; ++i) { const int new_label = cluster_labels_[i] = label_map[cluster_alias_[i]]; assert(new_label>=0); assert(new_label<cluster_members.size()); cluster_members[new_label].push_back(i); } //std::cout << "len of cluster members " << cluster_members.size() << std::endl; done_labeling = true; }
/*
 * Decode the domain name `from' and store the result in `to'.
 *
 * Parameters:
 *   ctx     - resolver configuration supplying the converters and the
 *             delimiter mapper (must not be NULL).
 *   actions - bit set selecting the processing steps.  Any bit outside
 *             DECODE_MASK yields idn_invalid_action.  IDN_DECODE_QUERY is
 *             expanded into the full set of decode steps below.
 *   from    - input name (must not be NULL).
 *   to      - output buffer (must not be NULL).
 *   tolen   - passed to the output converters as the capacity of `to'.
 *
 * Processing:
 *   1. If the module is disabled or `actions' is 0, the name is copied
 *      with copy_verbatim() and nothing else is done.
 *   2. `from' is converted to UCS4 (through the IDN converter when
 *      IDN_IDNCONV is set and that converter is not ASCII compatible,
 *      otherwise as UTF-8).  The work buffer is doubled until it fits.
 *   3. Optional delimiter mapping, then the name is split into labels.
 *   4. Each label, from the last to the first, goes through the selected
 *      map / normalize / prohibit / unassigned / bidi steps (non-ASCII
 *      labels only), ACE decoding, the round trip check and the local
 *      encoding check.  When a check reports idn_prohibited or
 *      idn_invalid_encoding, labellist_undo() is called on that label and
 *      processing continues with the next label instead of failing.
 *   5. The labels are joined and converted to the output encoding.
 *
 * Returns idn_success, or idn_invalid_action, idn_buffer_overflow,
 * idn_nomemory, idn_invalid_name, or any other error reported by a helper.
 */
idn_result_t
idn_res_decodename(idn_resconf_t ctx, idn_action_t actions, const char *from,
		   char *to, size_t tolen) {
	idn_converter_t local_converter = NULL;
	idn_converter_t idn_converter = NULL;
	idn_delimitermap_t delimiter_mapper;
	idn_result_t r;
	labellist_t labels = NULL, l;
	unsigned long *buffer = NULL;
	unsigned long *saved_name = NULL;
	size_t buffer_length;
	int idn_is_ace;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_res_decodename(actions=%s, from=\"%s\", tolen=%d)\n",
	       idn__res_actionstostring(actions),
	       idn__debug_xstring(from, 50), (int)tolen));

	if (actions & ~DECODE_MASK) {
		WARNING(("idn_res_decodename: invalid actions 0x%x\n",
			 actions));
		r = idn_invalid_action;
		goto ret;
	}

	if (!initialized)
		idn_res_initialize();
	if (!enabled || actions == 0) {
		r = copy_verbatim(from, to, tolen);
		goto ret;
	} else if (tolen <= 0) {
		/* From here on `tolen' is known to be at least 1. */
		r = idn_buffer_overflow;
		goto ret;
	}

	if (actions & IDN_DECODE_QUERY) {
#ifndef WITHOUT_ICONV
		actions |= (IDN_DELIMMAP | IDN_MAP | IDN_NORMALIZE |
			    IDN_PROHCHECK | IDN_BIDICHECK | IDN_IDNCONV |
			    IDN_RTCHECK | IDN_LOCALCONV);
#else
		actions |= (IDN_DELIMMAP | IDN_MAP | IDN_NORMALIZE |
			    IDN_PROHCHECK | IDN_BIDICHECK | IDN_IDNCONV |
			    IDN_RTCHECK);
#endif
	}

	/*
	 * Convert `from' to UCS4.
	 */
	local_converter = idn_resconf_getlocalconverter(ctx);
#ifndef WITHOUT_ICONV
	if (local_converter == NULL) {
		r = idn_invalid_name;
		goto ret;
	}
#endif

	idn_converter = idn_resconf_getidnconverter(ctx);
	if (idn_converter != NULL &&
	    idn_converter_isasciicompatible(idn_converter))
		idn_is_ace = 1;
	else
		idn_is_ace = 0;

	buffer_length = tolen * 2;

	TRACE(("res idndecode(name=\"%s\")\n", idn__debug_xstring(from, 50)));

	/* Grow the work buffer until the UCS4 form of `from' fits. */
	for (;;) {
		void *new_buffer;

		new_buffer = realloc(buffer, sizeof(*buffer) * buffer_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buffer = (unsigned long *)new_buffer;

		if ((actions & IDN_IDNCONV) &&
		    idn_converter != NULL && !idn_is_ace) {
			r = idn_converter_convtoucs4(idn_converter, from,
						     buffer, buffer_length);
		} else {
			r = idn_ucs4_utf8toucs4(from, buffer, buffer_length);
		}
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		buffer_length *= 2;
	}

	/*
	 * Empty name: the result is the empty string.  `tolen' >= 1 was
	 * established above, so the terminator always fits.
	 */
	if (*buffer == '\0') {
		*to = '\0';
		r = idn_success;
		goto ret;
	}

	/*
	 * Delimiter map.
	 */
	if (actions & IDN_DELIMMAP) {
		TRACE(("res delimitermap(name=\"%s\")\n",
		       idn__debug_ucs4xstring(buffer, 50)));

		delimiter_mapper = idn_resconf_getdelimitermap(ctx);
		if (delimiter_mapper != NULL) {
			/* Mapping is done in place on the work buffer. */
			r = idn_delimitermap_map(delimiter_mapper, buffer,
						 buffer, buffer_length);
			idn_delimitermap_destroy(delimiter_mapper);
			if (r != idn_success)
				goto ret;
		}
		TRACE(("res delimitermap(): success (name=\"%s\")\n",
		       idn__debug_ucs4xstring(buffer, 50)));
	}

	/*
	 * Split the name into a list of labels.
	 */
	r = labellist_create(buffer, &labels);
	if (r != idn_success)
		goto ret;

	/*
	 * Perform conversions and tests.
	 */
	for (l = labellist_tail(labels); l != NULL;
	     l = labellist_previous(l)) {

		/* `saved_name' belongs to one label only; drop the last. */
		free(saved_name);
		saved_name = NULL;

		if (!idn__util_ucs4isasciirange(labellist_getname(l))) {
			if (actions & IDN_MAP) {
				r = label_map(ctx, l);
				if (r != idn_success)
					goto ret;
			}
			if (actions & IDN_NORMALIZE) {
				r = label_normalize(ctx, l);
				if (r != idn_success)
					goto ret;
			}
			if (actions & IDN_PROHCHECK) {
				r = label_prohcheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
			if (actions & IDN_UNASCHECK) {
				r = label_unascheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
			if (actions & IDN_BIDICHECK) {
				r = label_bidicheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
		}

		if ((actions & IDN_IDNCONV) && idn_is_ace) {
			/*
			 * Keep the pre-decode name: the round trip check
			 * below compares against it.
			 */
			saved_name = idn_ucs4_strdup(labellist_getname(l));
			if (saved_name == NULL) {
				r = idn_nomemory;
				goto ret;
			}
			r = label_idndecode(ctx, l);
			if (r == idn_invalid_encoding) {
				labellist_undo(l);
				continue;
			} else if (r != idn_success) {
				goto ret;
			}
		}
		if ((actions & IDN_RTCHECK) && saved_name != NULL) {
			r = label_rtcheck(ctx, actions, l, saved_name);
			if (r == idn_invalid_encoding) {
				labellist_undo(l);
				continue;
			} else if (r != idn_success) {
				goto ret;
			}
		}

#ifndef WITHOUT_ICONV
		if (actions & IDN_LOCALCONV) {
			r = label_localdecodecheck(ctx, l);
			if (r != idn_success)
				goto ret;
		}
#endif
	}

	/*
	 * Concat a list of labels to a name.
	 */
	for (;;) {
		void *new_buffer;

		new_buffer = realloc(buffer, sizeof(*buffer) * buffer_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buffer = (unsigned long *)new_buffer;

		r = labellist_getnamelist(labels, buffer, buffer_length);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		buffer_length *= 2;
	}

	/*
	 * Convert the result to the output encoding.  The local converter is
	 * only guaranteed to be non-NULL when iconv support is compiled in,
	 * while IDN_LOCALCONV may still be requested by the caller, so never
	 * hand a NULL converter to idn_converter_convfromucs4(); emit UTF-8
	 * instead in that case.
	 */
	if ((actions & IDN_LOCALCONV) && local_converter != NULL) {
		r = idn_converter_convfromucs4(local_converter, buffer, to,
					       tolen);
	} else {
		r = idn_ucs4_ucs4toutf8(buffer, to, tolen);
	}

ret:
	if (r == idn_success) {
		TRACE(("idn_res_decodename(): success (to=\"%s\")\n",
		       idn__debug_xstring(to, 50)));
	} else {
		TRACE(("idn_res_decodename(): %s\n", idn_result_tostring(r)));
	}
	/* Release everything acquired above; all of these tolerate the
	 * early-exit paths because they start out NULL. */
	free(saved_name);
	free(buffer);
	if (local_converter != NULL)
		idn_converter_destroy(local_converter);
	if (idn_converter != NULL)
		idn_converter_destroy(idn_converter);
	if (labels != NULL)
		labellist_destroy(labels);
	return (r);
}
/*
 * Round trip check for a decoded label.
 *
 * A scratch label list is built from the current name of `label' and run
 * through map, normalize, prohibit check, (optionally) unassigned check and
 * bidi check when it is not pure ASCII, then the optional ASCII check, ACE
 * encoding (again only when not pure ASCII) and the ACE length check.  The
 * resulting name is compared with `original_name' using
 * idn_ucs4_strcasecmp().
 *
 * Returns idn_success when every step succeeds and the names compare equal,
 * idn_nomemory when a step reports it, and idn_invalid_encoding for every
 * other failure (all other error codes are folded into that one).
 */
static idn_result_t
label_rtcheck(idn_resconf_t ctx, idn_action_t actions, labellist_t label,
	      const unsigned long *original_name) {
	const unsigned long *decoded_name = labellist_getname(label);
	const unsigned long *reencoded_name;
	labellist_t scratch = NULL;
	idn_result_t r;

	TRACE(("res rtcheck(label=\"%s\", org_label=\"%s\")\n",
	       idn__debug_ucs4xstring(decoded_name, 50),
	       idn__debug_ucs4xstring(original_name, 50)));

	r = labellist_create(decoded_name, &scratch);
	if (r != idn_success)
		goto ret;

	/* No label was produced: acceptable only for an empty original. */
	if (scratch == NULL) {
		r = (*original_name == '\0') ? idn_success
					     : idn_invalid_encoding;
		goto ret;
	}

	/* Name preparation steps, skipped entirely for pure ASCII labels. */
	if (!idn__util_ucs4isasciirange(labellist_getname(scratch))) {
		r = label_map(ctx, scratch);
		if (r == idn_success)
			r = label_normalize(ctx, scratch);
		if (r == idn_success)
			r = label_prohcheck(ctx, scratch);
		if (r == idn_success && (actions & IDN_UNASCHECK))
			r = label_unascheck(ctx, scratch);
		if (r == idn_success)
			r = label_bidicheck(ctx, scratch);
		if (r != idn_success)
			goto ret;
	}

	if (actions & IDN_ASCCHECK) {
		r = label_asccheck(ctx, scratch);
		if (r != idn_success)
			goto ret;
	}

	/* Re-test: the steps above may have changed the scratch name. */
	if (!idn__util_ucs4isasciirange(labellist_getname(scratch))) {
		r = label_idnencode_ace(ctx, scratch);
		if (r != idn_success)
			goto ret;
	}

	r = label_lencheck_ace(ctx, scratch);
	if (r != idn_success)
		goto ret;

	reencoded_name = labellist_getname(scratch);
	if (idn_ucs4_strcasecmp(reencoded_name, original_name) == 0) {
		r = idn_success;
	} else {
		TRACE(("res rtcheck(): round trip failed, org =\"%s\", rt=\"%s\"\n",
		       idn__debug_ucs4xstring(original_name, 50),
		       idn__debug_ucs4xstring(reencoded_name, 50)));
		r = idn_invalid_encoding;
	}

ret:
	/* Collapse every failure except out-of-memory into one code. */
	if (!(r == idn_success || r == idn_nomemory))
		r = idn_invalid_encoding;
	TRACE(("res rtcheck(): %s\n", idn_result_tostring(r)));
	if (scratch != NULL)
		labellist_destroy(scratch);
	return (r);
}