Ejemplo n.º 1
0
//Assign true labels to all clusters, and find members of each cluster.
void Clustering::finalize_labeling() {
  assert(!done_labeling);
  std::vector<int> valid_labels;
  std::vector<bool> flag(N_,false);
  for (int i=0; i<N_; ++i) {
    cluster_alias_[i] = get_true_label(i);
    if (!flag[cluster_alias_[i]]) {
      flag[cluster_alias_[i]] = true;
      valid_labels.push_back(cluster_alias_[i]);
    }
  }
  const int num_valid_labels = valid_labels.size();
  //std::cout << "# of valid labels " << num_valid_labels << std::endl;

  cluster_members.resize(num_valid_labels);
  cluster_labels_.resize(N_);
  std::vector<int> label_map(N_);
  for (int label=0; label<num_valid_labels; ++label) {
    label_map[valid_labels[label]] = label;
  }
  for (int i=0; i<N_; ++i) {
    const int new_label = cluster_labels_[i] = label_map[cluster_alias_[i]];
    assert(new_label>=0);
    assert(new_label<cluster_members.size());
    cluster_members[new_label].push_back(i);
  }

  //std::cout << "len of cluster members " << cluster_members.size() << std::endl;
  done_labeling = true;
}
Ejemplo n.º 2
0
idn_result_t
idn_res_decodename(idn_resconf_t ctx, idn_action_t actions, const char *from,
		    char *to, size_t tolen) {
	idn_converter_t local_converter = NULL;
	idn_converter_t idn_converter = NULL;
	idn_delimitermap_t delimiter_mapper;
	idn_result_t r;
	labellist_t labels = NULL, l;
	unsigned long *buffer = NULL;
	unsigned long *saved_name = NULL;
	size_t buffer_length;
	int idn_is_ace;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_res_decodename(actions=%s, from=\"%s\", tolen=%d)\n",
		idn__res_actionstostring(actions),
		idn__debug_xstring(from, 50), (int)tolen));

	if (actions & ~DECODE_MASK) {
		WARNING(("idn_res_decodename: invalid actions 0x%x\n",
			 actions));
		r = idn_invalid_action;
		goto ret;
	}

	if (!initialized)
		idn_res_initialize();
	if (!enabled || actions == 0) {
		r = copy_verbatim(from, to, tolen);
		goto ret;
	} else if (tolen <= 0) {
		r = idn_buffer_overflow;
		goto ret;
	}

	if (actions & IDN_DECODE_QUERY) {
#ifndef WITHOUT_ICONV
		actions |= (IDN_DELIMMAP | IDN_MAP | IDN_NORMALIZE | \
			    IDN_PROHCHECK | IDN_BIDICHECK | IDN_IDNCONV | \
			    IDN_RTCHECK | IDN_LOCALCONV);
#else
		actions |= (IDN_DELIMMAP | IDN_MAP | IDN_NORMALIZE | \
			    IDN_PROHCHECK | IDN_BIDICHECK | IDN_IDNCONV | \
			    IDN_RTCHECK);
#endif
	}

	/*
	 * Convert `from' to UCS4.
	 */
	local_converter = idn_resconf_getlocalconverter(ctx);
#ifndef WITHOUT_ICONV
	if (local_converter == NULL) {
		r = idn_invalid_name;
		goto ret;
	}
#endif

	idn_converter = idn_resconf_getidnconverter(ctx);
	if (idn_converter != NULL &&
	    idn_converter_isasciicompatible(idn_converter))
		idn_is_ace = 1;
	else
		idn_is_ace = 0;

	buffer_length = tolen * 2;

	TRACE(("res idndecode(name=\"%s\")\n", idn__debug_xstring(from, 50)));

	for (;;) {
		void *new_buffer;

		new_buffer = realloc(buffer, sizeof(*buffer) * buffer_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buffer = (unsigned long *)new_buffer;

		if ((actions & IDN_IDNCONV) &&
		     idn_converter != NULL && !idn_is_ace) {
			r = idn_converter_convtoucs4(idn_converter, from,
						     buffer, buffer_length);
		} else {
			r = idn_ucs4_utf8toucs4(from, buffer, buffer_length);
		}
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		buffer_length *= 2;
	}

	if (*buffer == '\0') {
		if (tolen <= 0) {
			r = idn_buffer_overflow;
			goto ret;
		}
		*to = '\0';
		r = idn_success;
		goto ret;
	}

	/*
	 * Delimiter map.
	 */
	if (actions & IDN_DELIMMAP) {
		TRACE(("res delimitermap(name=\"%s\")\n",
		       idn__debug_ucs4xstring(buffer, 50)));

		delimiter_mapper = idn_resconf_getdelimitermap(ctx);
		if (delimiter_mapper != NULL) {
			r = idn_delimitermap_map(delimiter_mapper, buffer,
						 buffer, buffer_length);
			idn_delimitermap_destroy(delimiter_mapper);
			if (r != idn_success)
				goto ret;
		}
		TRACE(("res delimitermap(): success (name=\"%s\")\n",
		       idn__debug_ucs4xstring(buffer, 50)));
	}

	/*
	 * Split the name into a list of labels.
	 */
	r = labellist_create(buffer, &labels);
	if (r != idn_success)
		goto ret;

	/*
	 * Perform conversions and tests.
	 */
	for (l = labellist_tail(labels); l != NULL;
	     l = labellist_previous(l)) {

		free(saved_name);
		saved_name = NULL;

		if (!idn__util_ucs4isasciirange(labellist_getname(l))) {
			if (actions & IDN_MAP) {
				r = label_map(ctx, l);
				if (r != idn_success)
					goto ret;
			}
			if (actions & IDN_NORMALIZE) {
				r = label_normalize(ctx, l);
				if (r != idn_success)
					goto ret;
			}
			if (actions & IDN_PROHCHECK) {
				r = label_prohcheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
			if (actions & IDN_UNASCHECK) {
				r = label_unascheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
			if (actions & IDN_BIDICHECK) {
				r = label_bidicheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
		}

		if ((actions & IDN_IDNCONV) && idn_is_ace) {
			saved_name = idn_ucs4_strdup(labellist_getname(l));
			if (saved_name == NULL) {
				r = idn_nomemory;
				goto ret;
			}
			r = label_idndecode(ctx, l);
			if (r == idn_invalid_encoding) {
				labellist_undo(l);
				continue;
			} else if (r != idn_success) {
				goto ret;
			}
		}
		if ((actions & IDN_RTCHECK) && saved_name != NULL) {
			r = label_rtcheck(ctx, actions, l, saved_name);
			if (r == idn_invalid_encoding) {
				labellist_undo(l);
				continue;
			} else if (r != idn_success) {
				goto ret;
			}
		}

#ifndef WITHOUT_ICONV
		if (actions & IDN_LOCALCONV) {
			r = label_localdecodecheck(ctx, l);
			if (r != idn_success)
				goto ret;
		}
#endif
	}

	/*
	 * Concat a list of labels to a name.
	 */
	for (;;) {
		void *new_buffer;

		new_buffer = realloc(buffer, sizeof(*buffer) * buffer_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buffer = (unsigned long *)new_buffer;

		r = labellist_getnamelist(labels, buffer, buffer_length);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		buffer_length *= 2;
	}

	if (actions & IDN_LOCALCONV) {
		r = idn_converter_convfromucs4(local_converter, buffer, to,
					       tolen);
	} else {
		r = idn_ucs4_ucs4toutf8(buffer, to, tolen);
	}

ret:
	if (r == idn_success) {
		TRACE(("idn_res_decodename(): success (to=\"%s\")\n",
		       idn__debug_xstring(to, 50)));
	} else {
		TRACE(("idn_res_decodename(): %s\n", idn_result_tostring(r)));
	}
	free(saved_name);
	free(buffer);
	if (local_converter != NULL)
		idn_converter_destroy(local_converter);
	if (idn_converter != NULL)
		idn_converter_destroy(idn_converter);
	if (labels != NULL)
		labellist_destroy(labels);
	return (r);
}
Ejemplo n.º 3
0
static idn_result_t
label_rtcheck(idn_resconf_t ctx, idn_action_t actions, labellist_t label,
	    const unsigned long *original_name) {
	labellist_t rt_label = NULL;
	const unsigned long *rt_name;
	const unsigned long *cur_name;
	idn_result_t r;

	cur_name = labellist_getname(label);
	TRACE(("res rtcheck(label=\"%s\", org_label=\"%s\")\n",
		idn__debug_ucs4xstring(cur_name, 50),
		idn__debug_ucs4xstring(original_name, 50)));

	r = labellist_create(cur_name, &rt_label);
	if (r != idn_success)
		goto ret;
	if (rt_label == NULL) {
		if (*original_name == '\0')
			r = idn_success;
		else
			r = idn_invalid_encoding;
		goto ret;
	}

	if (!idn__util_ucs4isasciirange(labellist_getname(rt_label))) {
		r = label_map(ctx, rt_label);
		if (r != idn_success)
			goto ret;
		r = label_normalize(ctx, rt_label);
		if (r != idn_success)
			goto ret;
		r = label_prohcheck(ctx, rt_label);
		if (r != idn_success)
			goto ret;
		if (actions & IDN_UNASCHECK) {
			r = label_unascheck(ctx, rt_label);
			if (r != idn_success)
				goto ret;
		}
		r = label_bidicheck(ctx, rt_label);
		if (r != idn_success)
			goto ret;
	}

	if (actions & IDN_ASCCHECK) {
		r = label_asccheck(ctx, rt_label);
		if (r != idn_success)
			goto ret;
	}
	if (!idn__util_ucs4isasciirange(labellist_getname(rt_label))) {
		r = label_idnencode_ace(ctx, rt_label);
		if (r != idn_success)
			goto ret;
	}
	r = label_lencheck_ace(ctx, rt_label);
	if (r != idn_success)
		goto ret;
	rt_name = labellist_getname(rt_label);

	if (idn_ucs4_strcasecmp(rt_name, original_name) != 0) {
		TRACE(("res rtcheck(): round trip failed, org =\"%s\", rt=\"%s\"\n",
		       idn__debug_ucs4xstring(original_name, 50),
		       idn__debug_ucs4xstring(rt_name, 50)));
		r = idn_invalid_encoding;
		goto ret;
	}

	r  = idn_success;
ret:
	if (r != idn_nomemory && r != idn_success)
		r = idn_invalid_encoding;
	TRACE(("res rtcheck(): %s\n", idn_result_tostring(r)));
	if (rt_label != NULL)
		labellist_destroy(rt_label);
	return (r);
}