C++ (Cpp) is_encoding_utf8の例

コード例 #1

0

ファイルを表示

ファイル: utf8.c プロジェクト: CoerWatt/git

/*
 * Convert the NUL-terminated string `in` from `in_encoding` to
 * `out_encoding` through an iconv conversion descriptor.
 *
 * Returns the result of reencode_string_iconv() for that descriptor, or
 * NULL when `in_encoding` is NULL or no descriptor could be opened.
 */
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
{
	const iconv_t open_failed = (iconv_t) -1;
	iconv_t cd;
	char *converted;

	if (in_encoding == NULL)
		return NULL;

	cd = iconv_open(out_encoding, in_encoding);
	if (cd == open_failed) {
		/*
		 * The names as spelled by the caller were rejected. Not every
		 * platform knows every spelling variant of UTF-8, so retry
		 * once with the official "UTF-8" name substituted for each
		 * side that is_encoding_utf8() recognizes. This is done only
		 * after the first attempt fails, so a platform that accepts
		 * the caller's spelling but not the official one still works.
		 */
		if (is_encoding_utf8(in_encoding))
			in_encoding = "UTF-8";
		if (is_encoding_utf8(out_encoding))
			out_encoding = "UTF-8";

		cd = iconv_open(out_encoding, in_encoding);
		if (cd == open_failed)
			return NULL;
	}

	converted = reencode_string_iconv(in, strlen(in), cd);
	iconv_close(cd);
	return converted;
}

コード例 #2

0

ファイルを表示

ファイル: pretty.c プロジェクト: CookieChen/git

/*
 * Adjust the "encoding " header line found in `buf` so that it matches
 * `encoding`.
 *
 * Only lines before the first empty line are searched. When no such header
 * is found (or it has no terminating newline) `buf` is returned untouched.
 * Otherwise `buf` is handed over to a strbuf, edited there, and the detached
 * strbuf buffer is returned:
 *  - if is_encoding_utf8(encoding) holds, the whole header line is removed;
 *  - otherwise only the value after "encoding " is replaced by `encoding`.
 */
static char *replace_encoding_header(char *buf, const char *encoding)
{
	static const char header_key[] = "encoding ";
	const size_t key_len = strlen(header_key);
	struct strbuf tmp = STRBUF_INIT;
	size_t start, len, buf_len;
	char *line = buf;
	char *eol;

	/* walk line by line until the header or the blank separator line */
	for (;;) {
		if (!strncmp(line, header_key, key_len))
			break;
		line = strchr(line, '\n');
		if (!line)
			return buf;
		line++;
		if (*line == '\n')
			return buf;
	}

	eol = strchr(line, '\n');
	if (!eol)
		return buf; /* unexpected, but stay defensive */

	start = line - buf;
	len = eol + 1 - (buf + start);	/* header line including its '\n' */

	buf_len = strlen(buf);
	strbuf_attach(&tmp, buf, buf_len, buf_len + 1);
	if (is_encoding_utf8(encoding)) {
		/* content was re-coded to UTF-8: the header is dropped */
		strbuf_remove(&tmp, start, len);
	} else {
		/* swap only XXXX in 'encoding XXXX\n' (key plus newline stay) */
		strbuf_splice(&tmp, start + key_len,
			      len - (key_len + 1),
			      encoding, strlen(encoding));
	}
	return strbuf_detach(&tmp, NULL);
}

コード例 #3

0

ファイルを表示

ファイル: utf8.c プロジェクト: CoerWatt/git

/*
 * Measure, in bytes, the first character of the multi-byte string `*text`
 * as interpreted in `encoding`, and step past it.
 *
 * - `*text` is advanced by the returned length.
 * - If `remainder_p` is non-NULL, `*remainder_p` holds the number of bytes
 *   that may be consumed on entry and is decreased by the returned length on
 *   exit; a value of 0 on entry yields a return of 0 with nothing changed.
 *   If `remainder_p` is NULL the budget is SIZE_MAX.
 */
int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding)
{
	const char *cursor = *text;
	size_t avail = SIZE_MAX;
	/*
	 * One byte is the answer for every non-UTF-8 encoding for now
	 * (TODO: decode one character with iconv to get its real length),
	 * and also for bytes that are not valid UTF-8.
	 */
	int width = 1;

	if (remainder_p)
		avail = *remainder_p;
	if (avail < 1)
		return 0;

	if (is_encoding_utf8(encoding)) {
		pick_one_utf8_char(&cursor, &avail);
		/* a NULL cursor means invalid UTF-8: keep the raw-byte width */
		if (cursor)
			width = cursor - *text;
	}

	*text += width;
	if (remainder_p)
		*remainder_p -= width;

	return width;
}

コード例 #4

0

ファイルを表示

ファイル: builtin-mailinfo.c プロジェクト: asoltys/git

/*
 * Pick a source charset for `line` when none was declared.
 *
 * If the target charset is UTF-8 and the line contents already pass the
 * is_utf8() check (US-ASCII being a subset of UTF-8), NULL is returned to
 * signal that nothing needs converting.
 *
 * In every other case the historical default, Latin1, is returned.
 */
static const char *guess_charset(const struct strbuf *line, const char *target_charset)
{
	const int already_utf8 =
		is_encoding_utf8(target_charset) && is_utf8(line->buf);

	return already_utf8 ? NULL : "ISO8859-1";
}

コード例 #5

0

ファイルを表示

ファイル: builtin-mailinfo.c プロジェクト: Jatinpurohit/git

/*
 * Pick a source charset for `line` when none was declared.
 *
 * If the target charset is UTF-8 and the line already passes the is_utf8()
 * check (US-ASCII being a subset of UTF-8), NULL is returned to signal that
 * nothing needs converting.
 *
 * In every other case the historical default, Latin1, is returned.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	const int already_utf8 =
		is_encoding_utf8(target_charset) && is_utf8(line);

	return already_utf8 ? NULL : "latin1";
}

コード例 #6

0

ファイルを表示

ファイル: encoding-guesser.c プロジェクト: student-t/PSPP

/* Returns the length in bytes of the byte-order mark at the start of the N
   bytes in DATA, provided that ENCODING names an encoding of the matching
   family: 3 for a UTF-8 BOM, 2 for a UTF-16 BOM (either byte order), 4 for a
   UTF-32 BOM (either byte order).  Returns 0 when no BOM/encoding pair
   matches.

   N must be at least ENCODING_GUESS_MIN, unless the file is shorter than
   that. */
size_t
encoding_guess_bom_length (const char *encoding,
                           const void *data_, size_t n)
{
  const uint8_t *data = data_;

  if (is_utf8_bom (data, n) && is_encoding_utf8 (encoding))
    return 3;

  if (is_utf16le_bom (data, n) && is_encoding_utf16 (encoding))
    return 2;
  if (is_utf16be_bom (data, n) && is_encoding_utf16 (encoding))
    return 2;

  if (is_utf32le_bom (data, n) && is_encoding_utf32 (encoding))
    return 4;
  if (is_utf32be_bom (data, n) && is_encoding_utf32 (encoding))
    return 4;

  return 0;
}

コード例 #7

0

ファイルを表示

ファイル: git.c プロジェクト: eatnumber1/parsecvs

/*
 * Build the text of a commit object for `commit` (tree, optional parent,
 * author, committer, optional encoding header, blank line, log message)
 * and write it with write_sha1_file().
 *
 * On success commit->sha1 is replaced by the atom of the new object's hex
 * name and 1 is returned. 0 is returned when commit->sha1 is unset, when no
 * log text is available, when writing fails, or when atom() yields nothing.
 */
static int
git_commit(rev_commit *commit)
{
	unsigned char commit_sha1[20];
	size_t size = 0;
	cvs_author *author;
	char *full;
	char *email;
	char *log;
	int encoding_is_utf8;

	if (!commit->sha1)
		return 0;

	log = git_log(commit);
	if (!log)
		return 0;

	author = git_fullname(commit->author);
	if (author) {
		full = author->full;
		email = author->email;
	} else {
		/* no mapping found: the raw author name serves as both fields */
		full = commit->author;
		email = commit->author;
	}

	/* Not having i18n.commitencoding is the same as having utf-8 */
	encoding_is_utf8 = is_encoding_utf8(git_commit_encoding);

	add_buffer(&size, "tree %s\n", commit->sha1);
	if (commit->parent)
		add_buffer(&size, "parent %s\n", commit->parent->sha1);
	add_buffer(&size, "author %s <%s> %lu +0000\n",
		   full, email, commit->date);
	add_buffer(&size, "committer %s <%s> %lu +0000\n",
		   full, email, commit->date);
	/* the encoding header is only emitted for non-UTF-8 encodings */
	if (!encoding_is_utf8)
		add_buffer(&size, "encoding %s\n", git_commit_encoding);
	add_buffer(&size, "\n%s", log);

	if (write_sha1_file(commit_text, size, commit_type, commit_sha1))
		return 0;

	commit->sha1 = atom(sha1_to_hex(commit_sha1));
	return commit->sha1 ? 1 : 0;
}

コード例 #8

0

ファイルを表示

ファイル: encoding-guesser.c プロジェクト: student-t/PSPP

/* Guesses an encoding from ENCODING and the N bytes of text at DATA.  DATA
   should begin at the first non-ASCII text character found in the input (as
   determined by encoding_guess_is_ascii_text()).

   The result is "UTF-8" when the data passes encoding_guess_tail_is_utf8().
   Otherwise it is the fallback encoding parsed from ENCODING, except that a
   UTF-8 fallback is replaced by "windows-1252" because the data was just
   found not to be UTF-8.

   See encoding-guesser.h for intended use of this function.

   N must be at least ENCODING_GUESS_MIN, unless the file has fewer bytes than
   that starting with the first non-ASCII text character. */
const char *
encoding_guess_tail_encoding (const char *encoding,
                              const void *data, size_t n)
{
  const char *fallback_encoding;

  if (encoding_guess_tail_is_utf8 (data, n))
    return "UTF-8";

  /* The data is not UTF-8, so a UTF-8 fallback cannot be right either. */
  fallback_encoding = encoding_guess_parse_encoding (encoding);
  return (is_encoding_utf8 (fallback_encoding)
          ? "windows-1252"
          : fallback_encoding);
}

コード例 #9

0

ファイルを表示

ファイル: notes.c プロジェクト: Noffica/git

/*
 * Append to `sb` the note attached to `object_oid` in notes tree `t`.
 *
 * A NULL `t` selects the internal default notes tree, and a tree that is not
 * yet initialized is initialized with init_notes() defaults first. Nothing
 * is appended when the object has no note or the note is not a blob.
 *
 * When `output_encoding` is a non-empty, non-UTF-8 name, the note text is
 * re-encoded from UTF-8; if that fails the text is used unchanged.
 *
 * raw != 0 gives the %N userformat (bare lines); otherwise a "Notes:" style
 * header is written and each line is indented by four spaces for human
 * consumption.
 */
static void format_note(struct notes_tree *t, const struct object_id *object_oid,
			struct strbuf *sb, const char *output_encoding, int raw)
{
	static const char utf8[] = "utf-8";
	const struct object_id *note_oid;
	char *msg, *line, *end;
	unsigned long linelen, msglen;
	enum object_type type;
	int wants_reencode;

	if (!t)
		t = &default_notes_tree;
	if (!t->initialized)
		init_notes(t, NULL, NULL, 0);

	note_oid = get_note(t, object_oid);
	if (!note_oid)
		return;

	msg = read_object_file(note_oid, &type, &msglen);
	if (!msg || type != OBJ_BLOB) {
		free(msg);
		return;
	}

	wants_reencode = output_encoding && *output_encoding &&
			 !is_encoding_utf8(output_encoding);
	if (wants_reencode) {
		char *reencoded = reencode_string(msg, output_encoding, utf8);
		if (reencoded) {
			free(msg);
			msg = reencoded;
			msglen = strlen(msg);
		}
	}

	/* every emitted line gets its own newline, so drop a trailing one */
	if (msglen && msg[msglen - 1] == '\n')
		msglen--;

	if (!raw) {
		const char *ref = t->ref;
		if (ref && strcmp(ref, GIT_NOTES_DEFAULT_REF)) {
			/* non-default ref: show it without the usual prefixes */
			if (starts_with(ref, "refs/"))
				ref += 5;
			if (starts_with(ref, "notes/"))
				ref += 6;
			strbuf_addf(sb, "\nNotes (%s):\n", ref);
		} else {
			strbuf_addstr(sb, "\nNotes:\n");
		}
	}

	end = msg + msglen;
	line = msg;
	while (line < end) {
		linelen = strchrnul(line, '\n') - line;

		if (!raw)
			strbuf_addstr(sb, "    ");
		strbuf_add(sb, line, linelen);
		strbuf_addch(sb, '\n');

		/* skip the line and its terminator */
		line += linelen + 1;
	}

	free(msg);
}

コード例 #10

0

ファイルを表示

ファイル: fast-export.c プロジェクト: CSRedRat/git

/*
 * Print one commit to stdout as a stream record.
 *
 * The blobs changed by the commit are exported first; then the record is
 * printed: an optional "reset" line (for parentless commits), the "commit"
 * and "mark" lines, the author and committer lines copied from the raw
 * commit buffer, a "data" section holding the message, "from"/"merge" lines
 * for parents that have a mark, an optional "deleteall", and finally the
 * flushed tree diff.
 *
 * rev->diffopt.output_format is switched to DIFF_FORMAT_CALLBACK for the
 * duration of the call and restored before returning.
 */
static void handle_commit(struct commit *commit, struct rev_info *rev)
{
	int saved_output_format = rev->diffopt.output_format;
	const char *commit_buffer;
	const char *author, *author_end, *committer, *committer_end;
	const char *encoding, *message;
	char *reencoded = NULL;
	struct commit_list *p;
	const char *refname;
	int i;

	rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;

	/* locate the author and committer header lines in the raw buffer */
	parse_commit_or_die(commit);
	commit_buffer = get_commit_buffer(commit, NULL);
	author = strstr(commit_buffer, "\nauthor ");
	if (!author)
		die ("Could not find author in commit %s",
		     sha1_to_hex(commit->object.sha1));
	author++;	/* step over the leading '\n' matched by strstr() */
	author_end = strchrnul(author, '\n');
	committer = strstr(author_end, "\ncommitter ");
	if (!committer)
		die ("Could not find committer in commit %s",
		     sha1_to_hex(commit->object.sha1));
	committer++;	/* step over the leading '\n' matched by strstr() */
	committer_end = strchrnul(committer, '\n');
	/*
	 * The message starts after the first blank line; `message` stays
	 * NULL when no blank line is found.
	 */
	message = strstr(committer_end, "\n\n");
	encoding = find_encoding(committer_end, message);
	if (message)
		message += 2;

	/*
	 * Diff against the first parent when it already has a mark and a
	 * full tree was not requested; otherwise diff against the empty tree.
	 */
	if (commit->parents &&
	    get_object_mark(&commit->parents->item->object) != 0 &&
	    !full_tree) {
		parse_commit_or_die(commit->parents->item);
		diff_tree_sha1(commit->parents->item->tree->object.sha1,
			       commit->tree->object.sha1, "", &rev->diffopt);
	}
	else
		diff_root_tree_sha1(commit->tree->object.sha1,
				    "", &rev->diffopt);

	/* Export the referenced blobs, and remember the marks. */
	for (i = 0; i < diff_queued_diff.nr; i++)
		if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
			export_blob(diff_queued_diff.queue[i]->two->sha1);

	refname = commit->util;
	if (anonymize) {
		refname = anonymize_refname(refname);
		anonymize_ident_line(&committer, &committer_end);
		anonymize_ident_line(&author, &author_end);
	}

	mark_next_object(&commit->object);
	/*
	 * NOTE(review): `message` may still be NULL here; the printf() below
	 * copes with that, but it is passed unchecked to
	 * anonymize_commit_message() / reencode_string() -- confirm that both
	 * tolerate a NULL input.
	 */
	if (anonymize)
		reencoded = anonymize_commit_message(message);
	else if (!is_encoding_utf8(encoding))
		reencoded = reencode_string(message, "UTF-8", encoding);
	if (!commit->parents)
		printf("reset %s\n", refname);
	/*
	 * The data payload is the replacement message when one was produced,
	 * else the original message, else the empty string.
	 */
	printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
	       refname, last_idnum,
	       (int)(author_end - author), author,
	       (int)(committer_end - committer), committer,
	       (unsigned)(reencoded
			  ? strlen(reencoded) : message
			  ? strlen(message) : 0),
	       reencoded ? reencoded : message ? message : "");
	free(reencoded);
	unuse_commit_buffer(commit, commit_buffer);

	/*
	 * The first parent that has a mark is printed as "from", every later
	 * one as "merge"; parents without a mark are skipped.
	 */
	for (i = 0, p = commit->parents; p; p = p->next) {
		int mark = get_object_mark(&p->item->object);
		if (!mark)
			continue;
		if (i == 0)
			printf("from :%d\n", mark);
		else
			printf("merge :%d\n", mark);
		i++;
	}

	if (full_tree)
		printf("deleteall\n");
	log_tree_diff_flush(rev);
	rev->diffopt.output_format = saved_output_format;

	printf("\n");

	show_progress();
}

コード例 #11

0

ファイルを表示

ファイル: utf8.c プロジェクト: CoerWatt/git

/*
 * Report whether `src` and `dst` name the same encoding.
 *
 * Returns 1 when both names are recognized by is_encoding_utf8(), or when
 * the two names are equal ignoring case; returns 0 otherwise.
 *
 * A NULL name is replaced by "UTF-8" before any comparison so that
 * strcasecmp() is never called with a NULL pointer (undefined behavior).
 * NOTE(review): this assumes a missing encoding name stands for UTF-8 --
 * confirm against is_encoding_utf8() and the callers.
 */
int same_encoding(const char *src, const char *dst)
{
	static const char utf8[] = "UTF-8";

	if (!src)
		src = utf8;
	if (!dst)
		dst = utf8;

	/* differently spelled UTF-8 names still count as the same encoding */
	if (is_encoding_utf8(src) && is_encoding_utf8(dst))
		return 1;
	return !strcasecmp(src, dst);
}

コード例 #12

0

ファイルを表示

ファイル: encoding-guesser.c プロジェクト: student-t/PSPP

/* Guesses the encoding of a text file from ENCODING, an encoding name in one
   of the forms described at the top of encoding-guesser.h, and DATA, the
   first N bytes of the file.  The result may be ENCODING itself, a suffix of
   it, or a statically allocated string.

   Detection is attempted only if ENCODING requests autodetection; otherwise
   (or when N is 0) the fallback encoding parsed from ENCODING is returned.
   See encoding-guesser.h for details.

   Until a non-ASCII text character shows up, UTF-8 cannot be told apart from
   other ASCII-based encodings.  When ENCODING requests autodetection and the
   result is "ASCII", the client should keep processing input until it meets
   a non-ASCII character (as returned by encoding_guess_is_ascii_text()) and
   then call encoding_guess_tail_encoding() for the final guess.  See
   encoding-guesser.h for details.

   N must be at least ENCODING_GUESS_MIN, unless the file is shorter than
   that. */
const char *
encoding_guess_head_encoding (const char *encoding,
                              const void *data_, size_t n)
{
  const uint8_t *data = data_;
  const char *fallback_encoding = encoding_guess_parse_encoding (encoding);
  const char *utf16_guess;

  /* No autodetection requested, or nothing to look at. */
  if (!encoding_guess_encoding_is_auto (encoding) || n == 0)
    return fallback_encoding;

  if (is_utf32be_bom (data, n) || is_utf32le_bom (data, n))
    return "UTF-32";

  /* Four-byte signatures, read as a big-endian word. */
  if (n >= 4)
    {
      uint32_t magic = get_be32 (data);
      switch (magic)
        {
        case 0x84319533:
          return "GB-18030";
        case 0xdd736673:
          return "UTF-EBCDIC";
        default:
          break;
        }
    }

  if (is_utf16be_bom (data, n) || is_utf16le_bom (data, n))
    return "UTF-16";

  if (is_utf8_bom (data, n))
    return "UTF-8";

  /* No byte-order mark: try the content-based checks. */
  utf16_guess = guess_utf16 (data, n);
  if (utf16_guess != NULL)
    return utf16_guess;

  if (is_utf32 (data, n, get_be32))
    return "UTF-32BE";
  if (is_utf32 (data, n, get_le32))
    return "UTF-32LE";

  /* Every "giveaway" that makes the encoding obvious has been tried, which
     incidentally rules out the encodings with multibyte units (e.g. UTF-16,
     UTF-32).  What remains is telling UTF-8 apart from an ASCII-based
     fallback encoding, which is pointless if the fallback is not ASCII
     compatible. */
  if (!is_encoding_ascii_compatible (fallback_encoding))
    return fallback_encoding;

  /* Still possibly UTF-8: report "ASCII" so the caller keeps looking. */
  if (encoding_guess_tail_is_utf8 (data, n))
    return "ASCII";

  /* The data clearly is not UTF-8, so a UTF-8 fallback must be replaced by
     something else. */
  return (is_encoding_utf8 (fallback_encoding)
          ? "windows-1252"
          : fallback_encoding);
}

コード例 #13

0

ファイルを表示

ファイル: builtin-commit.c プロジェクト: Jatinpurohit/git

/*
 * Entry point of the commit command.
 *
 * Assembles the commit object text in a strbuf (tree, parents, author,
 * committer, optional encoding header, message), writes it with
 * write_sha1_file(), updates HEAD with a reflog message derived from the
 * first message line, removes the MERGE_HEAD/MERGE_MSG/SQUASH_MSG files,
 * commits the index files and runs the post-commit hook.
 *
 * Returns 1 when prepare_to_commit() reports failure and 0 on success; all
 * other failures terminate through die().
 */
int cmd_commit(int argc, const char **argv, const char *prefix)
{
	int header_len;
	struct strbuf sb;
	const char *index_file, *reflog_msg;
	char *nl, *p;
	unsigned char commit_sha1[20];
	struct ref_lock *ref_lock;

	git_config(git_commit_config);

	argc = parse_and_validate_options(argc, argv, builtin_commit_usage);

	index_file = prepare_index(argc, argv, prefix);

	/* Set up everything for writing the commit object.  This includes
	   running hooks, writing the trees, and interacting with the user.  */
	if (!prepare_to_commit(index_file, prefix)) {
		rollback_index_files();
		return 1;
	}

	/*
	 * The commit object
	 */
	strbuf_init(&sb, 0);
	strbuf_addf(&sb, "tree %s\n",
		    sha1_to_hex(active_cache_tree->sha1));

	/* Determine parents */
	if (initial_commit) {
		/* no parent lines at all */
		reflog_msg = "commit (initial)";
	} else if (amend) {
		struct commit_list *c;
		struct commit *commit;

		/* an amended commit takes over the parents of HEAD */
		reflog_msg = "commit (amend)";
		commit = lookup_commit(head_sha1);
		if (!commit || parse_commit(commit))
			die("could not parse HEAD commit");

		for (c = commit->parents; c; c = c->next)
			add_parent(&sb, c->item->object.sha1);
	} else if (in_merge) {
		struct strbuf m;
		FILE *fp;

		/* HEAD first, then one parent per line of MERGE_HEAD */
		reflog_msg = "commit (merge)";
		add_parent(&sb, head_sha1);
		strbuf_init(&m, 0);
		fp = fopen(git_path("MERGE_HEAD"), "r");
		if (fp == NULL)
			die("could not open %s for reading: %s",
			    git_path("MERGE_HEAD"), strerror(errno));
		while (strbuf_getline(&m, fp, '\n') != EOF) {
			unsigned char sha1[20];
			if (get_sha1_hex(m.buf, sha1) < 0)
				die("Corrupt MERGE_HEAD file (%s)", m.buf);
			add_parent(&sb, sha1);
		}
		fclose(fp);
		strbuf_release(&m);
	} else {
		reflog_msg = "commit";
		strbuf_addf(&sb, "parent %s\n", sha1_to_hex(head_sha1));
	}

	strbuf_addf(&sb, "author %s\n",
		    fmt_ident(author_name, author_email, author_date, IDENT_ERROR_ON_NO_NAME));
	strbuf_addf(&sb, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
	/* the encoding header is only recorded for non-UTF-8 encodings */
	if (!is_encoding_utf8(git_commit_encoding))
		strbuf_addf(&sb, "encoding %s\n", git_commit_encoding);
	strbuf_addch(&sb, '\n');

	/* Finally, get the commit message */
	header_len = sb.len;
	if (strbuf_read_file(&sb, git_path(commit_editmsg), 0) < 0) {
		rollback_index_files();
		die("could not read commit message");
	}

	/* Truncate the message just before the diff, if any. */
	p = strstr(sb.buf, "\ndiff --git a/");
	if (p != NULL)
		strbuf_setlen(&sb, p - sb.buf + 1);

	if (cleanup_mode != CLEANUP_NONE)
		stripspace(&sb, cleanup_mode == CLEANUP_ALL);
	if (sb.len < header_len || message_is_empty(&sb, header_len)) {
		rollback_index_files();
		die("no commit message?  aborting commit.");
	}
	/*
	 * Append an explicit NUL that counts toward sb.len; it is excluded
	 * again (sb.len - 1) when the object is written below.
	 */
	strbuf_addch(&sb, '\0');
	/*
	 * Print the warning as data, never as a format string: a non-literal
	 * format would have any '%' in it interpreted by fprintf().
	 */
	if (is_encoding_utf8(git_commit_encoding) && !is_utf8(sb.buf))
		fprintf(stderr, "%s", commit_utf8_warn);

	if (write_sha1_file(sb.buf, sb.len - 1, commit_type, commit_sha1)) {
		rollback_index_files();
		die("failed to write commit object");
	}

	ref_lock = lock_any_ref_for_update("HEAD",
					   initial_commit ? NULL : head_sha1,
					   0);

	/*
	 * Reuse sb for the reflog entry: keep only the first line of the
	 * message, drop the header, and prefix "<reflog_msg>: ".
	 */
	nl = strchr(sb.buf + header_len, '\n');
	if (nl)
		strbuf_setlen(&sb, nl + 1 - sb.buf);
	else
		strbuf_addch(&sb, '\n');
	strbuf_remove(&sb, 0, header_len);
	strbuf_insert(&sb, 0, reflog_msg, strlen(reflog_msg));
	strbuf_insert(&sb, strlen(reflog_msg), ": ", 2);

	if (!ref_lock) {
		rollback_index_files();
		die("cannot lock HEAD ref");
	}
	if (write_ref_sha1(ref_lock, commit_sha1, sb.buf) < 0) {
		rollback_index_files();
		die("cannot update HEAD ref");
	}

	/* the merge/squash state files are no longer needed */
	unlink(git_path("MERGE_HEAD"));
	unlink(git_path("MERGE_MSG"));
	unlink(git_path("SQUASH_MSG"));

	if (commit_index_files())
		die ("Repository has been updated, but unable to write\n"
		     "new_index file. Check that disk is not full or quota is\n"
		     "not exceeded, and then \"git reset HEAD\" to recover.");

	rerere();
	run_hook(get_index_file(), "post-commit", NULL);
	if (!quiet)
		print_summary(prefix, commit_sha1);

	return 0;
}