Beispiel #1
0
/**
 * Reads the contents of `file` and stores them as the POST body of U.
 *
 * The file name is passed through trim() before use. At most POST_BUF-1
 * bytes are read so that the local buffer always keeps a terminating NUL;
 * a larger file is truncated and the truncation is reported with
 * joe_error(). If the data passes looks_ascii() or looks_utf8() it is
 * handed to trim().
 *
 * When the resulting data is non-empty, U->postdata receives a malloc'd,
 * NUL-terminated copy, U->postlen its length (excluding the NUL) and
 * U->conttype a copy of get_content_type(filename). If the file does not
 * exist, cannot be opened, memory runs out, or the data is empty, U is
 * left untouched.
 */
void 
load_file(URL *U, char *file)
{
  FILE     *fp;
  size_t   len;
  size_t   postlen;
  struct   stat st; 
  char     *filename;
  void     *copy;
  char     postdata[POST_BUF]; 
  unichar  ubuf[POST_BUF+1];   
  size_t   ulen;

  filename = trim(file);
  memset(postdata, 0, sizeof(postdata));

  /*
   * stat() rather than lstat(): fopen() follows symbolic links, so the
   * size we need is that of the link target, not of the link itself.
   * st is only valid when the call succeeded, so never read it otherwise.
   */
  if(stat(filename, &st) != 0){
    /* a missing file is silently ignored; anything else is reported */
    if(errno != ENOENT)
      joe_error("could not open file: %s", filename);
    return;
  }

  /* clamp the read length so it can never overrun postdata */
  len = (st.st_size > 0) ? (size_t)st.st_size : 0;
  if(len > sizeof(postdata) - 1){
    joe_error("file too large, POST data truncated: %s", filename);
    len = sizeof(postdata) - 1;
  }

  if((fp = fopen(filename, "r")) == NULL){
    joe_error("could not open file: %s", filename);
    return;
  }
  if(fread(postdata, 1, len, fp) == len){
    if(looks_ascii((unsigned char*)postdata,len,ubuf,&ulen) ||
       looks_utf8((unsigned char*)postdata,len,ubuf,&ulen))
      trim(postdata);
  } else {
    /* whatever was read before the failure is still used below */
    joe_error("unable to read file: %s", filename);
  }
  fclose(fp);

  postlen = strlen(postdata);
  if(postlen > 0){
    /* +1: room for the terminating NUL that is copied along */
    copy = malloc(postlen + 1);
    if(copy == NULL){
      joe_error("out of memory loading file: %s", filename);
      return;
    }
    memcpy(copy, postdata, postlen + 1);
    U->conttype = xstrdup(get_content_type(filename));
    U->postlen  = postlen;
    U->postdata = copy;
  } 
  return;
}
Beispiel #2
0
/*
 * Tries to recognise buf[0..nbytes) as text in one of several character
 * encodings and to print a description of it through file_printf().
 *
 * NOTE(review): the unconditional `return 0;` on the first line of the body
 * makes everything after it unreachable, so as written this function always
 * returns 0 and never looks at ms or buf.  Presumably this is a deliberate
 * way of disabling the check -- confirm before removing it.
 *
 * Return values of the (currently unreachable) body:
 *    1  a description was printed, or file_softmagic() returned non-zero
 *    0  the data does not look like text, or nbytes <= 1
 *   -1  allocation failure, encode_utf8() failure, a failed file_printf()
 *       (except the "binary" one, see below), or Latin-1 looking data that
 *       starts with four 0xff bytes
 *
 * nbuf, ubuf and utf8_buf are owned by this function and freed at `done`.
 */
int file_ascmagic(RMagic *ms, const ut8 *buf, size_t nbytes) {
/* NOTE(review): early return -- all code below is dead. */
return 0;
	size_t i;
	ut8 *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
	unichar *ubuf = NULL;	
	size_t ulen, mlen;
	const struct names *p;
	int rv = -1;
	int mime = ms->flags & R_MAGIC_MIME;

	const char *code = NULL;
	const char *code_mime = NULL;
	const char *type = NULL;
	const char *subtype = NULL;
	const char *subtype_mime = NULL;

	int has_escapes = 0;
	int has_backspace = 0;
	int seen_cr = 0;

	int n_crlf = 0;
	int n_lf = 0;
	int n_cr = 0;
	int n_nel = 0;

	/* (size_t)-1 so that `last_line_end + MAXLINELEN` wraps to
	   MAXLINELEN - 1 while no line terminator has been seen yet. */
	size_t last_line_end = (size_t)-1;
	int has_long_lines = 0;

	/*
	 * Undo the NUL-termination kindly provided by process()
	 * but leave at least one byte to look at
	 */
	while (nbytes > 1 && buf[nbytes - 1] == '\0')
		nbytes--;

	/* Scratch buffers: nbuf receives the from_ebcdic() output, ubuf the
	   decoded characters.  Both are zero-filled and one element longer
	   than the input. */
	if (!(nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))))
		goto done;
	if (!(ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))))
		goto done;

	/*
	 * Then try to determine whether it's any character code we can
	 * identify.  Each of these tests, if it succeeds, will leave
	 * the text converted into one-unichar-per-character Unicode in
	 * ubuf, and the number of characters converted in ulen.
	 */
	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
		code = "ASCII";
		code_mime = "us-ascii";
		type = "text";
	} else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
		code = "UTF-8 Unicode (with BOM)";
		code_mime = "utf-8";
		type = "text";
	} else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
		code = "UTF-8 Unicode";
		code_mime = "utf-8";
		type = "text";
	} else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
		if (i == 1)
			code = "Little-endian UTF-16 Unicode";
		else
			code = "Big-endian UTF-16 Unicode";

		type = "character data";
		code_mime = "utf-16";    /* is this defined? */
	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
		/* NOTE(review): compares 4 bytes without checking nbytes >= 4,
		   and leaves with rv still -1 rather than 0. */
		if (!memcmp (buf, "\xff\xff\xff\xff", 4)) {
			// uninitialized memory is not iso-8859!!
			goto done;
		}
		code = "ISO-8859";
		type = "text";
		code_mime = "iso-8859-1"; 
	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
		code = "Non-ISO extended-ASCII";
		type = "text";
		code_mime = "unknown";
	} else {
		/* Last resort: convert with from_ebcdic() into nbuf and re-run
		   the ASCII / Latin-1 tests on the converted bytes. */
		from_ebcdic(buf, nbytes, nbuf);

		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
			code = "EBCDIC";
			type = "character data";
			code_mime = "ebcdic";
		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
			code = "International EBCDIC";
			type = "character data";
			code_mime = "ebcdic";
		} else {
			rv = 0;
			goto done;  /* doesn't look like text at all */
		}
	}

	/* A single byte is not reported as text. */
	if (nbytes <= 1) {
		rv = 0;
		goto done;
	}

	/* Convert ubuf to UTF-8 and try text soft magic */
	/* If original was ASCII or UTF-8, could use nbuf instead of
	   re-converting. */
	/* malloc size is a conservative overestimate; could be
	   improved, or at least realloced after conversion. */
	mlen = ulen * 6;
	if (!(utf8_buf = malloc(mlen))) {
		file_oomem(ms, mlen);
		goto done;
	}
	if (!(utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)))
		goto done;
	if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
		rv = 1;
		goto done;
	}

	/* look for tokens from names.h - this is expensive! */
	if ((ms->flags & R_MAGIC_NO_CHECK_TOKENS) != 0)
		goto subtype_identified;

	i = 0;
	while (i < ulen) {
		size_t end;

		/* skip past any leading space */
		while (i < ulen && ISSPC(ubuf[i]))
			i++;
		if (i >= ulen)
			break;

		/* find the next whitespace */
		/* NOTE(review): this scan is bounded by nbytes while every
		   other index into ubuf here is bounded by ulen -- looks like
		   it should be ulen; confirm. */
		for (end = i + 1; end < nbytes; end++)
			if (ISSPC(ubuf[end]))
				break;

		/* compare the word thus isolated against the token list */
		for (p = names; p < names + NNAMES; p++) {
			if (ascmatch((const ut8 *)p->name, ubuf + i,
			    end - i)) {
				subtype = types[p->type].human;
				subtype_mime = types[p->type].mime;
				goto subtype_identified;
			}
		}

		i = end;
	}

subtype_identified:

	/* Now try to discover other details about the file. */
	for (i = 0; i < ulen; i++) {
		if (ubuf[i] == '\n') {
			if (seen_cr)
				n_crlf++;
			else
				n_lf++;
			last_line_end = i;
		} else if (seen_cr)
			/* the previous CR was not followed by LF: a bare CR */
			n_cr++;

		seen_cr = (ubuf[i] == '\r');
		if (seen_cr)
			last_line_end = i;

		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
			n_nel++;
			last_line_end = i;
		}
		/* If this line is _longer_ than MAXLINELEN, remember it. */
		if (i > last_line_end + MAXLINELEN)
			has_long_lines = 1;

		if (ubuf[i] == '\033')
			has_escapes = 1;
		if (ubuf[i] == '\b')
			has_backspace = 1;
	}

	/* Beware, if the data has been truncated, the final CR could have
	   been followed by a LF.  If we have HOWMANY bytes, it indicates
	   that the data might have been truncated, probably even before
	   this function was called. */
	if (seen_cr && nbytes < HOWMANY)
		n_cr++;

	if (mime) {
		/* MIME output: type, then charset, depending on which
		   R_MAGIC_MIME bits are set in ms->flags. */
		if (mime & R_MAGIC_MIME_TYPE) {
			if (subtype_mime) {
				if (file_printf(ms, subtype_mime) == -1)
					goto done;
			} else {
				if (file_printf(ms, "text/plain") == -1)
					goto done;
			}
		}

		/* NOTE(review): `mime == 0` cannot be true inside this
		   `if (mime)` branch, so only mime == R_MAGIC_MIME matters. */
		if ((mime == 0 || mime == R_MAGIC_MIME) && code_mime) {
			if ((mime & R_MAGIC_MIME_TYPE) &&
			    file_printf(ms, " charset=") == -1)
				goto done;
			if (file_printf(ms, code_mime) == -1)
				goto done;
		}

		/* NOTE(review): unlike every other file_printf() failure in
		   this function, this one leaves with rv = 1 -- confirm that
		   is intended. */
		if (mime == R_MAGIC_MIME_ENCODING)
		    if (file_printf(ms, "binary") == -1){
                rv = 1;
                goto done;
            }
	} else {
		/* Human-readable output:
		   "<code> [<subtype>] <type>[, with ...]". */
		if (file_printf(ms, code) == -1)
			goto done;

		if (subtype) {
			if (file_printf(ms, " ") == -1)
				goto done;
			if (file_printf(ms, subtype) == -1)
				goto done;
		}

		if (file_printf(ms, " ") == -1)
			goto done;
		if (file_printf(ms, type) == -1)
			goto done;

		if (has_long_lines)
			if (file_printf(ms, ", with very long lines") == -1)
				goto done;

		/*
		 * Only report line terminators if we find one other than LF,
		 * or if we find none at all.
		 */
		if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
		    (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
			if (file_printf(ms, ", with") == -1)
				goto done;

			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)			{
				if (file_printf(ms, " no") == -1)
					goto done;
			} else {
				/* List each terminator kind that was seen, with
				   a comma when another kind follows. */
				if (n_crlf) {
					if (file_printf(ms, " CRLF") == -1)
						goto done;
					if (n_cr || n_lf || n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_cr) {
					if (file_printf(ms, " CR") == -1)
						goto done;
					if (n_lf || n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_lf) {
					if (file_printf(ms, " LF") == -1)
						goto done;
					if (n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_nel)
					if (file_printf(ms, " NEL") == -1)
						goto done;
			}

			if (file_printf(ms, " line terminators") == -1)
				goto done;
		}

		if (has_escapes)
			if (file_printf(ms, ", with escape sequences") == -1)
				goto done;
		if (has_backspace)
			if (file_printf(ms, ", with overstriking") == -1)
				goto done;
	}
	rv = 1;
done:
	/* Single exit point: release the scratch buffers (free(NULL) is a
	   no-op for those never allocated). */
	free (nbuf);
	free (ubuf);
	free (utf8_buf);
	return rv;
}