示例#1
0
/*
 * Like printf, only we print to a buffer and advance it.
 *
 * The formatted text is written at ms->o.ptr, where ms->o.len bytes are
 * available.  If it does not fit, ms->o.buf is enlarged so that it can hold
 * everything written so far plus the new text (and 1024 bytes of slack),
 * and the formatting is retried once.
 *
 * Returns 0 on success and -1 if the buffer cannot be grown or the
 * formatter reports a failure.
 */
protected int
file_printf(struct magic_set *ms, const char *fmt, ...)
{
	va_list ap;
	size_t len, used, size;
	char *buf;

	va_start(ap, fmt);
	len = _vsnprintf(ms->o.ptr, ms->o.len, fmt, ap);
	va_end(ap);

	if (len >= ms->o.len) {
		/* Offset of the write cursor; taken before realloc() so we
		 * never do arithmetic on a pointer that has been released. */
		used = (size_t)(ms->o.ptr - ms->o.buf);

		/* A negative formatter result converted to size_t, or an
		 * absurdly long string, must not wrap the size computation. */
		if (used > (size_t)-1 - 1024 || len > (size_t)-1 - 1024 - used)
			return -1;

		/* The new buffer has to hold the old contents as well. */
		size = used + len + 1024;
		if ((buf = realloc(ms->o.buf, size)) == NULL) {
			file_oomem(ms);
			return -1;
		}
		ms->o.buf = buf;
		ms->o.ptr = buf + used;
		ms->o.size = size;
		ms->o.len = size - used;

		va_start(ap, fmt);
		len = _vsnprintf(ms->o.ptr, ms->o.len, fmt, ap);
		va_end(ap);

		/* Still not fitting means the formatter failed. */
		if (len >= ms->o.len)
			return -1;
	}
	ms->o.ptr += len;
	ms->o.len -= len;
	return 0;
}
示例#2
0
/*
 * Handle one file or directory.
 *
 * ms:     magic handle; ms->haderr is cleared and ms->file is set to fn
 * fn:     path of the magic file or directory
 * action: FILE_COMPILE loads fn and writes the compiled form; any other
 *         value loads the entries and appends them to mlist
 * mlist:  head of the circular, doubly linked list that receives the entries
 *
 * Returns 0 on success (or the result of apprentice_compile() when
 * compiling) and -1 on failure.
 */
static int apprentice_1(RMagic *ms, const char *fn, int action, struct mlist *mlist) {
	struct r_magic *magic = NULL;
	ut32 nmagic = 0;
	struct mlist *ml;
	int rv = -1;
	int mapped;

	if (!ms) return -1;
	ms->haderr = 0;
	if (magicsize != FILE_MAGICSIZE) {
		file_error(ms, 0, "magic element size %lu != %lu",
		    (unsigned long)(size_t)sizeof (*magic),
		    (unsigned long)FILE_MAGICSIZE);
		return -1;
	}

	ms->file = fn; // fix use of ms->file before being initialized
	if (action == FILE_COMPILE) {
		rv = apprentice_load (ms, &magic, &nmagic, fn, action);
		if (rv != 0)
			return -1;
		rv = apprentice_compile (ms, &magic, &nmagic, fn);
		free (magic);
		return rv;
	}

	/* Prefer a precompiled database; fall back to parsing the sources. */
	if ((rv = apprentice_map (ms, &magic, &nmagic, fn)) == -1) {
		rv = apprentice_load (ms, &magic, &nmagic, fn, action);
		if (rv != 0)
			return -1;
	}

	/* rv records how `magic` was obtained; file_delmagic() is given it
	 * together with the pointer so it can release it the right way. */
	mapped = rv;

	if (magic == NULL) {
		file_delmagic (magic, mapped, nmagic);
		return -1;
	}

	if ((ml = malloc (sizeof (*ml))) == NULL) {
		/* file_delmagic() is the release routine for `magic`; the
		 * pointer must not be passed to free() a second time. */
		file_delmagic (magic, mapped, nmagic);
		file_oomem (ms, sizeof(*ml));
		return -1;
	}

	ml->magic = magic;
	ml->nmagic = nmagic;
	ml->mapped = mapped;

	/* Insert the new node just before the list head, i.e. at the tail. */
	mlist->prev->next = ml;
	ml->prev = mlist->prev;
	ml->next = mlist;
	mlist->prev = ml;
	return 0;
}
示例#3
0
/*
 * Load every magic file or directory named in a PATHSEP-separated list.
 *
 * const char *fn: list of magic files and directories; when NULL the MAGIC
 * environment variable is used, and MAGICFILE if that is unset too.
 *
 * Returns the head of a newly allocated circular list holding the loaded
 * entries, or NULL when memory runs out or no list element could be loaded.
 */
struct mlist * file_apprentice(RMagic *ms, const char *fn, int action) {
	struct mlist *head;
	char *paths, *sep;
	int rc, errs = -1;

	init_file_tables ();

	if (!fn) {
		fn = getenv ("MAGIC");
		if (!fn) {
			fn = MAGICFILE;
		}
	}

	/* Work on a private copy: the list is split in place below. */
	paths = strdup (fn);
	if (!paths) {
		file_oomem (ms, strlen (fn));
		return NULL;
	}

	head = malloc (sizeof (*head));
	if (!head) {
		free (paths);
		file_oomem (ms, sizeof (*head));
		return NULL;
	}
	head->next = head->prev = head;

	fn = paths;
	while (fn != NULL) {
		/* Cut the current element off at the next separator. */
		sep = strchr (fn, PATHSEP);
		if (sep != NULL) {
			*sep = '\0';
			sep++;
		}
		/* An empty element ends the walk. */
		if (fn[0] == '\0') {
			break;
		}
		rc = apprentice_1 (ms, fn, action, head);
		errs = R_MAX (errs, rc);
		fn = sep;
	}

	free (paths);
	if (errs != -1) {
		return head;
	}

	/* Nothing was loaded at all. */
	free (head);
	file_error (ms, 0, "could not find any magic files!");
	return NULL;
}
示例#4
0
文件: funcs.c 项目: KarjamP/radare2
/*
 * Make sure ms->c.li has a slot for continuation level `level`, growing the
 * array when necessary, and reset that slot's match state.
 *
 * Returns 0 on success, -1 when the array cannot be grown; in that case the
 * existing array and its recorded length are left untouched.
 */
int file_check_mem(RMagic *ms, unsigned int level) {
    if (level >= ms->c.len) {
        /* Size the array from the requested level: growing by a fixed
         * step would not guarantee that `level` is a valid index. */
        size_t count = (size_t)level + 20;
        size_t len;
        void *li;

        if (count > SIZE_MAX / sizeof (*ms->c.li)) {
            file_oomem (ms, count);
            return -1;
        }
        len = count * sizeof (*ms->c.li);

        /* realloc(NULL, n) behaves like malloc(n); keep the old pointer
         * until the call is known to have succeeded. */
        li = realloc (ms->c.li, len);
        if (li == NULL) {
            file_oomem (ms, len);
            return -1;
        }
        ms->c.li = li;
        ms->c.len = count;
    }
    ms->c.li[level].got_match = 0;
    ms->c.li[level].last_match = 0;
    ms->c.li[level].last_cond = COND_NONE;
    return 0;
}
示例#5
0
文件: funcs.c 项目: KarjamP/radare2
/*
 * Return the accumulated output text of `ms`.
 *
 * Returns NULL if an error was recorded (ms->haderr), if there is no output
 * buffer, or if memory runs out.  With R_MAGIC_RAW the output buffer is
 * returned as is.  Otherwise a copy is built in ms->o.pbuf in which every
 * non-printable character is replaced through OCTALIFY, and that copy is
 * returned.  ms->o.pbuf is reused (reallocated) on every call.
 */
const char *file_getbuffer(RMagic *ms) {
    char *pbuf, *op, *np;
    size_t psize, len;

    if (ms->haderr)
        return NULL;

    if (ms->flags & R_MAGIC_RAW)
        return ms->o.buf;

    if (ms->o.buf == NULL) {
        eprintf ("ms->o.buf = NULL\n");
        return NULL;
    }

    /* * 4 is for octal representation, + 1 is for NUL */
    len = strlen (ms->o.buf);
    if (len > (SIZE_MAX - 1) / 4) {
        file_oomem (ms, len);
        return NULL;
    }
    psize = len * 4 + 1;
    if ((pbuf = realloc (ms->o.pbuf, psize)) == NULL) {
        file_oomem (ms, psize);
        return NULL;
    }
    ms->o.pbuf = pbuf;

#if 1
//defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
    {
        /* First attempt: decode the text as a multi-byte string so that
         * printable wide characters are copied through untouched. */
        mbstate_t state;
        wchar_t nextchar;
        int mb_conv = 1;
        size_t bytesconsumed;
        char *eop;
        (void)memset(&state, 0, sizeof(mbstate_t));

        np = ms->o.pbuf;
        op = ms->o.buf;
        eop = op + len;

        while (op < eop) {
            bytesconsumed = mbrtowc(&nextchar, op,
                                    (size_t)(eop - op), &state);
            if (bytesconsumed == (size_t)(-1) ||
                    bytesconsumed == (size_t)(-2)) {
                /* Invalid or incomplete sequence: give up on the
                 * multi-byte interpretation. */
                mb_conv = 0;
                break;
            }

            if (iswprint(nextchar)) {
                (void)memcpy(np, op, bytesconsumed);
                op += bytesconsumed;
                np += bytesconsumed;
            } else {
                /* OCTALIFY is what moves op forward here, one
                 * source byte per invocation. */
                while (bytesconsumed-- > 0)
                    OCTALIFY(np, op);
            }
        }
        *np = '\0';

        /* Parsing succeeded as a multi-byte sequence */
        if (mb_conv != 0)
            return ms->o.pbuf;
    }
#endif
    /*
     * Fallback: start over and treat the text as single bytes.  Both
     * branches advance op exactly once; OCTALIFY already steps op (the
     * loop above depends on that), so the loop header must not advance it
     * again or a byte -- possibly the terminating NUL -- would be skipped.
     */
    for (np = ms->o.pbuf, op = ms->o.buf; *op; ) {
        if (isprint ((ut8)*op)) {
            *np++ = *op++;
        } else {
            OCTALIFY (np, op);
        }
    }
    *np = '\0';
    return ms->o.pbuf;
}
示例#6
0
/*
 * parse one line from magic file, put into magic[index++] if valid
 *
 * ms:       magic handle; warnings are only issued when R_MAGIC_CHECK is
 *           set in ms->flags
 * mentryp:  array of top-level entries; grown (reallocated) here as needed
 * nmentryp: number of completed top-level entries; incremented after a
 *           level-0 line has been parsed successfully
 * line:     the NUL-terminated magic line
 * lineno:   recorded in the parsed entry
 * action:   handed to getvalue(); FILE_CHECK additionally dumps the entry
 *
 * A line starting with one or more '>' is a continuation and is appended
 * to the most recent top-level entry.  The continuation level of the
 * previous line is kept in a function-static variable.
 *
 * Returns 0 on success and -1 on error.
 */
static int parse(RMagic *ms, struct r_magic_entry **mentryp, ut32 *nmentryp, const char *line, size_t lineno, int action) {
	static ut32 last_cont_level = 0;
	size_t i;
	struct r_magic_entry *me;
	struct r_magic *m;
	const char *l = line;
	char *t;
	int op;
	ut32 cont_level = 0;

	/* Count the leading '>' characters: that is the continuation level. */
	for (; *l == '>'; l++, cont_level++);
	if (cont_level == 0 || cont_level > last_cont_level)
		if (file_check_mem (ms, cont_level) == -1)
			return -1;
	last_cont_level = cont_level;
#define ALLOC_CHUNK	(size_t)10
#define ALLOC_INCR	(size_t)200
	if (cont_level != 0) {
		/* Continuation line: append to the last top-level entry. */
		if (*nmentryp == 0) {
			file_error(ms, 0, "No current entry for continuation");
			return -1;
		}
		me = &(*mentryp)[*nmentryp - 1];
		if (me->cont_count == me->max_count) {
			struct r_magic *nm;
			size_t cnt = me->max_count + ALLOC_CHUNK;
			if (!(nm = realloc(me->mp, sizeof (*nm) * cnt))) {
				file_oomem(ms, sizeof (*nm) * cnt);
				return -1;
			}
			me->mp = nm;
			me->max_count = cnt;
		}
		m = &me->mp[me->cont_count++];
		(void)memset(m, 0, sizeof (*m));
		m->cont_level = cont_level;
	} else {
		/* Top-level line: start a new entry, growing the array first
		 * when it is full. */
		if (*nmentryp == maxmagic) {
			struct r_magic_entry *mp;

			maxmagic += ALLOC_INCR;
			if (!(mp = realloc (*mentryp, sizeof (*mp) * maxmagic))) {
				file_oomem (ms, sizeof (*mp) * maxmagic);
				return -1;
			}
			(void)memset(&mp[*nmentryp], 0, sizeof (*mp) *
			    ALLOC_INCR);
			*mentryp = mp;
		}
		me = &(*mentryp)[*nmentryp];
		if (!me->mp) {
			if (!(m = malloc (sizeof (*m) * ALLOC_CHUNK))) {
				file_oomem (ms, sizeof (*m) * ALLOC_CHUNK);
				return -1;
			}
			me->mp = m;
			me->max_count = ALLOC_CHUNK;
		} else {
			/* The slot already owns storage (it is only counted
			 * once a line parses successfully): reuse it. */
			m = me->mp;
		}
		(void)memset(m, 0, sizeof (*m));
		m->cont_level = 0;
		me->cont_count = 1;
	}
	m->lineno = lineno;

	if (*l == '&') {  /* m->cont_level == 0 checked below. */
		++l;            /* step over */
		m->flag |= OFFADD;
	}
	if (*l == '(') {
		++l;		/* step over */
		m->flag |= INDIR;
		if (m->flag & OFFADD)
			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;

		if (*l == '&') {  /* m->cont_level == 0 checked below */
			++l;            /* step over */
			m->flag |= OFFADD;
		}
	}
	/* Indirect offsets are not valid at level 0. */
	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "relative offset at level 0");

	/* get offset, then skip over it */
	m->offset = (ut32)strtoul(l, &t, 0);
	if ((l == t) && (ms->flags & R_MAGIC_CHECK))
		file_magwarn(ms, "offset `%s' invalid", l);
	l = t;

	if (m->flag & INDIR) {
		m->in_type = FILE_LONG;
		m->in_offset = 0;
		/*
		 * read [.lbs][+-]nnnnn)
		 */
		if (*l == '.') {
			l++;
			switch (*l) {
			case 'l':
				m->in_type = FILE_LELONG;
				break;
			case 'L':
				m->in_type = FILE_BELONG;
				break;
			case 'm':
				m->in_type = FILE_MELONG;
				break;
			case 'h':
			case 's':
				m->in_type = FILE_LESHORT;
				break;
			case 'H':
			case 'S':
				m->in_type = FILE_BESHORT;
				break;
			case 'c':
			case 'b':
			case 'C':
			case 'B':
				m->in_type = FILE_BYTE;
				break;
			case 'e':
			case 'f':
			case 'g':
				m->in_type = FILE_LEDOUBLE;
				break;
			case 'E':
			case 'F':
			case 'G':
				m->in_type = FILE_BEDOUBLE;
				break;
			default:
				if (ms->flags & R_MAGIC_CHECK)
					file_magwarn(ms,
					    "indirect offset type `%c' invalid",
					    *l);
				break;
			}
			/* Step over the type letter, but never over the
			 * terminating NUL of a truncated line. */
			if (*l != '\0')
				l++;
		}

		m->in_op = 0;
		if (*l == '~') {
			m->in_op |= FILE_OPINVERSE;
			l++;
		}
		if ((op = get_op(*l)) != -1) {
			m->in_op |= op;
			l++;
		}
		if (*l == '(') {
			m->in_op |= FILE_OPINDIRECT;
			l++;
		}
		if (isdigit((ut8)*l) || *l == '-') {
			m->in_offset = (int32_t)strtol(l, &t, 0);
			if (l == t)
				if (ms->flags & R_MAGIC_CHECK)
					file_magwarn(ms,
					    "in_offset `%s' invalid", l);
			l = t;
		}
		/*
		 * Expect the closing ')' -- two of them when the in_offset
		 * was itself written in parentheses.  One character is
		 * consumed per expected ')' whether or not it matches, except
		 * that the cursor is never moved past the end of the line.
		 */
		{
			int closed = (*l == ')');

			if (*l != '\0')
				l++;
			if (closed && (m->in_op & FILE_OPINDIRECT)) {
				closed = (*l == ')');
				if (*l != '\0')
					l++;
			}
			if (!closed && (ms->flags & R_MAGIC_CHECK))
				file_magwarn(ms,
				    "missing ')' in indirect offset");
		}
	}
	EATAB;

	m->cond = get_cond(l, &l);
	if (check_cond(ms, m->cond, cont_level) == -1)
		return -1;
	EATAB;

	if (*l == 'u') {
		++l;
		m->flag |= UNSIGNED;
	}

	m->type = get_type(l, &l);
	if (m->type == FILE_INVALID) {
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "type `%s' invalid", l);
		return -1;
	}

	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */

	m->mask_op = 0;
	if (*l == '~') {
		if (!MAGIC_IS_STRING (m->type))
			m->mask_op |= FILE_OPINVERSE;
		else if (ms->flags & R_MAGIC_CHECK)
			file_magwarn (ms, "'~' invalid for string types");
		++l;
	}
	m->str_range = 0;
	m->str_flags = 0;
	m->num_mask = 0;
	if ((op = get_op (*l)) != -1) {
		if (!MAGIC_IS_STRING (m->type)) {
			/* Numeric type: operator followed by the mask value. */
			ut64 val;
			++l;
			m->mask_op |= op;
			val = (ut64)strtoull (l, &t, 0);
			l = t;
			m->num_mask = file_signextend (ms, m, val);
			eatsize (&l);
		}
		else if (op == FILE_OPDIVIDE) {
			/* String type: '/' introduces a range and/or flags. */
			int have_range = 0;
			while (!isspace ((ut8)*++l)) {
				switch (*l) {
				case '0':  case '1':  case '2':
				case '3':  case '4':  case '5':
				case '6':  case '7':  case '8':
				case '9':
					if (have_range &&
					    (ms->flags & R_MAGIC_CHECK))
						file_magwarn(ms,
						    "multiple ranges");
					have_range = 1;
					m->str_range = strtoul(l, &t, 0);
					if (m->str_range == 0)
						file_magwarn(ms,
						    "zero range");
					/* the loop header pre-increments l */
					l = t - 1;
					break;
				case CHAR_COMPACT_BLANK:
					m->str_flags |= STRING_COMPACT_BLANK;
					break;
				case CHAR_COMPACT_OPTIONAL_BLANK:
					m->str_flags |=
					    STRING_COMPACT_OPTIONAL_BLANK;
					break;
				case CHAR_IGNORE_LOWERCASE:
					m->str_flags |= STRING_IGNORE_LOWERCASE;
					break;
				case CHAR_IGNORE_UPPERCASE:
					m->str_flags |= STRING_IGNORE_UPPERCASE;
					break;
				case CHAR_REGEX_OFFSET_START:
					m->str_flags |= REGEX_OFFSET_START;
					break;
				default:
					/* Also reached at end of line, which
					 * stops the scan at the NUL. */
					if (ms->flags & R_MAGIC_CHECK)
						file_magwarn(ms,
						"string extension `%c' invalid",
						*l);
					return -1;
				}
				/* allow multiple '/' for readability */
				if (l[1] == '/' && !isspace ((ut8)l[2]))
					l++;
			}
			if (string_modifier_check(ms, m) == -1)
				return -1;
		} else {
			/* Report the offending operator itself; `t` still
			 * points into the offset field parsed earlier. */
			if (ms->flags & R_MAGIC_CHECK)
				file_magwarn(ms, "invalid string op: %c", *l);
			return -1;
		}
	}
	/*
	 * We used to set mask to all 1's here, instead let's just not do
	 * anything if mask = 0 (unless you have a better idea)
	 */
	EATAB;

	switch (*l) {
	case '>':
	case '<':
	/* Old-style anding: "0 byte &0x80 dynamically linked" */
	case '&':
	case '^':
	case '=':
		m->reln = *l;
		++l;
		if (*l == '=') {
			/* HP compat: ignore &= etc. */
			++l;
		}
		break;
	case '!':
		m->reln = *l;
		++l;
		break;
	default:
		m->reln = '=';	/* the default relation */
		/* A lone 'x' (followed by blank or end of line) matches
		 * any value. */
		if (*l == 'x' && ((isascii((ut8)l[1]) &&
				isspace ((ut8)l[1])) || !l[1])) {
			m->reln = *l;
			++l;
		}
		break;
	}
	/*
	 * Grab the value part, except for an 'x' reln.
	 */
	if (m->reln != 'x' && getvalue (ms, m, &l, action))
		return -1;

	/*
	 * TODO finish this macro and start using it!
	 * #define offsetcheck {if (offset > HOWMANY-1)
	 *	magwarn("offset too big"); }
	 */

	/*
	 * Now get last part - the description
	 */
	EATAB;
	/* A leading backspace (literal or written as "\b") suppresses the
	 * space normally printed before the description. */
	if (l[0] == '\b') {
		++l;
		m->flag |= NOSPACE;
	} else if ((l[0] == '\\') && (l[1] == 'b')) {
		++l;
		++l;
		m->flag |= NOSPACE;
	}
	/* Copy the description, stopping at the NUL or when desc is full. */
	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof (m->desc); )
		continue;
	if (i == sizeof (m->desc)) {
		m->desc[sizeof (m->desc) - 1] = '\0';
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "description `%s' truncated", m->desc);
	}

	/*
	 * We only do this check while compiling, or if any of the magic
	 * files were not compiled.
	 */
	if (ms->flags & R_MAGIC_CHECK)
		if (check_format (ms, m) == -1)
			return -1;
	if (action == FILE_CHECK)
		file_mdump (m);
	m->mimetype[0] = '\0';		/* initialise MIME type to none */
	if (m->cont_level == 0)
		++(*nmentryp);		/* make room for next */
	return 0;
}
示例#7
0
/*
 * parse a file or directory of files
 * const char *fn: name of magic file or directory
 *
 * Every regular, non-hidden file directly inside a directory is loaded; a
 * plain path is loaded as a single file.  The parsed entries are
 * classified, sorted and flattened into one newly allocated array.
 *
 * On success 0 is returned, *magicp receives the array (owned by the
 * caller) and *nmagicp its element count.  On failure a non-zero value is
 * returned; except for the sandbox refusal, *magicp is then set to NULL
 * and *nmagicp to 0.
 *
 * Side effect: R_MAGIC_CHECK is switched on in ms->flags and stays on.
 */
static int apprentice_load(RMagic *ms, struct r_magic **magicp, ut32 *nmagicp, const char *fn, int action) {
	ut32 marraycount, i, mentrycount = 0, starttest;
	struct r_magic_entry *marray;
	char subfn[MAXPATHLEN];
	struct dirent *d;
	struct stat st;
	int errs = 0;
	DIR *dir;

	ms->flags |= R_MAGIC_CHECK;	/* Enable checks for parsed files */

	maxmagic = MAXMAGIS;
	if (!(marray = calloc (maxmagic, sizeof (*marray)))) {
		file_oomem (ms, maxmagic * sizeof (*marray));
		return -1;
	}
	marraycount = 0;

	/* print silly verbose header for USG compat. */
	if (action == FILE_CHECK)
		eprintf ("%s\n", usg_hdr);

	/* load directory or file */
	if (stat (fn, &st) == 0 && S_ISDIR (st.st_mode)) {
		if (r_sandbox_enable (0) && !r_sandbox_check_path (fn)) {
			free (marray);
			return  -1;
		}
		dir = opendir (fn);
		if (dir) {
			while ((d = readdir (dir))) {
				int n;

				if (*d->d_name=='.') continue;
				n = snprintf (subfn, sizeof (subfn), "%s/%s", fn, d->d_name);
				/* A truncated path would name some other
				 * file; skip it rather than load that. */
				if (n < 0 || (size_t)n >= sizeof (subfn))
					continue;
				if (stat (subfn, &st) == 0 && S_ISREG (st.st_mode))
					load_1 (ms, action, subfn, &errs, &marray, &marraycount);
			}
			closedir (dir);
		} else errs++;
	} else load_1 (ms, action, fn, &errs, &marray, &marraycount);
	if (errs)
		goto out;

	/* Set types of tests */
	for (i = 0; i < marraycount; ) {
		if (marray[i].mp->cont_level != 0) {
			i++;
			continue;
		}

		/* Classify this level-0 entry together with the entries
		 * following it whose first element is a continuation. */
		starttest = i;
		do {
			set_test_type(marray[starttest].mp, marray[i].mp);
			if (ms->flags & R_MAGIC_DEBUG) {
				(void)fprintf(stderr, "%s%s%s: %s\n",
					marray[i].mp->mimetype,
					marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
					marray[i].mp->desc[0] ? marray[i].mp->desc : "(no description)",
					marray[i].mp->flag & BINTEST ? "binary" : "text");
				if (marray[i].mp->flag & BINTEST) {
/* SYMLEN is the length of the word, without the string's NUL, so that
 * p[SYMLEN] is the character right after the word. */
#define SYMBOL "text"
#define SYMLEN (sizeof (SYMBOL) - 1)
					char *p = strstr(marray[i].mp->desc, SYMBOL);
					if (p && (p == marray[i].mp->desc || isspace((unsigned char)p[-1])) &&
					    (p + SYMLEN - marray[i].mp->desc == MAXstring ||
					     (p[SYMLEN] == '\0' || isspace((unsigned char)p[SYMLEN])))) {
						(void)fprintf(stderr,
							      "*** Possible binary test for text type\n");
					}
#undef SYMBOL
#undef SYMLEN
				}
			}
		} while (++i < marraycount && marray[i].mp->cont_level != 0);
	}

	qsort (marray, marraycount, sizeof (*marray), apprentice_sort);

	/*
	 * Make sure that any level 0 "default" line is last (if one exists).
	 */
	for (i = 0; i < marraycount; i++) {
		if (marray[i].mp->cont_level == 0 &&
		    marray[i].mp->type == FILE_DEFAULT) {
			while (++i < marraycount)
				if (marray[i].mp->cont_level == 0)
					break;
			if (i != marraycount) {
				ms->line = marray[i].mp->lineno; /* XXX - Ugh! */
				file_magwarn (ms, "level 0 \"default\" did not sort last");
			}
			break;
		}
	}

	/* Total number of magic records across all entries. */
	for (i = 0; i < marraycount; i++)
		mentrycount += marray[i].cont_count;

	/* The extra byte keeps the request non-zero when nothing was loaded. */
	if (!(*magicp = malloc (1 + (sizeof (**magicp) * mentrycount)))) {
		file_oomem (ms, sizeof (**magicp) * mentrycount);
		errs++;
		goto out;
	}

	/* Flatten the per-entry arrays into the single output array. */
	mentrycount = 0;
	for (i = 0; i < marraycount; i++) {
		(void)memcpy (*magicp + mentrycount, marray[i].mp,
		    marray[i].cont_count * sizeof (**magicp));
		mentrycount += marray[i].cont_count;
	}
out:
	for (i = 0; i < marraycount; i++)
		free(marray[i].mp);
	free (marray);
	if (errs) {
		*magicp = NULL;
		*nmagicp = 0;
		return errs;
	}
	*nmagicp = mentrycount;
	return 0;
}
示例#8
0
/*
 * handle a compiled file.
 *
 * The database name is derived from fn with mkdbname().  The file is
 * mapped (QUICK) or read into allocated memory, its magic number and
 * version are verified, and it is byte-swapped when it was written with
 * the opposite byte order.
 *
 * On success *magicp points just past the header record, *nmagicp holds
 * the number of records after it, and RET is returned (2 when the data is
 * mmap()ed, 1 when it lives in malloc()ed memory).
 *
 * On failure -1 is returned.  Once the database has been opened, failure
 * also sets *magicp to NULL and *nmagicp to 0.
 */
static int apprentice_map(RMagic *ms, struct r_magic **magicp, ut32 *nmagicp, const char *fn) {
	int fd;
	struct stat st;
	ut32 *ptr;
	ut32 version;
	int needsbyteswap;
	char *dbname = NULL;
	void *mm = NULL;

	dbname = mkdbname (fn, 0);
	if (!dbname)
		goto error2;

	if ((fd = r_sandbox_open (dbname, O_RDONLY|O_BINARY, 0)) == -1)
		goto error2;

	if (fstat(fd, &st) == -1) {
		file_error (ms, errno, "cannot stat `%s'", dbname);
		goto error1;
	}
	/* The header check below reads two 32-bit words. */
	if (st.st_size < 8) {
		file_error (ms, 0, "file `%s' is too small", dbname);
		goto error1;
	}

#ifdef QUICK
	if ((mm = mmap (0, (size_t)st.st_size, PROT_READ, //OPENBSDBUG  |PROT_WRITE,
	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
		/* The format has a %s, so the name must be supplied. */
		file_error (ms, errno, "cannot map `%s'", dbname);
		/* MAP_FAILED is not NULL: make sure cleanup does not try
		 * to unmap it. */
		mm = NULL;
		goto error1;
	}
#define RET	2
#else
	if (!(mm = malloc ((size_t)st.st_size))) {
		file_oomem(ms, (size_t)st.st_size);
		goto error1;
	}
	if (read (fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
		file_badread(ms);
		goto error1;
	}
#define RET	1
#endif
	*magicp = mm;
	(void)close (fd);
	fd = -1;
	ptr = (ut32 *)(void *)*magicp;

	/* First word: magic number, possibly in the other byte order. */
	if (*ptr != MAGICNO) {
		if (swap4(*ptr) != MAGICNO) {
			file_error(ms, 0, "bad magic in `%s'", dbname);
			goto error1;
		}
		needsbyteswap = 1;
	} else needsbyteswap = 0;

	/* Second word: format version. */
	version = needsbyteswap? swap4(ptr[1]): ptr[1];
	if (version != VERSIONNO) {
		file_error(ms, 0, "File %d.%d supports only %d version magic "
		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
		    VERSIONNO, dbname, version);
		goto error1;
	}
	/* The first record-sized slot is the header; skip it. */
	*nmagicp = (ut32)(st.st_size / sizeof (struct r_magic));
	if (*nmagicp > 0)
		(*nmagicp)--;
	(*magicp)++;
	if (needsbyteswap)
		byteswap (*magicp, *nmagicp);
	free (dbname);
	return RET;

error1:
	if (fd != -1)
		(void)close (fd);
	if (mm) {
#ifdef QUICK
		(void)munmap((void *)mm, (size_t)st.st_size);
#else
		free(mm);
#endif
	}
	/* Never leave the caller with a pointer into released memory. */
	*magicp = NULL;
	*nmagicp = 0;
error2:
	free (dbname);
	return -1;
}
示例#9
0
/*
 * Text classifier for a buffer of nbytes bytes at buf.
 *
 * NOTE: the function is currently switched off.  The first statement is an
 * unconditional `return 0;`, so it always returns 0 and none of the code
 * below it is ever executed.
 *
 * If that early return were removed, the body would try a sequence of
 * character-set detectors (ASCII, UTF-8 with and without BOM, UTF-16,
 * ISO-8859, extended ASCII, EBCDIC), run the text soft-magic tests on a
 * UTF-8 re-encoding, look for known tokens, collect line-terminator and
 * control-character statistics, and print a description through
 * file_printf().  rv starts at -1, becomes 0 when the data does not look
 * like text (or is at most one byte long) and 1 when soft magic matched or
 * the description was printed.
 */
int file_ascmagic(RMagic *ms, const ut8 *buf, size_t nbytes) {
/* Disabled: everything after this statement is unreachable. */
return 0;
	size_t i;
	ut8 *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
	unichar *ubuf = NULL;	
	size_t ulen, mlen;
	const struct names *p;
	int rv = -1;
	int mime = ms->flags & R_MAGIC_MIME;

	/* Human-readable and MIME names of the detected encoding / type. */
	const char *code = NULL;
	const char *code_mime = NULL;
	const char *type = NULL;
	const char *subtype = NULL;
	const char *subtype_mime = NULL;

	int has_escapes = 0;
	int has_backspace = 0;
	int seen_cr = 0;

	/* Counters for each kind of line terminator seen. */
	int n_crlf = 0;
	int n_lf = 0;
	int n_cr = 0;
	int n_nel = 0;

	/* (size_t)-1 so that "last_line_end + MAXLINELEN" wraps around to
	   MAXLINELEN - 1 while no line end has been seen yet. */
	size_t last_line_end = (size_t)-1;
	int has_long_lines = 0;

	/*
	 * Undo the NUL-termination kindly provided by process()
	 * but leave at least one byte to look at
	 */
	while (nbytes > 1 && buf[nbytes - 1] == '\0')
		nbytes--;

	/* Scratch buffers: nbuf for the EBCDIC translation, ubuf for the
	   decoded characters.  Both are zero-filled by calloc. */
	if (!(nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))))
		goto done;
	if (!(ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))))
		goto done;

	/*
	 * Then try to determine whether it's any character code we can
	 * identify.  Each of these tests, if it succeeds, will leave
	 * the text converted into one-unichar-per-character Unicode in
	 * ubuf, and the number of characters converted in ulen.
	 */
	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
		code = "ASCII";
		code_mime = "us-ascii";
		type = "text";
	} else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
		code = "UTF-8 Unicode (with BOM)";
		code_mime = "utf-8";
		type = "text";
	} else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
		code = "UTF-8 Unicode";
		code_mime = "utf-8";
		type = "text";
	} else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
		/* A result of 1 is reported as little-endian, any other
		   non-zero result as big-endian. */
		if (i == 1)
			code = "Little-endian UTF-16 Unicode";
		else
			code = "Big-endian UTF-16 Unicode";

		type = "character data";
		code_mime = "utf-16";    /* is this defined? */
	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
		/* NOTE(review): memcmp reads 4 bytes although nbytes may be
		   smaller here -- confirm that callers pad the buffer. */
		if (!memcmp (buf, "\xff\xff\xff\xff", 4)) {
			// uninitialized memory is not iso-8859!!
			goto done;
		}
		code = "ISO-8859";
		type = "text";
		code_mime = "iso-8859-1"; 
	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
		code = "Non-ISO extended-ASCII";
		type = "text";
		code_mime = "unknown";
	} else {
		/* Last resort: translate from EBCDIC into nbuf and retry. */
		from_ebcdic(buf, nbytes, nbuf);

		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
			code = "EBCDIC";
			type = "character data";
			code_mime = "ebcdic";
		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
			code = "International EBCDIC";
			type = "character data";
			code_mime = "ebcdic";
		} else {
			rv = 0;
			goto done;  /* doesn't look like text at all */
		}
	}

	if (nbytes <= 1) {
		rv = 0;
		goto done;
	}

	/* Convert ubuf to UTF-8 and try text soft magic */
	/* If original was ASCII or UTF-8, could use nbuf instead of
	   re-converting. */
	/* malloc size is a conservative overestimate; could be
	   re-converting improved, or at least realloced after
	   re-converting conversion. */
	mlen = ulen * 6;
	if (!(utf8_buf = malloc(mlen))) {
		file_oomem(ms, mlen);
		goto done;
	}
	if (!(utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)))
		goto done;
	if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
		rv = 1;
		goto done;
	}

	/* look for tokens from names.h - this is expensive! */
	if ((ms->flags & R_MAGIC_NO_CHECK_TOKENS) != 0)
		goto subtype_identified;

	i = 0;
	while (i < ulen) {
		size_t end;

		/* skip past any leading space */
		while (i < ulen && ISSPC(ubuf[i]))
			i++;
		if (i >= ulen)
			break;

		/* find the next whitespace */
		/* NOTE(review): this scan is bounded by nbytes while the
		   surrounding loops use ulen -- confirm which is intended. */
		for (end = i + 1; end < nbytes; end++)
			if (ISSPC(ubuf[end]))
				break;

		/* compare the word thus isolated against the token list */
		for (p = names; p < names + NNAMES; p++) {
			if (ascmatch((const ut8 *)p->name, ubuf + i,
			    end - i)) {
				subtype = types[p->type].human;
				subtype_mime = types[p->type].mime;
				goto subtype_identified;
			}
		}

		i = end;
	}

subtype_identified:

	/* Now try to discover other details about the file. */
	for (i = 0; i < ulen; i++) {
		if (ubuf[i] == '\n') {
			/* An LF directly after a CR counts as one CRLF. */
			if (seen_cr)
				n_crlf++;
			else
				n_lf++;
			last_line_end = i;
		} else if (seen_cr)
			n_cr++;

		seen_cr = (ubuf[i] == '\r');
		if (seen_cr)
			last_line_end = i;

		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
			n_nel++;
			last_line_end = i;
		}
		/* If this line is _longer_ than MAXLINELEN, remember it. */
		if (i > last_line_end + MAXLINELEN)
			has_long_lines = 1;

		if (ubuf[i] == '\033')
			has_escapes = 1;
		if (ubuf[i] == '\b')
			has_backspace = 1;
	}

	/* Beware, if the data has been truncated, the final CR could have
	   been followed by a LF.  If we have HOWMANY bytes, it indicates
	   that the data might have been truncated, probably even before
	   this function was called. */
	if (seen_cr && nbytes < HOWMANY)
		n_cr++;

	/* NOTE(review): several file_printf() calls below pass a variable
	   as the format string; the values come from the tables and
	   literals above, but "%s" would be the safer form. */
	if (mime) {
		if (mime & R_MAGIC_MIME_TYPE) {
			if (subtype_mime) {
				if (file_printf(ms, subtype_mime) == -1)
					goto done;
			} else {
				if (file_printf(ms, "text/plain") == -1)
					goto done;
			}
		}

		/* mime is non-zero in this branch, so only the
		   mime == R_MAGIC_MIME half of the test can be true. */
		if ((mime == 0 || mime == R_MAGIC_MIME) && code_mime) {
			if ((mime & R_MAGIC_MIME_TYPE) &&
			    file_printf(ms, " charset=") == -1)
				goto done;
			if (file_printf(ms, code_mime) == -1)
				goto done;
		}

		/* NOTE(review): rv is set to 1 when this file_printf()
		   fails, unlike every other failure path -- confirm. */
		if (mime == R_MAGIC_MIME_ENCODING)
		    if (file_printf(ms, "binary") == -1){
                rv = 1;
                goto done;
            }
	} else {
		/* Plain description: "<code> [<subtype> ]<type>[, details]" */
		if (file_printf(ms, code) == -1)
			goto done;

		if (subtype) {
			if (file_printf(ms, " ") == -1)
				goto done;
			if (file_printf(ms, subtype) == -1)
				goto done;
		}

		if (file_printf(ms, " ") == -1)
			goto done;
		if (file_printf(ms, type) == -1)
			goto done;

		if (has_long_lines)
			if (file_printf(ms, ", with very long lines") == -1)
				goto done;

		/*
		 * Only report line terminators if we find one other than LF,
		 * or if we find none at all.
		 */
		if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
		    (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
			if (file_printf(ms, ", with") == -1)
				goto done;

			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)			{
				if (file_printf(ms, " no") == -1)
					goto done;
			} else {
				/* List the kinds found, comma-separated. */
				if (n_crlf) {
					if (file_printf(ms, " CRLF") == -1)
						goto done;
					if (n_cr || n_lf || n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_cr) {
					if (file_printf(ms, " CR") == -1)
						goto done;
					if (n_lf || n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_lf) {
					if (file_printf(ms, " LF") == -1)
						goto done;
					if (n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_nel)
					if (file_printf(ms, " NEL") == -1)
						goto done;
			}

			if (file_printf(ms, " line terminators") == -1)
				goto done;
		}

		if (has_escapes)
			if (file_printf(ms, ", with escape sequences") == -1)
				goto done;
		if (has_backspace)
			if (file_printf(ms, ", with overstriking") == -1)
				goto done;
	}
	rv = 1;
done:
	/* Common exit: release the scratch buffers (free(NULL) is a no-op). */
	free (nbuf);
	free (ubuf);
	free (utf8_buf);
	return rv;
}
示例#10
0
/*
 * Print the value held in ms->ms_value for magic entry `m` through
 * file_printf(), using R_MAGIC_DESC as the format.
 *
 * For the numeric and floating-point types, check_fmt() decides how the
 * value is passed: -1 is an error, 1 means the value is first rendered
 * into text and that text is passed, anything else passes the value
 * itself.
 *
 * Returns the offset just past the printed item (the start offset for
 * REGEX/SEARCH entries with REGEX_OFFSET_START, and ms->offset for
 * FILE_DEFAULT), or -1 on error.
 */
static st32 mprint(RMagic *ms, struct r_magic *m) {
    ut64 v;
    float vf;
    double vd;
    ut64 t = 0;
    /* Scratch space for the pre-rendered values.  A fixed stack buffer
     * replaces the former heap allocations, whose results were passed to
     * snprintf() without a NULL check. */
    char buf[32];
    union VALUETYPE *p = &ms->ms_value;

    switch (m->type) {
    case FILE_BYTE:
        v = file_signextend(ms, m, (ut64)p->b);
        switch (check_fmt(ms, m)) {
        case -1:
            return -1;
        case 1:
            if (snprintf (buf, sizeof (buf), "%c", (ut8)v) < 0)
                return -1;
            if (file_printf (ms, R_MAGIC_DESC, buf) == -1)
                return -1;
            break;
        default:
            if (file_printf(ms, R_MAGIC_DESC, (ut8) v) == -1)
                return -1;
            break;
        }
        t = ms->offset + sizeof(char);
        break;
    case FILE_SHORT:
    case FILE_BESHORT:
    case FILE_LESHORT:
        v = file_signextend (ms, m, (ut64)p->h);
        switch (check_fmt (ms, m)) {
        case -1:
            return -1;
        case 1:
            if (snprintf (buf, sizeof (buf), "%hu", (unsigned short)v) < 0)
                return -1;
            if (file_printf(ms, R_MAGIC_DESC, buf) == -1)
                return -1;
            break;
        default:
            if (file_printf(ms, R_MAGIC_DESC, (unsigned short) v) == -1)
                return -1;
            break;
        }
        t = ms->offset + sizeof(short);
        break;
    case FILE_LONG:
    case FILE_BELONG:
    case FILE_LELONG:
    case FILE_MELONG:
        v = file_signextend(ms, m, (ut64)p->l);
        switch (check_fmt(ms, m)) {
        case -1:
            return -1;
        case 1:
            if (snprintf (buf, sizeof (buf), "%u", (ut32)v) < 0)
                return -1;
            if (file_printf(ms, R_MAGIC_DESC, buf) == -1)
                return -1;
            break;
        default:
            if (file_printf(ms, R_MAGIC_DESC, (ut32) v) == -1)
                return -1;
            break;
        }
        t = ms->offset + sizeof(st32);
        break;
    case FILE_QUAD:
    case FILE_BEQUAD:
    case FILE_LEQUAD:
        v = file_signextend(ms, m, p->q);
        if (file_printf(ms, R_MAGIC_DESC, (ut64) v) == -1)
            return -1;
        t = ms->offset + sizeof(ut64);
        break;

    case FILE_STRING:
    case FILE_PSTRING:
    case FILE_BESTRING16:
    case FILE_LESTRING16:
        if (m->reln == '=' || m->reln == '!') {
            /* Exact (in)equality test: print the pattern itself. */
            if (file_printf (ms, R_MAGIC_DESC, m->value.s) == -1)
                return -1;
            t = ms->offset + m->vallen;
        }
        else {
            /* With an empty pattern only the first line of the data
             * is printed. */
            if (*m->value.s == '\0')
                p->s[strcspn (p->s, "\n")] = '\0';
            if (file_printf (ms, R_MAGIC_DESC, p->s) == -1)
                return -1;
            t = ms->offset + strlen (p->s);
            if (m->type == FILE_PSTRING)
                t++;
        }
        break;
    case FILE_DATE:
    case FILE_BEDATE:
    case FILE_LEDATE:
    case FILE_MEDATE:
        if (file_printf(ms, R_MAGIC_DESC, file_fmttime(p->l, 1)) == -1)
            return -1;
        t = ms->offset + sizeof(time_t);
        break;
    case FILE_LDATE:
    case FILE_BELDATE:
    case FILE_LELDATE:
    case FILE_MELDATE:
        if (file_printf(ms, R_MAGIC_DESC, file_fmttime(p->l, 0)) == -1)
            return -1;
        t = ms->offset + sizeof(time_t);
        break;
    case FILE_QDATE:
    case FILE_BEQDATE:
    case FILE_LEQDATE:
        if (file_printf(ms, R_MAGIC_DESC, file_fmttime((ut32)p->q, 1))
                == -1)
            return -1;
        t = ms->offset + sizeof(ut64);
        break;
    case FILE_QLDATE:
    case FILE_BEQLDATE:
    case FILE_LEQLDATE:
        if (file_printf(ms, R_MAGIC_DESC, file_fmttime((ut32)p->q, 0))
                == -1)
            return -1;
        t = ms->offset + sizeof(ut64);
        break;
    case FILE_FLOAT:
    case FILE_BEFLOAT:
    case FILE_LEFLOAT:
        vf = p->f;
        switch (check_fmt(ms, m)) {
        case -1:
            return -1;
        case 1:
            if (snprintf (buf, sizeof (buf), "%g", vf) < 0)
                return -1;
            if (file_printf (ms, R_MAGIC_DESC, buf) == -1)
                return -1;
            break;
        default:
            if (file_printf(ms, R_MAGIC_DESC, vf) == -1)
                return -1;
            break;
        }
        t = ms->offset + sizeof(float);
        break;
    case FILE_DOUBLE:
    case FILE_BEDOUBLE:
    case FILE_LEDOUBLE:
        vd = p->d;
        switch (check_fmt(ms, m)) {
        case -1:
            return -1;
        case 1:
            if (snprintf (buf, sizeof (buf), "%g", vd) < 0)
                return -1;
            if (file_printf (ms, R_MAGIC_DESC, buf) == -1)
                return -1;
            break;
        default:
            if (file_printf(ms, R_MAGIC_DESC, vd) == -1)
                return -1;
            break;
        }
        t = ms->offset + sizeof(double);
        break;
    case FILE_REGEX: {
        char *cp;
        int rval;

        /* Print a NUL-terminated copy of the matched text. */
        cp = strdupn((const char *)ms->search.s, ms->search.rm_len);
        if (cp == NULL) {
            file_oomem(ms, ms->search.rm_len);
            return -1;
        }
        rval = file_printf(ms, R_MAGIC_DESC, cp);
        free(cp);

        if (rval == -1)
            return -1;

        if ((m->str_flags & REGEX_OFFSET_START))
            t = ms->search.offset;
        else
            t = ms->search.offset + ms->search.rm_len;
        break;
    }

    case FILE_SEARCH:
        if (file_printf(ms, R_MAGIC_DESC, m->value.s) == -1)
            return -1;
        if ((m->str_flags & REGEX_OFFSET_START))
            t = ms->search.offset;
        else t = ms->search.offset + m->vallen;
        break;
    case FILE_DEFAULT:
        if (file_printf(ms, R_MAGIC_DESC, m->value.s) == -1)
            return -1;
        t = ms->offset;
        break;
    default:
        file_magerror(ms, "invalid m->type (%d) in mprint()", m->type);
        return -1;
    }
    return(t);
}