/*
 * Check that the optional printf format in description matches
 * the type of the magic.
 *
 * ms: magic handle, only passed on to file_magwarn().
 * m:  entry whose desc and type are inspected.
 *
 * Returns 1 when the description contains no '%' at all, 0 when the
 * single conversion found is accepted by check_format_type() for m->type,
 * and -1 otherwise (table size mismatch, type out of range, type without
 * a format class, wrong conversion, or more than one '%').
 */
static int check_format(RMagic *ms, struct r_magic *m) {
	char *ptr;

	for (ptr = m->desc; *ptr; ptr++)
		if (*ptr == '%')
			break;
	if (*ptr == '\0') {
		/* No format string; ok */
		return 1;
	}
	/* The format table and the name table are indexed by the same type */
	if (file_nformats != file_nnames) {
		return -1;
	}
	if (m->type >= file_nformats) {
		file_magwarn(ms, "Internal error inconsistency between "
			"m->type and format strings");
		return -1;
	}
	if (magic_file_formats[m->type] == FILE_FMT_NONE) {
		/* First placeholder is the type name, second the description */
		file_magwarn(ms, "No format string for `%s' with description "
			"`%s'", magic_file_names[m->type], m->desc);
		return -1;
	}
	/* Step over the '%' so ptr addresses the conversion itself */
	ptr++;
	if (check_format_type(ptr, magic_file_formats[m->type]) == -1) {
		/*
		 * TODO: this error message is unhelpful if the format
		 * string is not one character long
		 */
		file_magwarn(ms, "Printf format `%c' is not valid for type "
			"`%s' in description `%s'", *ptr ? *ptr : '?',
			magic_file_names[m->type], m->desc);
		return -1;
	}
	/* Any further '%' in the remainder is rejected */
	for (; *ptr; ptr++) {
		if (*ptr == '%') {
			file_magwarn (ms,
				"Too many format strings (should have at most one) "
				"for `%s' with description `%s'",
				magic_file_names[m->type], m->desc);
			return -1;
		}
	}
	return 0;
}
/*
 * parse a MIME annotation line from magic file, put into magic[index - 1]
 * if valid
 *
 * ms:       magic handle (flags are read, errors/warnings reported on it).
 * mentryp:  pointer to the entry array; the entry at *nmentryp - 1 is used.
 * nmentryp: number of entries parsed so far; must be non-zero.
 * line:     text following the MIME keyword.
 *
 * The MIME type is attached to the last magic of that entry.  Only ASCII
 * alphanumerics and the characters "-+/." are copied.  Returns 0 when at
 * least one character was stored, -1 otherwise or when there is no current
 * entry or it already carries a MIME type.
 */
static int parse_mime(RMagic *ms, struct r_magic_entry **mentryp, ut32 *nmentryp, const char *line) {
	size_t i;
	const char *l = line;
	struct r_magic *m;
	struct r_magic_entry *me;

	if (*nmentryp == 0) {
		file_error(ms, 0, "No current entry for MIME type");
		return -1;
	}
	me = &(*mentryp)[*nmentryp - 1];
	m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
	if (m->mimetype[0] != '\0') {
		file_error(ms, 0, "Current entry already has a MIME type: %s\n"
			"Description: %s\nNew type: %s", m->mimetype, m->desc, l);
		return -1;
	}
	EATAB;
	for (i = 0;
	     *l && ((isascii((ut8)*l) && isalnum((ut8)*l)) || strchr("-+/.", *l))
	     && i < sizeof (m->mimetype);
	     m->mimetype[i++] = *l++)
		continue;
	if (i == sizeof (m->mimetype)) {
		/*
		 * The buffer was filled completely: terminate the MIME type
		 * itself (not the description) by sacrificing its last byte.
		 */
		m->mimetype[sizeof (m->mimetype) - 1] = '\0';
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "MIME type `%s' truncated %zu",
				m->mimetype, i);
	} else
		m->mimetype[i] = '\0';
	return (i>0)? 0: -1;
}
/*
 * Read the comparison value at *p into the value union of m, picking the
 * parser from m->type, and advance *p past what was consumed.
 *
 * String-like types go through getstr() and also set m->vallen (one more
 * for FILE_PSTRING).  Float and double types use strtof()/strtod(); every
 * other type is read with strtoull(), sign-extended by file_signextend()
 * and followed by eatsize().  Numeric values are left untouched when the
 * relation is 'x'.
 *
 * Returns 0 for success, -1 when getstr() fails.
 */
static int getvalue(RMagic *ms, struct r_magic *m, const char **p, int action) {
	char *end;
	int len;

	switch (m->type) {
	case FILE_BESTRING16:
	case FILE_LESTRING16:
	case FILE_STRING:
	case FILE_PSTRING:
	case FILE_REGEX:
	case FILE_SEARCH:
		*p = getstr(ms, *p, m->value.s, sizeof (m->value.s), &len, action);
		if (*p == NULL) {
			if (ms->flags & R_MAGIC_CHECK) {
				file_magwarn(ms, "cannot get string from `%s'",
					m->value.s);
			}
			return -1;
		}
		m->vallen = len;
		if (m->type == FILE_PSTRING) {
			m->vallen++;
		}
		break;

	case FILE_FLOAT:
	case FILE_BEFLOAT:
	case FILE_LEFLOAT:
		if (m->reln == 'x') {
			break;
		}
#ifdef HAVE_STRTOF
		m->value.f = strtof(*p, &end);
#else
		m->value.f = (float)strtod(*p, &end);
#endif
		*p = end;
		break;

	case FILE_DOUBLE:
	case FILE_BEDOUBLE:
	case FILE_LEDOUBLE:
		if (m->reln == 'x') {
			break;
		}
		m->value.d = strtod(*p, &end);
		*p = end;
		break;

	default:
		if (m->reln == 'x') {
			break;
		}
		m->value.q = file_signextend(ms, m, (ut64)strtoull(*p, &end, 0));
		*p = end;
		eatsize(p);
		break;
	}
	return 0;
}
/*
 * Validate a conditional keyword against the previous one seen at the same
 * continuation level and record it.
 *
 * `if' is accepted after no conditional or after `elif'; `elif' and `else'
 * are accepted only after `if' or `elif'.  An accepted `else', like
 * COND_NONE, resets the stored state to COND_NONE.
 *
 * Returns 0 when accepted, -1 on a sequencing error (a warning is emitted
 * only when R_MAGIC_CHECK is set).
 */
static int check_cond(RMagic *ms, int cond, ut32 cont_level) {
	const int prev = ms->c.li[cont_level].last_cond;
	const int in_chain = (prev == COND_IF || prev == COND_ELIF);
	int next = prev;

	if (cond == COND_IF) {
		if (!(prev == COND_NONE || prev == COND_ELIF)) {
			if (ms->flags & R_MAGIC_CHECK) {
				file_magwarn(ms, "syntax error: `if'");
			}
			return -1;
		}
		next = COND_IF;
	} else if (cond == COND_ELIF) {
		if (!in_chain) {
			if (ms->flags & R_MAGIC_CHECK) {
				file_magwarn(ms, "syntax error: `elif'");
			}
			return -1;
		}
		next = COND_ELIF;
	} else if (cond == COND_ELSE) {
		if (!in_chain) {
			if (ms->flags & R_MAGIC_CHECK) {
				file_magwarn(ms, "syntax error: `else'");
			}
			return -1;
		}
		next = COND_NONE;
	} else if (cond == COND_NONE) {
		next = COND_NONE;
	}
	/* Any other value of cond leaves the stored state as it was */
	ms->c.li[cont_level].last_cond = next;
	return 0;
}
/*
 * Check that the string modifiers collected in m->str_flags and
 * m->str_range are allowed for m->type.
 *
 * Does nothing and returns 0 unless R_MAGIC_CHECK is set.  Otherwise
 * returns 0 when the combination is acceptable and -1 after a warning.
 * For FILE_SEARCH with a zero range the default range is stored in
 * m->str_range, yet -1 is still returned.
 */
static int string_modifier_check(RMagic *ms, struct r_magic *m) {
	int rc = 0;

	if (!(ms->flags & R_MAGIC_CHECK)) {
		return 0;
	}

	switch (m->type) {
	case FILE_BESTRING16:
	case FILE_LESTRING16:
		if (m->str_flags) {
			file_magwarn(ms, "no modifiers allowed for 16-bit strings\n");
			rc = -1;
		}
		break;

	case FILE_STRING:
	case FILE_PSTRING:
		if (m->str_flags & REGEX_OFFSET_START) {
			file_magwarn(ms, "'/%c' only allowed on regex and search\n",
				CHAR_REGEX_OFFSET_START);
			rc = -1;
		}
		break;

	case FILE_SEARCH:
		if (!m->str_range) {
			file_magwarn(ms, "missing range; defaulting to %d\n",
				STRING_DEFAULT_RANGE);
			m->str_range = STRING_DEFAULT_RANGE;
			rc = -1;
		}
		break;

	case FILE_REGEX:
		/* Only the first offending flag is reported */
		if (m->str_flags & STRING_COMPACT_BLANK) {
			file_magwarn(ms, "'/%c' not allowed on regex\n",
				CHAR_COMPACT_BLANK);
			rc = -1;
		} else if (m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) {
			file_magwarn(ms, "'/%c' not allowed on regex\n",
				CHAR_COMPACT_OPTIONAL_BLANK);
			rc = -1;
		}
		break;

	default:
		file_magwarn (ms, "coding error: m->type=%d\n", m->type);
		rc = -1;
		break;
	}
	return rc;
}
/*
 * parse one line from magic file, put into magic[index++] if valid
 *
 * ms:       magic handle; flags are read, warnings/errors reported on it.
 * mentryp:  pointer to the growable array of entries (may be reallocated).
 * nmentryp: number of complete level-0 entries; incremented at the end
 *           when the parsed line is a level-0 line.
 * line:     the text of the line.
 * lineno:   stored in the resulting magic.
 * action:   forwarded to getvalue(); FILE_CHECK additionally dumps the
 *           parsed magic.
 *
 * The number of leading '>' characters is the continuation level.  A
 * continuation line is appended to the most recent entry, a level-0 line
 * starts the entry at index *nmentryp.
 *
 * Returns 0 on success and -1 on failure.
 */
static int parse(RMagic *ms, struct r_magic_entry **mentryp, ut32 *nmentryp, const char *line, size_t lineno, int action) {
	static ut32 last_cont_level = 0;
	size_t i;
	struct r_magic_entry *me;
	struct r_magic *m;
	const char *l = line;
	char *t;
	int op;
	ut32 cont_level = 0;

	for (; *l == '>'; l++, cont_level++);
	if (cont_level == 0 || cont_level > last_cont_level)
		if (file_check_mem (ms, cont_level) == -1)
			return -1;
	last_cont_level = cont_level;
#define ALLOC_CHUNK	(size_t)10
#define ALLOC_INCR	(size_t)200
	if (cont_level != 0) {
		if (*nmentryp == 0) {
			file_error(ms, 0, "No current entry for continuation");
			return -1;
		}
		me = &(*mentryp)[*nmentryp - 1];
		if (me->cont_count == me->max_count) {
			struct r_magic *nm;
			size_t cnt = me->max_count + ALLOC_CHUNK;
			if (!(nm = realloc(me->mp, sizeof (*nm) * cnt))) {
				file_oomem(ms, sizeof (*nm) * cnt);
				return -1;
			}
			me->mp = nm;
			me->max_count = cnt;
		}
		m = &me->mp[me->cont_count++];
		(void)memset(m, 0, sizeof (*m));
		m->cont_level = cont_level;
	} else {
		if (*nmentryp == maxmagic) {
			struct r_magic_entry *mp;
			/*
			 * Commit the new capacity only once realloc succeeded;
			 * otherwise a later line would skip the growth check
			 * and index past the still-small array.
			 */
			size_t newmax = maxmagic + ALLOC_INCR;
			if (!(mp = realloc (*mentryp, sizeof (*mp) * newmax))) {
				file_oomem (ms, sizeof (*mp) * newmax);
				return -1;
			}
			(void)memset(&mp[*nmentryp], 0, sizeof (*mp) * ALLOC_INCR);
			*mentryp = mp;
			maxmagic = newmax;
		}
		me = &(*mentryp)[*nmentryp];
		if (!me->mp) {
			if (!(m = malloc (sizeof (*m) * ALLOC_CHUNK))) {
				file_oomem (ms, sizeof (*m) * ALLOC_CHUNK);
				return -1;
			}
			me->mp = m;
			me->max_count = ALLOC_CHUNK;
		} else
			m = me->mp;
		(void)memset(m, 0, sizeof (*m));
		m->cont_level = 0;
		me->cont_count = 1;
	}
	m->lineno = lineno;

	if (*l == '&') {  /* m->cont_level == 0 checked below. */
		++l;            /* step over */
		m->flag |= OFFADD;
	}
	if (*l == '(') {
		++l;            /* step over */
		m->flag |= INDIR;
		if (m->flag & OFFADD)
			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;

		if (*l == '&') {  /* m->cont_level == 0 checked below */
			++l;            /* step over */
			m->flag |= OFFADD;
		}
	}
	/* Indirect offsets are not valid at level 0. */
	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "relative offset at level 0");

	/* get offset, then skip over it */
	m->offset = (ut32)strtoul(l, &t, 0);
	if ((l == t) && (ms->flags & R_MAGIC_CHECK))
		file_magwarn(ms, "offset `%s' invalid", l);
	l = t;

	if (m->flag & INDIR) {
		int missing;

		m->in_type = FILE_LONG;
		m->in_offset = 0;
		/*
		 * read [.lbs][+-]nnnnn)
		 */
		if (*l == '.') {
			l++;
			switch (*l) {
			case 'l':
				m->in_type = FILE_LELONG;
				break;
			case 'L':
				m->in_type = FILE_BELONG;
				break;
			case 'm':
				m->in_type = FILE_MELONG;
				break;
			case 'h':
			case 's':
				m->in_type = FILE_LESHORT;
				break;
			case 'H':
			case 'S':
				m->in_type = FILE_BESHORT;
				break;
			case 'c':
			case 'b':
			case 'C':
			case 'B':
				m->in_type = FILE_BYTE;
				break;
			case 'e':
			case 'f':
			case 'g':
				m->in_type = FILE_LEDOUBLE;
				break;
			case 'E':
			case 'F':
			case 'G':
				m->in_type = FILE_BEDOUBLE;
				break;
			default:
				if (ms->flags & R_MAGIC_CHECK)
					file_magwarn(ms,
						"indirect offset type `%c' invalid",
						*l);
				break;
			}
			/* Step over the type letter, but never past the NUL */
			if (*l)
				l++;
		}

		m->in_op = 0;
		if (*l == '~') {
			m->in_op |= FILE_OPINVERSE;
			l++;
		}
		if ((op = get_op(*l)) != -1) {
			m->in_op |= op;
			l++;
		}
		if (*l == '(') {
			m->in_op |= FILE_OPINDIRECT;
			l++;
		}
		if (isdigit((ut8)*l) || *l == '-') {
			m->in_offset = (int32_t)strtol(l, &t, 0);
			if (l == t)
				if (ms->flags & R_MAGIC_CHECK)
					file_magwarn(ms,
						"in_offset `%s' invalid", l);
			l = t;
		}
		/*
		 * Expect ')' (twice when the inner offset was itself in
		 * parentheses).  One character is consumed per check, as
		 * before, except that the terminating NUL is never skipped.
		 */
		missing = (*l != ')');
		if (*l)
			l++;
		if (!missing && (m->in_op & FILE_OPINDIRECT)) {
			missing = (*l != ')');
			if (*l)
				l++;
		}
		if (missing)
			if (ms->flags & R_MAGIC_CHECK)
				file_magwarn(ms,
					"missing ')' in indirect offset");
	}
	EATAB;

	m->cond = get_cond(l, &l);
	if (check_cond(ms, m->cond, cont_level) == -1)
		return -1;
	EATAB;

	if (*l == 'u') {
		++l;
		m->flag |= UNSIGNED;
	}

	m->type = get_type(l, &l);
	if (m->type == FILE_INVALID) {
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "type `%s' invalid", l);
		return -1;
	}

	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
	m->mask_op = 0;
	if (*l == '~') {
		if (!MAGIC_IS_STRING (m->type))
			m->mask_op |= FILE_OPINVERSE;
		else if (ms->flags & R_MAGIC_CHECK)
			file_magwarn (ms, "'~' invalid for string types");
		++l;
	}
	m->str_range = 0;
	m->str_flags = 0;
	m->num_mask = 0;
	if ((op = get_op (*l)) != -1) {
		if (!MAGIC_IS_STRING (m->type)) {
			ut64 val;
			++l;
			m->mask_op |= op;
			val = (ut64)strtoull (l, &t, 0);
			l = t;
			m->num_mask = file_signextend (ms, m, val);
			eatsize (&l);
		} else if (op == FILE_OPDIVIDE) {
			int have_range = 0;
			/* A NUL here falls into the default case and fails */
			while (!isspace ((ut8)*++l)) {
				switch (*l) {
				case '0':  case '1':  case '2':
				case '3':  case '4':  case '5':
				case '6':  case '7':  case '8':
				case '9':
					if (have_range && (ms->flags & R_MAGIC_CHECK))
						file_magwarn(ms, "multiple ranges");
					have_range = 1;
					m->str_range = strtoul(l, &t, 0);
					if (m->str_range == 0)
						file_magwarn(ms, "zero range");
					/* the loop's ++l lands on the char after the number */
					l = t - 1;
					break;
				case CHAR_COMPACT_BLANK:
					m->str_flags |= STRING_COMPACT_BLANK;
					break;
				case CHAR_COMPACT_OPTIONAL_BLANK:
					m->str_flags |= STRING_COMPACT_OPTIONAL_BLANK;
					break;
				case CHAR_IGNORE_LOWERCASE:
					m->str_flags |= STRING_IGNORE_LOWERCASE;
					break;
				case CHAR_IGNORE_UPPERCASE:
					m->str_flags |= STRING_IGNORE_UPPERCASE;
					break;
				case CHAR_REGEX_OFFSET_START:
					m->str_flags |= REGEX_OFFSET_START;
					break;
				default:
					if (ms->flags & R_MAGIC_CHECK)
						file_magwarn(ms,
							"string extension `%c' invalid",
							*l);
					return -1;
				}
				/* allow multiple '/' for readability */
				if (l[1] == '/' && !isspace ((ut8)l[2]))
					l++;
			}
			if (string_modifier_check(ms, m) == -1)
				return -1;
		} else {
			/* Report the operator at the scan position */
			if (ms->flags & R_MAGIC_CHECK)
				file_magwarn(ms, "invalid string op: %c", *l);
			return -1;
		}
	}
	/*
	 * We used to set mask to all 1's here, instead let's just not do
	 * anything if mask = 0 (unless you have a better idea)
	 */
	EATAB;

	switch (*l) {
	case '>':
	case '<':
	/* Old-style anding: "0 byte &0x80 dynamically linked" */
	case '&':
	case '^':
	case '=':
		m->reln = *l;
		++l;
		if (*l == '=') {
			/* HP compat: ignore &= etc. */
			++l;
		}
		break;
	case '!':
		m->reln = *l;
		++l;
		break;
	default:
		m->reln = '=';	/* the default relation */
		if (*l == 'x' && ((isascii((ut8)l[1]) &&
		    isspace ((ut8)l[1])) || !l[1])) {
			m->reln = *l;
			++l;
		}
		break;
	}
	/*
	 * Grab the value part, except for an 'x' reln.
	 */
	if (m->reln != 'x' && getvalue (ms, m, &l, action))
		return -1;

	/*
	 * TODO finish this macro and start using it!
	 * #define offsetcheck {if (offset > HOWMANY-1)
	 *	magwarn("offset too big"); }
	 */

	/*
	 * Now get last part - the description
	 */
	EATAB;
	if (l[0] == '\b') {
		++l;
		m->flag |= NOSPACE;
	} else if ((l[0] == '\\') && (l[1] == 'b')) {
		++l;
		++l;
		m->flag |= NOSPACE;
	}
	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof (m->desc); )
		continue;
	if (i == sizeof (m->desc)) {
		m->desc[sizeof (m->desc) - 1] = '\0';
		if (ms->flags & R_MAGIC_CHECK)
			file_magwarn(ms, "description `%s' truncated", m->desc);
	}

	/*
	 * We only do this check while compiling, or if any of the magic
	 * files were not compiled.
	 */
	if (ms->flags & R_MAGIC_CHECK)
		if (check_format (ms, m) == -1)
			return -1;
	if (action == FILE_CHECK)
		file_mdump (m);
	m->mimetype[0] = '\0';		/* initialise MIME type to none */
	if (m->cont_level == 0)
		++(*nmentryp);		/* make room for next */
	return 0;
}
/*
 * extend the sign bit if the comparison is to be signed
 *
 * ms: magic handle, used only to report an unknown type.
 * m:  magic whose flag (UNSIGNED) and type select the width.
 * v:  value to extend.
 *
 * When the UNSIGNED flag is set v is returned untouched.  Otherwise v is
 * narrowed to the signed width matching m->type and widened again, so the
 * upper bits replicate the sign bit.  String-like types and FILE_DEFAULT
 * leave v unchanged.  An unknown type yields ~0U after an optional warning.
 */
ut64 file_signextend(RMagic *ms, struct r_magic *m, ut64 v) {
	if (!(m->flag & UNSIGNED)) {
		switch (m->type) {
		/*
		 * Do not remove the casts below.  They are
		 * vital.  When later compared with the data,
		 * the sign extension must have happened.
		 */
		case FILE_BYTE:
			/*
			 * Plain char may be unsigned on some ABIs, which
			 * would silently skip the extension; be explicit.
			 */
			v = (signed char) v;
			break;
		case FILE_SHORT:
		case FILE_BESHORT:
		case FILE_LESHORT:
			v = (short) v;
			break;
		case FILE_DATE:
		case FILE_BEDATE:
		case FILE_LEDATE:
		case FILE_MEDATE:
		case FILE_LDATE:
		case FILE_BELDATE:
		case FILE_LELDATE:
		case FILE_MELDATE:
		case FILE_LONG:
		case FILE_BELONG:
		case FILE_LELONG:
		case FILE_MELONG:
		case FILE_FLOAT:
		case FILE_BEFLOAT:
		case FILE_LEFLOAT:
			v = (int32_t) v;
			break;
		case FILE_QUAD:
		case FILE_BEQUAD:
		case FILE_LEQUAD:
		case FILE_QDATE:
		case FILE_QLDATE:
		case FILE_BEQDATE:
		case FILE_BEQLDATE:
		case FILE_LEQDATE:
		case FILE_LEQLDATE:
		case FILE_DOUBLE:
		case FILE_BEDOUBLE:
		case FILE_LEDOUBLE:
			v = (int64_t) v;
			break;
		case FILE_STRING:
		case FILE_PSTRING:
		case FILE_BESTRING16:
		case FILE_LESTRING16:
		case FILE_REGEX:
		case FILE_SEARCH:
		case FILE_DEFAULT:
			break;
		default:
			if (ms->flags & R_MAGIC_CHECK)
				file_magwarn(ms, "cannot happen: m->type=%d\n",
					m->type);
			return ~0U;
		}
	}
	return v;
}
/*
 * parse a file or directory of files
 * const char *fn: name of magic file or directory
 *
 * ms:      magic handle; R_MAGIC_CHECK is switched on in ms->flags.
 * magicp:  receives a malloc'ed flat array holding every parsed magic.
 * nmagicp: receives the number of elements in *magicp.
 * action:  forwarded to load_1(); FILE_CHECK also prints usg_hdr.
 *
 * When fn is a directory every regular file in it whose name does not
 * start with '.' is loaded, otherwise fn itself is loaded.  The entries
 * are then classified with set_test_type(), sorted with apprentice_sort
 * and copied into one contiguous array.
 *
 * Returns 0 on success.  On failure *magicp is set to NULL, *nmagicp to 0
 * and a non-zero value is returned (-1 for allocation or sandbox failures,
 * otherwise the accumulated error count).
 */
static int apprentice_load(RMagic *ms, struct r_magic **magicp, ut32 *nmagicp, const char *fn, int action) {
	ut32 marraycount, i, mentrycount = 0, starttest;
	struct r_magic_entry *marray;
	char subfn[MAXPATHLEN];
	struct dirent *d;
	struct stat st;
	int errs = 0;
	DIR *dir;

	ms->flags |= R_MAGIC_CHECK;	/* Enable checks for parsed files */

	maxmagic = MAXMAGIS;
	if (!(marray = calloc (maxmagic, sizeof (*marray)))) {
		file_oomem (ms, maxmagic * sizeof (*marray));
		return -1;
	}
	marraycount = 0;

	/* print silly verbose header for USG compat. */
	if (action == FILE_CHECK)
		eprintf ("%s\n", usg_hdr);

	/* load directory or file */
	if (stat (fn, &st) == 0 && S_ISDIR (st.st_mode)) {
		if (r_sandbox_enable (0) && !r_sandbox_check_path (fn)) {
			free (marray);
			/* Leave the outputs cleared, like the other failure path */
			*magicp = NULL;
			*nmagicp = 0;
			return -1;
		}
		dir = opendir (fn);
		if (dir) {
			while ((d = readdir (dir))) {
				int n;

				if (*d->d_name=='.')
					continue;
				n = snprintf (subfn, sizeof (subfn), "%s/%s",
					fn, d->d_name);
				/* A truncated path would name some other file */
				if (n < 0 || (size_t)n >= sizeof (subfn))
					continue;
				if (stat (subfn, &st) == 0 && S_ISREG (st.st_mode))
					load_1 (ms, action, subfn, &errs,
						&marray, &marraycount);
			}
			closedir (dir);
		} else
			errs++;
	} else
		load_1 (ms, action, fn, &errs, &marray, &marraycount);
	if (errs)
		goto out;

	/* Set types of tests */
	for (i = 0; i < marraycount; ) {
		if (marray[i].mp->cont_level != 0) {
			i++;
			continue;
		}

		starttest = i;
		do {
			set_test_type(marray[starttest].mp, marray[i].mp);
			if (ms->flags & R_MAGIC_DEBUG) {
				(void)fprintf(stderr, "%s%s%s: %s\n",
					marray[i].mp->mimetype,
					marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
					marray[i].mp->desc[0] ? marray[i].mp->desc : "(no description)",
					marray[i].mp->flag & BINTEST ? "binary" : "text");
				if (marray[i].mp->flag & BINTEST) {
					/*
					 * NOTE(review): SYMLEN is sizeof, so it counts
					 * the terminating NUL of SYMBOL - confirm the
					 * p[SYMLEN] probes below are intended.
					 */
#define SYMBOL "text"
#define SYMLEN sizeof (SYMBOL)
					char *p = strstr(marray[i].mp->desc, "text");
					if (p && (p == marray[i].mp->desc || isspace((unsigned char)p[-1])) &&
					    (p + SYMLEN - marray[i].mp->desc == MAXstring ||
					     (p[SYMLEN] == '\0' || isspace((unsigned char)p[SYMLEN])))) {
						(void)fprintf(stderr,
							"*** Possible binary test for text type\n");
					}
#undef SYMBOL
#undef SYMLEN
				}
			}
		} while (++i < marraycount && marray[i].mp->cont_level != 0);
	}

	qsort (marray, marraycount, sizeof (*marray), apprentice_sort);

	/*
	 * Make sure that any level 0 "default" line is last (if one exists).
	 */
	for (i = 0; i < marraycount; i++) {
		if (marray[i].mp->cont_level == 0 &&
		    marray[i].mp->type == FILE_DEFAULT) {
			while (++i < marraycount)
				if (marray[i].mp->cont_level == 0)
					break;
			if (i != marraycount) {
				ms->line = marray[i].mp->lineno; /* XXX - Ugh! */
				file_magwarn (ms, "level 0 \"default\" did not sort last");
			}
			break;
		}
	}

	/* First pass sizes the flat array, second pass fills it */
	for (i = 0; i < marraycount; i++)
		mentrycount += marray[i].cont_count;

	if (!(*magicp = malloc (1 + (sizeof (**magicp) * mentrycount)))) {
		file_oomem (ms, sizeof (**magicp) * mentrycount);
		errs++;
		goto out;
	}

	mentrycount = 0;
	for (i = 0; i < marraycount; i++) {
		(void)memcpy (*magicp + mentrycount, marray[i].mp,
			marray[i].cont_count * sizeof (**magicp));
		mentrycount += marray[i].cont_count;
	}
out:
	/* The per-entry arrays were copied (or are abandoned on error) */
	for (i = 0; i < marraycount; i++)
		free(marray[i].mp);
	free (marray);
	if (errs) {
		*magicp = NULL;
		*nmagicp = 0;
		return errs;
	}
	*nmagicp = mentrycount;
	return 0;
}
/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "p", returning its length in *slen.
 * Return updated scan pointer as function result.
 *
 * ms:     magic handle used for error and warning reports.
 * s:      input text.
 * p/plen: output buffer and its size; at most plen - 1 bytes are stored
 *         before the terminating NUL.
 * slen:   receives the number of bytes stored (NUL excluded).
 * action: escape-related warnings are emitted only for FILE_COMPILE.
 *
 * The returned pointer addresses the whitespace character or NUL that
 * ended the string, so the caller never starts reading beyond the end of
 * the input.  NULL is returned (and neither *slen nor the terminator is
 * written) when the converted string does not fit.
 */
static const char * getstr(RMagic *ms, const char *s, char *p, int plen, int *slen, int action) {
	const char *origs = s;
	char *origp = p;
	char *pmax = p + plen - 1;
	int c, val;

	while ((c = *s++) != '\0') {
		if (isspace ((ut8) c))
			break;
		if (p >= pmax) {
			file_error(ms, 0, "string too long: `%s'", origs);
			return NULL;
		}
		if (c == '\\') {
			switch ((c = *s++)) {
			case '\0':
				if (action == FILE_COMPILE)
					file_magwarn(ms, "incomplete escape");
				/* Stay on the NUL instead of stepping past it */
				s--;
				goto out;
			case '\t':
				if (action == FILE_COMPILE) {
					file_magwarn(ms,
						"escaped tab found, use \\t instead");
					/* no longer FILE_COMPILE: mutes the warning below */
					action++;
				}
				/*FALLTHROUGH*/
			default:
				if (action == FILE_COMPILE) {
					if (isprint((ut8)c))
						file_magwarn(ms,
							"no need to escape `%c'", c);
					else
						file_magwarn(ms,
							"unknown escape sequence: \\%03o", c);
				}
				/*FALLTHROUGH*/
			/* space, perhaps force people to use \040? */
			case ' ':
#if 0
			/*
			 * Other things people escape, but shouldn't need to,
			 * so we disallow them
			 */
			case '\'':
			case '"':
			case '?':
#endif
			/* Relations */
			case '>':
			case '<':
			case '&':
			case '^':
			case '=':
			case '!':
			/* and backslash itself */
			case '\\':
				*p++ = (char) c;
				break;
			case 'a':
				*p++ = '\a';
				break;
			case 'b':
				*p++ = '\b';
				break;
			case 'f':
				*p++ = '\f';
				break;
			case 'n':
				*p++ = '\n';
				break;
			case 'r':
				*p++ = '\r';
				break;
			case 't':
				*p++ = '\t';
				break;
			case 'v':
				*p++ = '\v';
				break;
			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;  /* try for 2 */
				if (c >= '0' && c <= '7') {
					val = (val << 3) | (c - '0');
					c = *s++;  /* try for 3 */
					if (c >= '0' && c <= '7')
						val = (val << 3) | (c-'0');
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			/* \x and up to 2 hex digits */
			case 'x':
				val = 'x';	/* Default if no digits */
				c = hextoint(*s++);	/* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
	/*
	 * The loop condition already consumed the terminating whitespace
	 * or NUL; step back so the result points at it, not beyond it.
	 */
	--s;
out:
	*p = '\0';
	*slen = p - origp;
	return s;
}