Beispiel #1
0
// ------------------------------------------------------------------------------------------------
unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input,
        const char*& cursor, const char* end, bool long_length = false, bool allow_null = false) {
    const uint32_t len_len = long_length ? 4 : 1;
    if(Offset(cursor, end) < len_len) {
        TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
    }

    const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);

    if (Offset(cursor, end) < length) {
        TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
    }

    sbegin_out = cursor;
    cursor += length;

    send_out = cursor;

    if(!allow_null) {
        for (unsigned int i = 0; i < length; ++i) {
            if(sbegin_out[i] == '\0') {
                TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
            }
        }
    }

    return length;
}
Beispiel #2
0
// ------------------------------------------------------------------------------------------------
uint8_t ReadByte(const char* input, const char*& cursor, const char* end) {
    if(Offset(cursor, end) < sizeof( uint8_t ) ) {
        TokenizeError("cannot ReadByte, out of bounds",input, cursor);
    }

    uint8_t word;/* = *reinterpret_cast< const uint8_t* >( cursor )*/
    ::memcpy( &word, cursor, sizeof( uint8_t ) );
    ++cursor;

    return word;
}
// ------------------------------------------------------------------------------------------------
uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadByte, out of bounds",input, cursor);
    }

    uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
    ++cursor;

    return word;
}
// ------------------------------------------------------------------------------------------------
uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 4) {
        TokenizeError("cannot ReadWord, out of bounds",input, cursor);
    }

    uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
    AI_SWAP4(word);

    cursor += 4;

    return word;
}
Beispiel #5
0
// ------------------------------------------------------------------------------------------------
uint64_t ReadDoubleWord(const char* input, const char*& cursor, const char* end) {
    const size_t k_to_read = sizeof(uint64_t);
    if(Offset(cursor, end) < k_to_read) {
        TokenizeError("cannot ReadDoubleWord, out of bounds",input, cursor);
    }

    uint64_t dword = *reinterpret_cast<const uint64_t*>(cursor);
    AI_SWAP8(dword);

    cursor += k_to_read;

    return dword;
}
Beispiel #6
0
// ------------------------------------------------------------------------------------------------
uint32_t ReadWord(const char* input, const char*& cursor, const char* end) {
    const size_t k_to_read = sizeof( uint32_t );
    if(Offset(cursor, end) < k_to_read ) {
        TokenizeError("cannot ReadWord, out of bounds",input, cursor);
    }

    uint32_t word;
    ::memcpy(&word, cursor, 4);
    AI_SWAP4(word);

    cursor += k_to_read;

    return word;
}
Beispiel #7
0
// ------------------------------------------------------------------------------------------------
void Tokenize(TokenList& output_tokens, const char* input)
{
    ai_assert(input);

    // line and column numbers numbers are one-based
    unsigned int line = 1;
    unsigned int column = 1;

    bool comment = false;
    bool in_double_quotes = false;
    bool pending_data_token = false;

    const char* token_begin = NULL, *token_end = NULL;
    for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
        const char c = *cur;

        if (IsLineEnd(c)) {
            comment = false;

            column = 0;
            ++line;
        }

        if(comment) {
            continue;
        }

        if(in_double_quotes) {
            if (c == '\"') {
                in_double_quotes = false;
                token_end = cur;

                ProcessDataToken(output_tokens,token_begin,token_end,line,column);
                pending_data_token = false;
            }
            continue;
        }

        switch(c)
        {
        case '\"':
            if (token_begin) {
                TokenizeError("unexpected double-quote", line, column);
            }
            token_begin = cur;
            in_double_quotes = true;
            continue;

        case ';':
            ProcessDataToken(output_tokens,token_begin,token_end,line,column);
            comment = true;
            continue;

        case '{':
            ProcessDataToken(output_tokens,token_begin,token_end, line, column);
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
            continue;

        case '}':
            ProcessDataToken(output_tokens,token_begin,token_end,line,column);
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
            continue;

        case ',':
            if (pending_data_token) {
                ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
            }
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
            continue;

        case ':':
            if (pending_data_token) {
                ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
            }
            else {
                TokenizeError("unexpected colon", line, column);
            }
            continue;
        }

        if (IsSpaceOrNewLine(c)) {

            if (token_begin) {
                // peek ahead and check if the next token is a colon in which
                // case this counts as KEY token.
                TokenType type = TokenType_DATA;
                for (const char* peek = cur;  *peek && IsSpaceOrNewLine(*peek); ++peek) {
                    if (*peek == ':') {
                        type = TokenType_KEY;
                        cur = peek;
                        break;
                    }
                }

                ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
            }

            pending_data_token = false;
        }
        else {
            token_end = cur;
            if (!token_begin) {
                token_begin = cur;
            }

            pending_data_token = true;
        }
    }
}
Beispiel #8
0
// ------------------------------------------------------------------------------------------------
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end, bool const is64bits)
{
    // the first word contains the offset at which this block ends
	const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if(!end_offset) {
        return false;
    }

    if(end_offset > Offset(input, end)) {
        TokenizeError("block offset is out of range",input, cursor);
    }
    else if(end_offset < Offset(input, cursor)) {
        TokenizeError("block offset is negative out of range",input, cursor);
    }

    // the second data word contains the number of properties in the scope
	const uint64_t prop_count = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
	const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char* sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

    // now come the individual properties
    const char* begin_cursor = cursor;
    for (unsigned int i = 0; i < prop_count; ++i) {
        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

        if(i != prop_count-1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
        }
    }

    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError("property length not reached, something is wrong",input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate
    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
    // this NUL record is 13 bytes long on 32 bit version and 25 bytes long on 64 bit.
	const size_t sentinel_block_length = is64bits ? (sizeof(uint64_t)* 3 + 1) : (sizeof(uint32_t)* 3 + 1);

    if (Offset(input, cursor) < end_offset) {
        if (end_offset - Offset(input, cursor) < sentinel_block_length) {
            TokenizeError("insufficient padding bytes at block end",input, cursor);
        }

        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

        // XXX this is vulnerable to stack overflowing ..
        while(Offset(input, cursor) < end_offset - sentinel_block_length) {
			ReadScope(output_tokens, input, cursor, input + end_offset - sentinel_block_length, is64bits);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

        for (unsigned int i = 0; i < sentinel_block_length; ++i) {
            if(cursor[i] != '\0') {
                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
            }
        }
        cursor += sentinel_block_length;
    }

    if (Offset(input, cursor) != end_offset) {
        TokenizeError("scope length not reached, something is wrong",input, cursor);
    }

    return true;
}
Beispiel #9
0
// ------------------------------------------------------------------------------------------------
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end) {
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
    }

    const char type = *cursor;
    sbegin_out = cursor++;

    switch(type)
    {
        // 16 bit int
    case 'Y':
        cursor += 2;
        break;

        // 1 bit bool flag (yes/no)
    case 'C':
        cursor += 1;
        break;

        // 32 bit int
    case 'I':
        // <- fall through

        // float
    case 'F':
        cursor += 4;
        break;

        // double
    case 'D':
        cursor += 8;
        break;

        // 64 bit int
    case 'L':
        cursor += 8;
        break;

        // note: do not write cursor += ReadWord(...cursor) as this would be UB

        // raw binary data
    case 'R':
    {
        const uint32_t length = ReadWord(input, cursor, end);
        cursor += length;
        break;
    }

    case 'b':
        // TODO: what is the 'b' type code? Right now we just skip over it /
        // take the full range we could get
        cursor = end;
        break;

        // array of *
    case 'f':
    case 'd':
    case 'l':
    case 'i':
    case 'c':   {
        const uint32_t length = ReadWord(input, cursor, end);
        const uint32_t encoding = ReadWord(input, cursor, end);

        const uint32_t comp_len = ReadWord(input, cursor, end);

        // compute length based on type and check against the stored value
        if(encoding == 0) {
            uint32_t stride = 0;
            switch(type)
            {
            case 'f':
            case 'i':
                stride = 4;
                break;

            case 'd':
            case 'l':
                stride = 8;
                break;

            case 'c':
                stride = 1;
                break;

            default:
                ai_assert(false);
            };
            ai_assert(stride > 0);
            if(length * stride != comp_len) {
                TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
            }
        }
        // zip/deflate algorithm (encoding==1)? take given length. anything else? die
        else if (encoding != 1) {
            TokenizeError("cannot ReadData, unknown encoding",input, cursor);
        }
        cursor += comp_len;
        break;
    }

        // string
    case 'S': {
        const char* sb, *se;
        // 0 characters can legally happen in such strings
        ReadString(sb, se, input, cursor, end, true, true);
        break;
    }
    default:
        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
    }

    if(cursor > end) {
        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
    }

    // the type code is contained in the returned range
    send_out = cursor;
}
Beispiel #10
0
// ------------------------------------------------------------------------------------------------
void TokenizeError(const std::string& message, const char* begin, const char* cursor) {
    TokenizeError(message, Offset(begin, cursor));
}