Exemple #1
0
/*
    Returns zero if matching rule not found or exception occurred.
*/
static const UCell* _parseBlock( UThread* ut, BlockParser* pe,
                                 const UCell* rit, const UCell* rend,
                                 UIndex* spos )
{
    const UCell* tval;
    int32_t repMin;
    int32_t repMax;
    UAtom atom;
    const UBuffer* iblk = pe->blk;
    UIndex pos = *spos;


match:

    while( rit != rend )
    {
        switch( ur_type(rit) )
        {
            case UT_WORD:
                atom = ur_atom(rit);

                if( atom < UT_BI_COUNT )
                {
                    // Datatype
                    if( pos >= pe->inputEnd )
                        goto failed;
                    tval = iblk->ptr.cell + pos;
                    if( ur_type(tval) != atom )
                    {
                        /*
                        if( atom == UT_NUMBER )
                        {
                            if( ur_is(tval,UT_INT) || ur_is(tval,UT_DECIMAL) )
                                goto type_matched;
                        }
                        */
                        goto failed;
                    }
//type_matched:
                    ++rit;
                    ++pos;
                }
                else switch( atom )
                {
                    case UR_ATOM_OPT:
                        ++rit;
                        repMin = 0;
                        repMax = 1;
                        goto repeat;

                    case UR_ATOM_ANY:
                        ++rit;
                        repMin = 0;
                        repMax = 0x7fffffff;
                        goto repeat;

                    case UR_ATOM_SOME:
                        ++rit;
                        repMin = 1;
                        repMax = 0x7fffffff;
                        goto repeat;

                    case UR_ATOM_BREAK:
                        pe->exception = PARSE_EX_BREAK;
                        *spos = pos;
                        return 0;

                    case UR_ATOM_BAR:
                        goto complete;

                    case UR_ATOM_TO:
                    case UR_ATOM_THRU:
                    {
                        const UCell* ci;
                        const UCell* ce;
                        UAtom ratom = ur_atom(rit);

                        ++rit;
                        if( rit == rend )
                            return 0;

                        ci = iblk->ptr.cell + pos;
                        ce = iblk->ptr.cell + pe->inputEnd;

                        if( ur_is(rit, UT_WORD) )
                        {
                            if( ur_atom(rit) < UT_BI_COUNT )
                            {
                                atom = ur_atom(rit);
                                while( ci != ce )
                                {
                                    if( ur_type(ci) == atom )
                                        break;
                                    ++ci;
                                }
                                if( ci == ce )
                                    goto failed;
                                pos = ci - iblk->ptr.cell;
                                if( ratom == UR_ATOM_THRU )
                                    ++pos;
                                ++rit;
                                break;
                            }
                            else
                            {
                                tval = ur_wordCell( ut, rit );
                                CHECK_WORD( tval )
                            }
                        }
                        else
                        {
                            tval = rit;
                        }


                        if( ur_is(tval, UT_BLOCK) )
                        {
                            // TODO: If block then all values must match.
                            BLK_RULE_ERROR( "to/thru block! not implemented" );
                        }
                        else
                        {
                            while( ci != ce )
                            {
                                if( ur_equal(ut, ci, tval) )
                                    break;
                                ++ci;
                            }
                            if( ci == ce )
                                goto failed;
                            pos = ci - iblk->ptr.cell;
                            if( ratom == UR_ATOM_THRU )
                                ++pos;
                        }
                        ++rit;
                    }
                        break;

                    case UR_ATOM_INTO:
                        ++rit;
                        if( rit == rend || ! ur_is(rit, UT_BLOCK) )
                        {
                            BLK_RULE_ERROR( "parse into expected block" );
                        }
                        tval = iblk->ptr.cell + pos;
                        if( ! ur_is(tval, UT_BLOCK) )
                            goto failed;
                        if( ur_isShared( tval->series.buf ) )
                            goto failed;
                    {
                        BlockParser ip;
                        UBlockIter bi;
                        UIndex parsePos = 0;

                        ip.eval = pe->eval;
                        ip.blk  = ur_bufferSer( tval );
                        ip.inputBuf  = tval->series.buf;
                        ip.inputEnd  = ip.blk->used;
                        ip.sliced    = 0;
                        ip.exception = PARSE_EX_NONE;

                        ur_blkSlice( ut, &bi, rit );

                        tval = _parseBlock( ut, &ip, bi.it, bi.end, &parsePos );
                        iblk = _acquireInput( ut, pe );
                        if( ! tval )
                        {
                            if( ip.exception == PARSE_EX_ERROR )
                            {
                                pe->exception = PARSE_EX_ERROR;
                                ur_appendTrace( ut, rit->series.buf, 0 );
                                return 0;
                            }
                            if( ip.exception != PARSE_EX_BREAK )
                                goto failed;
                        }
                    }
                        ++rit;
                        ++pos;
                        break;

                    case UR_ATOM_SKIP:
                        repMin = 1;
skip:
                        if( (pos + repMin) > pe->inputEnd )
                            goto failed;
                        pos += repMin;
                        ++rit;
                        break;

                    case UR_ATOM_SET:
                        ++rit;
                        if( rit == rend )
                            goto unexpected_end;
                        if( ! ur_is(rit, UT_WORD) )
                        {
                            BLK_RULE_ERROR( "parse set expected word" );
                        }
                        {
                        UCell* cell = ur_wordCellM( ut, rit );
                        CHECK_WORD( cell )
                        *cell = iblk->ptr.cell[ pos ];
                        }
                        ++rit;
                        break;

                    case UR_ATOM_PLACE:
                        ++rit;
                        if( (rit != rend) && ur_is(rit, UT_WORD) )
                        {
                            tval = ur_wordCell( ut, rit++ );
                            CHECK_WORD( tval )
                            if( ur_is(tval, UT_BLOCK) )
                            {
                                pos = tval->series.it;
                                break;
                            }
                        }
                        BLK_RULE_ERROR( "place expected series word" );

                    //case UR_ATOM_COPY:

                    default:
                    {
                        tval = ur_wordCell( ut, rit );
                        CHECK_WORD( tval )

                        if( ur_is(tval, UT_BLOCK) )
                        {
                            goto match_block;
                        }
                        else
                        {
                            BLK_RULE_ERROR( "parse expected block" );
                        }
                    }
                        break;
                }
                break;

            case UT_SETWORD:
            {
                UCell* cell = ur_wordCellM( ut, rit );
                CHECK_WORD( cell )
                ++rit;

                ur_setId( cell, UT_BLOCK );
                ur_setSlice( cell, pe->inputBuf, pos, pe->inputEnd );
            }
                break;

            case UT_GETWORD:
            {
                UCell* cell = ur_wordCellM( ut, rit );
                CHECK_WORD( cell )
                ++rit;

                if( ur_is(cell, UT_BLOCK) &&
                    (cell->series.buf == pe->inputBuf) )
                    cell->series.end = pos;
            }
                break;

            case UT_LITWORD:
                if( pos >= pe->inputEnd )
                    goto failed;
                tval = iblk->ptr.cell + pos;
                if( (ur_is(tval, UT_WORD) || ur_is(tval, UT_LITWORD))
                    && (ur_atom(tval) == ur_atom(rit)) )
                {
                    ++rit;
                    ++pos;
                }
                else
                    goto failed;
                break;

            case UT_INT:
                repMin = ur_int(rit);

                ++rit;
                if( rit == rend )
                    return 0;

                if( ur_is(rit, UT_INT) )
                {
                    repMax = ur_int(rit);
                    ++rit;
                }
                else if( ur_is(rit, UT_WORD) && ur_atom(rit) == UR_ATOM_SKIP )
                {
                    goto skip;
                }
                else
                {
                    repMax = repMin;
                }
                goto repeat;

            case UT_DATATYPE:
                if( pos >= pe->inputEnd )
                    goto failed;
                if( ! ur_isDatatype( iblk->ptr.cell + pos, rit ) )
                    goto failed;
                ++rit;
                ++pos;
                break;

            case UT_CHAR:
            case UT_BINARY:
            case UT_STRING:
            case UT_FILE:
                if( pos >= pe->inputEnd )
                    goto failed;
                if( ! ur_equal( ut, iblk->ptr.cell + pos, rit ) )
                    goto failed;
                ++rit;
                ++pos;
                break;

            case UT_BLOCK:
                tval = rit;
match_block:
                {
                UBlockIter bi;
                UIndex rblkN = tval->series.buf;
                ur_blkSlice( ut, &bi, tval );
                tval = _parseBlock( ut, pe, bi.it, bi.end, &pos );
                iblk = pe->blk;
                if( ! tval )
                {
                    if( pe->exception == PARSE_EX_ERROR )
                    {
                        ur_appendTrace( ut, rblkN, 0 );
                        return 0;
                    }
                    if( pe->exception == PARSE_EX_BREAK )
                        pe->exception = PARSE_EX_NONE;
                    else
                        goto failed;
                }
                }
                ++rit;
                break;

            case UT_PAREN:
                if( UR_OK != pe->eval( ut, rit ) )
                    goto parse_err;
                iblk = _acquireInput( ut, pe );
                ++rit;
                break;

            default:
                BLK_RULE_ERROR( "invalid parse value" );
        }
Exemple #2
0
/*
  Match the rule cells [rit, rend) against the binary input of pe, starting
  at byte offset *spos.

  ut    Thread; passed to the ur_* calls and to pe->eval.
  pe    Parser state.  inputBufN & inputEnd locate the input bytes,
        bigEndian selects the byte order used by u16/u32, eval runs paren!
        rules, and exception is set to PARSE_EX_ERROR on error.
  rit   First rule cell.
  rend  One past the last rule cell.
  spos  In:  byte offset to start matching at.
        Out: offset reached, on success and on an ordinary match failure.
             It is not written when an error is raised.

  Returns rit (equal to rend) when every rule matched.
  Returns zero if matching rule not found or exception occurred.
*/
static const UCell* _parseBin( UThread* ut, BinaryParser* pe,
                               const UCell* rit, const UCell* rend,
                               UIndex* spos )
{
    const UCell* set = 0;       // First pending set-word, if any.
    const UCell* tval;
    uint32_t bitCount;
    uint32_t field;
    uint8_t* next;
    UBuffer* ibin  = ur_buffer( pe->inputBufN );
    uint8_t* in    = ibin->ptr.b + *spos;
    uint8_t* inEnd = ibin->ptr.b + pe->inputEnd;


match:

    while( rit != rend )
    {
        switch( ur_type(rit) )
        {
            case UT_INT:
                // A bare integer is a bit-field of that many bits.
                bitCount = ur_int(rit);
                if( bitCount < 1 || bitCount > 32 )
                {
                    ur_error( PARSE_ERR, "bit-field size must be 1 to 32" );
                    goto parse_err;
                }

                // pullBits() returns null on failure.  Its result is kept
                // in 'next' so that 'in' is still a valid input pointer
                // when the failed label computes *spos from it.
                if( bitCount > 24 )
                {
                    // Wide fields are read as two pulls: the high
                    // (bitCount - 16) bits, then the low 16 bits.
                    uint32_t high;
                    next = pullBits( pe, bitCount - 16, in, inEnd, &high );
                    if( next )
                        next = pullBits( pe, 16, next, inEnd, &field );
                    if( ! next )
                        goto failed;
                    field |= high << 16;
                }
                else
                {
                    next = pullBits( pe, bitCount, in, inEnd, &field );
                    if( ! next )
                        goto failed;
                }
                in = next;
                goto set_field;

            case UT_WORD:
                switch( ur_atom(rit) )
                {
                case UR_ATOM_U8:
                    if( in >= inEnd )
                        goto failed;
                    field = *in++;
                    goto set_field;

                case UR_ATOM_U16:
                    if( (inEnd - in) < 2 )
                        goto failed;
                    if( pe->bigEndian )
                        field = (in[0] << 8) | in[1];
                    else
                        field = (in[1] << 8) | in[0];
                    in += 2;
                    goto set_field;

                case UR_ATOM_U32:
                    if( (inEnd - in) < 4 )
                        goto failed;
                    // The top byte is widened to uint32_t before shifting;
                    // a uint8_t promotes to int and shifting a value >= 0x80
                    // left by 24 would overflow it.
                    if( pe->bigEndian )
                        field = ((uint32_t) in[0] << 24) | (in[1] << 16) |
                                (in[2] <<  8) |  in[3];
                    else
                        field = ((uint32_t) in[3] << 24) | (in[2] << 16) |
                                (in[1] <<  8) |  in[0];
                    in += 4;
                    goto set_field;

                case UR_ATOM_SKIP:
                    // Skipping requires a byte to skip; never step past
                    // the end of input.
                    if( in >= inEnd )
                        goto failed;
                    ++rit;
                    ++in;
                    break;
#if 0
                case UR_ATOM_MARK:
                    break;

                case UR_ATOM_PLACE:
                    ++rit;
                    if( (rit != rend) && ur_is(rit, UT_WORD) )
                    {
                        tval = ur_wordCell( ut, rit++ );
                        CHECK_WORD(tval);
                        if( ur_is(tval, UT_BINARY) )
                        {
                            pos = tval->series.it;
                            break;
                        }
                    }
                    ur_error( PARSE_ERR, "place expected series word" );
                    goto parse_err;
#endif
                case UR_ATOM_COPY:      // copy  dest   size
                                        //       word!  int!/word!
                    ++rit;
                    if( (rit != rend) && ur_is(rit, UT_WORD) )
                    {
                        UCell* res = ur_wordCellM( ut, rit );
                        CHECK_WORD(res);
                        if( ++rit != rend )
                        {
                            tval = rit++;
                            if( ur_is(tval, UT_WORD) )
                            {
                                // Size given indirectly through a word.
                                tval = ur_wordCell( ut, tval );
                                CHECK_WORD(tval);
                            }
                            if( ur_is(tval, UT_INT) && ur_int(tval) >= 0 )
                            {
                                UBuffer* cb;
                                int size = ur_int(tval);
                                UIndex off = in - ibin->ptr.b;

                                // Fail the match (before touching the
                                // destination word) if fewer than size
                                // bytes of input remain.
                                if( size > (inEnd - in) )
                                    goto failed;

                                cb = ur_makeBinaryCell( ut, size, res );
                                cb->used = size;

                                // NOTE(review): creating a buffer may
                                // presumably relocate the buffer table, as
                                // the block & paren cases re-fetch ibin
                                // after calls that can allocate.  Do the
                                // same here - confirm against
                                // ur_makeBinaryCell.
                                ibin  = ur_buffer( pe->inputBufN );
                                in    = ibin->ptr.b + off;
                                inEnd = ibin->ptr.b + pe->inputEnd;

                                memCpy( cb->ptr.b, in, size );
                                in += size;
                                break;
                            }
                        }
                        // Also reached for a negative count.
                        ur_error( PARSE_ERR, "copy expected int! count" );
                        goto parse_err;
                    }
                    ur_error( PARSE_ERR, "copy expected word! destination" );
                    goto parse_err;

                case UR_ATOM_BIG_ENDIAN:
                    ++rit;
                    pe->bigEndian = 1;
                    break;

                case UR_ATOM_LITTLE_ENDIAN:
                    ++rit;
                    pe->bigEndian = 0;
                    break;

                default:
                    // Any other word is replaced by its value, which is
                    // then matched as if it appeared literally in the rule.
                    tval = ur_wordCell( ut, rit );
                    CHECK_WORD(tval);

                    if( ur_is(tval, UT_CHAR) )
                        goto match_char;
                    else if( ur_is(tval, UT_STRING) )
                        goto match_string;
                    else if( ur_is(tval, UT_BLOCK) )
                        goto match_block;
                    /*
                    else if( ur_is(tval, UT_BITSET) )
                        goto match_bitset;
                    */
                    else
                    {
                        ur_error( PARSE_ERR,
                                "parse expected char!/string!/block!" );
                        goto parse_err;
                    }
                    break;
                }
                break;

            case UT_SETWORD:
                // Remember the first of a run of set-words; they are all
                // assigned at set_field once the next field is read.
                set = rit++;
                while( (rit != rend) && ur_is(rit, UT_SETWORD) )
                    ++rit;
                break;
#if 0
            case UT_GETWORD:
                break;

            case UT_INT:
                repMin = ur_int(rit);

                ++rit;
                if( rit == rend )
                    return 0;

                if( ur_is(rit, UT_INT) )
                {
                    repMax = ur_int(rit);
                    ++rit;
                }
                else
                {
                    repMax = repMin;
                }
                goto repeat;
#endif
            case UT_CHAR:
                tval = rit;
match_char:
                // tval is the char! cell to match; it differs from rit
                // when we arrive here through a word rule.
                if( in >= inEnd )
                    goto failed;
                if( *in != ur_int(tval) )
                    goto failed;
                ++in;
                ++rit;
                break;

            case UT_BLOCK:
                tval = rit;
match_block:
            {
                // Run the sub-rule block recursively from the current
                // input offset.
                UBlockIter bi;
                UIndex pos = in - ibin->ptr.b;
                UIndex rblkN = tval->series.buf;
                ur_blkSlice( ut, &bi, tval );
                tval = _parseBin( ut, pe, bi.it, bi.end, &pos );
                ibin = ur_buffer( pe->inputBufN );
                if( ! tval )
                {
                    if( pe->exception == PARSE_EX_ERROR )
                    {
                        ur_appendTrace( ut, rblkN, 0 );
                        return 0;
                    }
                    if( pe->exception == PARSE_EX_BREAK )
                        pe->exception = PARSE_EX_NONE;
                    else
                        goto failed;
                }
                in    = ibin->ptr.b + pos;
                inEnd = ibin->ptr.b + pe->inputEnd;
                ++rit;
            }
                break;

            case UT_PAREN:
            {
                UIndex pos = in - ibin->ptr.b;

                if( UR_OK != pe->eval( ut, rit ) )
                    goto parse_err;

                /* Re-acquire pointer & check if input modified. */
                ibin = ur_buffer( pe->inputBufN );
                if( pe->sliced )
                {
                    // We have no way to track changes to the end of a slice,
                    // so just make sure we remain in valid memory.
                    if( ibin->used < pe->inputEnd )
                        pe->inputEnd = ibin->used;
                }
                else
                {
                    // Not sliced, track input end.
                    pe->inputEnd = ibin->used;
                }

                // If the evaluated code shortened the input to before our
                // position, continue from the new end rather than beyond it.
                if( pos > pe->inputEnd )
                    pos = pe->inputEnd;

                in    = ibin->ptr.b + pos;
                inEnd = ibin->ptr.b + pe->inputEnd;
                ++rit;
            }
                break;

            case UT_STRING:
                tval = rit;
match_string:
            {
                // The input must begin with the bytes of the string.
                UBinaryIter bi;
                int size;

                ur_binSlice( ut, &bi, tval );
                if( ur_strIsUcs2(bi.buf) )
                    goto bad_enc;
                size = bi.end - bi.it;
                if( size > (inEnd - in) )
                    goto failed;
                if( match_pattern_8(in, inEnd, bi.it, bi.end) == bi.end )
                {
                    in += size;
                    ++rit;
                }
                else
                    goto failed;
            }
                break;
#if 0
            case UT_BITSET:
                tval = rit;
match_bitset:
            if( pos >= pe->inputEnd )
                goto failed;
            {
                const UBuffer* bin = ur_bufferSer( tval );
                int c = istr->ptr.c[ pos ];
                if( bitIsSet( bin->ptr.b, c ) )
                {
                    ++rit;
                    ++pos;
                }
                else
                    goto failed;
            }
                break;
#endif
            default:
                ur_error( PARSE_ERR, "invalid parse value" );
                             //orDatatypeName( ur_type(rit) ) );
                goto parse_err;
        }
    }

//complete:

    *spos = in - ibin->ptr.b;
    return rit;

set_field:

    // A numeric field was read into 'field'.  Assign it as an int! to each
    // cell from the pending set-word up to the current rule, then move on.
    if( set )
    {
        UCell* val;
        while( set != rit )
        {
            val = ur_wordCellM( ut, set++ );
            CHECK_WORD(val);
            ur_setId(val, UT_INT);
            ur_int(val) = field;
        }
        set = 0;
    }
    ++rit;
    goto match;

failed:

    // Ordinary mismatch: report how far we got, no exception set here.
    *spos = in - ibin->ptr.b;
    return 0;

bad_enc:

    ur_error( ut, UR_ERR_INTERNAL,
              "parse binary does not handle UCS2 strings" );
    // Falls through to parse_err.

parse_err:

    pe->exception = PARSE_EX_ERROR;
    return 0;
}