void DecimalDatatypeValidator::checkAdditionalFacetConstraintsBase(MemoryManager* const manager) const
{

    DecimalDatatypeValidator *numBase = (DecimalDatatypeValidator*) getBaseValidator();

    if (!numBase)
        return;

    int thisFacetsDefined = getFacetsDefined();
    int baseFacetsDefined = numBase->getFacetsDefined();

    // check 4.3.11.c1 error: totalDigits > base.totalDigits
    // totalDigits != base.totalDigits if (base.fixed)
    if (( thisFacetsDefined & DatatypeValidator::FACET_TOTALDIGITS) != 0)
    {
        if ( (( baseFacetsDefined & DatatypeValidator::FACET_TOTALDIGITS) != 0) &&
            ( fTotalDigits > numBase->fTotalDigits ))
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(fTotalDigits, value1, BUF_LEN, 10, manager);
            XMLString::binToText(numBase->fTotalDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr2(InvalidDatatypeFacetException
                                 , XMLExcepts::FACET_totalDigit_base_totalDigit
                                 , value1
                                 , value2
                                 , manager);
        }

        if ( (( baseFacetsDefined & DatatypeValidator::FACET_TOTALDIGITS) != 0) &&
            (( numBase->getFixed() & DatatypeValidator::FACET_TOTALDIGITS) != 0) &&
            ( fTotalDigits != numBase->fTotalDigits ))
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(fTotalDigits, value1, BUF_LEN, 10, manager);
            XMLString::binToText(numBase->fTotalDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr2(InvalidDatatypeFacetException
                                 , XMLExcepts::FACET_totalDigit_base_fixed
                                 , value1
                                 , value2
                                 , manager);
        }
    }

    if (( thisFacetsDefined & DatatypeValidator::FACET_FRACTIONDIGITS) != 0)
    {
        // check question error: fractionDigits > base.fractionDigits ???
        if ( (( baseFacetsDefined & DatatypeValidator::FACET_FRACTIONDIGITS) != 0) &&
            ( fFractionDigits > numBase->fFractionDigits ))
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(fFractionDigits, value1, BUF_LEN, 10, manager);
            XMLString::binToText(numBase->fFractionDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr2(InvalidDatatypeFacetException
                                 , XMLExcepts::FACET_fractDigit_base_fractDigit
                                 , value1
                                 , value2
                                 , manager);
                        }

        // check question error: fractionDigits > base.totalDigits ???
        if ( (( baseFacetsDefined & DatatypeValidator::FACET_TOTALDIGITS) != 0) &&
            ( fFractionDigits > numBase->fTotalDigits ))
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(fFractionDigits, value1, BUF_LEN, 10, manager);
            XMLString::binToText(numBase->fTotalDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr2(InvalidDatatypeFacetException
                                 , XMLExcepts::FACET_fractDigit_base_totalDigit
                                 , value1
                                 , value2
                                 , manager);
        }

        // fractionDigits != base.fractionDigits if (base.fixed)
        if ( (( baseFacetsDefined & DatatypeValidator::FACET_FRACTIONDIGITS) != 0) &&
            (( numBase->getFixed() & DatatypeValidator::FACET_FRACTIONDIGITS) != 0) &&
            ( fFractionDigits != numBase->fFractionDigits ))
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(fFractionDigits, value1, BUF_LEN, 10, manager);
            XMLString::binToText(numBase->fFractionDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr2(InvalidDatatypeFacetException
                                 , XMLExcepts::FACET_fractDigit_base_fixed
                                 , value1
                                 , value2
                                 , manager);
        }
    }

}
// -----------------------------------------------------------------------
// Abstract interface from AbstractNumericValidator
// -----------------------------------------------------------------------
void DecimalDatatypeValidator::checkContent(const XMLCh*             const content
                                           ,      ValidationContext* const context
                                           ,      bool                     asBase
                                           ,      MemoryManager*     const manager)
{
    //validate against base validator if any
    DecimalDatatypeValidator *pBase = (DecimalDatatypeValidator*) this->getBaseValidator();
    if (pBase)
        pBase->checkContent(content, context, true, manager);

    int thisFacetsDefined = getFacetsDefined();

    // we check pattern first
    if ( (thisFacetsDefined & DatatypeValidator::FACET_PATTERN ) != 0 )
    {
        if (getRegex()->matches(content, manager) ==false)
        {
            ThrowXMLwithMemMgr2(InvalidDatatypeValueException
                    , XMLExcepts::VALUE_NotMatch_Pattern
                    , content
                    , getPattern()
                    , manager);
        }
    }

    // if this is a base validator, we only need to check pattern facet
    // all other facet were inherited by the derived type
    if (asBase)
        return;

    XMLBigDecimal  compareDataValue(content, manager);
    XMLBigDecimal* compareData = &compareDataValue;

    if (getEnumeration())
    {
        XMLSize_t i=0;
        XMLSize_t enumLength = getEnumeration()->size();
        for ( ; i < enumLength; i++)
        {
            if (compareValues(compareData, (XMLBigDecimal*) getEnumeration()->elementAt(i)) ==0 )
                break;
        }

        if (i == enumLength)
            ThrowXMLwithMemMgr1(InvalidDatatypeValueException, XMLExcepts::VALUE_NotIn_Enumeration, content, manager);
    }

    boundsCheck(compareData, manager);

    if ( (thisFacetsDefined & DatatypeValidator::FACET_FRACTIONDIGITS) != 0 )
    {
        if ( compareData->getScale() > fFractionDigits )
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(compareData->getScale(), value1, BUF_LEN, 10, manager);
            XMLString::binToText(fFractionDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr3(InvalidDatatypeFacetException
                              , XMLExcepts::VALUE_exceed_fractDigit
                              , compareData->getRawData()
                              , value1
                              , value2
                              , manager);
        }
    }

    if ( (thisFacetsDefined & DatatypeValidator::FACET_TOTALDIGITS) != 0 )
    {
        if ( compareData->getTotalDigit() > fTotalDigits )
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(compareData->getTotalDigit(), value1, BUF_LEN, 10, manager);
            XMLString::binToText(fTotalDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr3(InvalidDatatypeFacetException
                              , XMLExcepts::VALUE_exceed_totalDigit
                              , compareData->getRawData()
                              , value1
                              , value2
                              , manager);
        }

        /***
         E2-44 totalDigits
         ... by restricting it to numbers that are expressible as i x 10^-n
         where i and n are integers such that |i| < 10^totalDigits and 0 <= n <= totalDigits.
         ***/

        if ( compareData->getScale() > fTotalDigits )
        {
            XMLCh value1[BUF_LEN+1];
            XMLCh value2[BUF_LEN+1];
            XMLString::binToText(compareData->getScale(), value1, BUF_LEN, 10, manager);
            XMLString::binToText(fTotalDigits, value2, BUF_LEN, 10, manager);
            ThrowXMLwithMemMgr3(InvalidDatatypeFacetException
                              , XMLExcepts::VALUE_exceed_totalDigit
                              , compareData->getRawData()
                              , value1
                              , value2
                              , manager);
        }
    }
}
// ---------------------------------------------------------------------------
//  XMLUTF8Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
XMLSize_t
XMLUTF8Transcoder::transcodeFrom(const  XMLByte* const          srcData
                                , const XMLSize_t               srcCount
                                ,       XMLCh* const            toFill
                                , const XMLSize_t               maxChars
                                ,       XMLSize_t&              bytesEaten
                                ,       unsigned char* const    charSizes)
{
    // Watch for pathological scenario. Shouldn't happen, but...
    if (!srcCount || !maxChars)
        return 0;

    //
    //  Get pointers to our start and end points of the input and output
    //  buffers.
    //
    const XMLByte*  srcPtr = srcData;
    const XMLByte*  srcEnd = srcPtr + srcCount;
    XMLCh*          outPtr = toFill;
    XMLCh*          outEnd = outPtr + maxChars;
    unsigned char*  sizePtr = charSizes;



    //
    //  We now loop until we either run out of input data, or room to store
    //  output chars.
    //
    while ((srcPtr < srcEnd) && (outPtr < outEnd))
    {
        // Special-case ASCII, which is a leading byte value of <= 127
        if (*srcPtr <= 127)
        {
            // Handle ASCII in groups instead of single character at a time.
            const XMLByte* srcPtr_save = srcPtr;
            const XMLSize_t chunkSize = (srcEnd-srcPtr)<(outEnd-outPtr)?(srcEnd-srcPtr):(outEnd-outPtr);
            for(XMLSize_t i=0;i<chunkSize && *srcPtr <= 127;++i)
                *outPtr++ = XMLCh(*srcPtr++);
            memset(sizePtr,1,srcPtr - srcPtr_save);
            sizePtr += srcPtr - srcPtr_save;
            if (srcPtr == srcEnd || outPtr == outEnd)
                break;
        }

        // See how many trailing src bytes this sequence is going to require
        const unsigned int trailingBytes = gUTFBytes[*srcPtr];

        //
        //  If there are not enough source bytes to do this one, then we
        //  are done. Note that we done >= here because we are implicitly
        //  counting the 1 byte we get no matter what.
        //
        //  If we break out here, then there is nothing to undo since we
        //  haven't updated any pointers yet.
        //
        if (srcPtr + trailingBytes >= srcEnd)
            break;

        // Looks ok, so lets build up the value
        // or at least let's try to do so--remembering that
        // we cannot assume the encoding to be valid:

        // first, test first byte
        if((gUTFByteIndicatorTest[trailingBytes] & *srcPtr) != gUTFByteIndicator[trailingBytes]) {
            char pos[2] = {(char)0x31, 0}; 
            char len[2] = {(char)(trailingBytes+0x31), 0};
            char byte[2] = {(char)*srcPtr,0};
            ThrowXMLwithMemMgr3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, pos, byte, len, getMemoryManager());
        }

        /***
         * http://www.unicode.org/reports/tr27/
         *
         * Table 3.1B. lists all of the byte sequences that are legal in UTF-8. 
         * A range of byte values such as A0..BF indicates that any byte from A0 to BF (inclusive) 
         * is legal in that position. 
         * Any byte value outside of the ranges listed is illegal. 
         * For example, 
         * the byte sequence <C0 AF> is illegal  since C0 is not legal in the 1st Byte column. 
         * The byte sequence <E0 9F 80> is illegal since in the row 
         *    where E0 is legal as a first byte, 
         *    9F is not legal as a second byte. 
         * The byte sequence <F4 80 83 92> is legal, since every byte in that sequence matches 
         * a byte range in a row of the table (the last row). 
         *
         *
         * Table 3.1B. Legal UTF-8 Byte Sequences  
         * Code Points              1st Byte    2nd Byte    3rd Byte    4th Byte 
         * =========================================================================
         * U+0000..U+007F            00..7F       
         * -------------------------------------------------------------------------
         * U+0080..U+07FF            C2..DF      80..BF      
         *
         * -------------------------------------------------------------------------
         * U+0800..U+0FFF            E0          A0..BF     80..BF   
         *                                       -- 
         *                          
         * U+1000..U+FFFF            E1..EF      80..BF     80..BF    
         *
         * --------------------------------------------------------------------------
         * U+10000..U+3FFFF          F0          90..BF     80..BF       80..BF 
         *                                       --
         * U+40000..U+FFFFF          F1..F3      80..BF     80..BF       80..BF 
         * U+100000..U+10FFFF        F4          80..8F     80..BF       80..BF 
         *                                           --
         * ==========================================================================
         *
         *  Cases where a trailing byte range is not 80..BF are underlined in the table to 
         *  draw attention to them. These occur only in the second byte of a sequence.
         *
         ***/

        XMLUInt32 tmpVal = 0;

        switch(trailingBytes)
        {
            case 1 :
                // UTF-8:   [110y yyyy] [10xx xxxx]
                // Unicode: [0000 0yyy] [yyxx xxxx]
                //
                // 0xC0, 0xC1 has been filtered out             
                checkTrailingBytes(*(srcPtr+1), 1, 1);

                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            case 2 :
                // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
                // Unicode: [zzzz yyyy] [yyxx xxxx]
                //
                if (( *srcPtr == 0xE0) && ( *(srcPtr+1) < 0xA0)) 
                {
                    char byte0[2] = {(char)*srcPtr    ,0};
                    char byte1[2] = {(char)*(srcPtr+1),0};

                    ThrowXMLwithMemMgr2(UTFDataFormatException
                                      , XMLExcepts::UTF8_Invalid_3BytesSeq
                                      , byte0
                                      , byte1
                                      , getMemoryManager());
                }

                checkTrailingBytes(*(srcPtr+1), 2, 1);
                checkTrailingBytes(*(srcPtr+2), 2, 2);

                //
                // D36 (a) UTF-8 is the Unicode Transformation Format that serializes 
                //         a Unicode code point as a sequence of one to four bytes, 
                //         as specified in Table 3.1, UTF-8 Bit Distribution.
                //     (b) An illegal UTF-8 code unit sequence is any byte sequence that 
                //         does not match the patterns listed in Table 3.1B, Legal UTF-8 
                //         Byte Sequences.
                //     (c) An irregular UTF-8 code unit sequence is a six-byte sequence 
                //         where the first three bytes correspond to a high surrogate, 
                //         and the next three bytes correspond to a low surrogate. 
                //         As a consequence of C12, these irregular UTF-8 sequences shall 
                //         not be generated by a conformant process. 
                //
                //irregular three bytes sequence
                // that is zzzzyy matches leading surrogate tag 110110 or 
                //                       trailing surrogate tag 110111
                // *srcPtr=1110 1101 
                // *(srcPtr+1)=1010 yyyy or 
                // *(srcPtr+1)=1011 yyyy
                //
                // 0xED 1110 1101
                // 0xA0 1010 0000

                if ((*srcPtr == 0xED) && (*(srcPtr+1) >= 0xA0))
                {
                    char byte0[2] = {(char)*srcPtr,    0};
                    char byte1[2] = {(char)*(srcPtr+1),0};

                     ThrowXMLwithMemMgr2(UTFDataFormatException
                              , XMLExcepts::UTF8_Irregular_3BytesSeq
                              , byte0
                              , byte1
                              , getMemoryManager());
                }

                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            case 3 : 
                // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
                // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
                //          [1101 11yy] [yyxx xxxx] (low surrogate)
                //          * uuuuu = wwww + 1
                //
                if (((*srcPtr == 0xF0) && (*(srcPtr+1) < 0x90)) ||
                    ((*srcPtr == 0xF4) && (*(srcPtr+1) > 0x8F))  )
                {
                    char byte0[2] = {(char)*srcPtr    ,0};
                    char byte1[2] = {(char)*(srcPtr+1),0};

                    ThrowXMLwithMemMgr2(UTFDataFormatException
                                      , XMLExcepts::UTF8_Invalid_4BytesSeq
                                      , byte0
                                      , byte1
                                      , getMemoryManager());
                }

                checkTrailingBytes(*(srcPtr+1), 3, 1);
                checkTrailingBytes(*(srcPtr+2), 3, 2);
                checkTrailingBytes(*(srcPtr+3), 3, 3);
                
                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            default: // trailingBytes > 3

                /***
                 * The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also allows 
                 * for the use of five- and six-byte sequences to encode characters that 
                 * are outside the range of the Unicode character set; those five- and 
                 * six-byte sequences are illegal for the use of UTF-8 as a transformation 
                 * of Unicode characters. ISO/IEC 10646 does not allow mapping of unpaired 
                 * surrogates, nor U+FFFE and U+FFFF (but it does allow other noncharacters).
                 ***/
                char len[2]  = {(char)(trailingBytes+0x31), 0};
                char byte[2] = {(char)*srcPtr,0};

                ThrowXMLwithMemMgr2(UTFDataFormatException
                                  , XMLExcepts::UTF8_Exceeds_BytesLimit
                                  , byte
                                  , len
                                  , getMemoryManager());

                break;
        }


        // since trailingBytes comes from an array, this logic is redundant
        //  default :
        //      ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
        //}
        tmpVal -= gUTFOffsets[trailingBytes];

        //
        //  If it will fit into a single char, then put it in. Otherwise
        //  encode it as a surrogate pair. If its not valid, use the
        //  replacement char.
        //
        if (!(tmpVal & 0xFFFF0000))
        {
            *sizePtr++ = trailingBytes + 1;
            *outPtr++ = XMLCh(tmpVal);
        }
         else if (tmpVal > 0x10FFFF)
        {
            //
            //  If we've gotten more than 32 chars so far, then just break
            //  out for now and lets process those. When we come back in
            //  here again, we'll get no chars and throw an exception. This
            //  way, the error will have a line and col number closer to
            //  the real problem area.
            //
            if ((outPtr - toFill) > 32)
                break;

            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
        }
         else
        {
            //
            //  If we have enough room to store the leading and trailing
            //  chars, then lets do it. Else, pretend this one never
            //  happened, and leave it for the next time. Since we don't
            //  update the bytes read until the bottom of the loop, by
            //  breaking out here its like it never happened.
            //
            if (outPtr + 1 >= outEnd)
                break;

            // Store the leading surrogate char
            tmpVal -= 0x10000;
            *sizePtr++ = trailingBytes + 1;
            *outPtr++ = XMLCh((tmpVal >> 10) + 0xD800);

            //
            //  And then the trailing char. This one accounts for no
            //  bytes eaten from the source, so set the char size for this
            //  one to be zero.
            //
            *sizePtr++ = 0;
            *outPtr++ = XMLCh((tmpVal & 0x3FF) + 0xDC00);
        }
    }

    // Update the bytes eaten
    bytesEaten = srcPtr - srcData;

    // Return the characters read
    return outPtr - toFill;
}
XMLSize_t
XMLUTF8Transcoder::transcodeTo( const   XMLCh* const    srcData
                                , const XMLSize_t       srcCount
                                ,       XMLByte* const  toFill
                                , const XMLSize_t       maxBytes
                                ,       XMLSize_t&      charsEaten
                                , const UnRepOpts       options)
{
    // Watch for pathological scenario. Shouldn't happen, but...
    if (!srcCount || !maxBytes)
        return 0;

    //
    //  Get pointers to our start and end points of the input and output
    //  buffers.
    //
    const XMLCh*    srcPtr = srcData;
    const XMLCh*    srcEnd = srcPtr + srcCount;
    XMLByte*        outPtr = toFill;
    XMLByte*        outEnd = toFill + maxBytes;

    while (srcPtr < srcEnd)
    {
        //
        //  Tentatively get the next char out. We have to get it into a
        //  32 bit value, because it could be a surrogate pair.
        //
        XMLUInt32 curVal = *srcPtr;

        //
        //  If its a leading surrogate, then lets see if we have the trailing
        //  available. If not, then give up now and leave it for next time.
        //
        unsigned int srcUsed = 1;
        if ((curVal >= 0xD800) && (curVal <= 0xDBFF))
        {
            if (srcPtr + 1 >= srcEnd)
                break;

            // Create the composite surrogate pair
            curVal = ((curVal - 0xD800) << 10)
                    + ((*(srcPtr + 1) - 0xDC00) + 0x10000);

            // And indicate that we ate another one
            srcUsed++;
        }

        // Figure out how many bytes we need
        unsigned int encodedBytes;
        if (curVal < 0x80)
            encodedBytes = 1;
        else if (curVal < 0x800)
            encodedBytes = 2;
        else if (curVal < 0x10000)
            encodedBytes = 3;
        else if (curVal < 0x110000)
            encodedBytes = 4;
        else
        {
            // If the options say to throw, then throw
            if (options == UnRep_Throw)
            {
                XMLCh tmpBuf[17];
                XMLString::binToText(curVal, tmpBuf, 16, 16, getMemoryManager());
                ThrowXMLwithMemMgr2
                (
                    TranscodingException
                    , XMLExcepts::Trans_Unrepresentable
                    , tmpBuf
                    , getEncodingName()
                    , getMemoryManager()
                );
            }

            // Else, use the replacement character
            *outPtr++ = chSpace;
            srcPtr += srcUsed;
            continue;
        }

        //
        //  If we cannot fully get this char into the output buffer,
        //  then leave it for the next time.
        //
        if (outPtr + encodedBytes > outEnd)
            break;

        // We can do it, so update the source index
        srcPtr += srcUsed;

        //
        //  And spit out the bytes. We spit them out in reverse order
        //  here, so bump up the output pointer and work down as we go.
        //
        outPtr += encodedBytes;
        switch(encodedBytes)
        {
            case 6 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL);
                     curVal >>= 6;
            case 5 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL);
                     curVal >>= 6;
            case 4 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL);
                     curVal >>= 6;
            case 3 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL);
                     curVal >>= 6;
            case 2 : *--outPtr = XMLByte((curVal | 0x80UL) & 0xBFUL);
                     curVal >>= 6;
            case 1 : *--outPtr = XMLByte
                     (
                        curVal | gFirstByteMark[encodedBytes]
                     );
        }

        // Add the encoded bytes back in again to indicate we've eaten them
        outPtr += encodedBytes;
    }

    // Fill in the chars we ate
    charsEaten = (srcPtr - srcData);

    // And return the bytes we filled in
    return (outPtr - toFill);
}
Exemplo n.º 5
0
// ---------------------------------------------------------------------------
//  XMLASCIITranscoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLASCIITranscoder::transcodeFrom(  const   XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the source byte count.
    //
    const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars;

    //
    //  Now loop through that many source chars and just cast each one
    //  over to the XMLCh format. Check each source that its really a
    //  valid ASCI char.
    //
    const XMLByte*  srcPtr = srcData;
    XMLCh*          outPtr = toFill;
    unsigned int    countDone = 0;
    for (; countDone < countToDo; countDone++)
    {
        // Do the optimistic work up front
        if (*srcPtr < 0x80)
        {
            *outPtr++ = XMLCh(*srcPtr++);
            continue;
        }

        //
        //  We got non source encoding char. If we got more than 32 chars,
        //  the just break out. We'll come back here later to hit this again
        //  and give an error much closer to the real source position.
        //
        if (countDone > 32)
            break;

        XMLCh tmpBuf[17];
        XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
        ThrowXMLwithMemMgr2
        (
            TranscodingException
            , XMLExcepts::Trans_Unrepresentable
            , tmpBuf
            , getEncodingName()
            , getMemoryManager()
        );
    }

    // Set the bytes we ate
    bytesEaten = countDone;

    // Set the char sizes to the fixed size
    memset(charSizes, 1, countDone);

    // Return the chars we transcoded
    return countDone;
}
Exemplo n.º 6
0
/***
 *   .empty stringPool
 *   .empty gramamrRegistry
 ***/
void XMLGrammarPoolImpl::deserializeGrammars(BinInputStream* const binIn)
{
    MemoryManager *memMgr = getMemoryManager();
    unsigned int stringCount = fStringPool->getStringCount();
    if (stringCount)
    {
        /***
         * it contains only the four predefined one, that is ok
         * but we need to reset the string before deserialize it
         *
         ***/
        if ( stringCount <= 4 )
        {
            fStringPool->flushAll();
        }
        else
        {
            ThrowXMLwithMemMgr(XSerializationException, XMLExcepts::XSer_StringPool_NotEmpty, memMgr);
        }
    }

    RefHashTableOfEnumerator<Grammar> grammarEnum(fGrammarRegistry, false, memMgr);
    if (grammarEnum.hasMoreElements())
    {
        ThrowXMLwithMemMgr(XSerializationException, XMLExcepts::XSer_GrammarPool_NotEmpty, memMgr);
    }

    // This object will take care of cleaning up if an exception is
    // thrown during deserialization.
    JanitorMemFunCall<XMLGrammarPoolImpl>   cleanup(this, &XMLGrammarPoolImpl::cleanUp);

    try
    {
        XSerializeEngine  serEng(binIn, this);

        //version information
        unsigned int  StorerLevel;
        serEng>>StorerLevel;
        serEng.fStorerLevel = StorerLevel;

        // The storer level must match the loader level.
        //
        if (StorerLevel != (unsigned int)MATCH_GRAMMAR_SERIALIZATION_LEVEL)
        {
            XMLCh     StorerLevelChar[5];
            XMLCh     LoaderLevelChar[5];
            XMLString::binToText(StorerLevel,                          StorerLevelChar,   4, 10, memMgr);
            XMLString::binToText(MATCH_GRAMMAR_SERIALIZATION_LEVEL,   LoaderLevelChar,   4, 10, memMgr);

            ThrowXMLwithMemMgr2(XSerializationException
                    , XMLExcepts::XSer_Storer_Loader_Mismatch
                    , StorerLevelChar
                    , LoaderLevelChar
                    , memMgr);
        }

        //lock status
        serEng>>fLocked;

        //StringPool, don't use >>
        fStringPool->serialize(serEng);

        /***
         * Deserialize RefHashTableOf<Grammar>*    fGrammarRegistry;
         ***/
        XTemplateSerializer::loadObject(&fGrammarRegistry, 29, true, serEng);

    }
    catch(const OutOfMemoryException&)
    {
        // This is a special case, because we don't want
        // to execute cleanup code on out-of-memory
        // conditions.
        cleanup.release();

        throw;
    }

    // Everything is OK, so we can release the cleanup object.
    cleanup.release();

    if (fLocked)
    {
        createXSModel();
    }
}
Exemplo n.º 7
0
// ---------------------------------------------------------------------------
//  Win32Transcoder: The virtual transcoder API
// ---------------------------------------------------------------------------
unsigned int
Win32Transcoder::transcodeFrom( const   XMLByte* const      srcData
                                , const unsigned int        srcCount
                                ,       XMLCh* const        toFill
                                , const unsigned int        maxChars
                                ,       unsigned int&       bytesEaten
                                ,       unsigned char* const charSizes)
{
    // Get temp pointers to the in and out buffers, and the chars sizes one
    XMLCh*          outPtr = toFill;
    const XMLByte*  inPtr  = srcData;
    unsigned char*  sizesPtr = charSizes;

    // Calc end pointers for each of them
    XMLCh*          outEnd = toFill + maxChars;
    const XMLByte*  inEnd  = srcData + srcCount;

    //
    //  Now loop until we either get our max chars, or cannot get a whole
    //  character from the input buffer.
    //
    bytesEaten = 0;
    while ((outPtr < outEnd) && (inPtr < inEnd))
    {
        //
        //  If we are looking at a leading byte of a multibyte sequence,
        //  then we are going to eat 2 bytes, else 1.
        //
        const unsigned int toEat = ::IsDBCSLeadByteEx(fWinCP, *inPtr) ?
                                    2 : 1;

        // Make sure a whol char is in the source
        if (inPtr + toEat > inEnd)
            break;

        // Try to translate this next char and check for an error
        const unsigned int converted = ::MultiByteToWideChar
        (
            fWinCP
            , MB_PRECOMPOSED | MB_ERR_INVALID_CHARS
            , (const char*)inPtr
            , toEat
            , outPtr
            , 1
        );

        if (converted != 1)
        {
            if (toEat == 1)
            {
                XMLCh tmpBuf[17];
                XMLString::binToText((unsigned int)(*inPtr), tmpBuf, 16, 16, getMemoryManager());
                ThrowXMLwithMemMgr2
                (
                    TranscodingException
                    , XMLExcepts::Trans_BadSrcCP
                    , tmpBuf
                    , getEncodingName()
                    , getMemoryManager()
                );
            }
             else
            {
                ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
            }
        }

        // Update the char sizes array for this round
        *sizesPtr++ = toEat;

        // And update the bytes eaten count
        bytesEaten += toEat;

        // And update our in/out ptrs
        inPtr += toEat;
        outPtr++;
    }

    // Return the chars we output
    return (outPtr - toFill);
}
Exemplo n.º 8
0
unsigned int
Win32Transcoder::transcodeTo(const  XMLCh* const    srcData
                            , const unsigned int    srcCount
                            ,       XMLByte* const  toFill
                            , const unsigned int    maxBytes
                            ,       unsigned int&   charsEaten
                            , const UnRepOpts       options)
{
    // Get pointers to the start and end of each buffer
    const XMLCh*    srcPtr = srcData;
    const XMLCh*    srcEnd = srcData + srcCount;
    XMLByte*        outPtr = toFill;
    XMLByte*        outEnd = toFill + maxBytes;

    //
    //  Now loop until we either get our max chars, or cannot get a whole
    //  character from the input buffer.
    //
    //  NOTE: We have to use a loop for this unfortunately because the
    //  conversion API is too dumb to tell us how many chars it converted if
    //  it couldn't do the whole source.
    //
    BOOL usedDef;
    while ((outPtr < outEnd) && (srcPtr < srcEnd))
    {
        //
        //  Do one char and see if it made it.
        const unsigned int bytesStored = ::WideCharToMultiByte
        (
            fWinCP
            , WC_COMPOSITECHECK | WC_SEPCHARS
            , srcPtr
            , 1
            , (char*)outPtr
            , outEnd - outPtr
            , 0
            , &usedDef
        );

        // If we didn't transcode anything, then we are done
        if (!bytesStored)
            break;

        //
        //  If the defaault char was used and the options indicate that
        //  this isn't allowed, then throw.
        //
        if (usedDef && (options == UnRep_Throw))
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }

        // Update our pointers
        outPtr += bytesStored;
        srcPtr++;
    }

    // Update the chars eaten
    charsEaten = srcPtr - srcData;

    // And return the bytes we stored
    return outPtr - toFill;
}
Exemplo n.º 9
0
unsigned int
XML256TableTranscoder390::transcodeTo( const   XMLCh* const    srcData
                                    , const unsigned int    srcCount
                                    ,       XMLByte* const  toFill
                                    , const unsigned int    maxBytes
                                    ,       unsigned int&   charsEaten
                                    , const UnRepOpts       options)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of chars in the source.
    //
    const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes;

    //
    //  Loop through the count we have to do and map each char via the
    //  lookup table.
    //
    const XMLCh*    srcPtr = srcData;
    const XMLCh*    endPtr = (srcPtr + countToDo);
    XMLByte*        outPtr = toFill;
    XMLByte         nextOut;
    while (srcPtr < endPtr)
    {
        //
        //  Get the next src char out to a temp, then do a binary search
        //  of the 'to' table for this entry.
        //
        if ((nextOut = xlatOneTo(*srcPtr)))
        {
            *outPtr++ = nextOut;
            srcPtr++;
            continue;
        }

        //
        //  Its not representable so, according to the options, either
        //  throw or use the replacement.
        //
        if (options == UnRep_Throw)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }

        // Eat the source char and use the replacement char
        srcPtr++;
        *outPtr++ = 0x3F;
    }

    // Set the chars eaten
    charsEaten = countToDo;

    // Return the bytes we transcoded
    return countToDo;
}
Exemplo n.º 10
0
BinInputStream* XMLURL::makeNewStream() const
{
    //
    //  If its a local host, then we short circuit it and use our own file
    //  stream support. Otherwise, we just let it fall through and let the
    //  installed network access object provide a stream.
    //
    if (fProtocol == XMLURL::File)
    {
        if (!fHost || !XMLString::compareIStringASCII(fHost, XMLUni::fgLocalHostString))
        {

            XMLCh* realPath = XMLString::replicate(fPath, fMemoryManager);
            ArrayJanitor<XMLCh> basePathName(realPath, fMemoryManager);

            //
            // Need to manually replace any character reference %xx first
            // HTTP protocol will be done automatically by the netaccessor
            //
            int end = XMLString::stringLen(realPath);
            int percentIndex = XMLString::indexOf(realPath, chPercent, 0, fMemoryManager);

            while (percentIndex != -1) {

                if (percentIndex+2 >= end ||
                    !isHexDigit(realPath[percentIndex+1]) ||
                    !isHexDigit(realPath[percentIndex+2]))
                {
                    XMLCh value1[4];
                    XMLString::moveChars(value1, &(realPath[percentIndex]), 3);
                    value1[3] = chNull;
                    ThrowXMLwithMemMgr2(MalformedURLException
                            , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
                            , realPath
                            , value1
                            , fMemoryManager);
                }

                unsigned int value = (xlatHexDigit(realPath[percentIndex+1]) * 16) + xlatHexDigit(realPath[percentIndex+2]);

                realPath[percentIndex] = XMLCh(value);

                int i =0;
                for (i = percentIndex + 1; i < end - 2 ; i++)
                    realPath[i] = realPath[i+2];
                realPath[i] = chNull;
                end = i;

                percentIndex = XMLString::indexOf(realPath, chPercent, percentIndex, fMemoryManager);
            }


            BinFileInputStream* retStrm = new (fMemoryManager) BinFileInputStream(realPath, fMemoryManager);
            if (!retStrm->getIsOpen())
            {
                delete retStrm;
                return 0;
            }
            return retStrm;
        }
    }

    //
    //  If we don't have have an installed net accessor object, then we
    //  have to just throw here.
    //
    if (!XMLPlatformUtils::fgNetAccessor)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);

    // Else ask the net accessor to create the stream
    return XMLPlatformUtils::fgNetAccessor->makeNew(*this);
}
Exemplo n.º 11
0
unsigned int
XML88591Transcoder390::transcodeTo(const   XMLCh* const    srcData
                                , const unsigned int    srcCount
                                ,       XMLByte* const  toFill
                                , const unsigned int    maxBytes
                                ,       unsigned int&   charsEaten
                                , const UnRepOpts       options)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output bytes and the number of chars in the source.
    //
    const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes;

    //
    //  Loop through the bytes to do and convert over each byte. Its just
    //  a downcast of the wide char, checking for unrepresentable chars.
    //
    const XMLCh*    srcPtr  = srcData;
    const XMLCh*    srcEnd  = srcPtr + countToDo;
    XMLByte*        destPtr = toFill;
    while (srcPtr < srcEnd)
    {
        // If its legal, take it and jump back to top
        if (*srcPtr < 256)
        {
            *destPtr++ = XMLByte(*srcPtr++);
            continue;
        }

        //
        //  Its not representable so use a replacement char. According to
        //  the options, either throw or use the replacement.
        //
        if (options == UnRep_Throw)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }
        *destPtr++ = 0x1A;
        srcPtr++;
    }

    // Set the chars eaten
    charsEaten = countToDo;

    // Return the bytes we transcoded
    return countToDo;
}
Exemplo n.º 12
0
RangeToken* RegxParser::parseCharacterClass(const bool useNRange) {

    setParseContext(regexParserStateInBrackets);
    processNext();

    RangeToken* tok = 0;
    bool isNRange = false;

    if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
        isNRange = true;
        processNext();
    }
    tok = fTokenFactory->createRange();

    parserState type;
    bool firstLoop = true;
    bool wasDecoded;

    while ( (type = getState()) != REGX_T_EOF) {

        wasDecoded = false;

        // single range | from-to-range | subtraction
        if (type == REGX_T_CHAR && getCharData() == chCloseSquare && !firstLoop)
            break;

        XMLInt32 ch = getCharData();
        bool     end = false;

        if (type == REGX_T_BACKSOLIDUS) {

            switch(ch) {
            case chLatin_d:
            case chLatin_D:
            case chLatin_w:
            case chLatin_W:
            case chLatin_s:
            case chLatin_S:
            case chLatin_i:
            case chLatin_I:
            case chLatin_c:
            case chLatin_C:
                {
                    tok->mergeRanges(getTokenForShorthand(ch));
                    end = true;
                }
                break;
            case chLatin_p:
            case chLatin_P:
                {                    
                    RangeToken* tok2 = processBacksolidus_pP(ch);

                    if (tok2 == 0) {
                        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, getMemoryManager());
                    }

                    tok->mergeRanges(tok2);
                    end = true;
                }
                break;
            case chDash:
                wasDecoded = true;
                // fall thru to default.
            default:
                ch = decodeEscaped();
            }
        } // end if REGX_T_BACKSOLIDUS
        else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {

            if (isNRange)
            {
                tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager);
                isNRange=false;
            }
            RangeToken* rangeTok = parseCharacterClass(false);
            tok->subtractRanges(rangeTok);

            if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
                ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC5, getMemoryManager());
            }
            break;
        } // end if REGX_T_XMLSCHEMA...

        processNext();

        if (!end) {

            if (type == REGX_T_CHAR
                && (ch == chOpenSquare
                    || ch == chCloseSquare
                    || (ch == chDash && getCharData() == chCloseSquare && firstLoop))) {
                // if regex = [-] then invalid...
                // '[', ']', '-' not allowed and should be escaped
                XMLCh chStr[] = { ch, chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }
            if (ch == chDash && getCharData() == chDash && getState() != REGX_T_BACKSOLIDUS && !wasDecoded) {
                XMLCh chStr[] = { ch, chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }

            if (getState() != REGX_T_CHAR || getCharData() != chDash) {
                tok->addRange(ch, ch);
            }
            else {

                processNext();
                if ((type = getState()) == REGX_T_EOF)
                    ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

                if (type == REGX_T_CHAR && getCharData() == chCloseSquare) {
                    tok->addRange(ch, ch);
                    tok->addRange(chDash, chDash);
                }
                else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {

                    static const XMLCh dashStr[] = { chDash, chNull};
                    ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr, getMemoryManager());
                }
                else {

                    XMLInt32 rangeEnd = getCharData();
                    XMLCh rangeEndStr[] = { rangeEnd, chNull };

                    if (type == REGX_T_CHAR) {

                        if (rangeEnd == chOpenSquare
                            || rangeEnd == chCloseSquare
                            || rangeEnd == chDash)
                            // '[', ']', '-' not allowed and should be escaped
                            ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager());
                    }
                    else if (type == REGX_T_BACKSOLIDUS) {
                        rangeEnd = decodeEscaped();
                    }

                    processNext();

                    if (ch > rangeEnd) {
                        XMLCh chStr[] = { ch, chNull };
                        ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr, getMemoryManager());
                    }

                    tok->addRange(ch, rangeEnd);
                }
            }
        }
        firstLoop = false;
    }

    if (getState() == REGX_T_EOF)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

    if (isNRange)
    {
        if(useNRange)
            tok->setTokenType(Token::T_NRANGE);
        else
            tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager);
    }

    tok->sortRanges();
    tok->compactRanges();

    // If the case-insensitive option is enabled, we need to
    // have the new RangeToken instance build its internal
    // case-insensitive RangeToken.
    if (RegularExpression::isSet(fOptions, RegularExpression::IGNORE_CASE))
    {
        tok->getCaseInsensitiveToken(fTokenFactory);
    }

    setParseContext(regexParserStateNormal);
    processNext();

    return tok;
}
void QNameDatatypeValidator::checkContent( const XMLCh*             const content
                                          ,       ValidationContext* const context
                                          ,       bool                     asBase
                                          ,       MemoryManager*     const manager
                                          )
{

    //validate against base validator if any
    QNameDatatypeValidator *pBaseValidator = (QNameDatatypeValidator*) this->getBaseValidator();
    if (pBaseValidator)
        pBaseValidator->checkContent(content, context, true, manager);

    int thisFacetsDefined = getFacetsDefined();

    // we check pattern first
    if ( (thisFacetsDefined & DatatypeValidator::FACET_PATTERN ) != 0 )
    {
        if (getRegex()->matches(content, manager) ==false)
        {
            ThrowXMLwithMemMgr2(InvalidDatatypeValueException
                    , XMLExcepts::VALUE_NotMatch_Pattern
                    , content
                    , getPattern()
                    , manager);
        }
    }

    // if this is a base validator, we only need to check pattern facet
    // all other facet were inherited by the derived type
    if (asBase)
        return;

    checkValueSpace(content, manager);

    if (context) {
        int colonPos = XMLString::indexOf(content, chColon);
        if (colonPos > 0) {
            XMLCh* prefix = XMLString::replicate(content, manager);
            ArrayJanitor<XMLCh>  jan(prefix, manager);
            normalizeContent(prefix, manager);
            prefix[colonPos] = chNull;
                     
            if (context->isPrefixUnknown(prefix)) {
                ThrowXMLwithMemMgr1(InvalidDatatypeValueException
                    , XMLExcepts::VALUE_QName_Invalid2
                    , content
                    , manager);             
            }                                  
        }
    }

    if ((thisFacetsDefined & DatatypeValidator::FACET_ENUMERATION) != 0 &&
        (getEnumeration() != 0))
    {
        XMLCh* normContent = XMLString::replicate(content, manager);
        ArrayJanitor<XMLCh>  jan(normContent, manager);
        normalizeContent(normContent, manager);

        int i=0;
        int enumLength = getEnumeration()->size();
        for ( ; i < enumLength; i++)
        {
            if (XMLString::equals(normContent, getEnumeration()->elementAt(i)))
                break;
        }

        if (i == enumLength)
            ThrowXMLwithMemMgr1(InvalidDatatypeValueException, XMLExcepts::VALUE_NotIn_Enumeration, content, manager);
    }

    checkAdditionalFacet(content, manager);
}