Beispiel #1
0
namespace qencodingprober {
// Byte-class table for the HZ model; HZSMModel references it in its first
// inner aggregate. It holds 256 / 8 = 32 words: each PCK4BITS(...) row supplies
// the values for eight consecutive byte codes, and the trailing comment on a
// row gives the byte range (hex) that row covers.
// Reading the rows in that order:
//   0x00, 0x1b and 0x80-0xff                      -> 1
//   0x7b ('{') -> 4, 0x7d ('}') -> 5, 0x7e ('~')  -> 2
//   every other byte                              -> 0
// NOTE(review): assumes PCK4BITS takes its arguments in ascending byte order,
// as the row comments suggest; the macro is defined outside this file.
static unsigned int HZ_cls[ 256 / 8 ] = {
    PCK4BITS(1,0,0,0,0,0,0,0),  // 00 - 07
    PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17
    PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27
    PCK4BITS(0,0,0,0,0,0,0,0),  // 28 - 2f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37
    PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 40 - 47
    PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57
    PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67
    PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77
    PCK4BITS(0,0,0,4,0,5,2,0),  // 78 - 7f
    PCK4BITS(1,1,1,1,1,1,1,1),  // 80 - 87
    PCK4BITS(1,1,1,1,1,1,1,1),  // 88 - 8f
    PCK4BITS(1,1,1,1,1,1,1,1),  // 90 - 97
    PCK4BITS(1,1,1,1,1,1,1,1),  // 98 - 9f
    PCK4BITS(1,1,1,1,1,1,1,1),  // a0 - a7
    PCK4BITS(1,1,1,1,1,1,1,1),  // a8 - af
    PCK4BITS(1,1,1,1,1,1,1,1),  // b0 - b7
    PCK4BITS(1,1,1,1,1,1,1,1),  // b8 - bf
    PCK4BITS(1,1,1,1,1,1,1,1),  // c0 - c7
    PCK4BITS(1,1,1,1,1,1,1,1),  // c8 - cf
    PCK4BITS(1,1,1,1,1,1,1,1),  // d0 - d7
    PCK4BITS(1,1,1,1,1,1,1,1),  // d8 - df
    PCK4BITS(1,1,1,1,1,1,1,1),  // e0 - e7
    PCK4BITS(1,1,1,1,1,1,1,1),  // e8 - ef
    PCK4BITS(1,1,1,1,1,1,1,1),  // f0 - f7
    PCK4BITS(1,1,1,1,1,1,1,1)   // f8 - ff
};


// Packed state table for the HZ model; HZSMModel references it in its second
// inner aggregate. Six PCK4BITS rows give 48 entries; each entry is eStart,
// eError, eItsMe or a numeric state id (3-6 here). The trailing row comments
// give the flat entry index range in hex.
// NOTE(review): presumably the entry for (state, class) sits at
// state * 6 + class, 6 being the count stored in HZSMModel — confirm against
// the state-machine driver, which is not part of this file.
static unsigned int HZ_st [ 6] = {
    PCK4BITS(eStart,eError,     3,eStart,eStart,eStart,eError,eError),//00-07
    PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
    PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError),//10-17
    PCK4BITS(     5,eError,     6,eError,     5,     5,     4,eError),//18-1f
    PCK4BITS(     4,eError,     4,     4,     4,eError,     4,eError),//20-27
    PCK4BITS(     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
};

// Character-length table referenced by HZSMModel: six entries, all zero.
// NOTE(review): presumably one entry per byte class — confirm against the
// state-machine driver, which is not part of this file.
static const unsigned int HZCharLenTable[] = {
    0, 0, 0,
    0, 0, 0
};

// State-machine model descriptor for HZ. In order, the initializer supplies:
//   1. an aggregate of the four e...4bits constants plus the table HZ_cls,
//   2. the integer 6 (NOTE(review): presumably the number of byte classes —
//      confirm against the SMModel declaration),
//   3. an aggregate of the same four constants plus the table HZ_st,
//   4. the character-length table HZCharLenTable,
//   5. the charset name string.
SMModel HZSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },
    6,
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },
    HZCharLenTable,
    "HZ-GB-2312",
};


// Byte-class table for the ISO-2022-CN model; ISO2022CNSMModel references it
// in its first inner aggregate. It holds 256 / 8 = 32 words: each
// PCK4BITS(...) row supplies the values for eight consecutive byte codes, and
// the trailing comment on a row gives the byte range (hex) that row covers.
// Reading the rows in that order:
//   0x1b                -> 1
//   0x00 and 0x80-0xff  -> 2
//   0x29 (')')          -> 3
//   0x43 ('C')          -> 4
//   every other byte    -> 0
// NOTE(review): assumes PCK4BITS takes its arguments in ascending byte order,
// as the row comments suggest; the macro is defined outside this file.
static unsigned int ISO2022CN_cls [ 256 / 8 ] = {
    PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07
    PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17
    PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27
    PCK4BITS(0,3,0,0,0,0,0,0),  // 28 - 2f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37
    PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f
    PCK4BITS(0,0,0,4,0,0,0,0),  // 40 - 47
    PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57
    PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67
    PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77
    PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f
    PCK4BITS(2,2,2,2,2,2,2,2),  // 80 - 87
    PCK4BITS(2,2,2,2,2,2,2,2),  // 88 - 8f
    PCK4BITS(2,2,2,2,2,2,2,2),  // 90 - 97
    PCK4BITS(2,2,2,2,2,2,2,2),  // 98 - 9f
    PCK4BITS(2,2,2,2,2,2,2,2),  // a0 - a7
    PCK4BITS(2,2,2,2,2,2,2,2),  // a8 - af
    PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7
    PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf
    PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7
    PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf
    PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7
    PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df
    PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7
    PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef
    PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7
    PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff
};


// Packed state table for the ISO-2022-CN model; ISO2022CNSMModel references it
// in its second inner aggregate. Eight PCK4BITS rows give 64 entries; each
// entry is eStart, eError, eItsMe or a numeric state id (3-6 here). The
// trailing row comments give the flat entry index range in hex.
// NOTE(review): presumably the entry for (state, class) sits at
// state * 9 + class, 9 being the count stored in ISO2022CNSMModel — confirm
// against the state-machine driver, which is not part of this file.
static unsigned int ISO2022CN_st [ 8] = {
    PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eStart,eStart),//00-07
    PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError),//08-0f
    PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
    PCK4BITS(eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError),//18-1f
    PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//20-27
    PCK4BITS(     5,     6,eError,eError,eError,eError,eError,eError),//28-2f
    PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//30-37
    PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f
};

// Character-length table referenced by ISO2022CNSMModel: nine entries, all
// zero.
// NOTE(review): presumably one entry per byte class — confirm against the
// state-machine driver, which is not part of this file.
static const unsigned int ISO2022CNCharLenTable[] = {
    0, 0, 0,
    0, 0, 0,
    0, 0, 0
};

// State-machine model descriptor for ISO-2022-CN. In order, the initializer
// supplies:
//   1. an aggregate of the four e...4bits constants plus the table
//      ISO2022CN_cls,
//   2. the integer 9 (NOTE(review): presumably the number of byte classes —
//      confirm against the SMModel declaration),
//   3. an aggregate of the same four constants plus the table ISO2022CN_st,
//   4. the character-length table ISO2022CNCharLenTable,
//   5. the charset name string.
SMModel ISO2022CNSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },
    9,
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },
    ISO2022CNCharLenTable,
    "ISO-2022-CN",
};

// Byte-class table for the ISO-2022-JP model; ISO2022JPSMModel references it
// in its first inner aggregate. It holds 256 / 8 = 32 words: each
// PCK4BITS(...) row supplies the values for eight consecutive byte codes, and
// the trailing comment on a row gives the byte range (hex) that row covers.
// Reading the rows in that order:
//   0x1b                          -> 1
//   0x00, 0x0e, 0x0f, 0x80-0xff   -> 2
//   0x28 ('(')                    -> 3
//   0x42 ('B')                    -> 4
//   0x4a ('J')                    -> 5
//   0x40 ('@')                    -> 6
//   0x24 ('$')                    -> 7
//   0x44 ('D')                    -> 8
//   0x49 ('I')                    -> 9
//   every other byte              -> 0
// NOTE(review): assumes PCK4BITS takes its arguments in ascending byte order,
// as the row comments suggest; the macro is defined outside this file.
static unsigned int ISO2022JP_cls [ 256 / 8 ] = {
    PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07
    PCK4BITS(0,0,0,0,0,0,2,2),  // 08 - 0f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17
    PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f
    PCK4BITS(0,0,0,0,7,0,0,0),  // 20 - 27
    PCK4BITS(3,0,0,0,0,0,0,0),  // 28 - 2f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37
    PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f
    PCK4BITS(6,0,4,0,8,0,0,0),  // 40 - 47
    PCK4BITS(0,9,5,0,0,0,0,0),  // 48 - 4f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57
    PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67
    PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77
    PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f
    PCK4BITS(2,2,2,2,2,2,2,2),  // 80 - 87
    PCK4BITS(2,2,2,2,2,2,2,2),  // 88 - 8f
    PCK4BITS(2,2,2,2,2,2,2,2),  // 90 - 97
    PCK4BITS(2,2,2,2,2,2,2,2),  // 98 - 9f
    PCK4BITS(2,2,2,2,2,2,2,2),  // a0 - a7
    PCK4BITS(2,2,2,2,2,2,2,2),  // a8 - af
    PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7
    PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf
    PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7
    PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf
    PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7
    PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df
    PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7
    PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef
    PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7
    PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff
};


// Packed state table for the ISO-2022-JP model; ISO2022JPSMModel references it
// in its second inner aggregate. Nine PCK4BITS rows give 72 entries; each
// entry is eStart, eError, eItsMe or a numeric state id (3-6 here). The
// trailing row comments give the flat entry index range in hex.
// NOTE(review): presumably the entry for (state, class) sits at
// state * 10 + class, 10 being the count stored in ISO2022JPSMModel — confirm
// against the state-machine driver, which is not part of this file.
static unsigned int ISO2022JP_st [ 9] = {
    PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eStart,eStart),//00-07
    PCK4BITS(eStart,eStart,eError,eError,eError,eError,eError,eError),//08-0f
    PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
    PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError),//18-1f
    PCK4BITS(eError,     5,eError,eError,eError,     4,eError,eError),//20-27
    PCK4BITS(eError,eError,eError,     6,eItsMe,eError,eItsMe,eError),//28-2f
    PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//30-37
    PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//38-3f
    PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47
};

// Character-length table referenced by ISO2022JPSMModel; every entry is zero.
// It carries ten entries so that it matches the count of 10 stored in
// ISO2022JPSMModel and covers classes 8 and 9, which ISO2022JP_cls assigns to
// bytes 0x44 and 0x49. An eight-entry table is too short for a lookup by
// either of those classes.
// NOTE(review): assumes the state-machine driver indexes this table by byte
// class — confirm against the driver, which is not part of this file.
static const unsigned int ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

// State-machine model descriptor for ISO-2022-JP. In order, the initializer
// supplies:
//   1. an aggregate of the four e...4bits constants plus the table
//      ISO2022JP_cls,
//   2. the integer 10 (NOTE(review): presumably the number of byte classes —
//      confirm against the SMModel declaration),
//   3. an aggregate of the same four constants plus the table ISO2022JP_st,
//   4. the character-length table ISO2022JPCharLenTable,
//   5. the charset name string.
SMModel ISO2022JPSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
    10,
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
    ISO2022JPCharLenTable,
    "ISO-2022-JP",
};

// Byte-class table for the ISO-2022-KR model; ISO2022KRSMModel references it
// in its first inner aggregate. It holds 256 / 8 = 32 words: each
// PCK4BITS(...) row supplies the values for eight consecutive byte codes, and
// the trailing comment on a row gives the byte range (hex) that row covers.
// Reading the rows in that order:
//   0x1b                -> 1
//   0x00 and 0x80-0xff  -> 2
//   0x24 ('$')          -> 3
//   0x29 (')')          -> 4
//   0x43 ('C')          -> 5
//   every other byte    -> 0
// NOTE(review): assumes PCK4BITS takes its arguments in ascending byte order,
// as the row comments suggest; the macro is defined outside this file.
static unsigned int ISO2022KR_cls [ 256 / 8 ] = {
    PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07
    PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17
    PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f
    PCK4BITS(0,0,0,0,3,0,0,0),  // 20 - 27
    PCK4BITS(0,4,0,0,0,0,0,0),  // 28 - 2f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37
    PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f
    PCK4BITS(0,0,0,5,0,0,0,0),  // 40 - 47
    PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57
    PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67
    PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f
    PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77
    PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f
    PCK4BITS(2,2,2,2,2,2,2,2),  // 80 - 87
    PCK4BITS(2,2,2,2,2,2,2,2),  // 88 - 8f
    PCK4BITS(2,2,2,2,2,2,2,2),  // 90 - 97
    PCK4BITS(2,2,2,2,2,2,2,2),  // 98 - 9f
    PCK4BITS(2,2,2,2,2,2,2,2),  // a0 - a7
    PCK4BITS(2,2,2,2,2,2,2,2),  // a8 - af
    PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7
    PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf
    PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7
    PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf
    PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7
    PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df
    PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7
    PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef
    PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7
    PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff
};


// Packed state table for the ISO-2022-KR model; ISO2022KRSMModel references it
// in its second inner aggregate. Five PCK4BITS rows give 40 entries; each
// entry is eStart, eError, eItsMe or a numeric state id (3-5 here). The
// trailing row comments give the flat entry index range in hex.
// NOTE(review): presumably the entry for (state, class) sits at
// state * 6 + class, 6 being the count stored in ISO2022KRSMModel — confirm
// against the state-machine driver, which is not part of this file.
static unsigned int ISO2022KR_st [ 5] = {
    PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eError,eError),//00-07
    PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
    PCK4BITS(eItsMe,eItsMe,eError,eError,eError,     4,eError,eError),//10-17
    PCK4BITS(eError,eError,eError,eError,     5,eError,eError,eError),//18-1f
    PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27
};

// Character-length table referenced by ISO2022KRSMModel: six entries, all
// zero.
// NOTE(review): presumably one entry per byte class — confirm against the
// state-machine driver, which is not part of this file.
static const unsigned int ISO2022KRCharLenTable[] = {
    0, 0, 0,
    0, 0, 0
};

// State-machine model descriptor for ISO-2022-KR. In order, the initializer
// supplies:
//   1. an aggregate of the four e...4bits constants plus the table
//      ISO2022KR_cls,
//   2. the integer 6 (NOTE(review): presumably the number of byte classes —
//      confirm against the SMModel declaration),
//   3. an aggregate of the same four constants plus the table ISO2022KR_st,
//   4. the character-length table ISO2022KRCharLenTable,
//   5. the charset name string.
SMModel ISO2022KRSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },
    6,
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },
    ISO2022KRCharLenTable,
    "ISO-2022-KR",
};
}
Beispiel #2
0
Modification from frank tang's original work:
. 0x00 is allowed as a legal character, since some web pages contain this char
  in the text stream.
*/

/*
Modification from Frank Tang's original work (modified by Cheolgi Kim):
. The Korean charset EUC-KR is extended to CP949, which allows more characters.
  Thus, the error condition of EUC-KR is relaxed.
*/

// BIG5 

static PRUint32 BIG5_cls [ 256 / 8 ] = {
//PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07    //allow 0x00 as legal value
PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
PCK4BITS(1,1,1,1,1,1,1,1),  // 10 - 17 
PCK4BITS(1,1,1,0,1,1,1,1),  // 18 - 1f 
PCK4BITS(1,1,1,1,1,1,1,1),  // 20 - 27 
PCK4BITS(1,1,1,1,1,1,1,1),  // 28 - 2f 
PCK4BITS(1,1,1,1,1,1,1,1),  // 30 - 37 
PCK4BITS(1,1,1,1,1,1,1,1),  // 38 - 3f 
PCK4BITS(2,2,2,2,2,2,2,2),  // 40 - 47 
PCK4BITS(2,2,2,2,2,2,2,2),  // 48 - 4f 
PCK4BITS(2,2,2,2,2,2,2,2),  // 50 - 57 
PCK4BITS(2,2,2,2,2,2,2,2),  // 58 - 5f 
PCK4BITS(2,2,2,2,2,2,2,2),  // 60 - 67 
PCK4BITS(2,2,2,2,2,2,2,2),  // 68 - 6f 
PCK4BITS(2,2,2,2,2,2,2,2),  // 70 - 77 
PCK4BITS(2,2,2,2,2,2,2,1),  // 78 - 7f 
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include "universalchardet.h"
#include "nsCodingStateMachine.h"

static PRUint32 HZ_cls[ 256 / 8 ] = {
PCK4BITS(1,0,0,0,0,0,0,0),  // 00 - 07 
PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27 
PCK4BITS(0,0,0,0,0,0,0,0),  // 28 - 2f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 
PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 40 - 47 
PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 
PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 
PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f 
PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 
PCK4BITS(0,0,0,4,0,5,2,0),  // 78 - 7f 
Beispiel #4
0
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "statemachine.h"

static PRUint32 HZ_cls[ 256 / 8 ] = {
PCK4BITS(1,0,0,0,0,0,0,0),  /*  00 - 07  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  08 - 0f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  10 - 17  */
PCK4BITS(0,0,0,1,0,0,0,0),  /*  18 - 1f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  20 - 27  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  28 - 2f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  30 - 37  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  38 - 3f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  40 - 47  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  48 - 4f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  50 - 57  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  58 - 5f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  60 - 67  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  68 - 6f  */
PCK4BITS(0,0,0,0,0,0,0,0),  /*  70 - 77  */
PCK4BITS(0,0,0,4,0,5,2,0),  /*  78 - 7f  */
Beispiel #5
0
*/

#include "types.h"
#include "statemachine.h"

/*
Modification from frank tang's original work:
. 0x00 is allowed as a legal character, since some web pages contain this char
  in the text stream.
*/

/* BIG5 */

static PRUint32 BIG5_cls [ 256 / 8 ] = {
/*PCK4BITS(0,1,1,1,1,1,1,1),    00 - 07  */
PCK4BITS(1,1,1,1,1,1,1,1),  /*  00 - 07     allow 0x00 as legal value */
PCK4BITS(1,1,1,1,1,1,0,0),  /*  08 - 0f  */
PCK4BITS(1,1,1,1,1,1,1,1),  /*  10 - 17  */
PCK4BITS(1,1,1,0,1,1,1,1),  /*  18 - 1f  */
PCK4BITS(1,1,1,1,1,1,1,1),  /*  20 - 27  */
PCK4BITS(1,1,1,1,1,1,1,1),  /*  28 - 2f  */
PCK4BITS(1,1,1,1,1,1,1,1),  /*  30 - 37  */
PCK4BITS(1,1,1,1,1,1,1,1),  /*  38 - 3f  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  40 - 47  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  48 - 4f  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  50 - 57  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  58 - 5f  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  60 - 67  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  68 - 6f  */
PCK4BITS(2,2,2,2,2,2,2,2),  /*  70 - 77  */
PCK4BITS(2,2,2,2,2,2,2,1),  /*  78 - 7f  */