Beispiel #1
0
// Cross-checks the print_itt / print_stt tables against the code point list
// stored in fp.
//
// The file is rewound and read with ReadCodePoint(); its code points must be
// in strictly increasing order. Every code point from 0 through
// UNI_MAX_LEGAL_UTF32 is encoded to UTF-8 and the bytes are run through the
// tables starting at PRINT_START_STATE. A code point counts as accepted when
// the final state equals PRINT_ACCEPTING_STATES_START + 1, and that must
// agree with whether the code point was listed in the file.
//
// On the first ordering error or table mismatch a message is written to
// stderr and the process terminates via exit(0).
// NOTE(review): exit(0) reports success to the caller even though a failure
// was detected; the status is left unchanged here because scripts may rely
// on it -- confirm and consider a non-zero status.
void VerifyTables(FILE *fp)
{
    fprintf(stderr, "Testing final ITT and STT.\n");
    fseek(fp, 0, SEEK_SET);
    UTF32 nextcode = ReadCodePoint(fp);
    UTF32 i;
    for (i = 0; i <= UNI_MAX_LEGAL_UTF32; i++)
    {
        // bMember: was code point i listed in the file?
        bool bMember;
        if (i == nextcode)
        {
            bMember = true;
            if (UNI_EOF != nextcode)
            {
                nextcode = ReadCodePoint(fp);
                if (nextcode <= i)
                {
                    // Cast explicitly: %X expects unsigned int, and UTF32 is
                    // not guaranteed to be that exact type.
                    fprintf(stderr, "Codes in file are not in order (U+%04X).\n", static_cast<unsigned int>(nextcode));
                    exit(0);
                }
            }
        }
        else
        {
            bMember = false;
        }

        UTF32 Source[2];
        Source[0] = i;
        Source[1] = L'\0';
        const UTF32 *pSource = Source;

        UTF8 Target[5];
        UTF8 *pTarget = Target;

        // Only the first sizeof(Target)-1 bytes are offered to the converter.
        ConversionResult cr;
        cr = ConvertUTF32toUTF8(&pSource, pSource+1, &pTarget, pTarget+sizeof(Target)-1, lenientConversion);

        if (conversionOK == cr)
        {
            // Feed the encoded bytes through the state machine until the
            // input is consumed or an accepting state is reached.
            int iState = PRINT_START_STATE;
            UTF8 *p = Target;
            while (  p < pTarget
                  && iState < PRINT_ACCEPTING_STATES_START)
            {
                iState = print_stt[iState][print_itt[(unsigned char)*p]];
                p++;
            }

            const bool bAccepted = ((iState - PRINT_ACCEPTING_STATES_START) == 1);
            if (bAccepted != bMember)
            {
                fprintf(stderr, "Input Translation Table and State Transition Table do not work.\n");
                exit(0);
            }
        }
    }
}
Beispiel #2
0
/*
 * Smoke test for ConvertUTF32toUTF8: converts eight code points
 * (0x41..0x48) into a 1024-byte heap buffer pre-filled with 0xff, offering
 * the converter only the first 8 bytes of it, then prints the result code,
 * the first eight output bytes and the number of bytes written.
 *
 * Returns 0 on completion, 1 if the output buffer cannot be allocated.
 */
int main( int argc, char **argv ) {
    (void)argc;
    (void)argv;

    UTF32 data32[] = { 0x00000041, 0x00000042, 0x00000043, 0x00000044, 0x00000045, 0x00000046, 0x00000047, 0x00000048 };
    UTF32 *src32 = data32;
    UTF8 *out8 = (UTF8*)malloc(1024);
    if (out8 == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    UTF8 *work8 = out8;
    memset(out8,0xff,1024);
    ConversionResult res;
    res = ConvertUTF32toUTF8( (const UTF32**)&src32, src32+8, &work8, work8+8, 0 );
    /* src32 was passed by address to the converter, so it is printed as it
       stands after the call. %p requires void* arguments. */
    printf("ptf:%p to %p\n", (void*)src32, (void*)(src32+7));
    printf("res:%d out:%x %x %x %x %x %x %x %x\n",(int)res,out8[0], out8[1], out8[2], out8[3], out8[4], out8[5], out8[6], out8[7] );
    size_t outlen = (size_t)(work8 - out8);
    /* %zu is the matching conversion for size_t; %d was undefined behaviour. */
    printf("len:%zu",outlen);
    free(out8);
    return 0;
}
Beispiel #3
0
// Converts `length` wide characters starting at `str` into UTF-8 inside
// `outBuffer` and returns the number of UTF-8 bytes produced.
//
// The buffer is cleared first. A zero `length` returns 0 immediately. The
// input is treated as UTF-16 when wchar_t is two bytes wide and as UTF-32
// otherwise; the reservation allows 3 (UTF-16) or 4 (UTF-32) bytes per input
// unit plus one byte for the terminating zero, which is always written after
// the last byte the converter produced. When strict conversion fails the
// function returns 0 and the buffer length is not set.
intp StringParser::ConvertToBuffer(const wchar_t* str, size_t length, HeapString& outBuffer)
{
	outBuffer.Clear();
	if (length == 0)
	{
		return 0;
	}

	constexpr bool isUTF16 = sizeof(wchar_t) == 2;

	// Worst-case output size plus room for the terminator.
	const size_t capacity = length * (isUTF16 ? 3 : 4) + 1;
	outBuffer.ReserveSize(capacity);

	UTF8* cursor = reinterpret_cast<UTF8*>(outBuffer.MutableBuffer());
	UTF8* const limit = cursor + capacity;

	ConversionResult status;
	if (isUTF16)
	{
		const UTF16* in = reinterpret_cast<const UTF16*>(str);
		const UTF16* const inEnd = in + length;
		status = ConvertUTF16toUTF8(&in, inEnd, &cursor, limit, strictConversion);
	}
	else
	{
		const UTF32* in = reinterpret_cast<const UTF32*>(str);
		const UTF32* const inEnd = in + length;
		status = ConvertUTF32toUTF8(&in, inEnd, &cursor, limit, strictConversion);
	}

	// Terminate whatever was produced, even on failure.
	*cursor = 0;

	if (res_ok(status) == false)
	{
		return 0;
	}

	const intp written = cursor - reinterpret_cast<UTF8*>(outBuffer.MutableBuffer());
	outBuffer.ForceSetLength(written);
	return written;
}
Beispiel #4
0
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
  const UTF32 *SourceStart = &Source;
  const UTF32 *SourceEnd = SourceStart + 1;
  UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
  UTF8 *TargetEnd = TargetStart + 4;
  ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
                                           &TargetStart, TargetEnd,
                                           strictConversion);
  if (CR != conversionOK)
    return false;

  ResultPtr = reinterpret_cast<char*>(TargetStart);
  return true;
}
Beispiel #5
0
			// Converts the wide characters in [*src_start, src_end), treated
			// as UTF-32, into the storage already present in `utf8` using
			// lenient conversion. `*src_start` is handed to the converter by
			// address. Afterwards `utf8` is resized to the number of bytes
			// written, and the converter's result is returned cast to
			// utf8_errors::error_code_enum.
			static utf8_errors::error_code_enum convert(wchar_t const** src_start
				, wchar_t const* src_end
				, std::string& utf8)
			{
				char* const out_begin = &utf8[0];
				char* const out_end = out_begin + utf8.size();
				char* out_pos = out_begin;

				int const result = ConvertUTF32toUTF8(
					reinterpret_cast<UTF32 const**>(src_start)
					, reinterpret_cast<UTF32 const*>(src_end)
					, reinterpret_cast<UTF8**>(&out_pos)
					, reinterpret_cast<UTF8*>(out_end)
					, lenientConversion);

				// Drop the unused tail of the output storage.
				utf8.resize(aux::numeric_cast<std::size_t>(out_pos - &utf8[0]));
				return static_cast<utf8_errors::error_code_enum>(result);
			}
	// Converts a wide string to UTF-8.
	//
	// The input is read as UTF-16 when wchar_t is two bytes wide and as
	// UTF-32 when it is four bytes wide. The output is first sized to
	// MAX_UTF8_CHAR_LENGTH bytes per input character plus one, then shrunk to
	// the number of bytes the converter produced.
	//
	// Throws Exception(Exception::ERROR_WIDE_2_UTF8, ...) if wchar_t has any
	// other width or if the strict conversion fails.
	String StringUtils::wideString2utf8String( const WideString& wideString )
	{
		const size_t inputLength = wideString.length();
		String utf8;

		const bool isUtf16 = ( sizeof( wchar_t ) == 2 );
		const bool isUtf32 = ( sizeof( wchar_t ) == 4 );

		if ( !isUtf16 && !isUtf32 )
		{
			throw Exception(Exception::ERROR_WIDE_2_UTF8, String("Could not convert from wide string to UTF8."));
		}

		const size_t capacity = MAX_UTF8_CHAR_LENGTH * inputLength + 1;
		utf8.resize( capacity, '\0' );

		UTF8* const outBegin = reinterpret_cast<UTF8*>( &((utf8)[ 0 ]) );
		UTF8* const outEnd = outBegin + capacity;
		UTF8* outPos = outBegin;

		ConversionResult status;
		if ( isUtf16 )
		{
			const UTF16* in = reinterpret_cast<const UTF16*>( wideString.c_str() );
			const UTF16* const inEnd = in + inputLength;
			status = ConvertUTF16toUTF8( &in, inEnd, &outPos, outEnd, strictConversion );
		}
		else
		{
			const UTF32* in = reinterpret_cast<const UTF32*>( wideString.c_str() );
			const UTF32* const inEnd = in + inputLength;
			status = ConvertUTF32toUTF8( &in, inEnd, &outPos, outEnd, strictConversion );
		}

		if ( status != conversionOK )
		{
			throw Exception(Exception::ERROR_WIDE_2_UTF8, String("Could not convert from wide string to UTF8."));
		}

		// Keep only the bytes that were actually written.
		utf8.resize(outPos - outBegin);
		return utf8;
	}
Beispiel #7
0
    static std::string ToUtf8(const std::wstring& widestring)
    {
        size_t widesize = widestring.length();

        if (sizeof(wchar_t) == 2)
        {
            size_t utf8size = 3 * widesize + 1;
            char* utf8stringnative = new char[utf8size];
            const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
            const UTF16* sourceend = sourcestart + widesize;
            UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative);
            UTF8* targetend = targetstart + utf8size;
            ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                delete [] utf8stringnative;
                throw std::exception();
            }
            *targetstart = 0;
            std::string resultstring(utf8stringnative);
            delete [] utf8stringnative;
            return resultstring;
        }
        else if (sizeof(wchar_t) == 4)
        {
            size_t utf8size = 4 * widesize + 1;
            char* utf8stringnative = new char[utf8size];
            const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
            const UTF32* sourceend = sourcestart + widesize;
            UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative);
            UTF8* targetend = targetstart + utf8size;
            ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                delete [] utf8stringnative;
                throw std::exception();
            }
            *targetstart = 0;
            std::string resultstring(utf8stringnative);
            delete [] utf8stringnative;
            return resultstring;
        }
        else
        {
            throw std::exception();
        }
        return "";
    }
Beispiel #8
0
    std::string ToUtf8(const std::wstring& widestring)
    {
        size_t widesize = widestring.length();

        if (sizeof(wchar_t) == 2)
        {
            size_t utf8size = 3 * widesize + 1;
            std::string resultstring;
            resultstring.resize(utf8size, '\0');
            const UTF16* sourcestart = 
		reinterpret_cast<const UTF16*>(widestring.c_str());
            const UTF16* sourceend = sourcestart + widesize;
            UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
            UTF8* targetend = targetstart + utf8size;
            ConversionResult res = ConvertUTF16toUTF8
		(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                throw std::exception("La falla!");
            }
            *targetstart = 0;
            return resultstring;
        }
        else if (sizeof(wchar_t) == 4)
        {
            size_t utf8size = 4 * widesize + 1;
            std::string resultstring;
            resultstring.resize(utf8size, '\0');
            const UTF32* sourcestart = 
		reinterpret_cast<const UTF32*>(widestring.c_str());
            const UTF32* sourceend = sourcestart + widesize;
            UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
            UTF8* targetend = targetstart + utf8size;
            ConversionResult res = ConvertUTF32toUTF8
		(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                throw std::exception("La falla!");
            }
            *targetstart = 0;
            return resultstring;
        }
        else
        {
            throw std::exception("La falla!");
        }
        return "";
    }
Beispiel #9
0
void TestTable(FILE *fp)
{
    fprintf(stderr, "Testing STT table.\n");
    fseek(fp, 0, SEEK_SET);
    int Value;
    UTF32 nextcode = ReadCodePoint(fp);
    UTF32 i;
    for (i = 0; i <= UNI_MAX_LEGAL_UTF32; i++)
    {
        bool bMember;
        if (i == nextcode)
        {
            bMember = true;
            if (UNI_EOF != nextcode)
            {
                nextcode = ReadCodePoint(fp);
                if (nextcode <= i)
                {
                    fprintf(stderr, "Codes in file are not in order (U+%04X).\n", static_cast<unsigned int>(nextcode));
                    exit(0);
                }
            }
        }
        else
        {
            bMember = false;
        }

        UTF32 Source[2];
        Source[0] = i;
        Source[1] = L'\0';
        const UTF32 *pSource = Source;

        UTF8 Target[5];
        UTF8 *pTarget = Target;

        ConversionResult cr;
        cr = ConvertUTF32toUTF8(&pSource, pSource+1, &pTarget, pTarget+sizeof(Target)-1, lenientConversion);

        if (conversionOK == cr)
        {
            sm.TestString(Target, pTarget, bMember);
        }
    }
}
std::string StringUtils::wstring_To_Utf8(const std::wstring& widestring)
{
   size_t widesize = widestring.length();

   if (sizeof(wchar_t) == 2)
   {
      size_t utf8size = 3 * widesize + 1;
      std::string resultstring;
      resultstring.resize(utf8size, '\0');
      const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
      const UTF16* sourceend = sourcestart + widesize;
      UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
      UTF8* targetend = targetstart + utf8size;
      ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
      if (res != conversionOK)
      {
         return std::string(widestring.begin(), widestring.end());
      }
      *targetstart = 0;
      return std::string(resultstring.c_str());
   }
   else if (sizeof(wchar_t) == 4)
   {
      size_t utf8size = 4 * widesize + 1;
      std::string resultstring;
      resultstring.resize(utf8size, '\0');
      const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
      const UTF32* sourceend = sourcestart + widesize;
      UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
      UTF8* targetend = targetstart + utf8size;
      ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
      if (res != conversionOK)
      {
         return std::string(widestring.begin(), widestring.end());
      }
      *targetstart = 0;
      return std::string(resultstring.c_str());
   }
   else
   {
      assert(false);
   }
   return "";
}
Beispiel #11
0
const char*
jx_utf8_character(JxChar in_char)
/* Encodes the supplied character, cast to UTF-32, as a zero-terminated UTF-8
 string using lenient conversion. The string lives in a function-local static
 buffer, so no memory is allocated and each call overwrites the previous
 result. Returns NULL if the conversion does not return conversionOK. */
{
    static char result[7];

    UTF32 codepoints[2] = { (UTF32)in_char, 0 };
    const UTF32 *read_pos = (const UTF32*)codepoints;
    UTF8 *const out_begin = (UTF8*)result;
    UTF8 *write_pos = out_begin;

    memset(result, 0, sizeof(result));

    if (ConvertUTF32toUTF8(&read_pos,
                           read_pos + 1,
                           &write_pos,
                           out_begin + sizeof(result),
                           lenientConversion) != conversionOK)
    {
        return NULL;
    }

    /* Terminate directly after the last byte written. */
    result[(long)(write_pos - out_begin)] = '\0';
    return result;
}
Beispiel #12
0
const bool ToUTF8(const std::wstring &wcstring, std::string &utf8string)
{
	if(wcstring.size()==0)
	{
		utf8string.assign("");
		return true;
	}

	std::vector<std::wstring::value_type> source(wcstring.begin(),wcstring.end());

	if(sizeof(std::wstring::value_type)==2 && sizeof(UTF16)==2)
	{
		std::vector<std::string::value_type> dest(wcstring.size()*2,0);
		
		const UTF16 *sourcestart=reinterpret_cast<const UTF16 *>(&source[0]);
		const UTF16 *sourceend=sourcestart+source.size();
		
		UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]);
		UTF8 *destend=deststart+dest.size();
		
		ConversionResult rval=ConvertUTF16toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion);
		
		if(rval!=conversionOK)
		{
			return false;
		}
		
		utf8string.assign(dest.begin(),dest.end()-(destend-deststart));
		
	}
	else if(sizeof(std::wstring::value_type)==4 && sizeof(UTF32)==4)
	{
		std::vector<std::string::value_type> dest(wcstring.size()*4,0);
		
		const UTF32 *sourcestart=reinterpret_cast<const UTF32 *>(&source[0]);
		const UTF32 *sourceend=sourcestart+source.size();
		
		UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]);
		UTF8 *destend=deststart+dest.size();
		
		ConversionResult rval=ConvertUTF32toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion);
		
		if(rval!=conversionOK)
		{
			return false;
		}
		
		utf8string.assign(dest.begin(),dest.end()-(destend-deststart));
		
	}
	else
	{
		std::vector<UTF32> source2(wcstring.begin(),wcstring.end());
		std::vector<std::string::value_type> dest(wcstring.size()*sizeof(std::wstring::value_type),0);
		
		const UTF32 *sourcestart=reinterpret_cast<const UTF32 *>(&source2[0]);
		const UTF32 *sourceend=sourcestart+source2.size();

		UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]);
		UTF8 *destend=deststart+dest.size();

		ConversionResult rval=ConvertUTF32toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion);

		if(rval!=conversionOK)
		{
			return false;
		}

		utf8string.assign(dest.begin(),dest.end()-(destend-deststart));

	}

	return true;
}
Beispiel #13
0
void LoadStrings(FILE *fp, FILE *fpBody, FILE *fpInclude)
{
    int cIncluded = 0;
    int cExcluded = 0;
    int cErrors   = 0;

    fseek(fp, 0, SEEK_SET);
    int Value;
    UTF32 nextcode = ReadCodePoint(fp);

    UTF32 i;
    for (i = 0; i <= UNI_MAX_LEGAL_UTF32; i++)
    {
        bool bMember;
        if (i == nextcode)
        {
            bMember = true;
            cIncluded++;
            if (UNI_EOF != nextcode)
            {
                nextcode = ReadCodePoint(fp);
                if (nextcode <= i)
                {
                    fprintf(stderr, "Codes in file are not in order (U+%04X).\n", static_cast<unsigned int>(nextcode));
                    exit(0);
                }
            }
        }
        else
        {
            bMember = false;
            cExcluded++;
        }

        UTF32 Source[2];
        Source[0] = i;
        Source[1] = L'\0';
        const UTF32 *pSource = Source;

        UTF8 Target[5];
        UTF8 *pTarget = Target;

        ConversionResult cr;
        cr = ConvertUTF32toUTF8(&pSource, pSource+1, &pTarget, pTarget+sizeof(Target)-1, lenientConversion);

        if (conversionOK == cr)
        {
            sm.RecordString(Target, pTarget, bMember);
        }
        else
        {
            cErrors++;
        }
    }
    fprintf(fpBody, "// %d included, %d excluded, %d errors.\n", cIncluded, cExcluded, cErrors);
    fprintf(fpInclude, "// %d included, %d excluded, %d errors.\n", cIncluded, cExcluded, cErrors);
    fprintf(stderr, "%d included, %d excluded, %d errors.\n", cIncluded, cExcluded, cErrors);

    OutputStatus os;
    sm.OutputTables(NULL, &os);
    fprintf(stderr, "%d states, %d columns, %d bytes\n", os.nStates, os.nColumns, os.SizeOfMachine);
}
/*
 * IDX_IndexByMA - tokenize a text section and fill PostInfo with index terms.
 *
 * SecVal    : input text handed to InitTokenizer() (original note: UTF-8 input).
 * PostInfo  : output array; each entry written here gets key, keyLen, psgNum
 *             and wordNum.
 * StopCheck : not referenced anywhere in this function body.
 *
 * Word numbering starts at the external StartWordNum. Tokens longer than 42
 * units are skipped, as are tokens whose UTF-8 form is reported by
 * IDX_FindStopWord(). The remaining tokens are handled by token type:
 *   T_HAN         - converted to Johab, split by GetIndexFromOneWord(), each
 *                   resulting term converted back to UTF-8 and stored; the
 *                   original word is stored as well if not already present.
 *   T_CJK         - when HanjaFlag == 1, converted with Hanja2Hangul_UCS4()
 *                   and indexed like T_HAN; otherwise stored as-is.
 *   T_DIG / T_CYR - stored as-is.
 *   T_LAT         - optionally stemmed (StemCheck == 1), lower-cased, stored.
 *
 * Returns the number of PostInfo entries written. Most store sites return
 * early once that count exceeds MAXPOSTINFOSIZE - 1.
 *
 * NOTE(review): cnvt_res is assigned by every conversion but never checked.
 */
int IDX_IndexByMA(char *SecVal, POSTINFO *PostInfo, int StopCheck)
{
	int		ret_tok, PostInfoCnt = 0, org_PostInfoCnt;
	UTF32	token[MAXTOKENLEN], u32_str[MAXTOKENLEN], *u32_ptr;
	UTF32	hconv_tok[MAXTOKENLEN];
	UTF8	u8_str[MAXTOKENLEN], *u8_start_ptr, *u8_end_ptr;
	UTF8	original_word[MAXTOKENLEN];
	int		u8str_len;
	int		token_len;
	JO_CHAR	j_hanstr[MAXTOKENLEN];
	int		j_hanstr_len;
	int		idx_num, i, j, k;
	JO_INDEX_WORD idx_words;
	ConversionResult cnvt_res;
	int		wordNum = 1, max_wordNum = 0, org_wordNum;
	int		old_psgNum, firstFlag = 1;

	extern int StemCheck;
	extern int HanjaFlag;
	extern int StartWordNum;

	wordNum = StartWordNum;

	InitTokenizer((unsigned char *) SecVal);

	while ((ret_tok = GetNextToken(token, &token_len, 0)) != -1) {
		/* Skip over-long tokens. */
		if (token_len > 42)
			continue;

		/* Convert the token to UTF-8 once; the T_DIG/T_CYR and T_LAT cases
		   below reuse this u8_str / u8str_len. */
		u32_ptr = token;
		u8_start_ptr = (UTF8 *) u8_str;
		u8_end_ptr = (UTF8 *) &(u8_str[MAXTOKENLEN]);
		cnvt_res = ConvertUTF32toUTF8(&u32_ptr, &(token[token_len]), &u8_start_ptr, u8_end_ptr, strictConversion, &u8str_len);
		u8_str[u8str_len] = '\0';
		
		/* Remove stop words */
		if (IDX_FindStopWord(u8_str))
			continue;

		/* Keep the unsplit word; T_HAN may add it as an extra index term. */
		strcpy(original_word, u8_str);

		switch (ret_tok) {
			case T_HAN: /* Hangul */
				/* UCS4 --> Johab */
				j_hanstr_len = 0;
				for (i = 0; i < token_len; i++) {
					/*
					ucs2_to_johab((int) token[i], (int *) &(j_hanstr[j_hanstr_len].j_code));
					*/
					j_hanstr[j_hanstr_len].j_code = ucs2_to_johab((int) token[i]);
					j_hanstr[j_hanstr_len].j_han.sign = 1;
					j_hanstr_len++;
				}

				org_wordNum = wordNum;
				org_PostInfoCnt = PostInfoCnt;
				max_wordNum = 0;

				/* Index the Johab-encoded word */
				idx_num = GetIndexFromOneWord(j_hanstr, j_hanstr_len, &idx_words, 1);
			//	printf("\nIDX_ByMa\n");
				if (idx_num > 0) {
					firstFlag = 1;
					for (j = 0; j < idx_words.nIndex; j++) {
						/* Duplicate index-term removal sets str_len to 0, so this check is required. */
						if (idx_words.IDX[j].str_len == 0)
							continue;


						/* Johab --> UCS4 */
						for (k = 0; k < idx_words.IDX[j].str_len; k++)
							u32_str[k] = johab_to_ucs2((int)(idx_words.IDX[j].str[k].j_code));

						/* UCS4 --> UTF-8 */
						u32_ptr = u32_str;
						u8_start_ptr = (UTF8 *) u8_str;
						u8_end_ptr = (UTF8 *) &(u8_str[MAXTOKENLEN]);
						cnvt_res = ConvertUTF32toUTF8(&u32_ptr, &(u32_str[k]), &u8_start_ptr, u8_end_ptr, strictConversion, &u8str_len);

						u8_str[u8str_len] = '\0';
						/*
						 * 2003-06-02 stop-word handling
						 * Added stop-word handling for morphological analysis results.
						 * Applied only to single nouns of two or more syllables.
						 */
						if (idx_words.IDX[j].str_len >= 2 && idx_words.nIndex == 1) {
							/* Remove stop words */
							if (IDX_FindStopWord(u8_str))
								continue;
						}

						strcpy(PostInfo[PostInfoCnt].key, (char *) u8_str);
						PostInfo[PostInfoCnt].keyLen = u8str_len;
						PostInfo[PostInfoCnt].psgNum = idx_words.IDX[j].loc;

						/* When psgNum changes between consecutive terms, word
						   numbering restarts from the word's first number. */
						if (firstFlag) {
							firstFlag = 0;
							old_psgNum = PostInfo[PostInfoCnt].psgNum;
						} else {
							if (old_psgNum != PostInfo[PostInfoCnt].psgNum) {
								wordNum = org_wordNum;
								old_psgNum = PostInfo[PostInfoCnt].psgNum;
							}
						}

						if (max_wordNum < wordNum)
							max_wordNum = wordNum;

						PostInfo[PostInfoCnt].wordNum = wordNum++;
						PostInfoCnt++;

						if (PostInfoCnt > MAXPOSTINFOSIZE - 1)
							return PostInfoCnt;
					}

					/* Continue numbering after the highest number used above. */
					if (max_wordNum != 0)
						wordNum = max_wordNum + 1;

					/////////////////////////////////////////////////////////////////
					// Also register the word itself as an index term (2005/02/14)
					/////////////////////////////////////////////////////////////////
					// NOTE(review): unlike the other store sites, this one has no
					// MAXPOSTINFOSIZE check after PostInfoCnt++ -- confirm that
					// PostInfo has room for one extra entry.
					for (k = org_PostInfoCnt; k < PostInfoCnt; k++)
						if (!strcmp(original_word, PostInfo[k].key))
							break;
					if (k == PostInfoCnt) {
						strcpy(PostInfo[PostInfoCnt].key, (char *) original_word);
						PostInfo[PostInfoCnt].keyLen = strlen(original_word);
						PostInfo[PostInfoCnt].psgNum = 7777;
						PostInfo[PostInfoCnt].wordNum = org_wordNum;
						if (org_PostInfoCnt == PostInfoCnt) // word that produced no index terms...
							wordNum++;
						PostInfoCnt++;
					}
					/////////////////////////////////////////////////////////////////
				}

				break;

			case T_CJK: /* Hanja (Chinese characters) */
				/* Convert to Hangul unless the "keep Hanja as-is" flag is set */
				/* Revision: Hanja flag 1 --> convert */
				if (HanjaFlag == 1) {
					Hanja2Hangul_UCS4(token, token_len, hconv_tok);

					/* Perform indexing */

					/* UCS4 --> Johab */
					j_hanstr_len = 0;
					for (i = 0; i < token_len; i++) {
						/*
						ucs2_to_johab((int) hconv_tok[i], (int *) &(j_hanstr[j_hanstr_len].j_code));
						*/
						j_hanstr[j_hanstr_len].j_code = ucs2_to_johab((int) hconv_tok[i]);
						j_hanstr[j_hanstr_len].j_han.sign = 1;
						j_hanstr_len++;
					}

					org_wordNum = wordNum;
					max_wordNum = 0;

					/* Index the Johab-encoded word */
					idx_num = GetIndexFromOneWord(j_hanstr, j_hanstr_len, &idx_words, 1);

					if (idx_num > 0) {
						firstFlag = 1;
						for (j = 0; j < idx_words.nIndex; j++) {
							/* Duplicate index-term removal sets str_len to 0, so this check is required. */
							if (idx_words.IDX[j].str_len == 0)
								continue;

							/* Johab --> UCS4 */
							for (k = 0; k < idx_words.IDX[j].str_len; k++)
								u32_str[k] = johab_to_ucs2((int)(idx_words.IDX[j].str[k].j_code));

							/* UCS4 --> UTF-8 */
							u32_ptr = u32_str;
							u8_start_ptr = (UTF8 *) u8_str;
							u8_end_ptr = (UTF8 *) &(u8_str[MAXTOKENLEN]);
							cnvt_res = ConvertUTF32toUTF8(&u32_ptr, &(u32_str[k]), &u8_start_ptr, u8_end_ptr, strictConversion, &u8str_len);

							u8_str[u8str_len] = '\0';
							strcpy(PostInfo[PostInfoCnt].key, (char *) u8_str);
							PostInfo[PostInfoCnt].keyLen = u8str_len;
							PostInfo[PostInfoCnt].psgNum = idx_words.IDX[j].loc;

							/* Same renumbering rule as in the T_HAN case. */
							if (firstFlag) {
								firstFlag = 0;
								old_psgNum = PostInfo[PostInfoCnt].psgNum;
							} else {
								if (old_psgNum != PostInfo[PostInfoCnt].psgNum) {
									wordNum = org_wordNum;
									old_psgNum = PostInfo[PostInfoCnt].psgNum;
								}
							}

							if (max_wordNum < wordNum)
								max_wordNum = wordNum;

							PostInfo[PostInfoCnt].wordNum = wordNum++;
							PostInfoCnt++;

							if (PostInfoCnt > MAXPOSTINFOSIZE - 1)
								return PostInfoCnt;
						}
						if (max_wordNum != 0)
							wordNum = max_wordNum + 1;
					}
				} else {
					/* UCS4 --> UTF-8 */
					u32_ptr = token;
					u8_start_ptr = (UTF8 *) u8_str;
					u8_end_ptr = (UTF8 *) &(u8_str[MAXTOKENLEN]);
					cnvt_res = ConvertUTF32toUTF8(&u32_ptr, &(token[token_len]), &u8_start_ptr, u8_end_ptr, strictConversion, &u8str_len);

					u8_str[u8str_len] = '\0';

					/* Keys longer than MAXKEYLEN are dropped. */
					if (strlen(u8_str) > MAXKEYLEN)
						break;

					strcpy(PostInfo[PostInfoCnt].key, (char *) u8_str);
					PostInfo[PostInfoCnt].keyLen = u8str_len;
					PostInfo[PostInfoCnt].psgNum = 1;
					PostInfo[PostInfoCnt].wordNum = wordNum++;

					PostInfoCnt++;

					if (PostInfoCnt > MAXPOSTINFOSIZE - 1)
						return PostInfoCnt;
				}

				break;

			case T_DIG:	/* Digits */
			case T_CYR: /* Russian */
				/* UCS4 --> UTF-8 */
				/*
				u32_ptr = token;
				u8_start_ptr = (UTF8 *) u8_str;
				u8_end_ptr = (UTF8 *) &(u8_str[1024]);
				cnvt_res = ConvertUTF32toUTF8(&u32_ptr, &(token[token_len]), &u8_start_ptr, u8_end_ptr, strictConversion, &u8str_len);

				u8_str[u8str_len] = '\0';
				*/
				/* u8_str still holds the conversion done at the top of the loop. */
				if (strlen(u8_str) > MAXKEYLEN)
					break;
				strcpy(PostInfo[PostInfoCnt].key, (char *) u8_str);
				PostInfo[PostInfoCnt].keyLen = strlen(u8_str);
				PostInfo[PostInfoCnt].psgNum = 1;
				PostInfo[PostInfoCnt].wordNum = wordNum++;

				PostInfoCnt++;

				if (PostInfoCnt > MAXPOSTINFOSIZE - 1)
					return PostInfoCnt;

				break;

			case T_LAT: /* English */
				/* UCS4 --> UTF-8 */
				/*
				u32_ptr = token;
				u8_start_ptr = (UTF8 *) u8_str;
				u8_end_ptr = (UTF8 *) &(u8_str[1024]);
				cnvt_res = ConvertUTF32toUTF8(&u32_ptr, &(token[token_len]), &u8_start_ptr, u8_end_ptr, strictConversion, &u8str_len);
				u8_str[u8str_len] = '\0';
				*/
				/* u8_str / u8str_len still hold the conversion done at the top of the loop. */
				if (strlen(u8_str) > MAXKEYLEN)
					break;

				strcpy(PostInfo[PostInfoCnt].key, (char *) u8_str);

				/* English stemming */
				if (StemCheck == 1) {
					IDX_strip_affixes(PostInfo[PostInfoCnt].key, &u8str_len);
					if (u8str_len <= 0)
						break;
				}


				/* Cut the key at the (possibly stemmed) length, then lower-case it. */
				PostInfo[PostInfoCnt].key[u8str_len] = '\0';
				strlower(PostInfo[PostInfoCnt].key);

				PostInfo[PostInfoCnt].keyLen = u8str_len;
				PostInfo[PostInfoCnt].psgNum = 1;
				PostInfo[PostInfoCnt].wordNum = wordNum++;

				PostInfoCnt++;

				if (PostInfoCnt > MAXPOSTINFOSIZE - 1)
					return PostInfoCnt;

				break;

			default:
				break;
		}
	}

	return PostInfoCnt;
}