/**
 * Split the typed string strPY into per-syllable pieces and store the result
 * in parsePY.
 *
 * For every piece the function writes
 *   - parsePY->strPYParsed[i]: the text of the piece (prefixed or suffixed
 *     with PY_SEPARATOR where the user typed one), and
 *   - parsePY->strMap[i]: the code produced by MapPY() for that piece,
 * and finally sets parsePY->iHZCount and parsePY->iMode.
 *
 * @param pyconfig  configuration handed through to SP2QP(), MapPY(),
 *                  FindPYFAIndex() and LookupPYFreq(); its PYTable and
 *                  bFullPY members are read here.
 * @param strPY     NUL-terminated input. An empty string is accepted and
 *                  yields iHZCount == 0 without touching the output arrays.
 * @param parsePY   output structure; must not be NULL.
 * @param mode      passed unchanged to MapPY().
 * @param bSP       when true the input is consumed in two-character units
 *                  that are converted with SP2QP() before lookup
 *                  (NOTE(review): presumably Shuangpin input -- confirm).
 *
 * NOTE(review): iHZCount is not checked against the capacity of
 * parsePY->strMap / strPYParsed; callers are assumed to bound the input
 * length -- confirm.
 */
void ParsePY(FcitxPinyinConfig *pyconfig, const char *strPY, ParsePYStruct * parsePY, PYPARSEINPUTMODE mode, boolean bSP)
{
    const char *strP;
    int iIndex;
    int iTemp;
    char str_Map[3];
    char strTemp[7];
    size_t inputLen;

    parsePY->iMode = PARSE_SINGLEHZ;
    strP = strPY;
    parsePY->iHZCount = 0;
    inputLen = strlen(strPY);

    if (inputLen == 0) {
        /*
         * Nothing to tokenize. The do/while loop below always runs once and
         * would step past the terminating NUL, so empty input must not reach
         * it. Fall through to the mode finalization at the end.
         */
    } else if (bSP) {
        char strQP[7];
        char strJP[3];

        strJP[2] = '\0';
        while (*strP) {
            /* take the next two characters as one candidate unit */
            strJP[0] = *strP++;
            strJP[1] = *strP;
            SP2QP(pyconfig, strJP, strQP);
            MapPY(pyconfig, strQP, str_Map, mode);

            /* a lone trailing character: store it as-is and stop */
            if (!*strP) {
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                break;
            }

            iIndex = FindPYFAIndex(pyconfig, strQP, 0);
            if (iIndex != -1) {
                /* the two-character unit is a known entry: consume both */
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                strP++;
            } else {
                /* retry with the first character alone; the second one is
                 * left in the input for the next iteration */
                strJP[1] = '\0';
                SP2QP(pyconfig, strJP, strQP);
                if (!MapPY(pyconfig, strQP, str_Map, mode))
                    strcpy(parsePY->strMap[parsePY->iHZCount], strJP);
                else
                    strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
            }

            /* attach one separator to the piece just stored and skip any
             * run of separators in the input */
            if (*strP == PY_SEPARATOR) {
                strcat(parsePY->strPYParsed[parsePY->iHZCount - 1], PY_SEPARATOR_S);
                while (*strP == PY_SEPARATOR)
                    strP++;
            }
        }
    } else {
        boolean bSeperator = false;

        do {
            iIndex = FindPYFAIndex(pyconfig, strP, 1);
            if (iIndex != -1) {
                size_t lIndex = strlen(pyconfig->PYTable[iIndex].strPY);

                /* strTemp[0] temporarily holds the last letter of the match */
                strTemp[0] = pyconfig->PYTable[iIndex].strPY[lIndex - 1];
                iTemp = -1;
                /*
                 * If the matched pinyin ends in 'g', 'n', 'e' or 'a' there
                 * might be another possibility, for example "wanan" can be
                 * "wa nan" and "wan an". Try to resolve that here.
                 */
                if (strTemp[0] == 'g' || strTemp[0] == 'n' || strTemp[0] == 'e' || strTemp[0] == 'a') {
                    strncpy(strTemp, strP, lIndex - 1);
                    strTemp[lIndex - 1] = '\0';
                    /* for example we have "wan", so check whether "wa" is
                     * valid, with exact match */
                    iTemp = FindPYFAIndex(pyconfig, strTemp, 0);
                    /* if "wa" is valid */
                    if (iTemp != -1) {
                        /* also check whether "nan" is valid */
                        int firstIndex;

                        firstIndex = iTemp;
                        iTemp = FindPYFAIndex(pyconfig, strP + strlen(pyconfig->PYTable[iTemp].strPY), 1);
                        /* if it is still valid */
                        if (iTemp != -1) {
                            /*
                             * A length-1 split is what we must avoid: "nin"
                             * could be "ni n", but no separator can be placed
                             * in "nin" if we split here. "ying" could likewise
                             * be "yi ng".
                             */
                            if (strlen(pyconfig->PYTable[iTemp].strPY) == 1 || !strcmp("ng", pyconfig->PYTable[iTemp].strPY))
                                iTemp = -1;
                        }
                        if (iTemp != -1) {
                            /* decide between the two splits by covered length,
                             * then by frequency */
                            int index2 = FindPYFAIndex(pyconfig, strP + strlen(pyconfig->PYTable[iIndex].strPY), 1);
                            boolean resplit = false;

                            do {
                                /* nothing parses after the long match: take
                                 * the shorter first piece */
                                if (index2 == -1) {
                                    resplit = true;
                                    break;
                                }
                                /* prefer the pair that covers more input */
                                size_t length1 = strlen(pyconfig->PYTable[iIndex].strPY) + strlen(pyconfig->PYTable[index2].strPY);
                                size_t length2 = strlen(pyconfig->PYTable[firstIndex].strPY) + strlen(pyconfig->PYTable[iTemp].strPY);
                                if (length1 != length2) {
                                    resplit = (length1 < length2);
                                    break;
                                }
                                /* same coverage: ties go to the re-split */
                                double freq1 = LookupPYFreq(pyconfig, iIndex, index2);
                                double freq2 = LookupPYFreq(pyconfig, firstIndex, iTemp);
                                resplit = (freq1 <= freq2);
                            } while (0);

                            if (resplit) {
                                strncpy(strTemp, strP, lIndex - 1);
                                strTemp[lIndex - 1] = '\0';
                            } else
                                iTemp = -1;
                        }
                    }
                }
                /* no re-split chosen: use the full table entry */
                if (iTemp == -1)
                    strcpy(strTemp, pyconfig->PYTable[iIndex].strPY);
                MapPY(pyconfig, strTemp, str_Map, mode);
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strP += strlen(strTemp);

                /* a separator seen just before this piece is kept as prefix */
                if (bSeperator) {
                    bSeperator = false;
                    parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                    parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                } else
                    parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                strcat(parsePY->strPYParsed[parsePY->iHZCount++], strTemp);
            } else {
                /* no table entry starts here */
                if (pyconfig->bFullPY && *strP != PY_SEPARATOR)
                    parsePY->iMode = PARSE_ERROR;

                iIndex = IsConsonant(strP, 1);
                if (-1 != iIndex) {
                    /* an entry of consonantMapTable: stored, but flagged as
                     * a parse error */
                    parsePY->iMode = PARSE_ERROR;

                    if (bSeperator) {
                        bSeperator = false;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                    } else
                        parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                    strcat(parsePY->strPYParsed[parsePY->iHZCount], consonantMapTable[iIndex].strPY);
                    MapPY(pyconfig, consonantMapTable[iIndex].strPY, str_Map, mode);
                    strcpy(parsePY->strMap[parsePY->iHZCount++], str_Map);
                    strP += strlen(consonantMapTable[iIndex].strPY);
                } else {
                    iIndex = IsSyllabary(strP, 1);
                    if (-1 != iIndex) {
                        /* an entry of syllabaryMapTable: stored and marked
                         * as an abbreviation unless an error was recorded */
                        if (bSeperator) {
                            bSeperator = false;
                            parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                            parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        } else
                            parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                        strcat(parsePY->strPYParsed[parsePY->iHZCount], syllabaryMapTable[iIndex].strPY);
                        MapPY(pyconfig, syllabaryMapTable[iIndex].strPY, str_Map, mode);
                        strcpy(parsePY->strMap[parsePY->iHZCount++], str_Map);
                        strP += strlen(syllabaryMapTable[iIndex].strPY);
                        if (parsePY->iMode != PARSE_ERROR)
                            parsePY->iMode = PARSE_ABBR;
                    } else {
                        /* must be a separator: remember it and pre-fill the
                         * current slot in case it ends the input */
                        strP++;
                        bSeperator = true;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        parsePY->strMap[parsePY->iHZCount][0] = '0';
                        parsePY->strMap[parsePY->iHZCount][1] = '0';
                        parsePY->strMap[parsePY->iHZCount][2] = '\0';
                    }
                }
            }
        } while (*strP);
    }

    /* a trailing separator (non-SP mode only) counts as one more slot; the
     * length guard keeps the index in range for empty input */
    if (!bSP && inputLen > 0 && strPY[inputLen - 1] == PY_SEPARATOR)
        parsePY->iHZCount++;

    /* unless an error was recorded, keep only the ABBR bit and add the
     * phrase / single-character flag according to the piece count */
    if (parsePY->iMode != PARSE_ERROR) {
        parsePY->iMode = parsePY->iMode & PARSE_ABBR;
        if (parsePY->iHZCount > 1)
            parsePY->iMode = parsePY->iMode | PARSE_PHRASE;
        else
            parsePY->iMode = parsePY->iMode | PARSE_SINGLEHZ;
    }
}
/**
 * Split the typed string strPY into per-syllable pieces and store the result
 * in parsePY.
 *
 * For every piece the function writes
 *   - parsePY->strPYParsed[i]: the text of the piece (prefixed or suffixed
 *     with PY_SEPARATOR where the user typed one), and
 *   - parsePY->strMap[i]: the code produced by MapPY() for that piece,
 * and finally sets parsePY->iHZCount and parsePY->iMode.
 *
 * Reads the file-level names bSP, bFullPY and PYTable.
 *
 * @param strPY    NUL-terminated input. An empty string is accepted and
 *                 yields iHZCount == 0 without touching the output arrays.
 * @param parsePY  output structure; must not be NULL.
 * @param mode     passed unchanged to MapPY().
 *
 * NOTE(review): when bSP is set the input is consumed in two-character units
 * converted with SP2QP() (presumably Shuangpin input -- confirm).
 * NOTE(review): iHZCount is not checked against the capacity of
 * parsePY->strMap / strPYParsed; callers are assumed to bound the input
 * length -- confirm.
 */
void ParsePY (char *strPY, ParsePYStruct * parsePY, PYPARSEINPUTMODE mode)
{
    char *strP;
    int iIndex;
    int iTemp;
    char str_Map[3];
    char strTemp[7];
    size_t inputLen;

    parsePY->iMode = PARSE_SINGLEHZ;
    strP = strPY;
    parsePY->iHZCount = 0;
    inputLen = strlen (strPY);

    if (inputLen == 0) {
        /*
         * Nothing to tokenize. The do/while loop below always runs once and
         * would step past the terminating NUL, so empty input must not reach
         * it. Fall through to the mode finalization at the end.
         */
    } else if (bSP) {
        char strQP[7];
        char strJP[3];

        strJP[2] = '\0';
        while (*strP) {
            /* take the next two characters as one candidate unit */
            strJP[0] = *strP++;
            strJP[1] = *strP;
            SP2QP (strJP, strQP);
            MapPY (strQP, str_Map, mode);

            /* a lone trailing character: store it as-is and stop */
            if (!*strP) {
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                break;
            }

            iIndex = FindPYFAIndex (strQP, 0);
            if (iIndex != -1) {
                /* the two-character unit is a known entry: consume both */
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                strP++;
            } else {
                /* retry with the first character alone; the second one is
                 * left in the input for the next iteration */
                strJP[1] = '\0';
                SP2QP (strJP, strQP);
                if (!MapPY (strQP, str_Map, mode))
                    strcpy (parsePY->strMap[parsePY->iHZCount], strJP);
                else
                    strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
            }

            /* attach one separator to the piece just stored and skip any
             * run of separators in the input */
            if (*strP == PY_SEPARATOR) {
                strcat (parsePY->strPYParsed[parsePY->iHZCount - 1], PY_SEPARATOR_S);
                while (*strP == PY_SEPARATOR)
                    strP++;
            }
        }
    } else {
        Bool bSeperator = False;

        do {
            iIndex = FindPYFAIndex (strP, 1);
            if (iIndex != -1) {
                size_t lIndex = strlen (PYTable[iIndex].strPY);

                /* strTemp[0] temporarily holds the last letter of the match */
                strTemp[0] = PYTable[iIndex].strPY[lIndex - 1];
                iTemp = -1;
                /*
                 * A match ending in 'g' or 'n' may be split one letter
                 * earlier if both the shortened prefix and what follows it
                 * are valid entries.
                 */
                if (strTemp[0] == 'g' || strTemp[0] == 'n') {
                    strncpy (strTemp, strP, lIndex - 1);
                    strTemp[lIndex - 1] = '\0';
                    /* the shortened prefix must be an exact table entry */
                    iTemp = FindPYFAIndex (strTemp, 0);
                    if (iTemp != -1) {
                        /* and the rest must start with a table entry too */
                        iTemp = FindPYFAIndex (strP + strlen (PYTable[iTemp].strPY), 1);
                        if (iTemp != -1) {
                            /* reject a one-letter or "ng" continuation */
                            if (strlen (PYTable[iTemp].strPY) == 1 || !strcmp ("ng", PYTable[iTemp].strPY))
                                iTemp = -1;
                        }
                        if (iTemp != -1) {
                            strncpy (strTemp, strP, lIndex - 1);
                            strTemp[lIndex - 1] = '\0';
                        }
                    }
                }
                /* no re-split chosen: use the full table entry */
                if (iTemp == -1)
                    strcpy (strTemp, PYTable[iIndex].strPY);
                MapPY (strTemp, str_Map, mode);
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strP += strlen (strTemp);

                /* a separator seen just before this piece is kept as prefix */
                if (bSeperator) {
                    bSeperator = False;
                    parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                    parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                } else
                    parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                strcat (parsePY->strPYParsed[parsePY->iHZCount++], strTemp);
            } else {
                /* no table entry starts here */
                if (bFullPY && *strP != PY_SEPARATOR)
                    parsePY->iMode = PARSE_ERROR;

                iIndex = IsConsonant (strP, 1);
                if (-1 != iIndex) {
                    /* an entry of consonantMapTable: stored, but flagged as
                     * a parse error */
                    parsePY->iMode = PARSE_ERROR;

                    if (bSeperator) {
                        bSeperator = False;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                    } else
                        parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                    strcat (parsePY->strPYParsed[parsePY->iHZCount], consonantMapTable[iIndex].strPY);
                    MapPY (consonantMapTable[iIndex].strPY, str_Map, mode);
                    strcpy (parsePY->strMap[parsePY->iHZCount++], str_Map);
                    strP += strlen (consonantMapTable[iIndex].strPY);
                } else {
                    iIndex = IsSyllabary (strP, 1);
                    if (-1 != iIndex) {
                        /* an entry of syllabaryMapTable: stored and marked
                         * as an abbreviation unless an error was recorded */
                        if (bSeperator) {
                            bSeperator = False;
                            parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                            parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        } else
                            parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                        strcat (parsePY->strPYParsed[parsePY->iHZCount], syllabaryMapTable[iIndex].strPY);
                        MapPY (syllabaryMapTable[iIndex].strPY, str_Map, mode);
                        strcpy (parsePY->strMap[parsePY->iHZCount++], str_Map);
                        strP += strlen (syllabaryMapTable[iIndex].strPY);
                        if (parsePY->iMode != PARSE_ERROR)
                            parsePY->iMode = PARSE_ABBR;
                    } else {
                        /* must be a separator: remember it and pre-fill the
                         * current slot in case it ends the input */
                        strP++;
                        bSeperator = True;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        parsePY->strMap[parsePY->iHZCount][0] = '0';
                        parsePY->strMap[parsePY->iHZCount][1] = '0';
                        parsePY->strMap[parsePY->iHZCount][2] = '\0';
                    }
                }
            }
        } while (*strP);
    }

    /* a trailing separator (non-SP mode only) counts as one more slot; the
     * length guard keeps the index in range for empty input */
    if (!bSP && inputLen > 0 && strPY[inputLen - 1] == PY_SEPARATOR)
        parsePY->iHZCount++;

    /* unless an error was recorded, keep only the ABBR bit and add the
     * phrase / single-character flag according to the piece count */
    if (parsePY->iMode != PARSE_ERROR) {
        parsePY->iMode = parsePY->iMode & PARSE_ABBR;
        if (parsePY->iHZCount > 1)
            parsePY->iMode = parsePY->iMode | PARSE_PHRASE;
        else
            parsePY->iMode = parsePY->iMode | PARSE_SINGLEHZ;
    }
}