/*
 * Convert one pinyin string (which may consist of only an initial or only
 * a final) into its two-character map code.
 *
 *   strPY  - NUL-terminated pinyin to convert.
 *   strMap - output buffer of 3 chars; on success holds two code characters
 *            followed by '\0'.
 *   mode   - value stored in the slot of the missing half when strPY matches
 *            a whole table entry on its own.
 *
 * Returns True on success, False otherwise (usually because strPY is not a
 * standard pinyin).  On failure strMap[2] is '\0' and the first two
 * characters are left untouched.
 */
Bool MapPY (char *strPY, char strMap[3], PYPARSEINPUTMODE mode)
{
    char str[5];
    int iIndex;

    /* Special case: "eng" gets a fixed code when MHPY_C[1].bMode is set. */
    if (!strcmp (strPY, "eng") && MHPY_C[1].bMode) {
        strcpy (strMap, "X0");
        return True;
    }

    strMap[2] = '\0';

    /* The whole string is a single IsSyllabary() table entry. */
    iIndex = IsSyllabary (strPY, 0);
    if (-1 != iIndex) {
        strMap[0] = syllabaryMapTable[iIndex].cMap;
        strMap[1] = mode;
        return True;
    }

    /* The whole string is a single IsConsonant() table entry. */
    iIndex = IsConsonant (strPY, 0);
    if (-1 != iIndex) {
        strMap[0] = mode;
        strMap[1] = consonantMapTable[iIndex].cMap;
        return True;
    }

    /* Nothing left to split; also keeps strPY[1] below inside the string. */
    if (strPY[0] == '\0')
        return False;

    /*
     * Split into a leading part (two characters when the second one is
     * 'h' or 'g', otherwise one) and the remaining tail.
     *
     * NOTE(review): the leading part's index comes from IsSyllabary() but
     * is looked up in consonantMapTable, not syllabaryMapTable -- confirm
     * this is intended.
     */
    if (strPY[1] == 'h' || strPY[1] == 'g') {
        str[0] = strPY[0];
        str[1] = strPY[1];
        str[2] = '\0';
        iIndex = IsSyllabary (str, 0);
        if (iIndex == -1)       /* would index the table at -1 */
            return False;
        strMap[0] = consonantMapTable[iIndex].cMap;
        iIndex = IsConsonant (strPY + 2, 0);
        if (iIndex == -1)       /* would index the table at -1 */
            return False;
        strMap[1] = consonantMapTable[iIndex].cMap;
    }
    else {
        str[0] = strPY[0];
        str[1] = '\0';
        iIndex = IsSyllabary (str, 0);
        if (iIndex == -1)
            return False;
        strMap[0] = consonantMapTable[iIndex].cMap;
        iIndex = IsConsonant (strPY + 1, 0);
        if (iIndex == -1)
            return False;
        strMap[1] = consonantMapTable[iIndex].cMap;
    }

    return True;
}
/* * 此处只转换单个双拼,并且不检查错误 */ void SP2QP (char *strSP, char *strQP) { int iIndex1 = 0, iIndex2 = 0; char strTmp[2]; char str_QP[MAX_PY_LENGTH + 1]; strTmp[1] = '\0'; strQP[0] = '\0'; if (strSP[0] != cNonS) { iIndex1 = GetSPIndexJP_S (*strSP); if (iIndex1 == -1) { strTmp[0] = strSP[0]; strcat (strQP, strTmp); } else strcat (strQP, SPMap_S[iIndex1].strQP); } else if (!strSP[1]) strcpy (strQP, strSP); if (strSP[1]) { iIndex2 = -1; while (1) { iIndex2 = GetSPIndexJP_C (strSP[1], iIndex2 + 1); if (iIndex2 == -1) { strTmp[0] = strSP[1]; strcat (strQP, strTmp); break; } strcpy (str_QP, strQP); strcat (strQP, SPMap_C[iIndex2].strQP); if (FindPYFAIndex (strQP, False) != -1) break; strcpy (strQP, str_QP); } } if (FindPYFAIndex (strQP, False) != -1) iIndex2 = 0; //这只是将iIndex2置为非-1,以免后面的判断 strTmp[0] = strSP[0]; strTmp[1] = '\0'; if ((iIndex1 == -1 && !(IsSyllabary (strTmp, 0))) || iIndex2 == -1) { iIndex1 = FindPYFAIndex (strSP, False); if (iIndex1 != -1) strcpy (strQP, strSP); } }
/*
 * Convert one pinyin string (which may consist of only an initial or only
 * a final) into its two-character map code.
 *
 *   pyconfig    - pinyin configuration; bMisstype and MHPY_C[1].bMode are
 *                 read here.
 *   strPYorigin - NUL-terminated pinyin to convert; it is not modified.
 *   strMap      - output buffer of 3 chars; on success holds two code
 *                 characters followed by '\0'.
 *   mode        - value stored in the slot of the missing half when the
 *                 input matches a whole table entry on its own.
 *
 * Returns true on success, false otherwise (usually because the input is
 * not a standard pinyin).  On failure strMap[2] is '\0' and the first two
 * characters are left untouched.
 */
boolean MapPY(FcitxPinyinConfig* pyconfig, const char* strPYorigin, char strMap[3], PYPARSEINPUTMODE mode)
{
    char str[5];
    char strPY[7];
    int iIndex;
    size_t len = strlen(strPYorigin);

    strMap[2] = '\0';

    /* Anything that does not fit the local working copy is rejected. */
    if (len >= sizeof(strPY))
        return false;
    strcpy(strPY, strPYorigin);

    /* With bMisstype set, a trailing "gn" is rewritten to "ng". */
    if (pyconfig->bMisstype && len >= 2
        && strPY[len - 1] == 'n' && strPY[len - 2] == 'g') {
        strPY[len - 2] = 'n';
        strPY[len - 1] = 'g';
    }

    /* Special case: "eng" gets a fixed code when MHPY_C[1].bMode is set. */
    if (!strcmp(strPY, "eng") && pyconfig->MHPY_C[1].bMode) {
        strcpy(strMap, "X0");
        return true;
    }

    /* The whole string is a single IsSyllabary() table entry. */
    iIndex = IsSyllabary(strPY, 0);
    if (-1 != iIndex) {
        strMap[0] = syllabaryMapTable[iIndex].cMap;
        strMap[1] = mode;
        return true;
    }

    /* The whole string is a single IsConsonant() table entry. */
    iIndex = IsConsonant(strPY, 0);
    if (-1 != iIndex) {
        strMap[0] = mode;
        strMap[1] = consonantMapTable[iIndex].cMap;
        return true;
    }

    /* Nothing left to split; also keeps strPY[1] below initialised. */
    if (len == 0)
        return false;

    /*
     * Split into a leading part (two characters when the second one is
     * 'h' or 'g', otherwise one) and the remaining tail.
     *
     * NOTE(review): the leading part's index comes from IsSyllabary() but
     * is looked up in consonantMapTable, not syllabaryMapTable -- confirm
     * this is intended.
     */
    if (strPY[1] == 'h' || strPY[1] == 'g') {
        str[0] = strPY[0];
        str[1] = strPY[1];
        str[2] = '\0';
        iIndex = IsSyllabary(str, 0);
        if (iIndex == -1)       /* would index the table at -1 */
            return false;
        strMap[0] = consonantMapTable[iIndex].cMap;
        iIndex = IsConsonant(strPY + 2, 0);
        if (iIndex == -1)       /* would index the table at -1 */
            return false;
        strMap[1] = consonantMapTable[iIndex].cMap;
    } else {
        str[0] = strPY[0];
        str[1] = '\0';
        iIndex = IsSyllabary(str, 0);
        if (iIndex == -1)
            return false;
        strMap[0] = consonantMapTable[iIndex].cMap;
        iIndex = IsConsonant(strPY + 1, 0);
        if (iIndex == -1)
            return false;
        strMap[1] = consonantMapTable[iIndex].cMap;
    }

    return true;
}
/*
 * Split the typed string strPY into pinyin units and record them in parsePY.
 *
 *   pyconfig - pinyin configuration (PYTable and bFullPY are read here and
 *              it is forwarded to the helper routines).
 *   strPY    - NUL-terminated input; an empty string yields zero units.
 *   parsePY  - output: for each unit strPYParsed[] receives its text and
 *              strMap[] its map code; iHZCount is the number of units and
 *              iMode the resulting parse mode.
 *   mode     - passed through to MapPY().
 *   bSP      - true to treat the input as Shuangpin (two keys per unit),
 *              false to treat it as full pinyin.
 *
 * NOTE(review): iHZCount is never checked against the capacity of
 * strMap[] / strPYParsed[], which is declared elsewhere -- confirm callers
 * bound the input length.
 */
void ParsePY(FcitxPinyinConfig *pyconfig, const char *strPY, ParsePYStruct * parsePY, PYPARSEINPUTMODE mode, boolean bSP)
{
    const char *strP;
    int iIndex;
    int iTemp;
    char str_Map[3];
    char strTemp[7];
    size_t inputLen = strlen(strPY);

    parsePY->iMode = PARSE_SINGLEHZ;
    strP = strPY;
    parsePY->iHZCount = 0;

    if (bSP) {
        char strQP[7];
        char strJP[3];

        strJP[2] = '\0';
        while (*strP) {
            /* Take two keys, expand them and map the result. */
            strJP[0] = *strP++;
            strJP[1] = *strP;
            SP2QP(pyconfig, strJP, strQP);
            MapPY(pyconfig, strQP, str_Map, mode);

            /* Only one key was left: store it and stop. */
            if (!*strP) {
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                break;
            }

            iIndex = FindPYFAIndex(pyconfig, strQP, 0);
            if (iIndex != -1) {
                /* The pair is accepted by FindPYFAIndex(): consume both keys. */
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                strP++;
            } else {
                /* Otherwise retry with the first key on its own. */
                strJP[1] = '\0';
                SP2QP(pyconfig, strJP, strQP);
                if (!MapPY(pyconfig, strQP, str_Map, mode))
                    strcpy(parsePY->strMap[parsePY->iHZCount], strJP);
                else
                    strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
            }

            /* A run of separators is appended once to the unit just stored. */
            if (*strP == PY_SEPARATOR) {
                strcat(parsePY->strPYParsed[parsePY->iHZCount - 1], PY_SEPARATOR_S);
                while (*strP == PY_SEPARATOR)
                    strP++;
            }
        }
    } else {
        boolean bSeperator = false;

        /*
         * Tested before the first pass so that an empty input is never
         * examined; otherwise the separator branch below would step strP
         * past the terminator.
         */
        while (*strP) {
            iIndex = FindPYFAIndex(pyconfig, strP, 1);
            if (iIndex != -1) {
                size_t lIndex = strlen(pyconfig->PYTable[iIndex].strPY);
                strTemp[0] = pyconfig->PYTable[iIndex].strPY[lIndex - 1];
                iTemp = -1;

                /*
                 * If the matched pinyin ends in 'g', 'n', 'e' or 'a' there
                 * might be another possibility; for example "wanan" can be
                 * "wa nan" or "wan an".  Try to resolve that here.
                 */
                if (strTemp[0] == 'g' || strTemp[0] == 'n' || strTemp[0] == 'e' || strTemp[0] == 'a') {
                    strncpy(strTemp, strP, lIndex - 1);
                    strTemp[lIndex - 1] = '\0';

                    /* For "wan", check with an exact match whether "wa" is valid. */
                    iTemp = FindPYFAIndex(pyconfig, strTemp, 0);

                    /* If "wa" is valid ... */
                    if (iTemp != -1) {
                        /* ... also check whether "nan" is valid. */
                        int firstIndex;
                        firstIndex = iTemp;
                        iTemp = FindPYFAIndex(pyconfig, strP + strlen(pyconfig->PYTable[iTemp].strPY), 1);

                        /* If that is still valid ... */
                        if (iTemp != -1) {
                            /*
                             * A split that leaves a unit of length 1 must be
                             * avoided: "nin" could be "ni n", and "ying"
                             * could likewise be "yi ng".
                             */
                            if (strlen(pyconfig->PYTable[iTemp].strPY) == 1 || !strcmp("ng", pyconfig->PYTable[iTemp].strPY))
                                iTemp = -1;
                        }

                        if (iTemp != -1) {
                            /* Use total length, then frequency, to decide whether to split. */
                            int index2 = FindPYFAIndex(pyconfig, strP + strlen(pyconfig->PYTable[iIndex].strPY), 1);
                            boolean resplit = false;
                            do {
                                /* Nothing parses after the long match: take the split. */
                                if (index2 == -1) {
                                    resplit = true;
                                    break;
                                }
                                /* Prefer whichever pair covers more characters. */
                                size_t length1 = strlen(pyconfig->PYTable[iIndex].strPY) + strlen(pyconfig->PYTable[index2].strPY);
                                size_t length2 = strlen(pyconfig->PYTable[firstIndex].strPY) + strlen(pyconfig->PYTable[iTemp].strPY);
                                if (length1 != length2) {
                                    resplit = (length1 < length2);
                                    break;
                                }
                                /* Same coverage: compare LookupPYFreq() of the two pairs. */
                                double freq1 = LookupPYFreq(pyconfig, iIndex, index2);
                                double freq2 = LookupPYFreq(pyconfig, firstIndex, iTemp);
                                resplit = (freq1 <= freq2);
                            } while(0);

                            if (resplit) {
                                strncpy(strTemp, strP, lIndex - 1);
                                strTemp[lIndex - 1] = '\0';
                            } else
                                iTemp = -1;
                        }
                    }
                }

                /* No split was chosen: use the full table entry. */
                if (iTemp == -1)
                    strcpy(strTemp, pyconfig->PYTable[iIndex].strPY);

                MapPY(pyconfig, strTemp, str_Map, mode);
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strP += strlen(strTemp);

                /* A pending separator is stored in front of the unit text. */
                if (bSeperator) {
                    bSeperator = false;
                    parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                    parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                } else
                    parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                strcat(parsePY->strPYParsed[parsePY->iHZCount++], strTemp);
            } else {
                if (pyconfig->bFullPY && *strP != PY_SEPARATOR)
                    parsePY->iMode = PARSE_ERROR;

                iIndex = IsConsonant(strP, 1);
                if (-1 != iIndex) {
                    /* A bare consonantMapTable entry is recorded as an error. */
                    parsePY->iMode = PARSE_ERROR;

                    if (bSeperator) {
                        bSeperator = false;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                    } else
                        parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                    strcat(parsePY->strPYParsed[parsePY->iHZCount], consonantMapTable[iIndex].strPY);
                    MapPY(pyconfig, consonantMapTable[iIndex].strPY, str_Map, mode);
                    strcpy(parsePY->strMap[parsePY->iHZCount++], str_Map);
                    strP += strlen(consonantMapTable[iIndex].strPY);
                } else {
                    iIndex = IsSyllabary(strP, 1);
                    if (-1 != iIndex) {
                        /* A bare syllabaryMapTable entry marks the parse as abbreviated. */
                        if (bSeperator) {
                            bSeperator = false;
                            parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                            parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        } else
                            parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                        strcat(parsePY->strPYParsed[parsePY->iHZCount], syllabaryMapTable[iIndex].strPY);
                        MapPY(pyconfig, syllabaryMapTable[iIndex].strPY, str_Map, mode);
                        strcpy(parsePY->strMap[parsePY->iHZCount++], str_Map);
                        strP += strlen(syllabaryMapTable[iIndex].strPY);
                        if (parsePY->iMode != PARSE_ERROR)
                            parsePY->iMode = PARSE_ABBR;
                    } else {
                        /* Must be a separator. */
                        strP++;
                        bSeperator = true;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        parsePY->strMap[parsePY->iHZCount][0] = '0';
                        parsePY->strMap[parsePY->iHZCount][1] = '0';
                        parsePY->strMap[parsePY->iHZCount][2] = '\0';
                    }
                }
            }
        }
    }

    /* A trailing separator in full-pinyin mode counts as one more unit. */
    if (inputLen > 0 && strPY[inputLen - 1] == PY_SEPARATOR && !bSP)
        parsePY->iHZCount++;

    /* Unless an error was recorded, keep the ABBR bit and add PHRASE or SINGLEHZ. */
    if (parsePY->iMode != PARSE_ERROR) {
        parsePY->iMode = parsePY->iMode & PARSE_ABBR;
        if (parsePY->iHZCount > 1)
            parsePY->iMode = parsePY->iMode | PARSE_PHRASE;
        else
            parsePY->iMode = parsePY->iMode | PARSE_SINGLEHZ;
    }
}
/*
 * Split the typed string strPY into pinyin units and record them in parsePY.
 *
 *   strPY   - NUL-terminated input; an empty string yields zero units.
 *   parsePY - output: for each unit strPYParsed[] receives its text and
 *             strMap[] its map code; iHZCount is the number of units and
 *             iMode the resulting parse mode.
 *   mode    - passed through to MapPY().
 *
 * The file-level bSP flag selects Shuangpin (two keys per unit) or full
 * pinyin parsing; bFullPY and PYTable are also read from file scope.
 *
 * NOTE(review): iHZCount is never checked against the capacity of
 * strMap[] / strPYParsed[], which is declared elsewhere -- confirm callers
 * bound the input length.
 */
void ParsePY (char *strPY, ParsePYStruct * parsePY, PYPARSEINPUTMODE mode)
{
    char *strP;
    int iIndex;
    int iTemp;
    char str_Map[3];
    char strTemp[7];
    size_t inputLen = strlen (strPY);

    parsePY->iMode = PARSE_SINGLEHZ;
    strP = strPY;
    parsePY->iHZCount = 0;

    if (bSP) {
        char strQP[7];
        char strJP[3];

        strJP[2] = '\0';
        while (*strP) {
            /* Take two keys, expand them and map the result. */
            strJP[0] = *strP++;
            strJP[1] = *strP;
            SP2QP (strJP, strQP);
            MapPY (strQP, str_Map, mode);

            /* Only one key was left: store it and stop. */
            if (!*strP) {
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                break;
            }

            iIndex = FindPYFAIndex (strQP, 0);
            if (iIndex != -1) {
                /* The pair is accepted by FindPYFAIndex(): consume both keys. */
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                strP++;
            }
            else {
                /* Otherwise retry with the first key on its own. */
                strJP[1] = '\0';
                SP2QP (strJP, strQP);
                if (!MapPY (strQP, str_Map, mode))
                    strcpy (parsePY->strMap[parsePY->iHZCount], strJP);
                else
                    strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
            }

            /* A run of separators is appended once to the unit just stored. */
            if (*strP == PY_SEPARATOR) {
                strcat (parsePY->strPYParsed[parsePY->iHZCount - 1], PY_SEPARATOR_S);
                while (*strP == PY_SEPARATOR )
                    strP++;
            }
        }
    }
    else {
        Bool bSeperator = False;

        /*
         * Tested before the first pass so that an empty input is never
         * examined; otherwise the separator branch below would step strP
         * past the terminator.
         */
        while (*strP) {
            iIndex = FindPYFAIndex (strP, 1);
            if (iIndex != -1) {
                strTemp[0] = PYTable[iIndex].strPY[strlen (PYTable[iIndex].strPY) - 1];
                iTemp = -1;

                /*
                 * When the match ends in 'g' or 'n', check whether dropping
                 * that last letter still leaves a valid pinyin followed by
                 * another valid pinyin; if so, use the shorter split.
                 */
                if (strTemp[0] == 'g' || strTemp[0] == 'n') {
                    strncpy (strTemp, strP, strlen (PYTable[iIndex].strPY) - 1);
                    strTemp[strlen (PYTable[iIndex].strPY) - 1] = '\0';

                    iTemp = FindPYFAIndex (strTemp, 0);
                    if (iTemp != -1) {
                        iTemp = FindPYFAIndex (strP + strlen (PYTable[iTemp].strPY), 1);
                        if (iTemp != -1) {
                            /* Reject a split whose second unit is one letter or "ng". */
                            if (strlen (PYTable[iTemp].strPY) == 1 || !strcmp ("ng", PYTable[iTemp].strPY))
                                iTemp = -1;
                        }
                        if (iTemp != -1) {
                            strncpy (strTemp, strP, strlen (PYTable[iIndex].strPY) - 1);
                            strTemp[strlen (PYTable[iIndex].strPY) - 1] = '\0';
                        }
                    }
                }

                /* No split was chosen: use the full table entry. */
                if (iTemp == -1)
                    strcpy (strTemp, PYTable[iIndex].strPY);

                MapPY (strTemp, str_Map, mode);
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strP += strlen (strTemp);

                /* A pending separator is stored in front of the unit text. */
                if (bSeperator) {
                    bSeperator = False;
                    parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                    parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                }
                else
                    parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                strcat (parsePY->strPYParsed[parsePY->iHZCount++], strTemp);
            }
            else {
                if (bFullPY && *strP != PY_SEPARATOR)
                    parsePY->iMode = PARSE_ERROR;

                iIndex = IsConsonant (strP, 1);
                if (-1 != iIndex) {
                    /* A bare consonantMapTable entry is recorded as an error. */
                    parsePY->iMode = PARSE_ERROR;

                    if (bSeperator) {
                        bSeperator = False;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                    }
                    else
                        parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                    strcat (parsePY->strPYParsed[parsePY->iHZCount], consonantMapTable[iIndex].strPY);
                    MapPY (consonantMapTable[iIndex].strPY, str_Map, mode);
                    strcpy (parsePY->strMap[parsePY->iHZCount++], str_Map);
                    strP += strlen (consonantMapTable[iIndex].strPY);
                }
                else {
                    iIndex = IsSyllabary (strP, 1);
                    if (-1 != iIndex) {
                        /* A bare syllabaryMapTable entry marks the parse as abbreviated. */
                        if (bSeperator) {
                            bSeperator = False;
                            parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                            parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        }
                        else
                            parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                        strcat (parsePY->strPYParsed[parsePY->iHZCount], syllabaryMapTable[iIndex].strPY);
                        MapPY (syllabaryMapTable[iIndex].strPY, str_Map, mode);
                        strcpy (parsePY->strMap[parsePY->iHZCount++], str_Map);
                        strP += strlen (syllabaryMapTable[iIndex].strPY);
                        if (parsePY->iMode != PARSE_ERROR)
                            parsePY->iMode = PARSE_ABBR;
                    }
                    else {
                        /* Must be a separator. */
                        strP++;
                        bSeperator = True;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        parsePY->strMap[parsePY->iHZCount][0] = '0';
                        parsePY->strMap[parsePY->iHZCount][1] = '0';
                        parsePY->strMap[parsePY->iHZCount][2] = '\0';
                    }
                }
            }
        }
    }

    /* A trailing separator in full-pinyin mode counts as one more unit. */
    if (inputLen > 0 && strPY[inputLen - 1] == PY_SEPARATOR && !bSP)
        parsePY->iHZCount++;

    /* Unless an error was recorded, keep the ABBR bit and add PHRASE or SINGLEHZ. */
    if (parsePY->iMode != PARSE_ERROR) {
        parsePY->iMode = parsePY->iMode & PARSE_ABBR;
        if (parsePY->iHZCount > 1)
            parsePY->iMode = parsePY->iMode | PARSE_PHRASE;
        else
            parsePY->iMode = parsePY->iMode | PARSE_SINGLEHZ;
    }
}