Example #1
0
/*
 * Convert one pinyin string (including one that is only an initial or only
 * a final) into its two-character pinyin map code.
 *
 * strPY  - NUL-terminated pinyin to convert.
 * strMap - output buffer of 3 bytes; strMap[2] is always set to '\0' and
 *          the first two bytes receive the map characters.  When the whole
 *          string matches a single table entry, the other byte is `mode`.
 * mode   - filler character stored in the half that is absent.
 *
 * Returns True on success, False otherwise (usually because strPY is not a
 * standard pinyin).  On failure strMap[0] / strMap[1] may be only partly
 * written.
 */
Bool MapPY (char *strPY, char strMap[3], PYPARSEINPUTMODE mode)
{
    char            str[3];
    int             iIndex;
    int             iPrefixLen;

    /* Special case: "eng" gets a fixed code when MHPY_C[1].bMode is set */
    if (!strcmp (strPY, "eng") && MHPY_C[1].bMode) {
        strcpy (strMap, "X0");
        return True;
    }

    strMap[2] = '\0';

    /* The whole string is an entry of the syllabary table */
    iIndex = IsSyllabary (strPY, 0);
    if (-1 != iIndex) {
        strMap[0] = syllabaryMapTable[iIndex].cMap;
        strMap[1] = mode;
        return True;
    }

    /* The whole string is an entry of the consonant table */
    iIndex = IsConsonant (strPY, 0);
    if (-1 != iIndex) {
        strMap[0] = mode;
        strMap[1] = consonantMapTable[iIndex].cMap;
        return True;
    }

    /* Nothing to split; also keeps strPY[1] below inside the string */
    if (strPY[0] == '\0')
        return False;

    /*
     * Split into a prefix and a remainder.  The prefix is two letters when
     * the second letter is 'h' or 'g', one letter otherwise.
     */
    iPrefixLen = (strPY[1] == 'h' || strPY[1] == 'g') ? 2 : 1;
    str[0] = strPY[0];
    str[1] = (iPrefixLen == 2) ? strPY[1] : '\0';
    str[2] = '\0';

    /*
     * Both lookups are checked against -1 before being used as an index;
     * the two-letter path previously indexed the table unconditionally.
     */
    iIndex = IsSyllabary (str, 0);
    if (iIndex == -1)
        return False;
    /*
     * NOTE(review): the index comes from IsSyllabary but is applied to
     * consonantMapTable, whereas the whole-string case above uses
     * syllabaryMapTable.  Kept as-is because the table contents are not
     * visible here -- confirm which table is intended.
     */
    strMap[0] = consonantMapTable[iIndex].cMap;

    iIndex = IsConsonant (strPY + iPrefixLen, 0);
    if (iIndex == -1)
        return False;
    strMap[1] = consonantMapTable[iIndex].cMap;

    return True;
}
Example #2
0
/*
 * Convert a single Shuangpin (double-pinyin) key pair into full pinyin.
 * Only one pair is handled and no error checking is done.
 *
 * strSP - input: first key in strSP[0], optional second key in strSP[1],
 *         NUL-terminated.
 * strQP - output: receives the full-pinyin spelling (caller provides the
 *         storage).
 */
void SP2QP (char *strSP, char *strQP)
{
    int             idxFirst = 0;
    int             idxSecond = 0;
    int             bUseRaw;
    char            key[2];
    char            saved[MAX_PY_LENGTH + 1];

    key[1] = '\0';
    strQP[0] = '\0';

    /* ---- first key ---- */
    if (strSP[0] == cNonS) {
        /* presumably cNonS marks "no first part" -- TODO confirm */
        if (strSP[1] == '\0')
            strcpy (strQP, strSP);
    }
    else {
        idxFirst = GetSPIndexJP_S (strSP[0]);
        if (idxFirst != -1)
            strcat (strQP, SPMap_S[idxFirst].strQP);
        else {
            /* no table entry: keep the typed character itself */
            key[0] = strSP[0];
            strcat (strQP, key);
        }
    }

    /* ---- second key ---- */
    if (strSP[1] != '\0') {
        /*
         * Try each SPMap_C candidate for the key in turn until the combined
         * spelling is one FindPYFAIndex accepts.
         */
        idxSecond = GetSPIndexJP_C (strSP[1], 0);
        while (idxSecond != -1) {
            strcpy (saved, strQP);
            strcat (strQP, SPMap_C[idxSecond].strQP);
            if (FindPYFAIndex (strQP, False) != -1)
                break;

            /* rejected: undo the append and move on to the next candidate */
            strcpy (strQP, saved);
            idxSecond = GetSPIndexJP_C (strSP[1], idxSecond + 1);
        }

        if (idxSecond == -1) {
            /* candidates exhausted: append the typed character itself */
            key[0] = strSP[1];
            strcat (strQP, key);
        }
    }

    /* Only forces idxSecond away from -1 so the check below is skipped */
    if (FindPYFAIndex (strQP, False) != -1)
        idxSecond = 0;

    key[0] = strSP[0];
    key[1] = '\0';

    /*
     * NOTE(review): !IsSyllabary(...) is true only when the lookup returns
     * index 0, while other code in this file treats -1 as "not found".
     * Behaviour is kept unchanged here -- confirm the intent.
     */
    if (idxFirst == -1 && !(IsSyllabary (key, 0)))
        bUseRaw = 1;
    else
        bUseRaw = (idxSecond == -1);

    /* Fall back to the raw input when it is itself an accepted spelling */
    if (bUseRaw) {
        if (FindPYFAIndex (strSP, False) != -1)
            strcpy (strQP, strSP);
    }
}
Example #3
0
/*
 * Convert one pinyin string (including one that is only an initial or only
 * a final) into its two-character pinyin map code.
 *
 * pyconfig    - configuration; bMisstype and MHPY_C[1].bMode are consulted.
 * strPYorigin - NUL-terminated pinyin; it is copied and never modified.
 * strMap      - output buffer of 3 bytes, always left NUL-terminated.  When
 *               the whole string matches a single table entry, the other
 *               byte is `mode`.
 * mode        - filler character stored in the half that is absent.
 *
 * Returns true on success, false otherwise (usually because the input is
 * not a standard pinyin).  On failure strMap[0] / strMap[1] may be only
 * partly written.
 */
boolean MapPY(FcitxPinyinConfig* pyconfig, const char* strPYorigin, char strMap[3], PYPARSEINPUTMODE mode)
{
    char            str[3];
    char            strPY[7];
    int             iIndex;
    int             iPrefixLen;
    size_t          len = strlen(strPYorigin);

    /* Input that cannot fit the working copy is rejected, not overflowed */
    if (len >= sizeof(strPY)) {
        strMap[0] = '\0';
        return false;
    }

    strcpy(strPY, strPYorigin);

    /*
     * With bMisstype set, a trailing "gn" is rewritten to "ng".  The length
     * guard keeps len - 1 / len - 2 from wrapping around on short input.
     */
    if (pyconfig->bMisstype && len >= 2 && strPY[len - 1] == 'n' && strPY[len - 2] == 'g') {
        strPY[len - 2] = 'n';
        strPY[len - 1] = 'g';
    }

    /* Special case: "eng" gets a fixed code when MHPY_C[1].bMode is set */
    if (!strcmp(strPY, "eng") && pyconfig->MHPY_C[1].bMode) {
        strcpy(strMap, "X0");
        return true;
    }

    strMap[2] = '\0';

    /* The whole string is an entry of the syllabary table */
    iIndex = IsSyllabary(strPY, 0);

    if (-1 != iIndex) {
        strMap[0] = syllabaryMapTable[iIndex].cMap;
        strMap[1] = mode;
        return true;
    }

    /* The whole string is an entry of the consonant table */
    iIndex = IsConsonant(strPY, 0);

    if (-1 != iIndex) {
        strMap[0] = mode;
        strMap[1] = consonantMapTable[iIndex].cMap;
        return true;
    }

    /* Nothing to split; also keeps strPY[1] below inside the string */
    if (len == 0)
        return false;

    /*
     * Split into a prefix and a remainder.  The prefix is two letters when
     * the second letter is 'h' or 'g', one letter otherwise.
     */
    iPrefixLen = (strPY[1] == 'h' || strPY[1] == 'g') ? 2 : 1;
    str[0] = strPY[0];
    str[1] = (iPrefixLen == 2) ? strPY[1] : '\0';
    str[2] = '\0';

    /*
     * Both lookups are checked against -1 before being used as an index;
     * the two-letter path previously indexed the table unconditionally.
     */
    iIndex = IsSyllabary(str, 0);

    if (iIndex == -1)
        return false;

    /*
     * NOTE(review): the index comes from IsSyllabary but is applied to
     * consonantMapTable, whereas the whole-string case above uses
     * syllabaryMapTable.  Kept as-is because the table contents are not
     * visible here -- confirm which table is intended.
     */
    strMap[0] = consonantMapTable[iIndex].cMap;

    iIndex = IsConsonant(strPY + iPrefixLen, 0);

    if (iIndex == -1)
        return false;

    strMap[1] = consonantMapTable[iIndex].cMap;

    return true;
}
Example #4
0
/*
 * Split a typed pinyin string into pieces and record, for each piece, its
 * text (parsePY->strPYParsed[i]) and its map code (parsePY->strMap[i]).
 *
 * pyconfig - configuration; PYTable and bFullPY are read here and the
 *            pointer is forwarded to the lookup helpers.
 * strPY    - NUL-terminated input; an empty string yields zero pieces.
 * parsePY  - output: iHZCount, strPYParsed[], strMap[] and iMode are set.
 * mode     - forwarded unchanged to MapPY.
 * bSP      - true: treat the input as Shuangpin key pairs;
 *            false: treat it as full pinyin.
 *
 * NOTE(review): iHZCount is never checked against the capacity of the
 * parsePY arrays (not visible here) -- verify callers bound the input.
 */
void ParsePY(FcitxPinyinConfig *pyconfig, const char *strPY, ParsePYStruct * parsePY, PYPARSEINPUTMODE mode, boolean bSP)
{
    const char           *strP;
    int             iIndex;
    int             iTemp;
    /* Zero-filled so a failed MapPY never leaves indeterminate bytes to copy */
    char            str_Map[3] = { '\0' };
    char            strTemp[7];
    size_t          inputLen = strlen(strPY);

    parsePY->iMode = PARSE_SINGLEHZ;
    strP = strPY;
    parsePY->iHZCount = 0;

    if (bSP) {
        char            strQP[7];
        char            strJP[3];

        strJP[2] = '\0';

        while (*strP) {
            /* Tentatively take two keys; strP is left on the second one */
            strJP[0] = *strP++;
            strJP[1] = *strP;
            SP2QP(pyconfig, strJP, strQP);
            MapPY(pyconfig, strQP, str_Map, mode);

            /* Only one key was left: store it and finish */
            if (!*strP) {
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                break;
            }

            iIndex = FindPYFAIndex(pyconfig, strQP, 0);

            if (iIndex != -1) {
                /* The pair converts to a known pinyin: consume both keys */
                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                strP++;
            } else {
                /* Otherwise keep only the first key as a piece of its own */
                strJP[1] = '\0';
                SP2QP(pyconfig, strJP, strQP);

                /* If even that cannot be mapped, the raw key is the map */
                if (!MapPY(pyconfig, strQP, str_Map, mode))
                    strcpy(parsePY->strMap[parsePY->iHZCount], strJP);
                else
                    strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);

                strcpy(parsePY->strPYParsed[parsePY->iHZCount++], strJP);
            }

            /* A run of separators is attached once to the preceding piece */
            if (*strP == PY_SEPARATOR) {
                strcat(parsePY->strPYParsed[parsePY->iHZCount - 1], PY_SEPARATOR_S);

                while (*strP == PY_SEPARATOR)
                    strP++;
            }
        }
    } else {
        /* Set after a separator; the next piece is then prefixed with it */
        boolean            bSeperator = false;

        /*
         * A pre-tested loop: with the former do/while an empty input ran
         * the body once and advanced strP past the terminator.
         */
        while (*strP) {
            iIndex = FindPYFAIndex(pyconfig, strP, 1);

            if (iIndex != -1) {
                size_t lIndex = strlen(pyconfig->PYTable[iIndex].strPY);
                strTemp[0] = pyconfig->PYTable[iIndex].strPY[lIndex - 1];
                iTemp = -1;

                /*
                 * if the matched pinyin ends in 'g', 'n', 'e' or 'a'
                 * there might be another possibility, for example "wanan"
                 * can be "wa nan" and "wan an"; try to resolve that here
                 */
                if (strTemp[0] == 'g' || strTemp[0] == 'n' || strTemp[0] == 'e' || strTemp[0] == 'a') {
                    strncpy(strTemp, strP, lIndex - 1);
                    strTemp[lIndex - 1] = '\0';

                    /* for example we have "wan", so check whether "wa" is valid, with exact match */
                    iTemp = FindPYFAIndex(pyconfig, strTemp, 0);

                    /* if "wa" is valid */
                    if (iTemp != -1) {
                        /* also check whether "nan" is valid */
                        int firstIndex;
                        firstIndex = iTemp;
                        iTemp = FindPYFAIndex(pyconfig, strP + strlen(pyconfig->PYTable[iTemp].strPY), 1);

                        /* if it is still valid */
                        if (iTemp != -1) {
                            /*
                             * a length-1 second part must be avoided: "nin"
                             * could be "ni n", and "ying" could be "yi ng",
                             * but neither should be split here
                             */
                            if (strlen(pyconfig->PYTable[iTemp].strPY) == 1 || !strcmp("ng", pyconfig->PYTable[iTemp].strPY))
                                iTemp = -1;
                        }

                        if (iTemp != -1) {
                            /* decide between the two splits by length, then by frequency */
                            int index2 = FindPYFAIndex(pyconfig, strP + strlen(pyconfig->PYTable[iIndex].strPY), 1);

                            boolean resplit = false;
                            do {
                                /* nothing parses after the long match: take the short split */
                                if (index2 == -1) {
                                    resplit = true;
                                    break;
                                }

                                /* prefer the split that covers more input */
                                size_t length1 = strlen(pyconfig->PYTable[iIndex].strPY) + strlen(pyconfig->PYTable[index2].strPY);
                                size_t length2 = strlen(pyconfig->PYTable[firstIndex].strPY) + strlen(pyconfig->PYTable[iTemp].strPY);
                                if (length1 != length2) {
                                    resplit = (length1 < length2);
                                    break;
                                }

                                double freq1 = LookupPYFreq(pyconfig, iIndex, index2);
                                double freq2 = LookupPYFreq(pyconfig, firstIndex, iTemp);

                                resplit = (freq1 <= freq2);
                            } while(0);

                            /*
                             * When resplitting, strTemp already holds the
                             * shortened piece, so it needs no re-copy.
                             */
                            if (!resplit)
                                iTemp = -1;
                        }
                    }
                }

                /* No resplit: use the table spelling of the long match */
                if (iTemp == -1)
                    strcpy(strTemp, pyconfig->PYTable[iIndex].strPY);

                MapPY(pyconfig, strTemp, str_Map, mode);

                strcpy(parsePY->strMap[parsePY->iHZCount], str_Map);

                strP += strlen(strTemp);

                if (bSeperator) {
                    bSeperator = false;
                    parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                    parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                } else
                    parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';

                strcat(parsePY->strPYParsed[parsePY->iHZCount++], strTemp);
            } else {
                /* Not a complete pinyin: an error when bFullPY is set */
                if (pyconfig->bFullPY && *strP != PY_SEPARATOR)
                    parsePY->iMode = PARSE_ERROR;

                iIndex = IsConsonant(strP, 1);

                if (-1 != iIndex) {
                    /* A consonant-table entry on its own marks a parse error */
                    parsePY->iMode = PARSE_ERROR;

                    if (bSeperator) {
                        bSeperator = false;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                    } else
                        parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';

                    strcat(parsePY->strPYParsed[parsePY->iHZCount], consonantMapTable[iIndex].strPY);

                    MapPY(pyconfig, consonantMapTable[iIndex].strPY, str_Map, mode);

                    strcpy(parsePY->strMap[parsePY->iHZCount++], str_Map);

                    strP += strlen(consonantMapTable[iIndex].strPY);
                } else {
                    iIndex = IsSyllabary(strP, 1);

                    if (-1 != iIndex) {
                        if (bSeperator) {
                            bSeperator = false;
                            parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                            parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        } else
                            parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';

                        strcat(parsePY->strPYParsed[parsePY->iHZCount], syllabaryMapTable[iIndex].strPY);

                        MapPY(pyconfig, syllabaryMapTable[iIndex].strPY, str_Map, mode);

                        strcpy(parsePY->strMap[parsePY->iHZCount++], str_Map);

                        strP += strlen(syllabaryMapTable[iIndex].strPY);

                        /* A syllabary-table entry on its own marks abbreviated input */
                        if (parsePY->iMode != PARSE_ERROR)
                            parsePY->iMode = PARSE_ABBR;
                    } else {
                        /*
                         * Must be a separator.  The slot is filled in but
                         * iHZCount is not advanced, so the next piece
                         * overwrites it with a separator-prefixed entry.
                         */
                        strP++;
                        bSeperator = true;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        parsePY->strMap[parsePY->iHZCount][0] = '0';
                        parsePY->strMap[parsePY->iHZCount][1] = '0';
                        parsePY->strMap[parsePY->iHZCount][2] = '\0';
                    }
                }
            }
        }
    }

    /*
     * A trailing separator in full-pinyin mode keeps its pending slot as a
     * piece.  The length guard avoids indexing strPY[-1] on empty input.
     */
    if (!bSP && inputLen > 0 && strPY[inputLen - 1] == PY_SEPARATOR)
        parsePY->iHZCount++;

    /* Keep only the ABBR bit, then add PHRASE or SINGLEHZ by piece count */
    if (parsePY->iMode != PARSE_ERROR) {
        parsePY->iMode = parsePY->iMode & PARSE_ABBR;

        if (parsePY->iHZCount > 1)
            parsePY->iMode = parsePY->iMode | PARSE_PHRASE;
        else
            parsePY->iMode = parsePY->iMode | PARSE_SINGLEHZ;
    }
}
Example #5
0
/*
 * Split a typed pinyin string into pieces and record, for each piece, its
 * text (parsePY->strPYParsed[i]) and its map code (parsePY->strMap[i]).
 *
 * strPY   - NUL-terminated input; an empty string yields zero pieces.
 * parsePY - output: iHZCount, strPYParsed[], strMap[] and iMode are set.
 * mode    - forwarded unchanged to MapPY.
 *
 * Reads the file-level settings bSP (Shuangpin input when set, full pinyin
 * otherwise) and bFullPY, and the PYTable lookup table.
 *
 * NOTE(review): iHZCount is never checked against the capacity of the
 * parsePY arrays (not visible here) -- verify callers bound the input.
 */
void ParsePY (char *strPY, ParsePYStruct * parsePY, PYPARSEINPUTMODE mode)
{
    char           *strP;
    int             iIndex;
    int             iTemp;
    /* Zero-filled so a failed MapPY never leaves indeterminate bytes to copy */
    char            str_Map[3] = { '\0' };
    char            strTemp[7];
    size_t          inputLen = strlen (strPY);

    parsePY->iMode = PARSE_SINGLEHZ;
    strP = strPY;
    parsePY->iHZCount = 0;

    if (bSP) {
        char            strQP[7];
        char            strJP[3];

        strJP[2] = '\0';

        while (*strP) {
            /* Tentatively take two keys; strP is left on the second one */
            strJP[0] = *strP++;
            strJP[1] = *strP;
            SP2QP (strJP, strQP);
            MapPY (strQP, str_Map, mode);

            /* Only one key was left: store it and finish */
            if (!*strP) {
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                break;
            }

            iIndex = FindPYFAIndex (strQP, 0);
            if (iIndex != -1) {
                /* The pair converts to a known pinyin: consume both keys */
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
                strP++;
            }
            else {
                /* Otherwise keep only the first key as a piece of its own */
                strJP[1] = '\0';
                SP2QP (strJP, strQP);
                /* If even that cannot be mapped, the raw key is the map */
                if (!MapPY (strQP, str_Map, mode))
                    strcpy (parsePY->strMap[parsePY->iHZCount], strJP);
                else
                    strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strcpy (parsePY->strPYParsed[parsePY->iHZCount++], strJP);
            }

            /* A run of separators is attached once to the preceding piece */
            if (*strP == PY_SEPARATOR) {
                strcat (parsePY->strPYParsed[parsePY->iHZCount - 1], PY_SEPARATOR_S);
                while (*strP == PY_SEPARATOR)
                    strP++;
            }
        }
    }
    else {
        /* Set after a separator; the next piece is then prefixed with it */
        Bool            bSeperator = False;

        /*
         * A pre-tested loop: with the former do/while an empty input ran
         * the body once and advanced strP past the terminator.
         */
        while (*strP) {
            iIndex = FindPYFAIndex (strP, 1);

            if (iIndex != -1) {
                size_t          lMatch = strlen (PYTable[iIndex].strPY);

                strTemp[0] = PYTable[iIndex].strPY[lMatch - 1];
                iTemp = -1;

                /*
                 * A match ending in 'g' or 'n' may also split one letter
                 * earlier, with that letter starting the next piece.
                 */
                if (strTemp[0] == 'g' || strTemp[0] == 'n') {
                    strncpy (strTemp, strP, lMatch - 1);
                    strTemp[lMatch - 1] = '\0';

                    /* The shortened piece must itself be an exact entry */
                    iTemp = FindPYFAIndex (strTemp, 0);
                    if (iTemp != -1) {
                        /* ...and what follows it must parse as well */
                        iTemp = FindPYFAIndex (strP + strlen (PYTable[iTemp].strPY), 1);
                        if (iTemp != -1) {
                            /* Reject a one-letter or bare "ng" follower */
                            if (strlen (PYTable[iTemp].strPY) == 1 || !strcmp ("ng", PYTable[iTemp].strPY))
                                iTemp = -1;
                        }
                        /*
                         * When iTemp is still valid the shorter split wins;
                         * strTemp already holds it, so no re-copy is needed.
                         */
                    }
                }

                /* No shorter split: use the table spelling of the match */
                if (iTemp == -1)
                    strcpy (strTemp, PYTable[iIndex].strPY);
                MapPY (strTemp, str_Map, mode);
                strcpy (parsePY->strMap[parsePY->iHZCount], str_Map);
                strP += strlen (strTemp);

                if (bSeperator) {
                    bSeperator = False;
                    parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                    parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                }
                else
                    parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                strcat (parsePY->strPYParsed[parsePY->iHZCount++], strTemp);
            }
            else {
                /* Not a complete pinyin: an error when bFullPY is set */
                if (bFullPY && *strP != PY_SEPARATOR)
                    parsePY->iMode = PARSE_ERROR;

                iIndex = IsConsonant (strP, 1);
                if (-1 != iIndex) {
                    /* A consonant-table entry on its own marks a parse error */
                    parsePY->iMode = PARSE_ERROR;

                    if (bSeperator) {
                        bSeperator = False;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                    }
                    else
                        parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                    strcat (parsePY->strPYParsed[parsePY->iHZCount], consonantMapTable[iIndex].strPY);
                    MapPY (consonantMapTable[iIndex].strPY, str_Map, mode);
                    strcpy (parsePY->strMap[parsePY->iHZCount++], str_Map);
                    strP += strlen (consonantMapTable[iIndex].strPY);
                }
                else {
                    iIndex = IsSyllabary (strP, 1);
                    if (-1 != iIndex) {
                        if (bSeperator) {
                            bSeperator = False;
                            parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                            parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        }
                        else
                            parsePY->strPYParsed[parsePY->iHZCount][0] = '\0';
                        strcat (parsePY->strPYParsed[parsePY->iHZCount], syllabaryMapTable[iIndex].strPY);
                        MapPY (syllabaryMapTable[iIndex].strPY, str_Map, mode);
                        strcpy (parsePY->strMap[parsePY->iHZCount++], str_Map);

                        strP += strlen (syllabaryMapTable[iIndex].strPY);
                        /* A syllabary-table entry on its own marks abbreviated input */
                        if (parsePY->iMode != PARSE_ERROR)
                            parsePY->iMode = PARSE_ABBR;
                    }
                    else {
                        /*
                         * Must be a separator.  The slot is filled in but
                         * iHZCount is not advanced, so the next piece
                         * overwrites it with a separator-prefixed entry.
                         */
                        strP++;
                        bSeperator = True;
                        parsePY->strPYParsed[parsePY->iHZCount][0] = PY_SEPARATOR;
                        parsePY->strPYParsed[parsePY->iHZCount][1] = '\0';
                        parsePY->strMap[parsePY->iHZCount][0] = '0';
                        parsePY->strMap[parsePY->iHZCount][1] = '0';
                        parsePY->strMap[parsePY->iHZCount][2] = '\0';
                    }
                }
            }
        }
    }

    /*
     * A trailing separator in full-pinyin mode keeps its pending slot as a
     * piece.  The length guard avoids indexing strPY[-1] on empty input.
     */
    if (!bSP && inputLen > 0 && strPY[inputLen - 1] == PY_SEPARATOR)
        parsePY->iHZCount++;

    /* Keep only the ABBR bit, then add PHRASE or SINGLEHZ by piece count */
    if (parsePY->iMode != PARSE_ERROR) {
        parsePY->iMode = parsePY->iMode & PARSE_ABBR;
        if (parsePY->iHZCount > 1)
            parsePY->iMode = parsePY->iMode | PARSE_PHRASE;
        else
            parsePY->iMode = parsePY->iMode | PARSE_SINGLEHZ;
    }
}