Exemple #1
0
boolean LoadTableDict(TableMetaData* tableMetaData)
{
    char            strCode[MAX_CODE_LENGTH + 1];
    char           *strHZ = 0;
    FILE           *fpDict;
    RECORD         *recTemp;
    unsigned int    i = 0;
    uint32_t        iTemp, iTempCount;
    char            cChar = 0, cTemp;
    int8_t          iVersion = 1;
    int             iRecordIndex;
    TableDict      *tableDict;

    //读入码表
    FcitxLog(DEBUG, _("Loading Table Dict"));

    int reload = 0;
    do {
        boolean error = false;
        if (!reload) {
            /**
             * kcm saves a absolute path here but it is then interpreted as
             * a relative path?
             **/
            fpDict = FcitxXDGGetFileWithPrefix("table", tableMetaData->strPath,
                                               "r", NULL);
        } else {
            char *tablepath;
            char *path = fcitx_utils_get_fcitx_path("pkgdatadir");
            fcitx_utils_alloc_cat_str(tablepath, path, "/table/",
                                      tableMetaData->strPath);
            fpDict = fopen(tablepath, "r");
            free(tablepath);
        }
        if (!fpDict)
            return false;

        tableMetaData->tableDict = fcitx_utils_new(TableDict);

        tableDict = tableMetaData->tableDict;
        tableDict->pool = fcitx_memory_pool_create();
#define CHECK_LOAD_TABLE_ERROR(SIZE) if (size < (SIZE)) { error = true; goto table_load_error; }

        //先读取码表的信息
        //判断版本信息
        size_t size;
        size = fcitx_utils_read_uint32(fpDict, &iTemp);
        CHECK_LOAD_TABLE_ERROR(1);

        if (!iTemp) {
            size = fread(&iVersion, sizeof(int8_t), 1, fpDict);
            CHECK_LOAD_TABLE_ERROR(1);
            iVersion = (iVersion < INTERNAL_VERSION);
            size = fcitx_utils_read_uint32(fpDict, &iTemp);
            CHECK_LOAD_TABLE_ERROR(1);
        }

        tableDict->strInputCode = (char*)realloc(tableDict->strInputCode, sizeof(char) * (iTemp + 1));
        size = fread(tableDict->strInputCode, sizeof(char), iTemp + 1, fpDict);
        CHECK_LOAD_TABLE_ERROR(iTemp + 1);
        /*
         * 建立索引,加26是为了为拼音编码预留空间
         */
        size_t tmp_len = strlen(tableDict->strInputCode) + 26;
        tableDict->recordIndex = (RECORD_INDEX*)fcitx_memory_pool_alloc(tableDict->pool, tmp_len * sizeof(RECORD_INDEX));
        for (iTemp = 0; iTemp < tmp_len; iTemp++) {
            tableDict->recordIndex[iTemp].cCode = 0;
            tableDict->recordIndex[iTemp].record = NULL;
        }
        /********************************************************************/

        size = fread(&(tableDict->iCodeLength), sizeof(uint8_t), 1, fpDict);
        CHECK_LOAD_TABLE_ERROR(1);
        UpdateTableMetaData(tableMetaData);

        if (!iVersion) {
            size = fread(&(tableDict->iPYCodeLength), sizeof(uint8_t), 1, fpDict);
            CHECK_LOAD_TABLE_ERROR(1);
        }
        else
            tableDict->iPYCodeLength = tableDict->iCodeLength;

        size = fcitx_utils_read_uint32(fpDict, &iTemp);
        CHECK_LOAD_TABLE_ERROR(1);
        tableDict->strIgnoreChars = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (iTemp + 1));
        size = fread(tableDict->strIgnoreChars, sizeof(char), iTemp + 1, fpDict);
        CHECK_LOAD_TABLE_ERROR(iTemp + 1);

        size = fread(&(tableDict->bRule), sizeof(unsigned char), 1, fpDict);
        CHECK_LOAD_TABLE_ERROR(1);

        if (tableDict->bRule) { //表示有组词规则
            tableDict->rule = (RULE*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RULE) * (tableDict->iCodeLength - 1));
            for (i = 0; i < tableDict->iCodeLength - 1; i++) {
                size = fread(&(tableDict->rule[i].iFlag), sizeof(unsigned char), 1, fpDict);
                CHECK_LOAD_TABLE_ERROR(1);
                size = fread(&(tableDict->rule[i].iWords), sizeof(unsigned char), 1, fpDict);
                CHECK_LOAD_TABLE_ERROR(1);
                tableDict->rule[i].rule = (RULE_RULE*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RULE_RULE) * tableDict->iCodeLength);
                for (iTemp = 0; iTemp < tableDict->iCodeLength; iTemp++) {
                    size = fread(&(tableDict->rule[i].rule[iTemp].iFlag), sizeof(unsigned char), 1, fpDict);
                    CHECK_LOAD_TABLE_ERROR(1);
                    size = fread(&(tableDict->rule[i].rule[iTemp].iWhich), sizeof(unsigned char), 1, fpDict);
                    CHECK_LOAD_TABLE_ERROR(1);
                    size = fread(&(tableDict->rule[i].rule[iTemp].iIndex), sizeof(unsigned char), 1, fpDict);
                    CHECK_LOAD_TABLE_ERROR(1);
                }
            }
        }

        tableDict->recordHead = (RECORD*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RECORD));
        tableDict->currentRecord = tableDict->recordHead;

        size = fcitx_utils_read_uint32(fpDict, &tableDict->iRecordCount);
        CHECK_LOAD_TABLE_ERROR(1);

        for (i = 0; i < SINGLE_HZ_COUNT; i++) {
            tableDict->tableSingleHZ[i] = (RECORD*)NULL;
            tableDict->tableSingleHZCons[i] = (RECORD*)NULL;
        }

        iRecordIndex = 0;
        size_t bufSize = 0;
        for (i = 0; i < tableDict->iRecordCount; i++) {
            size = fread(strCode, sizeof(int8_t), tableDict->iPYCodeLength + 1, fpDict);
            CHECK_LOAD_TABLE_ERROR(tableDict->iPYCodeLength + 1);
            size = fcitx_utils_read_uint32(fpDict, &iTemp);
            CHECK_LOAD_TABLE_ERROR(1);
            /* we don't actually have such limit, but sometimes, broken table
             * may break this, so we need to give a limitation.
             */
            if (iTemp > UTF8_MAX_LENGTH * 30) {
                error = true;
                goto table_load_error;
            }
            if (iTemp > bufSize) {
                bufSize = iTemp;
                strHZ = realloc(strHZ, bufSize);
            }
            size = fread(strHZ, sizeof(int8_t), iTemp, fpDict);
            CHECK_LOAD_TABLE_ERROR(iTemp);
            recTemp = (RECORD*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RECORD));
            recTemp->strCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iPYCodeLength + 1));
            memset(recTemp->strCode, 0, sizeof(char) * (tableDict->iPYCodeLength + 1));
            strcpy(recTemp->strCode, strCode);
            recTemp->strHZ = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * iTemp);
            strcpy(recTemp->strHZ, strHZ);

            if (!iVersion) {
                size = fread(&cTemp, sizeof(int8_t), 1, fpDict);
                CHECK_LOAD_TABLE_ERROR(1);
                recTemp->type = cTemp;
            }

            size = fcitx_utils_read_uint32(fpDict, &recTemp->iHit);
            CHECK_LOAD_TABLE_ERROR(1);
            size = fcitx_utils_read_uint32(fpDict, &recTemp->iIndex);
            CHECK_LOAD_TABLE_ERROR(1);
            if (recTemp->iIndex > tableDict->iTableIndex)
                tableDict->iTableIndex = recTemp->iIndex;

            /* 建立索引 */
            if (cChar != recTemp->strCode[0]) {
                cChar = recTemp->strCode[0];
                tableDict->recordIndex[iRecordIndex].cCode = cChar;
                tableDict->recordIndex[iRecordIndex].record = recTemp;
                iRecordIndex++;
            }
            /******************************************************************/
            /** 为单字生成一个表   */
            if (fcitx_utf8_strlen(recTemp->strHZ) == 1 && !IsIgnoreChar(tableDict, strCode[0]))
            {
                RECORD** tableSingleHZ = NULL;
                if (recTemp->type == RECORDTYPE_NORMAL)
                    tableSingleHZ = tableDict->tableSingleHZ;
                else if (recTemp->type == RECORDTYPE_CONSTRUCT)
                    tableSingleHZ = tableDict->tableSingleHZCons;

                if (tableSingleHZ) {
                    iTemp = CalHZIndex(recTemp->strHZ);
                    if (iTemp < SINGLE_HZ_COUNT) {
                        if (tableSingleHZ[iTemp]) {
                            if (strlen(strCode) > strlen(tableDict->tableSingleHZ[iTemp]->strCode))
                                tableSingleHZ[iTemp] = recTemp;
                        } else
                            tableSingleHZ[iTemp] = recTemp;
                    }
                }
            }

            if (recTemp->type == RECORDTYPE_PINYIN)
                tableDict->bHasPinyin = true;

            if (recTemp->type == RECORDTYPE_PROMPT && strlen(recTemp->strCode) == 1)
                tableDict->promptCode[(uint8_t) recTemp->strCode[0]] = recTemp;

            tableDict->currentRecord->next = recTemp;
            recTemp->prev = tableDict->currentRecord;
            tableDict->currentRecord = recTemp;
        }
        if (strHZ) {
            free(strHZ);
            strHZ = NULL;
        }

        tableDict->currentRecord->next = tableDict->recordHead;
        tableDict->recordHead->prev = tableDict->currentRecord;

table_load_error:
        fclose(fpDict);
        if (error) {
            fcitx_memory_pool_destroy(tableDict->pool);
            tableDict->pool = NULL;
            reload++;
        } else {
            break;
        }
    } while(reload < 2);

    if (!tableDict->pool)
        return false;

    FcitxLog(DEBUG, _("Load Table Dict OK"));

    //读取相应的特殊符号表
    fpDict = FcitxXDGGetFileWithPrefix("table", tableMetaData->strSymbolFile, "r", NULL);

    if (fpDict) {
        tableDict->iFH = fcitx_utils_calculate_record_number(fpDict);
        tableDict->fh = (FH*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(FH) * tableDict->iFH);

        char* strBuf = NULL;
        size_t bufLen = 0;
        for (i = 0; i < tableDict->iFH; i++) {
            if (getline(&strBuf, &bufLen, fpDict) == -1)
                break;

            if (!fcitx_utf8_check_string(strBuf))
                break;

            if (fcitx_utf8_strlen(strBuf) > FH_MAX_LENGTH)
                break;

            strcpy(tableDict->fh[i].strFH, strBuf);
        }
        fcitx_utils_free(strBuf);
        tableDict->iFH = i;

        fclose(fpDict);
    }

    tableDict->strNewPhraseCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iCodeLength + 1));
    tableDict->strNewPhraseCode[tableDict->iCodeLength] = '\0';

    tableDict->iAutoPhrase = 0;
    if (tableMetaData->bAutoPhrase) {
        tableDict->autoPhrase = (AUTOPHRASE*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(AUTOPHRASE) * AUTO_PHRASE_COUNT);

        //读取上次保存的自动词组信息
        FcitxLog(DEBUG, _("Loading Autophrase."));

        char *temppath;
        fcitx_utils_alloc_cat_str(temppath, tableMetaData->uniqueName,
                                  "_LastAutoPhrase.tmp");
        fpDict = FcitxXDGGetFileWithPrefix("table", temppath, "r", NULL);
        free(temppath);
        i = 0;
        if (fpDict) {
            size_t size = fcitx_utils_read_int32(fpDict, &tableDict->iAutoPhrase);
            if (size == 1) {
                for (; i < tableDict->iAutoPhrase; i++) {
                    tableDict->autoPhrase[i].strCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iCodeLength + 1));
                    tableDict->autoPhrase[i].strHZ = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH + 1));
                    size = fread(tableDict->autoPhrase[i].strCode, tableDict->iCodeLength + 1, 1, fpDict);
                    if (size != 1) {
                        tableDict->iAutoPhrase = i;
                        break;
                    }
                    size = fread(tableDict->autoPhrase[i].strHZ, PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH + 1, 1, fpDict);
                    tableDict->autoPhrase[i].strHZ[PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH] = 0;
                    if (size != 1 || !fcitx_utf8_check_string(tableDict->autoPhrase[i].strHZ)) {
                        tableDict->iAutoPhrase = i;
                        break;
                    }
                    size = fcitx_utils_read_uint32(fpDict, &iTempCount);
                    if (size != 1) {
                        tableDict->iAutoPhrase = i;
                        break;
                    }

                    tableDict->autoPhrase[i].iSelected = iTempCount;
                    if (i == AUTO_PHRASE_COUNT - 1)
                        tableDict->autoPhrase[i].next = &tableDict->autoPhrase[0];
                    else
                        tableDict->autoPhrase[i].next = &tableDict->autoPhrase[i + 1];
                }
            }
            fclose(fpDict);
        }

        for (; i < AUTO_PHRASE_COUNT; i++) {
            tableDict->autoPhrase[i].strCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iCodeLength + 1));
            tableDict->autoPhrase[i].strHZ = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH + 1));
            tableDict->autoPhrase[i].iSelected = 0;
            if (i == AUTO_PHRASE_COUNT - 1)
                tableDict->autoPhrase[i].next = &tableDict->autoPhrase[0];
            else
                tableDict->autoPhrase[i].next = &tableDict->autoPhrase[i + 1];
        }

        if (i == AUTO_PHRASE_COUNT)
            tableDict->insertPoint = &tableDict->autoPhrase[0];
        else
            tableDict->insertPoint = &tableDict->autoPhrase[i - 1];

        FcitxLog(DEBUG, _("Load Autophrase OK"));
    } else
        tableDict->autoPhrase = (AUTOPHRASE *) NULL;

    return true;
}
Exemple #2
0
FcitxPunc* LoadPuncFile(const char* filename)
{
    FILE           *fpDict;             // 词典文件指针
    int             iRecordNo;
    char            strText[4 + MAX_PUNC_LENGTH * UTF8_MAX_LENGTH];
    char           *pstr;               // 临时指针
    int             i;
    fpDict = FcitxXDGGetFileWithPrefix("data", filename, "r", NULL);

    if (strlen(filename) < strlen(PUNC_DICT_FILENAME))
        return NULL;

    if (!fpDict) {
        FcitxLog(WARNING, _("Can't open punc file."));
        return NULL;
    }

    /* 计算词典里面有多少的数据
     * 这个函数非常简单,就是计算该文件有多少行(包含空行)。
     * 因为空行,在下面会略去,所以,这儿存在内存的浪费现象。
     * 没有一个空行就是浪费sizeof (WidePunc)字节内存*/
    iRecordNo = fcitx_utils_calculate_record_number(fpDict);
    // 申请空间,用来存放这些数据。这儿没有检查是否申请到内存,严格说有小隐患
    WidePunc* punc = (WidePunc *) fcitx_utils_malloc0(sizeof(WidePunc) * (iRecordNo + 1));

    iRecordNo = 0;

    // 下面这个循环,就是一行一行的读入词典文件的数据。并将其放入到curPunc里面去。
    for (;;) {
        if (!fgets(strText, (MAX_PUNC_LENGTH * UTF8_MAX_LENGTH + 3), fpDict))
            break;
        i = strlen(strText) - 1;

        // 先找到最后一个字符
        while ((strText[i] == '\n') || (strText[i] == ' ')) {
            if (!i)
                break;
            i--;
        }

        // 如果找到,进行出入。当是空行时,肯定找不到。所以,也就略过了空行的处理
        if (i) {
            strText[i + 1] = '\0';              // 在字符串的最后加个封口
            pstr = strText;                     // 将pstr指向第一个非空字符
            while (*pstr == ' ')
                pstr++;
            punc[iRecordNo].ASCII = *pstr++; // 这个就是中文符号所对应的ASCII码值
            while (*pstr == ' ')                // 然后,将pstr指向下一个非空字符
                pstr++;

            punc[iRecordNo].iCount = 0;      // 该符号有几个转化,比如英文"就可以转换成“和”
            // 依次将该ASCII码所对应的符号放入到结构中
            while (*pstr) {
                i = 0;
                // 因为中文符号都是多字节(这里读取并不像其他地方是固定两个,所以没有问题)的,所以,要一直往后读,知道空格或者字符串的末尾
                while (*pstr != ' ' && *pstr) {
                    punc[iRecordNo].strWidePunc[punc[iRecordNo].iCount][i] = *pstr;
                    i++;
                    pstr++;
                }

                // 每个中文符号用'\0'隔开
                punc[iRecordNo].strWidePunc[punc[iRecordNo].iCount][i] = '\0';
                while (*pstr == ' ')
                    pstr++;
                punc[iRecordNo].iCount++;
            }

            iRecordNo++;
        }
    }

    punc[iRecordNo].ASCII = '\0';
    fclose(fpDict);

    FcitxPunc* p = fcitx_utils_malloc0(sizeof(FcitxPunc));
    p->langCode = "";

    const char* langcode = filename + strlen(PUNC_DICT_FILENAME);
    if (*langcode == '\0')
        p->langCode = strdup("C");
    else
        p->langCode = strdup(langcode + 1);

    p->curPunc = punc;

    return p;
}