boolean LoadTableDict(TableMetaData* tableMetaData) { char strCode[MAX_CODE_LENGTH + 1]; char *strHZ = 0; FILE *fpDict; RECORD *recTemp; unsigned int i = 0; uint32_t iTemp, iTempCount; char cChar = 0, cTemp; int8_t iVersion = 1; int iRecordIndex; TableDict *tableDict; //读入码表 FcitxLog(DEBUG, _("Loading Table Dict")); int reload = 0; do { boolean error = false; if (!reload) { /** * kcm saves a absolute path here but it is then interpreted as * a relative path? **/ fpDict = FcitxXDGGetFileWithPrefix("table", tableMetaData->strPath, "r", NULL); } else { char *tablepath; char *path = fcitx_utils_get_fcitx_path("pkgdatadir"); fcitx_utils_alloc_cat_str(tablepath, path, "/table/", tableMetaData->strPath); fpDict = fopen(tablepath, "r"); free(tablepath); } if (!fpDict) return false; tableMetaData->tableDict = fcitx_utils_new(TableDict); tableDict = tableMetaData->tableDict; tableDict->pool = fcitx_memory_pool_create(); #define CHECK_LOAD_TABLE_ERROR(SIZE) if (size < (SIZE)) { error = true; goto table_load_error; } //先读取码表的信息 //判断版本信息 size_t size; size = fcitx_utils_read_uint32(fpDict, &iTemp); CHECK_LOAD_TABLE_ERROR(1); if (!iTemp) { size = fread(&iVersion, sizeof(int8_t), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); iVersion = (iVersion < INTERNAL_VERSION); size = fcitx_utils_read_uint32(fpDict, &iTemp); CHECK_LOAD_TABLE_ERROR(1); } tableDict->strInputCode = (char*)realloc(tableDict->strInputCode, sizeof(char) * (iTemp + 1)); size = fread(tableDict->strInputCode, sizeof(char), iTemp + 1, fpDict); CHECK_LOAD_TABLE_ERROR(iTemp + 1); /* * 建立索引,加26是为了为拼音编码预留空间 */ size_t tmp_len = strlen(tableDict->strInputCode) + 26; tableDict->recordIndex = (RECORD_INDEX*)fcitx_memory_pool_alloc(tableDict->pool, tmp_len * sizeof(RECORD_INDEX)); for (iTemp = 0; iTemp < tmp_len; iTemp++) { tableDict->recordIndex[iTemp].cCode = 0; tableDict->recordIndex[iTemp].record = NULL; } /********************************************************************/ size = fread(&(tableDict->iCodeLength), sizeof(uint8_t), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); UpdateTableMetaData(tableMetaData); if (!iVersion) { size = fread(&(tableDict->iPYCodeLength), sizeof(uint8_t), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); } else tableDict->iPYCodeLength = tableDict->iCodeLength; size = fcitx_utils_read_uint32(fpDict, &iTemp); CHECK_LOAD_TABLE_ERROR(1); tableDict->strIgnoreChars = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (iTemp + 1)); size = fread(tableDict->strIgnoreChars, sizeof(char), iTemp + 1, fpDict); CHECK_LOAD_TABLE_ERROR(iTemp + 1); size = fread(&(tableDict->bRule), sizeof(unsigned char), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); if (tableDict->bRule) { //表示有组词规则 tableDict->rule = (RULE*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RULE) * (tableDict->iCodeLength - 1)); for (i = 0; i < tableDict->iCodeLength - 1; i++) { size = fread(&(tableDict->rule[i].iFlag), sizeof(unsigned char), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); size = fread(&(tableDict->rule[i].iWords), sizeof(unsigned char), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); tableDict->rule[i].rule = (RULE_RULE*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RULE_RULE) * tableDict->iCodeLength); for (iTemp = 0; iTemp < tableDict->iCodeLength; iTemp++) { size = fread(&(tableDict->rule[i].rule[iTemp].iFlag), sizeof(unsigned char), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); size = fread(&(tableDict->rule[i].rule[iTemp].iWhich), sizeof(unsigned char), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); size = fread(&(tableDict->rule[i].rule[iTemp].iIndex), sizeof(unsigned char), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); } } } tableDict->recordHead = (RECORD*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RECORD)); tableDict->currentRecord = tableDict->recordHead; size = fcitx_utils_read_uint32(fpDict, &tableDict->iRecordCount); CHECK_LOAD_TABLE_ERROR(1); for (i = 0; i < SINGLE_HZ_COUNT; i++) { tableDict->tableSingleHZ[i] = (RECORD*)NULL; tableDict->tableSingleHZCons[i] = (RECORD*)NULL; } iRecordIndex = 0; size_t bufSize = 0; for (i = 0; i < tableDict->iRecordCount; i++) { size = fread(strCode, sizeof(int8_t), tableDict->iPYCodeLength + 1, fpDict); CHECK_LOAD_TABLE_ERROR(tableDict->iPYCodeLength + 1); size = fcitx_utils_read_uint32(fpDict, &iTemp); CHECK_LOAD_TABLE_ERROR(1); /* we don't actually have such limit, but sometimes, broken table * may break this, so we need to give a limitation. */ if (iTemp > UTF8_MAX_LENGTH * 30) { error = true; goto table_load_error; } if (iTemp > bufSize) { bufSize = iTemp; strHZ = realloc(strHZ, bufSize); } size = fread(strHZ, sizeof(int8_t), iTemp, fpDict); CHECK_LOAD_TABLE_ERROR(iTemp); recTemp = (RECORD*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(RECORD)); recTemp->strCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iPYCodeLength + 1)); memset(recTemp->strCode, 0, sizeof(char) * (tableDict->iPYCodeLength + 1)); strcpy(recTemp->strCode, strCode); recTemp->strHZ = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * iTemp); strcpy(recTemp->strHZ, strHZ); if (!iVersion) { size = fread(&cTemp, sizeof(int8_t), 1, fpDict); CHECK_LOAD_TABLE_ERROR(1); recTemp->type = cTemp; } size = fcitx_utils_read_uint32(fpDict, &recTemp->iHit); CHECK_LOAD_TABLE_ERROR(1); size = fcitx_utils_read_uint32(fpDict, &recTemp->iIndex); CHECK_LOAD_TABLE_ERROR(1); if (recTemp->iIndex > tableDict->iTableIndex) tableDict->iTableIndex = recTemp->iIndex; /* 建立索引 */ if (cChar != recTemp->strCode[0]) { cChar = recTemp->strCode[0]; tableDict->recordIndex[iRecordIndex].cCode = cChar; tableDict->recordIndex[iRecordIndex].record = recTemp; iRecordIndex++; } /******************************************************************/ /** 为单字生成一个表 */ if (fcitx_utf8_strlen(recTemp->strHZ) == 1 && !IsIgnoreChar(tableDict, strCode[0])) { RECORD** tableSingleHZ = NULL; if (recTemp->type == RECORDTYPE_NORMAL) tableSingleHZ = tableDict->tableSingleHZ; else if (recTemp->type == RECORDTYPE_CONSTRUCT) tableSingleHZ = tableDict->tableSingleHZCons; if (tableSingleHZ) { iTemp = CalHZIndex(recTemp->strHZ); if (iTemp < SINGLE_HZ_COUNT) { if (tableSingleHZ[iTemp]) { if (strlen(strCode) > strlen(tableDict->tableSingleHZ[iTemp]->strCode)) tableSingleHZ[iTemp] = recTemp; } else tableSingleHZ[iTemp] = recTemp; } } } if (recTemp->type == RECORDTYPE_PINYIN) tableDict->bHasPinyin = true; if (recTemp->type == RECORDTYPE_PROMPT && strlen(recTemp->strCode) == 1) tableDict->promptCode[(uint8_t) recTemp->strCode[0]] = recTemp; tableDict->currentRecord->next = recTemp; recTemp->prev = tableDict->currentRecord; tableDict->currentRecord = recTemp; } if (strHZ) { free(strHZ); strHZ = NULL; } tableDict->currentRecord->next = tableDict->recordHead; tableDict->recordHead->prev = tableDict->currentRecord; table_load_error: fclose(fpDict); if (error) { fcitx_memory_pool_destroy(tableDict->pool); tableDict->pool = NULL; reload++; } else { break; } } while(reload < 2); if (!tableDict->pool) return false; FcitxLog(DEBUG, _("Load Table Dict OK")); //读取相应的特殊符号表 fpDict = FcitxXDGGetFileWithPrefix("table", tableMetaData->strSymbolFile, "r", NULL); if (fpDict) { tableDict->iFH = fcitx_utils_calculate_record_number(fpDict); tableDict->fh = (FH*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(FH) * tableDict->iFH); char* strBuf = NULL; size_t bufLen = 0; for (i = 0; i < tableDict->iFH; i++) { if (getline(&strBuf, &bufLen, fpDict) == -1) break; if (!fcitx_utf8_check_string(strBuf)) break; if (fcitx_utf8_strlen(strBuf) > FH_MAX_LENGTH) break; strcpy(tableDict->fh[i].strFH, strBuf); } fcitx_utils_free(strBuf); tableDict->iFH = i; fclose(fpDict); } tableDict->strNewPhraseCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iCodeLength + 1)); tableDict->strNewPhraseCode[tableDict->iCodeLength] = '\0'; tableDict->iAutoPhrase = 0; if (tableMetaData->bAutoPhrase) { tableDict->autoPhrase = (AUTOPHRASE*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(AUTOPHRASE) * AUTO_PHRASE_COUNT); //读取上次保存的自动词组信息 FcitxLog(DEBUG, _("Loading Autophrase.")); char *temppath; fcitx_utils_alloc_cat_str(temppath, tableMetaData->uniqueName, "_LastAutoPhrase.tmp"); fpDict = FcitxXDGGetFileWithPrefix("table", temppath, "r", NULL); free(temppath); i = 0; if (fpDict) { size_t size = fcitx_utils_read_int32(fpDict, &tableDict->iAutoPhrase); if (size == 1) { for (; i < tableDict->iAutoPhrase; i++) { tableDict->autoPhrase[i].strCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iCodeLength + 1)); tableDict->autoPhrase[i].strHZ = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH + 1)); size = fread(tableDict->autoPhrase[i].strCode, tableDict->iCodeLength + 1, 1, fpDict); if (size != 1) { tableDict->iAutoPhrase = i; break; } size = fread(tableDict->autoPhrase[i].strHZ, PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH + 1, 1, fpDict); tableDict->autoPhrase[i].strHZ[PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH] = 0; if (size != 1 || !fcitx_utf8_check_string(tableDict->autoPhrase[i].strHZ)) { tableDict->iAutoPhrase = i; break; } size = fcitx_utils_read_uint32(fpDict, &iTempCount); if (size != 1) { tableDict->iAutoPhrase = i; break; } tableDict->autoPhrase[i].iSelected = iTempCount; if (i == AUTO_PHRASE_COUNT - 1) tableDict->autoPhrase[i].next = &tableDict->autoPhrase[0]; else tableDict->autoPhrase[i].next = &tableDict->autoPhrase[i + 1]; } } fclose(fpDict); } for (; i < AUTO_PHRASE_COUNT; i++) { tableDict->autoPhrase[i].strCode = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (tableDict->iCodeLength + 1)); tableDict->autoPhrase[i].strHZ = (char*)fcitx_memory_pool_alloc(tableDict->pool, sizeof(char) * (PHRASE_MAX_LENGTH * UTF8_MAX_LENGTH + 1)); tableDict->autoPhrase[i].iSelected = 0; if (i == AUTO_PHRASE_COUNT - 1) tableDict->autoPhrase[i].next = &tableDict->autoPhrase[0]; else tableDict->autoPhrase[i].next = &tableDict->autoPhrase[i + 1]; } if (i == AUTO_PHRASE_COUNT) tableDict->insertPoint = &tableDict->autoPhrase[0]; else tableDict->insertPoint = &tableDict->autoPhrase[i - 1]; FcitxLog(DEBUG, _("Load Autophrase OK")); } else tableDict->autoPhrase = (AUTOPHRASE *) NULL; return true; }
FcitxPunc* LoadPuncFile(const char* filename) { FILE *fpDict; // 词典文件指针 int iRecordNo; char strText[4 + MAX_PUNC_LENGTH * UTF8_MAX_LENGTH]; char *pstr; // 临时指针 int i; fpDict = FcitxXDGGetFileWithPrefix("data", filename, "r", NULL); if (strlen(filename) < strlen(PUNC_DICT_FILENAME)) return NULL; if (!fpDict) { FcitxLog(WARNING, _("Can't open punc file.")); return NULL; } /* 计算词典里面有多少的数据 * 这个函数非常简单,就是计算该文件有多少行(包含空行)。 * 因为空行,在下面会略去,所以,这儿存在内存的浪费现象。 * 没有一个空行就是浪费sizeof (WidePunc)字节内存*/ iRecordNo = fcitx_utils_calculate_record_number(fpDict); // 申请空间,用来存放这些数据。这儿没有检查是否申请到内存,严格说有小隐患 WidePunc* punc = (WidePunc *) fcitx_utils_malloc0(sizeof(WidePunc) * (iRecordNo + 1)); iRecordNo = 0; // 下面这个循环,就是一行一行的读入词典文件的数据。并将其放入到curPunc里面去。 for (;;) { if (!fgets(strText, (MAX_PUNC_LENGTH * UTF8_MAX_LENGTH + 3), fpDict)) break; i = strlen(strText) - 1; // 先找到最后一个字符 while ((strText[i] == '\n') || (strText[i] == ' ')) { if (!i) break; i--; } // 如果找到,进行出入。当是空行时,肯定找不到。所以,也就略过了空行的处理 if (i) { strText[i + 1] = '\0'; // 在字符串的最后加个封口 pstr = strText; // 将pstr指向第一个非空字符 while (*pstr == ' ') pstr++; punc[iRecordNo].ASCII = *pstr++; // 这个就是中文符号所对应的ASCII码值 while (*pstr == ' ') // 然后,将pstr指向下一个非空字符 pstr++; punc[iRecordNo].iCount = 0; // 该符号有几个转化,比如英文"就可以转换成“和” // 依次将该ASCII码所对应的符号放入到结构中 while (*pstr) { i = 0; // 因为中文符号都是多字节(这里读取并不像其他地方是固定两个,所以没有问题)的,所以,要一直往后读,知道空格或者字符串的末尾 while (*pstr != ' ' && *pstr) { punc[iRecordNo].strWidePunc[punc[iRecordNo].iCount][i] = *pstr; i++; pstr++; } // 每个中文符号用'\0'隔开 punc[iRecordNo].strWidePunc[punc[iRecordNo].iCount][i] = '\0'; while (*pstr == ' ') pstr++; punc[iRecordNo].iCount++; } iRecordNo++; } } punc[iRecordNo].ASCII = '\0'; fclose(fpDict); FcitxPunc* p = fcitx_utils_malloc0(sizeof(FcitxPunc)); p->langCode = ""; const char* langcode = filename + strlen(PUNC_DICT_FILENAME); if (*langcode == '\0') p->langCode = strdup("C"); else p->langCode = strdup(langcode + 1); p->curPunc = punc; return p; }