int main(int argc, char *argv[]) { FILE *fpDict, *fpNew; RECORD *temp, *head, *newRec, *current; uint32_t s = 0; int i; uint32_t iTemp; char *pstr = 0; char strTemp[10]; unsigned char bRule; RULE *rule = NULL; unsigned int l; unsigned char iCodeLength = 0; unsigned char iPYCodeLength = 0; int8_t type; if (argc != 3) { printf("\nUsage: txt2mb <Source File> <IM File>\n\n"); exit(1); } fpDict = fopen(argv[1], "r"); if (!fpDict) { printf("\nCannot read source file!\n\n"); exit(2); } head = (RECORD *) malloc(sizeof(RECORD)); head->next = head; head->prev = head; current = head; bRule = 0; l = 0; char* buf = NULL, *buf1 = NULL; size_t len; for (;;) { l++; if (getline(&buf, &len, fpDict) == -1) break; i = strlen(buf) - 1; while ((i >= 0) && (buf[i] == ' ' || buf[i] == '\n' || buf[i] == '\r')) buf[i--] = '\0'; pstr = buf; if (*pstr == ' ') pstr++; if (pstr[0] == '#') continue; if (CHECK_OPTION(pstr, STR_KEYCODE)) { pstr += ADD_LENGTH(pstr, STR_KEYCODE); strcpy(strInputCode, pstr); } else if (CHECK_OPTION(pstr, STR_CODELEN)) { pstr += ADD_LENGTH(pstr, STR_CODELEN); iCodeLength = atoi(pstr); if (iCodeLength > MAX_CODE_LENGTH) { iCodeLength = MAX_CODE_LENGTH; printf("Max Code Length is %d\n", MAX_CODE_LENGTH); } } else if (CHECK_OPTION(pstr, STR_IGNORECHAR)) { pstr += ADD_LENGTH(pstr, STR_IGNORECHAR); strcpy(strIgnoreChars, pstr); } else if (CHECK_OPTION(pstr, STR_PINYIN)) { pstr += ADD_LENGTH(pstr, STR_PINYIN); while (*pstr == ' ' && *pstr != '\0') pstr++; cPinyinKey = *pstr; } else if (CHECK_OPTION(pstr, STR_PROMPT)) { pstr += ADD_LENGTH(pstr, STR_PROMPT); while (*pstr == ' ' && *pstr != '\0') pstr++; cPromptKey = *pstr; } else if (CHECK_OPTION(pstr, STR_CONSTRUCTPHRASE)) { pstr += ADD_LENGTH(pstr, STR_CONSTRUCTPHRASE); while (*pstr == ' ' && *pstr != '\0') pstr++; cPhraseKey = *pstr; } else if (CHECK_OPTION(pstr, STR_PINYINLEN)) { pstr += ADD_LENGTH(pstr, STR_PINYINLEN); iPYCodeLength = atoi(pstr); } else if (CHECK_OPTION(pstr, STR_DATA)) break; else if (CHECK_OPTION(pstr, STR_RULE)) { bRule = 1; break; } } if (iCodeLength <= 0 || !strInputCode[0]) { printf("Source File Format Error!\n"); exit(1); } if (bRule) { /* * 组词规则数应该比键码长度小1 */ rule = (RULE *) malloc(sizeof(RULE) * (iCodeLength - 1)); for (iTemp = 0; iTemp < (iCodeLength - 1); iTemp++) { l++; if (getline(&buf, &len, fpDict) == -1) break; rule[iTemp].rule = (RULE_RULE *) malloc(sizeof(RULE_RULE) * iCodeLength); i = strlen(buf) - 1; while ((i >= 0) && (buf[i] == ' ' || buf[i] == '\n' || buf[i] == '\r')) buf[i--] = '\0'; pstr = buf; if (*pstr == ' ') pstr++; if (pstr[0] == '#') continue; if (CHECK_OPTION(pstr, STR_DATA)) break; switch (*pstr) { case 'e': case 'E': rule[iTemp].iFlag = 0; break; case 'a': case 'A': rule[iTemp].iFlag = 1; break; default: printf("2 Phrase rules are not suitable!\n"); printf("\t\t%s\n", buf); exit(1); } pstr++; char* p = pstr; while (*p && *p != '=') p++; if (!(*p)) { printf("3 Phrase rules are not suitable!\n"); printf("\t\t%s\n", buf); exit(1); } strncpy(strTemp, pstr, p - pstr); strTemp[p - pstr] = '\0'; rule[iTemp].iWords = atoi(strTemp); p++; for (i = 0; i < iCodeLength; i++) { while (*p == ' ') p++; switch (*p) { case 'p': case 'P': rule[iTemp].rule[i].iFlag = 1; break; case 'n': case 'N': rule[iTemp].rule[i].iFlag = 0; break; default: printf("4 Phrase rules are not suitable!\n"); printf("\t\t%s\n", buf); exit(1); } p++; rule[iTemp].rule[i].iWhich = *p++ - '0'; rule[iTemp].rule[i].iIndex = *p++ - '0'; while (*p == ' ') p++; if (i != (iCodeLength - 1)) { if (*p != '+') { printf("5 Phrase rules are not suitable!\n"); printf("\t\t%s %d\n", buf, iCodeLength); exit(1); } p++; } } } if (iTemp != iCodeLength - 1) { printf("6 Phrase rules are not suitable!\n"); exit(1); } for (iTemp = 0; iTemp < (iCodeLength - 1); iTemp++) { l++; if (getline(&buf, &len, fpDict) == -1) break; i = strlen(buf) - 1; while ((i >= 0) && (buf[i] == ' ' || buf[i] == '\n' || buf[i] == '\r')) buf[i--] = '\0'; pstr = buf; if (*pstr == ' ') pstr++; if (pstr[0] == '#') continue; if (CHECK_OPTION(pstr, STR_DATA)) break; } } if (iPYCodeLength < iCodeLength) iPYCodeLength = iCodeLength; if (!CHECK_OPTION(pstr, STR_DATA)) { printf("Source File Format Error!\n"); exit(1); } while (getline(&buf, &len, fpDict) != -1) { l++; if (buf1) free(buf1); buf1 = fcitx_utils_trim(buf); char *p = buf1; while (*p && !isspace(*p)) p ++; if (*p == '\0') continue; while (isspace(*p)) { *p = '\0'; p ++; } char* strHZ = p; if (!IsValidCode(buf1[0])) { printf("Invalid Format: Line-%d %s %s\n", l, buf1, strHZ); exit(1); } if (((buf1[0] != cPinyinKey) && (strlen(buf1) > iCodeLength)) || ((buf1[0] == cPinyinKey) && (strlen(buf1) > (iPYCodeLength + 1))) || ((buf1[0] == cPhraseKey) && (strlen(buf1) > (iCodeLength + 1))) || ((buf1[0] == cPromptKey) && (strlen(buf1) > (iPYCodeLength + 1))) ) { printf("Delete: %s %s, Too long\n", buf1, strHZ); continue; } size_t hzLen = fcitx_utf8_strlen(strHZ); if (buf1[0] == cPhraseKey && hzLen != 1) { printf("Delete: %s %s, Too long\n", buf1, strHZ); continue; } type = RECORDTYPE_NORMAL; pstr = buf1; if (buf1[0] == cPinyinKey) { pstr ++; type = RECORDTYPE_PINYIN; } else if (buf1[0] == cPhraseKey) { pstr ++; type = RECORDTYPE_CONSTRUCT; } else if (buf1[0] == cPromptKey) { pstr ++; type = RECORDTYPE_PROMPT; } //查找是否重复 temp = current; if (temp != head) { if (strcmp(temp->strCode, pstr) >= 0) { while (temp != head && strcmp(temp->strCode, pstr) >= 0) { if (!strcmp(temp->strHZ, strHZ) && !strcmp(temp->strCode, pstr) && temp->type == type) { printf("Delete: %s %s\n", pstr, strHZ); goto _next; } temp = temp->prev; } if (temp == head) temp = temp->next; while (temp != head && strcmp(temp->strCode, pstr) <= 0) temp = temp->next; } else { while (temp != head && strcmp(temp->strCode, pstr) <= 0) { if (!strcmp(temp->strHZ, strHZ) && !strcmp(temp->strCode, pstr) && temp->type == type) { printf("Delete: %s %s\n", pstr, strHZ); goto _next; } temp = temp->next; } } } //插在temp的前面 newRec = (RECORD *) fcitx_utils_malloc0(sizeof(RECORD)); newRec->strCode = (char *) fcitx_utils_malloc0(sizeof(char) * (iPYCodeLength + 1)); newRec->strHZ = (char *) fcitx_utils_malloc0(sizeof(char) * strlen(strHZ) + 1); strcpy(newRec->strCode, pstr); strcpy(newRec->strHZ, strHZ); newRec->type = type; newRec->iHit = 0; newRec->iIndex = 0; temp->prev->next = newRec; newRec->next = temp; newRec->prev = temp->prev; temp->prev = newRec; current = newRec; s++; _next: continue; } if (buf) free(buf); if (buf1) free(buf1); fclose(fpDict); printf("\nReading %d records.\n\n", s); fpNew = fopen(argv[2], "w"); if (!fpNew) { printf("\nCannot create target file!\n\n"); exit(3); } int8_t iInternalVersion = INTERNAL_VERSION; //写入版本号--如果第一个字为0,表示后面那个字节为版本号 fcitx_utils_write_uint32(fpNew, 0); fwrite(&iInternalVersion, sizeof(int8_t), 1, fpNew); iTemp = (uint32_t)strlen(strInputCode); fcitx_utils_write_uint32(fpNew, iTemp); fwrite(strInputCode, sizeof(char), iTemp + 1, fpNew); fwrite(&iCodeLength, sizeof(unsigned char), 1, fpNew); fwrite(&iPYCodeLength, sizeof(unsigned char), 1, fpNew); iTemp = (uint32_t)strlen(strIgnoreChars); fcitx_utils_write_uint32(fpNew, iTemp); fwrite(strIgnoreChars, sizeof(char), iTemp + 1, fpNew); fwrite(&bRule, sizeof(unsigned char), 1, fpNew); if (bRule) { for (i = 0; i < iCodeLength - 1; i++) { fwrite(&(rule[i].iFlag), sizeof(unsigned char), 1, fpNew); fwrite(&(rule[i].iWords), sizeof(unsigned char), 1, fpNew); for (iTemp = 0; iTemp < iCodeLength; iTemp++) { fwrite(&(rule[i].rule[iTemp].iFlag), sizeof(unsigned char), 1, fpNew); fwrite(&(rule[i].rule[iTemp].iWhich), sizeof(unsigned char), 1, fpNew); fwrite(&(rule[i].rule[iTemp].iIndex), sizeof(unsigned char), 1, fpNew); } } } fcitx_utils_write_uint32(fpNew, s); current = head->next; while (current != head) { fwrite(current->strCode, sizeof(char), iPYCodeLength + 1, fpNew); s = strlen(current->strHZ) + 1; fcitx_utils_write_uint32(fpNew, s); fwrite(current->strHZ, sizeof(char), s, fpNew); fwrite(&(current->type), sizeof(int8_t), 1, fpNew); fcitx_utils_write_uint32(fpNew, current->iHit); fcitx_utils_write_uint32(fpNew, current->iIndex); current = current->next; } fclose(fpNew); return 0; }
void parse(int argc, char *argv[], int Noption, Option *theOptions) { const char routineName[] = "parse"; register int n, nopt; char **options, **values; bool_t recognized; int Narg = argc - 1, Nset; /* --- Break down the command line in option/value pairs -- ------- */ options = (char **) malloc(Narg * sizeof(char *)); values = (char **) malloc(Narg * sizeof(char *)); Nset = 0; for (n = 1; n <= Narg; n++) { if (argv[n][0] == '-') { options[Nset] = argv[n] + 1; if (n < Narg && argv[n+1][0] != '-') { n++; values[Nset] = argv[n]; } else values[Nset] = NULL; Nset++; } } /* --- If called with -help show options and exit -- -------------- */ for (n = 0; n < Nset; n++) { if (CHECK_OPTION(options[n], "help", theOptions[0].minlength)) { ShowUsage(argv[0], Noption, theOptions); } } /* --- Match the command line options with internal options -- ---- */ for (n = 0; n < Nset; n++) { recognized = FALSE; for (nopt = 1; nopt < Noption; nopt++) { if (CHECK_OPTION(options[n], theOptions[nopt].name, theOptions[nopt].minlength)) { recognized = TRUE; /* --- If a value was specified it is copied to the appropriate tag of the Option structure -- -------------- */ if (values[n] != NULL) strcpy(theOptions[nopt].value, values[n]); else { if (theOptions[nopt].value_required) { /* --- An error results if value is not given when required */ sprintf(messageStr, "Option %s requires value", theOptions[nopt].name); Error(ERROR_LEVEL_2, routineName, messageStr); } else /* --- Options that require no value are assumed to be boolean and are set to TRUE -- -------------- */ strcpy(theOptions[nopt].value, "TRUE"); } } } if (!recognized) { sprintf(messageStr, "Unknown command line option %s", options[n]); Error(WARNING, routineName, messageStr); } } /* --- Finally, set the internal options with either the command line specified values or their defaults -- --------- */ for (nopt = 1; nopt < Noption; nopt++) { if (theOptions[nopt].pointer != NULL) (*theOptions[nopt].setValue)(theOptions[nopt].value, theOptions[nopt].pointer); } free(options); free(values); }