bool PinyinPhraseLib::input_pinyin_lib (const PinyinValidator &validator, std::istream &is) { if (!is) return false; m_pinyin_lib.clear (); char header [40]; bool binary; //check header is.getline (header, 40); if (strncmp (header, scim_pinyin_lib_text_header, strlen (scim_pinyin_lib_text_header)) == 0) { binary = false; } else if (strncmp (header, scim_pinyin_lib_binary_header, strlen (scim_pinyin_lib_binary_header)) == 0) { binary = true; } else { return false; } is.getline (header, 40); if (strncmp (header, scim_pinyin_lib_version, strlen (scim_pinyin_lib_version)) != 0) return false; unsigned char bytes [4]; PinyinKey key; uint32 number; //get length if (binary) { is.read ((char*) bytes, sizeof(unsigned char) * 4); number = scim_bytestouint32 (bytes); } else { is.getline (header, 40); number = atoi (header); } if (number <= 0) return false; m_pinyin_lib.reserve (number + 256); if (binary) { for (uint32 i=0; i<number; i++) { key.input_binary (validator, is); m_pinyin_lib.push_back (key); } } else { for (uint32 i=0; i<number; i++) { key.input_text (validator, is); m_pinyin_lib.push_back (key); } } return true; }
static MPlist * open_scim (TableContext *context, MPlist *args) { MText *mt; int rc; char *file = NULL; unsigned char buf[BUFSIZE]; mt = (MText *) mplist_value (args); rc = mtext_to_utf8 (context, mt, buf, sizeof (buf)); if (rc < 0) return NULL; file = strdup ((const char *)buf); args = mplist_next (args); if (mplist_key (args) == Minteger) context->xlen = (long) mplist_value (args); else context->xlen = XLEN; args = mplist_next (args); if (mplist_key (args) == Minteger) context->max_candidates = (long) mplist_value (args); else context->max_candidates = MAX_CANDIDATES; if (context->mem && context->file && strcmp (context->file, file) != 0) { munmap (context->mem, context->memlen); context->mem = NULL; free (context->file); if (context->fp) { fclose (context->fp); context->fp = NULL; } } if (!context->fp) context->fp = fopen (file, "rb"); if (!context->mem) { while (1) { if (!fgets ((char *)buf, sizeof buf, context->fp)) break; if (strncmp ("###", (const char *)buf, 3) == 0) continue; if (strncmp ("MAX_KEY_LENGTH", (const char *)buf, 14) == 0) { char *p = strrchr ((char *)buf, '\n'); if (!*p) continue; *p-- = '\0'; while (*p >= '0' && *p <= '9') p--; context->mlen = strtoul (p + 1, NULL, 10); continue; } if (strncmp ("BEGIN_TABLE", (const char *)buf, 11) == 0) { long start_pos, end_pos; if (fread (buf, 4, 1, context->fp) != 1) break; context->content_size = scim_bytestouint32 (buf); start_pos = ftell (context->fp); if (fseek (context->fp, 0, SEEK_END) < 0) break; end_pos = ftell (context->fp); if (context->content_size >= end_pos - start_pos) break; context->mem = mmap (0, end_pos, PROT_READ, MAP_PRIVATE, fileno (context->fp), 0); if (context->mem) { context->memlen = end_pos; context->content = (unsigned char *)context->mem + start_pos; context->file = file; } break; } } } if (!context->mem) { free (file); return NULL; } if (!context->offsets) { int offset; if (!context->mlen) context->mlen = MLEN; context->offsets = calloc (sizeof (TableOffsetArray), context->mlen); for (offset = 0; offset < context->content_size;) { int klen = context->content[offset] & 0x3F; int plen = context->content[offset + 1]; TableOffsetArray *array; assert (klen > 0); if (klen > context->mlen) continue; array = &context->offsets[klen - 1]; if (array->cap < array->len + 1) { if (array->cap == 0) { array->cap = 1; array->data = calloc (sizeof (int), array->cap); if (!array->data) return NULL; } else { array->cap *= 2; array->data = realloc (array->data, sizeof (int) * array->cap); if (!array->data) return NULL; memset (array->data + array->len, 0, array->cap - array->len); } } *(array->data + array->len++) = offset; offset += 4 + klen + plen; } } return NULL; }
bool PinyinPhraseLib::input_indexes (std::istream &is) { char header [40]; bool binary = false; if (!is) return false; //check index file is.getline (header, 40); if (strncmp (header, scim_pinyin_phrase_idx_lib_text_header, strlen (scim_pinyin_phrase_idx_lib_text_header)) == 0) { binary = false; } else if (strncmp (header, scim_pinyin_phrase_idx_lib_binary_header, strlen (scim_pinyin_phrase_idx_lib_binary_header)) == 0) { binary = true; } else { return false; } is.getline (header, 40); if (strncmp (header, scim_pinyin_phrase_idx_lib_version, strlen (scim_pinyin_phrase_idx_lib_version)) != 0) return false; unsigned char bytes [8]; uint32 number; //get index number if (binary) { is.read ((char*) bytes, sizeof(unsigned char) * 4); number = scim_bytestouint32 (bytes); } else { is.getline (header, 40); number = atoi (header); } if (number == 0) return false; clear_phrase_index (); if (binary) { for (uint32 i=0; i<number; i++) { is.read ((char*) bytes, sizeof(unsigned char) * 8); insert_pinyin_phrase_into_index (scim_bytestouint32 (bytes), scim_bytestouint32 (bytes+4)); } } else { uint32 phrase_offset; uint32 pinyin_offset; for (uint32 i=0; i<number; i++) { is >> phrase_offset; is >> pinyin_offset; insert_pinyin_phrase_into_index (phrase_offset, pinyin_offset); } } sort_phrase_tables (); return true; }