Beispiel #1
0
bool
PinyinPhraseLib::input_pinyin_lib (const PinyinValidator &validator, std::istream &is)
{
	if (!is) return false;

	m_pinyin_lib.clear ();

	char header [40];
	bool binary;

	//check header
	is.getline (header, 40);
	if (strncmp (header,
		scim_pinyin_lib_text_header,
		strlen (scim_pinyin_lib_text_header)) == 0) {
		binary = false;
	} else if (strncmp (header,
		scim_pinyin_lib_binary_header,
		strlen (scim_pinyin_lib_binary_header)) == 0) {
		binary = true;
	} else {
		return false;
	}
	
	is.getline (header, 40);
	if (strncmp (header, scim_pinyin_lib_version, strlen (scim_pinyin_lib_version)) != 0)
		return false;

	unsigned char bytes [4];
	PinyinKey key;
	uint32 number;

	//get length
	if (binary) {
		is.read ((char*) bytes, sizeof(unsigned char) * 4);
		number = scim_bytestouint32 (bytes);
	} else {
		is.getline (header, 40);
		number = atoi (header);
	}

	if (number <= 0) return false;

	m_pinyin_lib.reserve (number + 256);

	if (binary) {
		for (uint32 i=0; i<number; i++) {
			key.input_binary (validator, is);
			m_pinyin_lib.push_back (key);
		}
	} else {
		for (uint32 i=0; i<number; i++) {
			key.input_text (validator, is);
			m_pinyin_lib.push_back (key);
		}
	}

	return true;
}
Beispiel #2
0
/*
 * Open a SCIM table file, map it into memory and index its records.
 *
 * ARGS is read as: the file name (an MText), then optionally an integer
 * stored into context->xlen and an integer stored into
 * context->max_candidates; XLEN and MAX_CANDIDATES are used when the
 * corresponding element is not an integer.
 *
 * If a different file is already mapped, its mapping, stream, stored name
 * and offset index are released first.  The text header is then scanned
 * for MAX_KEY_LENGTH and BEGIN_TABLE; the 4 bytes after BEGIN_TABLE give
 * the content size, and the whole file is mapped read-only.  Finally
 * context->offsets is filled with, per key length, the offsets of the
 * records having that key length.
 *
 * Returns NULL on every path, including success; callers have to inspect
 * CONTEXT to see whether a table was loaded.
 */
static MPlist *
open_scim (TableContext *context, MPlist *args)
{
  MText *mt;
  int rc;
  char *file = NULL;
  int file_adopted = 0;   /* non-zero once context->file owns FILE */
  unsigned char buf[BUFSIZE];

  mt = (MText *) mplist_value (args);
  rc = mtext_to_utf8 (context, mt, buf, sizeof (buf));
  if (rc < 0)
    return NULL;
  file = strdup ((const char *)buf);
  if (!file)
    return NULL;

  args = mplist_next (args);
  if (mplist_key (args) == Minteger)
    context->xlen = (long) mplist_value (args);
  else
    context->xlen = XLEN;

  args = mplist_next (args);
  if (mplist_key (args) == Minteger)
    context->max_candidates = (long) mplist_value (args);
  else
    context->max_candidates = MAX_CANDIDATES;

  /* A different file was requested: release everything tied to the old
     one, including the offset index, which refers to the old mapping.  */
  if (context->mem && context->file && strcmp (context->file, file) != 0)
    {
      munmap (context->mem, context->memlen);
      context->mem = NULL;
      free (context->file);
      context->file = NULL;
      if (context->fp)
        {
          fclose (context->fp);
          context->fp = NULL;
        }
      if (context->offsets)
        {
          int i;

          /* The index was allocated with context->mlen entries.  */
          for (i = 0; i < context->mlen; i++)
            free (context->offsets[i].data);
          free (context->offsets);
          context->offsets = NULL;
        }
      /* Let the new file's MAX_KEY_LENGTH (or the default) apply.  */
      context->mlen = 0;
    }

  if (!context->fp)
    context->fp = fopen (file, "rb");

  if (!context->mem)
    {
      /* Nothing to parse if the file could not be opened.  */
      if (!context->fp)
        {
          free (file);
          return NULL;
        }

      while (fgets ((char *)buf, sizeof buf, context->fp))
        {
          if (strncmp ("###", (const char *)buf, 3) == 0)
            continue;
          if (strncmp ("MAX_KEY_LENGTH", (const char *)buf, 14) == 0)
            {
              char *p = strrchr ((char *)buf, '\n');

              /* No newline in the buffer: line is not usable.  */
              if (!p)
                continue;
              *p-- = '\0';
              /* Walk back over the trailing digits; the leading
                 "MAX_KEY_LENGTH" guarantees the scan stops in BUF.  */
              while (*p >= '0' && *p <= '9')
                p--;

              context->mlen = strtoul (p + 1, NULL, 10);
              continue;
            }
          if (strncmp ("BEGIN_TABLE", (const char *)buf, 11) == 0)
            {
              long start_pos, end_pos;
              void *map;

              if (fread (buf, 4, 1, context->fp) != 1)
                break;
              context->content_size = scim_bytestouint32 (buf);
              start_pos = ftell (context->fp);
              if (start_pos < 0)
                break;
              if (fseek (context->fp, 0, SEEK_END) < 0)
                break;
              end_pos = ftell (context->fp);
              if (end_pos < 0)
                break;
              /* The announced content must fit in the rest of the file.  */
              if (context->content_size >= end_pos - start_pos)
                break;
              map = mmap (0, end_pos, PROT_READ, MAP_PRIVATE,
                          fileno (context->fp), 0);
              /* mmap reports failure with MAP_FAILED, not NULL.  */
              if (map != MAP_FAILED)
                {
                  context->mem = map;
                  context->memlen = end_pos;
                  context->content = (unsigned char *)context->mem + start_pos;
                  context->file = file;
                  file_adopted = 1;
                }
              break;
            }
        }
    }

  if (!context->mem)
    {
      free (file);
      return NULL;
    }

  /* The mapping was reused, so the freshly duplicated name is not needed.  */
  if (!file_adopted)
    {
      free (file);
      file = NULL;
    }

  if (!context->offsets)
    {
      int offset;

      if (!context->mlen)
        context->mlen = MLEN;
      context->offsets = calloc (sizeof (TableOffsetArray), context->mlen);
      if (!context->offsets)
        return NULL;

      for (offset = 0; offset < context->content_size;)
        {
          /* Low 6 bits of the first byte: key length; second byte:
             phrase length.  A record spans 4 + klen + plen bytes.  */
          int klen = context->content[offset] & 0x3F;
          int plen = context->content[offset + 1];
          TableOffsetArray *array;

          /* A zero key length would index offsets[-1]; treat it as a
             malformed table and stop indexing.  */
          if (klen == 0)
            break;

          /* Records with over-long keys are skipped, not indexed.  */
          if (klen <= context->mlen)
            {
              array = &context->offsets[klen - 1];
              if (array->cap < array->len + 1)
                {
                  if (array->cap == 0)
                    {
                      array->data = calloc (sizeof (int), 1);
                      if (!array->data)
                        return NULL;
                      array->cap = 1;
                    }
                  else
                    {
                      /* Grow through a temporary so the old buffer is
                         not lost when realloc fails.  */
                      void *grown = realloc (array->data,
                                             sizeof (int) * array->cap * 2);

                      if (!grown)
                        return NULL;
                      array->data = grown;
                      array->cap *= 2;
                      memset (array->data + array->len, 0,
                              sizeof (int) * (array->cap - array->len));
                    }
                }
              *(array->data + array->len++) = offset;
            }

          offset += 4 + klen + plen;
        }
    }

  return NULL;
}
Beispiel #3
0
bool
PinyinPhraseLib::input_indexes (std::istream &is)
{
	char header [40];
	bool binary = false;

	if (!is) return false;

	//check index file
	is.getline (header, 40);
	if (strncmp (header,
		scim_pinyin_phrase_idx_lib_text_header,
		strlen (scim_pinyin_phrase_idx_lib_text_header)) == 0) {
		binary = false;
	} else if (strncmp (header,
		scim_pinyin_phrase_idx_lib_binary_header,
		strlen (scim_pinyin_phrase_idx_lib_binary_header)) == 0) {
		binary = true;
	} else {
		return false;
	}

	is.getline (header, 40);
	if (strncmp (header, scim_pinyin_phrase_idx_lib_version,
					strlen (scim_pinyin_phrase_idx_lib_version)) != 0)
		return false;

	unsigned char bytes [8];
	uint32 number;

	//get index number
	if (binary) {
		is.read ((char*) bytes, sizeof(unsigned char) * 4);
		number = scim_bytestouint32 (bytes);
	} else {
		is.getline (header, 40);
		number = atoi (header);
	}

	if (number == 0) return false;

	clear_phrase_index ();

	if (binary) {
		for (uint32 i=0; i<number; i++) {
			is.read ((char*) bytes, sizeof(unsigned char) * 8);

			insert_pinyin_phrase_into_index (scim_bytestouint32 (bytes),
											  scim_bytestouint32 (bytes+4));
		}
	} else {
		uint32 phrase_offset;
		uint32 pinyin_offset;
		for (uint32 i=0; i<number; i++) {
			is >> phrase_offset;
			is >> pinyin_offset;

			insert_pinyin_phrase_into_index (phrase_offset, pinyin_offset);
		}
	}

	sort_phrase_tables ();

	return true;
}