示例#1
0
文件: pwl.c 项目: Distrotech/enchant
/*
 * enchant_pwl_add
 *
 * Adds the first len bytes of word to the personal word list: the
 * in-memory trie is brought up to date from the backing file, the word
 * is inserted into the trie, and, when the list is backed by a file,
 * the word is appended to that file.
 */
void enchant_pwl_add(EnchantPWL *pwl,
			 const char *const word, size_t len)
{
	FILE *out;
	struct stat st;

	enchant_pwl_refresh_from_file(pwl);
	enchant_pwl_add_to_trie(pwl, word, len);

	/* purely in-memory list: nothing to persist */
	if (pwl->filename == NULL)
		return;

	out = enchant_fopen(pwl->filename, "a");
	if (out == NULL)
		return;

	enchant_lock_file (out);

	/* remember the file's modification time as seen just before writing */
	if (g_stat(pwl->filename, &st) == 0)
		pwl->file_changed = st.st_mtime;

	/* The separator goes in front of the word: we are appending, and
	   there is no guarantee the existing content ends with a newline. */
	fwrite ("\n", sizeof(char), 1, out);
	fwrite (word, sizeof(char), len, out);

	enchant_unlock_file (out);
	fclose (out);
}
示例#2
0
文件: pwl.c 项目: Distrotech/enchant
/**
 * enchant_pwl_init_with_file
 * @file: path of the file backing the personal word list; must not be NULL
 *
 * Checks that @file can be opened in "ab+" mode (the probe handle is
 * closed again immediately), then builds a PWL object bound to a copy
 * of the path and loads the file's words into it.
 *
 * Returns: a new PWL object used to store/check/suggest words,
 * or NULL if @file is NULL or the file cannot be opened or created
 */ 
EnchantPWL* enchant_pwl_init_with_file(const char * file)
{
	EnchantPWL *result;
	FILE *probe;

	g_return_val_if_fail (file != NULL, NULL);

	/* only used to verify (or bring about) the file's existence */
	probe = enchant_fopen(file, "ab+");
	if (!probe)
		return NULL;
	fclose(probe);

	result = enchant_pwl_init();
	result->filename = g_strdup(file);
	/* a zero timestamp differs from the real mtime, forcing the initial load */
	result->file_changed = 0;
	enchant_pwl_refresh_from_file(result);

	return result;
}
示例#3
0
文件: pwl.c 项目: Distrotech/enchant
/*
 * enchant_pwl_remove
 *
 * Removes the first len bytes of word from the personal word list: the
 * word is dropped from the in-memory trie and, when the list is backed
 * by a file, the file is rewritten without any line that consists of
 * exactly that word.
 */
void enchant_pwl_remove(EnchantPWL *pwl,
			 const char *const word, size_t len)
{
	char *contents;
	size_t length;
	FILE *out;

	/* a check result of 1 means there is nothing to remove
	   (presumably: the word is not in the list -- confirm against enchant_pwl_check) */
	if (enchant_pwl_check(pwl, word, len) == 1)
		return;

	enchant_pwl_refresh_from_file(pwl);

	enchant_pwl_remove_from_trie(pwl, word, len);

	if (pwl->filename == NULL)
		return;

	/* snapshot the whole file before it is truncated for rewriting */
	if (!g_file_get_contents(pwl->filename, &contents, &length, NULL))
		return;

	/* binary mode, to match the raw bytes g_file_get_contents handed us */
	out = enchant_fopen(pwl->filename, "wb");
	if (out != NULL)
		{
			const gunichar BOM = 0xfeff;
			const char *const file_end = contents + length;
			char *body, *cursor, *hit;
			char *key;
			struct stat st;

			enchant_lock_file (out);
			key = g_strndup(word, len);

			/* a leading byte-order mark is copied through untouched */
			body = contents;
			if (g_utf8_get_char(contents) == BOM)
				{
					body = g_utf8_next_char(contents);
					fwrite (contents, sizeof(char), body - contents, out);
				}

			cursor = body;
			while ((hit = strstr(cursor, key)) != NULL)
				{
					char *after = hit + len;
					int starts_line;

					starts_line = (hit == body) || hit[-1] == '\n' || hit[-1] == '\r';
					if (starts_line &&
						(after == file_end || *after == '\n' || *after == '\r'))
						{
							/* the match is a complete line: keep what precedes it,
							   drop the word together with the line breaks after it */
							fwrite (cursor, sizeof(char), hit - cursor, out);
							cursor = after;
							while (*cursor == '\n' || *cursor == '\r')
								cursor++;
						}
					else
						{
							/* only part of a longer word: emit up to and including
							   the first matched byte and resume right after it */
							fwrite (cursor, sizeof(char), hit - cursor + 1, out);
							cursor = hit + 1;
						}
				}

			/* no further occurrences: write out the remainder of the file */
			fwrite (cursor, sizeof(char), length - (cursor - contents), out);

			g_free(key);

			if (g_stat(pwl->filename, &st) == 0)
				pwl->file_changed = st.st_mtime;

			enchant_unlock_file (out);

			fclose (out);
		}
	g_free(contents);
}
示例#4
0
文件: pwl.c 项目: Distrotech/enchant
/*
 * enchant_pwl_refresh_from_file
 *
 * Rebuilds the in-memory word list from pwl->filename when the file's
 * modification time differs from the one recorded in pwl->file_changed.
 * Does nothing when the list has no backing file, when the file cannot
 * be stat'ed, or when the recorded time still matches.
 *
 * Each line of the file is one word.  A UTF-8 byte-order mark at the very
 * start of the file is skipped, lines starting with '#' are ignored,
 * lines that do not fit into a BUFSIZ buffer are skipped with a warning,
 * and lines that are not valid UTF-8 are reported and skipped.
 */
static void enchant_pwl_refresh_from_file(EnchantPWL* pwl)
{
	char buffer[BUFSIZ];
	char* line;
	size_t line_number = 1;
	FILE *f;
	struct stat stats;

	if(!pwl->filename)
		return;

	if(g_stat(pwl->filename, &stats)!=0)
		return;    /*presumably I won't be able to open the file either*/
	
	if(pwl->file_changed == stats.st_mtime)
		return;  /*nothing changed since last read*/

	/* discard the current contents; they are rebuilt from the file below */
	enchant_trie_free(pwl->trie);
	pwl->trie = NULL;
	g_hash_table_destroy (pwl->words_in_trie);
	pwl->words_in_trie = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);

	f = enchant_fopen(pwl->filename, "r");
	if (!f) 
		return;

	pwl->file_changed = stats.st_mtime;

	enchant_lock_file (f);
	
	for (;NULL != (fgets (buffer, sizeof (buffer), f));++line_number)
		{
			const gunichar BOM = 0xfeff;
			size_t l;

			line = buffer;
			if(line_number == 1 && BOM == g_utf8_get_char(line))
				line = g_utf8_next_char(line);

			/* l may be 0 (e.g. the file holds nothing but a BOM), so test it
			   before looking at the last character instead of computing
			   strlen()-1, which would wrap around */
			l = strlen(line);
			if (l > 0 && line[l-1]=='\n') 
				line[l-1] = '\0';
			else if(!feof(f)) /* ignore lines longer than BUFSIZ. */ 
				{
					/* line_number is a size_t; cast so the argument matches %u */
					g_warning ("Line too long (ignored) in %s at line:%u\n", pwl->filename, (unsigned int) line_number);
					while (NULL != (fgets (buffer, sizeof (buffer), f)))
						{
							/* inspect the chunk that was just read: index buffer,
							   not line, which may be offset past a skipped BOM */
							size_t chunk = strlen(buffer);
							if (chunk > 0 && buffer[chunk-1]=='\n') 
								break;
						}
					continue;
				}
						
			if( line[0] != '#')
				{
					if(g_utf8_validate(line, -1, NULL))
						enchant_pwl_add_to_trie(pwl, line, strlen(line));
					else
						g_warning ("Bad UTF-8 sequence in %s at line:%u\n", pwl->filename, (unsigned int) line_number);
				}
		}
	
	enchant_unlock_file (f);
	fclose (f);
}
示例#5
0
/*!
 * Loads an ispell hash file (dictionary) into this checker.
 *
 * The header is read and validated (size, both magic numbers, string
 * character limits), the string table, hash table and flag (affix) table
 * are allocated and read, the offsets stored in the file are turned into
 * pointers, and the suffix and prefix index trees are built.  Finally the
 * string character type table is set up and initckch() is called.
 *
 * The file is closed on every path, including all error returns.
 * Buffers already stored in member variables when an error occurs are not
 * released here (NOTE(review): presumably the owner of the object frees
 * them -- confirm).
 *
 * \param hashname name of the hash file (dictionary)
 *
 * \return 0 on success, -1 if the file cannot be opened, is malformed,
 *         or memory cannot be allocated
 */
int ISpellChecker::linit (char *hashname)
{
	FILE*	fpHash;
		
    register int	i;
    register struct dent * dp;
    struct flagent *	entry;
    struct flagptr *	ind;
    int			nextchar, x;
    int			viazero;
    register ichar_t *	cp;

    if ((fpHash = enchant_fopen (hashname, "rb")) == NULL)
	{
		return (-1);
	}

    /* m_hashsize temporarily holds the number of header bytes read;
       it is reassigned to the table size further down */
    m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash);
    if (m_hashsize < static_cast<int>(sizeof(m_hashheader)))
	{
		if (m_hashsize < 0)
			fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
		else if (m_hashsize == 0)
			fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
		else
			fprintf (stderr,
			  LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize,
				static_cast<int>(sizeof m_hashheader)));
		fclose (fpHash);
		return (-1);
	}
    else if (m_hashheader.magic != MAGIC)
	{
		fprintf (stderr,
		  LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC),
			static_cast<unsigned int>(m_hashheader.magic)));
		fclose (fpHash);
		return (-1);
	}
    else if (m_hashheader.magic2 != MAGIC)
	{
		fprintf (stderr,
		  LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC),
			static_cast<unsigned int>(m_hashheader.magic2)));
		fclose (fpHash);
		return (-1);
	}
/*  else if (hashheader.compileoptions != COMPILEOPTIONS*/
    /* "1 != 1" stands in for the disabled compileoptions comparison above */
    else if ( 1 != 1
      ||  m_hashheader.maxstringchars != MAXSTRINGCHARS
      ||  m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
	{
		fprintf (stderr,
		  LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions),
			m_hashheader.maxstringchars, m_hashheader.maxstringcharlen,
			static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
		fclose (fpHash);
		return (-1);
	}

	/* allocate the hash table, the string table and one combined array
	   holding the suffix flags followed by the prefix flags */
	{
		m_hashtbl =
		 (struct dent *)
			calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent));
		m_hashsize = m_hashheader.tblsize;
		m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize)));
	}
    m_numsflags = m_hashheader.stblsize;
    m_numpflags = m_hashheader.ptblsize;
    m_sflaglist = (struct flagent *)
      malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent));
    if (m_hashtbl == NULL  ||  m_hashstrings == NULL  ||  m_sflaglist == NULL)
	{
		fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
		fclose (fpHash);
		return (-1);
	}
    m_pflaglist = m_sflaglist + m_numsflags;

	{
		if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash) 
			!= static_cast<size_t>(m_hashheader.stringsize) )
	    {
		    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
			fprintf (stderr, "stringsize err\n" );
			fclose (fpHash);
	    	return (-1);
	    }
		/* with bit 0x04 set in compileoptions the table is read in a single
		   block; otherwise each entry is read separately, sizeof(MASKTYPE)
		   bytes shorter than struct dent */
		if ( m_hashheader.compileoptions & 0x04 )
		{
			if(  fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
		    	!= (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent))))
		    {
			    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
				fclose (fpHash);
		    	return (-1);
		    }
		}
		else
		{
			for( x=0; x<m_hashheader.tblsize; x++ )
			{
				if(  fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash)
			    	!= 1)
			    {
				    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
					fclose (fpHash);
			    	return (-1);
			    }
			}	/*for*/
		}	/*else*/
	}
    if (fread (reinterpret_cast<char *>(m_sflaglist), 1,
	static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash)
      != (m_numsflags + m_numpflags) * sizeof (struct flagent))
	{
		fprintf (stderr, LOOKUP_C_BAD_FORMAT);
		fclose (fpHash);
		return (-1);
	}
    /* everything needed has been read; from here on the file is closed */
    fclose (fpHash);

	/* the word/next fields were read as offsets: -1 becomes NULL, any
	   other value becomes a pointer into the string table / hash table */
	{
		for (i = m_hashsize, dp = m_hashtbl;  --i >= 0;  dp++)
		{
			if (dp->word == (char *) -1)
				dp->word = NULL;
			else
				dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ];
			if (dp->next == (struct dent *) -1)
				dp->next = NULL;
			else
				dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ];
	    }
	}

    /* same offset-to-pointer conversion for the strip/affix strings of
       every flag entry; a zero length means there is no string */
    for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
	{
		if (entry->stripl)
			entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]);
		else
			entry->strip = NULL;
		if (entry->affl)
			entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]);
		else
			entry->affix = NULL;
	}
    /*
    ** Build the suffix index: suffixes are indexed starting from the
    ** last character of the affix and walking backwards.
    **
    ** Warning - 'entry' and 'i' are reset in the body of the loop
    ** below.  Don't try to optimize it by (e.g.) moving the decrement
    ** of i into the loop condition.
    */
    for (i = m_numsflags, entry = m_sflaglist;  i > 0;  i--, entry++)
	{
		if (entry->affl == 0)
		{
			cp = NULL;
			ind = &m_sflagindex[0];
			viazero = 1;
		}
		else
		{
			cp = entry->affix + entry->affl - 1;
			ind = &m_sflagindex[*cp];
			viazero = 0;
			while (ind->numents == 0  &&  ind->pu.fp != NULL)
			{
				if (cp == entry->affix)
				{
					ind = &ind->pu.fp[0];
					viazero = 1;
				}
				else
				{
					ind = &ind->pu.fp[*--cp];
					viazero = 0;
				}
			}
		}
		if (ind->numents == 0)
			ind->pu.ent = entry;
		ind->numents++;
		/*
		** If this index entry has more than MAXSEARCH flags in
		** it, we will split it into subentries to reduce the
		** searching.  However, the split doesn't make sense in
		** two cases:  (a) if we are already at the end of the
		** current affix, or (b) if all the entries in the list
		** have identical affixes.  Since the list is sorted, (b)
		** is true if the first and last affixes in the list
		** are identical.
		*/
		if (!viazero  &&  ind->numents >= MAXSEARCH
		  &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
		{
			/* Sneaky trick:  back up and reprocess */
			entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
			i = m_numsflags - (entry - m_sflaglist);
			ind->pu.fp =
			  (struct flagptr *)
			calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars),
			  sizeof (struct flagptr));
			if (ind->pu.fp == NULL)
			{
				fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
				return (-1);
			}
			ind->numents = 0;
		}
	}
    /*
    ** Build the prefix index: prefixes are indexed starting from the
    ** first character of the affix and walking forwards.
    **
    ** Warning - 'entry' and 'i' are reset in the body of the loop
    ** below.  Don't try to optimize it by (e.g.) moving the decrement
    ** of i into the loop condition.
    */
    for (i = m_numpflags, entry = m_pflaglist;  i > 0;  i--, entry++)
	{
		if (entry->affl == 0)
	    {
			cp = NULL;
			ind = &m_pflagindex[0];
			viazero = 1;
	    }
		else
		{
			cp = entry->affix;
			ind = &m_pflagindex[*cp++];
			viazero = 0;
			while (ind->numents == 0  &&  ind->pu.fp != NULL)
			{
				if (*cp == 0)
				{
					ind = &ind->pu.fp[0];
					viazero = 1;
				}
				else
				{
					ind = &ind->pu.fp[*cp++];
					viazero = 0;
				}
			}
		}
		if (ind->numents == 0)
			ind->pu.ent = entry;
		ind->numents++;
		/*
		** If this index entry has more than MAXSEARCH flags in
		** it, we will split it into subentries to reduce the
		** searching.  However, the split doesn't make sense in
		** two cases:  (a) if we are already at the end of the
		** current affix, or (b) if all the entries in the list
		** have identical affixes.  Since the list is sorted, (b)
		** is true if the first and last affixes in the list
		** are identical.
		*/
		if (!viazero  &&  ind->numents >= MAXSEARCH
		  &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
		{
			/* Sneaky trick:  back up and reprocess */
			entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
			i = m_numpflags - (entry - m_pflaglist);
			ind->pu.fp =
			  static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars,
				sizeof (struct flagptr)));
			if (ind->pu.fp == NULL)
			{
				fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
				return (-1);
			}
			ind->numents = 0;
		}
	}
#ifdef INDEXDUMP
    fprintf (stderr, "Prefix index table:\n");
    dumpindex (m_pflagindex, 0);
    fprintf (stderr, "Suffix index table:\n");
    dumpindex (m_sflagindex, 0);
#endif
    if (m_hashheader.nstrchartype == 0)
		m_chartypes = NULL;
    else
	{
		m_chartypes = (struct strchartype *)
		  malloc (m_hashheader.nstrchartype * sizeof (struct strchartype));
		if (m_chartypes == NULL)
		{
			fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
			return (-1);
		}
		/* each type record in the string table is: name, deformatter,
		   then a list of suffix strings terminated by an empty string */
		for (i = 0, nextchar = m_hashheader.strtypestart;
		  i < m_hashheader.nstrchartype;
		  i++)
		{
			m_chartypes[i].name = &m_hashstrings[nextchar];
			nextchar += strlen (m_chartypes[i].name) + 1;
			m_chartypes[i].deformatter = &m_hashstrings[nextchar];
			nextchar += strlen (m_chartypes[i].deformatter) + 1;
			m_chartypes[i].suffixes = &m_hashstrings[nextchar];
			while (m_hashstrings[nextchar] != '\0')
				nextchar += strlen (&m_hashstrings[nextchar]) + 1;
			nextchar++;
		}
	}

    initckch(NULL);   
   
    return (0);
}
示例#6
0
/*
 * Entry point of the ispell-compatible command line front end.
 *
 * Parses the command line (-a, -l, -v, -vv, -L, -m, -d <dict> or
 * -d<dict>, plus an optional input file), then prints the version,
 * prints the help text, or runs parse_file() on the chosen input
 * (stdin unless a file was named) and returns its result.
 */
int main (int argc, char ** argv)
{
	IspellMode_t mode = MODE_NONE;
	gchar *dictionary = 0;  /* -d dictionary */
	char *file = NULL;
	FILE *fp = stdin;
	int countLines = 0;
	int rval = 0;
	int i;

	/* Initialize system locale */
	setlocale(LC_ALL, "");

#ifdef WIN32
	/* Workaround about glib's "locale" not being the set C locale */
	if (GetFileType(GetStdHandle(STD_INPUT_HANDLE)) != FILE_TYPE_CHAR) {
		sprintf_s(charset,15,"CP%u",GetACP());
	} else {
		sprintf_s(charset,15,"CP%u",GetConsoleCP());
	}
#endif

	for (i = 1; i < argc; i++) {
		char *arg = argv[i];
		size_t arglen;

		/* anything that is not an option names the input file */
		if (arg[0] != '-') {
			file = arg;
			continue;
		}

		arglen = strlen (arg);
		if (arglen == 2) {
			/* Single-letter options.  Of the mode-setting ones, only the
			   first that is specified takes effect. */
			switch (arg[1]) {
			case 'a':
				if (mode == MODE_NONE)
					mode = MODE_A;
				break;
			case 'l':
				if (mode == MODE_NONE)
					mode = MODE_L;
				break;
			case 'v':
				if (mode == MODE_NONE)
					mode = MODE_VERSION;
				break;
			case 'L':
				if (mode == MODE_NONE)
					countLines = 1;
				break;
			case 'm':
				/* Ignore. Emacs calls ispell with '-m'. */
				break;
			case 'd':
				/* Emacs calls ispell with '-d dictionary'. */
				i++;
				dictionary = argv[i];
				break;
			default:
				break;
			}
		}
		else if (arglen == 3 && arg[1] == 'v' && arg[2] == 'v') {
			/* Emacs (or ispell.el) calls [ai]spell with '-vv'. */
			mode = MODE_VERSION;
		}
		else if (arg[1] == 'd') {
			/* Accept "-ddictionary", i.e. no space between -d and dictionary. */
			dictionary = arg + 2;
		}
		else if (arglen > 2) {
			fprintf (stderr, "-%c does not take any parameters.\n", arg[1]);
			exit(1);
		}
		else {
			/* a lone "-" is kept as the file name */
			file = arg;
		}
	}

	if (mode == MODE_VERSION) {
		print_version (stdout);
		return rval;
	}

	if (mode == MODE_NONE && !file) {
		print_help (stdout, argv[0]);
		return rval;
	}

	if (file) {
		fp = enchant_fopen (file, "rb");
		if (!fp) {
			fprintf (stderr, "Error: Could not open the file \"%s\" for reading.\n", file);
			exit (1);
		}
	}

	rval = parse_file (fp, stdout, mode, countLines, dictionary);

	/* stdin is left open; only a file we opened ourselves is closed */
	if (file)
		fclose (fp);

	return rval;
}