Пример #1
0
bool CGraphmatFile::IsKey(size_t LB, size_t HB, size_t& GraLast) const
{
  if (GetUnits()[LB].GetToken() == 0) return false;
  int ch1 =  (unsigned char)GetUnits()[LB].GetToken()[0];
  int ch2 =  (unsigned char)ReverseChar((unsigned char)GetUnits()[LB].GetToken()[0], m_Language);
  GraLast = LB+1;
  if (ispunct (ch1)) return false;  

  long i=0;
  for (; i < m_pDicts->m_Keys.size(); i++)
  {
    const char* title =  m_pDicts->m_Keys[i].c_str();
    if (
	        (    (ch1 == (unsigned char)title[0])
              || (ch2 == (unsigned char)title[0])
            )
	     && FindKeySequence(title,LB, HB, GraLast)
	   )
		break;
  }

  if (i < m_pDicts->m_Keys.size())
	  return true;
 if  (IsOneAlpha(LB))
 {
	  
	   GraLast = LB+1;
	   return true;
 };
  return  false;
};
Пример #2
0
/*
 * Builds the character tables for language Langua into `table`.
 *
 * `table` is resized to tables_length and filled with four consecutive
 * sections (the same order PCRE's pcre_maketables() uses):
 *
 *   [0, 256)                 lower casing table
 *   [256, 512)               case-flipping table
 *   [512, 512 + cbit_length) character-class bitmaps (cbit_* offsets)
 *   [512 + cbit_length, ...) per-character type flags (ctype_* bits)
 *
 * Letter classification in the first three sections comes from
 * is_upper_alpha / is_lower_alpha / is_alpha / ReverseChar for Langua;
 * everything else uses the <cctype> functions.
 */
void RmlPcreMakeTables(vector<BYTE>& table, MorphLanguageEnum Langua)
{
	table.resize(tables_length);

	const size_t lowerCaseStart = 0;
	const size_t flipCaseStart  = 256;
	const size_t cbitStart      = 512;
	const size_t ctypeStart     = cbitStart + cbit_length;

	/* First comes the lower casing table. Upper-case letters map to their
	ReverseChar() counterpart; every other character must map to itself.
	Writing all 256 entries also guarantees that no stale or zero value
	is left behind when `table` already had contents before the resize. */
	for (size_t i = 0; i < 256; i++)
	{
		if (is_upper_alpha(i, Langua))
			table[lowerCaseStart + i] = ReverseChar(i, Langua);
		else
			table[lowerCaseStart + i] = static_cast<BYTE>(i);
	}

	/* Next the case-flipping table */
	for (size_t i = 0; i < 256; i++)
		table[flipCaseStart + i] = ReverseChar(i, Langua);

	/* Then the character-class bitmaps: one bit per character, so they are
	cleared first and the bits are OR-ed in afterwards. */
	for (size_t i = 0; i < cbit_length; i++)
		table[cbitStart + i] = 0;

	for (size_t i = 0; i < 256; i++)
	{
		const size_t byteNo = i / 8;        /* byte inside a bitmap */
		const int    bit    = 1 << (i & 7); /* bit inside that byte */

		if (isdigit(i))
		{
			table[cbitStart + cbit_digit + byteNo] |= bit;
			table[cbitStart + cbit_word  + byteNo] |= bit;
		}

		if (is_upper_alpha(i, Langua))
		{
			table[cbitStart + cbit_upper + byteNo] |= bit;
			table[cbitStart + cbit_word  + byteNo] |= bit;
		}

		if (is_lower_alpha(i, Langua))
		{
			table[cbitStart + cbit_lower + byteNo] |= bit;
			table[cbitStart + cbit_word  + byteNo] |= bit;
		}

		if (i == '_')    table[cbitStart + cbit_word   + byteNo] |= bit;
		if (isspace(i))  table[cbitStart + cbit_space  + byteNo] |= bit;
		if (isxdigit(i)) table[cbitStart + cbit_xdigit + byteNo] |= bit;
		if (is_alpha(i, Langua) || ispunct(i)) table[cbitStart + cbit_graph + byteNo] |= bit;
		if (is_alpha(i, Langua) || isprint(i)) table[cbitStart + cbit_print + byteNo] |= bit;
		if (ispunct(i))  table[cbitStart + cbit_punct  + byteNo] |= bit;
		if (iscntrl(i))  table[cbitStart + cbit_cntrl  + byteNo] |= bit;
	}

	/* Finally, the character type table. In this, we exclude VT from the white
	space chars, because Perl doesn't recognize it as such for \s and for comments
	within regexes. */

	/* NOTE(review): this section classifies letters with the locale-based
	isalpha()/isalnum(), while the bitmaps above use the language-aware
	is_alpha() family -- confirm that the difference is intended. */
	for (size_t i = 0; i < 256; i++)
	{
		int x = 0;
		if (i != 0x0b && isspace(i)) x += ctype_space;
		if (isalpha(i)) x += ctype_letter;
		if (isdigit(i)) x += ctype_digit;
		if (isxdigit(i)) x += ctype_xdigit;
		if (isalnum(i) || i == '_') x += ctype_word;

		/* Note: strchr includes the terminating zero in the characters it considers.
		In this instance, that is ok because we want binary zero to be flagged as a
		meta-character, which in this sense is any character that terminates a run
		of data characters. */

		if (strchr("*+?{^.$|()[", static_cast<int>(i)) != 0)
			x += ctype_meta;

		table[ctypeStart + i] = x;
	}
}