예제 #1
0
// Handles one input byte (m_CurByte) while the reader is positioned on an
// attribute value.
//
// m_SkipChar holds the quote character that opened the value, or 0 when no
// opening quote has been seen yet:
//   - no quote yet: bytes below 0x21 are ignored; a ' or " seen while no data
//     has been collected (m_iDataPos < 1) becomes the expected closing quote;
//     any other byte is dropped.
//   - inside the quotes: bytes are handed to ParseChar() until the matching
//     quote arrives, at which point the collected length is published, the
//     handler (if any) is notified and the mode switches back to
//     FX_SAXMODE_TagAttributeName.
void CFX_SAXReader::ParseTagAttributeValue() {
  if (!m_SkipChar) {
    const bool bOpensValue =
        m_CurByte >= 0x21 && m_iDataPos < 1 &&
        (m_CurByte == '\'' || m_CurByte == '\"');
    if (bOpensValue) {
      m_SkipChar = m_CurByte;
    }
    return;
  }

  if (m_SkipChar != m_CurByte) {
    // Still inside the quoted value.
    ParseChar(m_CurByte);
    return;
  }

  // Closing quote: publish the value and reset the data cursor.
  m_iDataLength = m_iDataPos;
  m_iDataPos = 0;
  if (m_pHandler) {
    NotifyAttribute();
  }
  m_SkipChar = 0;
  m_eMode = FX_SAXMODE_TagAttributeName;
}
예제 #2
0
/*
* Returns an encoded string according to the
* Soundex algorithm.
*
* The name is upper-cased, its first character is kept verbatim and every
* later alphabetic character is replaced by the value ParseChar() returns for
* it.  Runs of identical adjacent codes are then collapsed to one, '0' codes
* are removed, and the result is padded with '0' or truncated so that it is
* exactly CODE_LENGTH characters long.
*
* Duplicates are detected on the raw code sequence, i.e. before the zeros are
* removed, so two equal codes separated by a '0' code are both kept.
*/
string ParseName(string name) {
    name = ConvertToUpperCase(name);

    // Raw encoding: first character as-is, one code per later letter.
    string raw;
    for (size_t i = 0; i < name.length(); i++) {
        if (i == 0) {
            raw += name[i]; // save first char
        } else if (isalpha(static_cast<unsigned char>(name[i])) != 0) {
            // The cast keeps isalpha() defined for negative char values.
            raw += ParseChar(name[i]);
        }
    }

    // Build the cleaned code in a second string instead of erasing in place:
    // erasing while advancing the index skipped the element that slid into
    // the erased slot, so runs of three or more duplicates and zeros that
    // followed an erased element were left behind.
    string code;
    char previous = '\0';
    for (size_t j = 0; j < raw.length(); j++) {
        const char current = raw[j];
        const bool duplicate = (j > 0 && current == previous);
        previous = current;
        if (duplicate || current == '0') {
            continue; // delete duplicates and zeros
        }
        code += current;
    }

    // Pad with zeros if the code is too short, truncate if it is too long.
    code.resize(CODE_LENGTH, '0');
    return code;
}
// <local-name> := Z <(function) encoding> E <(entity) name>
//                 [<discriminator>]
//              := Z <(function) encoding> E s [<discriminator>]
//
// The two productions are tried in the order listed.  After a failed
// attempt *state is reset to the snapshot taken on entry, so a false return
// hands *state back unchanged.
static bool ParseLocalName(State *state) {
  State saved = *state;

  // First production; "::" is passed to MaybeAppend() between the encoding
  // and the entity name.
  const bool matched_entity =
      ParseChar(state, 'Z') && ParseEncoding(state) &&
      ParseChar(state, 'E') && MaybeAppend(state, "::") &&
      ParseName(state) && Optional(ParseDiscriminator(state));
  if (matched_entity) {
    return true;
  }
  *state = saved;

  // Second production: the "Es" form.
  const bool matched_es =
      ParseChar(state, 'Z') && ParseEncoding(state) &&
      ParseTwoChar(state, "Es") && Optional(ParseDiscriminator(state));
  if (!matched_es) {
    *state = saved;
  }
  return matched_es;
}
예제 #4
0
파일: scanner.c 프로젝트: pmprog/cc65
static void CharConst (void)
/* Scan a character constant and store it in NextTok as a TOK_CCONST of type
** int. An error is reported if the closing quote is missing; the token is
** set up in either case.
*/
{
    int Ch;

    /* Step over the opening quote, then read the character itself */
    NextChar ();
    Ch = ParseChar ();

    /* Consume the closing quote, or complain if it is not there */
    if (CurC == '\'') {
        NextChar ();
    } else {
        Error ("`\'' expected");
    }

    /* Token kind */
    NextTok.Tok  = TOK_CCONST;

    /* Value: translated into the target charset, then sign extended */
    NextTok.IVal = SignExtendChar (TgtTranslateChar (Ch));

    /* Character constants have type int */
    NextTok.Type = type_int;
}
// <call-offset> ::= h <nv-offset> _
//               ::= v <v-offset> _
//
// Tries the 'h' form, then the 'v' form.  *state is reset to its value on
// entry after each failed attempt, so a false return leaves it unchanged.
static bool ParseCallOffset(State *state) {
  State saved = *state;

  const bool non_virtual_form =
      ParseChar(state, 'h') && ParseNVOffset(state) && ParseChar(state, '_');
  if (non_virtual_form) {
    return true;
  }
  *state = saved;

  const bool virtual_form =
      ParseChar(state, 'v') && ParseVOffset(state) && ParseChar(state, '_');
  if (!virtual_form) {
    *state = saved;
  }
  return virtual_form;
}
예제 #6
0
static int ParseFont(Kanji_Font* font, FILE* fp) {
	char buf[BUF], *p;
	int index;
	int k_rshift, a_rshift;
	int s;

	for (s = 8; s < font->k_size; s += 8) {}
	k_rshift = s - font->k_size;
	for (s = 8; s < font->a_size; s += 8) {}
	a_rshift = s - font->a_size;

	while (1) {
		if (fgets(buf, BUF, fp) == NULL) {
			break;
		}

		if (strstr(buf, "ENCODING") != NULL) {
			p = strchr(buf, ' ');
			index = strtol(p, 0, 10);

			while (strstr(buf, "BITMAP") == NULL) {
				fgets(buf, BUF, fp);
			}

			if (index > 255) {
				index = (((index & 0xff00) >> 8) - 0x20) * 96
					+ (index & 0xff) - 0x20 + 0xff;
				ParseChar(font, index, fp, k_rshift);
			}
			else {
// <discriminator> := _ <(non-negative) number>
//
// Returns true when both the underscore and the number were parsed;
// otherwise *state is reset to its value on entry and false is returned.
static bool ParseDiscriminator(State *state) {
  State saved = *state;
  const bool matched = ParseChar(state, '_') && ParseNumber(state);
  if (!matched) {
    *state = saved;
  }
  return matched;
}
// <expr-primary> ::= L <type> <(value) number> E
//                ::= L <type> <(value) float> E
//                ::= L <mangled-name> E
//                // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
//                ::= LZ <encoding> E
//
// The four productions are attempted in the order listed; *state is reset
// to the snapshot taken on entry after every failed attempt.
static bool ParseExprPrimary(State *state) {
  State saved = *state;

  // L <type> <number> E
  const bool integer_literal =
      ParseChar(state, 'L') && ParseType(state) &&
      ParseNumber(state) && ParseChar(state, 'E');
  if (integer_literal) {
    return true;
  }
  *state = saved;

  // L <type> <float> E
  const bool float_literal =
      ParseChar(state, 'L') && ParseType(state) &&
      ParseFloatNumber(state) && ParseChar(state, 'E');
  if (float_literal) {
    return true;
  }
  *state = saved;

  // L <mangled-name> E
  const bool external_name =
      ParseChar(state, 'L') && ParseMangledName(state) &&
      ParseChar(state, 'E');
  if (external_name) {
    return true;
  }
  *state = saved;

  // LZ <encoding> E
  const bool abi2_form =
      ParseTwoChar(state, "LZ") && ParseEncoding(state) &&
      ParseChar(state, 'E');
  if (!abi2_form) {
    *state = saved;
  }
  return abi2_form;
}
// <local-source-name> ::= L <source-name> [<discriminator>]
//
// References:
//   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
//   http://gcc.gnu.org/viewcvs?view=rev&revision=124467
//
// On failure *state is reset to its value on entry.
static bool ParseLocalSourceName(State *state) {
  State saved = *state;
  const bool matched =
      ParseChar(state, 'L') && ParseSourceName(state) &&
      Optional(ParseDiscriminator(state));
  if (!matched) {
    *state = saved;
  }
  return matched;
}
예제 #10
0
// <v-offset>  ::= <(offset) number> _ <(virtual offset) number>
//
// Both numbers and the separating underscore are required.  On failure
// *state is reset to its value on entry.
static bool ParseVOffset(State *state) {
  State saved = *state;
  const bool matched =
      ParseNumber(state) && ParseChar(state, '_') && ParseNumber(state);
  if (!matched) {
    *state = saved;
  }
  return matched;
}
예제 #11
0
// <pointer-to-member-type> ::= M <(class) type> <(member) type>
//
// Requires 'M' followed by two types.  On failure *state is reset to its
// value on entry.
static bool ParsePointerToMemberType(State *state) {
  State saved = *state;
  const bool matched =
      ParseChar(state, 'M') && ParseType(state) && ParseType(state);
  if (!matched) {
    *state = saved;
  }
  return matched;
}
예제 #12
0
파일: scanner.c 프로젝트: pmprog/cc65
static void StringConst (void)
/* Scan one or more adjacent quoted strings, concatenate them and store the
** result in NextTok as a string literal token.
*/
{
    /* Collects the characters of all concatenated pieces */
    StrBuf S = AUTO_STRBUF_INITIALIZER;

    /* Start out as a normal string constant */
    NextTok.Tok  = TOK_SCONST;

    /* One iteration per string piece. As soon as one piece carries the L
    ** prefix, the token becomes a wide character string literal and stays
    ** that way.
    */
    for (;;) {

        if (CurC == '\"') {
            /* Normal piece: step over the opening quote */
            NextChar ();
        } else if (CurC == 'L' && NextC == '\"') {
            /* Wide piece: step over the prefix and the opening quote */
            NextTok.Tok = TOK_WCSCONST;
            NextChar ();
            NextChar ();
        } else {
            /* No further piece follows */
            break;
        }

        /* Collect characters up to the closing quote */
        for (;;) {
            if (CurC == '\"') {
                break;
            }
            if (CurC == '\0') {
                Error ("Unexpected newline");
                break;
            }
            SB_AppendChar (&S, ParseChar ());
        }

        /* Step over the current character; this is the closing quote unless
        ** the piece ended with the error above.
        */
        NextChar ();

        /* Skip white space, read new input */
        SkipWhite ();
    }

    /* Terminate the collected string */
    SB_AppendChar (&S, '\0');

    /* Hand the string to the literal pool */
    NextTok.SVal = AddLiteralStr (&S);

    /* Release the buffer */
    SB_Done (&S);
}
예제 #13
0
// <ctor-dtor-name> ::= C1 | C2 | C3
//                  ::= D0 | D1 | D2
//
// On a match the text recorded in state->prev_name (state->prev_name_length
// characters) is appended, preceded by "~" for the D forms.  On failure
// *state is reset to its value on entry.
static bool ParseCtorDtorName(State *state) {
  State saved = *state;

  const bool is_ctor =
      ParseChar(state, 'C') && ParseCharClass(state, "123");
  if (is_ctor) {
    const char * const class_name = state->prev_name;
    const int class_name_length = state->prev_name_length;
    MaybeAppendWithLength(state, class_name, class_name_length);
    return true;
  }
  *state = saved;

  const bool is_dtor =
      ParseChar(state, 'D') && ParseCharClass(state, "012");
  if (!is_dtor) {
    *state = saved;
    return false;
  }

  // Read the recorded name before anything is appended.
  const char * const class_name = state->prev_name;
  const int class_name_length = state->prev_name_length;
  MaybeAppend(state, "~");
  MaybeAppendWithLength(state, class_name, class_name_length);
  return true;
}
예제 #14
0
int SB_GetString (StrBuf* B, StrBuf* S)
/* Read a string from B into S. Adjacent quoted strings are concatenated.
** Returns 1 if B was positioned on a string and 0 otherwise; S is cleared
** first and terminated in both cases. An error is output only when the end
** of the buffer is hit inside a string.
*/
{
    char C;

    /* Start with an empty result */
    SB_Clear (S);

    /* Without a leading quote there is no string */
    if (SB_Peek (B) != '\"') {
        SB_Terminate (S);
        return 0;
    }

    /* One iteration per quoted piece */
    while (SB_Peek (B) == '\"') {

        /* Step over the opening quote */
        SB_Skip (B);

        /* Copy the contents up to the closing quote */
        for (;;) {
            C = SB_Peek (B);
            if (C == '\"') {
                break;
            }
            if (C == '\0') {
                Error ("Unexpected end of string");
                break;
            }
            SB_AppendChar (S, ParseChar (B));
        }

        /* Step over the current character; this is the closing quote unless
        ** the piece ended with the error above.
        */
        SB_Skip (B);

        /* Skip white space, read new input */
        SB_SkipWhite (B);
    }

    /* Terminate the result and report success */
    SB_Terminate (S);
    return 1;
}
예제 #15
0
// <builtin-type> ::= v, etc.
//                ::= u <source-name>
//
// kBuiltinTypeList is scanned first: the first entry whose abbreviation
// starts with the current input character wins, its real_name is passed to
// MaybeAppend() and one input character is consumed.  Otherwise the
// 'u' <source-name> form is tried, and *state is reset if that fails too.
static bool ParseBuiltinType(State *state) {
  const AbbrevPair *entry = kBuiltinTypeList;
  while (entry->abbrev != nullptr) {
    if (state->mangled_cur[0] == entry->abbrev[0]) {
      MaybeAppend(state, entry->real_name);
      ++state->mangled_cur;
      return true;
    }
    ++entry;
  }

  State saved = *state;
  const bool vendor_type = ParseChar(state, 'u') && ParseSourceName(state);
  if (!vendor_type) {
    *state = saved;
  }
  return vendor_type;
}
/*------------------------------------------------------------------------------
 * function: ParseWhiteSpace
 * Parse whitespace; return false if can't parse.
 *
 * args[1] .. args[numArgs - 1] are each handed to ParseChar() together with
 * the matching slot whitespaces[0] .. whitespaces[numArgs - 2]; args[0] is
 * not looked at.  numWhitespace is set to numArgs - 1 only when every
 * argument was accepted.  A numArgs of MAX_SEPARATORS or more is rejected
 * with a message on stderr.
 */
static Boolean
ParseWhiteSpace(int numArgs, char **args)
{
  if (numArgs >= MAX_SEPARATORS) {
    fprintf(stderr, "ParseCat: too many separators, max = %d\n",
	    MAX_SEPARATORS);
    return false;
  }

  int argIndex = 1;
  while (argIndex < numArgs) {
    if (!ParseChar(args[argIndex], whitespaces[argIndex - 1])) {
      return false;
    }
    argIndex++;
  }

  numWhitespace = numArgs - 1;
  return true;
}
예제 #17
0
// Feeds one hexadecimal digit into the value being assembled in
// pState->bHex: the accumulator is shifted left by four bits and the digit
// value is added.  pState->iHexBinCnt is decremented for every accepted
// digit; when it reaches zero the state returns to psNormal and the
// assembled value is passed to ParseChar().
// Throws a heap-allocated ParserError for a character that is not a hex
// digit; the shift has already been applied at that point.
void C4RTFFile::ParseHexChar(StdStrBuf &sResult, char c)
	{
	pState->bHex <<= 4;
	int iNibble;
	if (isdigit((unsigned char)c))
		iNibble = c - '0';
	else if (Inside<char>(c, 'a', 'f'))
		iNibble = c - 'a' + 10;
	else if (Inside<char>(c, 'A', 'F'))
		iNibble = c - 'A' + 10;
	else
		throw new ParserError("Invalid hex character");
	pState->bHex += iNibble;
	// more digits expected?
	if (--pState->iHexBinCnt) return;
	pState->eState = psNormal;
	ParseChar(sResult, pState->bHex);
	}
예제 #18
0
// Handles one input byte (m_CurByte) while the reader is inside an end tag.
//
// Bytes below 0x21 are ignored.  Any byte other than '>' is handed to
// ParseChar().  On '>' the node position and collected data length are
// recorded, the handler (if any) is notified, Pop() is called once before
// and once after the notification, and the reader returns to
// FX_SAXMODE_Text.
void CFX_SAXReader::ParseTagEnd() {
  if (m_CurByte < 0x21) {
    return;
  }
  if (m_CurByte != '>') {
    ParseChar(m_CurByte);
    return;
  }

  Pop();
  m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex;
  m_iDataLength = m_iDataPos;
  m_iDataPos = 0;
  if (m_pHandler) {
    NotifyEnd();
  }
  Pop();
  m_eMode = FX_SAXMODE_Text;
}
예제 #19
0
// <operator-name> ::= nw, and other two letters cases
//                 ::= cv <type>  # (cast)
//                 ::= v  <digit> <source-name> # vendor extended operator
//
// Needs at least two characters of remaining input.  The cast form is tried
// first, then the vendor form, and finally the two-letter codes listed in
// kOperatorList.  *state is reset to its value on entry after each of the
// first two attempts fails.
static bool ParseOperatorName(State *state) {
  if (RemainingLength(state) < 2) {
    return false;
  }

  // Cast operator: "cv" <type>, parsed inside a nested-name scope.
  State saved = *state;
  const bool is_cast =
      ParseTwoChar(state, "cv") &&
      MaybeAppend(state, "operator ") &&
      EnterNestedName(state) &&
      ParseType(state) &&
      LeaveNestedName(state, saved.nest_level);
  if (is_cast) {
    return true;
  }
  *state = saved;

  // Vendor extended operator: 'v' <digit> <source-name>.
  const bool is_vendor =
      ParseChar(state, 'v') && ParseCharClass(state, "0123456789") &&
      ParseSourceName(state);
  if (is_vendor) {
    return true;
  }
  *state = saved;

  // Every remaining operator code is a lower-case letter followed by a
  // letter of either case.
  const char *code = state->mangled_cur;
  if (!IsLower(code[0]) || !IsAlpha(code[1])) {
    return false;
  }

  // Linear scan; a binary search would do if speed ever matters here.
  for (const AbbrevPair *op = kOperatorList; op->abbrev != nullptr; ++op) {
    if (code[0] != op->abbrev[0] || code[1] != op->abbrev[1]) {
      continue;
    }
    MaybeAppend(state, "operator");
    if (IsLower(*op->real_name)) {  // new, delete, etc.
      MaybeAppend(state, " ");
    }
    MaybeAppend(state, op->real_name);
    state->mangled_cur += 2;
    return true;
  }
  return false;
}
예제 #20
0
// Handles one input byte (m_CurByte) while the reader is in text content.
//
// A '<' ends the text run: if any data was collected its length is
// published and the handler (if any) is notified; then Push() is called,
// the node position is recorded and the mode becomes FX_SAXMODE_NodeStart.
// Any other byte is handed to ParseChar(), except that while no data has
// been collected yet a byte accepted by SkipSpace() is dropped.
void CFX_SAXReader::ParseText() {
  if (m_CurByte != '<') {
    if (m_iDataPos >= 1 || !SkipSpace(m_CurByte)) {
      ParseChar(m_CurByte);
    }
    return;
  }

  const bool bHasData = m_iDataPos > 0;
  if (bHasData) {
    m_iDataLength = m_iDataPos;
    m_iDataPos = 0;
    if (m_pHandler) {
      NotifyData();
    }
  }
  Push();
  m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex;
  m_eMode = FX_SAXMODE_NodeStart;
}
예제 #21
0
// <type> ::= <CV-qualifiers> <type>
//        ::= P <type>
//        ::= R <type>
//        ::= C <type>
//        ::= G <type>
//        ::= U <source-name> <type>
//        ::= <builtin-type>
//        ::= <function-type>
//        ::= <class-enum-type>
//        ::= <array-type>
//        ::= <pointer-to-member-type>
//        ::= <template-template-param> <template-args>
//        ::= <template-param>
//        ::= <substitution>
//
// The prefixed, recursive productions are tried first, then the
// single-parser alternatives, then the two template-parameter forms.
// *state is reset to the snapshot taken on entry after each failed
// multi-step attempt.
static bool ParseType(State *state) {
  State saved = *state;

  // CV-qualifiers and the P/R/C/G prefixes come first.
  if (ParseCVQualifiers(state) && ParseType(state)) {
    return true;
  }
  *state = saved;

  if (ParseCharClass(state, "PRCG") && ParseType(state)) {
    return true;
  }
  *state = saved;

  const bool vendor_qualified =
      ParseChar(state, 'U') && ParseSourceName(state) && ParseType(state);
  if (vendor_qualified) {
    return true;
  }
  *state = saved;

  // Single-parser alternatives, in grammar order; the first success wins.
  if (ParseBuiltinType(state)) {
    return true;
  }
  if (ParseFunctionType(state)) {
    return true;
  }
  if (ParseClassEnumType(state)) {
    return true;
  }
  if (ParseArrayType(state)) {
    return true;
  }
  if (ParsePointerToMemberType(state)) {
    return true;
  }
  if (ParseSubstitution(state)) {
    return true;
  }

  if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
    return true;
  }
  *state = saved;

  // Less greedy than <template-template-param> <template-args>.
  return ParseTemplateParam(state);
}
예제 #22
0
// <number> ::= [n] <non-negative decimal integer>
//
// Parses an optional 'n' (negative sign) followed by one or more decimal
// digits, reading no further than state->mangled_end.  On success the input
// position is advanced past the digits, the signed value is stored in
// state->number and true is returned.  On failure *state is reset to its
// value on entry -- including when an 'n' was seen but no digit followed --
// and false is returned.
//
// The value is accumulated in unsigned arithmetic, which wraps instead of
// overflowing, because the digit run comes from the mangled input and may be
// arbitrarily long.  Results for values that fit in an int are unchanged.
static bool ParseNumber(State *state) {
  State copy = *state;
  const bool negative = ParseChar(state, 'n');

  const char *p = state->mangled_cur;
  unsigned int magnitude = 0;
  for (; p < state->mangled_end; ++p) {
    if (*p < '0' || *p > '9') {
      break;
    }
    magnitude = magnitude * 10u + static_cast<unsigned int>(*p - '0');
  }

  if (p == state->mangled_cur) {  // No digit was consumed.
    *state = copy;
    return false;
  }

  state->mangled_cur = p;
  // Negate while still unsigned so that no signed overflow can occur.
  state->number = static_cast<int>(negative ? 0u - magnitude : magnitude);
  return true;
}
예제 #23
0
// Handles one input byte (m_CurByte) while the reader is skipping over a
// node.  m_SkipStack holds the closing characters that are still expected
// and m_SkipChar mirrors its top entry (0 when the stack is empty).
void CFX_SAXReader::SkipNode() {
  // Stack depth on entry.  It is decremented when an entry is removed below
  // but NOT incremented when one of the switch cases pushes an entry, so the
  // final test uses the depth from before any push.
  int32_t iLen = m_SkipStack.GetSize();
  if (m_SkipChar == '\'' || m_SkipChar == '\"') {
    // Inside a quoted section: every byte except the matching quote is
    // dropped without being passed to ParseChar().
    if (m_CurByte != m_SkipChar) {
      return;
    }
    iLen--;
    FXSYS_assert(iLen > -1);
    m_SkipStack.RemoveAt(iLen, 1);
    m_SkipChar = iLen ? m_SkipStack[iLen - 1] : 0;
    return;
  }
  switch (m_CurByte) {
    // An opening character pushes the closing character that matches it.
    case '<':
      m_SkipChar = '>';
      m_SkipStack.Add('>');
      break;
    case '[':
      m_SkipChar = ']';
      m_SkipStack.Add(']');
      break;
    case '(':
      m_SkipChar = ')';
      m_SkipStack.Add(')');
      break;
    case '\'':
      m_SkipChar = '\'';
      m_SkipStack.Add('\'');
      break;
    case '\"':
      m_SkipChar = '\"';
      m_SkipStack.Add('\"');
      break;
    default:
      // The expected closing character pops one level.
      if (m_CurByte == m_SkipChar) {
        iLen--;
        m_SkipStack.RemoveAt(iLen, 1);
        m_SkipChar = iLen ? m_SkipStack[iLen - 1] : 0;
        // The '>' that empties the stack ends the skipped node.
        if (iLen == 0 && m_CurByte == '>') {
          m_iDataLength = m_iDataPos;
          m_iDataPos = 0;
          // Collected data of the form "[CDATA[" ... "]]" is reported as
          // character data; 9 is the 7-byte prefix plus the 2-byte suffix.
          if (m_iDataLength >= 9 &&
              FXSYS_memcmp(m_pszData, "[CDATA[", 7 * sizeof(uint8_t)) == 0 &&
              FXSYS_memcmp(m_pszData + m_iDataLength - 2, "]]",
                           2 * sizeof(uint8_t)) == 0) {
            Pop();
            // Strip the prefix and suffix and move the payload to the start
            // of the buffer.
            m_iDataLength -= 9;
            m_dwDataOffset += 7;
            FXSYS_memmove(m_pszData, m_pszData + 7,
                          m_iDataLength * sizeof(uint8_t));
            m_bCharData = TRUE;
            if (m_pHandler) {
              NotifyData();
            }
            m_bCharData = FALSE;
          } else {
            Pop();
          }
          m_eMode = FX_SAXMODE_Text;
        }
      }
      break;
  }
  // The byte is collected only when the depth computed above is non-zero.
  if (iLen > 0) {
    ParseChar(m_CurByte);
  }
}
예제 #24
0
// Walks over the RTF data in sRTF character by character and returns the
// text collected in the result buffer.  An empty sRTF yields an empty
// result.  If parsing throws a ParserError, the result is replaced by
// "Invalid RTF file: " followed by the error text.
StdStrBuf C4RTFFile::GetPlainText()
	{
	// drop whatever state an earlier run left behind, then start fresh
	ClearState();
	pState = new PropertyState();
	pState->eState = psNormal;
	StdStrBuf sResult;
	// nothing to do for empty RTFs
	if (sRTF.getSize()<=0) return sResult;
	try
		{
		size_t iPos = 0;
		while (iPos < sRTF.getSize())
			{
			char cCur = ((const char *) sRTF.getData())[iPos++];
			// binary data is passed through until its counter runs out
			if (pState->eState == psBinary)
				{
				if (!--pState->iHexBinCnt) pState->eState = psNormal;
				ParseChar(sResult, cCur);
				continue;
				}
			// structural characters first
			if (cCur == '{')
				PushState();
			else if (cCur == '}')
				PopState();
			else if (cCur == '\\')
				ParseKeyword(sResult, iPos);
			else if (cCur == 0x0d || cCur == 0x0a)
				{
				// line breaks in the source are ignored
				}
			// everything else is handled according to the current state
			else if (pState->eState == psNormal)
				ParseChar(sResult, cCur);
			else if (pState->eState == psHex)
				ParseHexChar(sResult, cCur);
			else
				throw new ParserError("Invalid State");
			}
		// every opened block must have been closed again
		if (pState->pNext) throw new ParserError("Block not closed");
		}
	catch (ParserError *pe)
		{
		// invalid RTF file: return the error message instead of the text
		sResult = "Invalid RTF file: ";
		sResult.Append(pe->ErrorText);
		delete pe;
		}
	// cleanup
	ClearState();
	return sResult;
	}
예제 #25
0
	// Advances the reader by one node, dispatching on the type of the node
	// read last (mNodeType).  Returns true if a node was read successfully
	// and false otherwise.
	bool XMLReader::ReadInternal()
	{
		switch (mNodeType) {
			case kNone: {
				// Nothing read yet: look at the first two bytes to choose
				// between big- and little-endian UTF-16.  Either a byte
				// order mark or a leading '<' settles it; any other value
				// leaves the converter alone.
				FillInputBuffer();
				if ((mInputEnd - mInputStart) >= 2) {
					uint16_t signature = (uint16_t(mInputBuffer[0]) << 8) | mInputBuffer[1];
					if (signature == 0xFEFF || signature == 0x003C) {
						mConverter.Reset(TextEncoding::UTF16BE());
					}
					else if (signature == 0xFFFE || signature == 0x3C00) {
						mConverter.Reset(TextEncoding::UTF16LE());
					}
				}
				mNodeType = kDocument;
				return true;
			}

			case kDocument:
				// An XML document can start with:
				// document	::= prolog element Misc*
				// prolog	::= XMLDecl? Misc* (doctypedecl Misc*)?
				// XMLDecl	::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
				// Misc ::= Comment | PI | S
				// doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
				// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
				// PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
				// S ::= (#x20 | #x9 | #xD | #xA)+

				// Discard a leading byte order mark, if present; the kNone
				// case has already used it to pick the default encoding.
				ParseChar(UnicodeChar(0xFEFF));

				if (BufferStartsWith("<?xml")) {
					if (! ParseXmlDeclaration()) return false;

					// Done unless the declaration names an encoding that
					// differs from the one assumed so far.
					UnicodeString encodingName = GetAttribute("encoding");
					if (encodingName.empty()) return true;
					TextEncoding newEncoding = TextEncoding::WebCharset(encodingName.c_str());
					if (newEncoding == mConverter.GetSourceEncoding()) return true;

					// The declared encoding is different: reset all input
					// buffering and parse the declaration again from the
					// start of the input.
					mConverter.Reset(newEncoding);
					mInput.Restart();
					mInputStart = mInputEnd = mInputBuffer;
					mOutputStart = mOutputEnd = mOutputBuffer;
					ParseChar(UnicodeChar(0xFEFF));
					return ParseXmlDeclaration();
				}
				if (StartsWithWhitespace()) return ParseRequiredWhitespace();
				// Not handled yet:
				//if (BufferStartsWith("<!--")) return ParseComment();
				//if (BufferStartsWith("<?")) return ParseProcessingInstruction();
				//if (BufferStartsWith("<!DOCTYPE")) return ParseDocumentType();
				if (BufferStartsWith("<")) return ParseElement();
				return false;

			case kXmlDeclaration:
			case kElement:
			case kEndElement:
			case kText:
			case kWhitespace:
				// Inside the document: an end tag, a start tag or text.
				if (BufferStartsWith("</")) return ParseEndElement();
				if (BufferStartsWith("<")) return ParseElement();
				return ParseText();
		}
		return false;
	}
예제 #26
0
파일: re_comp.c 프로젝트: JukkaL/alore
/* Parse a run of simple and parenthesised regular expressions concatenated
   together. Plain characters, dots and character classes (optionally followed
   by a repetition specifier) are considered simple. IDEA: \< and friends

   Parsing continues until the end of the input (info->strEnd) or until a ')'
   or '|' is found; those two characters are left unconsumed for the
   caller. */
static void ParseConcat(ParseInfo *info)
{
    /* Number of consecutive plain literal characters seen so far; used to
       merge them into a single literal string in the output buffer. */
    int litLen;
    /* TRUE if the previous loop iteration emitted a plain literal. */
    ABool isPrevLit;

    litLen = 0;
    isPrevLit = FALSE;

    while (info->str < info->strEnd) {
        /* Anything other than a plain literal ends the current literal run. */
        if (!isPrevLit)
            litLen = 0;

        isPrevLit = FALSE;

        switch (*info->str) {
        case '(':
            ParseParen(info);
            break;

        case ')':
        case '|':
            /* End of this concatenation; not consumed here. */
            return;

        case '^':
            /* Beginning of line */
            Emit(info, A_BOL_MULTI);
            info->str++;
            break;

        case '$':
            /* End of line */
            Emit(info, A_EOL_MULTI);
            info->str++;
            break;

        case '.':
            /* Any character */

            /* While minLen is still 0, every character is marked as a
               possible start character. */
            if (info->minLen == 0)
                memset(info->startChar, 0xff, sizeof(info->startChar));

            info->str++;

            /* IDEA: Perhaps ANY_ALL (i.e. match newlines)? */
            ParseSimpleRepetition(info, A_ANY);

            break;

        case '[': {
            /* Character class */

            ABool complement = FALSE;
            AReOpcode set[A_SET_SIZE];
            AWideChar ch;
            int i;
            WideCharSet wset;
            int flags = 0;

            InitWideSet(&wset);

            info->str++;

            /* Complement set? */
            if (info->str < info->strEnd && *info->str == '^') {
                info->str++;
                complement = TRUE;
            }

            /* End of expression? */
            if (info->str == info->strEnd)
                AGenerateError(info, ErrUnmatchedLbracket);

            memset(set, 0, sizeof(set));

            /* One iteration per class member: a single character, a
               character range or a predefined class (code >= CC). */
            do {
                AReOpcode code;
                ch = ParseChar(info, &code);

                if (info->str == info->strEnd)
                    break;

                if (code >= CC) {
                    /* Predefined character class: merge its bit set (or the
                       complement of it) into this set. Note that this i
                       shadows the i declared at the top of the case. */
                    int i;
                    const AReOpcode *chClass =
                        ACharClass[(code - CC) & ~CC_COMP];

                    for (i = 0; i < A_SET_SIZE; i++) {
                        if (code & CC_COMP)
                            set[i] |= ~chClass[i];
                        else
                            set[i] |= chClass[i];
                    }

                    if (code == CC_W)
                        flags |= A_WS_WORD_CHAR;
                    else if (code == (CC_W | CC_COMP))
                        flags |= A_WS_NOT_WORD_CHAR;

                    /* Underline character is part of the \w set. */
                    if (code == CC_W || code == (CC_W | CC_COMP))
                        AToggleInSet(set, '_');

                    /* A predefined class cannot start a range. */
                    if (*info->str == '-')
                        AGenerateError(info, ErrInvalidCharacterSet);
                } else {
                    /* Character range? */
                    if (*info->str == '-') {
                        AWideChar hiChar = ch;

                        /* Skip '-', check end of expression. */
                        if (++info->str == info->strEnd)
                            break;

                        /* A '-' directly before ']' is a literal '-'. */
                        if (*info->str == ']')
                            AAddToSet(set, '-');
                        else {
                            hiChar = ParseChar(info, &code);
                            if (code >= CC)
                                AGenerateError(info, ErrInvalidCharacterSet);
                        }

                        AddToWideSet(info, &wset, ch, hiChar);

                        /* Only characters up to 255 go into the bit set. */
                        for (; ch <= AMin(hiChar, 255); ch++)
                            AAddToSet(set, ch);

                    } else {
                        AAddWideToSet(set, ch);
                        AddToWideSet(info, &wset, ch, ch);
                    }
                }
            } while (info->str < info->strEnd && *info->str != ']');

            if (info->str == info->strEnd)
                AGenerateError(info, ErrUnmatchedLbracket);

            /* Skip ']'. */
            info->str++;

            /* Case-insensitive matching: add both cases of every member. */
            if (info->flags & A_RE_NOCASE) {
                for (i = 0; i < 256; i++)
                    if (AIsInSet(set, i)) {
                        AAddToSet(set, ALower(i));
                        AAddToSet(set, AUpper(i));
                    }
            }

            AddStartSet(info, set, complement);
            ParseSimpleRepetition(info, A_SET);
            EmitCharSet(info, set, &wset, complement, flags);

            FreeWideSet(&wset);

            break;
        }

        case '*':
        case '+':
        case '?':
        case '{':
            /* A repetition specifier with nothing to repeat: report the
               error and move to the end of the input. */
            AGenerateError(info, ErrInvalidRepeat);
            info->str = info->strEnd;
            break;

        default:
        {
            AReOpcode code;
            AWideChar ch;

            ch = ParseChar(info, &code);

            /* Special character? */
            if (code != A_EMPTY) {
                if (code < CC) {
                    Emit(info, code);

                    if (code == A_BACKREF || code == A_BACKREF_I) {
                        /* NOTE(review): 48 is ASCII '0', so this clears
                           those bits; for ch in '0'..'9' the result is the
                           digit value - confirm ch is always a digit
                           here. */
                        int num = ch & ~48;
                        int oldMaxLen = info->maxLen;

                        /* The referenced group must be flagged in
                           parenFlags. */
                        if (!(info->parenFlags & (1 << num)))
                            AGenerateError(info, ErrInvalidBackReference);

                        Emit(info, num);

                        info->maxLen *= 2;

                        ParseRepetition(info, info->minLen, oldMaxLen, 2,
                                        info->mustStr, info->mustStrBack);
                    }
                } else {
                    /* Predefined character class used outside brackets. */
                    AReOpcode set[A_SET_SIZE];
                    int flags = 0;

                    /* NOTE(review): the byte count 32 is hard-coded;
                       presumably it equals sizeof(set) - confirm. */
                    memcpy(set, ACharClass[(code  - CC) & ~CC_COMP], 32);

                    /* Underline character is part of the \w set. */
                    if (code == CC_W || code == (CC_W | CC_COMP))
                        AToggleInSet(set, '_');

                    AddStartSet(info, set, code & CC_COMP);
                    ParseSimpleRepetition(info, A_SET);

                    if (code == CC_W || code == (CC_W | CC_COMP))
                        flags = A_WS_WORD_CHAR;

                    EmitCharSet(info, set, NULL, code & CC_COMP, flags);
                }

                break;
            }

            /* Plain literal character. While minLen is still 0 it is also
               recorded as a possible start character. */
            if (info->flags & A_RE_NOCASE) {
                code = A_LITERAL_I;

                ch = ALower(ch);

                if (info->minLen == 0) {
                    AAddWideToSet(info->startChar, ch);
                    AAddWideToSet(info->startChar, AUpper(ch));
                }
            } else {
                code = A_LITERAL;

                if (info->minLen == 0)
                    AAddWideToSet(info->startChar, ch);
            }

            /* Not followed by a repetition specifier? */
            if (info->str == info->strEnd
                    || (*info->str != '*' && *info->str != '+'
                        && *info->str != '?' && *info->str != '{')) {
                info->minLen++;
                info->maxLen++;

                litLen++;
                isPrevLit = TRUE;

                if (litLen == 2) {
                    /* Convert a single character to literal string. */
                    AWideChar prevCh;

                    /* The previous literal occupies the last two buffer
                       slots (opcode, character): the opcode is turned into
                       its string form, the character slot becomes the
                       length, and both characters are emitted after it. */
                    prevCh = info->buf[info->bufInd - 1];

                    info->buf[info->bufInd - 2] += A_STRING;
                    info->buf[info->bufInd - 1]  = 2;
                    Emit(info, prevCh);
                    Emit(info, ch);

                    if (info->mustStr == 0) {
                        info->mustStr = info->bufInd - litLen;
                        info->mustStrBack = info->maxLen - 2; /* FIX? */
                    }
                } else if (litLen > 2) {
                    /* Add a character to a literal string. */

                    Emit(info, ch);

                    /* buf[mustStr - 1] is read as the length of the string
                       recorded in mustStr; this string replaces it when it
                       is at least as long. */
                    if (info->buf[info->mustStr - 1] <= litLen) {
                        info->mustStr = info->bufInd - litLen;
                        info->mustStrBack = info->maxLen - litLen; /* FIX? */
                    }

                    /* Update the length slot of the current string. */
                    info->buf[info->bufInd - litLen - 1] = litLen;
                } else {
                    Emit(info, code);
                    Emit(info, ch);
                }
            } else {
                /* A repetition specifier follows this literal. */
                ParseSimpleRepetition(info, code);
                Emit(info, ch);
            }

            break;
        }
        }
    }
}
예제 #27
0
int SB_GetNumber (StrBuf* B, long* Val)
/* Get a number from the string buffer. Accepted formats are decimal, octal,
** hex and character constants. Numeric constants may be preceded by a
** minus or plus sign. The function returns 1 if a number was found and
** zero otherwise. Errors are only output for invalid numbers. *Val is set
** to zero before anything is parsed.
*/
{
    int      Sign;
    char     C;
    unsigned Base;
    unsigned DigitVal;


    /* Start from a defined value */
    *Val = 0;

    /* A quote introduces a character constant */
    if (SB_Peek (B) == '\'') {

        SB_Skip (B);
        *Val = SignExtendChar (TgtTranslateChar (ParseChar (B)));

        /* The constant must end with a quote */
        if (SB_Peek (B) == '\'') {
            SB_Skip (B);
            return 1;
        }
        Error ("`\'' expected");
        return 0;
    }

    /* An optional sign counts only if a digit follows it directly;
    ** otherwise this is not a number.
    */
    C = SB_Peek (B);
    Sign = (C == '-') ? -1 : 1;
    if (C == '-' || C == '+') {
        if (!IsDigit (SB_LookAt (B, SB_GetIndex (B) + 1))) {
            return 0;
        }
        SB_Skip (B);
    }

    /* Without a digit at this point it is not a number */
    C = SB_Peek (B);
    if (!IsDigit (C)) {
        return 0;
    }

    /* A leading zero selects hex (0x) or octal, anything else is decimal */
    if (C != '0') {
        Base = 10;
    } else {
        SB_Skip (B);
        if (tolower (SB_Peek (B)) != 'x') {
            Base = 8;
        } else {
            SB_Skip (B);
            Base = 16;
            if (!IsXDigit (SB_Peek (B))) {
                Error ("Invalid hexadecimal number");
                return 0;
            }
        }
    }

    /* Accumulate digits for as long as they are valid in the chosen base */
    for (;;) {
        C = SB_Peek (B);
        if (!IsXDigit (C)) {
            break;
        }
        DigitVal = HexVal (C);
        if (DigitVal >= Base) {
            break;
        }
        *Val = (*Val * Base) + DigitVal;
        SB_Skip (B);
    }

    /* Allow an optional 'U' and 'L' modifier, in either order */
    switch (SB_Peek (B)) {
        case 'u':
        case 'U':
            SB_Skip (B);
            C = SB_Peek (B);
            if (C == 'l' || C == 'L') {
                SB_Skip (B);
            }
            break;

        case 'l':
        case 'L':
            SB_Skip (B);
            C = SB_Peek (B);
            if (C == 'u' || C == 'U') {
                SB_Skip (B);
            }
            break;
    }

    /* Success, apply the sign to the value read */
    *Val *= Sign;
    return 1;
}