Example #1
0
ExprTree *ClassAdXMLParser::
ParseNumberOrString(XMLLexer::TagID tag_id)
{

	bool             have_token;
	ExprTree         *tree;
	XMLLexer::Token  token;

	// Get start tag
	tree = NULL;
	have_token = lexer.ConsumeToken(&token);
	assert(have_token && token.tag_id == tag_id);

	// Get text of number or string
	have_token = lexer.PeekToken(&token);

	if (have_token && token.token_type == XMLLexer::tokenType_Text) {
		lexer.ConsumeToken(&token);
		Value value;
		if (tag_id == XMLLexer::tagID_Integer) {
			long long number;
			sscanf(token.text.c_str(), "%lld", &number);
			value.SetIntegerValue(number);
		}
		else if (tag_id == XMLLexer::tagID_Real) {
			double real;
			real = strtod(token.text.c_str(), NULL);
			value.SetRealValue(real);
		}
		else {        // it's a string
			bool validStr = true;
			token.text += " ";
			convert_escapes(token.text, validStr );
			if(!validStr) {  // invalid string because it had a \0 escape sequence
				return NULL;
			} else {
				value.SetStringValue(token.text);
			}
		}
	
		tree = Literal::MakeLiteral(value);
		
	} else if (tag_id == XMLLexer::tagID_String) {
		// We were expecting text and got none, so we had
		// the empty string, which was skipped by the lexer.
		Value  value;
		value.SetStringValue("");
		tree = Literal::MakeLiteral(value);
	}

	SwallowEndTag(tag_id);

	return tree;
}
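Note that the sscanf call in example #1 discards its return value, so non-numeric text leaves `number` uninitialized and the failure goes unnoticed. A minimal sketch of the missing check; the helper name parse_long_long is hypothetical and not part of the parser:

#include <cstdio>

// Returns true only if the text actually contained an integer.
// sscanf reports the number of successful conversions, so 1 means the
// %lld conversion matched and 0 (or EOF) means it did not.
static bool parse_long_long(const char *text, long long &number)
{
	return sscanf(text, "%lld", &number) == 1;
}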
Example #2
0
ExprTree *ClassAdXMLParser::
ParseNumberOrString(XMLLexer::TagID tag_id)
{

	bool             have_token;
	ExprTree         *tree;
	XMLLexer::Token  token;

	// Get start tag
	tree = NULL;
	have_token = lexer.ConsumeToken(&token);
	assert(have_token && token.tag_id == tag_id);

	// Get text of number or string
	have_token = lexer.PeekToken(&token);

	if (have_token && token.token_type == XMLLexer::tokenType_Text) {
		lexer.ConsumeToken(&token);
		Value value;
		if (tag_id == XMLLexer::tagID_Integer) {
			long long number;
			char * pend;
			const char * pnum = token.text.c_str();
			number = strtoll(pnum, &pend, 10);
			if ( ! number && (pend == pnum)) {
				value.SetErrorValue();
			} else {
				value.SetIntegerValue(number);
			}
		}
		else if (tag_id == XMLLexer::tagID_Real) {
			double real;
			char * pend;
			const char * pnum = token.text.c_str();
			real = strtod(pnum, &pend);
			if (pend == pnum) {
				value.SetErrorValue();
			} else {
				value.SetRealValue(real);
			}
		}
		else {        // it's a string
			bool validStr = true;
			//token.text.push_back('\0'); // force an explicit null terminator (because that's what the normal lexer does.)
			convert_escapes(token.text, validStr );
			if(!validStr) {  // invalid string because it had a \0 escape sequence
				return NULL;
			} else {
				value.SetStringValue(token.text);
			}
		}
	
		tree = Literal::MakeLiteral(value);
		
	} else if (tag_id == XMLLexer::tagID_String) {
		// We were expecting text and got none, so we had
		// the empty string, which was skipped by the lexer.
		Value  value;
		value.SetStringValue("");
		tree = Literal::MakeLiteral(value);
	}

	SwallowEndTag(tag_id);

	return tree;
}
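For reference, the end-pointer test that example #2 relies on can be exercised on its own. This is a minimal standalone sketch using only the standard library, not code from the parser:

#include <cstdlib>
#include <cassert>

int main()
{
	char *pend;
	const char *good = "42";
	const char *bad  = "abc";

	long long n = strtoll(good, &pend, 10);
	assert(n == 42 && pend != good);   // characters were consumed: a real number

	n = strtoll(bad, &pend, 10);
	assert(n == 0 && pend == bad);     // nothing consumed: treat as an error value

	return 0;
}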
Example #3
0
void XMLLexer::
BreakdownTag(const char *complete_tag)
{

	int length, i;
	int start, count;

	length = strlen(complete_tag);
	
	// Skip whitespace
	for (i = 0; i < length && isspace(complete_tag[i]); i++) {
		;
	}
	
	// Is it a begin or end tag?
	if (complete_tag[i] == '/') {
		current_token.tag_type = tagType_End;
		i++;
	} else if (complete_tag[length-1] == '/') {
		current_token.tag_type = tagType_Empty;
		length--; // skip the / in the processing that follows.
	} else {
		current_token.tag_type = tagType_Start;
	}
	
	// Now pull out the tag name
	current_token.text = "";
	start = i;
	count = 0;
	while (i < length && complete_tag[i] != '>' && !isspace(complete_tag[i])) {
		//current_token.text += complete_tag[i];
		i++;
		count++;
	}
	// With gcc-2.x's STL, this is faster than using a bunch of += statements.
	current_token.text.assign(complete_tag+start, count);

	// Figure out which tag it is
	current_token.tag_id = tagID_NoTag;
	for (unsigned int x = 0; x < NUMBER_OF_TAG_MAPPINGS; x++) {
		if (!strcmp(current_token.text.c_str(), tag_mappings[x].tag_name)) {
			current_token.tag_id = tag_mappings[x].id;
			break;
		}
	}
	
	// If we're not at the end, we probably have attributes, so let's
	// pull them out. (We might just have whitespace though.)
	while (i < length) {
		string name, value;

		name  = "";
		value = "";

		// Skip whitespace
		while (i < length && isspace(complete_tag[i])) {
			i++;
		}

		// Now take text up to a whitespace or equal sign. This is the name
		start = i;
		count = 0;
		while (i < length 
			   && !isspace(complete_tag[i]) 
			   && complete_tag[i] != '=') {
			//name += complete_tag[i];
			i++;
			count++;
		}
		// With gcc-2.x's STL, this is faster than using a bunch of += statements.
		name.assign(complete_tag+start, count);

		// Now skip whitespace and equal signs
		// Note that this allows some technically illegal things
		// like " == = = =", but who really cares?
		while (i < length
			   && (isspace(complete_tag[i]) || complete_tag[i] == '=')) {
			i++;
		}

		i++; // go past 1st \"

		// Now pick out the value
		char oldCh = 0;
		// consume the string literal; read up to the closing " ignoring \"
		while (    (i<length)  
				&& (    complete_tag[i] != '\"' 
					 || ( complete_tag[i] == '\"' && oldCh == '\\' ) ) ) {
			oldCh = complete_tag[i];
			value += complete_tag[i];
			i++;
		}
		// scan string for &...; sequences and replace them with their corresponding entities
		for (unsigned int k = 0; k < value.length(); k++) {
			if (value[k] == '&') { // collect the candidate entity, ';' included
				unsigned int index = k;
				string str;
				do {
					str += value[index];
					index++;
				} while (index < value.length() && value[index-1] != ';');
				for (unsigned int j = 0; j < NUMBER_OF_ENTITIES; j++) {
					if (!strcmp(str.c_str(), entities[j].name)) {
						value.replace(k, str.length(), entities[j].replacement_text);
						break;
					}
				}
			}
		}
		bool validStr = true;
		//value.push_back('\0'); // force an explicit null terminator (because that's what the normal lexer does.)
		convert_escapes(value, validStr);
		if(!validStr) {  // contains a \0 escape char
			current_token.tag_type = tagType_Invalid;
		}
		else if (name.size() > 0 && value.size() > 0) {
			current_token.attributes[name] = value;
		}		
	}
	return;
}
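The entity-substitution pass in BreakdownTag collects "&...;" candidates and looks them up in a table. Below is a self-contained sketch of the same idea; the five-entry table is an assumption standing in for entities[] / NUMBER_OF_ENTITIES, not copied from the lexer:

#include <string>

struct Entity { const char *name; const char *replacement; };
static const Entity kEntities[] = {
	{ "&amp;", "&" }, { "&lt;", "<" }, { "&gt;", ">" },
	{ "&quot;", "\"" }, { "&apos;", "'" },
};

// Replace recognized &...; sequences in place; unknown or unterminated
// candidates are left untouched.
static void replace_entities(std::string &value)
{
	for (std::string::size_type k = 0; k < value.length(); k++) {
		if (value[k] != '&') continue;
		std::string::size_type semi = value.find(';', k);
		if (semi == std::string::npos) break;   // no terminator: nothing more to replace
		std::string candidate = value.substr(k, semi - k + 1);
		for (const Entity &e : kEntities) {
			if (candidate == e.name) {
				value.replace(k, candidate.length(), e.replacement);
				break;
			}
		}
	}
}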
Example #4
0
// tokenizeString:  Scans string literals of the form " ... " or '...',
// based on whether the delimiter argument was '\"' or '\''
int Lexer::
tokenizeString(char delim)
{
	bool stringComplete = false;

	// need to mark() after the quote
	inString = true;
	wind ();
	mark ();
	
	while (!stringComplete) {
		bool oddBackWhacks = false;
		int oldCh = 0;
		// consume the string literal; read up to the delimiter, ignoring escaped ones
		while( ( ch > 0 ) && ( ch != delim || ( ch == delim && oldCh == '\\' && oddBackWhacks ) ) ) {
			if( !oddBackWhacks && ch == '\\' ) {
				oddBackWhacks = true;
			}
			else {
				oddBackWhacks = false;
			}
			oldCh = ch;
			wind( );
		}
		
		if( ch == delim ) {
			int tempch = ' ';
			// read past the whitespace characters
			while (isspace(tempch)) {
				tempch = lexSource->ReadCharacter();
			}
			if (tempch != delim) {  // a new token exists after the string
				if (tempch != -1) {
					lexSource->UnreadCharacter();
				}
				stringComplete = true;
			} else {    // the adjacent string is to be concatenated to the existing string
				lexBuffer.erase(lexBufferCount--); // erase the lagging '\"'
				wind();
			}
		}
		else {
			// loop quit due to ch == 0 or ch == EOF
			tokenType = LEX_TOKEN_ERROR;
			return tokenType;
		}    
	}
	cut( );
	wind( );	// skip over the close quote
	bool validStr = true; // to check if string is valid after converting escape
	convert_escapes(lexBuffer, validStr);
	yylval.SetStringValue( lexBuffer.c_str( ) );
	if (validStr) {
		if(delim == '\"') {
			tokenType = LEX_STRING_VALUE;
		}
		else {
			tokenType = LEX_IDENTIFIER;
		}
	}
	else {
		tokenType = LEX_TOKEN_ERROR; // string contains a '\0' character in the middle
	}
	
	return tokenType;
}
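The escape handling above turns on whether the delimiter is preceded by an odd-length run of backslashes. That parity test can be shown in isolation; find_unescaped is a hypothetical helper for illustration only, not part of the Lexer:

#include <cstddef>

// Return the index of the first unescaped occurrence of delim in s[0..len),
// or len if there is none.  A delimiter counts as escaped when it is
// preceded by an odd number of consecutive backslashes.
static size_t find_unescaped(const char *s, size_t len, char delim)
{
	bool oddBackWhacks = false;   // true while the current backslash run has odd length
	for (size_t i = 0; i < len; i++) {
		if (s[i] == delim && !oddBackWhacks) {
			return i;
		}
		oddBackWhacks = (s[i] == '\\') ? !oddBackWhacks : false;
	}
	return len;
}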