Пример #1
0
// A cookie header looks like: Cookie: username=xaxxon; password=comoesta
//   everything after the : gets passed in in irsData
int HttpRequest::ParseCookieData ( std::string & irsData )
{

#ifdef EHS_DEBUG
	fprintf ( stderr, "looking for cookies in '%s'\n", irsData.c_str ( ) );
#endif
	
	PME oCookieRegex ( "\\s*([^=]+)=([^;]+)(;|$)*", "g" );

	int nNameValuePairsFound = 0;

	while ( oCookieRegex.match ( irsData ) ) {

/*		fprintf ( stderr, "mapping cookie data: %s => %s\n",
		oCookieRegex [ 1 ].c_str ( ),
		oCookieRegex [ 2 ].c_str ( ) );
*/
		oCookieMap [ oCookieRegex [ 1 ] ] = oCookieRegex [ 2 ];
		nNameValuePairsFound++;

	}

	return nNameValuePairsFound;

}
Пример #2
0
std::string PME::UpdateReplacementString ( const std::string & r ) {
	std::string finalreplacement = r;
	
	// search for each backref and store it out
	PME dollars ( "\\$([0-9]+)", "g" );
	
	// search each backref
	while ( int numdollars = dollars.match ( r ) ) {
		
		// create a regex to replace the backref
		stringstream regextext;
		regextext << "[$]" << dollars[1];
		PME dollarsub ( regextext.str ( ) );
		
		// do the replacement, telling it not to look for backref
		finalreplacement = dollarsub.sub ( finalreplacement, 
										   (*this)[atoi(dollars[1].c_str())],
										   0 );
		
		
	}

	return finalreplacement;
	
}
Пример #3
0
std::string GetNextPathPart ( std::string & irsUri ///< URI to look for next path part in
	)
{


	PME oNextPathPartRegex ( "^[/]{0,1}([^/]+)/(.*)$" );
	
	if ( oNextPathPartRegex.match ( irsUri ) ) {
					 
		std::string sReturnValue = oNextPathPartRegex [ 1 ];
		std::string sNewUri = oNextPathPartRegex [ 2 ];

		irsUri = sNewUri;
		return sReturnValue;
		
	} else {

		return "";

	}

}
Пример #4
0
int main ( )
{

	PME regex ( ", " );
	int i = regex.split ( "very, very2, tired" );

	for ( int j = 0; j < i; j++ ) {
		
		printf ( "%s\n", regex [ j ].c_str ( ) );

	}

	StringVector sv = regex.GetStringVector ( );
	
	for ( StringVector::iterator i = sv.begin ( );
		  i != sv.end ( );
		  i++ ) {

		printf ( "%s\n", i->c_str ( ) );

	}


	PME match ( "([a-z])([0-9])([A-Z])" );

	match.match ( "b2Z" );

	sv = match.GetStringVector ( );

	for ( StringVector::iterator i = sv.begin ( );
		  i != sv.end ( );
		  i++ ) {

		printf ( "%s\n", i->c_str ( ) );

	}

}
Пример #5
0
void HttpRequest::GetFormDataFromString ( const std::string & irsString ///< string to parse for form data
	)
{

	PME oNameValueRegex ( "[?]?([^?=]*)=([^&]*)&?", "g" );
	while ( oNameValueRegex.match ( irsString ) ) {

		ContentDisposition oContentDisposition;
		std::string sName = oNameValueRegex [ 1 ];
		std::string sValue = oNameValueRegex [ 2 ];

#ifdef EHS_DEBUG
		fprintf ( stderr, "[EHS_DEBUG] Info: Got form data: '%s' => '%s'\n",
				  sName.c_str ( ),
				  sValue.c_str ( ) );
#endif

		oFormValueMap [ sName ] =
			FormValue ( sValue, oContentDisposition );

	}

}
Пример #6
0
// returns 1 if it got a line, 0 otherwise
int GetNextLine ( std::string & irsLine, ///< line removed from *ippsBuffer
				  std::string & irsBuffer ///< buffer from which to remove a line
	)
{

	int nResult = 0;

	// can't use split, because we lose our \r\n
	PME oLineRegex ( "^([^\\r]*\\r\\n)(.*)$", "sm" );

	if ( oLineRegex.match ( irsBuffer ) == 3 ) {

		irsLine = oLineRegex [ 1 ];
		irsBuffer = oLineRegex [ 2 ];
		
	} else {

		irsLine = "";

	}

	return nResult;

}
Пример #7
0
HttpRequest::ParseSubbodyResult HttpRequest::ParseSubbody ( std::string isSubbody ///< string in which to look for subbody stuff
	)
{

	// find the spot after the headers in the body
	std::string::size_type nBlankLinePosition = isSubbody.find ( "\r\n\r\n" );
	
	// if there's no double blank line, then this isn't a valid subbody
	if ( nBlankLinePosition == std::string::npos ) {

#ifdef EHS_DEBUG
		fprintf ( stderr, "[EHS_DEUBG] Invalix subbody, couldn't find double blank line\n" );
#endif
		return PARSESUBBODY_INVALIDSUBBODY;

	}

	// create a string from the beginning to the blank line -- OFF BY ONE?
	std::string sHeaders ( isSubbody, 0, nBlankLinePosition - 1 );


	// first line MUST be the content-disposition header line, so that
	//   we know what the name of the field is.. otherwise, we're in trouble



	int nMatchResult = 0;
	PME oContentDispositionRegex ( "Content-Disposition:[ ]?([^;]+);[ ]?(.*)" );

	nMatchResult = oContentDispositionRegex.match ( sHeaders );


	if ( nMatchResult == 3 ) {
				

		std::string sContentDisposition ( oContentDispositionRegex [ 1 ] );
		std::string sNameValuePairs ( oContentDispositionRegex [ 2 ] );

/*		fprintf ( stderr, "cont. disp. '%s', name/value pairs '%s'\n",
				  sContentDisposition.c_str ( ),
				  sNameValuePairs.c_str ( ) );
*/
		StringMap oStringMap;
		


		PME oContentDispositionNameValueRegex ( "[ ]?([^= ]+)=\"([^\"]+)\"[;]?", "g" );
			

		// go through the sNameValuePairs string and grab out the pieces
		nMatchResult = 3;

		while ( nMatchResult == 3 ) {

			nMatchResult = oContentDispositionNameValueRegex.match ( sNameValuePairs );
			if ( nMatchResult == 3 ) {

				std::string sName = oContentDispositionNameValueRegex [ 1 ];
				std::string sValue = oContentDispositionNameValueRegex [ 2 ];

#ifdef EHS_DEBUG
				fprintf ( stderr, "[EHS_DEBUG] Info: Subbody header found: '%s' => '%s' with %d matches\n",
						  sName.c_str ( ),
						  sValue.c_str ( ),
						  nMatchResult );
#endif				
				
				oStringMap [ sName ] = sValue;
				
			} else if ( nMatchResult == 0 ) {
				; // this is okay -- just done with name value pairs
			} else {
				//fprintf ( stderr, "Unexpected number of matches, '%d' != 3\n", nMatchResult );
				assert ( 0 );				
			}

			//fprintf ( stderr, "nMatchResult = %d\n", nMatchResult );
			
			
		}

//		fprintf ( stderr, "done looking for headers\n" );
		
		// take oStringMap and actually fill the right object with its data
		FormValue & roFormValue = oFormValueMap [ oStringMap [ "name" ] ];
		
		// copy the headers in now that we know the name
		roFormValue.oContentDisposition.oContentDispositionHeaders = oStringMap;
		
		// grab out the body
		roFormValue.sBody = isSubbody.substr ( nBlankLinePosition + 4);

#ifdef EHS_DEBUG		
		fprintf ( stderr, "[EHS_DEBUG] Info: Subbody body (in binary):\n---\n" );
		fwrite ( roFormValue.sBody.c_str ( ), 1, roFormValue.sBody.length ( ), stderr );
		fprintf ( stderr, "\n---\n" );
#endif
		
	}
	// couldn't find content-disposition line -- FATAL ERROR
	else {
		
#ifdef EHS_DEBUG
		fprintf ( stderr, "[EHS_DEBUG] Error: Couldn't find content-disposition line\n" );
#endif
		return PARSESUBBODY_INVALIDSUBBODY;
		
	}
	
	
	
	
	return PARSESUBBODY_SUCCESS;
	
}
Пример #8
0
// takes data and tries to figure out what it is.  it will loop through 
//   irsData as many times as it can as long as it gets a full line each time.
//   Depending on how much data it's gotten already, it will handle a line 
//   differently.. 
HttpRequest::HttpParseStates HttpRequest::ParseData ( std::string & irsData ///< buffer to look in for more data
	)
{


	std::string sLine;

	int nDoneWithCurrentData = 0;

	PME oHeaderRegex ( "^([^:]*):\\s+(.*)\\r\\n$" );

	while ( ! nDoneWithCurrentData && 
			nCurrentHttpParseState != HTTPPARSESTATE_INVALIDREQUEST &&
			nCurrentHttpParseState != HTTPPARSESTATE_COMPLETEREQUEST &&
			nCurrentHttpParseState != HTTPPARSESTATE_INVALID ) {

		switch ( nCurrentHttpParseState ) {
			
		case HTTPPARSESTATE_REQUEST:
			
			// get the request line
			GetNextLine ( sLine, irsData );
			
			// if we got a line, parse out the data..
			if ( sLine.length ( ) == 0 ) {

				nDoneWithCurrentData = 1;

			} 
			// if we got a line, look for a request line
			else {
				
				// everything must be uppercase according to rfc2616
				PME oRequestLineRegex ( "^(OPTIONS|GET|HEAD|POST|PUT|DELETE|TRACE|CONNECT) ([^ ]*) HTTP/([^ ]+)\\r\\n$" );

				if ( oRequestLineRegex.match ( sLine ) ) {

					// get the info from the request line
					nRequestMethod = GetRequestMethodFromString ( oRequestLineRegex [ 1 ] );
					sUri = oRequestLineRegex [ 2 ];
					sOriginalUri = oRequestLineRegex [ 2 ];
					sHttpVersionNumber = oRequestLineRegex [ 3 ];
					
					
					// check to see if the uri appeared to have form data in it
					GetFormDataFromString ( sUri );

					// on to the headers
					nCurrentHttpParseState = HTTPPARSESTATE_HEADERS;
					
				} 
				// if the regex failed
				else {

					nCurrentHttpParseState = HTTPPARSESTATE_INVALIDREQUEST;

				} // end match on request line
			
			} // end whether we got a line
			
			break;
			
		case HTTPPARSESTATE_HEADERS:
			
			// get the next line
			GetNextLine ( sLine, irsData );

			// if we didn't get a full line of data
			if ( sLine.length ( ) == 0 ) {

				nDoneWithCurrentData = 1;

			}
			// check to see if we're done with headers
			else if ( sLine == "\r\n" ) {

				// if content length is found
				if ( oRequestHeaders.find ( "content-length" ) !=
					 oRequestHeaders.end ( ) ) {
					
					nCurrentHttpParseState = HTTPPARSESTATE_BODY;
				
				} else {

					
					nCurrentHttpParseState = HTTPPARSESTATE_COMPLETEREQUEST;

				}

				// if this is an HTTP/1.1 request, then it had better have a Host: header
				if ( sHttpVersionNumber == "1.1" && 
					 oRequestHeaders [ "host" ].length ( ) == 0 ) {

					nCurrentHttpParseState = HTTPPARSESTATE_INVALIDREQUEST;

				}

			}
			// else if there is still data
			else if ( oHeaderRegex.match ( sLine ) ) {

				std::string sName = oHeaderRegex [ 1 ];
				std::string sValue = oHeaderRegex [ 2 ];

				if ( sName == "Transfer-Encoding" &&
					 sValue == "chunked" ) {
					
					fprintf ( stderr, "EHS DOES NOT SUPPORT CHUNKED ENCODING.  Send an email to [email protected] and tell him you want chunked encoding (or send a patch)\n" );
					nCurrentHttpParseState = HTTPPARSESTATE_INVALIDREQUEST;

				}

				sName = mytolower ( sName );

				if ( sName == "cookie" ) {

					ParseCookieData ( sValue );

				}

				oRequestHeaders [ sName ] = sValue;

				
				
			}
			// else we had some sort of error -- bail out
			else {

#ifdef EHS_DEBUG
				fprintf ( stderr, "[EHS_DEBUG] Error: Invalid header line: '%s'\n",
						  sLine.c_str ( ) );
#endif

				nCurrentHttpParseState = HTTPPARSESTATE_INVALIDREQUEST;
				nDoneWithCurrentData = 1;

			}

			break;

			
		case HTTPPARSESTATE_BODY:
		{

			// if a content length wasn't specified, we can't be here (we 
			//   don't know chunked encoding)
			if ( oRequestHeaders.find ( "content-length" ) == 
				 oRequestHeaders.end ( ) ) {

				nCurrentHttpParseState = HTTPPARSESTATE_INVALIDREQUEST;
				continue;

			}

			// get the content length
			unsigned int nContentLength = 
				atoi ( oRequestHeaders [ "content-length" ].c_str ( ) );

			// else if we haven't gotten all the data we're looking for,
			//   just hold off and try again when we get more
			if ( irsData.length ( ) < nContentLength ) {

#ifdef EHS_DEBUG
				fprintf ( stderr, "[EHS_DEBUG] Info: Not enough data yet -- %d < %d\n",
						  irsData.length ( ), nContentLength );
#endif
				nDoneWithCurrentData = 1;

			}
			// otherwise, we've gotten enough data from the client, handle it now
			else {

				// grab out the actual body from the request and leave the rest
				sBody = irsData.substr ( 0, nContentLength );

				irsData = irsData.substr ( nContentLength );

				
				// if we're dealing with multi-part form attachments
				if ( oRequestHeaders [ "content-type" ].substr ( 0, 9 ) == 
					 "multipart" ) {

					// handle the body as if it's multipart form data
					if ( ParseMultipartFormData ( ) == 
						 PARSEMULTIPARTFORMDATA_SUCCESS ) {

						nCurrentHttpParseState = HTTPPARSESTATE_COMPLETEREQUEST;

					} else {

#ifdef EHS_DEBUG						
						fprintf ( stderr, "[EHS_DEBUG] Error: Mishandled multi-part form data for unknown reason\n" );
#endif
						nCurrentHttpParseState = HTTPPARSESTATE_INVALIDREQUEST;

					}
	

				}
				// else the body is just one piece
				else {
					// check for any form data
					GetFormDataFromString ( sBody );
					
#ifdef EHS_DEBUG
					fprintf ( stderr, "Done with body, done with entire request\n" );
#endif
					
					nCurrentHttpParseState = HTTPPARSESTATE_COMPLETEREQUEST;
				}
				
			}
			
			nDoneWithCurrentData = 1;

		}

		break;


		case HTTPPARSESTATE_INVALID:
		default:
#ifdef EHS_DEBUG
			fprintf ( stderr, "[EHS_DEBUG] Critical error: Invalid internal state: %d.  Aborting\n", nCurrentHttpParseState );
#endif
			assert ( 0 );
			break;
		}
	}

	return nCurrentHttpParseState;
	
}
Пример #9
0
HttpRequest::ParseMultipartFormDataResult
HttpRequest::ParseMultipartFormData ( )
{
	
	assert ( !oRequestHeaders [ "content-type" ].empty ( ) );
	

	// find the boundary string
	int nMatchResult = 0;

	PME oMultipartFormDataContentTypeValueRegex ( "multipart/[^;]+;[ ]*boundary=([^\"]+)$" );

#ifdef EHS_DEBUG
	fprintf ( stderr, "looking for boundary in '%s'\n",oRequestHeaders [ "content-type" ].c_str ( ) );
#endif	

	if ( ( nMatchResult = oMultipartFormDataContentTypeValueRegex.match ( oRequestHeaders [ "content-type" ] ) ) ) {

		
		assert ( nMatchResult == 2 );

		std::string sBoundaryString = oMultipartFormDataContentTypeValueRegex [ 1 ];


		// the actual boundary has two dashes prepended to it
		std::string sActualBoundary = std::string ("--") + sBoundaryString;

#ifdef EHS_DEBUG
		fprintf ( stderr, "[EHS_DEBUG] Info: Found boundary of '%s'\n", sBoundaryString.c_str ( ) );
		fprintf ( stderr, "[EHS_DEBUG] Info: Looking for boundary to match (%d) '%s'\n", sBody.length ( ), 
				  sBody.substr ( 0, sActualBoundary.length ( ) ).c_str ( ) );
#endif


		// check to make sure we started at the boundary
		if ( sBody.substr ( 0, sActualBoundary.length ( ) ) != sActualBoundary ) {
#ifdef EHS_DEBUG
			fprintf ( stderr, "[EHS_DEBUG] Error: Misparsed multi-part form data for unknown reason - first bytes weren't the boundary string\n" );
#endif			
			return PARSEMULTIPARTFORMDATA_FAILED;

		}


		// go past the initial boundary
		std::string sRemainingBody = sBody.substr ( sActualBoundary.length ( ) );

		// while we're at a boundary after we grab a part, keep going
		std::string::size_type nNextPartPosition;


		while ( ( nNextPartPosition = sRemainingBody.find ( std::string ( "\r\n" ) + sActualBoundary ) ) != 
				std::string::npos ) {


#ifdef EHS_DEBUG
			fprintf ( stderr, "[EHS_DEBUG] Info: Found subbody from pos %d to %d\n",
					  0, nNextPartPosition );
#endif

			assert ( (unsigned int) sRemainingBody.length ( ) >= sActualBoundary.length ( ) );
			
			ParseSubbody ( sRemainingBody.substr ( 0, nNextPartPosition ) );
			
			// skip past the boundary at the end and look for the next one
			nNextPartPosition += sActualBoundary.length ( );

			sRemainingBody = sRemainingBody.substr ( nNextPartPosition );

		}
			
	} else {

#ifdef EHS_DEBUG
		fprintf ( stderr, "[EHS_DEBUG] Error: Couldn't find boundary specification in content-type header\n" );
#endif
		return PARSEMULTIPARTFORMDATA_FAILED;
	}

	return PARSEMULTIPARTFORMDATA_SUCCESS;
	
}