Esempio n. 1
0
void encoding::parseImpl(
	const parsingContext& /* ctx */,
	const string& buffer,
	const size_t position,
	const size_t end,
	size_t* newPosition
) {

	m_usage = USAGE_UNKNOWN;

	m_name = utility::stringUtils::toLower(
		utility::stringUtils::trim(
			utility::stringUtils::unquote(
				utility::stringUtils::trim(
					string(buffer.begin() + position, buffer.begin() + end)
				)
			)
		)
	);

	if (m_name.empty()) {
		m_name = encodingTypes::SEVEN_BIT;   // assume default "7-bit"
	}

	setParsedBounds(position, end);

	if (newPosition) {
		*newPosition = end;
	}
}
Esempio n. 2
0
// Parses a "type/subtype" media type from the [position, end) range of
// 'buffer'. Both parts are stored lower-cased and trimmed. When there is
// no '/' in the range, only m_type is assigned and m_subType is left as is.
// When 'newPosition' is not null, it receives 'end'.
void mediaType::parse(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	// Locate the '/' separating the type from the sub-type
	string::size_type slashPos = position;

	while (slashPos < end && buffer[slashPos] != '/')
		++slashPos;

	// Everything before the separator is the type
	const string rawType(buffer.begin() + position, buffer.begin() + slashPos);

	m_type = utility::stringUtils::trim(utility::stringUtils::toLower(rawType));

	if (slashPos < end)
	{
		// Everything after the separator is the sub-type
		const string rawSubType(buffer.begin() + slashPos + 1, buffer.begin() + end);

		m_subType = utility::stringUtils::trim(utility::stringUtils::toLower(rawSubType));
	}

	setParsedBounds(position, end);

	if (newPosition)
	{
		*newPosition = end;
	}
}
Esempio n. 3
0
// Parses a mailbox group ("name: address, address...") from the
// [position, end) range of 'buffer'. The text before the first ':' is the
// group name; the addresses that follow are appended to m_list, sub-groups
// being flattened into this group. When 'newPosition' is not null, it
// receives 'end'.
void mailboxGroup::parseImpl(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	const string::value_type* const rangeBegin = buffer.data() + position;
	const string::value_type* const rangeEnd = buffer.data() + end;

	// Leading blanks are not part of the group name
	const string::value_type* nameBegin = rangeBegin;

	while (nameBegin < rangeEnd && parserHelpers::isSpace(*nameBegin))
		++nameBegin;

	// The name runs up to the ':' delimiter (or up to the end of the range)
	const string::value_type* cur = nameBegin;

	while (cur < rangeEnd && *cur != ':')
		++cur;

	const string rawName(nameBegin, cur);

	// Step over the ':' delimiter, if there is one
	if (cur < rangeEnd && *cur == ':')
		++cur;

	// Parse the addresses one after the other; parseNext() moves the cursor
	for (string::size_type cursor = position + (cur - rangeBegin) ; cursor < end ; )
	{
		ref <address> parsedAddress = address::parseNext(buffer, cursor, end, &cursor);

		if (parsedAddress)
		{
			if (!parsedAddress->isGroup())
			{
				m_list.push_back(parsedAddress.staticCast <mailbox>());
			}
			else
			{
				// Sub-groups are not allowed in mailbox groups: copy every
				// mailbox of the sub-group directly into this group instead.
				ref <mailboxGroup> subGroup = parsedAddress.staticCast <mailboxGroup>();

				for (int idx = 0 ; idx < subGroup->getMailboxCount() ; ++idx)
					m_list.push_back(subGroup->getMailboxAt(idx)->clone().staticCast <mailbox>());
			}
		}
	}

	text::decodeAndUnfold(rawName, &m_name);

	setParsedBounds(position, end);

	if (newPosition)
	{
		*newPosition = end;
	}
}
Esempio n. 4
0
void contentDisposition::parseImpl
	(const parsingContext& /* ctx */, const string& buffer, const size_t position,
	 const size_t end, size_t* newPosition)
{
	m_name = utility::stringUtils::trim(utility::stringUtils::toLower
		(string(buffer.begin() + position, buffer.begin() + end)));

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
Esempio n. 5
0
void charset::parseImpl
	(const parsingContext& /* ctx */, const string& buffer, const size_t position,
	 const size_t end, size_t* newPosition)
{
	m_name = utility::stringUtils::trim
		(string(buffer.begin() + position, buffer.begin() + end));

	// If we parsed this rfc-1642 valid MIME charset, convert it to something usefull for iconv
	if (utility::stringUtils::isStringEqualNoCase(m_name, "unicode-1-1-utf-7"))
		m_name = "utf-7";

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
Esempio n. 6
0
void text::parseImpl
	(const parsingContext& ctx, const string& buffer, const size_t position,
	 const size_t end, size_t* newPosition)
{
	removeAllWords();

	size_t newPos;

	const std::vector <shared_ptr <word> > words = word::parseMultiple(ctx, buffer, position, end, &newPos);

	copy_vector(words, m_words);

	setParsedBounds(position, newPos);

	if (newPosition)
		*newPosition = newPos;
}
Esempio n. 7
0
// Parses a body part from the [position, end) range of the stream wrapped
// by 'parser': first the header, then the body, which starts where the
// header parser stopped. When 'newPosition' is not null, it receives 'end'.
void bodyPart::parseImpl
	(ref <utility::parserInputStreamAdapter> parser,
	 const utility::stream::size_type position,
	 const utility::stream::size_type end,
	 utility::stream::size_type* newPosition)
{
	// The header parser reports where it stopped through 'bodyStart'
	string::size_type bodyStart = position;

	m_header->parse(parser, bodyStart, end, &bodyStart);

	// The rest of the range is the body contents
	m_body->parse(parser, bodyStart, end, NULL);

	setParsedBounds(position, end);

	if (newPosition)
	{
		*newPosition = end;
	}
}
Esempio n. 8
0
// Parses header fields from the [position, end) range of 'buffer',
// replacing the current fields. Parsing stops at 'end' or as soon as
// headerField::parseNext() returns no field. The parsed bounds and
// '*newPosition' (when not null) use the position reached at that point.
void header::parseImpl(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	// Start from an empty field list
	removeAllFields();

	string::size_type cursor = position;

	for (;;)
	{
		if (cursor >= end)
			break;

		ref <headerField> parsedField = headerField::parseNext(buffer, cursor, end, &cursor);

		// No more field to read
		if (parsedField == NULL)
			break;

		m_fields.push_back(parsedField);
	}

	setParsedBounds(position, cursor);

	if (newPosition)
	{
		*newPosition = cursor;
	}
}
Esempio n. 9
0
void addressList::parseImpl
	(const parsingContext& ctx, const string& buffer, const size_t position,
	 const size_t end, size_t* newPosition)
{
	removeAllAddresses();

	size_t pos = position;

	while (pos < end)
	{
		shared_ptr <address> parsedAddress = address::parseNext(ctx, buffer, pos, end, &pos, NULL);

		if (parsedAddress != NULL)
			m_list.push_back(parsedAddress);
	}

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
Esempio n. 10
0
void messageIdSequence::parseImpl
	(const parsingContext& ctx, const string& buffer, const size_t position,
	 const size_t end, size_t* newPosition)
{
	removeAllMessageIds();

	size_t pos = position;

	while (pos < end)
	{
		shared_ptr <messageId> parsedMid = messageId::parseNext(ctx, buffer, pos, end, &pos);

		if (parsedMid != NULL)
			m_list.push_back(parsedMid);
	}

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
Esempio n. 11
0
// Parses the value of a mailbox field from the [position, end) range of
// 'buffer'. Only the first address found is considered: a plain mailbox is
// used directly, and for a group its first mailbox (if any) is used. When
// nothing usable is found, the value is a newly created mailbox.
// When 'newPosition' is not null, it receives 'end'.
void mailboxField::parse
	(const parsingContext& ctx, const string& buffer, const string::size_type position,
	 const string::size_type end, string::size_type* newPosition)
{
	// Fallback value, kept when no mailbox can be extracted
	ref <mailbox> result = vmime::create <mailbox>();

	// "m_mailbox.parse()" cannot be used here: the field may hold more than
	// one address (even though it should not), so only the first address is
	// read and the remaining ones are ignored.
	ref <address> firstAddress = address::parseNext(ctx, buffer, position, end, newPosition, NULL);

	if (firstAddress)
	{
		if (!firstAddress->isGroup())
		{
			// A plain mailbox: use it as is
			result = firstAddress.staticCast <mailbox>();
		}
		else
		{
			// A group of mailboxes: keep its first mailbox, if there is one
			ref <mailboxGroup> firstGroup = firstAddress.staticCast <mailboxGroup>();

			if (!firstGroup->isEmpty())
			{
				result = firstGroup->getMailboxAt(0);
			}
		}
	}

	result->setParsedBounds(position, end);

	setValue(result);

	setParsedBounds(position, end);

	// Whatever parseNext() reported, the whole range is considered consumed
	if (newPosition)
	{
		*newPosition = end;
	}
}
Esempio n. 12
0
// Parses body contents from the [position, end) range of the stream
// wrapped by 'parser'.
//
// Existing parts, prolog text and epilog text are discarded first. Then:
//  - if the header has a "Content-Type" field of type multipart and a
//    boundary is known (declared as a parameter, or guessed from the
//    data), the range is split on the boundary into a prolog, a list of
//    body parts and an epilog;
//  - otherwise, the whole range is exposed as a stream content handler
//    using the encoding of the "Content-Transfer-Encoding" field
//    ("7bit" when this field is missing).
//
// When 'newPosition' is not null, it receives 'end'.
void body::parseImpl
(ref <utility::parserInputStreamAdapter> parser,
 const utility::stream::size_type position,
 const utility::stream::size_type end,
 utility::stream::size_type* newPosition)
{
    removeAllParts();

    m_prologText.clear();
    m_epilogText.clear();

    // Empty range: there is nothing to parse, only the bounds are recorded
    if (end == position)
    {

        setParsedBounds(position, end);

        if (newPosition)
            *newPosition = end;

        return;
    }

    // Check whether the body is a MIME-multipart
    bool isMultipart = false;
    string boundary;

    try
    {
        // NOTE(review): the results of both dynamicCast calls below are
        // dereferenced without a null check -- confirm they cannot fail here.
        const ref <const contentTypeField> ctf =
            m_header.acquire()->findField(fields::CONTENT_TYPE).dynamicCast <contentTypeField>();

        const mediaType type = *ctf->getValue().dynamicCast <const mediaType>();

        if (type.getType() == mediaTypes::MULTIPART)
        {
            isMultipart = true;

            try
            {
                boundary = ctf->getBoundary();
            }
            catch (exceptions::no_such_parameter&)
            {
                // No "boundary" parameter specified: we can try to
                // guess it by scanning the body contents...
                utility::stream::size_type pos = position;

                parser->seek(pos);

                // Either the data starts with "--"...
                if (pos + 2 < end && parser->matchBytes("--", 2))
                {
                    pos += 2;
                }
                // ...or we look for the first "--" following a line feed
                else
                {
                    pos = parser->findNext("\n--", position);

                    if ((pos != utility::stream::npos) && (pos + 3 < end))
                        pos += 3;  // skip \n--
                }

                if ((pos != utility::stream::npos) && (pos < end))
                {
                    parser->seek(pos);

                    // Read some bytes after boundary separator
                    utility::stream::value_type buffer[256];
                    const utility::stream::size_type bufferLen =
                        parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0])));

                    // Force the last element to NUL (this overwrites the last
                    // byte when the buffer is full; only the first 100 bytes
                    // are examined below, so it does not affect the result)
                    buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0';

                    // Extract boundary from buffer (stop at first CR or LF).
                    // We have to stop after a reasonably long boundary length (100)
                    // not to take the whole body contents for a boundary...
                    string::value_type boundaryBytes[100];
                    string::size_type boundaryLen = 0;

                    for (string::value_type c = buffer[0] ;
                            boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ;
                            c = buffer[++boundaryLen])
                    {
                        boundaryBytes[boundaryLen] = buffer[boundaryLen];
                    }

                    // A length of 100 means that no end of line was found within
                    // the limit: in that case, no boundary is guessed
                    if (boundaryLen >= 1 && boundaryLen < 100)
                    {
                        // RFC #1521, Page 31:
                        // "...the boundary parameter, which consists of 1 to 70
                        //  characters from a set of characters known to be very
                        //  robust through email gateways, and NOT ending with
                        //  white space..."
                        while (boundaryLen != 0 &&
                                parserHelpers::isSpace(boundaryBytes[boundaryLen - 1]))
                        {
                            boundaryLen--;
                        }

                        if (boundaryLen >= 1)
                            boundary = string(boundaryBytes, boundaryBytes + boundaryLen);
                    }
                }
            }
        }
    }
    catch (exceptions::no_such_field&)
    {
        // No "Content-Type" field...
    }

    // This is a multi-part body
    if (isMultipart && !boundary.empty())
    {
        const string boundarySep("--" + boundary);

        utility::stream::size_type partStart = position;
        utility::stream::size_type pos = position;

        bool lastPart = false;

        // Look for the first occurrence of the boundary which is located at
        // the beginning of a line and which is followed by CR, LF or '-'
        while (pos != utility::stream::npos && pos < end)
        {
            pos = parser->findNext(boundarySep, pos);

            if (pos == utility::stream::npos)
                break;  // not found

            if (pos != 0)
            {
                parser->seek(pos - 1);

                if (parser->peekByte() != '\n')
                {
                    // Boundary is not at a beginning of a line
                    pos++;
                    continue;
                }

                // Move right after the boundary (the LF byte plus the separator)
                parser->skip(1 + boundarySep.length());
            }
            else
            {
                parser->seek(pos + boundarySep.length());
            }

            const utility::stream::value_type next = parser->peekByte();

            if (next == '\r' || next == '\n' || next == '-')
                break;

            // Boundary is a prefix of another, continue the search
            pos++;
        }

        // Everything located before the first boundary is the prolog text
        if (pos != utility::stream::npos && pos < end)
        {
            vmime::text text;
            text.parse(parser, position, pos);

            m_prologText = text.getWholeBuffer();
        }

        // Each iteration starts with 'pos' on a boundary: the part located
        // between the previous boundary line ('partStart') and this boundary
        // is parsed, then the next boundary is searched for.
        for (int index = 0 ; !lastPart && (pos != utility::stream::npos) && (pos < end) ; ++index)
        {
            utility::stream::size_type partEnd = pos;

            // Get rid of the [CR]LF just before the boundary string
            if (pos >= (position + 1))
            {
                parser->seek(pos - 1);

                if (parser->peekByte() == '\n')
                    --partEnd;
            }

            // NOTE(review): the CR test is done whether or not a LF was found
            // by the previous test -- confirm this is intended.
            if (pos >= (position + 2))
            {
                parser->seek(pos - 2);

                if (parser->peekByte() == '\r')
                    --partEnd;
            }

            // Check whether it is the last part (boundary terminated by "--")
            pos += boundarySep.length();
            parser->seek(pos);

            if (pos + 1 < end && parser->matchBytes("--", 2))
            {
                lastPart = true;
                pos += 2;
            }

            // RFC #1521, Page 31:
            // "...(If a boundary appears to end with white space, the
            //  white space must be presumed to have been added by a
            //  gateway, and must be deleted.)..."
            parser->seek(pos);
            pos += parser->skipIf(parserHelpers::isSpaceOrTab, end);

            // End of boundary line
            if (pos + 1 < end && parser->matchBytes("\r\n", 2))
            {
                pos += 2;
            }
            else if (pos < end && parser->peekByte() == '\n')
            {
                ++pos;
            }

            // The first boundary (index 0) only ends the prolog: a part is
            // created starting from the second boundary found
            if (index > 0)
            {
                ref <bodyPart> part = vmime::create <bodyPart>();

                // End before start may happen on empty bodyparts (directly
                // successive boundaries without even a line-break)
                if (partEnd < partStart)
                    std::swap(partStart, partEnd);

                part->parse(parser, partStart, partEnd, NULL);
                part->m_parent = m_part;

                m_parts.push_back(part);
            }

            // The next part starts right after this boundary line
            partStart = pos;

            // Look for the next boundary (same search as for the first one)
            while (pos != utility::stream::npos && pos < end)
            {
                pos = parser->findNext(boundarySep, pos);

                if (pos == utility::stream::npos)
                    break;  // not found

                if (pos != 0)
                {
                    parser->seek(pos - 1);

                    if (parser->peekByte() != '\n')
                    {
                        // Boundary is not at a beginning of a line
                        pos++;
                        continue;
                    }

                    parser->skip(1 + boundarySep.length());
                }
                else
                {
                    parser->seek(pos + boundarySep.length());
                }

                const utility::stream::value_type next = parser->peekByte();

                if (next == '\r' || next == '\n' || next == '-')
                    break;

                // Boundary is a prefix of another, continue the search
                pos++;
            }
        }

        // A multipart body has no contents of its own
        m_contents = vmime::create <emptyContentHandler>();

        // Last part was not found: recover from missing boundary
        if (!lastPart && pos == utility::stream::npos)
        {
            ref <bodyPart> part = vmime::create <bodyPart>();

            // NOTE(review): this handler only rethrows the exception, so the
            // try/catch has no effect as written.
            try
            {
                part->parse(parser, partStart, end);
            }
            catch (std::exception&)
            {
                throw;
            }

            part->m_parent = m_part;

            m_parts.push_back(part);
        }
        // Treat remaining text as epilog
        else if (partStart < end)
        {
            vmime::text text;
            text.parse(parser, partStart, end);

            m_epilogText = text.getWholeBuffer();
        }
    }
    // Treat the contents as 'simple' data
    else
    {
        encoding enc;

        try
        {
            const ref <const headerField> cef =
                m_header.acquire()->findField(fields::CONTENT_TRANSFER_ENCODING);

            enc = *cef->getValue().dynamicCast <const encoding>();
        }
        catch (exceptions::no_such_field&)
        {
            // Defaults to "7bit" (RFC-1521)
            enc = vmime::encoding(encodingTypes::SEVEN_BIT);

            // Set header field
            m_header.acquire()->ContentTransferEncoding()->setValue(enc);
        }

        // Extract the (encoded) contents
        const utility::stream::size_type length = end - position;

        // The contents are not copied: the handler is built on an adapter
        // created over the [position, position + length) region of the
        // underlying stream
        ref <utility::inputStream> contentStream =
            vmime::create <utility::seekableInputStreamRegionAdapter>
            (parser->getUnderlyingStream(), position, length);

        m_contents = vmime::create <streamContentHandler>(contentStream, length, enc);
    }

    setParsedBounds(position, end);

    if (newPosition)
        *newPosition = end;
}
Esempio n. 13
0
// Parses a message identifier ("<left@right>") from the [position, end)
// range of 'buffer' and stores its two parts in m_left and m_right.
//
// The text located before the opening '<' is skipped; a '<' which is
// inside a parenthesized comment or which follows a backslash is ignored.
// When no opening '<' is found, the identifier is read without brackets,
// starting at the first non-blank character, and its right part stops at
// the first blank.
//
// The right part is extracted only when the left part is followed by '@'.
// When 'newPosition' is not null, it receives 'end'.
void messageId::parseImpl
	(const parsingContext& /* ctx */, const string& buffer, const size_t position,
	 const size_t end, size_t* newPosition)
{
	const char* const pend = buffer.data() + end;
	const char* const pstart = buffer.data() + position;
	const char* p = pstart;

	m_left.clear();
	m_right.clear();

	unsigned int commentLevel = 0;
	bool escape = false;
	bool stop = false;

	// Look for the opening '<'. When it is found, the loop increment still
	// runs once, so 'p' ends up on the character following the '<'.
	for ( ; !stop && p < pend ; ++p)
	{
		if (escape)
		{
			// Ignore this character. The flag must be reset here, or every
			// character following the first backslash would be ignored.
			escape = false;
		}
		else
		{
			switch (*p)
			{
			case '(':

				++commentLevel;
				break;

			case ')':

				// An unbalanced ')' must not make the unsigned counter wrap
				// around, which would hide the '<' for the rest of the data
				if (commentLevel > 0)
					--commentLevel;

				break;

			case '\\':

				escape = true;
				break;

			case '<':

				if (commentLevel == 0)
					stop = true;

				break;
			}
		}
	}

	// Fix for message ids without angle brackets (invalid)
	bool hasBrackets = true;

	if (p == pend)  // no opening angle bracket found
	{
		hasBrackets = false;
		p = pstart;

		while (p < pend && parserHelpers::isSpace(*p))
			++p;
	}

	if (p < pend)
	{
		// Extract left part
		const size_t leftStart = position + (p - pstart);

		while (p < pend && *p != '@' && *p != '>') ++p;

		m_left = string(buffer.begin() + leftStart,
		                buffer.begin() + position + (p - pstart));

		// There is a right part only if the left part ends with '@': when
		// it ends with '>', what follows is not part of the identifier
		if (p < pend && *p == '@')
		{
			// Skip '@'
			++p;

			// Extract right part
			const size_t rightStart = position + (p - pstart);

			while (p < pend && *p != '>' && (hasBrackets || !parserHelpers::isSpace(*p))) ++p;

			m_right = string(buffer.begin() + rightStart,
			                 buffer.begin() + position + (p - pstart));
		}
	}

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
Esempio n. 14
0
// Parses a mailbox ("display name <address>", "<address>" or a bare
// "local@domain") from the [position, end) range of 'buffer'.
//
// Parenthesized comments are dropped, a backslash escapes the following
// character, and a display name may be given as a quoted string. When only
// one piece of text is found, it is taken as the email address (which is
// mandatory) rather than as the name (which is optional). The blanks are
// removed from the email address.
//
// Parsing stops at the end of the mailbox, which may be before 'end': the
// parsed bounds and '*newPosition' (when not null) use that position.
void mailbox::parse(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	const string::value_type* const pend = buffer.data() + end;
	const string::value_type* const pstart = buffer.data() + position;
	const string::value_type* p = pstart;

	// Ignore blank spaces at the beginning
	while (p < pend && parserHelpers::isSpace(*p)) ++p;

	// Current state for parsing machine
	enum States
	{
		State_None,
		State_Name,
		State_Address
	};

	States state = State_Name;   // let's start with name, we will see later (*)

	// Temporary buffers for extracted name and address
	string name;
	string address;

	while (p < pend)
	{
		if (state == State_Name)
		{
			if (*p == '<')
			{
				state = State_Address;
				continue;
			}

			if (*p == '"') // Quoted string
			{
				++p;

				bool escaped = false;

				while (p < pend)
				{
					if (escaped)
					{
						name += *p;
						escaped = false;
					}
					else if (*p == '\\')
					{
						escaped = true;
					}
					else
					{
						if (*p == '"')
						{
							// End of the quoted string
							++p;
							break;
						}
						else
						{
							name += *p;
						}
					}

					++p;
				}
			}
			else
			{
				bool escaped = false;
				int comment = 0;

				while (p < pend)
				{
					if (escaped)
					{
						if (!comment) name += *p;
						escaped = false;
					}
					else if (comment)
					{
						// Inside a comment: only track escapes and nesting
						if (*p == '\\')
							escaped = true;
						else if (*p == '(')
							++comment;
						else if (*p == ')')
							--comment;
					}
					else if (*p == '\\')
					{
						escaped = true;
					}
					else if (*p == '(')
					{
						++comment;
					}
					else if (*p == '<')
					{
						// Erase any space between display name and <address>
						string::iterator q = name.end();
						for ( ; q != name.begin() && parserHelpers::isSpace(*(q - 1)) ; --q);
						name.erase(q, name.end());

						break;
					}
					else if (/* parserHelpers::isSpace(*p) || */ *p == '@')
					{
						break;
					}
					else
					{
						name += *p;
					}

					++p;
				}
			}

			if (p < pend && *p == '@')
			{
				// (*) Actually, we were parsing the local-part of an address
				// and not a display name...
				address = name;
				name.clear();

				bool escaped = false;
				int comment = 0;

				// Read the rest of the address (starting with the '@'),
				// up to the first blank located outside of a comment
				while (p < pend)
				{
					if (escaped)
					{
						if (!comment) address += *p;
						escaped = false;
					}
					else if (comment)
					{
						if (*p == '\\')
							escaped = true;
						else if (*p == '(')
							++comment;
						else if (*p == ')')
							--comment;
					}
					else if (*p == '\\')
					{
						escaped = true;
					}
					else if (*p == '(')
					{
						++comment;
					}
					else if (parserHelpers::isSpace(*p))
					{
						break;
					}
					else
					{
						address += *p;
					}

					++p;
				}

				break;
			}
			else
			{
				while (p < pend && parserHelpers::isSpace(*p)) ++p;
				state = State_None;
			}
		}
		else if (state == State_Address)
		{
			// Skip '<' character
			if (*p == '<')
				++p;

			bool escaped = false;
			int comment = 0;

			while (p < pend)
			{
				if (escaped)
				{
					if (!comment) address += *p;
					escaped = false;
				}
				else if (comment)
				{
					if (*p == '\\')
						escaped = true;
					else if (*p == '(')
						++comment;
					else if (*p == ')')
						--comment;
				}
				else if (*p == '(')
				{
					++comment;
				}
				else if (*p == '\\')
				{
					escaped = true;
				}
				else if (*p == '<')
				{
					// If we found a '<' here, it means that the address
					// starts _only_ here...and the stuff we have parsed
					// before belongs actually to the display name!
					name += address;
					address.clear();
				}
				else if (*p == '>')
				{
					// End of the address ('p' is left on the '>')
					break;
				}
				else if (!parserHelpers::isSpace(*p))
				{
					address += *p;
				}

				++p;
			}

			break;
		}
		else
		{
			while (p < pend && parserHelpers::isSpace(*p)) ++p;

			if (p < pend)
			{
				// Whatever comes next is parsed as an address, even if
				// it does not start with '<'
				//if (*p == '<')
					state = State_Address;
			}
		}
	}

	// Swap name and address when no address was found
	// (email address is mandatory, whereas name is optional).
	if (address.empty() && !name.empty())
	{
		// Discard the previous address: clear() is required here, since
		// empty() only tests the string and would leave the old address in
		// place, in front of the characters appended below.
		m_email.clear();
		m_email.reserve(name.size());
		m_name.removeAllWords();

		for (string::size_type i = 0 ; i < name.size() ; ++i)
		{
			if (!parserHelpers::isSpace(name[i]))
				m_email += name[i];
		}
	}
	else
	{
		text::decodeAndUnfold(name, &m_name);

		// Same as above: the previous address has to be discarded
		m_email.clear();
		m_email.reserve(address.size());

		for (string::size_type i = 0 ; i < address.size() ; ++i)
		{
			if (!parserHelpers::isSpace(address[i]))
				m_email += address[i];
		}
	}

	setParsedBounds(position, position + (p - pstart));

	if (newPosition)
		*newPosition = position + (p - pstart);
}
Esempio n. 15
0
/*

 RFC #822:
 5. DATE AND TIME SPECIFICATION

date-time = [ day "," ] date time                 ; dd mm yy
                                                  ; hh:mm:ss zzz
day = "Mon" / "Tue" / "Wed" / "Thu" /
      "Fri" / "Sat" / "Sun"

date = 1*2DIGIT month 2DIGIT                      ; day month year
                                                  ; e.g. 20 Jun 82
month = "Jan" / "Feb" / "Mar" / "Apr" /
        "May" / "Jun" / "Jul" / "Aug" /
        "Sep" / "Oct" / "Nov" / "Dec"

time = hour zone                                  ; ANSI and Military

hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT]             ; 00:00:00 - 23:59:59

zone = "UT" / "GMT"                               ; Universal Time
                                                  ; North American : UT
       / "EST" / "EDT"                            ; Eastern: - 5/ - 4
       / "CST" / "CDT"                            ; Central: - 6/ - 5
       / "MST" / "MDT"                            ; Mountain: - 7/ - 6
       / "PST" / "PDT"                            ; Pacific: - 8/ - 7
       / 1ALPHA                                   ; Military: Z = UT;
                                                  ; A:-1; (J not used)
                                                  ; M:-12; N:+1; Y:+12
       / ( ("+" / "-") 4DIGIT )                   ; Local differential
                                                  ; hours+min. (HHMM)
*/
// Parses a date/time value from the [position, end) range of 'buffer',
// following the RFC #822 syntax: [ day "," ] date time zone.
//
// The parser is tolerant: an optional week day is skipped, the month may
// come before the day, the year may be missing (the current year is then
// used when a time follows directly), and any field which cannot be read
// gets a default value (1970-01-01, 00:00:00, zone 0 when the range
// contains nothing but blanks). Two-digit and three-digit years are
// expanded (0-69 to 2000-2069, 70-999 to 1970-2899).
//
// The zone is either a numeric offset ("+HHMM" / "-HHMM"), a North
// American or universal zone name, or a single military letter; it is
// stored in m_zone as a number of minutes.
//
// When 'newPosition' is not null, it receives 'end'.
void datetime::parseImpl
	(const parsingContext& /* ctx */, const string& buffer, size_t position,
	 size_t end, size_t* newPosition)
{
	const char* pend = buffer.data() + end;
	const char* p = buffer.data() + position;

	// Parse the date and time value
	while (p < pend && parserHelpers::isSpace(*p)) ++p;

	if (p < pend)
	{
		if (parserHelpers::isAlpha(*p))
		{
			// Ignore week day
			while (p < pend && parserHelpers::isAlpha(*p)) ++p;
			while (p < pend && parserHelpers::isSpace(*p)) ++p;
			if (p < pend && *p == ',') ++p;
			while (p < pend && parserHelpers::isSpace(*p)) ++p;
		}

		bool dayParsed = false;

		// 'p' may have reached the end of the range while skipping the
		// week day, so it must be checked before being dereferenced
		if (p < pend && parserHelpers::isAlpha(*p))
		{
			// Ill-formed date/time, this may be the month,
			// so we skip day parsing (will be done later)
		}
		else
		{
			while (p < pend && !parserHelpers::isDigit(*p)) ++p;

			if (p < pend && parserHelpers::isDigit(*p))
			{
				// Month day
				int day = 0;

				do
				{
					day = day * 10 + (*p - '0');
					++p;
				}
				while (p < pend && parserHelpers::isDigit(*p));

				m_day = (day >= 1 && day <= 31) ? day : 1;

				while (p < pend && !parserHelpers::isSpace(*p)) ++p;
				while (p < pend && parserHelpers::isSpace(*p)) ++p;
			}
			else
			{
				m_day = 1;

				// Skip everything to the next field
				while (p < pend && !parserHelpers::isSpace(*p)) ++p;
				while (p < pend && parserHelpers::isSpace(*p)) ++p;
			}

			dayParsed = true;
		}

		if (p < pend && parserHelpers::isAlpha(*p))
		{
			// Month: only the first three letters are used
			char_t month[4] = { 0 };
			int monthLength = 0;

			do
			{
				month[monthLength++] = *p;
				++p;
			}
			while (monthLength < 3 && p < pend && parserHelpers::isAlpha(*p));

			while (p < pend && parserHelpers::isAlpha(*p)) ++p;

			switch (month[0])
			{
			case 'a':
			case 'A':
			{
				if (month[1] == 'u' || month[1] == 'U')
					m_month = AUGUST;
				else
					m_month = APRIL; // by default

				break;
			}
			case 'd':
			case 'D':
			{
				m_month = DECEMBER;
				break;
			}
			case 'f':
			case 'F':
			{
				m_month = FEBRUARY;
				break;
			}
			case 'j':
			case 'J':
			{
				if (month[1] == 'u' || month[1] == 'U')
				{
					if (month[2] == 'l' || month[2] == 'L')
						m_month = JULY;
					else // if (month[2] == 'n' || month[2] == 'N')
						m_month = JUNE;
				}
				else
				{
					m_month = JANUARY; // by default
				}

				break;
			}
			case 'm':
			case 'M':
			{
				if ((month[1] == 'a' || month[1] == 'A') &&
				    (month[2] == 'y' || month[2] == 'Y'))
				{
					m_month = MAY;
				}
				else
				{
					m_month = MARCH; // by default
				}

				break;
			}
			case 'n':
			case 'N':
			{
				m_month = NOVEMBER;
				break;
			}
			case 'o':
			case 'O':
			{
				m_month = OCTOBER;
				break;
			}
			case 's':
			case 'S':
			{
				m_month = SEPTEMBER;
				break;
			}
			default:
			{
				m_month = JANUARY; // by default
				break;
			}

			}

			while (p < pend && !parserHelpers::isSpace(*p)) ++p;
			while (p < pend && parserHelpers::isSpace(*p)) ++p;
		}
		else
		{
			m_month = JANUARY;

			// Here again, 'p' may already be at the end of the range
			if (p < pend && parserHelpers::isDigit(*p))
			{
				// Here, we expected a month, but it maybe
				// a ill-formed date, so try to parse a year
				// (we don't skip anything).
			}
			else
			{
				// Skip everything to the next field
				while (p < pend && !parserHelpers::isSpace(*p)) ++p;
				while (p < pend && parserHelpers::isSpace(*p)) ++p;
			}
		}

		if (!dayParsed && p < pend && parserHelpers::isDigit(*p))
		{
			// Month day (when it comes after the month)
			int day = 0;

			do
			{
				day = day * 10 + (*p - '0');
				++p;
			}
			while (p < pend && parserHelpers::isDigit(*p));

			m_day = (day >= 1 && day <= 31) ? day : 1;

			while (p < pend && !parserHelpers::isSpace(*p)) ++p;
			while (p < pend && parserHelpers::isSpace(*p)) ++p;
		}

		if (p < pend && parserHelpers::isDigit(*p))
		{
			// Check for ill-formed date/time and try to recover:
			// two characters followed by ':' look like "hh:" (no year)
			if (p + 2 < pend && *(p + 2) == ':')
			{
				// Skip year (default to current), and advance
				// to time parsing
				m_year = now().getYear();
			}
			else
			{
				// Year
				int year = 0;

				do
				{
					year = year * 10 + (*p - '0');
					++p;
				}
				while (p < pend && parserHelpers::isDigit(*p));

				if (year < 70)         m_year = year + 2000;
				else if (year < 1000)  m_year = year + 1900;
				else                   m_year = year;

				while (p < pend && !parserHelpers::isSpace(*p)) ++p;
				while (p < pend && parserHelpers::isSpace(*p)) ++p;
			}
		}
		else
		{
			m_year = 1970;

			// Skip everything to the next field
			while (p < pend && !parserHelpers::isSpace(*p)) ++p;
			while (p < pend && parserHelpers::isSpace(*p)) ++p;
		}

		if (p < pend && parserHelpers::isDigit(*p))
		{
			// Hour
			int hour = 0;

			do
			{
				hour = hour * 10 + (*p - '0');
				++p;
			}
			while (p < pend && parserHelpers::isDigit(*p));

			m_hour = (hour >= 0 && hour <= 23) ? hour : 0;

			while (p < pend && parserHelpers::isSpace(*p)) ++p;

			if (p < pend && *p == ':')
			{
				++p;

				while (p < pend && parserHelpers::isSpace(*p)) ++p;

				if (p < pend && parserHelpers::isDigit(*p))
				{
					// Minute
					int minute = 0;

					do
					{
						minute = minute * 10 + (*p - '0');
						++p;
					}
					while (p < pend && parserHelpers::isDigit(*p));

					m_minute = (minute >= 0 && minute <= 59) ? minute : 0;

					while (p < pend && parserHelpers::isSpace(*p)) ++p;

					if (p < pend && *p == ':')
					{
						++p;

						while (p < pend && parserHelpers::isSpace(*p)) ++p;

						if (p < pend && parserHelpers::isDigit(*p))
						{
							// Second
							int second = 0;

							do
							{
								second = second * 10 + (*p - '0');
								++p;
							}
							while (p < pend && parserHelpers::isDigit(*p));

							m_second = (second >= 0 && second <= 59) ? second : 0;

							while (p < pend && !parserHelpers::isSpace(*p)) ++p;
							while (p < pend && parserHelpers::isSpace(*p)) ++p;
						}
						else
						{
							m_second = 0;
						}
					}
					else
					{
						m_second = 0;
					}
				}
				else
				{
					// No minute: the second cannot be read either, so it is
					// reset too instead of keeping its previous value
					m_minute = 0;
					m_second = 0;
				}
			}
			else
			{
				// Hour only: reset both the minute and the second
				m_minute = 0;
				m_second = 0;
			}
		}
		else
		{
			// No time at all: reset every time field, not only the hour
			m_hour = 0;
			m_minute = 0;
			m_second = 0;

			// Skip everything to the next field
			while (p < pend && !parserHelpers::isSpace(*p)) ++p;
			while (p < pend && parserHelpers::isSpace(*p)) ++p;
		}

		if (p + 1 < pend && (*p == '+' || *p == '-') && parserHelpers::isDigit(*(p + 1)))
		{
			char_t sign = *p;
			++p;

			// Zone offset (in hour/minutes)
			int offset = 0;

			do
			{
				offset = offset * 10 + (*p - '0');
				++p;
			}
			while (p < pend && parserHelpers::isDigit(*p));

			// The offset is written "HHMM"
			int hourOff = offset / 100;
			int minOff = offset % 100;

			if (sign == '+')
				m_zone = hourOff * 60 + minOff;
			else
				m_zone = -(hourOff * 60 + minOff);
		}
		else if (p < pend && parserHelpers::isAlpha(*p))
		{
			bool done = false;

			// Zone offset (Time zone name): read up to three letters.
			// Only letters are collected, so that a single military letter
			// followed by something else is not taken for a longer name.
			char_t zone[4] = { 0 };
			int zoneLength = 0;

			do
			{
				zone[zoneLength++] = *p;
				++p;
			}
			while (zoneLength < 3 && p < pend && parserHelpers::isAlpha(*p));

			switch (zone[0])
			{
			case 'c':
			case 'C':
			{
				if (zoneLength >= 2)
				{
					if (zone[1] == 's' || zone[1] == 'S')
						m_zone = CST;
					else
						m_zone = CDT;

					done = true;
				}

				break;
			}
			case 'e':
			case 'E':
			{
				if (zoneLength >= 2)
				{
					if (zone[1] == 's' || zone[1] == 'S')
						m_zone = EST;
					else
						m_zone = EDT;

					done = true;
				}

				break;
			}
			case 'm':
			case 'M':
			{
				if (zoneLength >= 2)
				{
					if (zone[1] == 's' || zone[1] == 'S')
						m_zone = MST;
					else
						m_zone = MDT;

					done = true;
				}

				break;
			}
			case 'p':
			case 'P':
			{
				if (zoneLength >= 2)
				{
					if (zone[1] == 's' || zone[1] == 'S')
						m_zone = PST;
					else
						m_zone = PDT;

					done = true;
				}

				break;
			}
			case 'g':
			case 'G':
			case 'u':
			case 'U':
			{
				if (zoneLength >= 2)
				{
					m_zone = GMT;  // = UTC
					done = true;
				}

				break;
			}

			}

			if (!done)
			{
				char_t z = zone[0];

				// Military time zone ('J' is not used)
				if (z != 'j' && z != 'J')
				{
					typedef std::map <char_t, int> Map;
					static const Map::value_type offsetMapInit[] =
					{
						Map::value_type('a', -60),
						Map::value_type('b', -120),
						Map::value_type('c', -180),
						Map::value_type('d', -240),
						Map::value_type('e', -300),
						Map::value_type('f', -360),
						Map::value_type('g', -420),
						Map::value_type('h', -480),
						Map::value_type('i', -540),
						Map::value_type('k', -600),
						Map::value_type('l', -660),
						Map::value_type('m', -720),

						Map::value_type('n', 60),
						Map::value_type('o', 120),
						Map::value_type('p', 180),
						Map::value_type('q', 240),
						Map::value_type('r', 300),
						Map::value_type('s', 360),
						Map::value_type('t', 420),
						Map::value_type('u', 480),
						Map::value_type('v', 540),
						Map::value_type('w', 600),
						Map::value_type('x', 660),
						Map::value_type('y', 720),

						Map::value_type('z', 0),
					};
					static const Map offsetMap
						(::vmime::begin(offsetMapInit),
						 ::vmime::end(offsetMapInit));

					Map::const_iterator pos =
						offsetMap.find(parserHelpers::toLower(z));

					if (pos != offsetMap.end())
						m_zone = (*pos).second;
					else
						m_zone = GMT;
				}
				else
				{
					m_zone = GMT;
				}
			}
		}
		else
		{
			m_zone = 0;
		}
	}
	else
	{
		// Nothing to parse: use the default date and time
		m_year = 1970;
		m_month = JANUARY;
		m_day = 1;

		m_hour = 0;
		m_minute = 0;
		m_second = 0;

		m_zone = 0;
	}

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
Esempio n. 16
0
// Parses an e-mail address of the form "local-part@domain" from
// buffer[position, end) using a small character-driven state machine.
//
// - Leading whitespace is skipped.
// - The local part may contain quoted strings ("...") and (possibly nested)
//   parenthesised comments; the domain part may contain comments as well.
//   Comment text is discarded.
// - Parsing stops at the first whitespace following the local or domain part.
// - If no '@' was seen and the domain is empty, the local host name (as
//   reported by the platform handler) is used as the domain.
// - On any error (bad escape, unterminated quote/comment, empty input,
//   address of 256 characters or more) both names are set to "invalid".
//
// buffer       text to parse
// position     offset in 'buffer' of the first character to parse
// end          offset in 'buffer' one past the last character to parse
// newPosition  if not NULL, receives the offset at which parsing stopped
void emailAddress::parseImpl
	(const parsingContext& /* ctx */, const string& buffer, const string::size_type position,
	 const string::size_type end, string::size_type* newPosition)
{
	const string::value_type* const pend = buffer.data() + end;
	const string::value_type* const pstart = buffer.data() + position;
	const string::value_type* p = pstart;

	enum ParserStates
	{
		State_Before,
		State_LocalPartStart,
		State_LocalPartMiddle,
		State_LocalPartComment,
		State_LocalPartQuoted,
		State_DomainPartStart,
		State_DomainPartMiddle,
		State_DomainPartComment,
		State_End,
		State_Error
	} state = State_Before;

	// Plain strings are used as accumulators so that the length check done
	// for every input character does not have to copy a stream buffer.
	string localPart;
	string domainPart;

	bool escapeNext = false;  // previous char was a backslash (quoting)
	bool prevIsDot = false;   // previous local-part char was '.'
	bool atFound = false;     // an '@' separator has been seen
	bool stop = false;
	int commentLevel = 0;     // nesting depth of '(' ... ')' comments

	while (p < pend && !stop)
	{
		const string::value_type c = *p;

		// Refuse unreasonably long addresses
		if ((localPart.length() + domainPart.length()) >= 256)
		{
			state = State_Error;
			break;
		}

		switch (state)
		{
		case State_Before:

			// Skip leading whitespace. The current character is only
			// consumed here when it is a space; otherwise it is handed
			// over, untouched, to State_LocalPartStart on the next turn.
			if (parserHelpers::isSpace(c))
				++p;
			else
				state = State_LocalPartStart;

			break;

		case State_LocalPartStart:

			if (c == '"')
			{
				state = State_LocalPartQuoted;
				++p;
			}
			else if (c == '(')
			{
				state = State_LocalPartComment;
				++commentLevel;
				++p;
			}
			else
			{
				state = State_LocalPartMiddle;
				localPart += c;
				++p;
			}

			break;

		case State_LocalPartComment:

			if (escapeNext)
			{
				// Escaped character inside a comment: ignored
				escapeNext = false;
				++p;
			}
			else if (c == '\\')
			{
				escapeNext = true;
				++p;
			}
			else if (c == '(')
			{
				++commentLevel;
				++p;
			}
			else if (c == ')')
			{
				if (--commentLevel == 0)
				{
					// End of comment
					state = State_LocalPartMiddle;
				}

				++p;
			}
			else
			{
				// Comment continues
				++p;
			}

			break;

		case State_LocalPartQuoted:

			if (escapeNext)
			{
				escapeNext = false;

				if (c == '"' || c == '\\')
				{
					localPart += c;
					++p;
				}
				else
				{
					// This char cannot be escaped
					state = State_Error;
				}
			}
			else if (c == '"')
			{
				// End of quoted string
				state = State_LocalPartMiddle;
				++p;
			}
			else if (c == '\\')
			{
				escapeNext = true;
				++p;
			}
			else
			{
				localPart += c;
				++p;
			}

			break;

		case State_LocalPartMiddle:

			if (c == '.')
			{
				prevIsDot = true;
				localPart += c;
				++p;
			}
			else if (c == '"' && prevIsDot)
			{
				// A quoted string may only start right after a dot
				prevIsDot = false;
				state = State_LocalPartQuoted;
				++p;
			}
			else if (c == '(')
			{
				// By allowing comments anywhere in the local part,
				// we are more permissive than RFC-2822
				state = State_LocalPartComment;
				++commentLevel;
				++p;
			}
			else if (c == '@')
			{
				atFound = true;
				state = State_DomainPartStart;
				++p;
			}
			else if (parserHelpers::isSpace(c))
			{
				// Allow not specifying domain part
				state = State_End;
			}
			else
			{
				prevIsDot = false;
				localPart += c;
				++p;
			}

			break;

		case State_DomainPartStart:

			if (c == '(')
			{
				state = State_DomainPartComment;
				++commentLevel;
				++p;
			}
			else
			{
				state = State_DomainPartMiddle;
				domainPart += c;
				++p;
			}

			break;

		case State_DomainPartMiddle:

			if (parserHelpers::isSpace(c))
			{
				state = State_End;
			}
			else if (c == '(')
			{
				// By allowing comments anywhere in the domain part,
				// we are more permissive than RFC-2822
				state = State_DomainPartComment;
				++commentLevel;
				++p;
			}
			else
			{
				domainPart += c;
				++p;
			}

			break;

		case State_DomainPartComment:

			if (escapeNext)
			{
				// Escaped character inside a comment: ignored
				escapeNext = false;
				++p;
			}
			else if (c == '\\')
			{
				escapeNext = true;
				++p;
			}
			else if (c == '(')
			{
				++commentLevel;
				++p;
			}
			else if (c == ')')
			{
				if (--commentLevel == 0)
				{
					// End of comment
					state = State_DomainPartMiddle;
				}

				++p;
			}
			else
			{
				// Comment continues
				++p;
			}

			break;

		case State_End:
		case State_Error:

			stop = true;
			break;
		}
	}

	// Input exhausted while still inside the local or domain part:
	// this is a normal end of address
	if (p == pend && state != State_Error)
	{
		if (state == State_DomainPartMiddle)
			state = State_End;
		else if (state == State_LocalPartMiddle)
			state = State_End;  // allow not specifying domain part
	}

	if (state != State_End)
	{
		m_localName = word("invalid", vmime::charsets::UTF_8);
		m_domainName = word("invalid", vmime::charsets::UTF_8);
	}
	else
	{
		// If the domain part is missing, use local host name
		if (domainPart.empty() && !atFound)
			domainPart += platform::getHandler()->getHostName();

		m_localName = word(localPart, vmime::charsets::UTF_8);
		m_domainName = word(domainPart, vmime::charsets::UTF_8);
	}

	// Absolute offset (in 'buffer') at which parsing stopped
	const string::size_type parsedEnd =
		position + static_cast <string::size_type>(p - pstart);

	setParsedBounds(position, parsedEnd);

	if (newPosition)
		*newPosition = parsedEnd;
}
Esempio n. 17
0
void body::parseImpl
	(const parsingContext& /* ctx */,
	 shared_ptr <utility::parserInputStreamAdapter> parser,
	 const size_t position, const size_t end, size_t* newPosition)
{
	removeAllParts();

	m_prologText.clear();
	m_epilogText.clear();

	if (end == position)
	{

		setParsedBounds(position, end);

		if (newPosition)
			*newPosition = end;

		return;
	}

	// Check whether the body is a MIME-multipart.
	// If it is, also get (or try to guess) the boundary separator.
	bool isMultipart = false;
	string boundary;

	shared_ptr <const contentTypeField> ctf =
		m_part->getHeader()->findField <contentTypeField>(fields::CONTENT_TYPE);

	if (ctf)
	{
		const mediaType type = *ctf->getValue <mediaType>();

		if (type.getType() == mediaTypes::MULTIPART)
		{
			isMultipart = true;

			if (ctf->hasBoundary())
			{
				boundary = ctf->getBoundary();
			}
			else
			{
				// No "boundary" parameter specified: we can try to
				// guess it by scanning the body contents...
				size_t pos = position;

				parser->seek(pos);

				if (pos + 2 < end && parser->matchBytes("--", 2))
				{
					pos += 2;
				}
				else
				{
					pos = parser->findNext("\n--", position);

					if ((pos != npos) && (pos + 3 < end))
						pos += 3;  // skip \n--
				}

				if ((pos != npos) && (pos < end))
				{
					parser->seek(pos);

					// Read some bytes after boundary separator
					byte_t buffer[256];
					const size_t bufferLen =
						parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0])));

					buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0';

					// Skip transport padding bytes (SPACE or HTAB), if any
					size_t boundarySkip = 0;

					while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip]))
						++boundarySkip;

					// Extract boundary from buffer (stop at first CR or LF).
					// We have to stop after a reasonnably long boundary length (100)
					// not to take the whole body contents for a boundary...
					byte_t boundaryBytes[100];
					size_t boundaryLen = 0;

					for (byte_t c = buffer[boundarySkip] ;
					     boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ;
					     ++boundaryLen, c = buffer[boundarySkip + boundaryLen])
					{
						boundaryBytes[boundaryLen] = c;
					}

					if (boundaryLen >= 1 && boundaryLen < 100)
					{
						// RFC #1521, Page 31:
						// "...the boundary parameter, which consists of 1 to 70
						//  characters from a set of characters known to be very
						//  robust through email gateways, and NOT ending with
						//  white space..."
						while (boundaryLen != 0 &&
						       parserHelpers::isSpace(boundaryBytes[boundaryLen - 1]))
						{
							boundaryLen--;
						}

						if (boundaryLen >= 1)
							boundary = string(boundaryBytes, boundaryBytes + boundaryLen);
					}
				}
			}
		}
 	}

	// This is a multi-part body
	if (isMultipart && !boundary.empty())
	{
		size_t partStart = position;
		size_t pos = position;

		bool lastPart = false;

		// Find the first boundary
		size_t boundaryStart, boundaryEnd;
		pos = findNextBoundaryPosition(parser, boundary, pos, end, &boundaryStart, &boundaryEnd);

		for (int index = 0 ; !lastPart && (pos != npos) && (pos < end) ; ++index)
		{
			size_t partEnd = boundaryStart;

			// Check whether it is the last part (boundary terminated by "--")
			parser->seek(boundaryEnd);

			if (boundaryEnd + 1 < end && parser->matchBytes("--", 2))
			{
				lastPart = true;
				boundaryEnd += 2;
			}

			// RFC #1521, Page 31:
			// "...(If a boundary appears to end with white space, the
			//  white space must be presumed to have been added by a
			//  gateway, and must be deleted.)..."
			parser->seek(boundaryEnd);
			boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);

			// End of boundary line
			if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2))
			{
				boundaryEnd += 2;
			}
			else if (boundaryEnd < end && parser->peekByte() == '\n')
			{
				++boundaryEnd;
			}

			if (index == 0)
			{
				if (partEnd > partStart)
				{
					vmime::text text;
					text.parse(parser, partStart, partEnd);

					m_prologText = text.getWholeBuffer();
				}
				else
				{
					m_prologText = "";
				}
			}
			else // index > 0
			{
				shared_ptr <bodyPart> part = m_part->createChildPart();

				// End before start may happen on empty bodyparts (directly
				// successive boundaries without even a line-break)
				if (partEnd < partStart)
					std::swap(partStart, partEnd);

				part->parse(parser, partStart, partEnd, NULL);

				m_parts.push_back(part);
			}

			partStart = boundaryEnd;

			// Find the next boundary
			pos = findNextBoundaryPosition
				(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd);
		}

		m_contents = make_shared <emptyContentHandler>();

		// Last part was not found: recover from missing boundary
		if (!lastPart && pos == npos)
		{
			shared_ptr <bodyPart> part = m_part->createChildPart();

			try
			{
				part->parse(parser, partStart, end);
			}
			catch (std::exception&)
			{
				throw;
			}

			m_parts.push_back(part);
		}
		// Treat remaining text as epilog
		else if (partStart < end)
		{
			vmime::text text;
			text.parse(parser, partStart, end);

			m_epilogText = text.getWholeBuffer();
		}
	}
	// Treat the contents as 'simple' data
	else
	{
		encoding enc;

		shared_ptr <const headerField> cef =
			m_part->getHeader()->findField(fields::CONTENT_TRANSFER_ENCODING);

		if (cef)
		{
			enc = *cef->getValue <encoding>();
		}
		else
		{
			// Defaults to "7bit" (RFC-1521)
			enc = vmime::encoding(encodingTypes::SEVEN_BIT);
		}

		// Extract the (encoded) contents
		const size_t length = end - position;

		shared_ptr <utility::inputStream> contentStream =
			make_shared <utility::seekableInputStreamRegionAdapter>
				(parser->getUnderlyingStream(), position, length);

		m_contents = make_shared <streamContentHandler>(contentStream, length, enc);
	}

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}