void encoding::parseImpl( const parsingContext& /* ctx */, const string& buffer, const size_t position, const size_t end, size_t* newPosition ) { m_usage = USAGE_UNKNOWN; m_name = utility::stringUtils::toLower( utility::stringUtils::trim( utility::stringUtils::unquote( utility::stringUtils::trim( string(buffer.begin() + position, buffer.begin() + end) ) ) ) ); if (m_name.empty()) { m_name = encodingTypes::SEVEN_BIT; // assume default "7-bit" } setParsedBounds(position, end); if (newPosition) { *newPosition = end; } }
/**
 * Parse a "type/subtype" media type from the range [position, end)
 * of 'buffer'.
 *
 * Everything before the first '/' becomes the type; everything after
 * it becomes the sub-type. Both are lower-cased and trimmed. When no
 * '/' is present, the sub-type is left untouched.
 *
 * @param buffer input text
 * @param position offset of the first character to parse
 * @param end offset one past the last character to parse
 * @param newPosition if not null, receives 'end'
 */
void mediaType::parse(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	// Locate the separator between type and sub-type
	string::size_type slash = position;

	while (slash < end && buffer[slash] != '/')
		++slash;

	const string typePart(buffer.begin() + position, buffer.begin() + slash);

	m_type = utility::stringUtils::trim(utility::stringUtils::toLower(typePart));

	if (slash < end)
	{
		// The sub-type starts right after the '/' character
		const string subTypePart(buffer.begin() + slash + 1, buffer.begin() + end);

		m_subType = utility::stringUtils::trim(utility::stringUtils::toLower(subTypePart));
	}

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
/**
 * Parse a mailbox group ("name: address, address, ...") from the range
 * [position, end) of 'buffer'.
 *
 * The text up to the first ':' (leading white space skipped) is the
 * group name; it is decoded with text::decodeAndUnfold(). The remainder
 * is handed to address::parseNext() repeatedly. Nested groups are
 * flattened: their mailboxes are cloned into this group.
 *
 * Any mailboxes held from a previous parse are discarded first, so that
 * parsing twice does not accumulate entries.
 *
 * @param buffer input text
 * @param position offset of the first character to parse
 * @param end offset one past the last character to parse
 * @param newPosition if not null, receives 'end'
 */
void mailboxGroup::parseImpl(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	const string::value_type* const pend = buffer.data() + end;
	const string::value_type* const pstart = buffer.data() + position;
	const string::value_type* p = pstart;

	// Start from an empty list, like the other list-style parsers do
	m_list.clear();

	while (p < pend && parserHelpers::isSpace(*p))
		++p;

	// Group name: everything up to (but not including) the ':'
	const string::value_type* const nameStart = p;

	while (p < pend && *p != ':')
		++p;

	const string name(nameStart, p);

	if (p < pend && *p == ':')
		++p;

	string::size_type pos = position + (p - pstart);

	while (pos < end)
	{
		ref <address> parsedAddress = address::parseNext(buffer, pos, end, &pos);

		if (parsedAddress)
		{
			if (parsedAddress->isGroup())
			{
				ref <mailboxGroup> group = parsedAddress.staticCast <mailboxGroup>();

				// Sub-groups are not allowed in mailbox groups: so, we add all
				// the contents of the sub-group into this group...
				for (int i = 0 ; i < group->getMailboxCount() ; ++i)
				{
					m_list.push_back(group->getMailboxAt(i)->clone().staticCast <mailbox>());
				}
			}
			else
			{
				m_list.push_back(parsedAddress.staticCast <mailbox>());
			}
		}
	}

	text::decodeAndUnfold(name, &m_name);

	setParsedBounds(position, end);

	if (newPosition)
		*newPosition = end;
}
void contentDisposition::parseImpl (const parsingContext& /* ctx */, const string& buffer, const size_t position, const size_t end, size_t* newPosition) { m_name = utility::stringUtils::trim(utility::stringUtils::toLower (string(buffer.begin() + position, buffer.begin() + end))); setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void charset::parseImpl (const parsingContext& /* ctx */, const string& buffer, const size_t position, const size_t end, size_t* newPosition) { m_name = utility::stringUtils::trim (string(buffer.begin() + position, buffer.begin() + end)); // If we parsed this rfc-1642 valid MIME charset, convert it to something usefull for iconv if (utility::stringUtils::isStringEqualNoCase(m_name, "unicode-1-1-utf-7")) m_name = "utf-7"; setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void text::parseImpl (const parsingContext& ctx, const string& buffer, const size_t position, const size_t end, size_t* newPosition) { removeAllWords(); size_t newPos; const std::vector <shared_ptr <word> > words = word::parseMultiple(ctx, buffer, position, end, &newPos); copy_vector(words, m_words); setParsedBounds(position, newPos); if (newPosition) *newPosition = newPos; }
/**
 * Parse a body part (header followed by body) from the range
 * [position, end) of the stream wrapped by 'parser'.
 *
 * The header is parsed first; the body is parsed from wherever the
 * header parser reports it stopped, up to 'end'.
 *
 * @param parser input stream adapter
 * @param position offset of the first byte to parse
 * @param end offset one past the last byte to parse
 * @param newPosition if not null, receives 'end'
 */
void bodyPart::parseImpl(ref <utility::parserInputStreamAdapter> parser,
	const utility::stream::size_type position,
	const utility::stream::size_type end,
	utility::stream::size_type* newPosition)
{
	// Header: on return, 'bodyStart' is where the header parser stopped
	string::size_type bodyStart = position;

	m_header->parse(parser, bodyStart, end, &bodyStart);

	// Body contents: everything that remains
	m_body->parse(parser, bodyStart, end, NULL);

	setParsedBounds(position, end);

	if (newPosition)
	{
		*newPosition = end;
	}
}
/**
 * Parse header fields from the range [position, end) of 'buffer'.
 *
 * Existing fields are removed. Fields are then read one at a time with
 * headerField::parseNext() until the end of the range is reached or
 * parseNext() returns a null field.
 *
 * @param buffer input text
 * @param position offset of the first character to parse
 * @param end offset one past the last character to parse
 * @param newPosition if not null, receives the position at which
 * field parsing stopped
 */
void header::parseImpl(const string& buffer, const string::size_type position,
	const string::size_type end, string::size_type* newPosition)
{
	removeAllFields();

	string::size_type cursor = position;

	for ( ; cursor < end ; )
	{
		ref <headerField> nextField = headerField::parseNext(buffer, cursor, end, &cursor);

		// A null field marks the end of the header
		if (nextField == NULL)
			break;

		m_fields.push_back(nextField);
	}

	setParsedBounds(position, cursor);

	if (newPosition)
	{
		*newPosition = cursor;
	}
}
void addressList::parseImpl (const parsingContext& ctx, const string& buffer, const size_t position, const size_t end, size_t* newPosition) { removeAllAddresses(); size_t pos = position; while (pos < end) { shared_ptr <address> parsedAddress = address::parseNext(ctx, buffer, pos, end, &pos, NULL); if (parsedAddress != NULL) m_list.push_back(parsedAddress); } setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void messageIdSequence::parseImpl (const parsingContext& ctx, const string& buffer, const size_t position, const size_t end, size_t* newPosition) { removeAllMessageIds(); size_t pos = position; while (pos < end) { shared_ptr <messageId> parsedMid = messageId::parseNext(ctx, buffer, pos, end, &pos); if (parsedMid != NULL) m_list.push_back(parsedMid); } setParsedBounds(position, end); if (newPosition) *newPosition = end; }
/**
 * Parse the value of a field holding a single mailbox from the range
 * [position, end) of 'buffer'.
 *
 * Only the first address is read. If it is a group, the first mailbox
 * of the group is used (when the group is not empty). If nothing usable
 * is found, the value is a newly created, empty mailbox.
 *
 * @param ctx parsing context, forwarded to address::parseNext()
 * @param buffer input text
 * @param position offset of the first character to parse
 * @param end offset one past the last character to parse
 * @param newPosition if not null, receives 'end'
 */
void mailboxField::parse(const parsingContext& ctx, const string& buffer,
	const string::size_type position, const string::size_type end,
	string::size_type* newPosition)
{
	ref <mailbox> result = vmime::create <mailbox>();

	// We do not parse directly into a mailbox here: the field may hold
	// more than one address (even though it should contain only one),
	// so we read the first address and look at what it is.
	ref <address> firstAddress = address::parseNext(ctx, buffer, position, end, newPosition, NULL);

	if (firstAddress)
	{
		if (!firstAddress->isGroup())
		{
			// A plain mailbox: use it as is
			result = firstAddress.staticCast <mailbox>();
		}
		else
		{
			// A group of mailboxes: keep only its first member
			ref <mailboxGroup> members = firstAddress.staticCast <mailboxGroup>();

			if (!members->isEmpty())
				result = members->getMailboxAt(0);
		}
	}

	result->setParsedBounds(position, end);

	setValue(result);

	setParsedBounds(position, end);

	if (newPosition)
	{
		*newPosition = end;
	}
}
void body::parseImpl (ref <utility::parserInputStreamAdapter> parser, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition) { removeAllParts(); m_prologText.clear(); m_epilogText.clear(); if (end == position) { setParsedBounds(position, end); if (newPosition) *newPosition = end; return; } // Check whether the body is a MIME-multipart bool isMultipart = false; string boundary; try { const ref <const contentTypeField> ctf = m_header.acquire()->findField(fields::CONTENT_TYPE).dynamicCast <contentTypeField>(); const mediaType type = *ctf->getValue().dynamicCast <const mediaType>(); if (type.getType() == mediaTypes::MULTIPART) { isMultipart = true; try { boundary = ctf->getBoundary(); } catch (exceptions::no_such_parameter&) { // No "boundary" parameter specified: we can try to // guess it by scanning the body contents... utility::stream::size_type pos = position; parser->seek(pos); if (pos + 2 < end && parser->matchBytes("--", 2)) { pos += 2; } else { pos = parser->findNext("\n--", position); if ((pos != utility::stream::npos) && (pos + 3 < end)) pos += 3; // skip \n-- } if ((pos != utility::stream::npos) && (pos < end)) { parser->seek(pos); // Read some bytes after boundary separator utility::stream::value_type buffer[256]; const utility::stream::size_type bufferLen = parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0]))); buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0'; // Extract boundary from buffer (stop at first CR or LF). // We have to stop after a reasonnably long boundary length (100) // not to take the whole body contents for a boundary... 
string::value_type boundaryBytes[100]; string::size_type boundaryLen = 0; for (string::value_type c = buffer[0] ; boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; c = buffer[++boundaryLen]) { boundaryBytes[boundaryLen] = buffer[boundaryLen]; } if (boundaryLen >= 1 && boundaryLen < 100) { // RFC #1521, Page 31: // "...the boundary parameter, which consists of 1 to 70 // characters from a set of characters known to be very // robust through email gateways, and NOT ending with // white space..." while (boundaryLen != 0 && parserHelpers::isSpace(boundaryBytes[boundaryLen - 1])) { boundaryLen--; } if (boundaryLen >= 1) boundary = string(boundaryBytes, boundaryBytes + boundaryLen); } } } } } catch (exceptions::no_such_field&) { // No "Content-Type" field... } // This is a multi-part body if (isMultipart && !boundary.empty()) { const string boundarySep("--" + boundary); utility::stream::size_type partStart = position; utility::stream::size_type pos = position; bool lastPart = false; while (pos != utility::stream::npos && pos < end) { pos = parser->findNext(boundarySep, pos); if (pos == utility::stream::npos) break; // not found if (pos != 0) { parser->seek(pos - 1); if (parser->peekByte() != '\n') { // Boundary is not at a beginning of a line pos++; continue; } parser->skip(1 + boundarySep.length()); } else { parser->seek(pos + boundarySep.length()); } const utility::stream::value_type next = parser->peekByte(); if (next == '\r' || next == '\n' || next == '-') break; // Boundary is a prefix of another, continue the search pos++; } if (pos != utility::stream::npos && pos < end) { vmime::text text; text.parse(parser, position, pos); m_prologText = text.getWholeBuffer(); } for (int index = 0 ; !lastPart && (pos != utility::stream::npos) && (pos < end) ; ++index) { utility::stream::size_type partEnd = pos; // Get rid of the [CR]LF just before the boundary string if (pos >= (position + 1)) { parser->seek(pos - 1); if (parser->peekByte() == '\n') 
--partEnd; } if (pos >= (position + 2)) { parser->seek(pos - 2); if (parser->peekByte() == '\r') --partEnd; } // Check whether it is the last part (boundary terminated by "--") pos += boundarySep.length(); parser->seek(pos); if (pos + 1 < end && parser->matchBytes("--", 2)) { lastPart = true; pos += 2; } // RFC #1521, Page 31: // "...(If a boundary appears to end with white space, the // white space must be presumed to have been added by a // gateway, and must be deleted.)..." parser->seek(pos); pos += parser->skipIf(parserHelpers::isSpaceOrTab, end); // End of boundary line if (pos + 1 < end && parser->matchBytes("\r\n", 2)) { pos += 2; } else if (pos < end && parser->peekByte() == '\n') { ++pos; } if (index > 0) { ref <bodyPart> part = vmime::create <bodyPart>(); // End before start may happen on empty bodyparts (directly // successive boundaries without even a line-break) if (partEnd < partStart) std::swap(partStart, partEnd); part->parse(parser, partStart, partEnd, NULL); part->m_parent = m_part; m_parts.push_back(part); } partStart = pos; while (pos != utility::stream::npos && pos < end) { pos = parser->findNext(boundarySep, pos); if (pos == utility::stream::npos) break; // not found if (pos != 0) { parser->seek(pos - 1); if (parser->peekByte() != '\n') { // Boundary is not at a beginning of a line pos++; continue; } parser->skip(1 + boundarySep.length()); } else { parser->seek(pos + boundarySep.length()); } const utility::stream::value_type next = parser->peekByte(); if (next == '\r' || next == '\n' || next == '-') break; // Boundary is a prefix of another, continue the search pos++; } } m_contents = vmime::create <emptyContentHandler>(); // Last part was not found: recover from missing boundary if (!lastPart && pos == utility::stream::npos) { ref <bodyPart> part = vmime::create <bodyPart>(); try { part->parse(parser, partStart, end); } catch (std::exception&) { throw; } part->m_parent = m_part; m_parts.push_back(part); } // Treat remaining text as epilog 
else if (partStart < end) { vmime::text text; text.parse(parser, partStart, end); m_epilogText = text.getWholeBuffer(); } } // Treat the contents as 'simple' data else { encoding enc; try { const ref <const headerField> cef = m_header.acquire()->findField(fields::CONTENT_TRANSFER_ENCODING); enc = *cef->getValue().dynamicCast <const encoding>(); } catch (exceptions::no_such_field&) { // Defaults to "7bit" (RFC-1521) enc = vmime::encoding(encodingTypes::SEVEN_BIT); // Set header field m_header.acquire()->ContentTransferEncoding()->setValue(enc); } // Extract the (encoded) contents const utility::stream::size_type length = end - position; ref <utility::inputStream> contentStream = vmime::create <utility::seekableInputStreamRegionAdapter> (parser->getUnderlyingStream(), position, length); m_contents = vmime::create <streamContentHandler>(contentStream, length, enc); } setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void messageId::parseImpl (const parsingContext& /* ctx */, const string& buffer, const size_t position, const size_t end, size_t* newPosition) { const char* const pend = buffer.data() + end; const char* const pstart = buffer.data() + position; const char* p = pstart; m_left.clear(); m_right.clear(); unsigned int commentLevel = 0; bool escape = false; bool stop = false; for ( ; !stop && p < pend ; ++p) { if (escape) { // Ignore this character } else { switch (*p) { case '(': ++commentLevel; break; case ')': --commentLevel; break; case '\\': escape = true; break; case '<': { if (commentLevel == 0) { stop = true; break; } } } } } // Fix for message ids without angle brackets (invalid) bool hasBrackets = true; if (p == pend) // no opening angle bracket found { hasBrackets = false; p = pstart; while (p < pend && parserHelpers::isSpace(*p)) ++p; } if (p < pend) { // Extract left part const size_t leftStart = position + (p - pstart); while (p < pend && *p != '@' && *p != '>') ++p; m_left = string(buffer.begin() + leftStart, buffer.begin() + position + (p - pstart)); if (p < pend) { // Skip '@' ++p; // Extract right part const size_t rightStart = position + (p - pstart); while (p < pend && *p != '>' && (hasBrackets || !parserHelpers::isSpace(*p))) ++p; m_right = string(buffer.begin() + rightStart, buffer.begin() + position + (p - pstart)); } } setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void mailbox::parse(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; const string::value_type* p = pstart; // Ignore blank spaces at the beginning while (p < pend && parserHelpers::isSpace(*p)) ++p; // Current state for parsing machine enum States { State_None, State_Name, State_Address }; States state = State_Name; // let's start with name, we will see later (*) // Temporary buffers for extracted name and address string name; string address; while (p < pend) { if (state == State_Name) { if (*p == '<') { state = State_Address; continue; } if (*p == '"') // Quoted string { ++p; bool escaped = false; while (p < pend) { if (escaped) { name += *p; escaped = false; } else if (*p == '\\') { escaped = true; } else { if (*p == '"') { ++p; break; } else { name += *p; } } ++p; } } else { bool escaped = false; int comment = 0; while (p < pend) { if (escaped) { if (!comment) name += *p; escaped = false; } else if (comment) { if (*p == '\\') escaped = true; else if (*p == '(') ++comment; else if (*p == ')') --comment; } else if (*p == '\\') { escaped = true; } else if (*p == '(') { ++comment; } else if (*p == '<') { // Erase any space between display name and <address> string::iterator q = name.end(); for ( ; q != name.begin() && parserHelpers::isSpace(*(q - 1)) ; --q); name.erase(q, name.end()); break; } else if (/* parserHelpers::isSpace(*p) || */ *p == '@') { break; } else { name += *p; } ++p; } } if (p < pend && *p == '@') { // (*) Actually, we were parsing the local-part of an address // and not a display name... 
address = name; name.clear(); bool escaped = false; int comment = 0; while (p < pend) { if (escaped) { if (!comment) address += *p; escaped = false; } else if (comment) { if (*p == '\\') escaped = true; else if (*p == '(') ++comment; else if (*p == ')') --comment; } else if (*p == '\\') { escaped = true; } else if (*p == '(') { ++comment; } else if (parserHelpers::isSpace(*p)) { break; } else { address += *p; } ++p; } break; } else { while (p < pend && parserHelpers::isSpace(*p)) ++p; state = State_None; } } else if (state == State_Address) { // Skip '<' character if (*p == '<') ++p; bool escaped = false; int comment = 0; while (p < pend) { if (escaped) { if (!comment) address += *p; escaped = false; } else if (comment) { if (*p == '\\') escaped = true; else if (*p == '(') ++comment; else if (*p == ')') --comment; } else if (*p == '(') { ++comment; } else if (*p == '\\') { escaped = true; } else if (*p == '<') { // If we found a '<' here, it means that the address // starts _only_ here...and the stuff we have parsed // before belongs actually to the display name! name += address; address.clear(); } else if (*p == '>') { break; } else if (!parserHelpers::isSpace(*p)) { address += *p; } ++p; } break; } else { while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend) { //if (*p == '<') state = State_Address; } } } // Swap name and address when no address was found // (email address is mandatory, whereas name is optional). 
if (address.empty() && !name.empty()) { m_email.empty(); m_email.reserve(name.size()); m_name.removeAllWords(); for (string::size_type i = 0 ; i < name.size() ; ++i) { if (!parserHelpers::isSpace(name[i])) m_email += name[i]; } } else { text::decodeAndUnfold(name, &m_name); m_email.empty(); m_email.reserve(address.size()); for (string::size_type i = 0 ; i < address.size() ; ++i) { if (!parserHelpers::isSpace(address[i])) m_email += address[i]; } } setParsedBounds(position, position + (p - pstart)); if (newPosition) *newPosition = position + (p - pstart); }
/* RFC #822: 5. DATE AND TIME SPECIFICATION date-time = [ day "," ] date time ; dd mm yy ; hh:mm:ss zzz day = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" date = 1*2DIGIT month 2DIGIT ; day month year ; e.g. 20 Jun 82 month = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" time = hour zone ; ANSI and Military hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT] ; 00:00:00 - 23:59:59 zone = "UT" / "GMT" ; Universal Time ; North American : UT / "EST" / "EDT" ; Eastern: - 5/ - 4 / "CST" / "CDT" ; Central: - 6/ - 5 / "MST" / "MDT" ; Mountain: - 7/ - 6 / "PST" / "PDT" ; Pacific: - 8/ - 7 / 1ALPHA ; Military: Z = UT; ; A:-1; (J not used) ; M:-12; N:+1; Y:+12 / ( ("+" / "-") 4DIGIT ) ; Local differential ; hours+min. (HHMM) */ void datetime::parseImpl (const parsingContext& /* ctx */, const string& buffer, size_t position, size_t end, size_t* newPosition) { const char* pend = buffer.data() + end; const char* p = buffer.data() + position; // Parse the date and time value while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend) { if (parserHelpers::isAlpha(*p)) { // Ignore week day while (p < pend && parserHelpers::isAlpha(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend && *p == ',') ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } bool dayParsed = false; if (parserHelpers::isAlpha(*p)) { // Ill-formed date/time, this may be the month, // so we skip day parsing (will be done later) } else { while (p < pend && !parserHelpers::isDigit(*p)) ++p; if (p < pend && parserHelpers::isDigit(*p)) { // Month day int day = 0; do { day = day * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); m_day = (day >= 1 && day <= 31) ? 
day : 1; while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } else { m_day = 1; // Skip everything to the next field while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } dayParsed = true; } if (p < pend && parserHelpers::isAlpha(*p)) { // Month char_t month[4] = { 0 }; int monthLength = 0; do { month[monthLength++] = *p; ++p; } while (monthLength < 3 && p < pend && parserHelpers::isAlpha(*p)); while (p < pend && parserHelpers::isAlpha(*p)) ++p; switch (month[0]) { case 'a': case 'A': { if (month[1] == 'u' || month[1] == 'U') m_month = AUGUST; else m_month = APRIL; // by default break; } case 'd': case 'D': { m_month = DECEMBER; break; } case 'f': case 'F': { m_month = FEBRUARY; break; } case 'j': case 'J': { if (month[1] == 'u' || month[1] == 'U') { if (month[2] == 'l' || month[2] == 'L') m_month = JULY; else // if (month[2] == 'n' || month[2] == 'N') m_month = JUNE; } else { m_month = JANUARY; // by default } break; } case 'm': case 'M': { if ((month[1] == 'a' || month[1] == 'A') && (month[2] == 'y' || month[2] == 'Y')) { m_month = MAY; } else { m_month = MARCH; // by default } break; } case 'n': case 'N': { m_month = NOVEMBER; break; } case 'o': case 'O': { m_month = OCTOBER; break; } case 's': case 'S': { m_month = SEPTEMBER; break; } default: { m_month = JANUARY; // by default break; } } while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } else { m_month = JANUARY; if (parserHelpers::isDigit(*p)) { // Here, we expected a month, but it maybe // a ill-formed date, so try to parse a year // (we don't skip anything). 
} else { // Skip everything to the next field while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } } if (!dayParsed && p < pend && parserHelpers::isDigit(*p)) { // Month day int day = 0; do { day = day * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); m_day = (day >= 1 && day <= 31) ? day : 1; while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } if (p < pend && parserHelpers::isDigit(*p)) { // Check for ill-formed date/time and try to recover if (p + 2 < pend && *(p + 2) == ':') { // Skip year (default to current), and advance // to time parsing m_year = now().getYear(); } else { // Year int year = 0; do { year = year * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); if (year < 70) m_year = year + 2000; else if (year < 1000) m_year = year + 1900; else m_year = year; while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } } else { m_year = 1970; // Skip everything to the next field while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } if (p < pend && parserHelpers::isDigit(*p)) { // Hour int hour = 0; do { hour = hour * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); m_hour = (hour >= 0 && hour <= 23) ? hour : 0; while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend && *p == ':') { ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend && parserHelpers::isDigit(*p)) { // Minute int minute = 0; do { minute = minute * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); m_minute = (minute >= 0 && minute <= 59) ? 
minute : 0; while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend && *p == ':') { ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; if (p < pend && parserHelpers::isDigit(*p)) { // Second int second = 0; do { second = second * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); m_second = (second >= 0 && second <= 59) ? second : 0; while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } else { m_second = 0; } } else { m_second = 0; } } else { m_minute = 0; } } else { m_minute = 0; } } else { m_hour = 0; // Skip everything to the next field while (p < pend && !parserHelpers::isSpace(*p)) ++p; while (p < pend && parserHelpers::isSpace(*p)) ++p; } if (p + 1 < pend && (*p == '+' || *p == '-') && parserHelpers::isDigit(*(p + 1))) { char_t sign = *p; ++p; // Zone offset (in hour/minutes) int offset = 0; do { offset = offset * 10 + (*p - '0'); ++p; } while (p < pend && parserHelpers::isDigit(*p)); int hourOff = offset / 100; int minOff = offset % 100; if (sign == '+') m_zone = hourOff * 60 + minOff; else m_zone = -(hourOff * 60 + minOff); } else if (p < pend && isalpha(*p)) { bool done = false; // Zone offset (Time zone name) char_t zone[4] = { 0 }; int zoneLength = 0; do { zone[zoneLength++] = *p; ++p; } while (zoneLength < 3 && p < pend); switch (zone[0]) { case 'c': case 'C': { if (zoneLength >= 2) { if (zone[1] == 's' || zone[1] == 'S') m_zone = CST; else m_zone = CDT; done = true; } break; } case 'e': case 'E': { if (zoneLength >= 2) { if (zone[1] == 's' || zone[1] == 'S') m_zone = EST; else m_zone = EDT; done = true; } break; } case 'm': case 'M': { if (zoneLength >= 2) { if (zone[1] == 's' || zone[1] == 'S') m_zone = MST; else m_zone = MDT; done = true; } break; } case 'p': case 'P': { if (zoneLength >= 2) { if (zone[1] == 's' || zone[1] == 'S') m_zone = PST; else m_zone = PDT; done = true; } break; } case 'g': case 'G': case 'u': case 'U': { if (zoneLength >= 2) { m_zone = 
GMT; // = UTC done = true; } break; } } if (!done) { char_t z = zone[0]; // Military time zone if (z != 'j' && z != 'J') { typedef std::map <char_t, int> Map; static const Map::value_type offsetMapInit[] = { Map::value_type('a', -60), Map::value_type('b', -120), Map::value_type('c', -180), Map::value_type('d', -240), Map::value_type('e', -300), Map::value_type('f', -360), Map::value_type('g', -420), Map::value_type('h', -480), Map::value_type('i', -540), Map::value_type('k', -600), Map::value_type('l', -660), Map::value_type('m', -720), Map::value_type('n', 60), Map::value_type('o', 120), Map::value_type('p', 180), Map::value_type('q', 240), Map::value_type('r', 300), Map::value_type('s', 360), Map::value_type('t', 420), Map::value_type('u', 480), Map::value_type('v', 540), Map::value_type('w', 600), Map::value_type('x', 660), Map::value_type('y', 720), Map::value_type('z', 0), }; static const Map offsetMap (::vmime::begin(offsetMapInit), ::vmime::end(offsetMapInit)); Map::const_iterator pos = offsetMap.find(parserHelpers::toLower(z)); if (pos != offsetMap.end()) m_zone = (*pos).second; else m_zone = GMT; } else { m_zone = GMT; } } } else { m_zone = 0; } } else { m_year = 1970; m_month = JANUARY; m_day = 1; m_hour = 0; m_minute = 0; m_second = 0; m_zone = 0; } setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void emailAddress::parseImpl (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; const string::value_type* p = pstart; enum ParserStates { State_Before, State_LocalPartStart, State_LocalPartMiddle, State_LocalPartComment, State_LocalPartQuoted, State_DomainPartStart, State_DomainPartMiddle, State_DomainPartComment, State_End, State_Error } state = State_Before; std::ostringstream localPart; std::ostringstream domainPart; bool escapeNext = false; // for quoting bool prevIsDot = false; bool atFound = false; bool stop = false; int commentLevel = 0; while (p < pend && !stop) { const string::value_type c = *p; if ((localPart.str().length() + domainPart.str().length()) >= 256) { state = State_Error; break; } switch (state) { case State_Before: if (parserHelpers::isSpace(c)) ++p; else state = State_LocalPartStart; case State_LocalPartStart: if (c == '"') { state = State_LocalPartQuoted; ++p; } else if (c == '(') { state = State_LocalPartComment; ++commentLevel; ++p; } else { state = State_LocalPartMiddle; localPart << c; ++p; } break; case State_LocalPartComment: if (escapeNext) { escapeNext = false; ++p; } else if (c == '\\') { escapeNext = true; ++p; } else if (c == '(') { ++commentLevel; ++p; } else if (c == ')') { if (--commentLevel == 0) { // End of comment state = State_LocalPartMiddle; } ++p; } else { // Comment continues ++p; } break; case State_LocalPartQuoted: if (escapeNext) { escapeNext = false; if (c == '"' || c == '\\') { localPart << c; ++p; } else { // This char cannot be escaped state = State_Error; } } else if (c == '"') { // End of quoted string state = State_LocalPartMiddle; ++p; } else if (c == '\\') { escapeNext = true; ++p; } else { localPart << c; ++p; } break; case State_LocalPartMiddle: if (c == '.') { prevIsDot = true; 
localPart << c; ++p; } else if (c == '"' && prevIsDot) { prevIsDot = false; state = State_LocalPartQuoted; ++p; } else if (c == '(') { // By allowing comments anywhere in the local part, // we are more permissive than RFC-2822 state = State_LocalPartComment; ++commentLevel; ++p; } else if (c == '@') { atFound = true; state = State_DomainPartStart; ++p; } else if (parserHelpers::isSpace(c)) { // Allow not specifying domain part state = State_End; } else { prevIsDot = false; localPart << c; ++p; } break; case State_DomainPartStart: if (c == '(') { state = State_DomainPartComment; ++commentLevel; ++p; } else { state = State_DomainPartMiddle; domainPart << c; ++p; } break; case State_DomainPartMiddle: if (parserHelpers::isSpace(c)) { state = State_End; } else if (c == '(') { // By allowing comments anywhere in the domain part, // we are more permissive than RFC-2822 state = State_DomainPartComment; ++commentLevel; ++p; } else { domainPart << c; ++p; } break; case State_DomainPartComment: if (escapeNext) { escapeNext = false; ++p; } else if (c == '\\') { escapeNext = true; ++p; } else if (c == '(') { ++commentLevel; ++p; } else if (c == ')') { if (--commentLevel == 0) { // End of comment state = State_DomainPartMiddle; } ++p; } else { // Comment continues ++p; } break; case State_End: case State_Error: stop = true; break; } } if (p == pend && state != State_Error) { if (state == State_DomainPartMiddle) state = State_End; else if (state == State_LocalPartMiddle) state = State_End; // allow not specifying domain part } if (state != State_End) { m_localName = word("invalid", vmime::charsets::UTF_8); m_domainName = word("invalid", vmime::charsets::UTF_8); } else { // If the domain part is missing, use local host name if (domainPart.str().empty() && !atFound) domainPart << platform::getHandler()->getHostName(); m_localName = word(localPart.str(), vmime::charsets::UTF_8); m_domainName = word(domainPart.str(), vmime::charsets::UTF_8); } setParsedBounds(position, p - pend); if 
(newPosition) *newPosition = p - pend; }
void body::parseImpl (const parsingContext& /* ctx */, shared_ptr <utility::parserInputStreamAdapter> parser, const size_t position, const size_t end, size_t* newPosition) { removeAllParts(); m_prologText.clear(); m_epilogText.clear(); if (end == position) { setParsedBounds(position, end); if (newPosition) *newPosition = end; return; } // Check whether the body is a MIME-multipart. // If it is, also get (or try to guess) the boundary separator. bool isMultipart = false; string boundary; shared_ptr <const contentTypeField> ctf = m_part->getHeader()->findField <contentTypeField>(fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue <mediaType>(); if (type.getType() == mediaTypes::MULTIPART) { isMultipart = true; if (ctf->hasBoundary()) { boundary = ctf->getBoundary(); } else { // No "boundary" parameter specified: we can try to // guess it by scanning the body contents... size_t pos = position; parser->seek(pos); if (pos + 2 < end && parser->matchBytes("--", 2)) { pos += 2; } else { pos = parser->findNext("\n--", position); if ((pos != npos) && (pos + 3 < end)) pos += 3; // skip \n-- } if ((pos != npos) && (pos < end)) { parser->seek(pos); // Read some bytes after boundary separator byte_t buffer[256]; const size_t bufferLen = parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0]))); buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0'; // Skip transport padding bytes (SPACE or HTAB), if any size_t boundarySkip = 0; while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip])) ++boundarySkip; // Extract boundary from buffer (stop at first CR or LF). // We have to stop after a reasonnably long boundary length (100) // not to take the whole body contents for a boundary... 
byte_t boundaryBytes[100]; size_t boundaryLen = 0; for (byte_t c = buffer[boundarySkip] ; boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; ++boundaryLen, c = buffer[boundarySkip + boundaryLen]) { boundaryBytes[boundaryLen] = c; } if (boundaryLen >= 1 && boundaryLen < 100) { // RFC #1521, Page 31: // "...the boundary parameter, which consists of 1 to 70 // characters from a set of characters known to be very // robust through email gateways, and NOT ending with // white space..." while (boundaryLen != 0 && parserHelpers::isSpace(boundaryBytes[boundaryLen - 1])) { boundaryLen--; } if (boundaryLen >= 1) boundary = string(boundaryBytes, boundaryBytes + boundaryLen); } } } } } // This is a multi-part body if (isMultipart && !boundary.empty()) { size_t partStart = position; size_t pos = position; bool lastPart = false; // Find the first boundary size_t boundaryStart, boundaryEnd; pos = findNextBoundaryPosition(parser, boundary, pos, end, &boundaryStart, &boundaryEnd); for (int index = 0 ; !lastPart && (pos != npos) && (pos < end) ; ++index) { size_t partEnd = boundaryStart; // Check whether it is the last part (boundary terminated by "--") parser->seek(boundaryEnd); if (boundaryEnd + 1 < end && parser->matchBytes("--", 2)) { lastPart = true; boundaryEnd += 2; } // RFC #1521, Page 31: // "...(If a boundary appears to end with white space, the // white space must be presumed to have been added by a // gateway, and must be deleted.)..." 
parser->seek(boundaryEnd); boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end); // End of boundary line if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2)) { boundaryEnd += 2; } else if (boundaryEnd < end && parser->peekByte() == '\n') { ++boundaryEnd; } if (index == 0) { if (partEnd > partStart) { vmime::text text; text.parse(parser, partStart, partEnd); m_prologText = text.getWholeBuffer(); } else { m_prologText = ""; } } else // index > 0 { shared_ptr <bodyPart> part = m_part->createChildPart(); // End before start may happen on empty bodyparts (directly // successive boundaries without even a line-break) if (partEnd < partStart) std::swap(partStart, partEnd); part->parse(parser, partStart, partEnd, NULL); m_parts.push_back(part); } partStart = boundaryEnd; // Find the next boundary pos = findNextBoundaryPosition (parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd); } m_contents = make_shared <emptyContentHandler>(); // Last part was not found: recover from missing boundary if (!lastPart && pos == npos) { shared_ptr <bodyPart> part = m_part->createChildPart(); try { part->parse(parser, partStart, end); } catch (std::exception&) { throw; } m_parts.push_back(part); } // Treat remaining text as epilog else if (partStart < end) { vmime::text text; text.parse(parser, partStart, end); m_epilogText = text.getWholeBuffer(); } } // Treat the contents as 'simple' data else { encoding enc; shared_ptr <const headerField> cef = m_part->getHeader()->findField(fields::CONTENT_TRANSFER_ENCODING); if (cef) { enc = *cef->getValue <encoding>(); } else { // Defaults to "7bit" (RFC-1521) enc = vmime::encoding(encodingTypes::SEVEN_BIT); } // Extract the (encoded) contents const size_t length = end - position; shared_ptr <utility::inputStream> contentStream = make_shared <utility::seekableInputStreamRegionAdapter> (parser->getUnderlyingStream(), position, length); m_contents = make_shared <streamContentHandler>(contentStream, length, enc); } 
setParsedBounds(position, end); if (newPosition) *newPosition = end; }