const bool MDNHelper::isMDN(const ref <const message> msg) { const ref <const header> hdr = msg->getHeader(); // A MDN message implies the following: // - a Content-Type field is present and its value is "multipart/report" // - a "report-type" parameter is present in the Content-Type field, // and its value is "disposition-notification" if (hdr->hasField(fields::CONTENT_TYPE)) { const contentTypeField& ctf = *(hdr->ContentType() .dynamicCast <const contentTypeField>()); const mediaType type = *ctf.getValue().dynamicCast <const mediaType>(); if (type.getType() == vmime::mediaTypes::MULTIPART && type.getSubType() == vmime::mediaTypes::MULTIPART_REPORT) { if (ctf.hasParameter("report-type") && ctf.getReportType() == "disposition-notification") { return (true); } } } return (false); }
void messageParser::findTextParts(std::shared_ptr<const bodyPart> msg, std::shared_ptr<const bodyPart> part) { // Handle the case in which the message is not multipart: if the body part is // "text/*", take this part. if (part->getBody()->getPartCount() == 0) { mediaType type(mediaTypes::TEXT, mediaTypes::TEXT_PLAIN); bool accept = false; try { const contentTypeField& ctf = dynamic_cast<contentTypeField&> (*msg->getHeader()->findField(fields::CONTENT_TYPE)); const mediaType ctfType = // *ctf.getValue().dynamicCast <const mediaType>(); TODO // shared *std::dynamic_pointer_cast<const mediaType>(ctf.getValue()); if (ctfType.getType() == mediaTypes::TEXT) { type = ctfType; accept = true; } } catch (exceptions::no_such_field&) { // No "Content-type" field: assume "text/plain". accept = true; } if (accept) { std::shared_ptr<textPart> txtPart = textPartFactory::getInstance()->create(type); txtPart->parse(msg, msg, msg); m_textParts.push_back(txtPart); } } // Multipart message else { findSubTextParts(msg, part); } }
// Builds a text part for the given media type.
//
// The registered (media type, allocator) pairs are scanned in order and the
// allocator of the first entry whose media type equals 'type' is invoked.
//
// Throws exceptions::no_factory_available, with a message naming the media
// type, when no entry matches.
ref <textPart> textPartFactory::create(const mediaType& type)
{
	MapType::const_iterator entry = m_map.begin();

	while (entry != m_map.end())
	{
		if (entry->first == type)
			return (entry->second)();

		++entry;
	}

	// Name the offending media type so the error is actionable for the user.
	throw exceptions::no_factory_available("Unknown media type '" + type.generate() + "'.");
}
bool htmlTextPart::findPlainTextPart(const bodyPart& part, const bodyPart& parent, const bodyPart& textPart) { // We search for the nearest "multipart/alternative" part. const shared_ptr <const headerField> ctf = part.getHeader()->findField(fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue <mediaType>(); if (type.getType() == mediaTypes::MULTIPART && type.getSubType() == mediaTypes::MULTIPART_ALTERNATIVE) { shared_ptr <const bodyPart> foundPart; for (size_t i = 0 ; i < part.getBody()->getPartCount() ; ++i) { const shared_ptr <const bodyPart> p = part.getBody()->getPartAt(i); if (p.get() == &parent || // if "text/html" is in "multipart/related" p.get() == &textPart) // if not... { foundPart = p; } } if (foundPart) { bool found = false; // Now, search for the alternative plain text part for (size_t i = 0 ; !found && i < part.getBody()->getPartCount() ; ++i) { const shared_ptr <const bodyPart> p = part.getBody()->getPartAt(i); const shared_ptr <const headerField> ctf = p->getHeader()->findField(fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue <mediaType>(); if (type.getType() == mediaTypes::TEXT && type.getSubType() == mediaTypes::TEXT_PLAIN) { m_plainText = p->getBody()->getContents()->clone(); found = true; } } else { // No "Content-type" field. } } // If we don't have found the plain text part here, it means that // it does not exists (the MUA which built this message probably // did not include it...). return found; } } } else { // No "Content-type" field. } bool found = false; for (size_t i = 0 ; !found && i < part.getBody()->getPartCount() ; ++i) { found = findPlainTextPart(*part.getBody()->getPartAt(i), parent, textPart); } return found; }
bool messageParser::findSubTextParts(std::shared_ptr<const bodyPart> msg, std::shared_ptr<const bodyPart> part) { // In general, all the text parts are contained in parallel in the same // parent part (or message). // So, wherever the text parts are, all we have to do is to find the first // MIME part which is a text part. std::vector <std::shared_ptr<const bodyPart> > textParts; for (int i = 0 ; i < part->getBody()->getPartCount() ; ++i) { const std::shared_ptr<const bodyPart> p = part->getBody()->getPartAt(i); try { const contentTypeField& ctf = dynamic_cast <const contentTypeField&> (*(p->getHeader()->findField(fields::CONTENT_TYPE))); // const mediaType type = *ctf.getValue().dynamicCast <const // mediaType>(); TODO shared const mediaType type = *std::dynamic_pointer_cast<const mediaType>(ctf.getValue()); contentDisposition disp; // default should be inline if (type.getType() == mediaTypes::TEXT) { try { //std::shared_ptr<const contentDispositionField> cdf = p->getHeader()-> // findField(fields::CONTENT_DISPOSITION).dynamicCast // <const contentDispositionField>(); TODO shared std::shared_ptr<const contentDispositionField> cdf = std::dynamic_pointer_cast<const contentDispositionField>( p->getHeader()->findField(fields::CONTENT_DISPOSITION) ); // disp = *cdf->getValue().dynamicCast <const // contentDisposition>(); TODO shared disp = *std::dynamic_pointer_cast<const contentDisposition>(cdf->getValue()); } catch (exceptions::no_such_field&) { // No "Content-Disposition" field, assume default } if (disp.getName() == contentDispositionTypes::INLINE) textParts.push_back(p); } } catch (exceptions::no_such_field&) { // No "Content-type" field. } } if (textParts.size()) { // Okay. 
So we have found at least one text part for (std::vector <std::shared_ptr<const bodyPart> >::const_iterator p = textParts.begin() ; p != textParts.end() ; ++p) { const contentTypeField& ctf = dynamic_cast <const contentTypeField&> (*((*p)->getHeader()->findField(fields::CONTENT_TYPE))); // const mediaType type = *ctf.getValue().dynamicCast <const // mediaType>(); TODO shared const mediaType type = *std::dynamic_pointer_cast<const mediaType>(ctf.getValue()); try { std::shared_ptr<textPart> txtPart = textPartFactory::getInstance()->create(type); txtPart->parse(msg, part, *p); m_textParts.push_back(txtPart); } catch (exceptions::no_factory_available& e) { // Content-type not recognized. } } } bool found = false; for (int i = 0 ; !found && (i < part->getBody()->getPartCount()) ; ++i) { found = findSubTextParts(msg, part->getBody()->getPartAt(i)); } return found; }
void body::parseImpl (ref <utility::parserInputStreamAdapter> parser, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition) { removeAllParts(); m_prologText.clear(); m_epilogText.clear(); if (end == position) { setParsedBounds(position, end); if (newPosition) *newPosition = end; return; } // Check whether the body is a MIME-multipart bool isMultipart = false; string boundary; try { const ref <const contentTypeField> ctf = m_header.acquire()->findField(fields::CONTENT_TYPE).dynamicCast <contentTypeField>(); const mediaType type = *ctf->getValue().dynamicCast <const mediaType>(); if (type.getType() == mediaTypes::MULTIPART) { isMultipart = true; try { boundary = ctf->getBoundary(); } catch (exceptions::no_such_parameter&) { // No "boundary" parameter specified: we can try to // guess it by scanning the body contents... utility::stream::size_type pos = position; parser->seek(pos); if (pos + 2 < end && parser->matchBytes("--", 2)) { pos += 2; } else { pos = parser->findNext("\n--", position); if ((pos != utility::stream::npos) && (pos + 3 < end)) pos += 3; // skip \n-- } if ((pos != utility::stream::npos) && (pos < end)) { parser->seek(pos); // Read some bytes after boundary separator utility::stream::value_type buffer[256]; const utility::stream::size_type bufferLen = parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0]))); buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0'; // Extract boundary from buffer (stop at first CR or LF). // We have to stop after a reasonnably long boundary length (100) // not to take the whole body contents for a boundary... 
string::value_type boundaryBytes[100]; string::size_type boundaryLen = 0; for (string::value_type c = buffer[0] ; boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; c = buffer[++boundaryLen]) { boundaryBytes[boundaryLen] = buffer[boundaryLen]; } if (boundaryLen >= 1 && boundaryLen < 100) { // RFC #1521, Page 31: // "...the boundary parameter, which consists of 1 to 70 // characters from a set of characters known to be very // robust through email gateways, and NOT ending with // white space..." while (boundaryLen != 0 && parserHelpers::isSpace(boundaryBytes[boundaryLen - 1])) { boundaryLen--; } if (boundaryLen >= 1) boundary = string(boundaryBytes, boundaryBytes + boundaryLen); } } } } } catch (exceptions::no_such_field&) { // No "Content-Type" field... } // This is a multi-part body if (isMultipart && !boundary.empty()) { const string boundarySep("--" + boundary); utility::stream::size_type partStart = position; utility::stream::size_type pos = position; bool lastPart = false; while (pos != utility::stream::npos && pos < end) { pos = parser->findNext(boundarySep, pos); if (pos == utility::stream::npos) break; // not found if (pos != 0) { parser->seek(pos - 1); if (parser->peekByte() != '\n') { // Boundary is not at a beginning of a line pos++; continue; } parser->skip(1 + boundarySep.length()); } else { parser->seek(pos + boundarySep.length()); } const utility::stream::value_type next = parser->peekByte(); if (next == '\r' || next == '\n' || next == '-') break; // Boundary is a prefix of another, continue the search pos++; } if (pos != utility::stream::npos && pos < end) { vmime::text text; text.parse(parser, position, pos); m_prologText = text.getWholeBuffer(); } for (int index = 0 ; !lastPart && (pos != utility::stream::npos) && (pos < end) ; ++index) { utility::stream::size_type partEnd = pos; // Get rid of the [CR]LF just before the boundary string if (pos >= (position + 1)) { parser->seek(pos - 1); if (parser->peekByte() == '\n') 
--partEnd; } if (pos >= (position + 2)) { parser->seek(pos - 2); if (parser->peekByte() == '\r') --partEnd; } // Check whether it is the last part (boundary terminated by "--") pos += boundarySep.length(); parser->seek(pos); if (pos + 1 < end && parser->matchBytes("--", 2)) { lastPart = true; pos += 2; } // RFC #1521, Page 31: // "...(If a boundary appears to end with white space, the // white space must be presumed to have been added by a // gateway, and must be deleted.)..." parser->seek(pos); pos += parser->skipIf(parserHelpers::isSpaceOrTab, end); // End of boundary line if (pos + 1 < end && parser->matchBytes("\r\n", 2)) { pos += 2; } else if (pos < end && parser->peekByte() == '\n') { ++pos; } if (index > 0) { ref <bodyPart> part = vmime::create <bodyPart>(); // End before start may happen on empty bodyparts (directly // successive boundaries without even a line-break) if (partEnd < partStart) std::swap(partStart, partEnd); part->parse(parser, partStart, partEnd, NULL); part->m_parent = m_part; m_parts.push_back(part); } partStart = pos; while (pos != utility::stream::npos && pos < end) { pos = parser->findNext(boundarySep, pos); if (pos == utility::stream::npos) break; // not found if (pos != 0) { parser->seek(pos - 1); if (parser->peekByte() != '\n') { // Boundary is not at a beginning of a line pos++; continue; } parser->skip(1 + boundarySep.length()); } else { parser->seek(pos + boundarySep.length()); } const utility::stream::value_type next = parser->peekByte(); if (next == '\r' || next == '\n' || next == '-') break; // Boundary is a prefix of another, continue the search pos++; } } m_contents = vmime::create <emptyContentHandler>(); // Last part was not found: recover from missing boundary if (!lastPart && pos == utility::stream::npos) { ref <bodyPart> part = vmime::create <bodyPart>(); try { part->parse(parser, partStart, end); } catch (std::exception&) { throw; } part->m_parent = m_part; m_parts.push_back(part); } // Treat remaining text as epilog 
else if (partStart < end) { vmime::text text; text.parse(parser, partStart, end); m_epilogText = text.getWholeBuffer(); } } // Treat the contents as 'simple' data else { encoding enc; try { const ref <const headerField> cef = m_header.acquire()->findField(fields::CONTENT_TRANSFER_ENCODING); enc = *cef->getValue().dynamicCast <const encoding>(); } catch (exceptions::no_such_field&) { // Defaults to "7bit" (RFC-1521) enc = vmime::encoding(encodingTypes::SEVEN_BIT); // Set header field m_header.acquire()->ContentTransferEncoding()->setValue(enc); } // Extract the (encoded) contents const utility::stream::size_type length = end - position; ref <utility::inputStream> contentStream = vmime::create <utility::seekableInputStreamRegionAdapter> (parser->getUnderlyingStream(), position, length); m_contents = vmime::create <streamContentHandler>(contentStream, length, enc); } setParsedBounds(position, end); if (newPosition) *newPosition = end; }
void body::parseImpl (const parsingContext& /* ctx */, shared_ptr <utility::parserInputStreamAdapter> parser, const size_t position, const size_t end, size_t* newPosition) { removeAllParts(); m_prologText.clear(); m_epilogText.clear(); if (end == position) { setParsedBounds(position, end); if (newPosition) *newPosition = end; return; } // Check whether the body is a MIME-multipart. // If it is, also get (or try to guess) the boundary separator. bool isMultipart = false; string boundary; shared_ptr <const contentTypeField> ctf = m_part->getHeader()->findField <contentTypeField>(fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue <mediaType>(); if (type.getType() == mediaTypes::MULTIPART) { isMultipart = true; if (ctf->hasBoundary()) { boundary = ctf->getBoundary(); } else { // No "boundary" parameter specified: we can try to // guess it by scanning the body contents... size_t pos = position; parser->seek(pos); if (pos + 2 < end && parser->matchBytes("--", 2)) { pos += 2; } else { pos = parser->findNext("\n--", position); if ((pos != npos) && (pos + 3 < end)) pos += 3; // skip \n-- } if ((pos != npos) && (pos < end)) { parser->seek(pos); // Read some bytes after boundary separator byte_t buffer[256]; const size_t bufferLen = parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0]))); buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0'; // Skip transport padding bytes (SPACE or HTAB), if any size_t boundarySkip = 0; while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip])) ++boundarySkip; // Extract boundary from buffer (stop at first CR or LF). // We have to stop after a reasonnably long boundary length (100) // not to take the whole body contents for a boundary... 
byte_t boundaryBytes[100]; size_t boundaryLen = 0; for (byte_t c = buffer[boundarySkip] ; boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; ++boundaryLen, c = buffer[boundarySkip + boundaryLen]) { boundaryBytes[boundaryLen] = c; } if (boundaryLen >= 1 && boundaryLen < 100) { // RFC #1521, Page 31: // "...the boundary parameter, which consists of 1 to 70 // characters from a set of characters known to be very // robust through email gateways, and NOT ending with // white space..." while (boundaryLen != 0 && parserHelpers::isSpace(boundaryBytes[boundaryLen - 1])) { boundaryLen--; } if (boundaryLen >= 1) boundary = string(boundaryBytes, boundaryBytes + boundaryLen); } } } } } // This is a multi-part body if (isMultipart && !boundary.empty()) { size_t partStart = position; size_t pos = position; bool lastPart = false; // Find the first boundary size_t boundaryStart, boundaryEnd; pos = findNextBoundaryPosition(parser, boundary, pos, end, &boundaryStart, &boundaryEnd); for (int index = 0 ; !lastPart && (pos != npos) && (pos < end) ; ++index) { size_t partEnd = boundaryStart; // Check whether it is the last part (boundary terminated by "--") parser->seek(boundaryEnd); if (boundaryEnd + 1 < end && parser->matchBytes("--", 2)) { lastPart = true; boundaryEnd += 2; } // RFC #1521, Page 31: // "...(If a boundary appears to end with white space, the // white space must be presumed to have been added by a // gateway, and must be deleted.)..." 
parser->seek(boundaryEnd); boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end); // End of boundary line if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2)) { boundaryEnd += 2; } else if (boundaryEnd < end && parser->peekByte() == '\n') { ++boundaryEnd; } if (index == 0) { if (partEnd > partStart) { vmime::text text; text.parse(parser, partStart, partEnd); m_prologText = text.getWholeBuffer(); } else { m_prologText = ""; } } else // index > 0 { shared_ptr <bodyPart> part = m_part->createChildPart(); // End before start may happen on empty bodyparts (directly // successive boundaries without even a line-break) if (partEnd < partStart) std::swap(partStart, partEnd); part->parse(parser, partStart, partEnd, NULL); m_parts.push_back(part); } partStart = boundaryEnd; // Find the next boundary pos = findNextBoundaryPosition (parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd); } m_contents = make_shared <emptyContentHandler>(); // Last part was not found: recover from missing boundary if (!lastPart && pos == npos) { shared_ptr <bodyPart> part = m_part->createChildPart(); try { part->parse(parser, partStart, end); } catch (std::exception&) { throw; } m_parts.push_back(part); } // Treat remaining text as epilog else if (partStart < end) { vmime::text text; text.parse(parser, partStart, end); m_epilogText = text.getWholeBuffer(); } } // Treat the contents as 'simple' data else { encoding enc; shared_ptr <const headerField> cef = m_part->getHeader()->findField(fields::CONTENT_TRANSFER_ENCODING); if (cef) { enc = *cef->getValue <encoding>(); } else { // Defaults to "7bit" (RFC-1521) enc = vmime::encoding(encodingTypes::SEVEN_BIT); } // Extract the (encoded) contents const size_t length = end - position; shared_ptr <utility::inputStream> contentStream = make_shared <utility::seekableInputStreamRegionAdapter> (parser->getUnderlyingStream(), position, length); m_contents = make_shared <streamContentHandler>(contentStream, length, enc); } 
setParsedBounds(position, end); if (newPosition) *newPosition = end; }