// Fill this HTML text part from an existing message.
//
// Steps performed:
//   1. collect candidate embedded parts from the whole message
//      (findEmbeddedParts);
//   2. copy the HTML contents of 'textPart' into m_text and pick up its
//      charset (m_charset falls back to a default-constructed charset when
//      the Content-Type field or its charset parameter is absent);
//   3. register as embedded object every candidate whose Content-ID or
//      Content-Location occurs in the HTML text;
//   4. look for a plain text alternative; when none is found, m_plainText
//      is set to an empty content handler.
//
// message   root part, scanned for embedded objects and the plain text part
// parent    part forwarded to findPlainTextPart() together with 'textPart'
// textPart  part holding the HTML text
void htmlTextPart::parse(shared_ptr <const bodyPart> message, shared_ptr <const bodyPart> parent, shared_ptr <const bodyPart> textPart)
{
	// Search for possible embedded objects in the _whole_ message.
	std::vector <shared_ptr <const bodyPart> > cidParts;
	std::vector <shared_ptr <const bodyPart> > locParts;

	findEmbeddedParts(*message, cidParts, locParts);

	// Extract HTML text
	std::ostringstream oss;
	utility::outputStreamAdapter adapter(oss);

	textPart->getBody()->getContents()->extract(adapter);

	const string data = oss.str();

	m_text = textPart->getBody()->getContents()->clone();

	// Find charset
	shared_ptr <const contentTypeField> ctf =
		textPart->getHeader()->findField <contentTypeField>(fields::CONTENT_TYPE);

	if (ctf && ctf->hasCharset())
		m_charset = ctf->getCharset();
	else
		m_charset = charset();

	// Extract embedded objects. The algorithm is quite simple: for each previously
	// found inline part, we check if its CID/Location is contained in the HTML text.
	for (std::vector <shared_ptr <const bodyPart> >::const_iterator p = cidParts.begin() ;
	     p != cidParts.end() ; ++p)
	{
		const shared_ptr <const headerField> midField =
			(*p)->getHeader()->findField(fields::CONTENT_ID);

		// Defensive: skip the part rather than dereference a null pointer if
		// the field is missing or does not hold a message identifier.
		if (!midField)
			continue;

		const shared_ptr <const messageId> mid = midField->getValue <messageId>();

		if (!mid)
			continue;

		const string id = mid->getId();

		// An empty identifier would reduce the search below to the bare
		// "cid:" scheme, which matches any CID reference in the text.
		if (id.empty())
			continue;

		if (data.find("CID:" + id) != string::npos ||
		    data.find("cid:" + id) != string::npos)
		{
			// This part is referenced in the HTML text.
			// Add it to the embedded object list.
			addEmbeddedObject(**p, id, embeddedObject::REFERENCED_BY_ID);
		}
	}

	for (std::vector <shared_ptr <const bodyPart> >::const_iterator p = locParts.begin() ;
	     p != locParts.end() ; ++p)
	{
		const shared_ptr <const headerField> locField =
			(*p)->getHeader()->findField(fields::CONTENT_LOCATION);

		if (!locField)
			continue;

		const shared_ptr <const text> loc = locField->getValue <text>();

		if (!loc)
			continue;

		const string locStr = loc->getWholeBuffer();

		// find() locates an empty string in any text, so an empty location
		// must not be treated as "referenced by the HTML text".
		if (locStr.empty())
			continue;

		if (data.find(locStr) != string::npos)
		{
			// This part is referenced in the HTML text.
			// Add it to the embedded object list.
			addEmbeddedObject(**p, locStr, embeddedObject::REFERENCED_BY_LOCATION);
		}
	}

	// Extract plain text, if any.
	if (!findPlainTextPart(*message, *parent, *textPart))
	{
		m_plainText = make_shared <emptyContentHandler>();
	}
}
bool htmlTextPart::findPlainTextPart(const bodyPart& part, const bodyPart& parent, const bodyPart& textPart) { // We search for the nearest "multipart/alternative" part. const shared_ptr <const headerField> ctf = part.getHeader()->findField(fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue <mediaType>(); if (type.getType() == mediaTypes::MULTIPART && type.getSubType() == mediaTypes::MULTIPART_ALTERNATIVE) { shared_ptr <const bodyPart> foundPart; for (size_t i = 0 ; i < part.getBody()->getPartCount() ; ++i) { const shared_ptr <const bodyPart> p = part.getBody()->getPartAt(i); if (p.get() == &parent || // if "text/html" is in "multipart/related" p.get() == &textPart) // if not... { foundPart = p; } } if (foundPart) { bool found = false; // Now, search for the alternative plain text part for (size_t i = 0 ; !found && i < part.getBody()->getPartCount() ; ++i) { const shared_ptr <const bodyPart> p = part.getBody()->getPartAt(i); const shared_ptr <const headerField> ctf = p->getHeader()->findField(fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue <mediaType>(); if (type.getType() == mediaTypes::TEXT && type.getSubType() == mediaTypes::TEXT_PLAIN) { m_plainText = p->getBody()->getContents()->clone(); found = true; } } else { // No "Content-type" field. } } // If we don't have found the plain text part here, it means that // it does not exists (the MUA which built this message probably // did not include it...). return found; } } } else { // No "Content-type" field. } bool found = false; for (size_t i = 0 ; !found && i < part.getBody()->getPartCount() ; ++i) { found = findPlainTextPart(*part.getBody()->getPartAt(i), parent, textPart); } return found; }
// Fill this HTML text part from an existing message.
//
// Steps performed:
//   1. collect candidate embedded parts from the whole message
//      (findEmbeddedParts);
//   2. copy the HTML contents of 'textPart' into m_text and pick up its
//      charset (m_charset is left untouched when the Content-Type field or
//      its charset parameter is missing);
//   3. register as embedded object every candidate whose Content-ID or
//      Content-Location occurs in the HTML text;
//   4. look for a plain text alternative; when none is found, m_plainText
//      is set to an empty content handler.
//
// message   root part, scanned for embedded objects and the plain text part
// parent    part forwarded to findPlainTextPart() together with 'textPart'
// textPart  part holding the HTML text
void htmlTextPart::parse(ref <const bodyPart> message, ref <const bodyPart> parent, ref <const bodyPart> textPart)
{
	// Search for possible embedded objects in the _whole_ message.
	std::vector <ref <const bodyPart> > cidParts;
	std::vector <ref <const bodyPart> > locParts;

	findEmbeddedParts(*message, cidParts, locParts);

	// Extract HTML text
	std::ostringstream oss;
	utility::outputStreamAdapter adapter(oss);

	textPart->getBody()->getContents()->extract(adapter);

	const string data = oss.str();

	m_text = textPart->getBody()->getContents()->clone();

	// Find charset. Exceptions are caught by const reference so that the
	// exception object is neither copied nor sliced.
	try
	{
		const ref <const contentTypeField> ctf =
			textPart->getHeader()->findField(fields::CONTENT_TYPE).dynamicCast <contentTypeField>();

		m_charset = ctf->getCharset();
	}
	catch (const exceptions::no_such_field&)
	{
		// No "Content-type" field.
	}
	catch (const exceptions::no_such_parameter&)
	{
		// No "charset" parameter.
	}

	// Extract embedded objects. The algorithm is quite simple: for each previously
	// found inline part, we check if its CID/Location is contained in the HTML text.
	for (std::vector <ref <const bodyPart> >::const_iterator p = cidParts.begin() ;
	     p != cidParts.end() ; ++p)
	{
		const ref <const headerField> midField =
			(*p)->getHeader()->findField(fields::CONTENT_ID);

		const messageId mid = *midField->getValue().dynamicCast <const messageId>();
		const string id = mid.getId();

		// An empty identifier would reduce the search below to the bare
		// "cid:" scheme, which matches any CID reference in the text.
		if (id.empty())
			continue;

		if (data.find("CID:" + id) != string::npos ||
		    data.find("cid:" + id) != string::npos)
		{
			// This part is referenced in the HTML text.
			// Add it to the embedded object list.
			addEmbeddedObject(**p, id);
		}
	}

	for (std::vector <ref <const bodyPart> >::const_iterator p = locParts.begin() ;
	     p != locParts.end() ; ++p)
	{
		const ref <const headerField> locField =
			(*p)->getHeader()->findField(fields::CONTENT_LOCATION);

		const text loc = *locField->getValue().dynamicCast <const text>();
		const string locStr = loc.getWholeBuffer();

		// find() locates an empty string in any text, so an empty location
		// must not be treated as "referenced by the HTML text".
		if (locStr.empty())
			continue;

		if (data.find(locStr) != string::npos)
		{
			// This part is referenced in the HTML text.
			// Add it to the embedded object list.
			addEmbeddedObject(**p, locStr);
		}
	}

	// Extract plain text, if any.
	if (!findPlainTextPart(*message, *parent, *textPart))
	{
		m_plainText = vmime::create <emptyContentHandler>();
	}
}