// Construct a SMIL document reader.
//
// Registers the SMIL node/attribute tables and the XML attribute table on
// aReader, then reads nodes until either the reader runs out of input or the
// most recently opened tag is smil::body_node. While the current element is
// smil::smil_node, an xml::lang_attr attribute is recorded on aSubject as
// dc:language. The tts:mimetype statement is always added at the end.
smil_document_reader::smil_document_reader(const std::shared_ptr<xml::reader> &aReader, const rdf::uri &aSubject, rdf::graph &aPrimaryMetadata)
	: reader(aReader)
	, mBasePath(cainteoir::path(aSubject.str()).parent())
{
	aReader->set_nodes(xmlns::smil, smil_nodes);
	aReader->set_attrs(xmlns::smil, smil_attrs);
	aReader->set_attrs(xmlns::xml, xml::attrs);

	// The context entry of the element whose begin tag was seen last.
	const xml::context::entry *element = aReader->context();
	for (;;)
	{
		// read() is evaluated before the body check, so one node past the
		// body begin tag is consumed before the loop stops.
		if (!aReader->read())
			break;
		if (element == &smil::body_node)
			break;

		const auto kind = aReader->nodeType();
		if (kind == xml::reader::beginTagNode)
		{
			element = aReader->context();
		}
		else if (kind == xml::reader::attribute)
		{
			// Only attributes of the smil element itself are of interest here.
			const bool onSmilElement = (element == &smil::smil_node);
			if (onSmilElement && aReader->context() == &xml::lang_attr)
			{
				aPrimaryMetadata.statement(aSubject, rdf::dc("language"), rdf::literal(aReader->nodeValue().str()));
			}
		}
		// Every other node type is ignored while scanning for metadata.
	}

	aPrimaryMetadata.statement(aSubject, rdf::tts("mimetype"), rdf::literal("application/smil"));
}
// Construct an RTF document reader over aData.
//
// The reader starts in state_rtf with codepage 1252. If both rtf.read() and
// read(&aPrimaryMetadata) succeed, the range is reset to { 0, 0 } and the
// state moves to state_title. The tts:mimetype statement is added to
// aPrimaryMetadata whether or not that initial read succeeds.
rtf_document_reader::rtf_document_reader(std::shared_ptr<cainteoir::buffer> &aData, const rdf::uri &aSubject, rdf::graph &aPrimaryMetadata, const std::string &aTitle)
	: rtf(aData)
	, mData(aData)
	, mSubject(aSubject)
	, mCodepage(1252)
	, mState(state_rtf)
	, mBlockCount(0)
	, mTitle(aTitle)
	, mClearText(false)
{
	// read() is only attempted when the underlying rtf reader produced a token.
	const bool initialReadOk = rtf.read() && read(&aPrimaryMetadata);
	if (initialReadOk)
	{
		range = { 0, 0 };
		mState = state_title;
	}

	aPrimaryMetadata.statement(aSubject, rdf::tts("mimetype"), rdf::literal("application/rtf"));
}
// Describe this mimetype in aGraph.
//
// The subject is the URI formed from baseuri and this mimetype's name. It is
// given the rdf:type `type`, a tts:name, and dc:title / dc:description (both
// taken from the mime info label), followed by one tts:mimetype statement per
// registered mimetype string and one tts:extension statement per glob.
void cainteoir::mime::mimetype::metadata(rdf::graph &aGraph, const std::string &baseuri, const rdf::uri &type) const
{
	// Prefer the info attached to this object; otherwise look it up in the
	// mimetypes table by mime_type.
	const mime_info *details = (const mime_info *)info;
	if (details == nullptr)
		details = &mimetypes[mime_type];

	rdf::uri self = rdf::uri(baseuri, name);

	aGraph.statement(self, rdf::rdf("type"), type);
	aGraph.statement(self, rdf::tts("name"), rdf::literal(name));
	aGraph.statement(self, rdf::dc("title"), rdf::literal(details->label));
	aGraph.statement(self, rdf::dc("description"), rdf::literal(details->label));

	for (auto &contentType : details->mimetypes)
	{
		aGraph.statement(self, rdf::tts("mimetype"), rdf::literal(contentType));
	}

	for (auto &pattern : details->globs)
	{
		aGraph.statement(self, rdf::tts("extension"), rdf::literal(pattern));
	}
}
// Parse "Name: value" header lines starting at `first`.
//
// Each header is read in turn; a value continues across lines whose first
// character is a space or a tab. Recognised headers update state:
//   - Content-Transfer-Encoding  -> `encoding`
//   - Content-Type               -> `mimetype`, plus the first parameter when
//                                   it is `boundary` (written to the boundary
//                                   argument) or `charset` (written to `charset`)
//   - Subject / Title / Story    -> `title` and a dc:title statement
//   - From / Author              -> a dc:creator statement (a plain literal, or
//                                   a foaf:Person node with rdf:value and foaf:mbox)
//   - Newsgroups                 -> a dc:publisher statement
//   - Keywords                   -> one dc:subject statement per comma-separated item
// Header names are matched with comparei; other headers are skipped.
//
// @param subject   The subject the metadata statements are attached to.
// @param aGraph    The graph receiving the metadata statements.
// @param boundary  Receives the Content-Type `boundary` parameter, if present.
//
// @return true once the empty line ending the header block has been consumed;
//         false if a line is not of the form "name: value" or the input ends
//         before an empty line is found.
//
// NOTE(review): the scans below use inclusive upper bounds (`<= last`,
// `<= value.end()`) and dereference the end pointer. This presumably relies on
// every header value being followed by its line terminator inside the
// enclosing buffer -- confirm before tightening the bounds.
bool parse_headers(const rdf::uri &subject, rdf::graph &aGraph, cainteoir::buffer &boundary)
{
	while (first <= last)
	{
		// A leading '~' before the header name is skipped.
		if (*first == '~') ++first;

		cainteoir::buffer name(first, first);
		cainteoir::buffer value(first, first);

		while (first <= last && is_mime_header_char(*first))
			++first;

		name = cainteoir::buffer(name.begin(), first);
		if (name.empty())
		{
			// An empty line (CR, LF or CRLF) terminates the header block.
			if (*first == '\r' || *first == '\n')
			{
				++first;
				if (*first == '\n')
					++first;
				return true;
			}
			return false;
		}

		if (first[0] == ':' && first[1] == ' ')
		{
			const char * start = first;

			// Advance to the '\n' that ends the header: a newline followed by
			// a space or tab is a continuation line and is kept in the value.
			while (first <= last && !(first[0] == '\n' && first[1] != ' ' && first[1] != '\t'))
				++first;

			// Skip the ": " prefix and leave a trailing '\r' out of the value.
			value = cainteoir::buffer(start + 2, *(first-1) == '\r' ? first-1 : first);
			++first;
		}
		else
			return false;

		if (!name.comparei("Content-Transfer-Encoding"))
		{
			const char * type = value.begin();
			while (type <= value.end() && !(*type == ';' || *type == '\n'))
				++type;
			encoding = std::string(value.begin(), *(type-1) == '\r' ? type-1 : type);
		}
		else if (!name.comparei("Content-Type"))
		{
			const char * type = value.begin();
			while (type <= value.end() && !(*type == ';' || *type == '\n'))
				++type;

			// Drop a trailing '\r' in the same way as Content-Transfer-Encoding
			// so that a parameterless CRLF-terminated header does not produce a
			// mimetype ending in a carriage return.
			mimetype = std::string(value.begin(), *(type-1) == '\r' ? type-1 : type);

			if (type <= value.end() && *type == ';')
			{
				++type;
				while (type <= value.end() && (*type == ' ' || *type == '\t' || *type == '\r' || *type == '\n'))
					++type;

				const char * arg_begin = type;
				while (type <= value.end() && *type != '=')
					++type;

				// No '=' means there is no parameter to read; move on to the
				// next header.
				if (*type != '=') continue;

				cainteoir::buffer arg(arg_begin, type);
				++type;

				// A quoted parameter value ends at the closing quote; an
				// unquoted one ends at the newline.
				char end_of_value = '\n';
				if (*type == '"')
				{
					++type;
					end_of_value = '"';
				}

				const char * content = type;
				while (type <= value.end() && *type != end_of_value)
					++type;

				if (!arg.compare("boundary"))
					boundary = cainteoir::buffer(content, type);
				else if (!arg.compare("charset"))
					charset = cainteoir::buffer(content, type).str();
			}
		}
		else if (!name.comparei("Subject") || !name.comparei("Title") || !name.comparei("Story"))
		{
			title = value.str();
			aGraph.statement(subject, rdf::dc("title"), rdf::literal(title));
		}
		else if (!name.comparei("From") || !name.comparei("Author"))
		{
			// name ...

			const char *name_begin = value.begin();
			const char *name_end = value.begin();
			const char *mbox_begin = nullptr;
			const char *mbox_end = nullptr;
			const char *email_at = nullptr;
			bool mbox_is_name = false;

			while (name_end <= value.end() && *name_end == ' ')
				++name_end;
			name_begin = name_end;

			// The leading part runs up to the first '<', '(' or "&lt;".
			while (name_end <= value.end() && *name_end != '<' && *name_end != '(' && !(name_end[0] == '&' && name_end[1] == 'l' && name_end[2] == 't' && name_end[3] == ';'))
			{
				if (*name_end == '@') // email only ...
				{
					// The leading part is an email address: use the text
					// before the '@' as the display value for now.
					email_at = name_end;
					mbox_begin = name_begin;
					mbox_end = name_end;
					mbox_is_name = true;
				}
				++name_end;
			}

			if (name_end < value.end()) // email address ...
			{
				if (*name_end == '&') // &lt;...&gt;
				{
					mbox_is_name = false;
					mbox_begin = name_end + 4;
					mbox_end = value.end();
					while (mbox_end > mbox_begin && !(mbox_end[0] == '&' && mbox_end[1] == 'g' && mbox_end[2] == 't' && mbox_end[3] == ';'))
						--mbox_end;
				}
				else if (*name_end == '<') // <...>
				{
					mbox_is_name = false;
					mbox_begin = name_end + 1;
					mbox_end = value.end();
					while (mbox_end > mbox_begin && *mbox_end != '>')
						--mbox_end;
				}
				else // (...)
				{
					// "address (Name)": the bracketed part is the display name.
					mbox_is_name = true;
					mbox_begin = name_end + 1;
					mbox_end = value.end();
					while (mbox_end > mbox_begin && *mbox_end != ')')
						--mbox_end;
				}
			}

			// clean-up name ...

			--name_end;
			while (name_end > value.begin() && (*name_end == ' ' || *name_end == '\r' || *name_end == '\n'))
				--name_end;
			++name_end;

			// Strip surrounding quotes. At least two characters are required so
			// that a lone '"' is not treated as both the opening and closing
			// quote, which would leave name_begin past name_end.
			if (name_end - name_begin >= 2 && *name_begin == '"' && *(name_end-1) == '"')
			{
				++name_begin;
				--name_end;
			}

			if (email_at && !mbox_is_name)
				name_end = email_at;

			if (mbox_begin == nullptr) // name only ...
			{
				// Use the cleaned-up end of the name (trailing whitespace and
				// surrounding quotes removed) when it is a valid position
				// inside the value; otherwise fall back to the whole value.
				const char *creator_end = value.end();
				if (name_begin <= name_end && name_end < creator_end)
					creator_end = name_end;

				aGraph.statement(subject, rdf::dc("creator"), rdf::literal(std::string(name_begin, creator_end)));
			}
			else // name and email address ...
			{
				const rdf::uri from = aGraph.genid();
				aGraph.statement(subject, rdf::dc("creator"), from);
				aGraph.statement(from, rdf::rdf("type"), rdf::foaf("Person"));
				if (mbox_is_name)
				{
					aGraph.statement(from, rdf::rdf("value"), rdf::literal(std::string(mbox_begin, mbox_end)));
					aGraph.statement(from, rdf::foaf("mbox"), rdf::literal("mailto:" + std::string(name_begin, name_end)));
				}
				else
				{
					aGraph.statement(from, rdf::rdf("value"), rdf::literal(std::string(name_begin, name_end)));
					aGraph.statement(from, rdf::foaf("mbox"), rdf::literal("mailto:" + std::string(mbox_begin, mbox_end)));
				}
			}
		}
		else if (!name.comparei("Newsgroups"))
			aGraph.statement(subject, rdf::dc("publisher"), rdf::literal(value.str()));
		else if (!name.comparei("Keywords"))
		{
			// [a, b) is the current keyword; each pass picks up where the
			// previous keyword ended.
			const char *a = value.begin();
			const char *b = value.begin();
			while (a != value.end())
			{
				a = b;
				while (a != value.end() && (*a == ',' || *a == ' '))
					++a;

				b = a;
				while (b != value.end() && *b != ',')
					++b;

				if (b == value.end())
					--b;

				// Trim trailing separators. `b > a` (rather than `b != a`)
				// keeps b from walking backwards past the start of the value
				// when it already sits below a.
				while (b > a && (*b == ',' || *b == ' '))
					--b;
				++b;

				if (b > a)
					aGraph.statement(subject, rdf::dc("subject"), rdf::literal(std::string(a, b)));
			}
		}
	}
	return false;
}