Пример #1
0
/** Construct a SMIL document reader and scan the document head.
  *
  * The SMIL node/attribute tables and the xml: attribute table are registered
  * on the reader, then nodes are read until the reader runs out of input or
  * the most recently opened element is smil::body_node. While the current
  * element is smil::smil_node, an xml::lang_attr attribute is recorded as
  * rdf::dc("language") on aSubject.
  *
  * @param aReader          The XML reader to pull nodes from.
  * @param aSubject         The subject the metadata statements are about; the
  *                         parent of its path is stored in mBasePath.
  * @param aPrimaryMetadata The graph receiving the language and mimetype statements.
  */
smil_document_reader::smil_document_reader(const std::shared_ptr<xml::reader> &aReader, const rdf::uri &aSubject, rdf::graph &aPrimaryMetadata)
	: reader(aReader)
	, mBasePath(cainteoir::path(aSubject.str()).parent())
{
	aReader->set_nodes(xmlns::smil, smil_nodes);
	aReader->set_attrs(xmlns::smil, smil_attrs);
	aReader->set_attrs(xmlns::xml,  xml::attrs);

	// Context of the element opened most recently; attributes are attributed to it.
	const xml::context::entry *element = aReader->context();

	// read() is evaluated before the body check, exactly once per iteration.
	while (aReader->read() && element != &smil::body_node)
	{
		const auto kind = aReader->nodeType();
		if (kind == xml::reader::beginTagNode)
		{
			element = aReader->context();
		}
		else if (kind == xml::reader::attribute)
		{
			// Only the language attribute on the root smil element is of interest.
			if (element == &smil::smil_node && aReader->context() == &xml::lang_attr)
			{
				aPrimaryMetadata.statement(aSubject, rdf::dc("language"), rdf::literal(aReader->nodeValue().str()));
			}
		}
		// Every other node type is ignored.
	}

	// The mimetype is recorded regardless of how the scan above ended.
	aPrimaryMetadata.statement(aSubject, rdf::tts("mimetype"), rdf::literal("application/smil"));
}
Пример #2
0
/** Construct an RTF document reader over aData.
  *
  * After the members are initialised, the first RTF token is read and the
  * reader's own read() is run with aPrimaryMetadata. If both succeed, the
  * range is reset to { 0, 0 } and the state moves to state_title. The
  * "application/rtf" mimetype statement is added in every case.
  *
  * @param aData            The buffer holding the RTF content.
  * @param aSubject         The subject the metadata statements are about.
  * @param aPrimaryMetadata The graph receiving the metadata statements.
  * @param aTitle           The title stored in mTitle.
  */
rtf_document_reader::rtf_document_reader(std::shared_ptr<cainteoir::buffer> &aData, const rdf::uri &aSubject, rdf::graph &aPrimaryMetadata, const std::string &aTitle)
	: rtf(aData)
	, mData(aData)
	, mSubject(aSubject)
	, mCodepage(1252)
	, mState(state_rtf)
	, mBlockCount(0)
	, mTitle(aTitle)
	, mClearText(false)
{
	// Short-circuit: read(&aPrimaryMetadata) only runs when rtf.read() succeeded.
	const bool parsed = rtf.read() && read(&aPrimaryMetadata);
	if (parsed)
	{
		range = { 0, 0 };
		mState = state_title;
	}

	// Recorded whether or not the initial parse succeeded.
	aPrimaryMetadata.statement(aSubject, rdf::tts("mimetype"), rdf::literal("application/rtf"));
}
Пример #3
0
/** Describe this mimetype as RDF statements in aGraph.
  *
  * The statements are attached to the resource built from baseuri and this
  * object's name: its rdf type, tts name, dc title and dc description (both
  * taken from the info label), followed by one tts mimetype statement per
  * entry in the info's mimetypes and one tts extension statement per glob.
  *
  * @param aGraph  The graph receiving the statements.
  * @param baseuri The base of the resource URI; name is the reference part.
  * @param type    The object of the rdf:type statement.
  */
void cainteoir::mime::mimetype::metadata(rdf::graph &aGraph, const std::string &baseuri, const rdf::uri &type) const
{
	// Use the attached info when present, otherwise the mimetypes entry for mime_type.
	const mime_info *details = (const mime_info *)info;
	if (details == nullptr)
	{
		details = &mimetypes[mime_type];
	}

	rdf::uri self = rdf::uri(baseuri, name);

	aGraph.statement(self, rdf::rdf("type"), type);
	aGraph.statement(self, rdf::tts("name"), rdf::literal(name));

	// The label serves as both the title and the description.
	aGraph.statement(self, rdf::dc("title"), rdf::literal(details->label));
	aGraph.statement(self, rdf::dc("description"), rdf::literal(details->label));

	for (auto &alias : details->mimetypes)
	{
		aGraph.statement(self, rdf::tts("mimetype"), rdf::literal(alias));
	}

	for (auto &pattern : details->globs)
	{
		aGraph.statement(self, rdf::tts("extension"), rdf::literal(pattern));
	}
}
Пример #4
0
	/** Parse consecutive "Name: value" header lines starting at `first`.
	  *
	  * Recognised headers are handled as follows:
	  *   - Content-Transfer-Encoding: stored in `encoding`.
	  *   - Content-Type: stored in `mimetype`; the first parameter after ';' may
	  *     set `boundary` or `charset`.
	  *   - Subject / Title / Story: stored in `title` and added as rdf::dc("title").
	  *   - From / Author: added as rdf::dc("creator"), either as a literal or as a
	  *     foaf Person node with a value and an mbox.
	  *   - Newsgroups: added as rdf::dc("publisher").
	  *   - Keywords: each comma separated item is added as rdf::dc("subject").
	  * Any other header is consumed and ignored.
	  *
	  * `first`, `last`, `encoding`, `mimetype`, `charset` and `title` are not
	  * declared in this function; presumably they are members of the enclosing
	  * class (not visible here).
	  *
	  * NOTE(review): several scans below test `<=` against `last` / `value.end()`
	  * and then dereference, so the byte at the end position is read. This assumes
	  * that byte is readable (e.g. an inclusive end or a terminator) -- confirm.
	  *
	  * @param subject  The subject of the statements added to aGraph.
	  * @param aGraph   The graph receiving the metadata statements.
	  * @param boundary Receives the Content-Type "boundary" parameter, if present.
	  *
	  * @return true when an empty line terminating the headers was consumed;
	  *         false when a line is not of the form "name: value", or when
	  *         `first` passes `last` before an empty line is found.
	  */
	bool parse_headers(const rdf::uri &subject, rdf::graph &aGraph, cainteoir::buffer &boundary)
	{
		// Each iteration consumes one header line (including folded continuation lines).
		while (first <= last)
		{
			// A single '~' in front of the header name is skipped.
			if (*first == '~')
				++first;

			// Both ranges start empty at the current position and are filled in below.
			cainteoir::buffer name(first, first);
			cainteoir::buffer value(first, first);

			// Consume the header name.
			while (first <= last && is_mime_header_char(*first))
				++first;

			name = cainteoir::buffer(name.begin(), first);

			if (name.empty())
			{
				// No name: an empty line ("\r\n", "\n" or a lone "\r") ends the header
				// block; anything else is not a header.
				if (*first == '\r' || *first == '\n')
				{
					++first;
					if (*first == '\n')
						++first;
					return true;
				}
				return false;
			}

			// The name must be followed by exactly ": ".
			if (first[0] == ':' && first[1] == ' ')
			{
				const char * start = first;
				// Stop at a '\n' that is not followed by a space or tab; a '\n' that is
				// followed by one is a continuation and stays inside the value.
				while (first <= last && !(first[0] == '\n' && first[1] != ' ' && first[1] != '\t'))
					++first;

				// The value starts after ": " and excludes a '\r' directly before the '\n'.
				value = cainteoir::buffer(start + 2, *(first-1) == '\r' ? first-1 : first);
				++first; // step over the terminating '\n'
			}
			else
				return false;

			// NOTE(review): comparei/compare are used as "0 means equal" throughout;
			// comparei is presumably the case-insensitive form -- confirm.
			if (!name.comparei("Content-Transfer-Encoding"))
			{
				// The encoding is the value up to the first ';' or '\n', without a trailing '\r'.
				const char * type = value.begin();
				while (type <= value.end() && !(*type == ';' || *type == '\n'))
					++type;
				encoding = std::string(value.begin(), *(type-1) == '\r' ? type-1 : type);
			}
			else if (!name.comparei("Content-Type"))
			{
				// The mimetype is the value up to the first ';' or '\n'.
				const char * type = value.begin();
				while (type <= value.end() && !(*type == ';' || *type == '\n'))
					++type;
				mimetype = std::string(value.begin(), type);

				// Only the first "name=value" parameter after the ';' is examined
				// (there is no loop over further parameters).
				if (type <= value.end() && *type == ';')
				{
					++type;
					// Skip whitespace, including a folded line break, before the parameter name.
					while (type <= value.end() && (*type == ' ' || *type == '\t' || *type == '\r' || *type == '\n'))
						++type;

					// This local shadows the outer `name` buffer for the rest of this block.
					const char * name = type;
					while (type <= value.end() && *type != '=')
						++type;

					// No '=' found: ignore the parameter and move on to the next header line.
					if (*type != '=') continue;

					cainteoir::buffer arg(name, type);
					++type;

					// An unquoted value runs to the next '\n'; a quoted one to the closing '"'.
					char end_of_value = '\n';
					if (*type == '"')
					{
						++type;
						end_of_value = '"';
					}

					const char * content = type;
					while (type <= value.end() && *type != end_of_value)
						++type;

					// The parameter name is matched with compare (not comparei).
					if (!arg.compare("boundary"))
						boundary = cainteoir::buffer(content, type);
					else if (!arg.compare("charset"))
						charset = cainteoir::buffer(content, type).str();
				}
			}
			else if (!name.comparei("Subject") || !name.comparei("Title") || !name.comparei("Story"))
			{
				title = value.str();
				aGraph.statement(subject, rdf::dc("title"), rdf::literal(title));
			}
			else if (!name.comparei("From") || !name.comparei("Author"))
			{
				// name ...

				// [name_begin, name_end) is the text before any '<', '(' or "&lt;";
				// [mbox_begin, mbox_end) is the text inside the brackets, or the text
				// before the '@' when an '@' is seen in the leading text.
				const char *name_begin = value.begin();
				const char *name_end   = value.begin();
				const char *mbox_begin = nullptr;
				const char *mbox_end   = nullptr;
				const char *email_at   = nullptr;

				// True when the mbox range holds the display name and the name range
				// holds the address (see how the two are emitted at the end).
				bool mbox_is_name = false;

				// Skip leading spaces.
				while (name_end <= value.end() && *name_end == ' ')
					++name_end;

				name_begin = name_end;

				// Scan up to the first '<', '(' or "&lt;", noting the last '@' on the way.
				while (name_end <= value.end() && *name_end != '<' && *name_end != '(' && !(name_end[0] == '&' && name_end[1] == 'l' && name_end[2] == 't' && name_end[3] == ';'))
				{
					if (*name_end == '@') // email only ...
					{
						email_at   = name_end;
						mbox_begin = name_begin;
						mbox_end   = name_end;
						mbox_is_name = true;
					}

					++name_end;
				}

				// A delimiter was found before the end: locate the matching closing
				// delimiter by searching backwards from the end of the value.
				if (name_end < value.end()) // email address ...
				{
					if (*name_end == '&') // &lt;...&gt;
					{
						mbox_is_name = false;
						mbox_begin = name_end + 4;
						mbox_end = value.end();
						while (mbox_end > mbox_begin && !(mbox_end[0] == '&' && mbox_end[1] == 'g' && mbox_end[2] == 't' && mbox_end[3] == ';'))
							--mbox_end;
					}
					else if (*name_end == '<') // <...>
					{
						mbox_is_name = false;
						mbox_begin = name_end + 1;
						mbox_end = value.end();
						while (mbox_end > mbox_begin && *mbox_end != '>')
							--mbox_end;
					}
					else // (...)
					{
						// "address (Name)" form: the bracketed text is the display name.
						mbox_is_name = true;
						mbox_begin = name_end + 1;
						mbox_end = value.end();
						while (mbox_end > mbox_begin && *mbox_end != ')')
							--mbox_end;
					}
				}

				// clean-up name ...

				// Trim trailing spaces and line-break characters from the name range.
				--name_end;
				while (name_end > value.begin() && (*name_end == ' ' || *name_end == '\r' || *name_end == '\n'))
					--name_end;
				++name_end;

				// Drop a pair of surrounding double quotes.
				if (*name_begin == '"' && *(name_end-1) == '"')
				{
					++name_begin;
					--name_end;
				}

				// An '@' was seen before a '<' / "&lt;" delimiter: end the name at the '@'.
				if (email_at && !mbox_is_name)
					name_end = email_at;

				// NOTE(review): this branch ends the literal at value.end() rather than
				// the trimmed name_end computed above, so trailing whitespace or a
				// closing quote is kept -- confirm whether that is intended.
				if (mbox_begin == nullptr) // name only ...
					aGraph.statement(subject, rdf::dc("creator"), rdf::literal(std::string(name_begin, value.end())));
				else // name and email address ...
				{
					// The creator is a generated node typed as foaf Person, carrying the
					// display name as rdf value and the address as a "mailto:" mbox.
					const rdf::uri from = aGraph.genid();
					aGraph.statement(subject, rdf::dc("creator"), from);
					aGraph.statement(from, rdf::rdf("type"), rdf::foaf("Person"));
					if (mbox_is_name)
					{
						aGraph.statement(from, rdf::rdf("value"), rdf::literal(std::string(mbox_begin, mbox_end)));
						aGraph.statement(from, rdf::foaf("mbox"), rdf::literal("mailto:" + std::string(name_begin, name_end)));
					}
					else
					{
						aGraph.statement(from, rdf::rdf("value"), rdf::literal(std::string(name_begin, name_end)));
						aGraph.statement(from, rdf::foaf("mbox"), rdf::literal("mailto:" + std::string(mbox_begin, mbox_end)));
					}
				}
			}
			else if (!name.comparei("Newsgroups"))
				aGraph.statement(subject, rdf::dc("publisher"), rdf::literal(value.str()));
			else if (!name.comparei("Keywords"))
			{
				// Split the value on ','; [a, b) is the current keyword.
				const char *a = value.begin();
				const char *b = value.begin();
				while (a != value.end())
				{
					// Start after the previous keyword, skipping separators and spaces.
					a = b;
					while (a != value.end() && (*a == ',' || *a == ' '))
						++a;

					// Advance to the next ',' or the end of the value.
					b = a;
					while (b != value.end() && *b != ',')
						++b;

					// At the end of the value, step back onto the last character so the
					// trim below starts on a character inside the value.
					if (b == value.end())
						--b;

					// Trim the trailing separator/spaces, then make b an exclusive end again.
					while (b != a && (*b == ',' || *b == ' '))
						--b;
					++b;

					// Empty keywords are not emitted.
					if (b > a)
						aGraph.statement(subject, rdf::dc("subject"), rdf::literal(std::string(a, b)));
				}
			}
		}

		// Ran past `last` without encountering the empty line that ends the headers.
		return false;
	}