C++ (Cpp) DocInfo::setLinks примеры использования

Язык программирования: C++ (Cpp)

Класс/Тип: DocInfo

Метод/Функция: setLinks

Примеров на hotexamples.com: 1

C++ (Cpp) DocInfo::setLinks — найден 1 пример. Это лучшие примеры кода на C++ (Cpp) для DocInfo::setLinks, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

termCount(2)

setCurrentFrame(2)

setCurrentChannel(2)

docID(2)

setCanonicalUrl(1)

setUrl(1)

setTitle(1)

setLinks(1)

setCurrentChannelColumn(1)

setContent(1)

FrameID(1)

IsTopLevel(1)

doc(1)

currentFrame(1)

currentChannel(1)

URL(1)

ShouldMatchActiveTabPermission(1)

PrincipalURL(1)

Principal(1)

positions(1)

Пример #1

Показать файл

Файл: HTMLParser.cpp Проект: lizardoluis/irSearch

/**
 * Builds a DocInfo from a single document.
 *
 * The document text is passed through cleanText(), parsed with Gumbo, and the
 * resulting tree is handed to extractContent(), extractPageTitle() and
 * extractLinks(). Their outputs, together with the document URL and its
 * canonical form, are stored in the returned DocInfo.
 *
 * @param document  Source document; only getText() and getURL() are used.
 * @return DocInfo with content, canonical URL, URL, title and links set.
 */
DocInfo HTMLParser::parse(RICPNS::Document &document) {

	// Function-local owner for the Gumbo parse tree. The destructor releases
	// the tree on every exit path, so an exception thrown by one of the
	// extract*() helpers no longer leaks it.
	struct GumboOutputGuard {
		GumboOutput* output;

		explicit GumboOutputGuard(GumboOutput* parsed) : output(parsed) {}
		~GumboOutputGuard() {
			gumbo_destroy_output(&kGumboDefaultOptions, output);
		}

		GumboOutputGuard(const GumboOutputGuard&) = delete;
		GumboOutputGuard& operator=(const GumboOutputGuard&) = delete;
	};

	string html;
	cleanText(document.getText(), html);

	string docUrl = document.getURL();
	string content, pageTitle;
	list<pair<string, string> > links;

	{
		// The tree is only needed while the three extraction passes run;
		// closing this scope frees it before the DocInfo is assembled.
		GumboOutputGuard parsed(gumbo_parse(html.c_str()));
		GumboNode* node = parsed.output->root;

		// The passes run sequentially, in this order.
		extractContent(node, content);
		extractPageTitle(node, pageTitle);
		extractLinks(node, links, docUrl);
	}

	DocInfo docInfo;
	docInfo.setContent(content);

	// Use the normalized URL when oneurl accepts docUrl; otherwise fall back
	// to the URL unchanged.
	oneurl curl;
	docInfo.setCanonicalUrl(
			curl.Parse(docUrl) ?
					curl.CNormalize(docUrl) : docUrl);

	docInfo.setUrl(docUrl);

	docInfo.setTitle(pageTitle);
	docInfo.setLinks(links);

	return docInfo;
}