static void generateSummary( Summary &summary, char *htmlInput, const char *queryStr, const char *urlStr ) {
	Xml xml;
	ASSERT_TRUE(xml.set(htmlInput, strlen(htmlInput), 0, CT_HTML));

	Words words;
	ASSERT_TRUE(words.set(&xml, true));

	Bits bits;
	ASSERT_TRUE(bits.set(&words));

	Url url;
	url.set(urlStr);

	Sections sections;
	ASSERT_TRUE(sections.set(&words, &bits, &url, "", CT_HTML));

	Query query;
	ASSERT_TRUE(query.set2(queryStr, langEnglish, true));

	LinkInfo linkInfo;
	memset ( &linkInfo , 0 , sizeof(LinkInfo) );
	linkInfo.m_lisize = sizeof(LinkInfo);

	Title title;
	ASSERT_TRUE(title.setTitle(&xml, &words, 80, &query, &linkInfo, &url, NULL, 0, CT_HTML, langEnglish));

	Pos pos;
	ASSERT_TRUE(pos.set(&words));

	Bits bitsForSummary;
	ASSERT_TRUE(bitsForSummary.setForSummary(&words));

	Phrases phrases;
	ASSERT_TRUE(phrases.set(&words, &bits));

	Matches matches;
	matches.setQuery(&query);
	ASSERT_TRUE(matches.set(&words, &phrases, &sections, &bitsForSummary, &pos, &xml, &title, &url, &linkInfo));

	summary.setSummary(&xml, &words, &sections, &pos, &query, 180, 3, 3, 180, &url, &matches, title.getTitle(), title.getTitleLen());
}
Ejemplo n.º 2
0
/// @brief ClientXMLDocument constructor creates an XML document header and root element
/// and puts them in the output string stream.
ClientXMLDocument::ClientXMLDocument(BOSS::CommandLine & config,
		const Phrases & phrases): cl(config)
{

  outs << "<?xml version='1.0' encoding='ISO-8859-1' ?>" << endl;
  outs << "<TEXT>" << endl;

   for (int i = 0; i < phrases.size(); i++)
   {
	   phraseToXML(phrases[i]);
   }

  outs << "</TEXT>" << endl;
}
// . return length stored into "buf"
// . content must be NULL terminated
// . if "useAnchors" is true we do click and scroll
// . if "isQueryTerms" is true, we do typical anchors in a special way
int32_t Highlight::set ( SafeBuf *sb,
		      //char        *buf          ,
		      //int32_t         bufLen       ,
		      char        *content      ,
		      int32_t         contentLen   ,
		      // primary language of the document (for synonyms)
		      char         docLangId    ,
		      Query       *q            ,
		      bool         doStemming   ,
		      bool         useAnchors   ,
		      const char  *baseUrl      ,
		      const char  *frontTag     ,
		      const char  *backTag      ,
		      int32_t         fieldCode    ,
		      int32_t         niceness      ) {

	Words words;
	if ( ! words.set ( content      , 
			   contentLen   , 
			   TITLEREC_CURRENT_VERSION,
			   true         , // computeId
			   true         ) ) // has html entites?
		return -1;

	int32_t version = TITLEREC_CURRENT_VERSION;

	Bits bits;
	if ( ! bits.set (&words,version,niceness) ) return -1;

	Phrases phrases;
	if ( !phrases.set(&words,&bits,true,false,version,niceness))return -1;

	//SafeBuf langBuf;
	//if ( !setLangVec ( &words , &langBuf , niceness )) return 0;
	//uint8_t *langVec = (uint8_t *)langBuf.getBufStart();

	// make synonyms
	//Synonyms syns;
	//if(!syns.set(&words,NULL,docLangId,&phrases,niceness,NULL)) return 0;

	Matches matches;
	matches.setQuery ( q );

	if ( ! matches.addMatches ( &words , &phrases ) ) return -1;

	// store
	m_numMatches = matches.getNumMatches();

	return set ( sb , 
		     //buf         ,
		     //bufLen      , 
		     &words      ,
		     &matches    ,
		     doStemming  ,
		     useAnchors  ,
		     baseUrl     ,
		     frontTag    ,
		     backTag     ,
		     fieldCode   ,
		     q		 );
}