Example #1
0
UAS_Pointer<UAS_List<UAS_TextRun> >
DtSR_SearchResultsEntry::create_matches()
{

#ifdef DEBUG
    fprintf(stderr, "(DEBUG) UAS_Common is being created from id=\"%s\"\n",
							(char*)f_id);
#endif
    UAS_Pointer<UAS_Common> doc = UAS_Common::create(f_id);

#ifdef DEBUG
    fprintf(stderr,
		"(DEBUG) id=%s\n\t"
		"book_name=%s title=%s\n",
		(char*)(doc->id()),
		(char*)(doc->book_name()), (char*)(doc->title()));
#endif

#ifdef DUMP_NODES
    {
	ofstream out("OriginalText");
	out << (char *) doc->data();
    }
#endif

    mtry
	{
	    style_sheet_mgr().initOnlineStyleSheet(doc);
	}
//  catch_noarg (StyleSheetSyntaxError)
    mcatch_any()
	{
#ifdef JOE_HATES_THIS
	    message_mgr().error_dialog(
		(char*)UAS_String(CATGETS(Set_Messages, 39, "File a Bug")));
#else
	    throw(CASTEXCEPT Exception());
#endif
	}
    end_try;

    istringstream input((char *)doc->data());
    ostringstream output;
    
    mtry
	{
	    Tml_TextRenderer	renderer(output, f_search_res->search_zones());
	    Resolver resolver(*gPathTab, renderer);
	    DocParser docparser(resolver);

	    docparser.parse(input);
	}
    mcatch_any()
	{
	    ON_DEBUG(cerr << "DtSR_SearchResultsEntry::create_matches...exception thrown" << '\n' << flush);
	    rethrow;
	}
    end_try;

    string outpstr = output.str();
    char* text = (char*)outpstr.c_str();

#ifdef DUMP_NODES
    {
	ofstream out("ParsedText");
	out << text;
    }
#endif

#ifdef DEBUG
    fprintf(stderr, "(DEBUG) stems=0x%p, count=%d\n",
			(char*)f_search_res->stems(f_dbn)->stems(),
			f_search_res->stems(f_dbn)->count());

    int n_of_stems = 0;
    for (; n_of_stems < f_search_res->stems(f_dbn)->count(); n_of_stems++) {
	fprintf(stderr, "(DEBUG) %dth stem = %s\n", n_of_stems,
			(f_search_res->stems(f_dbn)->stems())[n_of_stems]);
    }
#endif

    int stype = f_search_res->search_type();

    DtSrHitword* kwics = NULL;
    long n_kwics = 0;

    char* parseout = NULL;

    // hack! overwrite f_language, since austext's value is wrong
    // In future, the next lines should be removed.
    const char* lang = getenv("LANG");
    if (lang && !strncmp(lang, "ja", strlen("ja")))
	f_language = DtSrLaJPN;
    else
	f_language = DtSrLaENG;
	
    if (f_language == DtSrLaJPN) { // do not trust DtSearchHighlight!
	int count        = f_search_res->stems(f_dbn)->count();

	ostringstream stemsbuf;
	for (int i = 0; i < count; i++) {
	    stemsbuf << (f_search_res->stems(f_dbn)->stems())[i] << '\n';
	}
	string stemsbstr = stemsbuf.str();
	char* stems = (char*)stemsbstr.c_str();

	parseout = StringParser::hilite(text, count, stems);

	assert( parseout != NULL );

	delete[] stems;
    }
    else {

	static DtSR_SearchEngine& search_engine = DtSR_SearchEngine::search_engine();
	if (DtSearchHighlight(
		search_engine.char_db_name(f_dbn),
		text, &kwics, &n_kwics, stype,
		(char*)f_search_res->stems(f_dbn)->stems(),
		f_search_res->stems(f_dbn)->count()) != DtSrOK) {

	    fprintf(stderr, "(ERROR) DtSearchHighlight failed\n");
#ifdef DEBUG
	    abort();
#endif
	}

#ifdef DEBUG
	fprintf(stderr, "(DEBUG) %ld hit found in %s\n", n_kwics, (char*)f_id);
#endif
    }

    UAS_Pointer<UAS_List<UAS_TextRun> >
				matches = new UAS_List<UAS_TextRun>;

    // convert kwics to textrun
    string textrbstr;
    if (parseout == NULL && kwics) {
	ostringstream textrunbuf;
	for (int i = 0; i < n_kwics; i++)
	    textrunbuf << kwics[i].offset << '\t' << kwics[i].length << '\n';
	textrbstr = textrunbuf.str();
	parseout = (char*)textrbstr.c_str();
    }
    else if (parseout == NULL)
    {
	return matches;
    }

#ifdef DEBUG
    fprintf(stderr, "(DEBUG) byte offset and length\n%s", parseout);
#endif

    istringstream textruns(parseout);

    char linebuf[128];
    while (textruns.get(linebuf, 128, '\n')) {
	char newline;
	textruns.get(newline);
	assert( newline == '\n');

	char* off_str = linebuf;
	char* len_str = strchr(linebuf, '\t');
	assert( len_str && *len_str == '\t' );
	*len_str++ = '\0';

	int mode = True;

	const char* cursor = (const char*)text;
	assert( *cursor == ShiftIn || *cursor == ShiftOut );

	int off = atoi(off_str);
	int vcc = 0;
	
	while (off > 0) {

	    int scanned = 0;
	    if (*cursor == '\n' || *cursor == '\t' || *cursor == ' '  ||
		*cursor == 0x0D || (unsigned char)*cursor == 0xA0) {
		scanned++;
	    }
	    else if (*cursor == ShiftIn || *cursor == ShiftOut) {
		if (*cursor == ShiftIn)
		    mode = True;
		else
		    mode = False;
		scanned++;
	    }
	    else {
		scanned = mblen(cursor, MB_CUR_MAX);
		vcc++;

		/* skip one byte in case of failure */
		if (scanned < 0)
		    scanned = 1;
	    }

	    off -= scanned;
	    cursor += scanned;
	}

	if (mode == False)
	    continue;

	assert( off == 0 );

	int len = atoi(len_str);
	// remove leading white-spaces
	for (; len && (*cursor == ' ' || *cursor == '\t' ||
		       *cursor == '\n'|| *cursor == 0x0D); cursor++, len--);

	// remove trailing white-spaces
	if (len > 0) {
	    for (const char*  p = cursor + len - 1;
		 *p==' ' || *p=='\t' || *p=='\n' || *p==0x0D; p--, len--);
	}

	if (len == 0)
	    continue;

	int vlen = 0;
	for (; len > 0; vlen++) {
	    int scanned = mblen(cursor, MB_CUR_MAX);
	    assert( scanned >= 0 );
	    len -= scanned;
	    cursor += scanned;
	}

	UAS_Pointer<UAS_TextRun> textrun = new UAS_TextRun(vcc, vlen);
	matches->insert_item(textrun);
    }

    return matches;
}