コード例 #1
0
/**
 * @brief Returns the stem of @p _word as reported by WordNet's morphword().
 *
 * morphword() is queried with the second argument running from 1 to 4
 * (presumably the WordNet part-of-speech codes -- TODO confirm against wn.h).
 * The first non-null result becomes the stem; a later result that differs
 * from it is only reported on the debug stream and does not replace it.
 *
 * @param _word       Word to stem.
 * @param _reverseDir When true the word is returned unchanged, because
 *                    reverse-direction stemming is not supported here.
 * @return The stem, or the lower-cased input when morphword() yields nothing.
 * @throws exExternStemmer if re_wninit() reports a failure (non-zero).
 */
QString WordnetStemmer::getStem(const QString &_word, bool _reverseDir)
{
    if (_reverseDir)
        return _word; // Wordnet does not support reverse dir stemming

    if (re_wninit())
        throw exExternStemmer("Unable to reinit wordnet");

    // morphword() takes a mutable char*, so it needs an owned copy of the
    // printable form. QByteArray sizes itself from the source string and
    // always keeps a terminating NUL; the previous fixed-size stack array had
    // no room for the terminator and was overrun by strcpy().
    QByteArray Word(wmaPrintable(_word));

    QString Stem;
    for (int i = 1; i < 5; i++) {
        char* Morphed = morphword(Word.data(), i);
        if (!Morphed)
            continue;

        if (Stem.size() && Stem != Morphed) {
            // Conflicting stems: keep the first one found, just report it.
            wmaDebug<<"What to do in stemmer: "<<wmaPrintable(Stem)<<" - "<<Morphed<<std::endl;
        } else {
            Stem = Morphed;
        }
    }

    if (Stem.isEmpty()) {
        wmaDebug<<"WordnetStemmer: "<<wmaPrintable(_word)<<" -> "<<wmaPrintable(_word.toLower())<<std::endl;
        return _word.toLower();
    }

    wmaDebug<<"WordnetStemmer: "<<wmaPrintable(_word)<<" -> "<<wmaPrintable(Stem)<<std::endl;
    return Stem;
}
コード例 #2
0
ファイル: morphyplugin.c プロジェクト: ecntrk/Gardener
/*
 * morphMe - print the noun morphs of str to stdout, one per line.
 *
 * Prints the result of morphword(str, NOUN) first, then every result that
 * morphstr() yields for the same string.  Each item is preceded by '\n'.
 * If morphinit() reports failure (< 0) an error is printed and nothing
 * else is attempted.  re_morphinit() is called once the results have been
 * printed.
 */
void
morphMe(char *str) {

    char *temp;

    if (morphinit() < 0) {
        printf("\nError, cant init morphy");
        return;
    }
    //wninit();

    /* morphword() may return NULL; passing NULL to %s is undefined. */
    temp = morphword(str, NOUN);
    printf("\n%s", temp ? temp : "(null)");

    /* morphstr() is an iterator: the string is passed on the first call
       only, and NULL on every following call to get the next result.
       Passing str again would restart the search each time. */
    for (temp = morphstr(str, NOUN); temp != NULL; temp = morphstr(NULL, NOUN))
        printf("\n%s", temp);

    re_morphinit();
}
コード例 #3
0
ファイル: morph.c プロジェクト: Arnukk/TDS
/*
 * morphprep_join - write the concatenation of a and b into dst.
 *
 * Returns 1 on success.  Returns 0 and leaves dst as the empty string when
 * the result (including the terminating NUL) does not fit in dstsize bytes.
 */
static int morphprep_join(char *dst, size_t dstsize, const char *a, const char *b)
{
    size_t alen = strlen(a);
    size_t blen = strlen(b);

    if (alen + blen >= dstsize) {
        dst[0] = '\0';
        return 0;
    }
    memcpy(dst, a, alen);
    memcpy(dst + alen, b, blen + 1);	/* + 1 copies the NUL as well */
    return 1;
}

/*
 * morphprep - morph a verb phrase of the form "verb_rest...".
 *
 * Assumes the verb is the first '_'-separated word of s.  The verb is
 * stripped off, checked to be purely alphanumeric, and then candidate base
 * forms of the verb are tried with the remainder of the phrase tacked on.
 * When the phrase has more than two words, each candidate is also tried
 * with the last word replaced by its NOUN morph.
 *
 * Returns a pointer to a static buffer holding the first candidate for
 * which is_defined(candidate, VERB) is true (overwritten by the next
 * call), or NULL when nothing matches, when s contains no '_', when the
 * verb contains a non-alphanumeric character, or when the verb does not
 * fit in WORDBUF.  Candidates that would not fit in WORDBUF are skipped
 * instead of overrunning the buffers.
 */
static char *morphprep(char *s)
{
    char *rest, *exc_word, *lastwd = NULL, *last;
    int i, offset, cnt;
    size_t verblen, midlen;
    char word[WORDBUF], end[WORDBUF];
    static char retval[WORDBUF];

    rest = strchr(s, '_');
    if (rest == NULL)			/* single word: no phrase to work on */
        return(NULL);
    last = strrchr(s, '_');

    verblen = (size_t)(rest - s);
    if (verblen >= sizeof(word))	/* verb would overflow word[] */
        return(NULL);

    if (rest != last) {			/* more than 2 words */
        lastwd = morphword(last + 1, NOUN);
        if (lastwd != NULL) {
            /* end = everything from the first '_' through the last '_',
               followed by the morphed last word */
            midlen = (size_t)(last - rest) + 1;
            if (midlen + strlen(lastwd) < sizeof(end)) {
                memcpy(end, rest, midlen);
                strcpy(end + midlen, lastwd);
            } else {
                lastwd = NULL;		/* does not fit: drop this variant */
            }
        }
    }

    memcpy(word, s, verblen);
    word[verblen] = '\0';
    for (i = 0; i < (int)verblen; i++) {
        /* the cast keeps isalnum() defined for bytes >= 0x80 */
        if (!isalnum((unsigned char)word[i]))
            return(NULL);
    }

    offset = offsets[VERB];
    cnt = cnts[VERB];

    /* First try to find the verb in the exception list */

    exc_word = exc_lookup(word, VERB);
    if (exc_word != NULL && strcmp(exc_word, word)) {
        if (morphprep_join(retval, sizeof(retval), exc_word, rest) &&
            is_defined(retval, VERB))
            return(retval);
        if (lastwd &&
            morphprep_join(retval, sizeof(retval), exc_word, end) &&
            is_defined(retval, VERB))
            return(retval);
    }

    /* Then try each of the cnts[VERB] entries starting at offsets[VERB] */

    for (i = 0; i < cnt; i++) {
        exc_word = wordbase(word, (i + offset));
        if (exc_word == NULL || !strcmp(word, exc_word))
            continue;			/* no morph, or ending unchanged */

        if (morphprep_join(retval, sizeof(retval), exc_word, rest) &&
            is_defined(retval, VERB))
            return(retval);
        if (lastwd &&
            morphprep_join(retval, sizeof(retval), exc_word, end) &&
            is_defined(retval, VERB))
            return(retval);
    }

    /* Fall back to the unmorphed verb.  word + rest reassembles s itself,
       so only the variant with the morphed last word can differ from s. */

    if (morphprep_join(retval, sizeof(retval), word, rest) &&
        strcmp(s, retval))
        return(retval);
    if (lastwd &&
        morphprep_join(retval, sizeof(retval), word, end) &&
        strcmp(s, retval))
        return(retval);

    return(NULL);
}
コード例 #4
0
ファイル: morph.c プロジェクト: Arnukk/TDS
/*
 * morphstr - find base forms of a (possibly multi-word) string for part of
 * speech pos.
 *
 * Call protocol: pass the string in origstr on the first call, then pass
 * NULL on following calls to ask for further results for the same string.
 * State between calls is kept in the static variables below, so only one
 * search can be in progress at a time.
 *
 * Returns a pointer to a result string, or NULL when there is no (further)
 * result.  The pointer comes from exc_lookup(), morphword(), morphprep() or
 * the static searchstr buffer; searchstr is reset on every first-time call.
 *
 * NOTE(review): origstr is copied into str[WORDBUF] with strcpy(), and
 * searchstr is built with strcat(), neither with a visible length check --
 * callers presumably must keep inputs shorter than WORDBUF; confirm.
 */
char *morphstr(char *origstr, int pos)
{
    /* searchstr: result being assembled; str: normalised copy of the input */
    static char searchstr[WORDBUF], str[WORDBUF];
    /* svcnt: saved word count; svprep: saved hasprep() result (non-zero when
       the previous first-time call took the verb + preposition path) */
    static int svcnt, svprep;
    char word[WORDBUF], *tmp;
    int cnt, st_idx = 0, end_idx;
    int prep;
    char *end_idx1, *end_idx2;
    char *append;
    
    /* SATELLITE is handled as ADJ */
    if (pos == SATELLITE)
	pos = ADJ;

    /* First time through for this string */

    if (origstr != NULL) {
	/* Assume string hasn't had spaces substituted with '_' */
	/* (presumably strsubst replaces ' ' by '_' and strtolower lower-cases
	   in place; both helpers are defined outside this block) */
	strtolower(strsubst(strcpy(str, origstr), ' ', '_'));
	searchstr[0] = '\0';
	cnt = cntwords(str, '_');
	svprep = 0;

	/* first try exception list */

	if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str)) {
	    svcnt = 1;		/* force next time to pass NULL */
	    return(tmp);
	}

	/* Then try simply morph on original string */

	/* NOTE(review): this return leaves svcnt at whatever an earlier call
	   stored, and the NULL-call branch below reads it -- confirm intended */
	if (pos != VERB && (tmp = morphword(str, pos)) && strcmp(tmp, str))
	    return(tmp);

	if (pos == VERB && cnt > 1 && (prep = hasprep(str, cnt))) {
	    /* assume we have a verb followed by a preposition */
	    svprep = prep;
	    return(morphprep(str));
	} else {
	    /* Morph the string word by word.  The loop handles every word
	       except the last: it cuts at the nearest '_' or '-', appends the
	       morph of the word (or the word itself when morphword() returns
	       NULL) and then the separator that followed it. */
	    svcnt = cnt = cntwords(str, '-');
	    /* origstr is known to be non-NULL here, so only --cnt controls
	       the loop */
	    while (origstr && --cnt) {
		end_idx1 = strchr(str + st_idx, '_');
		end_idx2 = strchr(str + st_idx, '-');
		if (end_idx1 && end_idx2) {
		    /* both separators present: use whichever comes first */
		    if (end_idx1 < end_idx2) {
			end_idx = (int)(end_idx1 - str);
			append = "_";
		    } else {
			end_idx = (int)(end_idx2 - str);
			append = "-";
		    }
		} else {
		    if (end_idx1) {
			end_idx = (int)(end_idx1 - str);
			append = "_";
		    } else {
			/* NOTE(review): end_idx2 may be NULL here as well; the
			   end_idx < 0 test below appears to be the guard for
			   that case */
			end_idx = (int)(end_idx2 - str);
			append = "-";
		    }
		}	
		if (end_idx < 0) return(NULL);		/* shouldn't do this */
		strncpy(word, str + st_idx, end_idx - st_idx);
		word[end_idx - st_idx] = '\0';
		if(tmp = morphword(word, pos))
		    strcat(searchstr,tmp);
		else
		    strcat(searchstr,word);
		strcat(searchstr, append);
		st_idx = end_idx + 1;
	    }
	    
	    /* last word: everything after the final separator consumed above */
	    if(tmp = morphword(strcpy(word, str + st_idx), pos)) 
		strcat(searchstr,tmp);
	    else
		strcat(searchstr,word);
	    /* only report the assembled string if it differs from the input
	       and is_defined() accepts it for this pos */
	    if(strcmp(searchstr, str) && is_defined(searchstr,pos))
		return(searchstr);
	    else
		return(NULL);
	}
    } else {			/* subsequent call on string */
	if (svprep) {		/* if verb has preposition, no more morphs */
	    svprep = 0;
	    return(NULL);
	} else if (svcnt == 1)
	    /* presumably asks exc_lookup() for its next match of the previous
	       lookup -- confirm against exc_lookup() */
	    return(exc_lookup(NULL, pos));
	else {
	    /* multi-word string: look the whole string up in the exception
	       list once; svcnt = 1 routes later calls to the branch above */
	    svcnt = 1;
	    if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str))
		return(tmp);
	    else
		return(NULL);
	}
    }
}