예제 #1
0
파일: main.c 프로젝트: antonmazun/gogog
/*
 * Filters the text loaded from "do1.txt" and writes the result to
 * "posle.txt".
 *
 * In every sentence, each run of consecutive "not" words is kept together
 * with the single word that directly follows the run; every other word is
 * deleted. The surviving words of each sentence are written on one line,
 * separated by ", ", and the filtered text is finally shown with
 * text_print().
 *
 * Returns 0 on success, 1 if text_new() returns NULL, the output file cannot
 * be opened, or closing the output file reports an error.
 */
int main()
{
    int i, j;
    char str[20] = "do1.txt";
    text_t * text = text_new(str);
    if (text == NULL)
        return 1;

    sentense_t * tmp = NULL;
    for (i = 0; i < text_sentence_count(text); i++)
    {
        tmp = text_get_sentence(text, i);
        j = 0;
        while (j < sentence_word_count(tmp))
        {
            if (strcmp(word_get_str(sentence_get_word(tmp, j)), "not") != 0)
            {
                /* Neither a "not" nor the word protected by one: drop it.
                 * j is not advanced, so the same index is examined again
                 * (the original compensated with j-- after the delete). */
                sentence_del_word(tmp, j);
                continue;
            }

            /* Step over the whole run of consecutive "not" words. */
            while (j < sentence_word_count(tmp)
                   && strcmp(word_get_str(sentence_get_word(tmp, j)), "not") == 0)
            {
                j++;
            }

            /* Keep the word right after the run, if there is one. When the
             * run reaches the end of the sentence there is nothing left to
             * keep or delete; the bounds check prevents deleting at an
             * index equal to the word count. */
            if (j < sentence_word_count(tmp))
                j++;
        }
    }

    FILE * file_out = fopen("posle.txt", "w");
    if (file_out == NULL)
    {
        /* Do not leak the loaded text on the error path. */
        text_free(text);
        return 1;
    }

    for (i = 0; i < text_sentence_count(text); i++)
    {
        tmp = text_get_sentence(text, i);
        for (j = 0; j < sentence_word_count(tmp); j++)
        {
            /* ", " between words, newline after the last word. */
            if (j < sentence_word_count(tmp) - 1)
                fprintf(file_out, "%s, ", word_get_str(sentence_get_word(tmp, j)));
            else
                fprintf(file_out, "%s\n", word_get_str(sentence_get_word(tmp, j)));
        }
    }
    text_print(text);
    text_free(text);

    /* fclose flushes buffered output; a failure here means data was lost. */
    if (fclose(file_out) != 0)
        return 1;

    return 0;
}
예제 #2
0
/*
 *  call-seq:
 *     sentence.word( idx )   -> str
 *
 *  Returns the spelling of the n-th word in the sentence as it appears after
 *  tokenization. If the sentence's parsed_p flag is not set,
 *  rlink_sentence_parse is invoked on it first.
 *
 *  Raises IndexError when +idx+ is negative or not less than the sentence
 *  length. The index is converted with NUM2INT, which raises for arguments
 *  that cannot be converted to a C int.
 */
static VALUE
rlink_sentence_word( VALUE self, VALUE n ) {
	struct rlink_sentence *ptr = get_sentence( self );
	const char *word;
	int idx, length;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	/* NUM2INT validates the argument; FIX2INT would reinterpret any VALUE
	 * (e.g. a String or nil) as an integer without checking its type. */
	idx = NUM2INT( n );

	/* Guard the lookup: the index is otherwise passed straight through to
	 * sentence_get_word with no range check in this function. */
	length = sentence_length( (Sentence)ptr->sentence );
	if ( idx < 0 || idx >= length )
		rb_raise( rb_eIndexError,
		          "word index %d out of range (sentence has %d words)",
		          idx, length );

	word = sentence_get_word( (Sentence)ptr->sentence, idx );
	return rb_str_new2( word );
}
예제 #3
0
파일: jni-client.c 프로젝트: dyne/AutOrg
/*
 * Class:      LinkGrammar
 * Method:     getWord
 * Signature: (I)Ljava/lang/String;
 *
 * Looks up word number i of the calling thread's current sentence and
 * returns it to Java as a new String.
 */
JNIEXPORT jstring JNICALL
Java_org_linkgrammar_LinkGrammar_getWord(JNIEnv *env, jclass cls, jint i)
{
	per_thread_data *thread_data = get_ptd(env, cls);

	/* Per the original author: the returned pointer points into the
	 * sentence and must not be freed here. */
	const char *spelling = sentence_get_word(thread_data->sent, i);

	/* Note carried over from the original author: the resulting jstring
	 * was observed to be null when the word is UTF-8-encoded Japanese or
	 * Chinese; the cause was not established.
	 */
	return (*env)->NewStringUTF(env, spelling);
}
std::map<int, EdgeDescription> LinkParserAdapterImpl::parseSentence(std::string sentenceStr) {
    Parse_Options parseOptions = parse_options_create();
    int verbosity = psi_logger.getLoggingPriority() / 100 - 4;
    parse_options_set_verbosity(parseOptions, verbosity);
    starts_.clear();
    ends_.clear();
    edgeDescriptions_.clear();
    freeSentence();
    sentence_ = sentence_create(sentenceStr.c_str(), dictionary_);
    if (!sentence_) {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to tokenize the input text.";
        throw ParserException(errorSs.str());
    }
    boost::algorithm::to_lower(sentenceStr);
    if (sentence_parse(sentence_, parseOptions)) {

        size_t currentPos = 0;
        size_t foundPos = 0;
        int wordNo = 0;
        while (wordNo < sentence_length(sentence_)) {
            std::string word(sentence_get_word(sentence_, wordNo));
            boost::algorithm::to_lower(word);
            foundPos = sentenceStr.find(word, currentPos);
            if (foundPos != std::string::npos) {
                starts_[wordNo] = foundPos;
                ends_[wordNo] = currentPos = foundPos + word.length();
            }
            ++wordNo;
        }

        Linkage linkage = linkage_create(0, sentence_, parseOptions);
        CNode * ctree = linkage_constituent_tree(linkage);
        extractEdgeDescriptions(ctree, linkage);
        linkage_free_constituent_tree(ctree);
        linkage_delete(linkage);

    } else {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to parse the input text.\n"
            << "Your input text is probably not a correct sentence.";
        WARN(errorSs.str());
    }
    return edgeDescriptions_;
}
예제 #5
0
/*
 *  call-seq:
 *     sentence.words   -> array
 *
 *  Returns an Array holding the spelling of every word of the sentence, in
 *  order, as they appear after tokenization. If the sentence's parsed_p
 *  flag is not set, rlink_sentence_parse is invoked on it first.
 *
 *     sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
 *     sentence.words  # => Array of Strings, one per word
 */
static VALUE
rlink_sentence_words( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );
	VALUE words = rb_ary_new();
	const char *spelling;
	int idx = 0, count;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	/* The word count is read once, after any parse triggered above. */
	count = sentence_length( (Sentence)ptr->sentence );

	while ( idx < count ) {
		spelling = sentence_get_word( (Sentence)ptr->sentence, idx );
		debugMsg(( "Word %d: <%s>", idx, spelling ));
		rb_ary_push( words, rb_str_new2(spelling) );
		idx++;
	}

	return words;
}