/*
 * Reads the text from do1.txt and, in every sentence, keeps only the word
 * "not" and the word immediately following a run of "not"s; every other
 * word is removed. The filtered text is printed and also written to
 * posle.txt, one sentence per line, words separated by ", ".
 */
#include <stdio.h>
#include <string.h>
/* plus the project-specific header that declares text_t / sentense_t
 * (its name is not shown in the source) */

int main() {
    int i, j;
    char str[20] = "do1.txt";
    text_t * text = text_new(str);
    // if (text != NULL)
    //     text_print(text);
    sentense_t * tmp = NULL;
    int flag = 0;

    for (i = 0; i < text_sentence_count(text); i++) {
        tmp = text_get_sentence(text, i);
        for (j = 0; j < sentence_word_count(tmp); j++) {
            flag = 0;
            /* skip over consecutive "not" words */
            while (strcmp(word_get_str(sentence_get_word(tmp, j)), "not") == 0) {
                j++;
                if (j == sentence_word_count(tmp))
                    break;
                flag = 1;
            }
            if (flag == 1)
                flag = 0;         /* keep the word that follows the "not"s */
            else {
                sentence_del_word(tmp, j);
                j--;
            }
        }
    }

    FILE * file_out = fopen("posle.txt", "w");
    if (file_out == NULL)
        return 1;

    for (i = 0; i < text_sentence_count(text); i++) {
        tmp = text_get_sentence(text, i);
        for (j = 0; j < sentence_word_count(tmp); j++) {
            if (j < sentence_word_count(tmp) - 1)
                fprintf(file_out, "%s, ", word_get_str(sentence_get_word(tmp, j)));
            else
                fprintf(file_out, "%s\n", word_get_str(sentence_get_word(tmp, j)));
        }
    }

    text_print(text);
    text_free(text);
    fclose(file_out);

    return 0;
}
/*
 * call-seq:
 *    sentence.word( idx )   -> str
 *
 * Returns the spelling of the n-th word in the sentence as it appears after
 * tokenization.
 */
static VALUE
rlink_sentence_word( VALUE self, VALUE n )
{
	struct rlink_sentence *ptr = get_sentence( self );
	const char *word;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	word = sentence_get_word( (Sentence)ptr->sentence, FIX2INT(n) );

	return rb_str_new2( word );
}
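For reference, here is a minimal standalone sketch (not taken from any of the sources in this collection) of the plain C call sequence that rlink_sentence_word wraps. It assumes an "en" dictionary and a link-grammar version that still provides sentence_get_word(); error handling is kept to a bare minimum.

#include <stdio.h>
#include <link-grammar/link-includes.h>

int main(void)
{
    /* Load the English dictionary and default parse options. */
    Dictionary dict = dictionary_create_lang("en");
    if (dict == NULL)
        return 1;

    Parse_Options opts = parse_options_create();
    Sentence sent = sentence_create("The dogs barks.", dict);
    if (sent == NULL) {
        parse_options_delete(opts);
        dictionary_delete(dict);
        return 1;
    }

    /* Parse first, as the binding above does before reading words. */
    sentence_parse(sent, opts);

    /* Print each tokenized word. */
    int nwords = sentence_length(sent);
    for (int i = 0; i < nwords; i++)
        printf("word %d: %s\n", i, sentence_get_word(sent, i));

    sentence_delete(sent);
    parse_options_delete(opts);
    dictionary_delete(dict);
    return 0;
}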
/*
 * Class:     LinkGrammar
 * Method:    getWord
 * Signature: (I)Ljava/lang/String;
 */
JNIEXPORT jstring JNICALL
Java_org_linkgrammar_LinkGrammar_getWord(JNIEnv *env, jclass cls, jint i)
{
	per_thread_data *ptd = get_ptd(env, cls);

	/* Does not need to be freed, points into sentence */
	const char * w = sentence_get_word(ptd->sent, i);

	/* FWIW, j will be null if w is utf8-encoded Japanese or Chinese.
	 * I guess my JVM is not capable of handling Chinese/Japanese ??
	 * Maybe some special java thing needs to be installed?
	 */
	jstring j = (*env)->NewStringUTF(env, w);
	return j;
}
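JNI's NewStringUTF expects "modified UTF-8", which is a likely reason the snippet above gets null jstrings for genuinely UTF-8-encoded Chinese or Japanese words rather than any missing JVM component. A hedged workaround sketch, using a hypothetical helper utf8_to_jstring that is not part of the binding, builds the java.lang.String from raw bytes instead; getWord would then return utf8_to_jstring(env, w).

#include <jni.h>
#include <string.h>

/* Sketch: convert an arbitrary UTF-8 C string to a jstring via the
 * java.lang.String(byte[], String) constructor, avoiding NewStringUTF's
 * modified-UTF-8 restriction. Returns NULL on failure. */
static jstring utf8_to_jstring(JNIEnv *env, const char *utf8)
{
	size_t len = strlen(utf8);

	/* Copy the raw bytes into a Java byte[]. */
	jbyteArray bytes = (*env)->NewByteArray(env, (jsize)len);
	if (bytes == NULL) return NULL;
	(*env)->SetByteArrayRegion(env, bytes, 0, (jsize)len, (const jbyte *)utf8);

	/* new String(bytes, "UTF-8") */
	jclass string_cls = (*env)->FindClass(env, "java/lang/String");
	jmethodID ctor = (*env)->GetMethodID(env, string_cls, "<init>",
	                                     "([BLjava/lang/String;)V");
	jstring charset = (*env)->NewStringUTF(env, "UTF-8");
	jstring result = (jstring)(*env)->NewObject(env, string_cls, ctor,
	                                            bytes, charset);

	(*env)->DeleteLocalRef(env, charset);
	(*env)->DeleteLocalRef(env, bytes);
	(*env)->DeleteLocalRef(env, string_cls);
	return result;
}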
std::map<int, EdgeDescription> LinkParserAdapterImpl::parseSentence(std::string sentenceStr) {
    Parse_Options parseOptions = parse_options_create();
    int verbosity = psi_logger.getLoggingPriority() / 100 - 4;
    parse_options_set_verbosity(parseOptions, verbosity);

    starts_.clear();
    ends_.clear();
    edgeDescriptions_.clear();

    freeSentence();
    sentence_ = sentence_create(sentenceStr.c_str(), dictionary_);
    if (!sentence_) {
        // release the locally created options before bailing out
        parse_options_delete(parseOptions);
        std::stringstream errorSs;
        errorSs << "Link-parser failed to tokenize the input text.";
        throw ParserException(errorSs.str());
    }

    boost::algorithm::to_lower(sentenceStr);

    if (sentence_parse(sentence_, parseOptions)) {
        // map every tokenized word back to its character offsets in the input
        size_t currentPos = 0;
        size_t foundPos = 0;
        int wordNo = 0;
        while (wordNo < sentence_length(sentence_)) {
            std::string word(sentence_get_word(sentence_, wordNo));
            boost::algorithm::to_lower(word);
            foundPos = sentenceStr.find(word, currentPos);
            if (foundPos != std::string::npos) {
                starts_[wordNo] = foundPos;
                ends_[wordNo] = currentPos = foundPos + word.length();
            }
            ++wordNo;
        }

        Linkage linkage = linkage_create(0, sentence_, parseOptions);
        CNode * ctree = linkage_constituent_tree(linkage);
        extractEdgeDescriptions(ctree, linkage);
        linkage_free_constituent_tree(ctree);
        linkage_delete(linkage);
    } else {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to parse the input text.\n"
                << "Your input text is probably not a correct sentence.";
        WARN(errorSs.str());
    }

    // the options are not stored, so free them here
    parse_options_delete(parseOptions);

    return edgeDescriptions_;
}
/*
 * call-seq:
 *    sentence.words   -> array
 *
 * Returns the words of the sentence as they appear after tokenization.
 *
 *    sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
 *    sentence.words  #->
 */
static VALUE
rlink_sentence_words( VALUE self )
{
	struct rlink_sentence *ptr = get_sentence( self );
	const char *word;
	int i, length;
	VALUE words = rb_ary_new();

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	length = sentence_length( (Sentence)ptr->sentence );
	for ( i = 0; i < length; i++ ) {
		word = sentence_get_word( (Sentence)ptr->sentence, i );
		debugMsg(( "Word %d: <%s>", i, word ));
		rb_ary_push( words, rb_str_new2(word) );
	}

	return words;
}