static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio, const VALUE rb_topics, const VALUE rb_sections) { #ifdef HAVE_RUBY_ENCODING_H int enc = rb_enc_find_index("UTF-8"); #endif long int length = RSTRING_LEN(rb_str); char *text = StringValuePtr(rb_str); char *dictionary_file = StringValuePtr(rb_dict_file); int ratio = NUM2INT(rb_ratio); size_t result_len; OtsArticle *doc = ots_new_article(); VALUE summary; VALUE topics; VALUE result; VALUE sections; if (!ots_load_xml_dictionary(doc, dictionary_file)) { ots_free_article(doc); rb_raise(rb_eRuntimeError, "Cannot load dictionary file"); return Qnil; } ots_parse_stream(text, length, doc); ots_grade_doc(doc); ots_highlight_doc(doc, ratio); summary = rb_str_new2(ots_get_doc_text(doc, &result_len)); topics = rb_str_new2((const char *)doc->title); sections = rb_ary_new(); GList *li; for (li = (GList *) ots_get_doc_sections(doc); li != NULL; li = li->next) { VALUE section = rb_str_new2(li->data); rb_ary_push(sections, section); } #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(summary, enc); rb_enc_associate_index(summary, enc); #endif ots_free_article(doc); if (rb_topics == Qtrue) { result = rb_ary_new(); rb_ary_push(result, summary); rb_ary_push(result, topics); return result; } else if (rb_sections == Qtrue) { return sections; } else { return summary; } }
int main(int argc, char **argv) { char *dictionary_file = "en"; /* if not told otherwise, assume we're using english */ const char *input_file = NULL; char *output_file = NULL; FILE *input_stream = stdin; /*by default read from stdin */ FILE *output_stream = stdout; /*by default read from stdout */ OtsArticle *Art; int sumPercent = 20; /* if not told otherwise highlight 20% of the document */ int c,n_args=0; int html = FALSE; int keywords = FALSE; int about = FALSE; int version = FALSE; const char *const *args=NULL; GOptionContext *context = NULL; GError *error = NULL; const GOptionEntry options[] = { {"ratio" , 'r', 0, G_OPTION_ARG_INT , &sumPercent, "summarization % [default = 20%]", "<int>"}, {"dic" , 'd', 0, G_OPTION_ARG_STRING, &dictionary_file, "dictionary to use", "<string>"}, {"out" , 'o', 0, G_OPTION_ARG_STRING, &output_file, "output file [default = stdout]", "<string>"}, {"html" , 'h', 0, G_OPTION_ARG_NONE , &html, "output as html", NULL}, {"keywords", 'k', 0, G_OPTION_ARG_NONE , &keywords, "only output keywords", NULL}, {"about" , 'a', 0, G_OPTION_ARG_NONE , &about, "only output the summary", NULL}, {"version" , 'v', 0, G_OPTION_ARG_NONE , &version, "show version information", NULL}, { G_OPTION_REMAINING, '\0', 0, G_OPTION_ARG_FILENAME_ARRAY, &args, NULL, "[file.txt | stdin]" }, {NULL} }; context = g_option_context_new(" - Open Text Summarizer"); g_option_context_add_main_entries(context, options, NULL); /* Parse command line */ if (!g_option_context_parse (context, &argc, &argv, &error)) { g_option_context_free (context); g_print ("%s\n", error->message); g_error_free (error); exit (1); } /* print version number */ if (version) { printf("%s\n", PACKAGE_STRING); g_option_context_free(context); exit (0); } if(args==NULL) { printf("\nInvalid number of arguments. Use --help to see options\n"); exit(1); } if (args) while (args[n_args] != NULL) n_args++; if (n_args > 1) { g_option_context_free(context); return 1; } if (n_args == 1 && g_file_test (args[0], G_FILE_TEST_EXISTS) == TRUE) input_file = args[0]; if (input_file) { input_stream = fopen (input_file, "r"); if (!input_stream) { g_option_context_free(context); perror ("Couldn't load input file"); return 1; } } if (output_file) { output_stream = fopen (output_file, "w"); if (!output_stream) { if (input_file) fclose (input_stream); g_option_context_free(context); perror ("Couldn't load output file"); return 1; } } Art = ots_new_article (); if (!ots_load_xml_dictionary (Art, dictionary_file)) { ots_free_article (Art); if (input_file) fclose(input_stream); if (output_file) fclose(output_stream); g_option_context_free(context); perror ("Couldn't load dictionary"); return 1; } ots_parse_file (input_stream, Art); /* read article from stdin , put it in struct Article */ ots_grade_doc (Art); /* grade each sentence (how relevent is it to the text) */ /* int i; for (i=0;i<1000000;i++) { printf("\n word = %s ", ots_word_in_list(Art->ImpWords,i)); } */ ots_highlight_doc (Art, sumPercent); /* highlight what we are going to print 0% - 100% of the words */ if (html) ots_print_HTML(output_stream, Art); /* print article in html form */ else if (keywords) print_keywords(Art, input_file); else if (about) print_about(output_stream, Art); else ots_print_doc(output_stream, Art); /* print article in text */ ots_free_article (Art); if (input_file) fclose (input_stream); if (output_file) fclose (output_stream); return 0; }