/**
 * Converts a Ruby array of strings to a CBLIST.
 *
 * Elements are read with rb_ary_entry starting at index 0 and pushed, in
 * order, with their byte pointer and length. The walk ends at the first
 * entry that comes back as nil, so a nil stored inside the array also ends
 * the conversion early.
 *
 * Raises TypeError if a non-nil element is not a String. The partially
 * built list is closed before raising so it is not leaked.
 *
 * The returned list comes from cblistopen() and is not closed here on the
 * success path; the caller is responsible for it.
 */
CBLIST *array_2_CBLIST(VALUE ary)
{
    long i = 0;
    VALUE str;
    CBLIST *result = cblistopen();

    for(i = 0; (str = rb_ary_entry(ary, i)) != Qnil; i++) {
        if(TYPE(str) != T_STRING) {
            // RSTRING() on a non-String would read unrelated memory;
            // rb_raise does not return, so release the list first
            cblistclose(result);
            rb_raise(rb_eTypeError, "array element %ld is not a String", i);
        }
        cblistpush(result, RSTRING(str)->ptr, RSTRING(str)->len);
    }

    return result;
}
/**
 *  call-seq:
 *    document.add_content(index, content) -> document
 *
 *  Splits +content+ into words and appends each word to the document as a
 *  pair of its as-is form and its normalized form. This is the usual way a
 *  Document gets filled. Document.addword adds a single word at a time and
 *  Document.add_word_list adds a prepared list of words.
 *
 *  The text is broken up by odanalyzetext, so the DELIM, GLUE and SPACE
 *  character classes configured through Index::setcharclass apply. The
 *  default is the same as Odeum::breaktext.
 *
 *  The Index is a required argument because it holds the character class
 *  configuration that controls how the text is split.
 *
 *  NOTE(review): earlier documentation said that words whose normalized form
 *  is empty (typically punctuation) are not added. This function does no such
 *  filtering: every analyzed pair is passed to oddocaddword. Presumably
 *  oddocaddword decides what to do with an empty normalized form; confirm
 *  against the QDBM Odeum documentation.
 *
 *  Returns the document itself.
 */
VALUE Document_add_content(VALUE self, VALUE index, VALUE content)
{
    ODDOC *oddoc = NULL;
    ODEUM *odeum = NULL;
    CBLIST *raw_list = NULL;
    CBLIST *normal_list = NULL;
    const char *raw_word = NULL;
    const char *normal_word = NULL;
    int raw_size = 0;
    int normal_size = 0;
    int total = 0;
    int pos = 0;

    DATA_GET(self,ODDOC, oddoc);
    DATA_GET(index,ODEUM, odeum);
    REQUIRE_TYPE(content, T_STRING);

    // odanalyzetext fills both lists: as-is words and their normalized forms
    raw_list = cblistopen();
    normal_list = cblistopen();
    odanalyzetext(odeum, RSTRING(content)->ptr, raw_list, normal_list);

    // walk both lists in step; the as-is list determines how many pairs there are
    total = cblistnum(raw_list);
    while(pos < total) {
        raw_word = cblistval(raw_list, pos, &raw_size);
        normal_word = cblistval(normal_list, pos, &normal_size);
        oddocaddword(oddoc, normal_word, raw_word);
        pos++;
    }

    cblistclose(raw_list);
    cblistclose(normal_list);

    return self;
}
/**
 *  call-seq:
 *    index.query(query) -> [[id,score], ... ]
 *
 *  An implementation of a basic query language for Odeum. The query language
 *  allows boolean expressions of search terms and '&', '|', '!' with
 *  parentheses as sub-expressions. The '!' operator implements NOTAND so that
 *  you can say "this AND NOT that" using "this ! that". Consecutive words are
 *  assumed to have an implicit '&' between them.
 *
 *  An example expression is: "Zed & shaw ! (frank blank)". The (frank blank)
 *  part is interpreted as (frank & blank).
 *
 *  It returns the same ResultSet as Index.search does.
 *
 *  Raises StandardError ("Query failure.") when odquery returns NULL. The
 *  error list allocated for the query is closed before raising.
 *
 *  On success the pairs, their count and the error list are handed to
 *  ResultSet_create. NOTE(review): presumably the result set takes ownership
 *  of both the pairs and the error list; confirm in ResultSet_create.
 */
VALUE Index_query(VALUE self, VALUE word)
{
    CBLIST *errors = NULL;
    ODPAIR *pairs = NULL;
    int num_returned = 0;
    ODEUM *odeum = NULL;

    DATA_GET(self, ODEUM, odeum);
    REQUIRE_TYPE(word, T_STRING);

    errors = cblistopen();
    pairs = odquery(odeum, RSTRING(word)->ptr, &num_returned, errors);

    if(pairs == NULL) {
        // rb_raise does not return, so the error list must be released here
        // or it is leaked on every failed query
        cblistclose(errors);
        rb_raise(rb_eStandardError, "Query failure.");
    }

    return ResultSet_create(pairs, num_returned, errors);
}
/**
 * Builds a CBLIST from the string elements of a Java object that exposes an
 * iterator() method.
 *
 * The object's iterator is obtained and walked with hasNext()/next(). Each
 * element that passes the isinstanceof(..., CLSSTRING) check is read as
 * modified UTF-8 and pushed onto the list; other elements, null elements and
 * elements whose characters cannot be obtained are skipped.
 *
 * env: JNI environment of the calling thread.
 * obj: the Java object to iterate; must not be NULL (asserted).
 *
 * Returns a list created with cblistopen(); it is not closed here, so the
 * caller is responsible for it.
 */
CBLIST *objtocblist(JNIEnv *env, jobject obj){
  jclass list, it;
  jmethodID midit, midhn, midn;
  jobject itobj, eobj;
  CBLIST *tlist;
  const char *telem;
  assert(obj);
  tlist = cblistopen();
  list = (*env)->GetObjectClass(env, obj);
  midit = (*env)->GetMethodID(env, list, "iterator", "()L" CLSITERATOR ";");
  itobj = (*env)->CallObjectMethod(env, obj, midit);
  it = (*env)->GetObjectClass(env, itobj);
  midhn = (*env)->GetMethodID(env, it, "hasNext", "()Z");
  midn = (*env)->GetMethodID(env, it, "next", "()L" CLSOBJECT ";");
  while((*env)->CallBooleanMethod(env, itobj, midhn)){
    eobj = (*env)->CallObjectMethod(env, itobj, midn);
    /* a null element has no characters to read, so skip it up front */
    if(!eobj) continue;
    if(isinstanceof(env, eobj, CLSSTRING) &&
       (telem = (*env)->GetStringUTFChars(env, eobj, NULL)) != NULL){
      cblistpush(tlist, telem, -1);
      /* every successful GetStringUTFChars needs a matching release,
         whether or not the VM handed back a copy */
      (*env)->ReleaseStringUTFChars(env, eobj, telem);
    }
    /* drop the per-element local reference so that references do not
       accumulate with the size of the collection */
    (*env)->DeleteLocalRef(env, eobj);
  }
  return tlist;
}