예제 #1
0
파일: odeum_index.c 프로젝트: zedshaw/fcst
/**
 * Converts a Ruby array of strings to a newly opened CBLIST.
 *
 * Entries are copied in order starting at index 0.  Copying stops at the
 * first nil entry; rb_ary_entry also yields nil once the index is past the
 * end of the array, which is what ends the loop for an ordinary array of
 * strings.  Note that a nil stored inside the array therefore truncates
 * the result.
 *
 * Raises TypeError if an entry before the terminating nil is not a String.
 * The partially built list is closed first so it does not leak.
 *
 * The returned list comes from cblistopen and is handed to the caller,
 * who is responsible for closing it.
 */
CBLIST *array_2_CBLIST(VALUE ary) 
{
    long i = 0;
    CBLIST *result = cblistopen();
    VALUE str;
    
    for(i = 0; (str = rb_ary_entry(ary, i)) != Qnil; i++) {
        // RSTRING() on anything but a String reads unrelated memory
        if(TYPE(str) != T_STRING) {
            // rb_raise does not return, so release the list before raising
            cblistclose(result);
            rb_raise(rb_eTypeError, "array_2_CBLIST requires an array of strings");
        }
        cblistpush(result, RSTRING(str)->ptr, RSTRING(str)->len);
    }
    
    return result;
}
예제 #2
0
파일: odeum_index.c 프로젝트: zedshaw/fcst
/**
 * call-seq:
 *   document.add_content(index, content) -> document
 *
 * Takes the contents, breaks the words up, and then puts them in the document
 * in normalized form.  This is the common pattern that people use a Document
 * with.  You may also use Document.addword to add one word a time, and
 * Document.add_word_list to add a list of words.
 *
 * It uses the default odanalyzetext method to break up the text,
 * which means you can use the Index::setcharclass method to configure
 * what is a DELIM, GLUE, and SPACE character.  The default is the same
 * as Odeum::breaktext.
 *
 * Every as-is/normalized pair produced by the analyzer is handed to
 * oddocaddword, including pairs whose normalized form is empty (this
 * usually happens for punctuation).  No filtering is done in this function.
 * NOTE(review): QDBM documents that a word with an empty normalized form is
 * not reflected in the inverted index -- confirm against the QDBM version
 * this extension is built with.
 *
 * The Index used with this document is required since that object holds
 * the information about how text is broken via the Index::setcharclass method.
 *
 * content must be a String (enforced with REQUIRE_TYPE).  Returns self.
 */
VALUE Document_add_content(VALUE self, VALUE index, VALUE content) {
    CBLIST *asis_words = NULL;
    CBLIST *norm_words = NULL;
    const char *asis = NULL;
    const char *norm = NULL;
    int i = 0;
    int count = 0;
    ODDOC *oddoc = NULL;
    ODEUM *odeum = NULL;
    
    DATA_GET(self,ODDOC, oddoc);
    DATA_GET(index,ODEUM, odeum);
    
    // validate before opening the lists so nothing needs cleaning up on failure
    REQUIRE_TYPE(content, T_STRING);
    
    asis_words = cblistopen();
    norm_words = cblistopen();
    
    odanalyzetext(odeum, RSTRING(content)->ptr, asis_words, norm_words);
    
    // the two lists are walked in step; asis_words sets the iteration count
    count = cblistnum(asis_words);
    
    for(i = 0; i < count;  i++) {
        // the element sizes are not needed, so no size out-parameter is passed
        asis = cblistval(asis_words, i, NULL);
        norm = cblistval(norm_words, i, NULL);
        
        // every pair is added, even when the normalized form is empty
        oddocaddword(oddoc, norm, asis);
    }
    
    cblistclose(asis_words);
    cblistclose(norm_words);
    
    return self;
}
예제 #3
0
파일: odeum_index.c 프로젝트: zedshaw/fcst
/**
 * call-seq:
 *    index.query(query) -> [[id,score], ... ]
 *
 * An implementation of a basic query language for Odeum.  The query language
 * allows boolean expressions of search terms and '&', '|', '!' with parentheses
 * as sub-expressions.  The '!' operator implements NOTAND so that you can say, 
 * "this AND NOT that" using "this ! that".  Consecutive words are assumed to 
 * have an implicit '&' between them.
 *
 * An example expression is:  "Zed & shaw ! (frank blank)".  The (frank blank) 
 * part actually is interpreted as (frank & blank).
 *
 * It returns the same ResultSet as Index.search does.
 *
 * The query must be a String (enforced with REQUIRE_TYPE).  Raises
 * StandardError with the message "Query failure." when odquery returns NULL.
 */
VALUE Index_query(VALUE self, VALUE word) {
    CBLIST *errors = NULL;
    ODPAIR *pairs = NULL;
    int num_returned = 0;
    ODEUM *odeum = NULL;
    DATA_GET(self, ODEUM, odeum);

    REQUIRE_TYPE(word, T_STRING);
    
    errors = cblistopen();
    
    pairs = odquery(odeum, RSTRING(word)->ptr, &num_returned, errors);
    if(pairs == NULL) {
        // rb_raise does not return, so the error list must be released
        // here or it would leak on every failed query
        cblistclose(errors);
        rb_raise(rb_eStandardError, "Query failure.");
    } 
    
    // on success both the pairs and the error list are passed on to the ResultSet
    return ResultSet_create(pairs, num_returned, errors);
}
예제 #4
0
/**
 * Builds a CBLIST from the String elements of a Java object.
 *
 * The object's iterator() method is called and the resulting iterator is
 * drained with hasNext()/next().  Each element that is an instance of
 * CLSSTRING is pushed onto the list using the characters returned by
 * GetStringUTFChars; other elements, and strings whose characters could not
 * be obtained, are skipped.
 *
 * `env' is the JNI environment of the calling thread.
 * `obj' is the object to iterate over; it must not be NULL (asserted).
 * The return value is a list opened with cblistopen and handed to the caller.
 */
CBLIST *objtocblist(JNIEnv *env, jobject obj){
  jclass list, it;
  jmethodID midit, midhn, midn;
  jobject itobj, eobj;
  CBLIST *tlist;
  const char *telem;
  assert(obj);
  tlist = cblistopen();
  list = (*env)->GetObjectClass(env, obj);
  midit = (*env)->GetMethodID(env, list, "iterator", "()L" CLSITERATOR ";");
  itobj = (*env)->CallObjectMethod(env, obj, midit);
  it = (*env)->GetObjectClass(env, itobj);
  midhn = (*env)->GetMethodID(env, it, "hasNext", "()Z");
  midn = (*env)->GetMethodID(env, it, "next", "()L" CLSOBJECT ";");
  while((*env)->CallBooleanMethod(env, itobj, midhn)){
    eobj = (*env)->CallObjectMethod(env, itobj, midn);
    if(isinstanceof(env, eobj, CLSSTRING)){
      /* the isCopy result is not needed, so NULL is passed for it */
      telem = (*env)->GetStringUTFChars(env, eobj, NULL);
      if(telem){
        /* a negative size is passed, as before, instead of an explicit length */
        cblistpush(tlist, telem, -1);
        /* every successful GetStringUTFChars needs a matching release,
           whether or not the VM made a copy */
        (*env)->ReleaseStringUTFChars(env, eobj, telem);
      }
    }
    /* drop the per-element local reference on every path so a long
       collection cannot exhaust the local reference table */
    (*env)->DeleteLocalRef(env, eobj);
  }
  return tlist;
}