示例#1
0
lucy_RegexTokenizer*
lucy_RegexTokenizer_init(lucy_RegexTokenizer *self,
                         cfish_String *pattern) {
    lucy_Analyzer_init((lucy_Analyzer*)self);
    lucy_RegexTokenizerIVARS *const ivars = lucy_RegexTokenizer_IVARS(self);
    #define DEFAULT_PATTERN "\\w+(?:['\\x{2019}]\\w+)*"
    if (pattern) {
        if (CFISH_Str_Contains_Utf8(pattern, "\\p", 2)
            || CFISH_Str_Contains_Utf8(pattern, "\\P", 2)
           ) {
            CFISH_DECREF(self);
            THROW(CFISH_ERR, "\\p and \\P constructs forbidden");
        }
        ivars->pattern = CFISH_Str_Clone(pattern);
    }
    else {
        ivars->pattern = cfish_Str_new_from_trusted_utf8(
                            DEFAULT_PATTERN, sizeof(DEFAULT_PATTERN) - 1);
    }

    // Acquire a compiled regex engine for matching one token.
    dTHX;
    SV *token_re = S_compile_token_re(aTHX_ ivars->pattern);
#if (PERL_VERSION > 10)
    REGEXP *rx = SvRX((SV*)token_re);
#else
    if (!SvROK(token_re)) {
        THROW(CFISH_ERR, "token_re is not a qr// entity");
    }
    SV *inner = SvRV(token_re);
    MAGIC *magic = NULL;
    if (SvMAGICAL((SV*)inner)) {
        magic = mg_find((SV*)inner, PERL_MAGIC_qr);
    }
    if (!magic) {
        THROW(CFISH_ERR, "token_re is not a qr// entity");
    }
    REGEXP *rx = (REGEXP*)magic->mg_obj;
#endif
    if (rx == NULL) {
        THROW(CFISH_ERR, "Failed to extract REGEXP from token_re '%s'",
              SvPV_nolen((SV*)token_re));
    }
    ivars->token_re = rx;
    (void)ReREFCNT_inc(((REGEXP*)ivars->token_re));
    SvREFCNT_dec(token_re);

    return self;
}
示例#2
0
cfish_Vector*
LUCY_Doc_Field_Names_IMP(lucy_Doc *self) {
    dTHX;
    lucy_DocIVARS *const ivars = lucy_Doc_IVARS(self);

    HV           *fields     = (HV*)ivars->fields;
    I32           num_fields = hv_iterinit(fields);
    cfish_Vector *retval     = cfish_Vec_new(num_fields);

    while (num_fields--) {
        HE *entry = hv_iternext(fields);
        STRLEN key_size;
        const char *key = XSBind_hash_key_to_utf8(aTHX_ entry, &key_size);
        cfish_String *key_str = cfish_Str_new_from_trusted_utf8(key, key_size);
        CFISH_Vec_Push(retval, (cfish_Obj*)key_str);
    }

    return retval;
}
示例#3
0
Obj*
cfish_inc_refcount(void *vself) {
    Obj *self = (Obj*)vself;

    // Handle special cases.
    cfish_Class *const klass = self->klass;
    if (klass->flags & CFISH_fREFCOUNTSPECIAL) {
        if (SI_is_string_type(klass)) {
            // Only copy-on-incref Strings get special-cased.  Ordinary
            // strings fall through to the general case.
            if (CFISH_Str_Is_Copy_On_IncRef((cfish_String*)self)) {
                const char *utf8 = CFISH_Str_Get_Ptr8((cfish_String*)self);
                size_t size = CFISH_Str_Get_Size((cfish_String*)self);
                return (cfish_Obj*)cfish_Str_new_from_trusted_utf8(utf8, size);
            }
        }
        else if (SI_immortal(klass)) {
            return self;
        }
    }

    self->refcount++;
    return self;
}