/*
** Build a tokenizer from the specification string zArg.
**
** A private copy of zArg is split with sqlite3Fts3NextToken(). The first
** token, after being passed through sqlite3Fts3Dequote(), is the key used
** to look up the tokenizer module in pHash. Each following token is
** processed the same way and handed to the module's xCreate() method as
** an argument.
**
** Return values:
**   SQLITE_NOMEM  - an allocation failed.
**   SQLITE_ERROR  - no module is stored in pHash under the name; an
**                   "unknown tokenizer: <name>" message is reported
**                   through pzErr.
**   otherwise     - whatever xCreate() returned. On SQLITE_OK the new
**                   tokenizer is in *ppTok with its pModule field set; on
**                   failure an "unknown tokenizer" message is reported
**                   through pzErr.
*/
int sqlite3Fts3InitTokenizer(
  Fts3Hash *pHash,                /* Tokenizer hash table */
  const char *zArg,               /* Tokenizer name */
  sqlite3_tokenizer **ppTok,      /* OUT: Tokenizer (if applicable) */
  char **pzErr                    /* OUT: Set to malloced error message */
){
  sqlite3_tokenizer_module *m;    /* Module found under the name */
  char *zSpec;                    /* Writable copy of zArg */
  char *zStop;                    /* Pointer to nul-term of zSpec */
  char *zTok;                     /* Token currently being processed */
  int nTok = 0;                   /* Size of zTok as reported by NextToken */
  const char **azParam = 0;       /* Argument array for xCreate() */
  int nParam = 0;                 /* Number of entries in azParam[] */
  int rc;

  zSpec = sqlite3_mprintf("%s", zArg);
  if( zSpec==0 ) return SQLITE_NOMEM;
  zStop = zSpec + strlen(zSpec);

  /* The first token is the module name. If there is no token at all,
  ** fall back to an empty name at the start of the copy. */
  zTok = (char *)sqlite3Fts3NextToken(zSpec, &nTok);
  if( zTok==0 ){
    assert( nTok==0 );
    zTok = zSpec;
  }
  zTok[nTok] = '\0';
  sqlite3Fts3Dequote(zTok);

  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(
      pHash, zTok, (int)strlen(zTok)+1
  );
  if( m==0 ){
    sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zTok);
    sqlite3_free(zSpec);
    return SQLITE_ERROR;
  }

  /* Every remaining token becomes one xCreate() argument. The pointers
  ** stored in azParam[] refer into zSpec, so zSpec must outlive xCreate(). */
  for(zTok += nTok+1; zTok<zStop; zTok += nTok+1){
    const char **azGrown;

    zTok = (char *)sqlite3Fts3NextToken(zTok, &nTok);
    if( zTok==0 ) break;

    azGrown = (const char **)sqlite3_realloc(
        (void *)azParam, (int)(sizeof(char *)*(nParam+1))
    );
    if( azGrown==0 ){
      sqlite3_free(zSpec);
      sqlite3_free((void *)azParam);
      return SQLITE_NOMEM;
    }
    azParam = azGrown;
    azParam[nParam++] = zTok;
    zTok[nTok] = '\0';
    sqlite3Fts3Dequote(zTok);
  }

  rc = m->xCreate(nParam, azParam, ppTok);
  assert( rc!=SQLITE_OK || *ppTok );
  if( rc==SQLITE_OK ){
    (*ppTok)->pModule = m;
  }else{
    sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer");
  }

  sqlite3_free((void *)azParam);
  sqlite3_free(zSpec);
  return rc;
}
/*
** Build a tokenizer from the specification string zArg.
**
** A private copy of zArg is split with sqlite3Fts3NextToken(). The first
** token, after being passed through sqlite3Fts3Dequote(), is the key used
** to look up the tokenizer module in pHash. Each following token is
** processed the same way and handed to the module's xCreate() method as
** an argument.
**
** Return values:
**   SQLITE_NOMEM  - an allocation failed.
**   SQLITE_ERROR  - no module is stored in pHash under the name; *pzErr is
**                   set to an sqlite3_mprintf() allocated message.
**   otherwise     - whatever xCreate() returned. On SQLITE_OK the new
**                   tokenizer is in *ppTok with its pModule field set; on
**                   failure *pzErr is set to an allocated message.
*/
int sqlite3Fts3InitTokenizer(
  Fts3Hash *pHash,                /* Tokenizer hash table */
  const char *zArg,               /* Tokenizer specification */
  sqlite3_tokenizer **ppTok,      /* OUT: Tokenizer (if applicable) */
  char **pzErr                    /* OUT: Set to malloced error message */
){
  int rc;
  char *z;
  int n = 0;
  char *zCopy;
  char *zEnd;                     /* Pointer to nul-term of zCopy */
  sqlite3_tokenizer_module *m;

  zCopy = sqlite3_mprintf("%s", zArg);
  if( !zCopy ) return SQLITE_NOMEM;
  zEnd = &zCopy[strlen(zCopy)];

  z = (char *)sqlite3Fts3NextToken(zCopy, &n);
  if( z==0 ){
    /* The specification holds no token at all (it is empty or blank).
    ** Treat that as an empty tokenizer name instead of dereferencing the
    ** NULL pointer; the lookup below then reports "unknown tokenizer". */
    n = 0;
    z = zCopy;
  }
  z[n] = '\0';
  sqlite3Fts3Dequote(z);

  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
  if( !m ){
    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
    rc = SQLITE_ERROR;
  }else{
    char const **aArg = 0;
    int iArg = 0;

    /* Every remaining token becomes one xCreate() argument. The pointers
    ** stored in aArg[] refer into zCopy, so zCopy is freed only after
    ** xCreate() has run. */
    z = &z[n+1];
    while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
      int nNew = (int)(sizeof(char *)*(iArg+1));
      char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew);
      if( !aNew ){
        sqlite3_free(zCopy);
        sqlite3_free((void *)aArg);
        return SQLITE_NOMEM;
      }
      aArg = aNew;
      aArg[iArg++] = z;
      z[n] = '\0';
      sqlite3Fts3Dequote(z);
      z = &z[n+1];
    }

    rc = m->xCreate(iArg, aArg, ppTok);
    assert( rc!=SQLITE_OK || *ppTok );
    if( rc!=SQLITE_OK ){
      *pzErr = sqlite3_mprintf("unknown tokenizer");
    }else{
      (*ppTok)->pModule = m;
    }
    sqlite3_free((void *)aArg);
  }

  sqlite3_free(zCopy);
  return rc;
}
/*
** SQL scalar function giving access to the tokenizer hash table. It is
** invoked in one of two forms:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument to
** sqlite3Fts3InitHashTable() (e.g. 'fts3_tokenizer').
**
** In the two-argument form <pointer> must be a blob whose size equals that
** of a pointer; its content is stored in the hash table under <key-name>.
** That form is only available when SQLITE_ENABLE_FTS3_TOKENIZER is
** defined; otherwise it always fails with an error. In the one-argument
** form <key-name> must already be present in the hash table, otherwise an
** error is returned.
**
** On success the result is a blob holding the pointer stored in the hash
** table for <key-name> (after the table has been updated, if applicable).
*/
static void scalarFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  Fts3Hash *pHash;
  const unsigned char *zKey;      /* Text of the first argument */
  int nKey;                       /* Size of zKey including nul-term */
  void *pModule = 0;              /* Pointer returned to the caller */

  assert( argc==1 || argc==2 );

  pHash = (Fts3Hash *)sqlite3_user_data(context);
  zKey = sqlite3_value_text(argv[0]);
  nKey = sqlite3_value_bytes(argv[0])+1;

  if( argc!=2 ){
    /* Lookup only. A NULL key is never searched for and therefore ends up
    ** in the "unknown tokenizer" branch. */
    if( zKey!=0 ){
      pModule = sqlite3Fts3HashFind(pHash, zKey, nKey);
    }
    if( pModule==0 ){
      char *zMsg = sqlite3_mprintf("unknown tokenizer: %s", zKey);
      sqlite3_result_error(context, zMsg, -1);
      sqlite3_free(zMsg);
      return;
    }
  }else{
#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
    void *pPrev;
    int nBlob = sqlite3_value_bytes(argv[1]);

    if( zKey==0 || nBlob!=sizeof(pModule) ){
      sqlite3_result_error(context, "argument type mismatch", -1);
      return;
    }
    pModule = *(void **)sqlite3_value_blob(argv[1]);
    pPrev = sqlite3Fts3HashInsert(pHash, (void *)zKey, nKey, pModule);
    if( pPrev==pModule ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
#else
    sqlite3_result_error(context, "fts3tokenize: "
        "disabled - rebuild with -DSQLITE_ENABLE_FTS3_TOKENIZER", -1
    );
    return;
#endif /* SQLITE_ENABLE_FTS3_TOKENIZER */
  }

  sqlite3_result_blob(context, (void *)&pModule, sizeof(pModule),
                      SQLITE_TRANSIENT);
}
SQLITE_EXTENSION_INIT1 #endif #include "fts3Int.h" #include <assert.h> #include <string.h>

/*
** SQL scalar function giving access to the tokenizer hash table stored as
** the function's user data. It is invoked in one of two forms:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** In the two-argument form <pointer> must be a blob whose size equals that
** of a pointer; its content is stored in the hash table under <key-name>.
** In the one-argument form <key-name> must already be present in the hash
** table, otherwise an "unknown tokenizer" error is returned.
**
** On success the result is a blob holding the pointer stored in the hash
** table for <key-name> (after the table has been updated, if applicable).
*/
static void scalarFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  Fts3Hash *pHash;
  void *pPtr = 0;
  const unsigned char *zName;
  int nName;

  assert( argc==1 || argc==2 );

  pHash = (Fts3Hash *)sqlite3_user_data(context);

  /* sqlite3_value_text() yields NULL for an SQL NULL argument or when a
  ** text conversion runs out of memory, so zName must be checked before it
  ** is used as a hash key. */
  zName = sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0])+1;

  if( argc==2 ){
    void *pOld;
    int n = sqlite3_value_bytes(argv[1]);
    if( zName==0 || n!=sizeof(pPtr) ){
      sqlite3_result_error(context, "argument type mismatch", -1);
      return;
    }
    pPtr = *(void **)sqlite3_value_blob(argv[1]);
    pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
    /* NOTE(review): presumably the insert hands back the new pointer when
    ** it could not be stored - confirm against sqlite3Fts3HashInsert(). */
    if( pOld==pPtr ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
  }else{
    if( zName ){
      pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
    }
    if( !pPtr ){
      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
      sqlite3_result_error(context, zErr, -1);
      sqlite3_free(zErr);
      return;
    }
  }

  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
/*
** Look up the tokenizer module stored in pHash under the name zName.
**
** The lookup key is zName including its nul-terminator. If an entry is
** found it is written to *pp and SQLITE_OK is returned. Otherwise *pzErr
** is set to an sqlite3_mprintf() allocated "unknown tokenizer" message,
** *pp is left untouched and SQLITE_ERROR is returned.
*/
static int fts3tokQueryTokenizer(
  Fts3Hash *pHash,
  const char *zName,
  const sqlite3_tokenizer_module **pp,
  char **pzErr
){
  int nKey;                       /* Key size, nul-terminator included */
  sqlite3_tokenizer_module *p;

  nKey = (int)strlen(zName) + 1;
  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nKey);
  if( p ){
    *pp = p;
    return SQLITE_OK;
  }

  *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
  return SQLITE_ERROR;
}
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two or more arguments:
**
**   SELECT <function-name>(<key-name>, ..., <input-string>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
** Any arguments between <key-name> and <input-string> are passed to the
** tokenizer's xCreate() method; at most 64 of them are accepted.
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
**   SELECT fts_tokenizer_test('simple', 'I don''t see how');
**
** will return the string:
**
**   "{0 i I 1 dont don't 2 see see 3 how how}"
*/
static void testFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  Fts3Hash *pHash;
  sqlite3_tokenizer_module *p = 0;
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;

  const char *zErr = 0;

  const char *zName;
  int nName;
  const char *zInput;
  int nInput;

  const char *azArg[64];

  const char *zToken;
  int nToken = 0;
  int iStart = 0;
  int iEnd = 0;
  int iPos = 0;
  int i;
  int rc;

  Tcl_Obj *pRet;

  if( argc<2 ){
    sqlite3_result_error(context, "insufficient arguments", -1);
    return;
  }
  /* argv[1]..argv[argc-2] are copied into azArg[]; refuse calls that would
  ** write past the end of that fixed-size array. */
  if( argc-2>(int)(sizeof(azArg)/sizeof(azArg[0])) ){
    sqlite3_result_error(context, "too many arguments", -1);
    return;
  }

  nName = sqlite3_value_bytes(argv[0]);
  zName = (const char *)sqlite3_value_text(argv[0]);
  nInput = sqlite3_value_bytes(argv[argc-1]);
  zInput = (const char *)sqlite3_value_text(argv[argc-1]);

  pHash = (Fts3Hash *)sqlite3_user_data(context);
  /* zName is NULL for an SQL NULL argument; never use it as a hash key. */
  if( zName ){
    p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
  }
  if( !p ){
    char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
    sqlite3_result_error(context, zErr2, -1);
    sqlite3_free(zErr2);
    return;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);

  for(i=1; i<argc-1; i++){
    azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
  }

  if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
    pTokenizer = 0;               /* Nothing to destroy at "finish" */
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    /* Third element: the slice of the original input the token came from */
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
  }

  if( SQLITE_OK!=p->xClose(pCsr) ){
    zErr = "error in xClose()";
    goto finish;
  }

  /* Clear pTokenizer before inspecting the result so that the cleanup at
  ** "finish" never calls xDestroy() a second time. */
  rc = p->xDestroy(pTokenizer);
  pTokenizer = 0;
  if( SQLITE_OK!=rc ){
    zErr = "error in xDestroy()";
  }

finish:
  /* Reached with a live tokenizer only when xOpen() or xClose() failed;
  ** release it so that the error paths do not leak it. */
  if( pTokenizer ){
    p->xDestroy(pTokenizer);
  }
  if( zErr ){
    sqlite3_result_error(context, zErr, -1);
  }else{
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
  }
  Tcl_DecrRefCount(pRet);
}
SQLITE_EXTENSION_INIT1 #endif #include "fts3_hash.h" #include "fts3_tokenizer.h" #include <assert.h>

/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the hash table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
static void scalarFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts3Hash *pHash;
  void *pPtr = 0;
  const unsigned char *zName;
  int nName;

  assert( argc==1 || argc==2 );

  pHash = (fts3Hash *)sqlite3_user_data(context);

  /* sqlite3_value_text() yields NULL for an SQL NULL argument or when a
  ** text conversion runs out of memory, so zName must be checked before it
  ** is used as a hash key. */
  zName = sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0])+1;

  if( argc==2 ){
    void *pOld;
    int n = sqlite3_value_bytes(argv[1]);
    if( zName==0 || n!=sizeof(pPtr) ){
      sqlite3_result_error(context, "argument type mismatch", -1);
      return;
    }
    pPtr = *(void **)sqlite3_value_blob(argv[1]);
    pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
    /* NOTE(review): presumably the insert hands back the new pointer when
    ** it could not be stored - confirm against sqlite3Fts3HashInsert(). */
    if( pOld==pPtr ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
  }else{
    if( zName ){
      pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
    }
    if( !pPtr ){
      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
      sqlite3_result_error(context, zErr, -1);
      sqlite3_free(zErr);
      return;
    }
  }

  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two or three arguments:
**
**   SELECT <function-name>(<key-name>, <input-string>);
**   SELECT <function-name>(<key-name>, <tokenizer-arg>, <input-string>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
** In the three-argument form the middle argument is passed to the
** tokenizer's xCreate() method as its only argument.
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
**   SELECT fts_tokenizer_test('simple', 'I don''t see how');
**
** will return the string:
**
**   "{0 i I 1 dont don't 2 see see 3 how how}"
*/
static void testFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts3Hash *pHash;
  sqlite3_tokenizer_module *p = 0;
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;

  const char *zErr = 0;

  const char *zName;
  int nName;
  const char *zInput;
  int nInput;

  const char *zArg = 0;

  const char *zToken;
  int nToken = 0;
  int iStart = 0;
  int iEnd = 0;
  int iPos = 0;
  int rc;

  Tcl_Obj *pRet;

  assert( argc==2 || argc==3 );

  nName = sqlite3_value_bytes(argv[0]);
  zName = (const char *)sqlite3_value_text(argv[0]);
  nInput = sqlite3_value_bytes(argv[argc-1]);
  zInput = (const char *)sqlite3_value_text(argv[argc-1]);

  if( argc==3 ){
    zArg = (const char *)sqlite3_value_text(argv[1]);
  }

  pHash = (fts3Hash *)sqlite3_user_data(context);
  /* zName is NULL for an SQL NULL argument; never use it as a hash key. */
  if( zName ){
    p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
  }
  if( !p ){
    char *zMsg = sqlite3_mprintf("unknown tokenizer: %s", zName);
    sqlite3_result_error(context, zMsg, -1);
    sqlite3_free(zMsg);
    return;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
    pTokenizer = 0;               /* Nothing to destroy at "finish" */
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }
  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    /* Third element: the slice of the original input the token came from */
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
  }

  if( SQLITE_OK!=p->xClose(pCsr) ){
    zErr = "error in xClose()";
    goto finish;
  }

  /* Clear pTokenizer before inspecting the result so that the cleanup at
  ** "finish" never calls xDestroy() a second time. */
  rc = p->xDestroy(pTokenizer);
  pTokenizer = 0;
  if( SQLITE_OK!=rc ){
    zErr = "error in xDestroy()";
  }

finish:
  /* Reached with a live tokenizer only when xOpen() or xClose() failed;
  ** release it so that the error paths do not leak it. */
  if( pTokenizer ){
    p->xDestroy(pTokenizer);
  }
  if( zErr ){
    sqlite3_result_error(context, zErr, -1);
  }else{
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
  }
  Tcl_DecrRefCount(pRet);
}
/*
** Build a tokenizer from a possible "tokenize ..." specification.
**
** If zArg is NULL the tokenizer named "simple" is requested. Otherwise
** zArg is examined: unless it starts (case-insensitively) with "tokenize"
** and the character that follows is not an identifier character, the
** argument is not a tokenizer specification and SQLITE_OK is returned
** without touching any output parameter. When it is a specification,
** *pzTokenizer is set to zArg and the text after the keyword is parsed.
**
** The text is split with sqlite3Fts3NextToken(). The first token, after
** being passed through sqlite3Fts3Dequote(), is the key used to look up
** the tokenizer module in pHash; each following token is processed the
** same way and handed to the module's xCreate() method as an argument.
**
** Return values:
**   SQLITE_NOMEM  - an allocation failed.
**   SQLITE_ERROR  - no module is stored in pHash under the name; *pzErr is
**                   set to an sqlite3_mprintf() allocated message.
**   otherwise     - whatever xCreate() returned. On SQLITE_OK the new
**                   tokenizer is in *ppTok with its pModule field set; on
**                   failure *pzErr is set to an allocated message.
*/
int sqlite3Fts3InitTokenizer(
  Fts3Hash *pHash,                /* Tokenizer hash table */
  const char *zArg,               /* Possible tokenizer specification */
  sqlite3_tokenizer **ppTok,      /* OUT: Tokenizer (if applicable) */
  const char **pzTokenizer,       /* OUT: Set to zArg if is tokenizer */
  char **pzErr                    /* OUT: Set to malloced error message */
){
  int rc;
  char *z = (char *)zArg;
  int n = 0;
  char *zCopy;
  char *zEnd;                     /* Pointer to nul-term of zCopy */
  sqlite3_tokenizer_module *m;

  if( !z ){
    zCopy = sqlite3_mprintf("simple");
  }else{
    if( sqlite3_strnicmp(z, "tokenize", 8) || fts3IsIdChar(z[8])){
      return SQLITE_OK;
    }
    zCopy = sqlite3_mprintf("%s", &z[8]);
    *pzTokenizer = zArg;
  }
  if( !zCopy ){
    return SQLITE_NOMEM;
  }

  zEnd = &zCopy[strlen(zCopy)];

  z = (char *)sqlite3Fts3NextToken(zCopy, &n);
  if( z==0 ){
    /* Nothing follows the "tokenize" keyword (or only blanks do). Treat
    ** that as an empty tokenizer name instead of dereferencing the NULL
    ** pointer; the lookup below then reports "unknown tokenizer". */
    n = 0;
    z = zCopy;
  }
  z[n] = '\0';
  sqlite3Fts3Dequote(z);

  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, z, (int)strlen(z)+1);
  if( !m ){
    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
    rc = SQLITE_ERROR;
  }else{
    char const **aArg = 0;
    int iArg = 0;

    /* Every remaining token becomes one xCreate() argument. The pointers
    ** stored in aArg[] refer into zCopy, so zCopy is freed only after
    ** xCreate() has run. */
    z = &z[n+1];
    while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
      int nNew = (int)(sizeof(char *)*(iArg+1));
      char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew);
      if( !aNew ){
        sqlite3_free(zCopy);
        sqlite3_free((void *)aArg);
        return SQLITE_NOMEM;
      }
      aArg = aNew;
      aArg[iArg++] = z;
      z[n] = '\0';
      sqlite3Fts3Dequote(z);
      z = &z[n+1];
    }

    rc = m->xCreate(iArg, aArg, ppTok);
    assert( rc!=SQLITE_OK || *ppTok );
    if( rc!=SQLITE_OK ){
      *pzErr = sqlite3_mprintf("unknown tokenizer");
    }else{
      (*ppTok)->pModule = m;
    }
    sqlite3_free((void *)aArg);
  }

  sqlite3_free(zCopy);
  return rc;
}