/* ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). */ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); sqlite3_free(p); } }
/* ** Parameters z and n contain a pointer to and length of a buffer containing ** an fts3 query expression, respectively. This function attempts to parse the ** query expression and create a tree of Fts3Expr structures representing the ** parsed expression. If successful, *ppExpr is set to point to the head ** of the parsed expression tree and SQLITE_OK is returned. If an error ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse ** error) is returned and *ppExpr is set to 0. ** ** If parameter n is a negative number, then z is assumed to point to a ** nul-terminated string and the length is determined using strlen(). ** ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to ** use to normalize query tokens while parsing the expression. The azCol[] ** array, which is assumed to contain nCol entries, should contain the names ** of each column in the target fts3 table, in order from left to right. ** Column names must be nul-terminated strings. ** ** The iDefaultCol parameter should be passed the index of the table column ** that appears on the left-hand-side of the MATCH operator (the default ** column to match against for tokens for which a column name is not explicitly ** specified as part of the query string), or -1 if tokens may by default ** match any table column. */ int sqlite3Fts3ExprParse( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ char **azCol, /* Array of column names for fts3 table */ int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ Fts3Expr **ppExpr /* OUT: Parsed query structure */ ){ int nParsed; int rc; ParseContext sParse; sParse.pTokenizer = pTokenizer; sParse.azCol = (const char **)azCol; sParse.nCol = nCol; sParse.iDefaultCol = iDefaultCol; sParse.nNest = 0; if( z==0 ){ *ppExpr = 0; return SQLITE_OK; } if( n<0 ){ n = (int)strlen(z); } rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); /* Check for mismatched parenthesis */ if( rc==SQLITE_OK && sParse.nNest ){ rc = SQLITE_ERROR; sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; } return rc; }
/* ** Parameters z and n contain a pointer to and length of a buffer containing ** an fts3 query expression, respectively. This function attempts to parse the ** query expression and create a tree of Fts3Expr structures representing the ** parsed expression. If successful, *ppExpr is set to point to the head ** of the parsed expression tree and SQLITE_OK is returned. If an error ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse ** error) is returned and *ppExpr is set to 0. ** ** If parameter n is a negative number, then z is assumed to point to a ** nul-terminated string and the length is determined using strlen(). ** ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to ** use to normalize query tokens while parsing the expression. The azCol[] ** array, which is assumed to contain nCol entries, should contain the names ** of each column in the target fts3 table, in order from left to right. ** Column names must be nul-terminated strings. ** ** The iDefaultCol parameter should be passed the index of the table column ** that appears on the left-hand-side of the MATCH operator (the default ** column to match against for tokens for which a column name is not explicitly ** specified as part of the query string), or -1 if tokens may by default ** match any table column. */ int sqlite3Fts3ExprParse( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ int iLangid, /* Language id for tokenizer */ char **azCol, /* Array of column names for fts3 table */ int bFts4, /* True to allow FTS4-only syntax */ int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ Fts3Expr **ppExpr, /* OUT: Parsed query structure */ char **pzErr /* OUT: Error message (sqlite3_malloc) */ ){ static const int MAX_EXPR_DEPTH = 12; int rc = fts3ExprParseUnbalanced( pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr ); /* Rebalance the expression. And check that its depth does not exceed ** MAX_EXPR_DEPTH. */ if( rc==SQLITE_OK && *ppExpr ){ rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); if( rc==SQLITE_OK ){ rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); } } if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; if( rc==SQLITE_TOOBIG ){ *pzErr = sqlite3_mprintf( "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH ); rc = SQLITE_ERROR; }else if( rc==SQLITE_ERROR ){ *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); } } return rc; }
/* ** This is the implementation of a scalar SQL function used to test the ** expression parser. It should be called as follows: ** ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); ** ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used ** to parse the query expression (see README.tokenizers). The second argument ** is the query expression to parse. Each subsequent argument is the name ** of a column of the fts3 table that the query expression may refer to. ** For example: ** ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); */ static void fts3ExprTest( sqlite3_context *context, int argc, sqlite3_value **argv ){ sqlite3_tokenizer_module const *pModule = 0; sqlite3_tokenizer *pTokenizer = 0; int rc; char **azCol = 0; const char *zExpr; int nExpr; int nCol; int ii; Fts3Expr *pExpr; char *zBuf = 0; sqlite3 *db = sqlite3_context_db_handle(context); if( argc<3 ){ sqlite3_result_error(context, "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 ); return; } rc = queryTestTokenizer(db, (const char *)sqlite3_value_text(argv[0]), &pModule); if( rc==SQLITE_NOMEM ){ sqlite3_result_error_nomem(context); goto exprtest_out; }else if( !pModule ){ sqlite3_result_error(context, "No such tokenizer module", -1); goto exprtest_out; } rc = pModule->xCreate(0, 0, &pTokenizer); assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); if( rc==SQLITE_NOMEM ){ sqlite3_result_error_nomem(context); goto exprtest_out; } pTokenizer->pModule = pModule; zExpr = (const char *)sqlite3_value_text(argv[1]); nExpr = sqlite3_value_bytes(argv[1]); nCol = argc-2; azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); if( !azCol ){ sqlite3_result_error_nomem(context); goto exprtest_out; } for(ii=0; ii<nCol; ii++){ azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); } rc = sqlite3Fts3ExprParse( pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr ); if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ sqlite3_result_error(context, "Error parsing expression", -1); }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ sqlite3_result_error_nomem(context); }else{ sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); sqlite3_free(zBuf); } sqlite3Fts3ExprFree(pExpr); exprtest_out: if( pModule && pTokenizer ){ rc = pModule->xDestroy(pTokenizer); } sqlite3_free(azCol); }
/* ** Parse the fts3 query expression found in buffer z, length n. This function ** returns either when the end of the buffer is reached or an unmatched ** closing bracket - ')' - is encountered. ** ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the ** parsed form of the expression and *pnConsumed is set to the number of ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM ** (out of memory error) or SQLITE_ERROR (parse error) is returned. */ static int fts3ExprParse( ParseContext *pParse, /* fts3 query parse context */ const char *z, int n, /* Text of MATCH query */ Fts3Expr **ppExpr, /* OUT: Parsed query structure */ int *pnConsumed /* OUT: Number of bytes consumed */ ){ Fts3Expr *pRet = 0; Fts3Expr *pPrev = 0; Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ int nIn = n; const char *zIn = z; int rc = SQLITE_OK; int isRequirePhrase = 1; while( rc==SQLITE_OK ){ Fts3Expr *p = 0; int nByte = 0; rc = getNextNode(pParse, zIn, nIn, &p, &nByte); if( rc==SQLITE_OK ){ int isPhrase; if( !sqlite3_fts3_enable_parentheses && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot ){ /* Create an implicit NOT operator. */ Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); if( !pNot ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; if( pNotBranch ){ pNot->pLeft = pNotBranch; } pNotBranch = pNot; p = pPrev; }else{ int eType = p->eType; assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); /* The isRequirePhrase variable is set to true if a phrase or ** an expression contained in parenthesis is required. If a ** binary operator (AND, OR, NOT or NEAR) is encounted when ** isRequirePhrase is set, this is a syntax error. */ if( !isPhrase && isRequirePhrase ){ sqlite3Fts3ExprFree(p); rc = SQLITE_ERROR; goto exprparse_out; } if( isPhrase && !isRequirePhrase ){ /* Insert an implicit AND operator. */ Fts3Expr *pAnd; assert( pRet && pPrev ); pAnd = fts3MallocZero(sizeof(Fts3Expr)); if( !pAnd ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } pAnd->eType = FTSQUERY_AND; insertBinaryOperator(&pRet, pPrev, pAnd); pPrev = pAnd; } /* This test catches attempts to make either operand of a NEAR ** operator something other than a phrase. For example, either of ** the following: ** ** (bracketed expression) NEAR phrase ** phrase NEAR (bracketed expression) ** ** Return an error in either case. */ if( pPrev && ( (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) )){ sqlite3Fts3ExprFree(p); rc = SQLITE_ERROR; goto exprparse_out; } if( isPhrase ){ if( pRet ){ assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); pPrev->pRight = p; p->pParent = pPrev; }else{ pRet = p; } }else{ insertBinaryOperator(&pRet, pPrev, p); } isRequirePhrase = !isPhrase; } assert( nByte>0 ); } assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); nIn -= nByte; zIn += nByte; pPrev = p; } if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ rc = SQLITE_ERROR; } if( rc==SQLITE_DONE ){ rc = SQLITE_OK; if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ if( !pRet ){ rc = SQLITE_ERROR; }else{ Fts3Expr *pIter = pNotBranch; while( pIter->pLeft ){ pIter = pIter->pLeft; } pIter->pLeft = pRet; pRet = pNotBranch; } } } *pnConsumed = n - nIn; exprparse_out: if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(pRet); sqlite3Fts3ExprFree(pNotBranch); pRet = 0; } *ppExpr = pRet; return rc; }
/* ** This function attempts to transform the expression tree at (*pp) to ** an equivalent but more balanced form. The tree is modified in place. ** If successful, SQLITE_OK is returned and (*pp) set to point to the ** new root expression node. ** ** nMaxDepth is the maximum allowable depth of the balanced sub-tree. ** ** Otherwise, if an error occurs, an SQLite error code is returned and ** expression (*pp) freed. */ static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ int rc = SQLITE_OK; /* Return code */ Fts3Expr *pRoot = *pp; /* Initial root node */ Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ int eType = pRoot->eType; /* Type of node in this tree */ if( nMaxDepth==0 ){ rc = SQLITE_ERROR; } if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ Fts3Expr **apLeaf; apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); if( 0==apLeaf ){ rc = SQLITE_NOMEM; }else{ memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); } if( rc==SQLITE_OK ){ int i; Fts3Expr *p; /* Set $p to point to the left-most leaf in the tree of eType nodes. */ for(p=pRoot; p->eType==eType; p=p->pLeft){ assert( p->pParent==0 || p->pParent->pLeft==p ); assert( p->pLeft && p->pRight ); } /* This loop runs once for each leaf in the tree of eType nodes. */ while( 1 ){ int iLvl; Fts3Expr *pParent = p->pParent; /* Current parent of p */ assert( pParent==0 || pParent->pLeft==p ); p->pParent = 0; if( pParent ){ pParent->pLeft = 0; }else{ pRoot = 0; } rc = fts3ExprBalance(&p, nMaxDepth-1); if( rc!=SQLITE_OK ) break; for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ if( apLeaf[iLvl]==0 ){ apLeaf[iLvl] = p; p = 0; }else{ assert( pFree ); pFree->pLeft = apLeaf[iLvl]; pFree->pRight = p; pFree->pLeft->pParent = pFree; pFree->pRight->pParent = pFree; p = pFree; pFree = pFree->pParent; p->pParent = 0; apLeaf[iLvl] = 0; } } if( p ){ sqlite3Fts3ExprFree(p); rc = SQLITE_TOOBIG; break; } /* If that was the last leaf node, break out of the loop */ if( pParent==0 ) break; /* Set $p to point to the next leaf in the tree of eType nodes */ for(p=pParent->pRight; p->eType==eType; p=p->pLeft); /* Remove pParent from the original tree. */ assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); pParent->pRight->pParent = pParent->pParent; if( pParent->pParent ){ pParent->pParent->pLeft = pParent->pRight; }else{ assert( pParent==pRoot ); pRoot = pParent->pRight; } /* Link pParent into the free node list. It will be used as an ** internal node of the new tree. */ pParent->pParent = pFree; pFree = pParent; } if( rc==SQLITE_OK ){ p = 0; for(i=0; i<nMaxDepth; i++){ if( apLeaf[i] ){ if( p==0 ){ p = apLeaf[i]; p->pParent = 0; }else{ assert( pFree!=0 ); pFree->pRight = p; pFree->pLeft = apLeaf[i]; pFree->pLeft->pParent = pFree; pFree->pRight->pParent = pFree; p = pFree; pFree = pFree->pParent; p->pParent = 0; } } } pRoot = p; }else{ /* An error occurred. Delete the contents of the apLeaf[] array ** and pFree list. Everything else is cleaned up by the call to ** sqlite3Fts3ExprFree(pRoot) below. */ Fts3Expr *pDel; for(i=0; i<nMaxDepth; i++){ sqlite3Fts3ExprFree(apLeaf[i]); } while( (pDel=pFree)!=0 ){ pFree = pDel->pParent; sqlite3_free(pDel); } } assert( pFree==0 ); sqlite3_free( apLeaf ); } } if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(pRoot); pRoot = 0; } *pp = pRoot; return rc; }