/* ** Parameters z and n contain a pointer to and length of a buffer containing ** an fts3 query expression, respectively. This function attempts to parse the ** query expression and create a tree of Fts3Expr structures representing the ** parsed expression. If successful, *ppExpr is set to point to the head ** of the parsed expression tree and SQLITE_OK is returned. If an error ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse ** error) is returned and *ppExpr is set to 0. ** ** If parameter n is a negative number, then z is assumed to point to a ** nul-terminated string and the length is determined using strlen(). ** ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to ** use to normalize query tokens while parsing the expression. The azCol[] ** array, which is assumed to contain nCol entries, should contain the names ** of each column in the target fts3 table, in order from left to right. ** Column names must be nul-terminated strings. ** ** The iDefaultCol parameter should be passed the index of the table column ** that appears on the left-hand-side of the MATCH operator (the default ** column to match against for tokens for which a column name is not explicitly ** specified as part of the query string), or -1 if tokens may by default ** match any table column. */ int sqlite3Fts3ExprParse( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ char **azCol, /* Array of column names for fts3 table */ int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ Fts3Expr **ppExpr /* OUT: Parsed query structure */ ){ int nParsed; int rc; ParseContext sParse; sParse.pTokenizer = pTokenizer; sParse.azCol = (const char **)azCol; sParse.nCol = nCol; sParse.iDefaultCol = iDefaultCol; sParse.nNest = 0; if( z==0 ){ *ppExpr = 0; return SQLITE_OK; } if( n<0 ){ n = (int)strlen(z); } rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); /* Check for mismatched parenthesis */ if( rc==SQLITE_OK && sParse.nNest ){ rc = SQLITE_ERROR; sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; } return rc; }
/* ** This function is similar to sqlite3Fts3ExprParse(), with the following ** differences: ** ** 1. It does not do expression rebalancing. ** 2. It does not check that the expression does not exceed the ** maximum allowable depth. ** 3. Even if it fails, *ppExpr may still be set to point to an ** expression tree. It should be deleted using sqlite3Fts3ExprFree() ** in this case. */ static int fts3ExprParseUnbalanced( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ int iLangid, /* Language id for tokenizer */ char **azCol, /* Array of column names for fts3 table */ int bFts4, /* True to allow FTS4-only syntax */ int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ Fts3Expr **ppExpr /* OUT: Parsed query structure */ ){ int nParsed; int rc; ParseContext sParse; memset(&sParse, 0, sizeof(ParseContext)); sParse.pTokenizer = pTokenizer; sParse.iLangid = iLangid; sParse.azCol = (const char **)azCol; sParse.nCol = nCol; sParse.iDefaultCol = iDefaultCol; sParse.bFts4 = bFts4; if( z==0 ){ *ppExpr = 0; return SQLITE_OK; } if( n<0 ){ n = (int)strlen(z); } rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); assert( rc==SQLITE_OK || *ppExpr==0 ); /* Check for mismatched parenthesis */ if( rc==SQLITE_OK && sParse.nNest ){ rc = SQLITE_ERROR; } return rc; }
/* ** The output variable *ppExpr is populated with an allocated Fts3Expr ** structure, or set to 0 if the end of the input buffer is reached. ** ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. ** If SQLITE_ERROR is returned, pContext is populated with an error message. */ static int getNextNode( ParseContext *pParse, /* fts3 query parse context */ const char *z, int n, /* Input string */ Fts3Expr **ppExpr, /* OUT: expression */ int *pnConsumed /* OUT: Number of bytes consumed */ ){ static const struct Fts3Keyword { char *z; /* Keyword text */ unsigned char n; /* Length of the keyword */ unsigned char parenOnly; /* Only valid in paren mode */ unsigned char eType; /* Keyword code */ } aKeyword[] = { { "OR" , 2, 0, FTSQUERY_OR }, { "AND", 3, 1, FTSQUERY_AND }, { "NOT", 3, 1, FTSQUERY_NOT }, { "NEAR", 4, 0, FTSQUERY_NEAR } }; int ii; int iCol; int iColLen; int rc; Fts3Expr *pRet = 0; const char *zInput = z; int nInput = n; /* Skip over any whitespace before checking for a keyword, an open or ** close bracket, or a quoted string. */ while( nInput>0 && fts3isspace(*zInput) ){ nInput--; zInput++; } if( nInput==0 ){ return SQLITE_DONE; } /* See if we are dealing with a keyword. */ for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ const struct Fts3Keyword *pKey = &aKeyword[ii]; if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ continue; } if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; int nKey = pKey->n; char cNext; /* If this is a "NEAR" keyword, check for an explicit nearness. */ if( pKey->eType==FTSQUERY_NEAR ){ assert( nKey==4 ); if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ nNear = 0; for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ nNear = nNear * 10 + (zInput[nKey] - '0'); } } } /* At this point this is probably a keyword. But for that to be true, ** the next byte must contain either whitespace, an open or close ** parenthesis, a quote character, or EOF. */ cNext = zInput[nKey]; if( fts3isspace(cNext) || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 ){ pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); if( !pRet ){ return SQLITE_NOMEM; } pRet->eType = pKey->eType; pRet->nNear = nNear; *ppExpr = pRet; *pnConsumed = (int)((zInput - z) + nKey); return SQLITE_OK; } /* Turns out that wasn't a keyword after all. This happens if the ** user has supplied a token such as "ORacle". Continue. */ } } /* Check for an open bracket. */ if( sqlite3_fts3_enable_parentheses ){ if( *zInput=='(' ){ int nConsumed; pParse->nNest++; rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } *pnConsumed = (int)((zInput - z) + 1 + nConsumed); return rc; } /* Check for a close bracket. */ if( *zInput==')' ){ pParse->nNest--; *pnConsumed = (int)((zInput - z) + 1); return SQLITE_DONE; } } /* See if we are dealing with a quoted phrase. If this is the case, then ** search for the closing quote and pass the whole string to getNextString() ** for processing. This is easy to do, as fts3 has no syntax for escaping ** a quote character embedded in a string. */ if( *zInput=='"' ){ for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); *pnConsumed = (int)((zInput - z) + ii + 1); if( ii==nInput ){ return SQLITE_ERROR; } return getNextString(pParse, &zInput[1], ii-1, ppExpr); } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer ** interface. Before doing so, figure out if there is an explicit ** column specifier for the token. ** ** TODO: Strangely, it is not possible to associate a column specifier ** with a quoted phrase, only with a single token. Not sure if this was ** an implementation artifact or an intentional decision when fts3 was ** first implemented. Whichever it was, this module duplicates the ** limitation. */ iCol = pParse->iDefaultCol; iColLen = 0; for(ii=0; ii<pParse->nCol; ii++){ const char *zStr = pParse->azCol[ii]; int nStr = (int)strlen(zStr); if( nInput>nStr && zInput[nStr]==':' && sqlite3_strnicmp(zStr, zInput, nStr)==0 ){ iCol = ii; iColLen = (int)((zInput - z) + nStr + 1); break; } } rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); *pnConsumed += iColLen; return rc; }