/* ** Translate UTF-8 to UTF-8. ** ** This has the effect of making sure that the string is well-formed ** UTF-8. Miscoded characters are removed. ** ** The translation is done in-place and aborted if the output ** overruns the input. */ int sqlite3Utf8To8(unsigned char *zIn){ unsigned char *zOut = zIn; unsigned char *zStart = zIn; u32 c; while( zIn[0] && zOut<=zIn ){ c = sqlite3Utf8Read((const u8**)&zIn); if( c!=0xfffd ){ WRITE_UTF8(zOut, c); } } *zOut = 0; return (int)(zOut - zStart); }
/* ** Translate UTF-8 to UTF-8. ** ** This has the effect of making sure that the string is well-formed ** UTF-8. Miscoded characters are removed. ** ** The translation is done in-place (since it is impossible for the ** correct UTF-8 encoding to be longer than a malformed encoding). */ int sqlite3Utf8To8(unsigned char *zIn){ unsigned char *zOut = zIn; unsigned char *zStart = zIn; unsigned char *zTerm; u32 c; while( zIn[0] ){ c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); if( c!=0xfffd ){ WRITE_UTF8(zOut, c); } } *zOut = 0; return zOut - zStart; }
/* ** Implementation of the like() SQL function. This function implements ** the build-in LIKE operator. The first argument to the function is the ** pattern and the second argument is the string. So, the SQL statements: ** ** A LIKE B ** ** is implemented as like(B,A). ** ** This same function (with a different compareInfo structure) computes ** the GLOB operator. */ static void likeFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const unsigned char *zA, *zB; int escape = 0; int nPat; sqlite3 *db = sqlite3_context_db_handle(context); zB = sqlite3_value_text(argv[0]); zA = sqlite3_value_text(argv[1]); /* Limit the length of the LIKE or GLOB pattern to avoid problems ** of deep recursion and N*N behavior in patternCompare(). */ nPat = sqlite3_value_bytes(argv[0]); testcase( nPat==db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH] ); testcase( nPat==db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]+1 ); if( nPat > db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH] ){ sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); return; } assert( zB==sqlite3_value_text(argv[0]) ); /* Encoding did not change */ if( argc==3 ){ /* The escape character string must consist of a single UTF-8 character. ** Otherwise, return an error. */ const unsigned char *zEsc = sqlite3_value_text(argv[2]); if( zEsc==0 ) return; if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){ sqlite3_result_error(context, "ESCAPE expression must be a single character", -1); return; } escape = sqlite3Utf8Read(zEsc, &zEsc); } if( zA && zB ){ struct compareInfo *pInfo = sqlite3_user_data(context); #ifdef SQLITE_TEST sqlite3_like_count++; #endif sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape)); } }
/* ** This routine is called from the TCL test function "translate_selftest". ** It checks that the primitives for serializing and deserializing ** characters in each encoding are inverses of each other. */ void sqlite3UtfSelfTest(void){ unsigned int i, t; unsigned char zBuf[20]; unsigned char *z; int n; unsigned int c; for(i=0; i<0x00110000; i++){ z = zBuf; WRITE_UTF8(z, i); n = (int)(z-zBuf); assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; c = sqlite3Utf8Read((const u8**)&z); t = i; if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; assert( c==t ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16LE(z, i); n = (int)(z-zBuf); assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; READ_UTF16LE(z, 1, c); assert( c==i ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16BE(z, i); n = (int)(z-zBuf); assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; READ_UTF16BE(z, 1, c); assert( c==i ); assert( (z-zBuf)==n ); } }
/* ** Compare two UTF-8 strings for equality where the first string can ** potentially be a "glob" expression. Return true (1) if they ** are the same and false (0) if they are different. ** ** Globbing rules: ** ** '*' Matches any sequence of zero or more characters. ** ** '?' Matches exactly one character. ** ** [...] Matches one character from the enclosed list of ** characters. ** ** [^...] Matches one character not in the enclosed list. ** ** With the [...] and [^...] matching, a ']' character can be included ** in the list by making it the first character after '[' or '^'. A ** range of characters can be specified using '-'. Example: ** "[a-z]" matches any single lower-case letter. To match a '-', make ** it the last character in the list. ** ** This routine is usually quick, but can be N**2 in the worst case. ** ** Hints: to match '*' or '?', put them in "[]". Like this: ** ** abc[*]xyz Matches "abc*xyz" only */ static int patternCompare( const u8 *zPattern, /* The glob pattern */ const u8 *zString, /* The string to compare against the glob */ const struct compareInfo *pInfo, /* Information about how to do the compare */ const int esc /* The escape character */ ){ int c, c2; int invert; int seen; u8 matchOne = pInfo->matchOne; u8 matchAll = pInfo->matchAll; u8 matchSet = pInfo->matchSet; u8 noCase = pInfo->noCase; int prevEscape = 0; /* True if the previous character was 'escape' */ while( (c = sqlite3Utf8Read(zPattern,&zPattern))!=0 ){ if( !prevEscape && c==matchAll ){ while( (c=sqlite3Utf8Read(zPattern,&zPattern)) == matchAll || c == matchOne ){ if( c==matchOne && sqlite3Utf8Read(zString, &zString)==0 ){ return 0; } } if( c==0 ){ return 1; }else if( c==esc ){ c = sqlite3Utf8Read(zPattern, &zPattern); if( c==0 ){ return 0; } }else if( c==matchSet ){ assert( esc==0 ); /* This is GLOB, not LIKE */ assert( matchSet<0x80 ); /* '[' is a single-byte character */ while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){ SQLITE_SKIP_UTF8(zString); } return *zString!=0; } while( (c2 = sqlite3Utf8Read(zString,&zString))!=0 ){ if( noCase ){ GlogUpperToLower(c2); GlogUpperToLower(c); while( c2 != 0 && c2 != c ){ c2 = sqlite3Utf8Read(zString, &zString); GlogUpperToLower(c2); } }else{ while( c2 != 0 && c2 != c ){ c2 = sqlite3Utf8Read(zString, &zString); } } if( c2==0 ) return 0; if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } return 0; }else if( !prevEscape && c==matchOne ){ if( sqlite3Utf8Read(zString, &zString)==0 ){ return 0; } }else if( c==matchSet ){ int prior_c = 0; assert( esc==0 ); /* This only occurs for GLOB, not LIKE */ seen = 0; invert = 0; c = sqlite3Utf8Read(zString, &zString); if( c==0 ) return 0; c2 = sqlite3Utf8Read(zPattern, &zPattern); if( c2=='^' ){ invert = 1; c2 = sqlite3Utf8Read(zPattern, &zPattern); } if( c2==']' ){ if( c==']' ) seen = 1; c2 = sqlite3Utf8Read(zPattern, &zPattern); } while( c2 && c2!=']' ){ if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){ c2 = sqlite3Utf8Read(zPattern, &zPattern); if( c>=prior_c && c<=c2 ) seen = 1; prior_c = 0; }else{ if( c==c2 ){ seen = 1; } prior_c = c2; } c2 = sqlite3Utf8Read(zPattern, &zPattern); } if( c2==0 || (seen ^ invert)==0 ){ return 0; } }else if( esc==c && !prevEscape ){ prevEscape = 1; }else{ c2 = sqlite3Utf8Read(zString, &zString); if( noCase ){ GlogUpperToLower(c); GlogUpperToLower(c2); } if( c!=c2 ){ return 0; } prevEscape = 0; } } return *zString==0; }