/*
 * Try to match the search string `s` against the page text beginning at
 * character index `n`.
 *
 * The search string is decoded one rune at a time with fz_chartorune() and
 * each rune is compared with the page character via fz_tolower(). When both
 * the search string and the page are at whitespace, the whole whitespace run
 * is consumed on both sides before matching continues.
 *
 * Returns the number of page characters consumed (n at exit minus n at
 * entry), or 0 as soon as a character differs.
 */
static int match_stext(fz_context *ctx, fz_stext_page *page, const char *s, int n)
{
	const int first = n;
	int c;

	while (*s != 0)
	{
		s += fz_chartorune(&c, (char *)s);

		if (!iswhite(c) || !iswhite(charat(ctx, page, n)))
		{
			/* Ordinary character: both sides must agree once lower-cased. */
			if (fz_tolower(c) != fz_tolower(charat(ctx, page, n)))
				return 0;
			n++;
			continue;
		}

		/* Both sides are at whitespace: swallow the run in the document... */
		n++;
		while (iswhite(charat(ctx, page, n)))
			n++;

		/* ...then drop any further whitespace from the search string. */
		for (;;)
		{
			const char *after = s + fz_chartorune(&c, (char *)s);
			if (!iswhite(c))
				break;
			s = after;
		}
	}

	return n - first;
}
//notice that this version of counting sort is a bit different because we are //using strings and we are trying to get the character for the counts via the //charat function which can return -1 so in order to have that correctly index //into counts i need to add one more than usual when dereferencing counts void counting_sort(const std::vector<std::string> & strings,std::vector<int> &v, std::vector<int> &aux, int start, int end, int chrindx, int counts[]) { //do counting sort on the l-th column for(int i = start; i < end; i++) { int offset = v[i]; //offset of the ith string const std::string & currstr = strings[offset]; char c = charat(currstr,chrindx); counts[c + 1]++; out("%d) strings[%d]=%s counts[%d + 1]=%d \n",i, offset, currstr.c_str(), c, counts[c + 1]); } //accumulate the counts for(int i = 0; i < R+1; i++) { if(counts[i+1] > 0) out("counts[%d]=%d+%d\n",i-1, counts[i + 1], counts[i]); counts[i + 1] += counts[i]; } for(int i = start; i < end; i++) { int offset = v[i]; //offset of the ith string const std::string & currstr = strings[offset]; char c = charat(currstr,chrindx); aux[counts[c]++ ] = v[i]; out("aux[%d]=%d\n", start + counts[c], v[i]); } out("chrindx=%d\n",chrindx); for(int i = start; i < end; i++) { v[i] = aux[i - start]; out("%d) strings[%d]=%s\n", i, v[i], strings[v[i]].c_str()); } }
//pick a pivot, maybe randomly //put all points above, less than and equal to in separate places of v[i] //NOTICE: that we dont swap characters or strings, but rather utilize //a vector of indices which is the one that determines the sorted order void threeway_quicksort(const std::vector<char *> & strings, std::vector<int> &v, int start, int end, int chrindx) { int n = end - start; if(n < 2) return; if(n < 20) { insertion_sort(strings, v, start, end, chrindx); return; } //get the pivot int p = rand() % n; int c = charat(strings[v[start+p]],chrindx); int lessthan = start; int gtthan = end; int i = start; while(i < gtthan) { int lc = charat(strings[v[i]],chrindx); if(lc < c) { swap(v[lessthan], v[i]); lessthan++; i++; } else if(lc > c) { gtthan--; swap(v[gtthan], v[i]); } else { //equal i++; } } threeway_quicksort(strings, v, start, lessthan, chrindx); if(c >=0) threeway_quicksort(strings, v, lessthan, gtthan, chrindx + 1); threeway_quicksort(strings, v, gtthan, end, chrindx); }
/* Demo driver: prints what charat() reports for the character 's' in "thisistgk". */
int main()
{
	char text[] = "thisistgk";
	char wanted = 's';

	printf("the position of char %c int the string is: %d\n",
	       wanted,
	       charat(text, wanted));
}
/*
 * Test whether the search string `s` matches the page text starting at
 * character index `n`.
 *
 * The search string is decoded one rune at a time with fz_chartorune(). A
 * space in the search string that lines up with a space on the page absorbs
 * the whole run of spaces on the page; every other rune is compared with the
 * page character after lower-casing.
 *
 * Returns the number of page characters consumed, or 0 on the first mismatch.
 */
static int match(fz_text_page *page, const char *s, int n)
{
	int orig = n;
	int c;
	while (*s) {
		s += fz_chartorune(&c, (char *)s);
		if (c == ' ' && charat(page, n) == ' ') {
			while (charat(page, n) == ' ')
				n++;
		} else {
			int d = charat(page, n);
			/*
			 * tolower() is only defined for values representable as
			 * unsigned char (and EOF). Decoded runes and page characters
			 * can lie outside that range, so those are compared as-is
			 * instead of being passed to tolower().
			 */
			int lc = (c == (int)(unsigned char)c) ? tolower(c) : c;
			int ld = (d == (int)(unsigned char)d) ? tolower(d) : d;
			if (lc != ld)
				return 0;
			n++;
		}
	}
	return n - orig;
}
/*
 * Test whether the string `s` matches the span text starting at character
 * index `n`.
 *
 * `s` is walked one char at a time. A space in `s` that lines up with a space
 * in the span absorbs the whole run of spaces in the span; every other
 * character is compared with the span character after lower-casing.
 *
 * Returns the number of span characters consumed, or 0 on the first mismatch.
 */
static int match(char *s, fz_text_span *span, int n)
{
	int orig = n;
	int c;
	while ((c = *s++)) {
		if (c == ' ' && charat(span, n) == ' ') {
			while (charat(span, n) == ' ')
				n++;
		} else {
			int d = charat(span, n);
			/*
			 * tolower() is only defined for values representable as
			 * unsigned char (and EOF). A plain char may be negative and a
			 * span character may exceed that range, so such values are
			 * compared as-is instead of being passed to tolower().
			 */
			int lc = (c == (int)(unsigned char)c) ? tolower(c) : c;
			int ld = (d == (int)(unsigned char)d) ? tolower(d) : d;
			if (lc != ld)
				return 0;
			n++;
		}
	}
	return n - orig;
}