static void findfileconflicts_alias_cb(void *cbdatav, const char *fn, struct filelistinfo *info) { int isdir = S_ISDIR(info->mode); struct cbdata *cbdata = cbdatav; const char *dp; Id idx, dirid; Id hx, qx; Hashval h, hh; idx = cbdata->idx; if (!info->dirlen) return; dp = fn + info->dirlen; if (info->diridx != cbdata->lastdiridx) { cbdata->lastdiridx = info->diridx; cbdata->lastdirhash = 0; } dp = fn + info->dirlen; hx = strhash(dp); if (!hx) hx = strlen(fn) + 1; h = hx & cbdata->cflmapn; hh = HASHCHAIN_START; for (;;) { qx = cbdata->cflmap[2 * h]; if (!qx) break; if (qx == hx) break; h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn); } if (!qx || cbdata->cflmap[2 * h + 1] != -1) return; if (!cbdata->lastdirhash) cbdata->lastdirhash = strnhash(fn, dp - fn); dirid = normalizedir(cbdata, fn, dp - fn, cbdata->lastdirhash, 1); queue_push2(&cbdata->lookat, hx, idx); queue_push2(&cbdata->lookat, cbdata->lastdirhash, isdir ? -dirid : dirid); }
static void findfileconflicts2_cb(void *cbdatav, const char *fn, struct filelistinfo *info) { struct cbdata *cbdata = cbdatav; Hashval hx; const char *dp; char md5padded[34]; Id off; if (!info->dirlen) return; dp = fn + info->dirlen; if (info->diridx != cbdata->lastdiridx) { cbdata->lastdiridx = info->diridx; cbdata->lastdirhash = strnhash(fn, dp - fn); } if (cbdata->aliases) { if (cbdata->lastdirhash != cbdata->dirhash) return; hx = strhash(dp); } else { hx = cbdata->lastdirhash; hx = strhash_cont(dp, hx); } if (!hx) hx = strlen(fn) + 1; if ((Id)hx != cbdata->hx) return; if (cbdata->dirid && cbdata->dirid != normalizedir(cbdata, fn, dp - fn, cbdata->dirhash, 0)) return; strncpy(md5padded, info->digest, 32); md5padded[32] = 0; md5padded[33] = info->color; /* printf("%d, hx %x -> %s %d %s\n", cbdata->idx, hx, fn, info->mode, info->digest); */ off = addfilesspace(cbdata, strlen(fn) + (34 + 1)); memcpy(cbdata->filesspace + off, (unsigned char *)md5padded, 34); strcpy((char *)cbdata->filesspace + off + 34, fn); queue_push(&cbdata->files, off); }
static int crm_vector_tokenize_old ( char *txtptr, // input string (null-safe!) int txtstart, // start tokenizing at this byte. int txtlen, // how many bytes of input. const char *regex, // the parsing regex (might be ignored) int regexlen, // length of the parsing regex const int *coeff_array, // the pipeline coefficient control array int pipe_len, // how long a pipeline (== coeff_array row length) int pipe_iters, // how many rows are there in coeff_array crmhash_t *features, // where the output features go int featureslen, // how many output features (max) int features_stride, // Spacing (in words) between features int *features_out, // how many longs did we actually use up int *next_offset // next invocation should start at this offset ) { int hashpipe[UNIFIED_WINDOW_LEN]; // the pipeline for hashes int keepgoing; // the loop controller regex_t regcb; // the compiled regex regmatch_t match[5]; // we only care about the outermost match int i, j, k; // some handy index vars int regcomp_status; int text_offset; int max_offset; int irow, icol; crmhash_t ihash; char errortext[4096]; // now do the work. *features_out = 0; keepgoing = 1; j = 0; // Compile the regex. if (regexlen) { regcomp_status = crm_regcomp(®cb, regex, regexlen, REG_EXTENDED); if (regcomp_status > 0) { crm_regerror(regcomp_status, ®cb, errortext, 4096); nonfatalerror("Regular Expression Compilation Problem: ", errortext); return -1; } } // fill the hashpipe with initialization for (i = 0; i < UNIFIED_WINDOW_LEN; i++) hashpipe[i] = 0xDEADBEEF; // Run the hashpipe, either with regex, or without. // text_offset = txtstart; max_offset = txtstart + txtlen; if (internal_trace) fprintf(stderr, "Text offset: %d, length: %d\n", text_offset, txtlen); while (keepgoing) { // If the pattern is empty, assume non-graph-delimited tokens // (supposedly an 8% speed gain over regexec) if (regexlen == 0) { k = 0; // k == 0 means found another token.... 
same as regexec // skip non-graphical characthers match[0].rm_so = 0; while (!crm_isgraph(txtptr[text_offset + match[0].rm_so]) && text_offset + match[0].rm_so < max_offset) { match[0].rm_so++; } match[0].rm_eo = match[0].rm_so; while (crm_isgraph(txtptr[text_offset + match[0].rm_eo]) && text_offset + match[0].rm_eo < max_offset) { match[0].rm_eo++; } if (match[0].rm_so == match[0].rm_eo) k = 1; } else { k = crm_regexec(®cb, &txtptr[text_offset], max_offset - text_offset, 5, match, REG_EXTENDED, NULL); } // Are we done? if (k == 0) { // Not done,we have another token (the text in text[match[0].rm_so, // of length match[0].rm_eo - match[0].rm_so size) // if (user_trace) { fprintf(stderr, "Token; k: %d T.O: %d len %d ( %d %d on >", k, text_offset, match[0].rm_eo - match[0].rm_so, match[0].rm_so, match[0].rm_eo); for (k = match[0].rm_so + text_offset; k < match[0].rm_eo + text_offset; k++) fprintf(stderr, "%c", txtptr[k]); fprintf(stderr, "< )\n"); } // Now slide the hashpipe up one slot, and stuff this new token // into the front of the pipeline // // for (i = UNIFIED_WINDOW_LEN; i > 0; i--) // GerH points out that // hashpipe [i] = hashpipe[i-1]; // this smashes stack memmove(&hashpipe[1], hashpipe, sizeof(hashpipe) - sizeof(hashpipe[0])); hashpipe[0] = strnhash(&txtptr[match[0].rm_so + text_offset], match[0].rm_eo - match[0].rm_so); // Now, for each row in the coefficient array, we create a // feature. // for (irow = 0; irow < pipe_iters; irow++) { ihash = 0; for (icol = 0; icol < pipe_len; icol++) ihash = ihash + hashpipe[icol] *coeff_array[(pipe_len * irow) + icol]; // Stuff the final ihash value into reatures array features[*features_out] = ihash; if (internal_trace) fprintf(stderr, "New Feature: %lx at %d\n", (unsigned long int)ihash, *features_out); *features_out = *features_out + features_stride; } // And finally move on to the next place in the input. // // Move to end of current token. text_offset = text_offset + match[0].rm_eo; } else // Failed to match. 
This is the end... { keepgoing = 0; } // Check to see if we have space left to add more // features assuming there are any left to add. if (*features_out + pipe_iters + 3 > featureslen) { keepgoing = 0; } } if (next_offset) *next_offset = text_offset + match[0].rm_eo; features[*features_out] = 0; features[*features_out + 1] = 0; if (internal_trace) fprintf(stderr, "VT: Total features generated: %d\n", *features_out); return 0; }
/*
 * Try to replace the directory stored at cbdata->filesspace + diroff
 * (length dirl) by a canonical form.
 *
 * The path is split into a dirname (up to and including the last '/'
 * before the basename) and a basename.  The dirname is normalized through
 * normalizedir(); the basename is then handled as follows:
 *   - "."  : the normalized dirname is the result
 *   - ".." : the parent of the normalized dirname is normalized and returned
 *   - otherwise rootdir + normalized dirname + basename is lstat()ed; if it
 *     is a symlink its target is read and normalized, if not, the joined
 *     path (with a trailing '/') is stored as a new filesspace entry and
 *     normalized.
 *
 * All positions are kept as offsets into cbdata->filesspace and pointers
 * are re-derived from them at every use.
 *
 * Returns an offset into cbdata->filesspace: either a normalized entry or,
 * whenever something does not look right (path not absolute, normalizedir()
 * returned -1, readlink() failure, ...), the unchanged diroff.
 */
static Id unifywithcanon(struct cbdata *cbdata, Id diroff, int dirl)
{
  Id dirnameid;
  /* i: start of the basename within the input; l: length of the normalized
   * dirname; lo/ll: offset and length of the symlink target in canonspace */
  int i, l, ll, lo;
  struct stat stb;

#if 0
  printf("UNIFY %.*s\n", dirl, (char *)cbdata->filesspace + diroff);
#endif
  /* only non-empty absolute paths are processed */
  if (!dirl || cbdata->filesspace[diroff] != '/')
    return diroff;
  /* strip / at end*/
  while (dirl && cbdata->filesspace[diroff + dirl - 1] == '/')
    dirl--;
  /* nothing left: the path consisted of slashes only */
  if (!dirl)
    return diroff;

  /* find dirname */
  for (i = dirl - 1; i > 0; i--)
    if (cbdata->filesspace[diroff + i] == '/')
      break;
  i++;				/* include trailing / */

  /* normalize dirname */
  dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, i, strnhash((char *)cbdata->filesspace + diroff, i), 1);
  if (dirnameid == -1)
    return diroff;		/* hit "in progress" marker, some cyclic link */

  /* sanity check result */
  if (cbdata->filesspace[dirnameid] != '/')
    return diroff;		/* hmm */
  l = strlen((char *)cbdata->filesspace + dirnameid);
  if (l && cbdata->filesspace[dirnameid + l - 1] != '/')
    return diroff;		/* hmm */

  /* special handling for "." and ".." basename */
  if (cbdata->filesspace[diroff + i] == '.')
    {
      /* basename is exactly "." */
      if (dirl - i == 1)
	return dirnameid;
      /* basename is exactly ".." */
      if (dirl - i == 2 && cbdata->filesspace[diroff + i + 1] == '.')
	{
	  if (l <= 2)
	    return dirnameid;	/* we hit our root */
	  /* l - 1 is the trailing '/', so start one before it to find the
	   * '/' that ends the parent directory */
	  for (i = l - 2; i > 0; i--)
	    if (cbdata->filesspace[dirnameid + i] == '/')
	      break;
	  i++;	/* include trailing / */
	  dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + dirnameid, i, strnhash((char *)cbdata->filesspace + dirnameid, i), 1);
	  return dirnameid == -1 ? diroff : dirnameid;
	}
    }

  /* append basename to normalized dirname */
  if (cbdata->rootdirl + l + dirl - i + 1 > cbdata->canonspacen)
    {
      /* grow the scratch buffer with some slack and (re)write the rootdir
       * prefix at its start */
      cbdata->canonspacen = cbdata->rootdirl + l + dirl - i + 20;
      cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
      strcpy(cbdata->canonspace, cbdata->rootdir);
    }
  /* canonspace now becomes: rootdir + normalized dirname + basename */
  strcpy(cbdata->canonspace + cbdata->rootdirl, (char *)cbdata->filesspace + dirnameid);
  strncpy(cbdata->canonspace + cbdata->rootdirl + l, (char *)cbdata->filesspace + diroff + i, dirl - i);
  /* strncpy does not terminate here, so do it explicitly */
  cbdata->canonspace[cbdata->rootdirl + l + dirl - i] = 0;

#if 0
  printf("stat()ing %s\n", cbdata->canonspace);
#endif
  cbdata->statsmade++;
  if (lstat(cbdata->canonspace, &stb) != 0 || !S_ISLNK(stb.st_mode))
    {
      /* not a symlink or stat failed, have new canon entry */
      /* + 2: room for an added trailing '/' and the terminating 0 */
      diroff = addfilesspace(cbdata, l + dirl - i + 2);
      /* copy the path without the rootdir prefix */
      strcpy((char *)cbdata->filesspace + diroff, cbdata->canonspace + cbdata->rootdirl);
      l += dirl - i;
      /* add trailing / */
      if (cbdata->filesspace[diroff + l - 1] != '/')
	{
	  cbdata->filesspace[diroff + l++] = '/';
	  cbdata->filesspace[diroff + l] = 0;
	}
      /* call normalizedir on new entry for unification purposes */
      dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, l, strnhash((char *)cbdata->filesspace + diroff, l), 1);
      return dirnameid == -1 ? diroff : dirnameid;
    }
  /* oh no, a symlink! follow */
  /* the link target is read into canonspace right behind the terminating 0
   * of the path that was just lstat()ed */
  lo = cbdata->rootdirl + l + dirl - i + 1;
  if (lo + stb.st_size + 2 > cbdata->canonspacen)
    {
      /* + 2 above: room for an added trailing '/' and the terminating 0 */
      cbdata->canonspacen = lo + stb.st_size + 20;
      cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
    }
  ll = readlink(cbdata->canonspace, cbdata->canonspace + lo, stb.st_size);
  if (ll < 0 || ll > stb.st_size)
    return diroff;		/* hmm */
  if (ll == 0)
    return dirnameid;		/* empty means current dir */
  if (cbdata->canonspace[lo + ll - 1] != '/')
    cbdata->canonspace[lo + ll++] = '/';	/* add trailing / */
  cbdata->canonspace[lo + ll] = 0;	/* zero terminate */
  if (cbdata->canonspace[lo] != '/')
    {
      /* relative link, concatenate to dirname */
      /* the target (including its terminating 0) is moved over the basename,
       * directly behind the normalized dirname; the regions may overlap */
      memmove(cbdata->canonspace + cbdata->rootdirl + l, cbdata->canonspace + lo, ll + 1);
      lo = cbdata->rootdirl;
      ll += l;
    }
  /* normalize the link target (absolute, or dirname + relative target) */
  dirnameid = normalizedir(cbdata, cbdata->canonspace + lo, ll, strnhash(cbdata->canonspace + lo, ll), 1);
  return dirnameid == -1 ? diroff : dirnameid;
}
static void findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info) { struct cbdata *cbdata = cbdatav; int isdir = S_ISDIR(info->mode); const char *dp; Id idx, oidx; Id hx, qx; Hashval h, hh, dhx; idx = cbdata->idx; if (!info->dirlen) return; dp = fn + info->dirlen; if (info->diridx != cbdata->lastdiridx) { cbdata->lastdiridx = info->diridx; cbdata->lastdirhash = strnhash(fn, dp - fn); } dhx = cbdata->lastdirhash; /* this mirrors the "if (!hx) hx = strlen(fn) + 1" in finddirs_cb */ if (!isindirmap(cbdata, dhx ? dhx : dp - fn + 1)) return; hx = strhash_cont(dp, dhx); if (!hx) hx = strlen(fn) + 1; h = hx & cbdata->cflmapn; hh = HASHCHAIN_START; for (;;) { qx = cbdata->cflmap[2 * h]; if (!qx) break; if (qx == hx) break; h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn); } if (!qx) { /* a miss */ if (!cbdata->create) return; cbdata->cflmap[2 * h] = hx; cbdata->cflmap[2 * h + 1] = (isdir ? ~idx : idx); if (++cbdata->cflmapused * 2 > cbdata->cflmapn) cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn); return; } oidx = cbdata->cflmap[2 * h + 1]; if (oidx < 0) { int i; if (isdir) { /* both are directories. delay the conflict, keep oidx in slot */ queue_push2(&cbdata->lookat_dir, hx, idx); return; } oidx = ~oidx; /* now have file, had directories before. */ cbdata->cflmap[2 * h + 1] = oidx; /* make it a file */ /* dump all delayed directory hits for hx */ for (i = 0; i < cbdata->lookat_dir.count; i += 2) if (cbdata->lookat_dir.elements[i] == hx) { queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]); queue_push2(&cbdata->lookat, 0, 0); } } else if (oidx == idx) return; /* no conflicts with ourself, please */ queue_push2(&cbdata->lookat, hx, oidx); queue_push2(&cbdata->lookat, 0, 0); queue_push2(&cbdata->lookat, hx, idx); queue_push2(&cbdata->lookat, 0, 0); }