Esempio n. 1
0
/*
 * File list callback used when directory aliases are in play: files are
 * looked up by the hash of their basename only.
 *
 * cbdatav: the struct cbdata holding the conflict hash map (cflmap), the
 *          per-directory cache (lastdiridx/lastdirhash) and the lookat queue
 * fn:      full file name; its first info->dirlen bytes are the directory part
 * info:    per-file data supplied by the caller (mode, dirlen, diridx)
 *
 * Only entries that are present in cflmap and whose value slot holds -1 are
 * processed.  For those, the directory is normalized and four values are
 * appended to cbdata->lookat:
 *   hx, idx, lastdirhash, dirid (negated if the entry is a directory)
 */
static void
findfileconflicts_alias_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
{
  int isdir = S_ISDIR(info->mode);
  struct cbdata *cbdata = cbdatav;
  const char *dp;
  Id idx, dirid;
  Id hx, qx;
  Hashval h, hh;

  idx = cbdata->idx;

  /* entries without a directory part are ignored */
  if (!info->dirlen)
    return;
  if (info->diridx != cbdata->lastdiridx)
    {
      /* directory changed: drop the cached hash, it is recomputed lazily below */
      cbdata->lastdiridx = info->diridx;
      cbdata->lastdirhash = 0;
    }
  dp = fn + info->dirlen;       /* start of the basename */
  hx = strhash(dp);
  if (!hx)
    hx = strlen(fn) + 1;        /* 0 marks an empty slot in cflmap, so avoid it */

  /* probe cflmap (key at 2 * h, value at 2 * h + 1) for hx */
  h = hx & cbdata->cflmapn;
  hh = HASHCHAIN_START;
  for (;;)
    {
      qx = cbdata->cflmap[2 * h];
      if (!qx)
        break;
      if (qx == hx)
        break;
      h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
    }
  /* not in the map, or not flagged with -1: nothing to do */
  if (!qx || cbdata->cflmap[2 * h + 1] != -1)
    return;
  /* the directory hash is only computed once we know we need it */
  if (!cbdata->lastdirhash)
    cbdata->lastdirhash = strnhash(fn, dp - fn);
  dirid = normalizedir(cbdata, fn, dp - fn, cbdata->lastdirhash, 1);
  queue_push2(&cbdata->lookat, hx, idx);
  queue_push2(&cbdata->lookat, cbdata->lastdirhash, isdir ? -dirid : dirid);
}
Esempio n. 2
0
static void
findfileconflicts2_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
{
  struct cbdata *cbdata = cbdatav;
  Hashval hx;
  const char *dp;
  char md5padded[34];
  Id off;

  if (!info->dirlen)
    return;
  dp = fn + info->dirlen;
  if (info->diridx != cbdata->lastdiridx)
    {
      cbdata->lastdiridx = info->diridx;
      cbdata->lastdirhash = strnhash(fn, dp - fn);
    }
  if (cbdata->aliases)
    {
      if (cbdata->lastdirhash != cbdata->dirhash)
	return;
      hx = strhash(dp);
    }
  else
    {
      hx = cbdata->lastdirhash;
      hx = strhash_cont(dp, hx);
    }
  if (!hx)
    hx = strlen(fn) + 1;
  if ((Id)hx != cbdata->hx)
    return;
  if (cbdata->dirid && cbdata->dirid != normalizedir(cbdata, fn, dp - fn, cbdata->dirhash, 0))
    return;
  strncpy(md5padded, info->digest, 32);
  md5padded[32] = 0;
  md5padded[33] = info->color;
  /* printf("%d, hx %x -> %s   %d %s\n", cbdata->idx, hx, fn, info->mode, info->digest); */
  off = addfilesspace(cbdata, strlen(fn) + (34 + 1));
  memcpy(cbdata->filesspace + off, (unsigned char *)md5padded, 34);
  strcpy((char *)cbdata->filesspace + off + 34, fn);
  queue_push(&cbdata->files, off);
}
Esempio n. 3
0
//
//    Tokenize a span of text and turn the token stream into hashed features.
//
//    Tokens are found either with the supplied regex or, when regexlen is 0,
//    by splitting on runs of non-graphic characters (crm_isgraph).  Each token
//    is hashed with strnhash() and pushed into the front of a sliding pipeline
//    of UNIFIED_WINDOW_LEN hashes.  For every token one feature per coeff_array
//    row is emitted: the sum of hashpipe[col] * coeff_array[row][col] over the
//    first pipe_len columns.  Features are written to features[] at
//    *features_out, which is then advanced by features_stride.  Two zero words
//    are stored after the last feature.
//
//    Returns 0 on success and -1 if the regex does not compile (after
//    reporting it through nonfatalerror()).
//
//    NOTE(review): pipe_len is presumably expected to be <= UNIFIED_WINDOW_LEN;
//    nothing here enforces it - confirm against the callers.
//
static int crm_vector_tokenize_old
(
        char *txtptr,                // input string (null-safe!)
        int txtstart,                //     start tokenizing at this byte.
        int txtlen,                  //   how many bytes of input.
        const char *regex,           // the parsing regex (might be ignored)
        int regexlen,                //   length of the parsing regex
        const int  *coeff_array,     // the pipeline coefficient control array
        int pipe_len,                //  how long a pipeline (== coeff_array row length)
        int pipe_iters,              //  how many rows are there in coeff_array
        crmhash_t  *features,        // where the output features go
        int featureslen,             //   how many output features (max)
        int features_stride,         //   Spacing (in words) between features
        int        *features_out,    // how many longs did we actually use up
        int        *next_offset      // next invocation should start at this offset
)
{
    int hashpipe[UNIFIED_WINDOW_LEN];  // the pipeline for hashes
    int keepgoing;                     // the loop controller
    regex_t regcb;                     // the compiled regex
    regmatch_t match[5];               // we only care about the outermost match
    int i, k;                          // some handy index vars
    int regcomp_status;
    int text_offset;
    int max_offset;
    int irow, icol;
    crmhash_t ihash;
    char errortext[4096];

    //    now do the work.

    *features_out = 0;
    keepgoing = 1;

    //    match[0] is read after the loop to compute *next_offset, even when
    //    the very first match attempt failed; give it a defined value so we
    //    never read indeterminate stack contents.
    match[0].rm_so = 0;
    match[0].rm_eo = 0;

    //    Compile the regex.
    //    NOTE(review): regcb is never released in this function; if
    //    crm_regcomp() allocates, a matching free is needed on the exit
    //    paths - confirm which release call the project provides.
    if (regexlen)
    {
        regcomp_status = crm_regcomp(&regcb, regex, regexlen, REG_EXTENDED);
        if (regcomp_status > 0)
        {
            crm_regerror(regcomp_status, &regcb, errortext, sizeof(errortext));
            nonfatalerror("Regular Expression Compilation Problem: ",
                          errortext);
            return -1;
        }
    }

    // fill the hashpipe with initialization
    for (i = 0; i < UNIFIED_WINDOW_LEN; i++)
        hashpipe[i] = 0xDEADBEEF;

    //   Run the hashpipe, either with regex, or without.
    //
    text_offset = txtstart;
    max_offset = txtstart + txtlen;
    if (internal_trace)
        fprintf(stderr, "Text offset: %d, length: %d\n", text_offset, txtlen);
    while (keepgoing)
    {
        //  If the pattern is empty, assume non-graph-delimited tokens
        //  (supposedly an 8% speed gain over regexec)
        if (regexlen == 0)
        {
            k = 0;  // k == 0 means found another token.... same as regexec
            //         skip non-graphical characters
            //
            //   The bounds test comes first so that we never look at the
            //   byte at max_offset, which is outside the caller's span.
            match[0].rm_so = 0;
            while (text_offset + match[0].rm_so < max_offset
                  && !crm_isgraph(txtptr[text_offset + match[0].rm_so]))
            {
                match[0].rm_so++;
            }
            match[0].rm_eo = match[0].rm_so;
            while (text_offset + match[0].rm_eo < max_offset
                  && crm_isgraph(txtptr[text_offset + match[0].rm_eo]))
            {
                match[0].rm_eo++;
            }
            //   an empty token means we ran out of input
            if (match[0].rm_so == match[0].rm_eo)
                k = 1;
        }
        else
        {
            k = crm_regexec(&regcb,
                            &txtptr[text_offset],
                            max_offset - text_offset,
                            5, match,
                            REG_EXTENDED, NULL);
        }


        //   Are we done?
        if (k == 0)
        {
            //   Not done, we have another token (the text starting at
            //    match[0].rm_so, of length match[0].rm_eo - match[0].rm_so)

            //
            if (user_trace)
            {
                fprintf(stderr, "Token; k: %d T.O: %d len %d ( %d %d on >",
                        k,
                        text_offset,
                        match[0].rm_eo - match[0].rm_so,
                        match[0].rm_so,
                        match[0].rm_eo);
                //   k is reused as a scratch index here; its match status
                //   is not needed any more in this iteration.
                for (k = match[0].rm_so + text_offset;
                     k < match[0].rm_eo + text_offset;
                     k++)
                    fprintf(stderr, "%c", txtptr[k]);
                fprintf(stderr, "< )\n");
            }

            //   Now slide the hashpipe up one slot, and stuff this new token
            //   into the front of the pipeline.  memmove moves exactly
            //   UNIFIED_WINDOW_LEN - 1 entries, so the last slot is dropped
            //   and nothing is written past the array.
            //
            memmove(&hashpipe[1], hashpipe,
                    sizeof(hashpipe) - sizeof(hashpipe[0]));

            hashpipe[0] = strnhash(&txtptr[match[0].rm_so + text_offset],
                                   match[0].rm_eo - match[0].rm_so);

            //    Now, for each row in the coefficient array, we create a
            //   feature.
            //
            for (irow = 0; irow < pipe_iters; irow++)
            {
                ihash = 0;
                for (icol = 0; icol < pipe_len; icol++)
                    ihash = ihash
                            + hashpipe[icol] *coeff_array[(pipe_len * irow) + icol];

                //    Stuff the final ihash value into features array
                features[*features_out] = ihash;
                if (internal_trace)
                    fprintf(stderr,
                            "New Feature: %lx at %d\n", (unsigned long int)ihash, *features_out);
                *features_out = *features_out + features_stride;
            }

            //   And finally move on to the next place in the input.
            //
            //  Move to end of current token.
            text_offset = text_offset + match[0].rm_eo;
        }
        else
        //     Failed to match.  This is the end...
        {
            keepgoing = 0;
        }

        //    Check to see if we have space left to add more
        //    features assuming there are any left to add.
        if (*features_out + pipe_iters + 3 > featureslen)
        {
            keepgoing = 0;
        }
    }
    //    NOTE(review): when the loop ends right after a successful match,
    //    text_offset has already been advanced by match[0].rm_eo above, so
    //    rm_eo is added a second time here - confirm that callers expect this.
    if (next_offset)
        *next_offset = text_offset + match[0].rm_eo;
    //    terminate the feature list with two zero words
    features[*features_out] = 0;
    features[*features_out + 1] = 0;

    if (internal_trace)
        fprintf(stderr, "VT: Total features generated: %d\n", *features_out);
    return 0;
}
Esempio n. 4
0
/*
 * Compute a canonical form of the directory stored in cbdata->filesspace at
 * offset diroff (dirl bytes long) and return the filesspace offset of the
 * result.
 *
 * The parent directory is normalized first via normalizedir().  A "." or ".."
 * basename is resolved textually.  Otherwise the basename is appended to the
 * normalized parent, the result is lstat()ed below cbdata->rootdir and, if it
 * is a symlink, the link target is read and normalized instead.
 *
 * diroff itself is returned unchanged whenever no better answer can be
 * produced: the directory is empty, does not start with '/', consists only
 * of slashes, normalizedir() returned -1, a sanity check on the normalized
 * parent failed, or readlink() failed.
 *
 * cbdata->canonspace is used as scratch buffer and grown as needed;
 * cbdata->statsmade is incremented for every lstat() call.
 */
static Id
unifywithcanon(struct cbdata *cbdata, Id diroff, int dirl)
{
  Id dirnameid;
  int i, l, ll, lo;
  struct stat stb;

#if 0
  printf("UNIFY %.*s\n", dirl, (char *)cbdata->filesspace + diroff);
#endif
  /* only absolute, non-empty directories are handled */
  if (!dirl || cbdata->filesspace[diroff] != '/')
    return diroff;
  /* strip / at end*/
  while (dirl && cbdata->filesspace[diroff + dirl - 1] == '/')
    dirl--;
  /* nothing left: the directory consisted of slashes only */
  if (!dirl)
    return diroff;

  /* find dirname: scan backwards for the last '/' (index 0 is known to be one) */
  for (i = dirl - 1; i > 0; i--)
    if (cbdata->filesspace[diroff + i] == '/')
      break;
  i++;				/* include trailing / */
  /* from here on i is the length of the parent part, the basename is diroff + i .. diroff + dirl */

  /* normalize dirname */
  dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, i, strnhash((char *)cbdata->filesspace + diroff, i), 1);
  if (dirnameid == -1)
    return diroff;		/* hit "in progress" marker, some cyclic link */

  /* sanity check result: must start and end with a '/' */
  if (cbdata->filesspace[dirnameid] != '/')
    return diroff;		/* normalized parent is not absolute, give up */
  l = strlen((char *)cbdata->filesspace + dirnameid);
  if (l && cbdata->filesspace[dirnameid + l - 1] != '/')
    return diroff;		/* normalized parent lacks the trailing /, give up */

  /* special handling for "." and ".." basename */
  if (cbdata->filesspace[diroff + i] == '.')
    {
      /* ".": same as the normalized parent */
      if (dirl - i == 1)
	return dirnameid;
      if (dirl - i == 2 && cbdata->filesspace[diroff + i + 1] == '.')
	{
	  /* "..": strip the last component from the normalized parent */
	  if (l <= 2)
	    return dirnameid;	/* we hit our root */
	  /* start at l - 2 to skip the trailing '/' of the normalized parent */
	  for (i = l - 2; i > 0; i--)
	    if (cbdata->filesspace[dirnameid + i] == '/')
	      break;
	  i++;	/* include trailing / */
	  dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + dirnameid, i, strnhash((char *)cbdata->filesspace + dirnameid, i), 1);
	  return dirnameid == -1 ? diroff : dirnameid;
	}
    }

  /* append basename to normalized dirname */
  /* canonspace layout: rootdir (rootdirl bytes), normalized parent (l bytes), basename, 0 */
  if (cbdata->rootdirl + l + dirl - i + 1 > cbdata->canonspacen)
    {
      cbdata->canonspacen = cbdata->rootdirl + l + dirl - i + 20;
      cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
      /* the rootdir prefix is only written when the buffer is (re)allocated */
      strcpy(cbdata->canonspace, cbdata->rootdir);
    }
  strcpy(cbdata->canonspace + cbdata->rootdirl, (char *)cbdata->filesspace + dirnameid);
  /* strncpy copies exactly dirl - i bytes of basename; the terminator is set explicitly below */
  strncpy(cbdata->canonspace + cbdata->rootdirl + l, (char *)cbdata->filesspace + diroff + i, dirl - i);
  cbdata->canonspace[cbdata->rootdirl + l + dirl - i] = 0;

#if 0
  printf("stat()ing %s\n", cbdata->canonspace);
#endif
  cbdata->statsmade++;
  if (lstat(cbdata->canonspace, &stb) != 0 || !S_ISLNK(stb.st_mode))
    {
      /* not a symlink or stat failed, have new canon entry */
      /* room for parent + basename + an added trailing '/' + terminating 0 */
      diroff = addfilesspace(cbdata, l + dirl - i + 2);
      /* copy the path without the rootdir prefix */
      strcpy((char *)cbdata->filesspace + diroff, cbdata->canonspace + cbdata->rootdirl);
      l += dirl - i;
      /* add trailing / */
      if (cbdata->filesspace[diroff + l - 1] != '/')
	{
	  cbdata->filesspace[diroff + l++] = '/';
	  cbdata->filesspace[diroff + l] = 0;
	}
      /* call normalizedir on new entry for unification purposes */
      dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, l, strnhash((char *)cbdata->filesspace + diroff, l), 1);
      return dirnameid == -1 ? diroff : dirnameid;
    }
  /* oh no, a symlink! follow */
  /* lo is the first byte after the terminating 0 of the path; the link target is read there */
  lo = cbdata->rootdirl + l + dirl - i + 1;
  /* the extra 2 bytes leave room for an added trailing '/' and the terminating 0 */
  if (lo + stb.st_size + 2 > cbdata->canonspacen)
    {
      cbdata->canonspacen = lo + stb.st_size + 20;
      cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
    }
  ll = readlink(cbdata->canonspace, cbdata->canonspace + lo, stb.st_size);
  if (ll < 0 || ll > stb.st_size)
    return diroff;		/* readlink failed or returned an unexpected length */
  if (ll == 0)
    return dirnameid;		/* empty means current dir */
  if (cbdata->canonspace[lo + ll - 1] != '/')
    cbdata->canonspace[lo + ll++] = '/';	/* add trailing / */
  cbdata->canonspace[lo + ll] = 0;		/* zero terminate */
  if (cbdata->canonspace[lo] != '/')
    {
      /* relative link, concatenate to dirname */
      /* the target (including its 0) is moved over the basename, right behind the normalized parent */
      memmove(cbdata->canonspace + cbdata->rootdirl + l, cbdata->canonspace + lo, ll + 1);
      /* the string to normalize now starts right after the rootdir prefix */
      lo = cbdata->rootdirl;
      ll += l;
    }
  /* for an absolute link lo still points at the link target itself */
  dirnameid = normalizedir(cbdata, cbdata->canonspace + lo, ll, strnhash(cbdata->canonspace + lo, ll), 1);
  return dirnameid == -1 ? diroff : dirnameid;
}
Esempio n. 5
0
/*
 * File list callback that records every file in the conflict hash map and
 * queues candidates for closer inspection.
 *
 * cbdatav: the struct cbdata holding the hash map (cflmap, cflmapn,
 *          cflmapused), the create flag, the current idx and the
 *          lookat / lookat_dir queues
 * fn:      full file name; its first info->dirlen bytes are the directory part
 * info:    per-file data supplied by the caller (mode, dirlen, diridx)
 *
 * cflmap stores pairs: the file hash at 2 * slot and the owning idx at
 * 2 * slot + 1.  The idx is stored complemented (~idx, i.e. negative) while
 * only directories have been seen for that hash.
 *
 * On a hash collision with a different owner, both owners are pushed onto
 * cbdata->lookat as (hx, idx, 0, 0) quadruples.  Collisions between two
 * directories are parked in cbdata->lookat_dir and only moved to lookat once
 * a non-directory shows up for the same hash.
 */
static void
findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
{
  struct cbdata *cbdata = cbdatav;
  int isdir = S_ISDIR(info->mode);
  const char *base;
  Id idx, owner;
  Id hx, qx;
  Hashval slot, step, dirhash;
  int n;

  if (!info->dirlen)
    return;
  idx = cbdata->idx;
  base = fn + info->dirlen;

  /* the directory hash is cached for as long as the directory index stays the same */
  if (cbdata->lastdiridx != info->diridx)
    {
      cbdata->lastdiridx = info->diridx;
      cbdata->lastdirhash = strnhash(fn, base - fn);
    }
  dirhash = cbdata->lastdirhash;

  /* this mirrors the "if (!hx) hx = strlen(fn) + 1" in finddirs_cb */
  if (!isindirmap(cbdata, dirhash ? dirhash : base - fn + 1))
    return;

  hx = strhash_cont(base, dirhash);
  if (!hx)
    hx = strlen(fn) + 1;        /* 0 marks an empty slot, so avoid it */

  /* probe until we find either our hash or an empty slot */
  slot = hx & cbdata->cflmapn;
  step = HASHCHAIN_START;
  while ((qx = cbdata->cflmap[2 * slot]) != 0 && qx != hx)
    slot = HASHCHAIN_NEXT(slot, step, cbdata->cflmapn);

  if (!qx)
    {
      /* a miss: insert the entry if we are allowed to */
      if (cbdata->create)
        {
          cbdata->cflmap[2 * slot] = hx;
          cbdata->cflmap[2 * slot + 1] = (isdir ? ~idx : idx);
          cbdata->cflmapused++;
          if (cbdata->cflmapused * 2 > cbdata->cflmapn)
            cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
        }
      return;
    }

  owner = cbdata->cflmap[2 * slot + 1];
  if (owner >= 0)
    {
      /* no conflicts with ourself, please */
      if (owner == idx)
        return;
    }
  else if (isdir)
    {
      /* both are directories. delay the conflict, keep owner in slot */
      queue_push2(&cbdata->lookat_dir, hx, idx);
      return;
    }
  else
    {
      /* now have file, had directories before: make the slot a file */
      owner = ~owner;
      cbdata->cflmap[2 * slot + 1] = owner;
      /* dump all delayed directory hits for hx */
      for (n = 0; n < cbdata->lookat_dir.count; n += 2)
        {
          if (cbdata->lookat_dir.elements[n] != hx)
            continue;
          queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[n + 1]);
          queue_push2(&cbdata->lookat, 0, 0);
        }
    }

  /* report the previous owner and ourself */
  queue_push2(&cbdata->lookat, hx, owner);
  queue_push2(&cbdata->lookat, 0, 0);
  queue_push2(&cbdata->lookat, hx, idx);
  queue_push2(&cbdata->lookat, 0, 0);
}