Exemple #1
0
/*
** Create a new delta.
**
** The delta is written into a preallocated buffer, zDelta, which 
** should be at least 60 bytes longer than the target file, zOut.
** The delta string will be NUL-terminated, but it might also contain
** embedded NUL characters if either the zSrc or zOut files are
** binary.  This function returns the length of the delta string
** in bytes, excluding the final NUL terminator character.
**
** Output Format:
**
** The delta begins with a base64 number followed by a newline.  This
** number is the number of bytes in the TARGET file.  Thus, given a
** delta file z, a program can compute the size of the output file
** simply by reading the first line and decoding the base-64 number
** found there.  The delta_output_size() routine does exactly this.
**
** After the initial size number, the delta consists of a series of
** literal text segments and commands to copy from the SOURCE file.  
** A copy command looks like this:
**
**     NNN@MMM,
**
** where NNN is the number of bytes to be copied and MMM is the offset
** into the source file of the first byte (both base-64).   If NNN is 0
** it means copy the rest of the input file.  Literal text is like this:
**
**     NNN:TTTTT
**
** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
**
** The last term is of the form
**
**     NNN;
**
** In this case, NNN is a 32-bit bigendian checksum of the output file
** that can be used to verify that the delta applied correctly.  All
** numbers are in base-64.
**
** Pure text files generate a pure text delta.  Binary files generate a
** delta that may contain some binary data.
**
** Algorithm:
**
** The encoder first builds a hash table to help it find matching
** patterns in the source file.  16-byte chunks of the source file
** sampled at evenly spaced intervals are used to populate the hash
** table.
**
** Next we begin scanning the target file using a sliding 16-byte
** window.  The hash of the 16-byte window in the target is used to
** search for a matching section in the source file.  When a match
** is found, a copy command is added to the delta.  An effort is
** made to extend the matching section to regions that come before
** and after the 16-byte hash window.  A copy command is only issued
** if the result would use less space that just quoting the text
** literally. Literal text is added to the delta for sections that 
** do not match or which can not be encoded efficiently using copy
** commands.
*/
int delta_create(
  const char *zSrc,      /* The source or pattern file */
  unsigned int lenSrc,   /* Length of the source file */
  const char *zOut,      /* The target file */
  unsigned int lenOut,   /* Length of the target file */
  char *zDelta           /* Write the delta into this buffer */
){
  int i, base;
  char *zOrigDelta = zDelta;
  hash h;
  int nHash;                 /* Number of hash table entries */
  int *landmark;             /* Primary hash table */
  int *collide;              /* Collision chain */
  int lastRead = -1;         /* Last byte of zSrc read by a COPY command */

  /* Add the target file size to the beginning of the delta
  */
  putInt(lenOut, &zDelta);
  *(zDelta++) = '\n';

  /* If the source file is very small, it means that we have no
  ** chance of ever doing a copy command.  Just output a single
  ** literal segment for the entire target and exit.
  */
  if( lenSrc<=NHASH ){
    putInt(lenOut, &zDelta);
    *(zDelta++) = ':';
    memcpy(zDelta, zOut, lenOut);
    zDelta += lenOut;
    putInt(checksum(zOut, lenOut), &zDelta);
    *(zDelta++) = ';';
    return zDelta - zOrigDelta;
  }

  /* Compute the hash table used to locate matching sections in the
  ** source file.
  */
  nHash = lenSrc/NHASH;
  collide = fossil_malloc( nHash*2*sizeof(int) );
  landmark = &collide[nHash];
  memset(landmark, -1, nHash*sizeof(int));
  memset(collide, -1, nHash*sizeof(int));
  for(i=0; i<lenSrc-NHASH; i+=NHASH){
    int hv;
    hash_init(&h, &zSrc[i]);
    hv = hash_32bit(&h) % nHash;
    collide[i/NHASH] = landmark[hv];
    landmark[hv] = i/NHASH;
  }

  /* Begin scanning the target file and generating copy commands and
  ** literal sections of the delta.
  */
  base = 0;    /* We have already generated everything before zOut[base] */
  while( base+NHASH<lenOut ){
    int iSrc, iBlock;
    unsigned int bestCnt, bestOfst=0, bestLitsz=0;
    hash_init(&h, &zOut[base]);
    i = 0;     /* Trying to match a landmark against zOut[base+i] */
    bestCnt = 0;
    while( 1 ){
      int hv;
      int limit = 250;

      hv = hash_32bit(&h) % nHash;
      DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
      iBlock = landmark[hv];
      while( iBlock>=0 && (limit--)>0 ){
        /*
        ** The hash window has identified a potential match against 
        ** landmark block iBlock.  But we need to investigate further.
        ** 
        ** Look for a region in zOut that matches zSrc. Anchor the search
        ** at zSrc[iSrc] and zOut[base+i].  Do not include anything prior to
        ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
        **
        ** Set cnt equal to the length of the match and set ofst so that
        ** zSrc[ofst] is the first element of the match.  litsz is the number
        ** of characters between zOut[base] and the beginning of the match.
        ** sz will be the overhead (in bytes) needed to encode the copy
        ** command.  Only generate copy command if the overhead of the
        ** copy command is less than the amount of literal text to be copied.
        */
        int cnt, ofst, litsz;
        int j, k, x, y;
        int sz;

        /* Beginning at iSrc, match forwards as far as we can.  j counts
        ** the number of characters that match */
        iSrc = iBlock*NHASH;
        for(j=0, x=iSrc, y=base+i; x<lenSrc && y<lenOut; j++, x++, y++){
          if( zSrc[x]!=zOut[y] ) break;
        }
        j--;

        /* Beginning at iSrc-1, match backwards as far as we can.  k counts
        ** the number of characters that match */
        for(k=1; k<iSrc && k<=i; k++){
          if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
        }
        k--;

        /* Compute the offset and size of the matching region */
        ofst = iSrc-k;
        cnt = j+k+1;
        litsz = i-k;  /* Number of bytes of literal text before the copy */
        DEBUG2( printf("MATCH %d bytes at %d: [%s] litsz=%d\n",
                        cnt, ofst, print16(&zSrc[ofst]), litsz); )
        /* sz will hold the number of bytes needed to encode the "insert"
        ** command and the copy command, not counting the "insert" text */
        sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
        if( cnt>=sz && cnt>bestCnt ){
          /* Remember this match only if it is the best so far and it
          ** does not increase the file size */
          bestCnt = cnt;
          bestOfst = iSrc-k;
          bestLitsz = litsz;
          DEBUG2( printf("... BEST SO FAR\n"); )
        }

        /* Check the next matching block */
        iBlock = collide[iBlock];
      }
Exemple #2
0
Fichier : kex.c Projet : gpg/gsti
/* Calculate the exchange hash value and put it into the handle.  */
static gsti_error_t
calc_exchange_hash (gsti_ctx_t ctx, gsti_bstr_t i_c, gsti_bstr_t i_s,
		    gsti_bstr_t k_s, gcry_mpi_t e, gcry_mpi_t f)
{
  gsti_error_t err;
  gcry_md_hd_t md;
  gsti_bstr_t pp;
  const char *ver = host_version_string;
  int algo = GCRY_MD_SHA1;
  int dlen;

  err = gcry_md_open (&md, algo, 0);
  if (err)
    return err;
  
  /* gcry_md_debug (md, "hash"); */

  /* _gsti_dump_hexbuf (ctx, "client kex data: ", gsti_bstr_data (i_c), */
  /*                    gsti_bstr_length (i_c)); */
  /* _gsti_dump_hexbuf (ctx, "server kex data: ", gsti_bstr_data (i_s), */
  /*                    gsti_bstr_length (i_s)); */
  
  if (ctx->we_are_server)
    {
      _gsti_bstring_hash (md, ctx->peer_version_string);
      err = gsti_bstr_make (&pp, ver, strlen (ver));
      if (err)
        return err;
      _gsti_bstring_hash (md, pp);
      _gsti_free (pp);
    }
  else
    {
      err = gsti_bstr_make (&pp, ver, strlen (ver));
      if (err)
        return err;
      _gsti_bstring_hash (md, pp);
      _gsti_free (pp);
      _gsti_bstring_hash (md, ctx->peer_version_string);
    }
  _gsti_bstring_hash (md, i_c);
  _gsti_bstring_hash (md, i_s);
  _gsti_bstring_hash (md, k_s);
  if (ctx->gex.used)
    {
      if (ctx->we_are_server)
        {
          hash_32bit (md, ctx->gex.peer_min);
          hash_32bit (md, ctx->gex.peer_n);
          hash_32bit (md, ctx->gex.peer_max);
        }
      else
        {
          hash_32bit (md, ctx->gex.min);
          hash_32bit (md, ctx->gex.n);
          hash_32bit (md, ctx->gex.max);
        }
      hash_mpi (md, ctx->kex.p);
      hash_mpi (md, ctx->kex.g);
    } 
  hash_mpi (md, e);
  hash_mpi (md, f);
  hash_mpi (md, ctx->kex.k);

  dlen = gcry_md_get_algo_dlen (algo);
  err = gsti_bstr_make (&ctx->kex.h, gcry_md_read (md, algo), dlen);
  if (err)
    {
      gcry_md_close (md);
      return err;
    }
  if (!ctx->session_id)	/* Initialize the session id the first time.  */
    err = gsti_bstr_make (&ctx->session_id, gcry_md_read (md, algo), dlen);
  gcry_md_close (md);
  _gsti_dump_hexbuf (ctx, "SesID=", gsti_bstr_data (ctx->session_id),
		     gsti_bstr_length (ctx->session_id));
  return err;
}