/* ** Create a new delta. ** ** The delta is written into a preallocated buffer, zDelta, which ** should be at least 60 bytes longer than the target file, zOut. ** The delta string will be NUL-terminated, but it might also contain ** embedded NUL characters if either the zSrc or zOut files are ** binary. This function returns the length of the delta string ** in bytes, excluding the final NUL terminator character. ** ** Output Format: ** ** The delta begins with a base-64 number followed by a newline. This ** number is the number of bytes in the TARGET file. Thus, given a ** delta file z, a program can compute the size of the output file ** simply by reading the first line and decoding the base-64 number ** found there. The delta_output_size() routine does exactly this. ** ** After the initial size number, the delta consists of a series of ** literal text segments and commands to copy from the SOURCE file. ** A copy command looks like this: ** ** NNN@MMM, ** ** where NNN is the number of bytes to be copied and MMM is the offset ** into the source file of the first byte (both base-64). If NNN is 0 ** it means copy the rest of the input file. Literal text is like this: ** ** NNN:TTTTT ** ** where NNN is the number of bytes of text (base-64) and TTTTT is the text. ** ** The last term is of the form ** ** NNN; ** ** In this case, NNN is a 32-bit big-endian checksum of the output file ** that can be used to verify that the delta applied correctly. All ** numbers are in base-64. ** ** Pure text files generate a pure text delta. Binary files generate a ** delta that may contain some binary data. ** ** Algorithm: ** ** The encoder first builds a hash table to help it find matching ** patterns in the source file. 16-byte chunks of the source file ** sampled at evenly spaced intervals are used to populate the hash ** table. ** ** Next we begin scanning the target file using a sliding 16-byte ** window. 
The hash of the 16-byte window in the target is used to ** search for a matching section in the source file. When a match ** is found, a copy command is added to the delta. An effort is ** made to extend the matching section to regions that come before ** and after the 16-byte hash window. A copy command is only issued ** if the result would use less space than just quoting the text ** literally. Literal text is added to the delta for sections that ** do not match or which cannot be encoded efficiently using copy ** commands. */ int delta_create( const char *zSrc, /* The source or pattern file */ unsigned int lenSrc, /* Length of the source file */ const char *zOut, /* The target file */ unsigned int lenOut, /* Length of the target file */ char *zDelta /* Write the delta into this buffer */ ){ int i, base; char *zOrigDelta = zDelta; hash h; int nHash; /* Number of hash table entries */ int *landmark; /* Primary hash table */ int *collide; /* Collision chain */ int lastRead = -1; /* Last byte of zSrc read by a COPY command */ /* Add the target file size to the beginning of the delta */ putInt(lenOut, &zDelta); *(zDelta++) = '\n'; /* If the source file is very small, it means that we have no ** chance of ever doing a copy command. Just output a single ** literal segment for the entire target and exit. */ if( lenSrc<=NHASH ){ putInt(lenOut, &zDelta); *(zDelta++) = ':'; memcpy(zDelta, zOut, lenOut); zDelta += lenOut; putInt(checksum(zOut, lenOut), &zDelta); *(zDelta++) = ';'; return zDelta - zOrigDelta; } /* Compute the hash table used to locate matching sections in the ** source file. 
*/ nHash = lenSrc/NHASH; collide = fossil_malloc( nHash*2*sizeof(int) ); landmark = &collide[nHash]; memset(landmark, -1, nHash*sizeof(int)); memset(collide, -1, nHash*sizeof(int)); for(i=0; i<lenSrc-NHASH; i+=NHASH){ int hv; hash_init(&h, &zSrc[i]); hv = hash_32bit(&h) % nHash; collide[i/NHASH] = landmark[hv]; landmark[hv] = i/NHASH; } /* Begin scanning the target file and generating copy commands and ** literal sections of the delta. */ base = 0; /* We have already generated everything before zOut[base] */ while( base+NHASH<lenOut ){ int iSrc, iBlock; unsigned int bestCnt, bestOfst=0, bestLitsz=0; hash_init(&h, &zOut[base]); i = 0; /* Trying to match a landmark against zOut[base+i] */ bestCnt = 0; while( 1 ){ int hv; int limit = 250; hv = hash_32bit(&h) % nHash; DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); ) iBlock = landmark[hv]; while( iBlock>=0 && (limit--)>0 ){ /* ** The hash window has identified a potential match against ** landmark block iBlock. But we need to investigate further. ** ** Look for a region in zOut that matches zSrc. Anchor the search ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen]. ** ** Set cnt equal to the length of the match and set ofst so that ** zSrc[ofst] is the first element of the match. litsz is the number ** of characters between zOut[base] and the beginning of the match. ** sz will be the overhead (in bytes) needed to encode the copy ** command. Only generate copy command if the overhead of the ** copy command is less than the amount of literal text to be copied. */ int cnt, ofst, litsz; int j, k, x, y; int sz; /* Beginning at iSrc, match forwards as far as we can. j counts ** the number of characters that match */ iSrc = iBlock*NHASH; for(j=0, x=iSrc, y=base+i; x<lenSrc && y<lenOut; j++, x++, y++){ if( zSrc[x]!=zOut[y] ) break; } j--; /* Beginning at iSrc-1, match backwards as far as we can. 
k counts ** the number of characters that match */ for(k=1; k<iSrc && k<=i; k++){ if( zSrc[iSrc-k]!=zOut[base+i-k] ) break; } k--; /* Compute the offset and size of the matching region */ ofst = iSrc-k; cnt = j+k+1; litsz = i-k; /* Number of bytes of literal text before the copy */ DEBUG2( printf("MATCH %d bytes at %d: [%s] litsz=%d\n", cnt, ofst, print16(&zSrc[ofst]), litsz); ) /* sz will hold the number of bytes needed to encode the "insert" ** command and the copy command, not counting the "insert" text */ sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3; if( cnt>=sz && cnt>bestCnt ){ /* Remember this match only if it is the best so far and it ** does not increase the file size */ bestCnt = cnt; bestOfst = iSrc-k; bestLitsz = litsz; DEBUG2( printf("... BEST SO FAR\n"); ) } /* Check the next matching block */ iBlock = collide[iBlock]; }
/* Calculate the exchange hash value and put it into the handle. */ static gsti_error_t calc_exchange_hash (gsti_ctx_t ctx, gsti_bstr_t i_c, gsti_bstr_t i_s, gsti_bstr_t k_s, gcry_mpi_t e, gcry_mpi_t f) { gsti_error_t err; gcry_md_hd_t md; gsti_bstr_t pp; const char *ver = host_version_string; int algo = GCRY_MD_SHA1; int dlen; err = gcry_md_open (&md, algo, 0); if (err) return err; /* gcry_md_debug (md, "hash"); */ /* _gsti_dump_hexbuf (ctx, "client kex data: ", gsti_bstr_data (i_c), */ /* gsti_bstr_length (i_c)); */ /* _gsti_dump_hexbuf (ctx, "server kex data: ", gsti_bstr_data (i_s), */ /* gsti_bstr_length (i_s)); */ if (ctx->we_are_server) { _gsti_bstring_hash (md, ctx->peer_version_string); err = gsti_bstr_make (&pp, ver, strlen (ver)); if (err) return err; _gsti_bstring_hash (md, pp); _gsti_free (pp); } else { err = gsti_bstr_make (&pp, ver, strlen (ver)); if (err) return err; _gsti_bstring_hash (md, pp); _gsti_free (pp); _gsti_bstring_hash (md, ctx->peer_version_string); } _gsti_bstring_hash (md, i_c); _gsti_bstring_hash (md, i_s); _gsti_bstring_hash (md, k_s); if (ctx->gex.used) { if (ctx->we_are_server) { hash_32bit (md, ctx->gex.peer_min); hash_32bit (md, ctx->gex.peer_n); hash_32bit (md, ctx->gex.peer_max); } else { hash_32bit (md, ctx->gex.min); hash_32bit (md, ctx->gex.n); hash_32bit (md, ctx->gex.max); } hash_mpi (md, ctx->kex.p); hash_mpi (md, ctx->kex.g); } hash_mpi (md, e); hash_mpi (md, f); hash_mpi (md, ctx->kex.k); dlen = gcry_md_get_algo_dlen (algo); err = gsti_bstr_make (&ctx->kex.h, gcry_md_read (md, algo), dlen); if (err) { gcry_md_close (md); return err; } if (!ctx->session_id) /* Initialize the session id the first time. */ err = gsti_bstr_make (&ctx->session_id, gcry_md_read (md, algo), dlen); gcry_md_close (md); _gsti_dump_hexbuf (ctx, "SesID=", gsti_bstr_data (ctx->session_id), gsti_bstr_length (ctx->session_id)); return err; }