void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Avg filter predicts each pixel as the (truncated) average of a and b. * There's no pixel to the left of the first pixel. Luckily, it's * predicted to be half of the pixel above it. So again, this works * perfectly with our loop if we make sure a starts at zero. */ png_size_t rb; const __m128i zero = _mm_setzero_si128(); __m128i b; __m128i a, d = zero; png_debug(1, "in png_read_filter_row_avg4_sse2"); rb = row_info->rowbytes+4; while (rb > 4) { __m128i avg; b = load4(prev); a = d; d = load4(row ); /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ avg = _mm_avg_epu8(a,b); /* ...but we can fix it up by subtracting off 1 if it rounded up. */ avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b), _mm_set1_epi8(1))); d = _mm_add_epi8(d, avg); store4(row, d); prev += 4; row += 4; rb -= 4; } }
void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* Paeth tries to predict pixel d using the pixel to the left of it, a, * and two pixels from the previous row, b and c: * prev: c b * row: a d * The Paeth function predicts d to be whichever of a, b, or c is nearest to * p=a+b-c. * * The first pixel has no left context, and so uses an Up filter, p = b. * This works naturally with our main loop's p = a+b-c if we force a and c * to zero. * Here we zero b and d, which become c and a respectively at the start of * the loop. */ png_debug(1, "in png_read_filter_row_paeth4_sse2"); const __m128i zero = _mm_setzero_si128(); __m128i c, b = zero, a, d = zero; int rb = row_info->rowbytes; while (rb > 0) { /* It's easiest to do this math (particularly, deal with pc) with 16-bit * intermediates. */ c = b; b = _mm_unpacklo_epi8(load4(prev), zero); a = d; d = _mm_unpacklo_epi8(load4(row ), zero); /* (p-a) == (a+b-c - a) == (b-c) */ __m128i pa = _mm_sub_epi16(b,c); /* (p-b) == (a+b-c - b) == (a-c) */ __m128i pb = _mm_sub_epi16(a,c); /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ __m128i pc = _mm_add_epi16(pa,pb); pa = abs_i16(pa); /* |p-a| */ pb = abs_i16(pb); /* |p-b| */ pc = abs_i16(pc); /* |p-c| */ __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); /* Paeth breaks ties favoring a over b over c. */ __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c)); /* Note `_epi8`: we need addition to wrap modulo 255. */ d = _mm_add_epi8(d, nearest); store4(row, _mm_packus_epi16(d,d)); prev += 4; row += 4; rb -= 4; } }
static void store3(void* p, __m128i v) { /* We'll pull from SSE as a 32-bit int, then write * its bottom two bytes, then its third byte. */ png_uint_32 v012; store4(&v012, v); png_uint_16* p01 = p; png_byte* p2 = (png_byte*)(p01+1); *p01 = v012; *p2 = v012 >> 16; }
void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { /* The Sub filter predicts each pixel as the previous pixel, a. * There is no pixel to the left of the first pixel. It's encoded directly. * That works with our main loop if we just say that left pixel was zero. */ png_debug(1, "in png_read_filter_row_sub4_sse2"); __m128i a, d = _mm_setzero_si128(); int rb = row_info->rowbytes; while (rb > 0) { a = d; d = load4(row); d = _mm_add_epi8(d, a); store4(row, d); row += 4; rb -= 4; } }
void fht_SSE2(FLOAT * fz, int n) { const FLOAT *tri = costab; int k4; FLOAT *fi, *gi; FLOAT const *fn; n <<= 1; /* to get BLKSIZE, because of 3DNow! ASM routine */ fn = fz + n; k4 = 4; do { FLOAT s1, c1; int i, k1, k2, k3, kx; kx = k4 >> 1; k1 = k4; k2 = k4 << 1; k3 = k2 + k1; k4 = k2 << 1; fi = fz; gi = fi + kx; do { FLOAT f0, f1, f2, f3; f1 = fi[0] - fi[k1]; f0 = fi[0] + fi[k1]; f3 = fi[k2] - fi[k3]; f2 = fi[k2] + fi[k3]; fi[k2] = f0 - f2; fi[0] = f0 + f2; fi[k3] = f1 - f3; fi[k1] = f1 + f3; f1 = gi[0] - gi[k1]; f0 = gi[0] + gi[k1]; f3 = SQRT2 * gi[k3]; f2 = SQRT2 * gi[k2]; gi[k2] = f0 - f2; gi[0] = f0 + f2; gi[k3] = f1 - f3; gi[k1] = f1 + f3; gi += k4; fi += k4; } while (fi < fn); c1 = tri[0]; s1 = tri[1]; for (i = 1; i < kx; i++) { __m128 v_s2; __m128 v_c2; __m128 v_c1; __m128 v_s1; FLOAT c2, s2, s1_2 = s1+s1; c2 = 1 - s1_2 * s1; s2 = s1_2 * c1; fi = fz + i; gi = fz + k1 - i; v_c1 = _mm_set_ps1(c1); v_s1 = _mm_set_ps1(s1); v_c2 = _mm_set_ps1(c2); v_s2 = _mm_set_ps1(s2); { static const vecfloat_union sign_mask = {{0x80000000,0,0,0}}; v_c1 = _mm_xor_ps(sign_mask._m128, v_c1); /* v_c1 := {-c1, +c1, +c1, +c1} */ } { static const vecfloat_union sign_mask = {{0,0x80000000,0,0}}; v_s1 = _mm_xor_ps(sign_mask._m128, v_s1); /* v_s1 := {+s1, -s1, +s1, +s1} */ } { static const vecfloat_union sign_mask = {{0,0,0x80000000,0x80000000}}; v_c2 = _mm_xor_ps(sign_mask._m128, v_c2); /* v_c2 := {+c2, +c2, -c2, -c2} */ } do { __m128 p, q, r; q = _mm_setr_ps(fi[k1], fi[k3], gi[k1], gi[k3]); /* Q := {fi_k1,fi_k3,gi_k1,gi_k3}*/ p = _mm_mul_ps(_mm_set_ps1(s2), q); /* P := s2 * Q */ q = _mm_mul_ps(v_c2, q); /* Q := c2 * Q */ q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(1,0,3,2)); /* Q := {-c2*gi_k1,-c2*gi_k3,c2*fi_k1,c2*fi_k3} */ p = _mm_add_ps(p, q); r = _mm_setr_ps(gi[0], gi[k2], fi[0], fi[k2]); /* R := {gi_0,gi_k2,fi_0,fi_k2} */ q = _mm_sub_ps(r, p); /* Q := {gi_0-p0,gi_k2-p1,fi_0-p2,fi_k2-p3} */ r = _mm_add_ps(r, p); /* R := {gi_0+p0,gi_k2+p1,fi_0+p2,fi_k2+p3} */ p = _mm_shuffle_ps(q, r, _MM_SHUFFLE(2,0,2,0)); /* P := {q0,q2,r0,r2} */ p = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3,1,2,0)); /* P := {q0,r0,q2,r2} */ q = _mm_shuffle_ps(q, r, _MM_SHUFFLE(3,1,3,1)); /* Q := {q1,q3,r1,r3} */ r = _mm_mul_ps(v_c1, q); q = _mm_mul_ps(v_s1, q); q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(0,1,2,3)); /* Q := {q3,q2,q1,q0} */ q = _mm_add_ps(q, r); store4(_mm_sub_ps(p, q), &gi[k3], &gi[k2], &fi[k3], &fi[k2]); store4(_mm_add_ps(p, q), &gi[k1], &gi[ 0], &fi[k1], &fi[ 0]); gi += k4; fi += k4; } while (fi < fn); c2 = c1; c1 = c2 * tri[0] - s1 * tri[1]; s1 = c2 * tri[1] + s1 * tri[0]; } tri += 2; } while (k4 < n); }
void lzvn_decode(lzvn_decoder_state *state) { #if HAVE_LABELS_AS_VALUES // Jump table for all instructions static const void *opc_tbl[256] = { &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m}; #endif size_t src_len = state->src_end - state->src; size_t dst_len = state->dst_end - state->dst; if (src_len == 0 || dst_len == 0) return; // empty buffer const unsigned char *src_ptr = state->src; unsigned char *dst_ptr = state->dst; size_t D = state->d_prev; size_t M; size_t L; size_t opc_len; // Do we have a partially expanded match saved in state? if (state->L != 0 || state->M != 0) { L = state->L; M = state->M; D = state->D; opc_len = 0; // we already skipped the op state->L = state->M = state->D = 0; if (M == 0) goto copy_literal; if (L == 0) goto copy_match; goto copy_literal_and_match; } unsigned char opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES goto *opc_tbl[opc]; #else for (;;) { switch (opc) { #endif // =============================================================== // These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a // literal and a match. The bulk of their implementations are shared; // each label here only does the work of setting the opcode length (not // including any literal bytes), and extracting the literal length, match // length, and match distance (except in pre_d). They then jump into the // shared implementation to actually output the literal and match bytes. // // No error checking happens in the first stage, except for ensuring that // the source has enough length to represent the full opcode before // reading past the first byte. sml_d: #if !HAVE_LABELS_AS_VALUES case 0: case 1: case 2: case 3: case 4: case 5: case 8: case 9: case 10: case 11: case 12: case 13: case 16: case 17: case 18: case 19: case 20: case 21: case 24: case 25: case 26: case 27: case 28: case 29: case 32: case 33: case 34: case 35: case 36: case 37: case 40: case 41: case 42: case 43: case 44: case 45: case 48: case 49: case 50: case 51: case 52: case 53: case 56: case 57: case 58: case 59: case 60: case 61: case 64: case 65: case 66: case 67: case 68: case 69: case 72: case 73: case 74: case 75: case 76: case 77: case 80: case 81: case 82: case 83: case 84: case 85: case 88: case 89: case 90: case 91: case 92: case 93: case 96: case 97: case 98: case 99: case 100: case 101: case 104: case 105: case 106: case 107: case 108: case 109: case 128: case 129: case 130: case 131: case 132: case 133: case 136: case 137: case 138: case 139: case 140: case 141: case 144: case 145: case 146: case 147: case 148: case 149: case 152: case 153: case 154: case 155: case 156: case 157: case 192: case 193: case 194: case 195: case 196: case 197: case 200: case 201: case 202: case 203: case 204: case 205: #endif UPDATE_GOOD; // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL // where the length of literal (0-3 bytes) is encoded by the high 2 bits of // the first byte. We first extract the literal length so we know how long // the opcode is, then check that the source can hold both this opcode and // at least one byte of the next (because any valid input stream must be // terminated with an eos token). opc_len = 2; L = (size_t)extract(opc, 6, 2); M = (size_t)extract(opc, 3, 3) + 3; // We need to ensure that the source buffer is long enough that we can // safely read this entire opcode, the literal that follows, and the first // byte of the next opcode. Once we satisfy this requirement, we can // safely unpack the match distance. A check similar to this one is // present in all the opcode implementations. if (src_len <= opc_len + L) return; // source truncated D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1]; goto copy_literal_and_match; med_d: #if !HAVE_LABELS_AS_VALUES case 160: case 161: case 162: case 163: case 164: case 165: case 166: case 167: case 168: case 169: case 170: case 171: case 172: case 173: case 174: case 175: case 176: case 177: case 178: case 179: case 180: case 181: case 182: case 183: case 184: case 185: case 186: case 187: case 188: case 189: case 190: case 191: #endif UPDATE_GOOD; // "medium distance": This is a minor variant of the "small distance" // encoding, where we will now use two extra bytes instead of one to encode // the restof the match length and distance. This allows an extra two bits // for the match length, and an extra three bits for the match distance. The // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL. opc_len = 3; L = (size_t)extract(opc, 3, 2); if (src_len <= opc_len + L) return; // source truncated uint16_t opc23 = load2(&src_ptr[1]); M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3); D = (size_t)extract(opc23, 2, 14); goto copy_literal_and_match; lrg_d: #if !HAVE_LABELS_AS_VALUES case 7: case 15: case 23: case 31: case 39: case 47: case 55: case 63: case 71: case 79: case 87: case 95: case 103: case 111: case 135: case 143: case 151: case 159: case 199: case 207: #endif UPDATE_GOOD; // "large distance": This is another variant of the "small distance" // encoding, where we will now use two extra bytes to encode the match // distance, which allows distances up to 65535 to be represented. The full // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL. opc_len = 3; L = (size_t)extract(opc, 6, 2); M = (size_t)extract(opc, 3, 3) + 3; if (src_len <= opc_len + L) return; // source truncated D = load2(&src_ptr[1]); goto copy_literal_and_match; pre_d: #if !HAVE_LABELS_AS_VALUES case 70: case 78: case 86: case 94: case 102: case 110: case 134: case 142: case 150: case 158: case 198: case 206: #endif UPDATE_GOOD; // "previous distance": This opcode has the structure LLMMM110, where the // length of the literal (0-3 bytes) is encoded by the high 2 bits of the // first byte. We first extract the literal length so we know how long // the opcode is, then check that the source can hold both this opcode and // at least one byte of the next (because any valid input stream must be // terminated with an eos token). opc_len = 1; L = (size_t)extract(opc, 6, 2); M = (size_t)extract(opc, 3, 3) + 3; if (src_len <= opc_len + L) return; // source truncated goto copy_literal_and_match; copy_literal_and_match: // Common implementation of writing data for opcodes that have both a // literal and a match. We begin by advancing the source pointer past // the opcode, so that it points at the first literal byte (if L // is non-zero; otherwise it points at the next opcode). PTR_LEN_INC(src_ptr, src_len, opc_len); // Now we copy the literal from the source pointer to the destination. if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) { // The literal is 0-3 bytes; if we are not near the end of the buffer, // we can safely just do a 4 byte copy (which is guaranteed to cover // the complete literal, and may include some other bytes as well). store4(dst_ptr, load4(src_ptr)); } else if (L <= dst_len) { // We are too close to the end of either the input or output stream // to be able to safely use a four-byte copy, but we will not exhaust // either stream (we already know that the source will not be // exhausted from checks in the individual opcode implementations, // and we just tested that dst_len > L). Thus, we need to do a // byte-by-byte copy of the literal. This is slow, but it can only ever // happen near the very end of a buffer, so it is not an important case to // optimize. for (size_t i = 0; i < L; ++i) dst_ptr[i] = src_ptr[i]; } else { // Destination truncated: fill DST, and store partial match // Copy partial literal for (size_t i = 0; i < dst_len; ++i) dst_ptr[i] = src_ptr[i]; // Save state state->src = src_ptr + dst_len; state->dst = dst_ptr + dst_len; state->L = L - dst_len; state->M = M; state->D = D; return; // destination truncated } // Having completed the copy of the literal, we advance both the source // and destination pointers by the number of literal bytes. PTR_LEN_INC(dst_ptr, dst_len, L); PTR_LEN_INC(src_ptr, src_len, L); // Check if the match distance is valid; matches must not reference // bytes that preceed the start of the output buffer, nor can the match // distance be zero. if (D > dst_ptr - state->dst_begin || D == 0) goto invalid_match_distance; copy_match: // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep // in mind that we may have D < M, in which case the source and destination // windows overlap in the copy. The semantics of the match copy are *not* // those of memmove( ); if the buffers overlap it needs to behave as though // we were copying byte-by-byte in increasing address order. If, for example, // D is 1, the copy operation is equivalent to: // // memset(dst_ptr, dst_ptr[-1], M); // // i.e. it splats the previous byte. This means that we need to be very // careful about using wide loads or stores to perform the copy operation. if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) { // We are not near the end of the buffer, and the match distance // is at least eight. Thus, we can safely loop using eight byte // copies. The last of these may slop over the intended end of // the match, but this is OK because we know we have a safety bound // away from the end of the destination buffer. for (size_t i = 0; i < M; i += 8) store8(&dst_ptr[i], load8(&dst_ptr[i - D])); } else if (M <= dst_len) { // Either the match distance is too small, or we are too close to // the end of the buffer to safely use eight byte copies. Fall back // on a simple byte-by-byte implementation. for (size_t i = 0; i < M; ++i) dst_ptr[i] = dst_ptr[i - D]; } else { // Destination truncated: fill DST, and store partial match // Copy partial match for (size_t i = 0; i < dst_len; ++i) dst_ptr[i] = dst_ptr[i - D]; // Save state state->src = src_ptr; state->dst = dst_ptr + dst_len; state->L = 0; state->M = M - dst_len; state->D = D; return; // destination truncated } // Update the destination pointer and length to account for the bytes // written by the match, then load the next opcode byte and branch to // the appropriate implementation. PTR_LEN_INC(dst_ptr, dst_len, M); opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES goto *opc_tbl[opc]; #else break; #endif // =============================================================== // Opcodes representing only a match (no literal). // These two opcodes (lrg_m and sml_m) encode only a match. The match // distance is carried over from the previous opcode, so all they need // to encode is the match length. We are able to reuse the match copy // sequence from the literal and match opcodes to perform the actual // copy implementation. sml_m: #if !HAVE_LABELS_AS_VALUES case 241: case 242: case 243: case 244: case 245: case 246: case 247: case 248: case 249: case 250: case 251: case 252: case 253: case 254: case 255: #endif UPDATE_GOOD; // "small match": This opcode has no literal, and uses the previous match // distance (i.e. it encodes only the match length), in a single byte as // 1111MMMM. opc_len = 1; if (src_len <= opc_len) return; // source truncated M = (size_t)extract(opc, 0, 4); PTR_LEN_INC(src_ptr, src_len, opc_len); goto copy_match; lrg_m: #if !HAVE_LABELS_AS_VALUES case 240: #endif UPDATE_GOOD; // "large match": This opcode has no literal, and uses the previous match // distance (i.e. it encodes only the match length). It is encoded in two // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can // be represented by sml_m, there is an implicit bias of 16 on the match // length; the representable values are [16,271]. opc_len = 2; if (src_len <= opc_len) return; // source truncated M = src_ptr[1] + 16; PTR_LEN_INC(src_ptr, src_len, opc_len); goto copy_match; // =============================================================== // Opcodes representing only a literal (no match). // These two opcodes (lrg_l and sml_l) encode only a literal. There is no // match length or match distance to worry about (but we need to *not* // touch D, as it must be preserved between opcodes). sml_l: #if !HAVE_LABELS_AS_VALUES case 225: case 226: case 227: case 228: case 229: case 230: case 231: case 232: case 233: case 234: case 235: case 236: case 237: case 238: case 239: #endif UPDATE_GOOD; // "small literal": This opcode has no match, and encodes only a literal // of length up to 15 bytes. The format is 1110LLLL LITERAL. opc_len = 1; L = (size_t)extract(opc, 0, 4); goto copy_literal; lrg_l: #if !HAVE_LABELS_AS_VALUES case 224: #endif UPDATE_GOOD; // "large literal": This opcode has no match, and uses the previous match // distance (i.e. it encodes only the match length). It is encoded in two // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16 // bytes can be represented by sml_l, there is an implicit bias of 16 on // the literal length; the representable values are [16,271]. opc_len = 2; if (src_len <= 2) return; // source truncated L = src_ptr[1] + 16; goto copy_literal; copy_literal: // Check that the source buffer is large enough to hold the complete // literal and at least the first byte of the next opcode. If so, advance // the source pointer to point to the first byte of the literal and adjust // the source length accordingly. if (src_len <= opc_len + L) return; // source truncated PTR_LEN_INC(src_ptr, src_len, opc_len); // Now we copy the literal from the source pointer to the destination. if (dst_len >= L + 7 && src_len >= L + 7) { // We are not near the end of the source or destination buffers; thus // we can safely copy the literal using wide copies, without worrying // about reading or writing past the end of either buffer. for (size_t i = 0; i < L; i += 8) store8(&dst_ptr[i], load8(&src_ptr[i])); } else if (L <= dst_len) { // We are too close to the end of either the input or output stream // to be able to safely use an eight-byte copy. Instead we copy the // literal byte-by-byte. for (size_t i = 0; i < L; ++i) dst_ptr[i] = src_ptr[i]; } else { // Destination truncated: fill DST, and store partial match // Copy partial literal for (size_t i = 0; i < dst_len; ++i) dst_ptr[i] = src_ptr[i]; // Save state state->src = src_ptr + dst_len; state->dst = dst_ptr + dst_len; state->L = L - dst_len; state->M = 0; state->D = D; return; // destination truncated } // Having completed the copy of the literal, we advance both the source // and destination pointers by the number of literal bytes. PTR_LEN_INC(dst_ptr, dst_len, L); PTR_LEN_INC(src_ptr, src_len, L); // Load the first byte of the next opcode, and jump to its implementation. opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES goto *opc_tbl[opc]; #else break; #endif // =============================================================== // Other opcodes nop: #if !HAVE_LABELS_AS_VALUES case 6: case 14: #endif UPDATE_GOOD; opc_len = 1; if (src_len <= opc_len) return; // source truncated PTR_LEN_INC(src_ptr, src_len, opc_len); opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES goto *opc_tbl[opc]; #else break; #endif eos: #if !HAVE_LABELS_AS_VALUES case 22: #endif opc_len = 8; if (src_len < opc_len) return; // source truncated (here we don't need an extra byte for next op // code) PTR_LEN_INC(src_ptr, src_len, opc_len); state->end_of_stream = 1; UPDATE_GOOD; return; // end-of-stream // =============================================================== // Return on error udef: #if !HAVE_LABELS_AS_VALUES case 30: case 38: case 46: case 54: case 62: case 112: case 113: case 114: case 115: case 116: case 117: case 118: case 119: case 120: case 121: case 122: case 123: case 124: case 125: case 126: case 127: case 208: case 209: case 210: case 211: case 212: case 213: case 214: case 215: case 216: case 217: case 218: case 219: case 220: case 221: case 222: case 223: #endif invalid_match_distance: return; // we already updated state #if !HAVE_LABELS_AS_VALUES } } #endif }