void SHAify::add(unsigned char *data, int datalen) { int i; for (i = 0; i < datalen; i++) { W[lenW / 4] <<= 8; W[lenW / 4] |= (unsigned long)data[i]; if (!(++lenW & 63)) { int t; unsigned long A = H[0]; unsigned long B = H[1]; unsigned long C = H[2]; unsigned long D = H[3]; unsigned long E = H[4]; for (t = 16; t < 80; t++) W[t] = SHA_ROTL(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1); for (t = 0; t < 20; t++) { unsigned long TEMP = SHA_ROTL(A,5) + E + W[t] + 0x5a827999L + (((C^D)&B)^D); SHUFFLE(); } for (; t < 40; t++) { unsigned long TEMP = SHA_ROTL(A,5) + E + W[t] + 0x6ed9eba1L + (B^C^D); SHUFFLE(); } for (; t < 60; t++) { unsigned long TEMP = SHA_ROTL(A,5) + E + W[t] + 0x8f1bbcdcL + ((B&C)|(D&(B|C))); SHUFFLE(); } for (; t < 80; t++) { unsigned long TEMP = SHA_ROTL(A,5) + E + W[t] + 0xca62c1d6L + (B^C^D); SHUFFLE(); } H[0] += A; H[1] += B; H[2] += C; H[3] += D; H[4] += E; lenW = 0; } size[0] += 8; if (size[0] < 8) size[1]++; } }
/**
 * Returns the four-component dot product of vec1 and vec2.
 *
 * With USE_SSE defined the lane-wise products are computed with one multiply
 * and then combined with two shuffles and one add; otherwise the products are
 * summed in scalar code.
 *
 * NOTE(review): the SSE path yields x+y+z+w only if the project's SHUFFLE
 * macro lists lane selectors in element order (so SHUFFLE(3, 2, 1, 0)
 * reverses the lanes) - confirm against its definition.
 */
INLINE float SVec4::Dot(const SVec4 &vec1, const SVec4 &vec2)
{
#ifdef USE_SSE
    // Lane-wise products of the two vectors.
    const auto products = _mm_mul_ps( vec1.m_128, vec2.m_128 );

    // First permutation of the products.
    const auto m_y = _mm_shuffle_ps( products, products, SHUFFLE(3, 2, 1, 0) );

    // The original also computed `v = _mm_add_ps(v, m_y)` here, but that
    // value was never read, so the wasted add has been dropped.

    // Second permutation; adding it to the first pairs the lanes up so that
    // lanes X and Y of the sum together cover all four products.
    const auto m_w = _mm_shuffle_ps( m_y, m_y, SHUFFLE(2, 3, 0, 1) );

    SVec4 answer = SVec4( _mm_add_ps( m_y, m_w ) );

    return answer.X( ) + answer.Y( );
#else
    return vec1.X() * vec2.X()
         + vec1.Y() * vec2.Y()
         + vec1.Z() * vec2.Z()
         + vec1.m_w * vec2.m_w;
#endif
}
/*
 * Build a random starting alignment in *aln.
 *
 * The width is taken from d->a.init_width, the alignment is cleared with
 * aln_zero(), and SHUFFLE is applied to d->seq_order (presumably randomising
 * the visiting order - see the macro's definition). site_sample() is then
 * called once per sequence in that order, and finally the alignment score is
 * computed with aln_score() and stored in aln->score.
 */
void start_aln(glam2_aln *aln, data *d) {
  int seq_idx = 0;

#if 0
  aln->width = d->a.min_width;  /* ?? initial number of columns */
  aln->width = sqrt(d->a.max_width * d->a.min_width);  /* geometric mean */
#endif
  aln->width = d->a.init_width;

  aln_zero(aln);
  SHUFFLE(d->seq_order, aln->seq_num);

  /* visit every sequence once, in the order given by d->seq_order */
  while (seq_idx < aln->seq_num) {
    site_sample(aln, d->seq_order[seq_idx], d, 1);
    ++seq_idx;
  }

  aln->score = aln_score(&d->scorer, aln);
}
void Sobel::extractSingleChannelSSE(const Image& srcImage, Sobel::Image1D& destImage, Channel channel, int yStart) { ASSERT(srcImage.width % 16 == 0); ASSERT(yStart >= 0); ASSERT(yStart <= srcImage.height); destImage.setResolution(srcImage.width, srcImage.height); destImage.yStart = yStart; unsigned char offset; switch(channel) { case Channel::Y: offset = offsetof(Image::Pixel, y); break; case Channel::Cb: offset = offsetof(Image::Pixel, cb); break; case Channel::Cr: offset = offsetof(Image::Pixel, cr); break; default: ASSERT(false); } unsigned char mask[16] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; mask[0] = offset; mask[1] = offset + 4; mask[2] = offset + 8; mask[3] = offset + 12; const __m128i mMask = _mm_loadu_si128(reinterpret_cast<__m128i*>(&mask)); const __m128i* srcPixel; const __m128i* srcPixelLineEnd; __m128i* destPixel; __m128i p0; __m128i p1; __m128i p2; __m128i p3; int height = srcImage.height; destPixel = reinterpret_cast<__m128i*>(destImage[yStart]); for(int y = yStart; y < height; ++y) { for(srcPixel = reinterpret_cast<const __m128i*>(srcImage[y]), srcPixelLineEnd = reinterpret_cast<const __m128i*>(srcImage[y] + srcImage.width); srcPixel < srcPixelLineEnd; srcPixel += 4, ++destPixel) { p0 = _mm_loadu_si128(srcPixel); p1 = _mm_loadu_si128(srcPixel + 1); p2 = _mm_loadu_si128(srcPixel + 2); p3 = _mm_loadu_si128(srcPixel + 3); p0 = SHUFFLE(p0, mMask); // y0 y1 y2 y3 0 0 0 0 0 0 0 0 0 0 0 0 p1 = SHUFFLE(p1, mMask); // y4 y5 y6 y7 0 0 0 0 0 0 0 0 0 0 0 0 p2 = SHUFFLE(p2, mMask); // y8 y9 y10 y11 0 0 0 0 0 0 0 0 0 0 0 0 p3 = SHUFFLE(p3, mMask); // y12 y13 y14 y15 0 0 0 0 0 0 0 0 0 0 0 0 __m128i mLow = _mm_unpacklo_epi32(p0, p1); // y0 y1 y2 y3 y4 y5 y6 y7 0 0 0 0 0 0 0 0 __m128i mHigh = _mm_unpacklo_epi32(p2, p3); // y8 y9 y10 y11 y12 y13 y14 y15 0 0 0 0 0 0 0 0 *destPixel = _mm_unpacklo_epi64(mLow, mHigh); } } }
static inline char main_game(unsigned char plinkos) { unsigned char STATUS = RUNNING, x, y = 0, oldx = 59; unsigned int Amount, Total = 0; char buffer[15]; FontSetSys(F_4x6); while( plinkos-- ) { ClearGrayScreen2B(virtual_dark, virtual_light); GraySpriteX8_OR(0, 0, 100, plinkol, plinkod, 20, virtual_light, virtual_dark); PRINT_TOTAL(Total); SHOW_PLINKOS(5, plinkos - 1, virtual_light, virtual_dark); new_chip(oldx, plinkos); x = oldx; y = 2; DRAWALL(x, y); while (STATUS == RUNNING) { if (_keytest (RR_LEFT) && x > 3) MOVE_LEFT( &x ); if (_keytest (RR_RIGHT) && x < 115) MOVE_RIGHT( &x ); if (_keytest (RR_ESC)) STATUS = EXIT; if (_keytest (RR_2ND) || _keytest (RR_DOWN)) STATUS = DROP; } if (STATUS == EXIT) return -1; oldx = x; Amount = DROPPING( &x ); // After this, x will be from 0 to 8 if (Amount == EXIT) return -1; Total += Amount; //GRAPHIC(30, 20, /* HEIGHT */14, /* WIDTH */32, (char*) Zerol, (char*) Zerod, SMALL); /*for(y = 1; y < 6; y++) { period = 15000*(1+random( y )); memset (Actived, 255, LCD_SIZE); // clear virtual gray planes memset (Activel, 255, LCD_SIZE); for(wait = 1; wait < period; wait++); period = 15000*(1+random( y )); DRAWALL(14 * x + 3, 93); for(wait = 1; wait < period; wait++); }*/ SHUFFLE( x ); DRAWALL(14 * x + 3, 93); DRAWALL(14 * x + 3, 93); GRAPHIC(45, 36, x, FALSE);//, gfx_light[x], gfx_dark[x], x); GrayDBufToggleSync(); // switches two sets of planes Waitkey() STATUS = RUNNING; } sprintf(buffer, "Score %d", Total); x = (LCD_WIDTH - DrawStrWidth(buffer, F_6x8)) / 2 - 16; DrawGrayStrExt2B(x, 2, (char*)buffer, A_NORMAL|A_SHADOWED, F_6x8, Activel, Actived); wait_for_keypress(); if (CHECK_FOR_HIGH(Total) == QUIT) // Checks whether or not the user got a high score, writes a new // high score if they did get one, and calls a function to display the new high scores return QUIT; return STATUS; }
void ThumbnailProvider::shrinkGrayscale4x4SSE(const Image& srcImage, Thumbnail::ThumbnailImageGrayscale& destImage) { union { __m128i a; long long b[2]; } splitter; const int scaleFactor = 4; const int width = srcImage.width; const int height = srcImage.height; ASSERT(width % scaleFactor == 0); ASSERT(height % scaleFactor == 0); destImage.setResolution(width / scaleFactor, height / scaleFactor); const unsigned char offset = offsetof(Image::Pixel, y); unsigned char mask[16] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; mask[0] = offset; mask[1] = offset + 4; mask[2] = offset + 8; mask[3] = offset + 12; const __m128i mMask = _mm_loadu_si128(reinterpret_cast<__m128i*>(mask)); const __m128i zero = _mm_setzero_si128(); const int summsSize = destImage.width * 16; __m128i* summs = reinterpret_cast<__m128i*>(SystemCall::alignedMalloc(summsSize, 16)); memset(summs, 0, summsSize); const Image::Pixel* pSrc; Thumbnail::ThumbnailImageGrayscale::PixelType* pDest; __m128i* pSumms; __m128i p0; __m128i p1; __m128i p2; __m128i p3; for(int y = 0; y < height; ++y) { if(y % scaleFactor == 0) { pDest = destImage[y / scaleFactor]; } pSrc = srcImage[y]; pSumms = summs; for(int x = 0; x < width; x += 8, pSrc += 8, ++pSumms) { p0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pSrc)); p1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pSrc + 4)); p0 = SHUFFLE(p0, mMask); // y0 y1 y2 y3 0 0 0 0 0 0 0 0 0 0 0 0 p1 = SHUFFLE(p1, mMask); // y4 y5 y6 y7 0 0 0 0 0 0 0 0 0 0 0 0 p0 = _mm_unpacklo_epi32(p0, p1); // y0 y1 y2 y3 y4 y5 y6 y7 0 0 0 0 0 0 0 0 p0 = _mm_unpacklo_epi8(p0, zero); // y0 y1 y2 y3 y4 y5 y6 y7 *pSumms = _mm_add_epi16(*pSumms, p0); } if(y % scaleFactor == scaleFactor - 1) { pSumms = summs; for (int i = 0; i < destImage.width; i += 8, pSumms += 4, pDest += 8) { p0 = *pSumms; p1 = *(pSumms + 1); p2 = *(pSumms + 2); p3 = *(pSumms + 3); p0 = _mm_hadd_epi16(p0, p1); p1 = _mm_hadd_epi16(p2, p3); p0 = _mm_hadd_epi16(p0, p1); 
p0 = _mm_srli_epi16(p0, 4); p0 = _mm_packus_epi16(p0, zero); splitter.a = p0; *reinterpret_cast<long long*>(pDest) = splitter.b[0]; } memset(summs, 0, summsSize); } } SystemCall::alignedFree(summs); }