/// Three-way comparison of this value against v.
///
/// Returns -1 when *this orders before v, 1 when it orders after v and 0 when
/// both parts are equal. _high is compared first; _low is only consulted when
/// both _high parts match, and the result is mirrored when _high is negative.
INLINE Int32 Decimal::Compare(Decimal const &v) const
{
    // Compare the integer parts first.
    if (_high != v._high)
        return _high < v._high ? -1 : 1;

    // Integer parts match: identical fractional parts mean equality.
    if (_low == v._low)
        return 0;

    bool const negative = _high < 0;

    // Exactly one side has a zero fractional part: the sign of the other
    // side's _low decides. The non-negative cases used to fall through to
    // "return 0" and report two different values as equal.
    if (_low == 0)
    {
        if (negative)
            return v._low > 0 ? 1 : -1;
        return v._low > 0 ? -1 : 1;
    }
    if (v._low == 0)
    {
        if (negative)
            return _low > 0 ? -1 : 1;
        return _low > 0 ? 1 : -1;
    }

    // Both fractional parts are non-zero: left-align them in base 10, then compare.
    var v1 = _low, v2 = v._low;
    ShiftLeft(v1);
    ShiftLeft(v2);
    if (negative)
        return v1 > v2 ? -1 : 1;
    return v1 < v2 ? -1 : 1;
}
template <bool align, bool increment> void InterferenceChange(int16_t * statistic, size_t stride, size_t width, size_t height, uint8_t value, int16_t saturation) { assert(width >= HA); if(align) assert(Aligned(statistic) && Aligned(stride, HA)); size_t alignedWidth = Simd::AlignLo(width, HA); v128_s16 tailMask = (v128_s16)ShiftLeft(K16_FFFF, HA - width + alignedWidth); v128_s16 _value = SetI16(value); v128_s16 _saturation = SetI16(saturation); for(size_t row = 0; row < height; ++row) { Loader<align> statisticSrc(statistic); Storer<align> statisticDst(statistic); InterferenceChange<align, true, increment>(statisticSrc, _value, _saturation, statisticDst); for(size_t col = HA; col < alignedWidth; col += HA) InterferenceChange<align, false, increment>(statisticSrc, _value, _saturation, statisticDst); Flush(statisticDst); if(alignedWidth != width) { Loader<false> statisticSrc(statistic + width - HA); Storer<false> statisticDst(statistic + width - HA); InterferenceChange<false, true, increment>(statisticSrc, vec_and(_value, tailMask), _saturation, statisticDst); Flush(statisticDst); } statistic += stride; } }
void cGUIEdit::SelectionCut() { if(m_sel1==m_sel2) return; // no selection int s1 = sel1(), s2 = sel2(); ShiftLeft(s2,s2-s1); m_sel1 = m_sel2 = s1; }
template <bool align> void SquaredDifferenceSum( const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride, size_t width, size_t height, uint64_t * sum) { assert(width < 0x10000); if(align) { assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride)); } size_t bodyWidth = AlignLo(width, A); __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + bodyWidth); __m128i fullSum = _mm_setzero_si128(); for(size_t row = 0; row < height; ++row) { __m128i rowSum = _mm_setzero_si128(); for(size_t col = 0; col < bodyWidth; col += A) { const __m128i a_ = Load<align>((__m128i*)(a + col)); const __m128i b_ = Load<align>((__m128i*)(b + col)); rowSum = _mm_add_epi32(rowSum, SquaredDifference(a_, b_)); } if(width - bodyWidth) { const __m128i a_ = _mm_and_si128(tailMask, Load<false>((__m128i*)(a + width - A))); const __m128i b_ = _mm_and_si128(tailMask, Load<false>((__m128i*)(b + width - A))); rowSum = _mm_add_epi32(rowSum, SquaredDifference(a_, b_)); } fullSum = _mm_add_epi64(fullSum, HorizontalSum32(rowSum)); a += aStride; b += bStride; } *sum = ExtractInt64Sum(fullSum); }
template <bool align> void SquaredDifferenceSum( const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, size_t width, size_t height, uint64_t * sum) { assert(width < 0x10000); if (align) assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride)); size_t alignedWidth = Simd::AlignLo(width, A); uint8x16_t tailMask = ShiftLeft(K8_FF, A - width + alignedWidth); uint64x2_t _sum = K64_0000000000000000; for (size_t row = 0; row < height; ++row) { uint32x4_t rowSum = K32_00000000; for (size_t col = 0; col < alignedWidth; col += A) { uint8x16_t _a = Load<align>(a + col); uint8x16_t _b = Load<align>(b + col); rowSum = vaddq_u32(rowSum, SquaredDifferenceSum(_a, _b)); } if (width - alignedWidth) { uint8x16_t _a = Load<align>(a + width - A); uint8x16_t _b = Load<align>(b + width - A); rowSum = vaddq_u32(rowSum, SquaredDifferenceSumMasked(_a, _b, tailMask)); } _sum = vaddq_u64(_sum, vpaddlq_u32(rowSum)); a += aStride; b += bStride; } *sum = ExtractSum64u(_sum); }
void ConditionalCount8u(const uint8_t * src, size_t stride, size_t width, size_t height, uint8_t value, uint32_t * count) { assert(width >= A); if (align) assert(Aligned(src) && Aligned(stride)); size_t alignedWidth = AlignLo(width, QA); size_t bodyWidth = AlignLo(width, A); v128_u8 tailMask = ShiftLeft(K8_01, A - width + alignedWidth); v128_u8 _value = SIMD_VEC_SET1_EPI8(value); v128_u32 counts[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 }; for (size_t row = 0; row < height; ++row) { size_t col = 0; for (; col < alignedWidth; col += QA) { ConditionalCount8u<align, compareType>(src, col, _value, counts[0]); ConditionalCount8u<align, compareType>(src, col + A, _value, counts[1]); ConditionalCount8u<align, compareType>(src, col + 2 * A, _value, counts[2]); ConditionalCount8u<align, compareType>(src, col + 3 * A, _value, counts[3]); } for (; col < bodyWidth; col += A) ConditionalCount8u<align, compareType>(src, col, _value, counts[0]); if (alignedWidth != width) { const v128_u8 mask = vec_and(Compare8u<compareType>(Load<false>(src + width - A), _value), tailMask); counts[0] = vec_msum(mask, K8_01, counts[0]); } src += stride; } counts[0] = vec_add(vec_add(counts[0], counts[1]), vec_add(counts[2], counts[3])); *count = ExtractSum(counts[0]); }
template <bool align> void AbsDifferenceSum( const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride, size_t width, size_t height, uint64_t * sum) { assert(width >= A); if (align) assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride)); size_t alignedWidth = AlignLo(width, QA); size_t bodyWidth = AlignLo(width, A); v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth); *sum = 0; for (size_t row = 0; row < height; ++row) { size_t col = 0; v128_u32 sums[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 }; for (; col < alignedWidth; col += QA) { AbsDifferenceSum<align>(a, b, col, sums[0]); AbsDifferenceSum<align>(a, b, col + A, sums[1]); AbsDifferenceSum<align>(a, b, col + 2 * A, sums[2]); AbsDifferenceSum<align>(a, b, col + 3 * A, sums[3]); } sums[0] = vec_add(vec_add(sums[0], sums[1]), vec_add(sums[2], sums[3])); for (; col < bodyWidth; col += A) AbsDifferenceSum<align>(a, b, col, sums[0]); if (width - bodyWidth) AbsDifferenceSumMasked<false>(a, b, width - A, tailMask, sums[0]); *sum += ExtractSum(sums[0]); a += aStride; b += bStride; } }
void ConditionalCount16i(const uint8_t * src, size_t stride, size_t width, size_t height, int16_t value, uint32_t * count) { assert(width >= HA); if (align) assert(Aligned(src) && Aligned(stride)); size_t alignedWidth = AlignLo(width, DA); size_t bodyWidth = Simd::AlignLo(width, HA); v128_u16 tailMask = ShiftLeft(K16_0001, HA - width + alignedWidth); v128_s16 _value = SIMD_VEC_SET1_EPI16(value); v128_u32 counts[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 }; for (size_t row = 0; row < height; ++row) { const int16_t * s = (const int16_t *)src; size_t col = 0; for (; col < alignedWidth; col += DA) { ConditionalCount16i<align, compareType>(s, col, _value, counts[0]); ConditionalCount16i<align, compareType>(s, col + HA, _value, counts[1]); ConditionalCount16i<align, compareType>(s, col + 2 * HA, _value, counts[2]); ConditionalCount16i<align, compareType>(s, col + 3 * HA, _value, counts[3]); } for (; col < bodyWidth; col += HA) ConditionalCount16i<align, compareType>(s, col, _value, counts[0]); if (alignedWidth != width) { const v128_u16 mask = vec_and((v128_u16)Compare16i<compareType>(Load<false>(s + width - HA), _value), tailMask); counts[0] = vec_msum(mask, K16_0001, counts[0]); } src += stride; } counts[0] = vec_add(vec_add(counts[0], counts[1]), vec_add(counts[2], counts[3])); *count = ExtractSum(counts[0]); }
template <bool align> void EdgeBackgroundAdjustRangeMasked(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height, uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold, const uint8_t * mask, size_t maskStride) { assert(width >= A); if (align) { assert(Aligned(backgroundValue) && Aligned(backgroundValueStride)); assert(Aligned(backgroundCount) && Aligned(backgroundCountStride)); assert(Aligned(mask) && Aligned(maskStride)); } const uint8x16_t _threshold = vld1q_dup_u8(&threshold); size_t alignedWidth = AlignLo(width, A); uint8x16_t tailMask = ShiftLeft(K8_01, A - width + alignedWidth); for (size_t row = 0; row < height; ++row) { for (size_t col = 0; col < alignedWidth; col += A) EdgeBackgroundAdjustRangeMasked<align>(backgroundCount, backgroundValue, mask, col, _threshold, K8_01); if (alignedWidth != width) EdgeBackgroundAdjustRangeMasked<false>(backgroundCount, backgroundValue, mask, width - A, _threshold, tailMask); backgroundValue += backgroundValueStride; backgroundCount += backgroundCountStride; mask += maskStride; } }
template <bool align> void AddFeatureDifference(const uint8_t * value, size_t valueStride, size_t width, size_t height, const uint8_t * lo, size_t loStride, const uint8_t * hi, size_t hiStride, uint16_t weight, uint8_t * difference, size_t differenceStride) { assert(width >= A); if(align) { assert(Aligned(value) && Aligned(valueStride)); assert(Aligned(lo) && Aligned(loStride)); assert(Aligned(hi) && Aligned(hiStride)); assert(Aligned(difference) && Aligned(differenceStride)); } size_t alignedWidth = AlignLo(width, A); __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth); __m128i _weight = _mm_set1_epi16((short)weight); for(size_t row = 0; row < height; ++row) { for(size_t col = 0; col < alignedWidth; col += A) AddFeatureDifference<align>(value, lo, hi, difference, col, _weight, K_INV_ZERO); if(alignedWidth != width) AddFeatureDifference<false>(value, lo, hi, difference, width - A, _weight, tailMask); value += valueStride; lo += loStride; hi += hiStride; difference += differenceStride; } }
template <bool align, bool increment> void InterferenceChangeMasked(int16_t * statistic, size_t statisticStride, size_t width, size_t height, uint8_t value, int16_t saturation, const uint8_t * mask, size_t maskStride, uint8_t index) { assert(width >= A); if(align) assert(Aligned(statistic) && Aligned(statisticStride, HA) && Aligned(mask) && Aligned(maskStride)); size_t alignedWidth = Simd::AlignLo(width, A); v128_u8 tailMask = ShiftLeft(K8_FF, A - width + alignedWidth); v128_s16 _value = SetI16(value); v128_s16 _saturation = SetI16(saturation); v128_u8 _index = SetU8(index); for(size_t row = 0; row < height; ++row) { Loader<align> statisticSrc(statistic), maskSrc(mask); Storer<align> statisticDst(statistic); InterferenceChangeMasked<align, true, increment>(statisticSrc, _value, _saturation, maskSrc, _index, K8_FF, statisticDst); for(size_t col = A; col < alignedWidth; col += A) InterferenceChangeMasked<align, false, increment>(statisticSrc, _value, _saturation, maskSrc, _index, K8_FF, statisticDst); Flush(statisticDst); if(alignedWidth != width) { Loader<false> statisticSrc(statistic + width - A), maskSrc(mask + width - A); Storer<false> statisticDst(statistic + width - A); InterferenceChangeMasked<false, true, increment>(statisticSrc, _value, _saturation, maskSrc, _index, tailMask, statisticDst); Flush(statisticDst); } statistic += statisticStride; mask += maskStride; } }
/** * Função contruída com o objetivo de facilitar o treinamento de uma rede. Utiliza critérios * de parada pré-definidos. O objetivo é paralizar o treinamento a partir do momento em que o * erro médio quadrático da rede em relação às amostras para de diminuir. Recebe um parâmetro * indicando um número mínimo de treinos, a a partir do qual se inicia a verificação da variaçao * do erro médio quadrático. Recebe também o número de treinamentos a ser executado até que uma * nova medição do erro seja feita. Caso a variância (porcentual) das últimas n medições seja * menor ou igual a um determinado valor (entre 0 e 1), paraliza o treinamento. * A função recebe ainda um conjunto de amostras (matriz de entradas/matriz de saídas), número * de amostras contidas nas matrizes, a dimensão de cada amostra de entrada e de cada amostra de * saída e um flag indicando se as amostras devem ser treinadas aleatoriamente ou em ordem. */ int BKPNeuralNet::AutoTrain( float**inMatrix, float **outMatrix, int inSize, int outSize, int nSamples, int minTrains, int varVectorSize, float minStdDev, int numTrains, TrainType type, float l_rate, float momentum, int* retExecutedTrains ) { // Casos de retorno: if( (!inMatrix) || (!outMatrix) || (inSize!=_nLayers[0]) || (_nLayers[_layers-1]!=outSize) ) return -1; // O número de treinamentos inicial tem que ser pelo menos 0: if( *retExecutedTrains < 0 ) *retExecutedTrains = 0; int thisSample = -1; //< Variável auxiliar, indica a amostra a ser treinada. // Executando os treinamentos obrigatórios: for( int i=0 ; i<minTrains ; i++ ) { if( type == ORDERED_TRAIN ) thisSample = (++thisSample)%nSamples; if( type == RANDOM_TRAIN ) thisSample = RandInt(0, (nSamples-1)); Train( inSize, inMatrix[thisSample], outSize, outMatrix[thisSample], l_rate, momentum ); } // Executando os demais treinamentos: float* varVector = new float[varVectorSize]; //< Vetor para conter as últimas medições de erro. 
int ptVarVector = 0; //< Aponta para a primeira posição vazia de varVector. float lastVariance = (float)MAX_VALUE; //< Variâvel que mantém o valor da varirância. float StdDev = (float)MAX_VALUE; //< Variâvel que mantém o valor do desvio-padrão. thisSample = -1; int nTrains=minTrains + *retExecutedTrains; //< Mantém o número de treinamentos executados. bool varFlag = false; while( StdDev > minStdDev ) { if( type == ORDERED_TRAIN ) thisSample = (++thisSample)%nSamples; if( type == RANDOM_TRAIN ) thisSample = RandInt(0, (nSamples-1)); Train( inSize, inMatrix[thisSample], outSize, outMatrix[thisSample], l_rate, momentum ); if( (nTrains%numTrains) == 0 ) //< A cada numTrains treinamentos, testa o erro: { float retRMS_Error = 0; float mean = 0; RMS_error( inMatrix, outMatrix, inSize, outSize, nSamples, &retRMS_Error ); varFlag = ShiftLeft( varVector, varVectorSize, retRMS_Error, ptVarVector ); if( varFlag == true ) { lastVariance = Variance( varVector, varVectorSize, &mean ); StdDev = ((float)sqrt(lastVariance))/mean; } ptVarVector++; } nTrains++; if( nTrains >= 90000 ) //< O número máximo de treinamentos será 150000. StdDev = minStdDev; } *retExecutedTrains = nTrains; return 0; }
byte_t *BreakECBFixedKey( const byte_t *dec, const size_t mida ) { int i,j; byte_t myString[16]; //Initialize to known string for( i = 0; i < 16; i++ ) myString[i] = 'A'; size_t nmida = 17; byte_t *result = malloc(mida+1); printf("mida:%lu\n",mida); //For each unknown block for( i = 0; i < nmida/16; i++ ){ if( (16*i) > mida ) break; for( j = 0; j < 16; j++ ) { //For each byte in the block if( (16*i+j) > mida ) break; //Generate the key we are looking for /////////ORACLE FUNCTION, CHANGE THIS TO USE ANOTHER ONE byte_t *obj = EncryptionOracle( dec, myString, mida, 15-j, &nmida ); ///////// myString[15] = 0x00; int k; //generate the dictionary int pos; for( k = 0; k < 256; k++ ) { /////////ORACLE FUNCTION, CHANGE THIS TO USE ANOTHER ONE byte_t * res = EncryptionOracle( dec, myString, mida, 16, &nmida ); ///////// if( !memcmp( obj+16*i, res, 16 ) ){ printf("found! %c\n",k); pos = k; free(res); break; } free(res); myString[15]++; } myString[15] = pos; ShiftLeft(myString); result[i*16+j] = (char)pos; printf("Progress:%f%% \n",(((float)16*i+j)/((float)mida))*100); } printf("---NEXT BLOCK---\n"); } result[mida] = '\0'; return result; }
/* Demo driver: calls ShiftLeft(S, 8, 3) on the array 1..8 and prints the
   resulting elements, one per line. */
int main(int argc, char const *argv[])
{
    int S[8] = {1,2,3,4,5,6,7,8};
    const int *cursor;

    ShiftLeft(S, 8, 3);

    for (cursor = S; cursor != S + 8; ++cursor)
        printf("%d\n", *cursor);

    return 0;
}
// Adjusts the four sides of Rgn. For each side a square probe rectangle of
// edge Param.www is placed just outside Rgn, the matching Shift* helper is
// called if the probe lies inside the picture bounds, and the side of Rgn is
// replaced by the probe's resulting edge. The right/left/top/bottom sequence
// runs twice, so the second pass sees the sides moved by the first one.
void DjvuPic::FitZone1(CRect& Rgn, int bckg1)
{
    if (!(Buffer->arr))
        return;

    MyRect Rect;
    Param.bckg = bckg1;
    Param.www = BckgRgn.Width();

    for (int pass = 0; pass < 2; ++pass)
    {
        // Right side
        Rect = CRect(CPoint(Rgn.right, Rgn.top), CSize(Param.www, Param.www));
        Param.zz = Rgn.Height();
        if (Rect.right < PicDim.cx)
        {
            ShiftRight(Rect, Param);
            Rgn.right = Rect.right;
        }

        // Left side
        Rect = CRect(CPoint(Rgn.left - Param.www, Rgn.top), CSize(Param.www, Param.www));
        Param.zz = Rgn.Height();
        if (Rect.left > 0)
        {
            ShiftLeft(Rect, Param);
            Rgn.left = Rect.left;
        }

        // Top side
        Rect = CRect(CPoint(Rgn.left, Rgn.top - Param.www), CSize(Param.www, Param.www));
        Param.zz = Rgn.Width();
        if (Rect.top > 0)
        {
            ShiftTop(Rect, Param);
            Rgn.top = Rect.top;
        }

        // Bottom side
        Rect = CRect(CPoint(Rgn.left, Rgn.bottom), CSize(Param.www, Param.www));
        Param.zz = Rgn.Width();
        if (Rect.bottom < PicDim.cy)
        {
            ShiftBottom(Rect, Param);
            Rgn.bottom = Rect.bottom;
        }
    }
}
// Rabin-Miller method for finding a strong pseudo-prime // Preconditions: High bit and low bit of n = 1 bool RabinMillerPrimeTest( IRandom *prng, const u32 *n, // Number to check for primality int limbs, // Number of limbs in n u32 k) // Confidence level (40 is pretty good) { // n1 = n - 1 u32 *n1 = (u32 *)alloca(limbs*4); Set(n1, limbs, n); Subtract32(n1, limbs, 1); // d = n1 u32 *d = (u32 *)alloca(limbs*4); Set(d, limbs, n1); // remove factors of two from d while (!(d[0] & 1)) ShiftRight(limbs, d, d, 1); u32 *a = (u32 *)alloca(limbs*4); u32 *t = (u32 *)alloca(limbs*4); u32 *p = (u32 *)alloca((limbs*2)*4); u32 n_inv = MonReducePrecomp(n[0]); // iterate k times while (k--) { do prng->Generate(a, limbs*4); while (GreaterOrEqual(a, limbs, n, limbs)); // a = a ^ d (Mod n) ExpMod(a, limbs, d, limbs, n, limbs, n_inv, a); Set(t, limbs, d); while (!Equal(limbs, t, n1) && !Equal32(a, limbs, 1) && !Equal(limbs, a, n1)) { // TODO: verify this is actually working // a = a^2 (Mod n), non-critical path Square(limbs, p, a); Modulus(p, limbs*2, n, limbs, a); // t <<= 1 ShiftLeft(limbs, t, t, 1); } if (!Equal(limbs, a, n1) && !(t[0] & 1)) return false; } return true; }
// Repeatedly applies ShiftLeft() starting from `number` and checks each
// intermediate value with IsPrime(). Returns true when the sequence comes back
// to `number` with every intermediate value prime; returns false when a value
// is not prime or ShiftLeft() yields InvalidRotatedNumber. `number` itself is
// not tested here.
static bool IsRotatePrime(int number)
{
    int rotated = number;
    for (;;)
    {
        rotated = ShiftLeft(rotated);

        if (rotated == number)
            return true;    // full cycle completed
        if (rotated == InvalidRotatedNumber)
            return false;   // rotation not possible
        if (!IsPrime(rotated))
            return false;
    }
}
template <bool align> void AbsDifferenceSums3x3Masked(const uint8_t *current, size_t currentStride, const uint8_t *background, size_t backgroundStride, const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sums) { assert(height > 2 && width >= A + 2); if (align) assert(Aligned(background) && Aligned(backgroundStride)); width -= 2; height -= 2; current += 1 + currentStride; background += 1 + backgroundStride; mask += 1 + maskStride; size_t bodyWidth = AlignLo(width, A); v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth); v128_u8 _index = SetU8(index); for (size_t i = 0; i < 9; ++i) sums[i] = 0; for (size_t row = 0; row < height; ++row) { v128_u32 _sums[9]; for (size_t i = 0; i < 9; ++i) _sums[i] = K32_00000000; for (size_t col = 0; col < bodyWidth; col += A) { const v128_u8 _mask = LoadMaskU8<false>(mask + col, _index); const v128_u8 _current = vec_and(Load<false>(current + col), _mask); AbsDifferenceSums3x3Masked<align>(_current, background + col, backgroundStride, _mask, _sums); } if (width - bodyWidth) { const v128_u8 _mask = vec_and(LoadMaskU8<false>(mask + width - A, _index), tailMask); const v128_u8 _current = vec_and(Load<false>(current + width - A), _mask); AbsDifferenceSums3x3Masked<false>(_current, background + width - A, backgroundStride, _mask, _sums); } for (size_t i = 0; i < 9; ++i) sums[i] += ExtractSum(_sums[i]); current += currentStride; background += backgroundStride; mask += maskStride; } }
void ConditionalSquareGradientSum(const uint8_t * src, size_t srcStride, size_t width, size_t height, const uint8_t * mask, size_t maskStride, uint8_t value, uint64_t * sum) { assert(width >= A + 2 && height >= 3); if (align) assert(Aligned(src) && Aligned(srcStride) && Aligned(mask) && Aligned(maskStride)); src += srcStride; mask += maskStride; height -= 2; size_t bodyWidth = Simd::AlignLo(width - 1, A); v128_u8 noseMask = ShiftRight(K8_FF, 1); v128_u8 tailMask = ShiftLeft(K8_FF, A - width + 1 + bodyWidth); size_t alignedWidth = Simd::AlignLo(bodyWidth - A, DA); v128_u8 _value = SetU8(value); *sum = 0; for (size_t row = 0; row < height; ++row) { v128_u32 sums[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 }; { const v128_u8 _mask = vec_and(Compare8u<compareType>(Load<false>(mask + 1), _value), noseMask); AddSquareDifference<false>(src + 1, 1, _mask, sums[0]); AddSquareDifference<false>(src + 1, srcStride, _mask, sums[1]); } size_t col = A; for (; col < alignedWidth; col += DA) { ConditionalSquareGradientSum<align, compareType>(src, srcStride, mask, col, _value, sums); ConditionalSquareGradientSum<align, compareType>(src, srcStride, mask, col + A, _value, sums + 2); } for (; col < bodyWidth; col += A) ConditionalSquareGradientSum<align, compareType>(src, srcStride, mask, col, _value, sums); if (bodyWidth != width - 1) { size_t offset = width - A - 1; const v128_u8 _mask = vec_and(Compare8u<compareType>(Load<false>(mask + offset), _value), tailMask); AddSquareDifference<false>(src + offset, 1, _mask, sums[0]); AddSquareDifference<false>(src + offset, srcStride, _mask, sums[1]); } sums[0] = vec_add(vec_add(sums[0], sums[1]), vec_add(sums[2], sums[3])); *sum += ExtractSum(sums[0]); src += srcStride; mask += maskStride; } }
bool StateGrid::MoveForward(int32_t id) { int32_t collideId = m_horses[id].Collide(); if(collideId >= 0) { // try shift right if(m_shifted[collideId] || !ShiftRight(collideId)) { // try shift left if(!ShiftLeft(collideId)) { // try move forward } } } }
void AnimateHorizontal(uint8_t prev, uint8_t gear) { prev += SYMBOL_GEAR_NUMBER; gear += SYMBOL_GEAR_NUMBER; for(uint8_t i=0;i<=8;i++) { if( gear > prev ) ShiftLeft(i, (PGM_P)FONTTAB+prev*8, (PGM_P)FONTTAB+gear*8 ); else ShiftRight(i, (PGM_P)FONTTAB+prev*8, (PGM_P)FONTTAB+gear*8 ); _delay_ms(GEAR_ANIM_DELAY); } ledPutc(gear); }
template <bool align> void LaplaceAbsSum(const uint8_t * src, size_t stride, size_t width, size_t height, uint64_t * sum) { assert(width > A); if(align) assert(Aligned(src) && Aligned(stride)); size_t bodyWidth = Simd::AlignHi(width, A) - A; const uint8_t *src0, *src1, *src2; v128_u8 a[3][3]; v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth); *sum = 0; for(size_t row = 0; row < height; ++row) { src0 = src + stride*(row - 1); src1 = src0 + stride; src2 = src1 + stride; if(row == 0) src0 = src1; if(row == height - 1) src2 = src1; v128_u32 sums[2] = {K32_00000000, K32_00000000}; LoadNose3<align, 1>(src0 + 0, a[0]); LoadNose3<align, 1>(src1 + 0, a[1]); LoadNose3<align, 1>(src2 + 0, a[2]); LaplaceAbsSum(a, sums); for(size_t col = A; col < bodyWidth; col += A) { LoadBody3<align, 1>(src0 + col, a[0]); LoadBody3<align, 1>(src1 + col, a[1]); LoadBody3<align, 1>(src2 + col, a[2]); LaplaceAbsSum(a, sums); } LoadTail3<false, 1>(src0 + width - A, a[0]); LoadTail3<false, 1>(src1 + width - A, a[1]); LoadTail3<false, 1>(src2 + width - A, a[2]); SetMask3x3(a, tailMask); LaplaceAbsSum(a, sums); *sum += ExtractSum(vec_add(sums[0], sums[1])); } }
template <bool align> void AbsDifferenceSums3x3(const uint8_t * current, size_t currentStride, const uint8_t * background, size_t backgroundStride, size_t width, size_t height, uint64_t * sums) { assert(height > 2 && width >= A + 2); if (align) assert(Aligned(background) && Aligned(backgroundStride)); width -= 2; height -= 2; current += 1 + currentStride; background += 1 + backgroundStride; size_t alignedWidth = AlignLo(width, DA); size_t bodyWidth = AlignLo(width, A); v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth); memset(sums, 0, 9 * sizeof(uint64_t)); for (size_t row = 0; row < height; ++row) { v128_u32 _sums[2][9]; memset(_sums, 0, 18 * sizeof(v128_u32)); size_t col = 0; for (; col < alignedWidth; col += DA) { AbsDifferenceSums3x3<align>(Load<false>(current + col), background + col, backgroundStride, _sums[0]); AbsDifferenceSums3x3<align>(Load<false>(current + col + A), background + col + A, backgroundStride, _sums[0]); } for (; col < bodyWidth; col += A) AbsDifferenceSums3x3<align>(Load<false>(current + col), background + col, backgroundStride, _sums[0]); if (width - bodyWidth) { const v128_u8 _current = vec_and(tailMask, Load<false>(current + width - A)); AbsDifferenceSums3x3Masked<false>(_current, background + width - A, backgroundStride, tailMask, _sums[0]); } for (size_t i = 0; i < 9; ++i) sums[i] += ExtractSum(vec_add(_sums[0][i], _sums[1][i])); current += currentStride; background += backgroundStride; } }
void SobelDxAbsSum(const uint8_t * src, size_t stride, size_t width, size_t height, uint64_t * sum) { assert(width > A); size_t bodyWidth = Simd::AlignHi(width, A) - A; const uint8_t *src0, *src1, *src2; v16u8 a[3][3]; v2u64 fullSum = Zero<v2u64>(); const v16u8 K8_FF = Fill((uint8_t)0xff); v16u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth); for (size_t row = 0; row < height; ++row) { src0 = src + stride*(row - 1); src1 = src0 + stride; src2 = src1 + stride; if (row == 0) src0 = src1; if (row == height - 1) src2 = src1; v4u32 rowSum = Zero<v4u32>(); LoadNoseDx(src0 + 0, a[0]); LoadNoseDx(src1 + 0, a[1]); LoadNoseDx(src2 + 0, a[2]); SobelDxAbsSum(a, rowSum); for (size_t col = A; col < bodyWidth; col += A) { LoadBodyDx(src0 + col, a[0]); LoadBodyDx(src1 + col, a[1]); LoadBodyDx(src2 + col, a[2]); SobelDxAbsSum(a, rowSum); } LoadTailDx(src0 + width - A, a[0]); LoadTailDx(src1 + width - A, a[1]); LoadTailDx(src2 + width - A, a[2]); SetMask3x3(a, tailMask); SobelDxAbsSum(a, rowSum); fullSum = PadSum(fullSum,rowSum); } *sum = ExtractSum(fullSum); }
/**
 * Scrolls text from right to left.
 *
 * \param szText   Pointer to text in RAM. It may be handy to add one space
 *                 at the beginning to get cool looking scrolling.
 *
 * \param Len      Length of the text without terminating null.
 *
 * \param pOffset  Offset, from 0 to 8 * \a Len. Incremented by one on every
 *                 call and reset to 0 once the end of the text is reached.
 *
 * \return The updated value of \a *pOffset (0 when the text has been
 *         scrolled completely).
 *
 * \par Example
 * char test[] = " HELLO";
 * int offset=0;
 * do
 * {
 *     ScrollLeft( test, sizeof(test)-1, &offset);
 * } while( offset );
 */
int ScrollLeft( const char* szText, int Len, int* pOffset )
{
    int c = *pOffset / 8;
    int bit = *pOffset % 8;

    if( c >= Len )
    {
        *pOffset = 0;
        return *pOffset;
    }

    // Glyph indices are taken as unsigned: with a signed plain char, characters
    // >= 0x80 would otherwise produce a negative offset into the font table.
    unsigned char c1 = (unsigned char)szText[c++];
    unsigned char c2 = c <= Len-1 ? (unsigned char)szText[c] : ' '/*extra space at the end*/;

    ShiftLeft( bit, pCurrentFont+c1*8, pCurrentFont+c2*8 );

    (*pOffset)++;
    return *pOffset;
}
template <bool align> void EdgeBackgroundAdjustRange(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height, uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold) { assert(width >= A); if(align) { assert(Aligned(backgroundValue) && Aligned(backgroundValueStride) && Aligned(backgroundCount) && Aligned(backgroundCountStride)); } const __m128i _threshold = _mm_set1_epi8((char)threshold); size_t alignedWidth = AlignLo(width, A); __m128i tailMask = ShiftLeft(K8_01, A - width + alignedWidth); for(size_t row = 0; row < height; ++row) { for(size_t col = 0; col < alignedWidth; col += A) EdgeBackgroundAdjustRange<align>(backgroundCount, backgroundValue, col, _threshold, K8_01); if(alignedWidth != width) EdgeBackgroundAdjustRange<false>(backgroundCount, backgroundValue, width - A, _threshold, tailMask); backgroundValue += backgroundValueStride; backgroundCount += backgroundCountStride; } }
template <bool align> void EdgeBackgroundGrowRangeSlow(const uint8_t * value, size_t valueStride, size_t width, size_t height, uint8_t * background, size_t backgroundStride) { assert(width >= A); if (align) { assert(Aligned(value) && Aligned(valueStride)); assert(Aligned(background) && Aligned(backgroundStride)); } size_t alignedWidth = AlignLo(width, A); uint8x16_t tailMask = ShiftLeft(K8_01, A - width + alignedWidth); for (size_t row = 0; row < height; ++row) { for (size_t col = 0; col < alignedWidth; col += A) EdgeBackgroundGrowRangeSlow<align>(value + col, background + col, K8_01); if (alignedWidth != width) EdgeBackgroundGrowRangeSlow<false>(value + width - A, background + width - A, tailMask); value += valueStride; background += backgroundStride; } }
template <bool align> void EdgeBackgroundShiftRange(const uint8_t * value, size_t valueStride, size_t width, size_t height, uint8_t * background, size_t backgroundStride) { assert(width >= A); if(align) { assert(Aligned(value) && Aligned(valueStride)); assert(Aligned(background) && Aligned(backgroundStride)); } size_t alignedWidth = AlignLo(width, A); __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth); for(size_t row = 0; row < height; ++row) { for(size_t col = 0; col < alignedWidth; col += A) EdgeBackgroundShiftRange<align>(value, background, col, K_INV_ZERO); if(alignedWidth != width) EdgeBackgroundShiftRange<false>(value, background, width - A, tailMask); value += valueStride; background += backgroundStride; } }
template <bool align> void EdgeBackgroundIncrementCount(const uint8_t * value, size_t valueStride, size_t width, size_t height, const uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t * backgroundCount, size_t backgroundCountStride) { assert(width >= A); if (align) { assert(Aligned(value) && Aligned(valueStride)); assert(Aligned(backgroundValue) && Aligned(backgroundValueStride) && Aligned(backgroundCount) && Aligned(backgroundCountStride)); } size_t alignedWidth = AlignLo(width, A); uint8x16_t tailMask = ShiftLeft(K8_01, A - width + alignedWidth); for (size_t row = 0; row < height; ++row) { for (size_t col = 0; col < alignedWidth; col += A) EdgeBackgroundIncrementCount<align>(value, backgroundValue, backgroundCount, col, K8_01); if (alignedWidth != width) EdgeBackgroundIncrementCount<false>(value, backgroundValue, backgroundCount, width - A, tailMask); value += valueStride; backgroundValue += backgroundValueStride; backgroundCount += backgroundCountStride; } }
template <bool align, size_t channelCount> void AlphaBlending(const uint8_t *src, size_t srcStride, size_t width, size_t height, const uint8_t *alpha, size_t alphaStride, uint8_t *dst, size_t dstStride) { size_t alignedWidth = AlignLo(width, A); __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth); size_t step = channelCount*A; for(size_t row = 0; row < height; ++row) { for(size_t col = 0, offset = 0; col < alignedWidth; col += A, offset += step) { __m128i _alpha = Load<align>((__m128i*)(alpha + col)); AlphaBlender<align, channelCount>()((__m128i*)(src + offset), (__m128i*)(dst + offset), _alpha); } if(alignedWidth != width) { __m128i _alpha = _mm_and_si128(Load<false>((__m128i*)(alpha + width - A)), tailMask); AlphaBlender<false, channelCount>()((__m128i*)(src + (width - A)*channelCount), (__m128i*)(dst + (width - A)*channelCount), _alpha); } src += srcStride; alpha += alphaStride; dst += dstStride; } }