// MVDegrain1 (multithreaded/prefetch variant): 1-reference-pair temporal degrain
// driven by one backward (mvbw) and one forward (mvfw) motion-vector clip.
// One MVClip pair is created per prefetch slot so each worker has its own copy.
MVDegrain1::MVDegrain1(PClip _child, PClip mvbw, PClip mvfw, int _thSAD, int _thSADC, int _YUVplanes, int _nLimit,
                       PClip _pelclip, int _nIdx, int _nSCD1, int _nSCD2, bool _mmx, bool _isse,
                       int _MaxThreads, int _PreFetch, int _SadMode, IScriptEnvironment* env) :
    MVDegrainBase(_child, 1, _YUVplanes, _nLimit, _pelclip, _nIdx, _mmx, _isse, env, mvfw, "MVDegrain1", -1,
                  _MaxThreads, _PreFetch, _SadMode)
{
    // PreFetch max 21 since 21*3=63 and 64 is max threads at one time
    if (_PreFetch<1 || _PreFetch>21) env->ThrowError("MVDegrain1: PreFetch must be >=1 and <=21");

    // initialize MVClip's: one forward/backward pair per prefetch slot
    for (unsigned int PreFetchNum=0; PreFetchNum<static_cast<unsigned int>(_PreFetch); ++PreFetchNum)
    {
        pmvClipF[PreFetchNum][0]=new MVClip(mvfw, _nSCD1, _nSCD2, env, true);
        pmvClipB[PreFetchNum][0]=new MVClip(mvbw, _nSCD1, _nSCD2, env, true);
    }
    // verify the vector clips match this filter's frame/block parameters
    // (checking slot 0 is enough — all slots wrap the same source clips)
    CheckSimilarity(*pmvClipF[0][0], "mvfw", env);
    CheckSimilarity(*pmvClipB[0][0], "mvbw", env);

    // normalize thSAD thresholds from the user's nSCD1 scale to block-SAD scale
    thSAD = _thSAD*pmvClipB[0][0]->GetThSCD1()/_nSCD1;   // normalize to block SAD
    thSADC = _thSADC*pmvClipB[0][0]->GetThSCD1()/_nSCD1; // chroma

    // find the maximum extent (largest delta frame used in either direction)
    unsigned int MaxDelta=static_cast<unsigned int>(pmvClipF[0][0]->GetDeltaFrame());
    if (static_cast<unsigned int>(pmvClipB[0][0]->GetDeltaFrame())>MaxDelta)
        MaxDelta=static_cast<unsigned int>(pmvClipB[0][0]->GetDeltaFrame());

    // numframes 2*MaxDelta+1, i.e. to cover all possible frames in sliding window;
    // scaled by _PreFetch because each prefetch slot works on its own window
    mvCore->AddFrames(nIdx, (2*MaxDelta)*_PreFetch+1, pmvClipB[0][0]->GetLevelCount(), nWidth, nHeight, nPel,
                      nHPadding, nVPadding, YUVPLANES, _isse, yRatioUV);
}
// MVDegrainMulti: temporal degrain using an interleaved multi-reference vector
// clip (mvMulti) holding vectors ordered BX, ..., B2, B1, F1, F2, ..., FX.
// RefFrames selects how many backward/forward pairs to use; _PreFetch controls
// how many independent MVClip sets are created for the worker threads.
MVDegrainMulti::MVDegrainMulti(PClip _child, PClip mvMulti, int _RefFrames, int _thSAD, int _thSADC, int _YUVplanes,
                               int _nLimit, PClip _pelclip, int _nIdx, int _nSCD1, int _nSCD2, bool _mmx, bool _isse,
                               int _MaxThreads, int _PreFetch, int _SadMode, IScriptEnvironment* env) :
    MVDegrainBase(_child, _RefFrames, _YUVplanes, _nLimit, _pelclip, _nIdx, _mmx, _isse, env, mvMulti,
                  "MVDegrainMulti", 0, _MaxThreads, _PreFetch, _SadMode), RefFrames(_RefFrames)
{
    if (RefFrames<1 || RefFrames>32) env->ThrowError("MVDegrainMulti: refframes must be >=1 and <=32");

    // get the true number of reference frames: mvMulti stacks one backward and one
    // forward vector row per reference, so height/2 is the available pair count
    VideoInfo mvMultivi=mvMulti->GetVideoInfo();
    unsigned int RefFramesAvailable=mvMultivi.height/2;

    // if refframes is greater than MVAnalyseMulti height then limit to height
    if (RefFramesAvailable<RefFrames)
    {
        RefFrames=RefFramesAvailable;
        UpdateNumRefFrames(RefFrames, env);
    }

    // PreFetch max 21 since 21*3=63 and 64 is max threads at one time
    if (_PreFetch<1 || _PreFetch>21) env->ThrowError("MVDegrainMulti: PreFetch must be >=1 and <=21");
    if (_PreFetch*RefFrames>32) env->ThrowError("MVDegrainMulti: PreFetch*RefFrames<=32");

    // initialize MVClip's which are in order BX, ..., B3, B2, B1, F1, F2, F3, ..., FX in mvMulti
    for (unsigned int PreFetchNum=0; PreFetchNum<static_cast<unsigned int>(_PreFetch); ++PreFetchNum)
    {
        if (RefFrames<RefFramesAvailable)
        {
            // we are taking a subset of the mvMulti clip: forward rows start at
            // RefFramesAvailable (the middle), backward rows count down from it
            for(unsigned int RefNum=0; RefNum<RefFrames; ++RefNum)
            {
                pmvClipF[PreFetchNum][RefNum]=new MVClip(mvMulti, _nSCD1, _nSCD2, env, true, RefFramesAvailable+RefNum);
                pmvClipB[PreFetchNum][RefNum]=new MVClip(mvMulti, _nSCD1, _nSCD2, env, true, RefFramesAvailable-RefNum-1);
            }
        }
        else
        {
            // we are taking the full mvMulti clip
            for(unsigned int RefNum=0; RefNum<RefFrames; ++RefNum)
            {
                pmvClipF[PreFetchNum][RefNum]=new MVClip(mvMulti, _nSCD1, _nSCD2, env, true, RefFrames+RefNum);
                pmvClipB[PreFetchNum][RefNum]=new MVClip(mvMulti, _nSCD1, _nSCD2, env, true, RefFrames-RefNum-1);
            }
        }
    }
    // check similarities: only need to check one since they are grouped together
    CheckSimilarity(*pmvClipF[0][0], "mvMulti", env);

    // normalize thSAD thresholds from the user's nSCD1 scale to block-SAD scale
    thSAD = _thSAD*pmvClipB[0][0]->GetThSCD1()/_nSCD1;   // normalize to block SAD
    thSADC = _thSADC*pmvClipB[0][0]->GetThSCD1()/_nSCD1; // chroma

    // find the maximum extent (largest delta of the farthest reference pair)
    unsigned int MaxDelta=static_cast<unsigned int>(pmvClipF[0][RefFrames-1]->GetDeltaFrame());
    if (static_cast<unsigned int>(pmvClipB[0][RefFrames-1]->GetDeltaFrame())>MaxDelta)
        MaxDelta=static_cast<unsigned int>(pmvClipB[0][RefFrames-1]->GetDeltaFrame());

    // numframes 2*MaxDelta+1, i.e. to cover all possible frames in sliding window
    mvCore->AddFrames(nIdx, (2*MaxDelta)*_PreFetch+1, pmvClipB[0][0]->GetLevelCount(), nWidth, nHeight, nPel,
                      nHPadding, nVPadding, YUVPLANES, _isse, yRatioUV);
}
// Copy one image into another of the same dimensions.
// Fast path: identical pitches allow a single device-side buffer copy;
// otherwise a row-aware copy kernel is dispatched.
void Conversions::Copy(Image& Source, Image& Dest)
{
    CheckSimilarity(Source, Dest);

    if (Source.Step() != Dest.Step())
    {
        // Pitches differ — the kernel copies row by row, honoring each pitch.
        Kernel(copy, Source, Dest, Source.Step(), Dest.Step());
        return;
    }

    // Same pitch: one contiguous buffer-to-buffer copy covers every row.
    Source.SendIfNeeded();
    m_CL->GetQueue().enqueueCopyBuffer(Source, Dest, 0, 0, Source.Step() * Source.Height());
    Dest.SetInDevice();
}
// MFlowBlur (v2, super-clip interface): motion blur along forward/backward
// vectors. Validates the prepared super clip, builds the sub-pixel "finest"
// clip when nPel > 1, and allocates full-size and block-size vector/mask planes.
MVFlowBlur::MVFlowBlur(PClip _child, PClip super, PClip _mvbw, PClip _mvfw, int _blur256, int _prec,
                       int nSCD1, int nSCD2, bool _isse, bool _planar, IScriptEnvironment* env) :
    GenericVideoFilter(_child),
    MVFilter(_mvfw, "MFlowBlur", env, 1, 0),
    mvClipB(_mvbw, nSCD1, nSCD2, env, 1, 0),
    mvClipF(_mvfw, nSCD1, nSCD2, env, 1, 0)
{
    blur256 = _blur256; // blur length as a fraction of frame distance, in 1/256 units
    prec = _prec;       // blur precision (sampling step)
    isse = _isse;
    planar = _planar;

    CheckSimilarity(mvClipB, "mvbw", env);
    CheckSimilarity(mvClipF, "mvfw", env);

    // Unpack super-clip parameters smuggled through the audio-sample field (v2 convention).
    SuperParams64Bits params;
    memcpy(&params, &super->GetVideoInfo().num_audio_samples, 8); // fixed: was garbled "¶ms"
    int nHeightS = params.nHeight;
    int nSuperHPad = params.nHPad;
    int nSuperVPad = params.nVPad;
    int nSuperPel = params.nPel;
    int nSuperModeYUV = params.nModeYUV;
    int nSuperLevels = params.nLevels;
    int nSuperWidth = super->GetVideoInfo().width; // really super
    int nSuperHeight = super->GetVideoInfo().height;

    if (nHeight != nHeightS || nWidth != nSuperWidth - nSuperHPad * 2 || nPel != nSuperPel)
    {
        env->ThrowError("MFlowBlur : wrong super frame clip");
    }

    if (nPel == 1)
        finest = super; // v2.0.9.1
    else
    {
        finest = new MVFinest(super, isse, env);
        AVSValue cache_args[1] = { finest };
        finest = env->Invoke("InternalCache", AVSValue(cache_args, 1)).AsClip(); // add cache for speed
    }

    // Chroma plane geometry (width always halved — YV12/YUY2 horizontal subsampling).
    nHeightUV = nHeight/yRatioUV;
    nWidthUV = nWidth/2; // for YV12
    nHPaddingUV = nHPadding/2;
    nVPaddingUV = nVPadding/yRatioUV; // fixed: original divided nHPadding (horizontal) for a vertical quantity

    VPitchY = nWidth;
    VPitchUV = nWidthUV;

    // Full-resolution vector planes: X/Y components, backward/forward, luma/chroma.
    VXFullYB = new BYTE [nHeight*VPitchY];
    VXFullUVB = new BYTE [nHeightUV*VPitchUV];
    VYFullYB = new BYTE [nHeight*VPitchY];
    VYFullUVB = new BYTE [nHeightUV*VPitchUV];
    VXFullYF = new BYTE [nHeight*VPitchY];
    VXFullUVF = new BYTE [nHeightUV*VPitchUV];
    VYFullYF = new BYTE [nHeight*VPitchY];
    VYFullUVF = new BYTE [nHeightUV*VPitchUV];

    // Block-resolution (small) vector planes and occlusion masks.
    VXSmallYB = new BYTE [nBlkX*nBlkY];
    VYSmallYB = new BYTE [nBlkX*nBlkY];
    VXSmallUVB = new BYTE [nBlkX*nBlkY];
    VYSmallUVB = new BYTE [nBlkX*nBlkY];
    VXSmallYF = new BYTE [nBlkX*nBlkY];
    VYSmallYF = new BYTE [nBlkX*nBlkY];
    VXSmallUVF = new BYTE [nBlkX*nBlkY];
    VYSmallUVF = new BYTE [nBlkX*nBlkY];
    MaskSmallB = new BYTE [nBlkX*nBlkY];
    MaskFullYB = new BYTE [nHeight*VPitchY];
    MaskFullUVB = new BYTE [nHeightUV*VPitchUV];
    MaskSmallF = new BYTE [nBlkX*nBlkY];
    MaskFullYF = new BYTE [nHeight*VPitchY];
    MaskFullUVF = new BYTE [nHeightUV*VPitchUV];

    int CPUF_Resize = env->GetCPUFlags();
    if (!isse)
        CPUF_Resize &= ~(CPUF_INTEGER_SSE | CPUF_SSE2); // fixed: original used logical '!', zeroing ALL flags

    upsizer = new SimpleResize(nWidth, nHeight, nBlkX, nBlkY, CPUF_Resize);
    upsizerUV = new SimpleResize(nWidthUV, nHeightUV, nBlkX, nBlkY, CPUF_Resize);

    // Interleaved YUY2 needs a planar conversion buffer unless the caller opts into planar mode.
    if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 && !planar)
    {
        DstPlanes = new YUY2Planes(nWidth, nHeight);
    }
}
// MVFlowBlur (v1, pelclip interface): motion blur along forward/backward
// vectors, with an optional externally-supplied upsampled clip (pelclip) used
// as the sub-pixel source when nPel > 1.
MVFlowBlur::MVFlowBlur(PClip _child, PClip _mvbw, PClip _mvfw, int _blur256, int _prec, PClip _pelclip,
                       int _nIdx, int nSCD1, int nSCD2, bool _mmx, bool _isse, IScriptEnvironment* env) :
    GenericVideoFilter(_child),
    MVFilter(_mvfw, "MVFlowBlur", env),
    mvClipB(_mvbw, nSCD1, nSCD2, env, true),
    mvClipF(_mvfw, nSCD1, nSCD2, env, true),
    pelclip(_pelclip)
{
    blur256 = _blur256; // blur length as a fraction of frame distance, in 1/256 units
    prec = _prec;       // blur precision (sampling step)
    nIdx = _nIdx;
    mmx = _mmx;
    isse = _isse;

    CheckSimilarity(mvClipB, "mvbw", env);
    CheckSimilarity(mvClipF, "mvfw", env);

    // 3 frames cover prev/current/next in the shared frame cache for this index.
    mvCore->AddFrames(nIdx, 3, mvClipB.GetLevelCount(), nWidth, nHeight, nPel, nHPadding, nVPadding,
                      YUVPLANES, isse, yRatioUV);

    usePelClipHere = false;
    if (pelclip && (nPel > 1))
    {
        // NOTE(review): message says "2X" but the check accepts any nPel multiple (2 or 4).
        if (pelclip->GetVideoInfo().width != nWidth*nPel || pelclip->GetVideoInfo().height != nHeight*nPel)
            env->ThrowError("MVFlowBlur: pelclip frame size must be 2X of source!");
        else
            usePelClipHere = true;
    }

    // Chroma plane geometry (width always halved — YV12/YUY2 horizontal subsampling).
    nHeightUV = nHeight/yRatioUV;
    nWidthUV = nWidth/2; // for YV12
    nHPaddingUV = nHPadding/2;
    nVPaddingUV = nVPadding/yRatioUV; // fixed: original divided nHPadding (horizontal) for a vertical quantity

    VPitchY = nWidth;
    VPitchUV = nWidthUV;

    // Full-resolution vector planes: X/Y components, backward/forward, luma/chroma.
    VXFullYB = new BYTE [nHeight*VPitchY];
    VXFullUVB = new BYTE [nHeightUV*VPitchUV];
    VYFullYB = new BYTE [nHeight*VPitchY];
    VYFullUVB = new BYTE [nHeightUV*VPitchUV];
    VXFullYF = new BYTE [nHeight*VPitchY];
    VXFullUVF = new BYTE [nHeightUV*VPitchUV];
    VYFullYF = new BYTE [nHeight*VPitchY];
    VYFullUVF = new BYTE [nHeightUV*VPitchUV];

    // Block-resolution (small) vector planes and occlusion masks.
    VXSmallYB = new BYTE [nBlkX*nBlkY];
    VYSmallYB = new BYTE [nBlkX*nBlkY];
    VXSmallUVB = new BYTE [nBlkX*nBlkY];
    VYSmallUVB = new BYTE [nBlkX*nBlkY];
    VXSmallYF = new BYTE [nBlkX*nBlkY];
    VYSmallYF = new BYTE [nBlkX*nBlkY];
    VXSmallUVF = new BYTE [nBlkX*nBlkY];
    VYSmallUVF = new BYTE [nBlkX*nBlkY];
    MaskSmallB = new BYTE [nBlkX*nBlkY];
    MaskFullYB = new BYTE [nHeight*VPitchY];
    MaskFullUVB = new BYTE [nHeightUV*VPitchUV];
    MaskSmallF = new BYTE [nBlkX*nBlkY];
    MaskFullYF = new BYTE [nHeight*VPitchY];
    MaskFullUVF = new BYTE [nHeightUV*VPitchUV];

    // Padded sub-pixel (pel2) plane geometry used when building the upsampled reference.
    int pel2WidthY = (nWidth + 2*nHPadding)*nPel;
    pel2HeightY = (nHeight + 2*nVPadding)*nPel;
    int pel2WidthUV = (nWidthUV + 2*nHPaddingUV)*nPel;
    pel2HeightUV = (nHeightUV + 2*nVPaddingUV)*nPel;
    pel2PitchY = (pel2WidthY + 15) & (~15);   // round pitch up to a 16-byte multiple
    pel2PitchUV = (pel2WidthUV + 15) & (~15);
    pel2OffsetY = pel2PitchY * nVPadding*nPel + nHPadding*nPel;
    pel2OffsetUV = pel2PitchUV * nVPaddingUV*nPel + nHPaddingUV*nPel;
    if (nPel > 1)
    {
        // NOTE(review): only the backward (B) pel2 planes are allocated here,
        // unlike MVFlowInter which also allocates F planes — confirm intentional.
        pel2PlaneYB = new BYTE [pel2PitchY*pel2HeightY];
        pel2PlaneUB = new BYTE [pel2PitchUV*pel2HeightUV];
        pel2PlaneVB = new BYTE [pel2PitchUV*pel2HeightUV];
    }

    int CPUF_Resize = env->GetCPUFlags();
    if (!isse)
        CPUF_Resize &= ~(CPUF_INTEGER_SSE | CPUF_SSE2); // fixed: original used logical '!', zeroing ALL flags

    upsizer = new SimpleResize(nWidth, nHeight, nBlkX, nBlkY, CPUF_Resize);
    upsizerUV = new SimpleResize(nWidthUV, nHeightUV, nBlkX, nBlkY, CPUF_Resize);

    DstPlanes = new YUY2Planes(nWidth, nHeight, vi.pixel_type, isse);
}
// MDegrain1 (v2, super-clip interface): temporal degrain with one backward and
// one forward vector clip. When mvfw is null, the single clip mvbw carries both
// directions as a group of two (the trailing ctor argument selects the member).
// Selects overlap/degrain kernels (SSE2 / MMX / C) by block size and chroma ratio.
MVDegrain1::MVDegrain1(
    PClip _child, PClip _super, PClip mvbw, PClip mvfw,
    int _thSAD, int _thSADC, int _YUVplanes, int _nLimit, int _nLimitC,
    int _nSCD1, int _nSCD2, bool _isse, bool _planar, bool _lsb_flag,
    bool mt_flag, IScriptEnvironment* env
)
:   GenericVideoFilter(_child)
,   MVFilter((! mvfw) ? mvbw : mvfw, "MDegrain1", env, (! mvfw) ? 2 : 1, (! mvfw) ? 1 : 0)
,   mvClipF((! mvfw) ? mvbw : mvfw, _nSCD1, _nSCD2, env, (! mvfw) ? 2 : 1, (! mvfw) ? 1 : 0)
,   mvClipB((! mvfw) ? mvbw : mvbw, _nSCD1, _nSCD2, env, (! mvfw) ? 2 : 1, (! mvfw) ? 0 : 0)
,   super(_super)
,   lsb_flag(_lsb_flag)
,   height_lsb_mul((_lsb_flag) ? 2 : 1)
,   DstShort(0)
,   DstInt(0)
{
    thSAD = _thSAD*mvClipB.GetThSCD1()/_nSCD1;   // normalize to block SAD
    thSADC = _thSADC*mvClipB.GetThSCD1()/_nSCD1; // chroma threshold, normalized to block SAD
    YUVplanes = _YUVplanes;
    nLimit = _nLimit;
    nLimitC = _nLimitC;
    isse = _isse;
    planar = _planar;

    CheckSimilarity(mvClipF, "mvfw", env);
    CheckSimilarity(mvClipB, "mvbw", env);

    const ::VideoInfo & vi_super = _super->GetVideoInfo();
    // get parameters of prepared super clip - v2.0 (packed into the audio-sample field)
    SuperParams64Bits params;
    memcpy(&params, &vi_super.num_audio_samples, 8); // fixed: was garbled "¶ms"
    int nHeightS = params.nHeight;
    int nSuperHPad = params.nHPad;
    int nSuperVPad = params.nVPad;
    int nSuperPel = params.nPel;
    nSuperModeYUV = params.nModeYUV;
    int nSuperLevels = params.nLevels;

    pRefBGOF = new MVGroupOfFrames(nSuperLevels, nWidth, nHeight, nSuperPel, nSuperHPad, nSuperVPad, nSuperModeYUV, isse, yRatioUV, mt_flag);
    pRefFGOF = new MVGroupOfFrames(nSuperLevels, nWidth, nHeight, nSuperPel, nSuperHPad, nSuperVPad, nSuperModeYUV, isse, yRatioUV, mt_flag);
    int nSuperWidth = vi_super.width;
    int nSuperHeight = vi_super.height;

    if ( nHeight != nHeightS || nHeight != vi.height
        || nWidth != nSuperWidth-nSuperHPad*2 || nWidth != vi.width
        || nPel != nSuperPel)
    {
        env->ThrowError("MDegrain1 : wrong source or super frame size");
    }

    // Interleaved YUY2 needs planar scratch buffers unless the caller opts into planar mode.
    if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 && !planar)
    {
        DstPlanes = new YUY2Planes(nWidth, nHeight * height_lsb_mul);
        SrcPlanes = new YUY2Planes(nWidth, nHeight);
    }
    dstShortPitch = ((nWidth + 15)/16)*16; // round up to 16 elements
    dstIntPitch = dstShortPitch;
    if (nOverlapX >0 || nOverlapY>0)
    {
        OverWins = new OverlapWindows(nBlkSizeX, nBlkSizeY, nOverlapX, nOverlapY);
        OverWinsUV = new OverlapWindows(nBlkSizeX/2, nBlkSizeY/yRatioUV, nOverlapX/2, nOverlapY/yRatioUV);
        if (lsb_flag) { DstInt   = new int           [dstIntPitch   * nHeight]; }
        else          { DstShort = new unsigned short[dstShortPitch * nHeight]; }
    }

    // 16-bit (lsb) overlap kernels — C implementations only.
    // NOTE(review): as in the original, an unhandled nBlkSizeY inside a case
    // leaves OVERSLUMALSB unset; presumably block sizes are validated upstream.
    switch (nBlkSizeX)
    {
    case 32:
        if (nBlkSizeY==16)      { OVERSLUMALSB = OverlapsLsb_C<32,16>; OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<16,8>  : OverlapsLsb_C<16,16>; }
        else if (nBlkSizeY==32) { OVERSLUMALSB = OverlapsLsb_C<32,32>; OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<16,16> : OverlapsLsb_C<16,32>; }
        break;
    case 16:
        if (nBlkSizeY==16)      { OVERSLUMALSB = OverlapsLsb_C<16,16>; OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<8,8> : OverlapsLsb_C<8,16>; }
        else if (nBlkSizeY==8)  { OVERSLUMALSB = OverlapsLsb_C<16,8>;  OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<8,4> : OverlapsLsb_C<8,8>;  }
        else if (nBlkSizeY==2)  { OVERSLUMALSB = OverlapsLsb_C<16,2>;  OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<8,1> : OverlapsLsb_C<8,2>;  }
        break;
    case 4:
        OVERSLUMALSB = OverlapsLsb_C<4,4>; OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<2,2> : OverlapsLsb_C<2,4>;
        break;
    case 8:
    default:
        if (nBlkSizeY==8)       { OVERSLUMALSB = OverlapsLsb_C<8,8>; OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<4,4> : OverlapsLsb_C<4,8>; }
        else if (nBlkSizeY==4)  { OVERSLUMALSB = OverlapsLsb_C<8,4>; OVERSCHROMALSB = (yRatioUV==2) ? OverlapsLsb_C<4,2> : OverlapsLsb_C<4,4>; }
    }

    // Runtime kernel dispatch: SSE2 → MMX (isse) → plain C fallback.
    if ( ((env->GetCPUFlags() & CPUF_SSE2) != 0) && isse ) // '&&' instead of bitwise '&' on bools (same result)
    {
        switch (nBlkSizeX)
        {
        case 32:
            if (nBlkSizeY==16)      { OVERSLUMA = Overlaps32x16_sse2; DEGRAINLUMA = Degrain1_sse2<32,16>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps16x8_sse2    : Overlaps16x16_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_sse2<16,8>  : Degrain1_sse2<16,16>; }
            else if (nBlkSizeY==32) { OVERSLUMA = Overlaps32x32_sse2; DEGRAINLUMA = Degrain1_sse2<32,32>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps16x16_sse2   : Overlaps16x32_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_sse2<16,16> : Degrain1_sse2<16,32>; }
            break;
        case 16:
            if (nBlkSizeY==16)      { OVERSLUMA = Overlaps16x16_sse2; DEGRAINLUMA = Degrain1_sse2<16,16>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps8x8_sse2    : Overlaps8x16_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_sse2<8,8>  : Degrain1_sse2<8,16>; }
            else if (nBlkSizeY==8)  { OVERSLUMA = Overlaps16x8_sse2;  DEGRAINLUMA = Degrain1_sse2<16,8>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps8x4_sse2    : Overlaps8x8_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_sse2<8,4>  : Degrain1_sse2<8,8>; }
            else if (nBlkSizeY==2)  { OVERSLUMA = Overlaps16x2_sse2;  DEGRAINLUMA = Degrain1_sse2<16,2>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps8x1_sse2    : Overlaps8x2_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_sse2<8,1>  : Degrain1_sse2<8,2>; }
            break;
        case 4:
            // 4x4 luma uses the MMX degrain; 2-wide chroma has no SIMD kernel.
            OVERSLUMA = Overlaps4x4_sse2; DEGRAINLUMA = Degrain1_mmx<4,4>;
            OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<2,2>   : Overlaps_C<2,4>;
            DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<2,2>   : Degrain1_C<2,4>;
            break;
        case 8:
        default:
            if (nBlkSizeY==8)       { OVERSLUMA = Overlaps8x8_sse2; DEGRAINLUMA = Degrain1_sse2<8,8>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps4x4_sse2   : Overlaps4x8_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<4,4>  : Degrain1_mmx<4,8>; }
            else if (nBlkSizeY==4)  { OVERSLUMA = Overlaps8x4_sse2; DEGRAINLUMA = Degrain1_sse2<8,4>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps4x2_sse2   : Overlaps4x4_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<4,2>  : Degrain1_mmx<4,4>; }
        }
    }
    else if ( isse )
    {
        switch (nBlkSizeX)
        {
        case 32:
            if (nBlkSizeY==16)      { OVERSLUMA = Overlaps32x16_sse2; DEGRAINLUMA = Degrain1_mmx<32,16>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps16x8_sse2   : Overlaps16x16_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<16,8>  : Degrain1_mmx<16,16>; }
            else if (nBlkSizeY==32) { OVERSLUMA = Overlaps32x32_sse2; DEGRAINLUMA = Degrain1_mmx<32,32>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps16x16_sse2  : Overlaps16x32_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<16,16> : Degrain1_mmx<16,32>; }
            break;
        case 16:
            if (nBlkSizeY==16)      { OVERSLUMA = Overlaps16x16_sse2; DEGRAINLUMA = Degrain1_mmx<16,16>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps8x8_sse2   : Overlaps8x16_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<8,8>  : Degrain1_mmx<8,16>; }
            else if (nBlkSizeY==8)  { OVERSLUMA = Overlaps16x8_sse2;  DEGRAINLUMA = Degrain1_mmx<16,8>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps8x4_sse2   : Overlaps8x8_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<8,4>  : Degrain1_mmx<8,8>; }
            else if (nBlkSizeY==2)  { OVERSLUMA = Overlaps16x2_sse2;  DEGRAINLUMA = Degrain1_mmx<16,2>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps8x1_sse2   : Overlaps8x2_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<8,1>  : Degrain1_mmx<8,2>; }
            break;
        case 4:
            OVERSLUMA = Overlaps4x4_sse2; DEGRAINLUMA = Degrain1_mmx<4,4>;
            OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<2,2> : Overlaps_C<2,4>;
            DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<2,2> : Degrain1_C<2,4>;
            break;
        case 8:
        default:
            if (nBlkSizeY==8)       { OVERSLUMA = Overlaps8x8_sse2; DEGRAINLUMA = Degrain1_mmx<8,8>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps4x4_sse2  : Overlaps4x8_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<4,4> : Degrain1_mmx<4,8>; }
            else if (nBlkSizeY==4)  { OVERSLUMA = Overlaps8x4_sse2; DEGRAINLUMA = Degrain1_mmx<8,4>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps4x2_sse2  : Overlaps4x4_sse2;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_mmx<4,2> : Degrain1_mmx<4,4>; }
        }
    }
    else
    {
        switch (nBlkSizeX)
        {
        case 32:
            if (nBlkSizeY==16)      { OVERSLUMA = Overlaps_C<32,16>; DEGRAINLUMA = Degrain1_C<32,16>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<16,8>   : Overlaps_C<16,16>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<16,8>   : Degrain1_C<16,16>; }
            else if (nBlkSizeY==32) { OVERSLUMA = Overlaps_C<32,32>; DEGRAINLUMA = Degrain1_C<32,32>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<16,16>  : Overlaps_C<16,32>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<16,16>  : Degrain1_C<16,32>; }
            break;
        case 16:
            if (nBlkSizeY==16)      { OVERSLUMA = Overlaps_C<16,16>; DEGRAINLUMA = Degrain1_C<16,16>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<8,8>  : Overlaps_C<8,16>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<8,8>  : Degrain1_C<8,16>; }
            else if (nBlkSizeY==8)  { OVERSLUMA = Overlaps_C<16,8>;  DEGRAINLUMA = Degrain1_C<16,8>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<8,4>  : Overlaps_C<8,8>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<8,4>  : Degrain1_C<8,8>; }
            else if (nBlkSizeY==2)  { OVERSLUMA = Overlaps_C<16,2>;  DEGRAINLUMA = Degrain1_C<16,2>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<8,1>  : Overlaps_C<8,2>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<8,1>  : Degrain1_C<8,2>; }
            break;
        case 4:
            OVERSLUMA = Overlaps_C<4,4>; DEGRAINLUMA = Degrain1_C<4,4>;
            OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<2,2> : Overlaps_C<2,4>;
            DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<2,2> : Degrain1_C<2,4>;
            break;
        case 8:
        default:
            if (nBlkSizeY==8)       { OVERSLUMA = Overlaps_C<8,8>; DEGRAINLUMA = Degrain1_C<8,8>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<4,4> : Overlaps_C<4,8>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<4,4> : Degrain1_C<4,8>; }
            else if (nBlkSizeY==4)  { OVERSLUMA = Overlaps_C<8,4>; DEGRAINLUMA = Degrain1_C<8,4>;
                                      OVERSCHROMA   = (yRatioUV==2) ? Overlaps_C<4,2> : Overlaps_C<4,4>;
                                      DEGRAINCHROMA = (yRatioUV==2) ? Degrain1_C<4,2> : Degrain1_C<4,4>; }
        }
    }

    // Scratch block sized for the largest (32x32) block; the lsb half follows the msb half.
    const int tmp_size = 32 * 32;
    tmpBlock = new BYTE[tmp_size * height_lsb_mul];
    tmpBlockLsb = (lsb_flag) ? (tmpBlock + tmp_size) : 0;

    if (lsb_flag)
    {
        vi.height <<= 1; // lsb mode: output frame carries an extra lsb plane stacked below
    }
}
// MFlowInter (v2, super-clip interface): motion-compensated frame interpolation
// at position time256/256 between frames, with occlusion masks and optional blend.
MVFlowInter::MVFlowInter(PClip _child, PClip super, PClip _mvbw, PClip _mvfw, int _time256, double _ml,
                         bool _blend, int nSCD1, int nSCD2, bool _isse, bool _planar, IScriptEnvironment* env) :
    GenericVideoFilter(_child),
    MVFilter(_mvfw, "MFlowInter", env),
    mvClipB(_mvbw, nSCD1, nSCD2, env),
    mvClipF(_mvfw, nSCD1, nSCD2, env)
{
    time256 = _time256; // interpolation position in 1/256 units
    ml = _ml;           // mask strength
    isse = _isse;
    planar = _planar;   // fixed: original read "planar = planar;" (self-assignment, left member uninitialized)
    blend = _blend;

    if (!mvClipB.IsBackward())
        env->ThrowError("MFlowInter: wrong backward vectors");
    if (mvClipF.IsBackward())
        env->ThrowError("MFlowInter: wrong forward vectors");

    CheckSimilarity(mvClipB, "mvbw", env);
    CheckSimilarity(mvClipF, "mvfw", env);

    // Unpack super-clip parameters smuggled through the audio-sample field (v2 convention).
    SuperParams64Bits params;
    memcpy(&params, &super->GetVideoInfo().num_audio_samples, 8); // fixed: was garbled "¶ms"
    int nHeightS = params.nHeight;
    int nSuperHPad = params.nHPad;
    int nSuperVPad = params.nVPad;
    int nSuperPel = params.nPel;
    int nSuperModeYUV = params.nModeYUV;
    int nSuperLevels = params.nLevels;
    int nSuperWidth = super->GetVideoInfo().width; // really super
    int nSuperHeight = super->GetVideoInfo().height;

    if (nHeight != nHeightS || nWidth != nSuperWidth-nSuperHPad*2)
        env->ThrowError("MFlowInter : wrong super frame clip");

    if (nPel==1)
        finest = super; // v2.0.9.1
    else
    {
        finest = new MVFinest(super, isse, env);
        AVSValue cache_args[1] = { finest };
        finest = env->Invoke("InternalCache", AVSValue(cache_args,1)).AsClip(); // add cache for speed
    }

    // Block grid may be padded by one block per axis for full frame coverage.
    nBlkXP = (nBlkX*(nBlkSizeX - nOverlapX) + nOverlapX < nWidth) ? nBlkX+1 : nBlkX;
    nBlkYP = (nBlkY*(nBlkSizeY - nOverlapY) + nOverlapY < nHeight) ? nBlkY+1 : nBlkY;
    nWidthP = nBlkXP*(nBlkSizeX - nOverlapX) + nOverlapX;
    nHeightP = nBlkYP*(nBlkSizeY - nOverlapY) + nOverlapY;

    // for YV12 (chroma width always halved)
    nWidthPUV = nWidthP/2;
    nHeightPUV = nHeightP/yRatioUV;
    nHeightUV = nHeight/yRatioUV;
    nWidthUV = nWidth/2;
    nHPaddingUV = nHPadding/2;
    nVPaddingUV = nVPadding/yRatioUV;

    VPitchY = (nWidthP + 15) & (~15);   // round pitch up to a 16-byte multiple
    VPitchUV = (nWidthPUV + 15) & (~15);

    // Vector planes: X/Y components, backward/forward (B/F) and two-frame-distance
    // variants (BB/FF), at full and block resolution, luma and chroma.
    VXFullYB = new BYTE [nHeightP*VPitchY];
    VXFullUVB = new BYTE [nHeightPUV*VPitchUV];
    VYFullYB = new BYTE [nHeightP*VPitchY];
    VYFullUVB = new BYTE [nHeightPUV*VPitchUV];
    VXFullYF = new BYTE [nHeightP*VPitchY];
    VXFullUVF = new BYTE [nHeightPUV*VPitchUV];
    VYFullYF = new BYTE [nHeightP*VPitchY];
    VYFullUVF = new BYTE [nHeightPUV*VPitchUV];
    VXSmallYB = new BYTE [nBlkXP*nBlkYP];
    VYSmallYB = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVB = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVB = new BYTE [nBlkXP*nBlkYP];
    VXSmallYF = new BYTE [nBlkXP*nBlkYP];
    VYSmallYF = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVF = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVF = new BYTE [nBlkXP*nBlkYP];
    VXFullYBB = new BYTE [nHeightP*VPitchY];
    VXFullUVBB = new BYTE [nHeightPUV*VPitchUV];
    VYFullYBB = new BYTE [nHeightP*VPitchY];
    VYFullUVBB = new BYTE [nHeightPUV*VPitchUV];
    VXFullYFF = new BYTE [nHeightP*VPitchY];
    VXFullUVFF = new BYTE [nHeightPUV*VPitchUV];
    VYFullYFF = new BYTE [nHeightP*VPitchY];
    VYFullUVFF = new BYTE [nHeightPUV*VPitchUV];
    VXSmallYBB = new BYTE [nBlkXP*nBlkYP];
    VYSmallYBB = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVBB = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVBB = new BYTE [nBlkXP*nBlkYP];
    VXSmallYFF = new BYTE [nBlkXP*nBlkYP];
    VYSmallYFF = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVFF = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVFF = new BYTE [nBlkXP*nBlkYP];

    // Occlusion masks and per-block SAD masks.
    MaskSmallB = new BYTE [nBlkXP*nBlkYP];
    MaskFullYB = new BYTE [nHeightP*VPitchY];
    MaskFullUVB = new BYTE [nHeightPUV*VPitchUV];
    MaskSmallF = new BYTE [nBlkXP*nBlkYP];
    MaskFullYF = new BYTE [nHeightP*VPitchY];
    MaskFullUVF = new BYTE [nHeightPUV*VPitchUV];
    SADMaskSmallB = new BYTE [nBlkXP*nBlkYP];
    SADMaskSmallF = new BYTE [nBlkXP*nBlkYP];

    int CPUF_Resize = env->GetCPUFlags();
    if (!isse)
        CPUF_Resize &= ~(CPUF_INTEGER_SSE | CPUF_SSE2); // fixed: original used logical '!', zeroing ALL flags

    upsizer = new SimpleResize(nWidthP, nHeightP, nBlkXP, nBlkYP, CPUF_Resize);
    upsizerUV = new SimpleResize(nWidthPUV, nHeightPUV, nBlkXP, nBlkYP, CPUF_Resize);

    // Per-byte vector-to-time lookup tables for the chosen interpolation position.
    LUTVB = new int[256];
    LUTVF = new int[256];
    Create_LUTV(time256, LUTVB, LUTVF);

    // Interleaved YUY2 needs a planar conversion buffer unless the caller opts into planar mode.
    if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 && !planar)
    {
        DstPlanes = new YUY2Planes(nWidth, nHeight);
    }
}
// MVFlowInter (v1, pelclip interface): motion-compensated frame interpolation
// with SAD-based masks; an externally-supplied upsampled clip (pelclip) may
// serve as the sub-pixel source when nPel > 1.
MVFlowInter::MVFlowInter(PClip _child, PClip _mvbw, PClip _mvfw, int _time256, double _ml, double _mSAD,
                         double _mSADC, PClip _pelclip, int _nIdx, int nSCD1, int nSCD2, bool _mmx, bool _isse,
                         IScriptEnvironment* env) :
    GenericVideoFilter(_child),
    MVFilter(_mvfw, "MVFlowInter", env),
    mvClipB(_mvbw, nSCD1, nSCD2, env, true),
    mvClipF(_mvfw, nSCD1, nSCD2, env, true),
    pelclip(_pelclip)
{
    time256 = _time256; // interpolation position in 1/256 units
    ml = _ml;           // mask strength
    mSAD = _mSAD;       // luma SAD mask scale
    mSADC = _mSADC;     // chroma SAD mask scale
    nIdx = _nIdx;
    mmx = _mmx;
    isse = _isse;

    // normalize mask scales to per-block values; fixed: original used the
    // non-standard functional cast "unsigned int(...)" (two-word type name, MSVC-only)
    normSAD1024 = static_cast<unsigned int>( 1024 * 255 / (mSAD * nBlkSizeX*nBlkSizeY) );
    normSADC1024 = static_cast<unsigned int>( 1024 * 255 / (mSADC * nBlkSizeX*nBlkSizeY) );

    if (!mvClipB.IsBackward())
        env->ThrowError("MVFlowInter: wrong backward vectors");
    if (mvClipF.IsBackward())
        env->ThrowError("MVFlowInter: wrong forward vectors");

    CheckSimilarity(mvClipB, "mvbw", env);
    CheckSimilarity(mvClipF, "mvfw", env);

    // 3 frames cover prev/current/next in the shared frame cache for this index.
    mvCore->AddFrames(nIdx, 3, mvClipB.GetLevelCount(), nWidth, nHeight, nPel, nHPadding, nVPadding,
                      YUVPLANES, isse, yRatioUV);

    usePelClipHere = false;
    if (pelclip && (nPel > 1))
    {
        if (pelclip->GetVideoInfo().width != nWidth*nPel || pelclip->GetVideoInfo().height != nHeight*nPel)
            env->ThrowError("MVFlowInter: pelclip frame size must be Pel of source!");
        else
            usePelClipHere = true;
    }

    // Block grid may be padded by one block per axis for full frame coverage.
    nBlkXP = (nBlkX*(nBlkSizeX - nOverlapX) + nOverlapX < nWidth) ? nBlkX+1 : nBlkX;
    nBlkYP = (nBlkY*(nBlkSizeY - nOverlapY) + nOverlapY < nHeight) ? nBlkY+1 : nBlkY;
    nWidthP = nBlkXP*(nBlkSizeX - nOverlapX) + nOverlapX;
    nHeightP = nBlkYP*(nBlkSizeY - nOverlapY) + nOverlapY;

    // for YV12 (chroma width always halved)
    nWidthPUV = nWidthP/2;
    nHeightPUV = nHeightP/yRatioUV;
    nHeightUV = nHeight/yRatioUV;
    nWidthUV = nWidth/2;
    nHPaddingUV = nHPadding/2;
    nVPaddingUV = nVPadding/yRatioUV;

    VPitchY = (nWidthP + 15) & (~15);   // round pitch up to a 16-byte multiple
    VPitchUV = (nWidthPUV + 15) & (~15);

    // Vector planes: X/Y components, backward/forward (B/F) and two-frame-distance
    // variants (BB/FF), at full and block resolution, luma and chroma.
    VXFullYB = new BYTE [nHeightP*VPitchY];
    VXFullUVB = new BYTE [nHeightPUV*VPitchUV];
    VYFullYB = new BYTE [nHeightP*VPitchY];
    VYFullUVB = new BYTE [nHeightPUV*VPitchUV];
    VXFullYF = new BYTE [nHeightP*VPitchY];
    VXFullUVF = new BYTE [nHeightPUV*VPitchUV];
    VYFullYF = new BYTE [nHeightP*VPitchY];
    VYFullUVF = new BYTE [nHeightPUV*VPitchUV];
    VXSmallYB = new BYTE [nBlkXP*nBlkYP];
    VYSmallYB = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVB = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVB = new BYTE [nBlkXP*nBlkYP];
    VXSmallYF = new BYTE [nBlkXP*nBlkYP];
    VYSmallYF = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVF = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVF = new BYTE [nBlkXP*nBlkYP];
    VXFullYBB = new BYTE [nHeightP*VPitchY];
    VXFullUVBB = new BYTE [nHeightPUV*VPitchUV];
    VYFullYBB = new BYTE [nHeightP*VPitchY];
    VYFullUVBB = new BYTE [nHeightPUV*VPitchUV];
    VXFullYFF = new BYTE [nHeightP*VPitchY];
    VXFullUVFF = new BYTE [nHeightPUV*VPitchUV];
    VYFullYFF = new BYTE [nHeightP*VPitchY];
    VYFullUVFF = new BYTE [nHeightPUV*VPitchUV];
    VXSmallYBB = new BYTE [nBlkXP*nBlkYP];
    VYSmallYBB = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVBB = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVBB = new BYTE [nBlkXP*nBlkYP];
    VXSmallYFF = new BYTE [nBlkXP*nBlkYP];
    VYSmallYFF = new BYTE [nBlkXP*nBlkYP];
    VXSmallUVFF = new BYTE [nBlkXP*nBlkYP];
    VYSmallUVFF = new BYTE [nBlkXP*nBlkYP];

    // Occlusion masks and per-block SAD masks.
    MaskSmallB = new BYTE [nBlkXP*nBlkYP];
    MaskFullYB = new BYTE [nHeightP*VPitchY];
    MaskFullUVB = new BYTE [nHeightPUV*VPitchUV];
    MaskSmallF = new BYTE [nBlkXP*nBlkYP];
    MaskFullYF = new BYTE [nHeightP*VPitchY];
    MaskFullUVF = new BYTE [nHeightPUV*VPitchUV];
    SADMaskSmallB = new BYTE [nBlkXP*nBlkYP];
    SADMaskSmallF = new BYTE [nBlkXP*nBlkYP];

    // Padded sub-pixel (pel2) plane geometry used when building the upsampled reference.
    int pel2WidthY = (nWidth + 2*nHPadding)*nPel;
    pel2HeightY = (nHeight + 2*nVPadding)*nPel;
    int pel2WidthUV = (nWidthUV + 2*nHPaddingUV)*nPel;
    pel2HeightUV = (nHeightUV + 2*nVPaddingUV)*nPel;
    pel2PitchY = (pel2WidthY + 15) & (~15);
    pel2PitchUV = (pel2WidthUV + 15) & (~15);
    pel2OffsetY = pel2PitchY * nVPadding*nPel + nHPadding*nPel;
    pel2OffsetUV = pel2PitchUV * nVPaddingUV*nPel + nHPaddingUV*nPel;
    if (nPel>1)
    {
        pel2PlaneYB = new BYTE [pel2PitchY*pel2HeightY];
        pel2PlaneUB = new BYTE [pel2PitchUV*pel2HeightUV];
        pel2PlaneVB = new BYTE [pel2PitchUV*pel2HeightUV];
        pel2PlaneYF = new BYTE [pel2PitchY*pel2HeightY];
        pel2PlaneUF = new BYTE [pel2PitchUV*pel2HeightUV];
        pel2PlaneVF = new BYTE [pel2PitchUV*pel2HeightUV];
    }

    int CPUF_Resize = env->GetCPUFlags();
    if (!isse)
        CPUF_Resize &= ~(CPUF_INTEGER_SSE | CPUF_SSE2); // fixed: original used logical '!', zeroing ALL flags

    upsizer = new SimpleResize(nWidthP, nHeightP, nBlkXP, nBlkYP, CPUF_Resize);
    upsizerUV = new SimpleResize(nWidthPUV, nHeightPUV, nBlkXP, nBlkYP, CPUF_Resize);

    // NOTE(review): unlike the v2 ctor, LUTVB/LUTVF are not allocated here —
    // presumably they are fixed-size member arrays in this class; confirm in the header.
    Create_LUTV(time256, LUTVB, LUTVF);

    SrcPlanes = new YUY2Planes(nWidth, nHeight, vi.pixel_type, isse);
    RefPlanes = new YUY2Planes(nWidth, nHeight, vi.pixel_type, isse);
    DstPlanes = new YUY2Planes(nWidth, nHeight, vi.pixel_type, isse);
}