Ejemplo n.º 1
0
static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other_pitch, int src_width, int src_height, float weight, IScriptEnvironment *env) {
  if ((weight>0.4961f) && (weight<0.5039f)) 
  {
    //average of two planes
    if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(otherp, 16)) {
      average_plane_sse2(srcp, otherp, src_pitch, other_pitch, src_width, src_height);
    } 
    else 
#ifdef X86_32
      if (env->GetCPUFlags() & CPUF_INTEGER_SSE) {
        average_plane_isse(srcp, otherp, src_pitch, other_pitch, src_width, src_height);
      } else
#endif
      {
        average_plane_c(srcp, otherp, src_pitch, other_pitch, src_width, src_height);
      }
  } 
  else 
  {
    int iweight = (int)(weight*32767.0f);
    int invweight = 32767-iweight;
    //real merge
    if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(otherp, 16))
    {
      weighted_merge_planar_sse2(srcp, otherp, src_pitch, other_pitch, src_width, src_height, iweight, invweight);
    }
    else
#ifdef X86_32
      if (env->GetCPUFlags() & CPUF_MMX)
      {
        weighted_merge_planar_mmx(srcp, otherp, src_pitch, other_pitch, src_width, src_height, iweight, invweight);
      }
      else
#endif
      {
        iweight = (int)(weight*65535.0f);
        invweight = 65535-iweight;
        weighted_merge_planar_c(srcp, otherp, src_pitch, other_pitch, src_width, src_height, iweight, invweight);
      }
  }
}
Ejemplo n.º 2
0
extern "C" void Store_SSE2(UINT32* dst, UINT32 rgba, unsigned len)
{
	OP_ASSERT(IsPtrAligned(dst, 4));

	// Guard against short runs where the setup cost will be too
	// high to benefit from the unrolling and 128 bit stores.
	// It's also set high enough to avoid an extra check when
	// aligning memory.
	if (len > 15)
	{
		// Aligned memory accesses can be significantly faster,
		// write single pixels util we get to an aligned memory
		// address.
		int unaligned = PixelsUntilAligned(dst);
		if (unaligned)
		{
			len -= unaligned;
			switch (unaligned)
			{
			case 3: *dst++ = rgba;
			case 2: *dst++ = rgba;
			case 1: *dst++ = rgba;
			}
		}

		// Duplicate the source color four times in a 128 bit register.
		// This is needed for both the unrolled and normal
		// inner loop.
		__m128i src = _mm_cvtsi32_si128(rgba);
		src = _mm_shuffle_epi32(src, _MM_SHUFFLE(0, 0, 0, 0));

		// Unrolled inner loop that stores 32 pixels at a time.
		unsigned int length = len / 32;
		while (length--)
		{
			_mm_store_si128((__m128i *)(dst + 0 * 4), src);
			_mm_store_si128((__m128i *)(dst + 1 * 4), src);
			_mm_store_si128((__m128i *)(dst + 2 * 4), src);
			_mm_store_si128((__m128i *)(dst + 3 * 4), src);
			_mm_store_si128((__m128i *)(dst + 4 * 4), src);
			_mm_store_si128((__m128i *)(dst + 5 * 4), src);
			_mm_store_si128((__m128i *)(dst + 6 * 4), src);
			_mm_store_si128((__m128i *)(dst + 7 * 4), src);

			dst += 32;
		}

		// Handle the remaining pixels four at a time.
		len &= 31;
		length = len / 4;
		while (length--)
		{
			_mm_store_si128((__m128i *)dst, src);
			dst += 4;
		}

		// Leave the last 1-3 pixels for the regular loop.
		len &= 3;
	}

	// One pixel at a time.
	while (len--)
		*dst++ = rgba;
}
Ejemplo n.º 3
0
PVideoFrame __stdcall MergeLuma::GetFrame(int n, IScriptEnvironment* env)
{
  PVideoFrame src = child->GetFrame(n, env);

  if (weight<0.0039f) return src;

  PVideoFrame luma = clip->GetFrame(n, env);

  if (vi.IsYUY2()) {
    env->MakeWritable(&src);
    BYTE* srcp = src->GetWritePtr();
    const BYTE* lumap = luma->GetReadPtr();

    int isrc_pitch = src->GetPitch(); 
    int iluma_pitch = luma->GetPitch();

    int h = src->GetHeight();
    int w = src->GetRowSize();

    if (weight<0.9961f)
    {
      if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(lumap, 16))
      {
        weighted_merge_luma_yuy2_sse2(srcp, lumap, isrc_pitch, iluma_pitch, w, h, (int)(weight*32768.0f), 32768-(int)(weight*32768.0f));
      }
      else
#ifdef X86_32
      if (env->GetCPUFlags() & CPUF_MMX)
      {
        weighted_merge_luma_yuy2_mmx(srcp, lumap, isrc_pitch, iluma_pitch, w, h, (int)(weight*32768.0f), 32768-(int)(weight*32768.0f));
      }
      else
#endif
      {
        weighted_merge_luma_yuy2_c(srcp, lumap, isrc_pitch, iluma_pitch, w, h, (int)(weight*32768.0f), 32768-(int)(weight*32768.0f));
      }
    }
    else
    {
      if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(lumap, 16))
      {
        replace_luma_yuy2_sse2(srcp,lumap,isrc_pitch,iluma_pitch,w,h);
      }
      else
#ifdef X86_32
      if (env->GetCPUFlags() & CPUF_MMX)
      {
        replace_luma_yuy2_mmx(srcp,lumap,isrc_pitch,iluma_pitch,w,h);
      }
      else
#endif
      {
        replace_luma_yuy2_c(srcp,lumap,isrc_pitch,iluma_pitch,w,h);
      }
    }
    return src;
  }  // Planar
  if (weight>0.9961f) {
    const VideoInfo& vi2 = clip->GetVideoInfo();
    if (luma->IsWritable() && vi.IsSameColorspace(vi2)) {
      if (luma->GetRowSize(PLANAR_U)) {
        luma->GetWritePtr(PLANAR_Y); //Must be requested BUT only if we actually do something
        env->BitBlt(luma->GetWritePtr(PLANAR_U),luma->GetPitch(PLANAR_U),src->GetReadPtr(PLANAR_U),src->GetPitch(PLANAR_U),src->GetRowSize(PLANAR_U),src->GetHeight(PLANAR_U));
        env->BitBlt(luma->GetWritePtr(PLANAR_V),luma->GetPitch(PLANAR_V),src->GetReadPtr(PLANAR_V),src->GetPitch(PLANAR_V),src->GetRowSize(PLANAR_V),src->GetHeight(PLANAR_V));
      }
      return luma;
    }
    else { // avoid the cost of 2 chroma blits
      PVideoFrame dst = env->NewVideoFrame(vi);
      
      env->BitBlt(dst->GetWritePtr(PLANAR_Y),dst->GetPitch(PLANAR_Y),luma->GetReadPtr(PLANAR_Y),luma->GetPitch(PLANAR_Y),luma->GetRowSize(PLANAR_Y),luma->GetHeight(PLANAR_Y));
      if (src->GetRowSize(PLANAR_U) && dst->GetRowSize(PLANAR_U)) {
        env->BitBlt(dst->GetWritePtr(PLANAR_U),dst->GetPitch(PLANAR_U),src->GetReadPtr(PLANAR_U),src->GetPitch(PLANAR_U),src->GetRowSize(PLANAR_U),src->GetHeight(PLANAR_U));
        env->BitBlt(dst->GetWritePtr(PLANAR_V),dst->GetPitch(PLANAR_V),src->GetReadPtr(PLANAR_V),src->GetPitch(PLANAR_V),src->GetRowSize(PLANAR_V),src->GetHeight(PLANAR_V));
      }
      return dst;
    }
  } else { // weight <= 0.9961f
    env->MakeWritable(&src);
    BYTE* srcpY = (BYTE*)src->GetWritePtr(PLANAR_Y);
    BYTE* lumapY = (BYTE*)luma->GetReadPtr(PLANAR_Y);
    int src_pitch = src->GetPitch(PLANAR_Y);
    int luma_pitch = luma->GetPitch(PLANAR_Y);
    int src_width = src->GetRowSize(PLANAR_Y);
    int src_height = src->GetHeight(PLANAR_Y);

    merge_plane(srcpY, lumapY, src_pitch, luma_pitch, src_width, src_height, weight, env);
  }

  return src;
}
Ejemplo n.º 4
0
PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env)
{
  PVideoFrame src = child->GetFrame(n, env);

  if (weight<0.0039f) return src;

  PVideoFrame chroma = clip->GetFrame(n, env);

  int h = src->GetHeight();
  int w = src->GetRowSize(); // width in pixels

  if (weight<0.9961f) {
    if (vi.IsYUY2()) {
      env->MakeWritable(&src);
      BYTE* srcp = src->GetWritePtr();
      const BYTE* chromap = chroma->GetReadPtr();

      int src_pitch = src->GetPitch(); 
      int chroma_pitch = chroma->GetPitch();

      if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(chromap, 16))
      {
        weighted_merge_chroma_yuy2_sse2(srcp,chromap,src_pitch,chroma_pitch,w,h,(int)(weight*32768.0f),32768-(int)(weight*32768.0f));
      }
      else
#ifdef X86_32
      if (env->GetCPUFlags() & CPUF_MMX)
      {
        weighted_merge_chroma_yuy2_mmx(srcp,chromap,src_pitch,chroma_pitch,w,h,(int)(weight*32768.0f),32768-(int)(weight*32768.0f));
      }
      else
#endif
      {
        weighted_merge_chroma_yuy2_c(srcp,chromap,src_pitch,chroma_pitch,w,h,(int)(weight*32768.0f),32768-(int)(weight*32768.0f));
      }
    } else {  // Planar
      env->MakeWritable(&src);
      src->GetWritePtr(PLANAR_Y); //Must be requested

      BYTE* srcpU = (BYTE*)src->GetWritePtr(PLANAR_U);
      BYTE* chromapU = (BYTE*)chroma->GetReadPtr(PLANAR_U);
      BYTE* srcpV = (BYTE*)src->GetWritePtr(PLANAR_V);
      BYTE* chromapV = (BYTE*)chroma->GetReadPtr(PLANAR_V);
      int src_pitch_uv = src->GetPitch(PLANAR_U);
      int chroma_pitch_uv = chroma->GetPitch(PLANAR_U);
      int src_width_u = src->GetRowSize(PLANAR_U_ALIGNED);
      int src_width_v = src->GetRowSize(PLANAR_V_ALIGNED);
      int src_height_uv = src->GetHeight(PLANAR_U);

      merge_plane(srcpU, chromapU, src_pitch_uv, chroma_pitch_uv, src_width_u, src_height_uv, weight, env);
      merge_plane(srcpV, chromapV, src_pitch_uv, chroma_pitch_uv, src_width_v, src_height_uv, weight, env);
    }
  } else { // weight == 1.0
    if (vi.IsYUY2()) {
      const BYTE* srcp = src->GetReadPtr();
      env->MakeWritable(&chroma);
      BYTE* chromap = chroma->GetWritePtr();

      int src_pitch = src->GetPitch();  
      int chroma_pitch = chroma->GetPitch(); 

      if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(chromap, 16) && IsPtrAligned(srcp, 16))
      {
        replace_luma_yuy2_sse2(chromap,srcp,chroma_pitch,src_pitch,w,h);  // Just swap luma/chroma
      }
      else
#ifdef X86_32
      if (env->GetCPUFlags() & CPUF_MMX)
      {
        replace_luma_yuy2_mmx(chromap,srcp,chroma_pitch,src_pitch,w,h);  // Just swap luma/chroma
      }
      else
#endif
      {
        replace_luma_yuy2_c(chromap,srcp,chroma_pitch,src_pitch,w,h);  // Just swap luma/chroma
      }

      return chroma;
    } else {
      if (src->IsWritable()) {
        src->GetWritePtr(PLANAR_Y); //Must be requested
        env->BitBlt(src->GetWritePtr(PLANAR_U),src->GetPitch(PLANAR_U),chroma->GetReadPtr(PLANAR_U),chroma->GetPitch(PLANAR_U),chroma->GetRowSize(PLANAR_U),chroma->GetHeight(PLANAR_U));
        env->BitBlt(src->GetWritePtr(PLANAR_V),src->GetPitch(PLANAR_V),chroma->GetReadPtr(PLANAR_V),chroma->GetPitch(PLANAR_V),chroma->GetRowSize(PLANAR_V),chroma->GetHeight(PLANAR_V));
      }
      else { // avoid the cost of 2 chroma blits
        PVideoFrame dst = env->NewVideoFrame(vi);
        
        env->BitBlt(dst->GetWritePtr(PLANAR_Y),dst->GetPitch(PLANAR_Y),src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y),src->GetRowSize(PLANAR_Y),src->GetHeight(PLANAR_Y));
        env->BitBlt(dst->GetWritePtr(PLANAR_U),dst->GetPitch(PLANAR_U),chroma->GetReadPtr(PLANAR_U),chroma->GetPitch(PLANAR_U),chroma->GetRowSize(PLANAR_U),chroma->GetHeight(PLANAR_U));
        env->BitBlt(dst->GetWritePtr(PLANAR_V),dst->GetPitch(PLANAR_V),chroma->GetReadPtr(PLANAR_V),chroma->GetPitch(PLANAR_V),chroma->GetRowSize(PLANAR_V),chroma->GetHeight(PLANAR_V));
        return dst;
      }
    }
  }
  return src;
}