Exemplo n.º 1
0
/*
 * Prepare ft->filename for decoding through ffmpeg.
 *
 * Allocates the decode buffer, opens the file, reads its stream
 * information, selects the first audio stream and opens it, then fills in
 * ft->signal and ft->encoding.
 *
 * Returns SOX_SUCCESS, or SOX_EOF after reporting through lsx_fail() when
 * the file cannot be opened, stream info cannot be read, or no audio
 * stream could be opened.
 *
 * NOTE(review): the failure paths return without releasing audio_buf_raw
 * or the opened format context -- confirm whether the caller cleans up.
 */
static int startread(sox_format_t * ft)
{
  priv_t * ffmpeg = (priv_t *)ft->priv;
  AVFormatParameters params;
  int ret;
  int i;

  /* Zeroed decode buffer with 32 spare bytes; audio_buf_aligned is the
     ALIGN16'd pointer derived from it */
  ffmpeg->audio_buf_raw = lsx_calloc(1, (size_t)AVCODEC_MAX_AUDIO_FRAME_SIZE + 32);
  ffmpeg->audio_buf_aligned = ALIGN16(ffmpeg->audio_buf_raw);

  /* -1 means "no audio stream selected yet" */
  ffmpeg->audio_index = -1;

  /* Make every codec, demuxer and protocol known to ffmpeg */
  av_register_all();

  /* Open the file and let ffmpeg detect its container format */
  memset(&params, 0, sizeof(params));
  ret = av_open_input_file(&ffmpeg->ctxt, ft->filename, NULL, 0, &params);
  if (ret < 0) {
    lsx_fail("ffmpeg cannot open file for reading: %s (code %d)", ft->filename, ret);
    return SOX_EOF;
  }

  /* Read the per-stream codec parameters */
  ret = av_find_stream_info(ffmpeg->ctxt);
  if (ret < 0) {
    lsx_fail("ffmpeg could not find CODEC parameters for %s", ft->filename);
    return SOX_EOF;
  }

  /* Start playback (only meaningful for RTSP streams); result is ignored */
  av_read_play(ffmpeg->ctxt);

  /* Pick the first audio stream (FIXME: allow a different stream to be
     selected) */
  i = 0;
  while ((unsigned)i < ffmpeg->ctxt->nb_streams && ffmpeg->audio_index < 0) {
    if (ffmpeg->ctxt->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO)
      ffmpeg->audio_index = i;
    i++;
  }

  /* Open the chosen stream; all three conditions must hold, and they are
     evaluated left to right so the open is only attempted with a valid
     index */
  if (!(ffmpeg->audio_index >= 0 &&
        stream_component_open(ffmpeg, ffmpeg->audio_index) >= 0 &&
        ffmpeg->audio_stream >= 0)) {
    lsx_fail("ffmpeg could not open CODECs for %s", ft->filename);
    return SOX_EOF;
  }

  /* Describe the decoded audio to SoX: 16-bit signed samples */
  ft->encoding.encoding = SOX_ENCODING_SIGN2;
  ft->encoding.bits_per_sample = 16;
  ft->signal.channels = ffmpeg->audio_st->codec->channels;
  ft->signal.rate = ffmpeg->audio_st->codec->sample_rate;
  /* Length is left unknown: we currently cannot seek, and ffmpeg reports
     duration in time rather than in samples (although ffmpeg itself *can*
     seek) */
  ft->signal.length = 0;

  return SOX_SUCCESS;
}
Exemplo n.º 2
0
/*
 * GCM decryption step: fold src into the running GHASH, then CTR-decrypt
 * src_size bytes from src into dst.
 *
 * _ctx     : struct aes_gcm_ctx holding the expanded key and GCM state.
 * src      : input bytes; they are hashed before being decrypted.
 * src_size : number of bytes to process; need not be a multiple of
 *            GCM_BLOCK_SIZE.
 * dst      : output buffer.
 * dst_size : not consulted here.
 *
 * Always returns 0.
 *
 * Block counts and offsets are kept in size_t.  The earlier int
 * arithmetic truncated src_size and overflowed (undefined behavior) in
 * "blocks * GCM_BLOCK_SIZE" for inputs of 2 GiB or more.
 */
static int
aes_gcm_decrypt(void *_ctx, const void *src, size_t src_size,
		void *dst, size_t dst_size)
{
	struct aes_gcm_ctx *ctx = _ctx;
	size_t blocks = src_size / GCM_BLOCK_SIZE;
	size_t exp_blocks = blocks * GCM_BLOCK_SIZE;	/* bytes in whole blocks */
	size_t rest = src_size - exp_blocks;		/* trailing partial block */
	uint32_t counter;

	/* Authenticate the input before decrypting it, and account for its
	 * length in the GCM length field. */
	gcm_ghash(ctx, src, src_size);
	ctx->gcm.len.u[1] += src_size;

	if (blocks > 0) {
		ctr32_encrypt_blocks(src, dst,
				     blocks,
				     ALIGN16(&ctx->expanded_key),
				     ctx->gcm.Yi.c);

		/* Advance the 32-bit counter stored in the last four bytes
		 * of Yi by the number of blocks just processed; the counter
		 * wraps modulo 2^32. */
		counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
		counter += (uint32_t) blocks;
		_gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
	}

	if (rest > 0)		/* last incomplete block */
		ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);

	return 0;
}
Exemplo n.º 3
0
/*
 * CBC-process src_size bytes from src into dst with the SSSE3 (vpaes)
 * routine, using the context's expanded key and IV.
 *
 * dst_size is not consulted and src_size is passed through unchecked.
 * Always returns 0.
 */
static int
aes_ssse3_encrypt(void *_ctx, const void *src, size_t src_size,
	    void *dst, size_t dst_size)
{
	struct aes_ctx *aes = _ctx;
	/* Final argument of vpaes_cbc_encrypt; presumably 1 selects
	 * encryption -- confirm against the assembly prototype. */
	const int direction = 1;

	vpaes_cbc_encrypt(src, dst, src_size,
			  ALIGN16(&aes->expanded_key),
			  aes->iv, direction);

	return 0;
}
Exemplo n.º 4
0
/*
 * CBC-process src_size bytes from src into dst with the ARMv8 AES
 * routine, using the context's expanded key and IV.
 *
 * dst_size is not consulted and src_size is passed through unchecked.
 * Always returns 0.
 */
static int
aes_aarch64_decrypt(void *_ctx, const void *src, size_t src_size,
	    void *dst, size_t dst_size)
{
	struct aes_ctx *aes = _ctx;
	/* Final argument of aes_v8_cbc_encrypt; presumably 0 selects
	 * decryption -- confirm against the assembly prototype. */
	const int direction = 0;

	aes_v8_cbc_encrypt(src, dst, src_size,
			   ALIGN16(&aes->expanded_key),
			   aes->iv, direction);

	return 0;
}
Exemplo n.º 5
0
/*
 * Install a new IV into the PadLock cipher data that lives at the
 * 16-byte aligned address inside the context.
 *
 * Exactly 16 bytes are copied from iv.
 * NOTE(review): iv_size is not checked, so a shorter buffer would be
 * over-read -- confirm callers always pass 16 bytes.
 *
 * Always returns 0.
 */
static int aes_setiv(void *_ctx, const void *iv, size_t iv_size)
{
	struct padlock_ctx *padlock = _ctx;
	struct padlock_cipher_data *aligned;

	aligned = ALIGN16(&padlock->expanded_key);
	memcpy(aligned->iv, iv, 16);

	return 0;
}
Exemplo n.º 6
0
/*
 * Expand userkey into the context's aligned key schedule with the SSSE3
 * (vpaes) key-setup routine that matches the context's direction.
 *
 * keysize is in bytes; the vpaes routines are given the length in bits.
 *
 * Returns 0 on success, or GNUTLS_E_ENCRYPTION_FAILED (through
 * gnutls_assert_val) when the key-setup routine reports a non-zero
 * status.
 */
static int
aes_ssse3_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)
{
	struct aes_ctx *aes = _ctx;
	int rc;

	/* ctx->enc decides whether an encryption or a decryption schedule
	 * is built. */
	rc = aes->enc
	    ? vpaes_set_encrypt_key(userkey, keysize * 8,
				    ALIGN16(&aes->expanded_key))
	    : vpaes_set_decrypt_key(userkey, keysize * 8,
				    ALIGN16(&aes->expanded_key));

	if (rc == 0)
		return 0;

	return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);
}
/*
 * Load an AES key into the PadLock cipher data held inside the context.
 *
 * 16-byte keys are copied verbatim with keygen = 0.  When built with
 * HAVE_LIBNETTLE, 24- and 32-byte keys are expanded in software by nettle
 * and the resulting schedule is copied in with keygen = 1.  Any other key
 * size yields GNUTLS_E_ENCRYPTION_FAILED.
 *
 * Returns 0 on success.
 *
 * NOTE(review): the first sizeof (struct padlock_cipher_data) bytes of
 * the context are zeroed before ctx->enc is read; this relies on enc
 * lying outside that range -- confirm against the struct layout.
 */
int
padlock_aes_cipher_setkey (void *_ctx, const void *userkey, size_t keysize)
{
  struct padlock_ctx *ctx = _ctx;
  struct padlock_cipher_data *pce;
#ifdef HAVE_LIBNETTLE
  struct aes_ctx nc;
#endif

  memset (_ctx, 0, sizeof (struct padlock_cipher_data));

  pce = ALIGN16 (&ctx->expanded_key);

  /* encdec is set exactly when the context is NOT an encrypting one */
  pce->cword.b.encdec = (ctx->enc == 0);

  if (keysize == 16)
    {
      /* 128-bit key: raw key is stored, keygen stays 0 */
      pce->cword.b.ksize = 0;
      pce->cword.b.rounds = 10;
      memcpy (pce->ks.rd_key, userkey, 16);
      pce->cword.b.keygen = 0;
    }
#ifdef HAVE_LIBNETTLE
  else if (keysize == 24 || keysize == 32)
    {
      if (keysize == 24)
        {
          pce->cword.b.ksize = 1;
          pce->cword.b.rounds = 12;
        }
      else
        {
          pce->cword.b.ksize = 2;
          pce->cword.b.rounds = 14;
        }

      /* expand key using nettle */
      if (ctx->enc)
        aes_set_encrypt_key (&nc, keysize, userkey);
      else
        aes_set_decrypt_key (&nc, keysize, userkey);

      memcpy (pce->ks.rd_key, nc.keys, sizeof (nc.keys));
      pce->ks.rounds = nc.nrounds;

      /* a pre-expanded schedule is supplied */
      pce->cword.b.keygen = 1;
    }
#endif
  else
    {
      return gnutls_assert_val (GNUTLS_E_ENCRYPTION_FAILED);
    }

  padlock_reload_key ();

  return 0;
}
Exemplo n.º 8
0
/*
 * Read one frame's worth of macroblock-info records from
 * ps_app_ctxt->fp_mb_info into pv_mb_info.
 *
 * The record count is (width rounded by ALIGN16) * (height rounded by
 * ALIGN16) / 256.  The record type is chosen by u4_mb_info_type (1..4);
 * for those types u4_mb_info_size is updated to the record size.  Any
 * other type reads zero bytes and leaves u4_mb_info_size untouched.
 *
 * Returns IV_SUCCESS when exactly the expected number of bytes was read,
 * IV_FAIL otherwise.
 */
IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info)
{
    WORD32 num_mbs;
    WORD32 size;
    WORD32 bytes;

    /* Product first, then the division, both carried out on the WORD32
     * value exactly as stored. */
    num_mbs = ALIGN16(ps_app_ctxt->u4_wd) *  ALIGN16(ps_app_ctxt->u4_ht);
    num_mbs = num_mbs / 256;

    if(ps_app_ctxt->u4_mb_info_type == 1)
    {
        size = sizeof(ih264e_mb_info1_t) * num_mbs;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info1_t);
    }
    else if(ps_app_ctxt->u4_mb_info_type == 2)
    {
        size = sizeof(ih264e_mb_info2_t) * num_mbs;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info2_t);
    }
    else if(ps_app_ctxt->u4_mb_info_type == 3)
    {
        size = sizeof(ih264e_mb_info3_t) * num_mbs;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info3_t);
    }
    else if(ps_app_ctxt->u4_mb_info_type == 4)
    {
        size = sizeof(ih264e_mb_info4_t) * num_mbs;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info4_t);
    }
    else
    {
        /* Unknown type: nothing to read */
        size = 0;
    }

    bytes = fread(pv_mb_info, 1, size, ps_app_ctxt->fp_mb_info);

    /* A short read (including EOF or an I/O error) is a failure */
    return (bytes == size) ? IV_SUCCESS : IV_FAIL;
}
Exemplo n.º 9
0
/*
 * CBC-process src_size bytes from src into dst with the AES-NI routine,
 * using the context's expanded key and IV.
 *
 * src_size must be a multiple of 16; otherwise nothing is processed and
 * GNUTLS_E_INVALID_REQUEST is returned (through gnutls_assert_val).
 * dst_size is not consulted.
 *
 * Returns 0 on success.
 */
static int
aes_encrypt(void *_ctx, const void *src, size_t src_size,
	    void *dst, size_t dst_size)
{
	struct aes_ctx *aes = _ctx;
	int status = 0;

	if (unlikely(src_size % 16 != 0)) {
		/* CBC only operates on whole 16-byte blocks */
		status = gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);
	} else {
		aesni_cbc_encrypt(src, dst, src_size,
				  ALIGN16(&aes->expanded_key),
				  aes->iv, 1);
	}

	return status;
}
Exemplo n.º 10
0
/*
 * CBC-process src_size bytes from src into dst through PadLock, using the
 * cipher data at the 16-byte aligned address inside the context.
 *
 * The same padlock_cbc_encrypt entry point is used here; presumably the
 * direction is taken from the control word stored in the cipher data --
 * confirm against the key-setup code.
 *
 * dst_size is not consulted.  Always returns 0.
 */
static int
padlock_aes_cbc_decrypt(void *_ctx, const void *src, size_t src_size,
			void *dst, size_t dst_size)
{
	struct padlock_ctx *padlock = _ctx;
	struct padlock_cipher_data *aligned;

	aligned = ALIGN16(&padlock->expanded_key);

	/* Note the argument order: output first, then input */
	padlock_cbc_encrypt(dst, src, aligned, src_size);

	return 0;
}
Exemplo n.º 11
0
/*
 * Set the AES key for a GCM context and derive the GHASH table from it
 * (ARMv8 implementation).
 *
 * keysize is in bytes and is validated by CHECK_AES_KEYSIZE; the key
 * schedule routine is given the length in bits.
 *
 * Returns 0 on success, or GNUTLS_E_ENCRYPTION_FAILED (through
 * gnutls_assert_val) when key expansion reports a non-zero status.
 */
static int
aes_gcm_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)
{
	struct aes_gcm_ctx *ctx = _ctx;
	int rc;
	unsigned i;

	CHECK_AES_KEYSIZE(keysize);

	rc = aes_v8_set_encrypt_key(userkey, keysize * 8,
				    ALIGN16(&ctx->expanded_key));
	if (rc != 0)
		return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);

	/* Encrypt H in place with the new key.  Presumably H is all-zero on
	 * entry, which would make this the GHASH subkey -- confirm where the
	 * context is initialised. */
	aes_v8_encrypt(ctx->gcm.H.c, ctx->gcm.H.c,
		       ALIGN16(&ctx->expanded_key));

	/* Byte-swap both 64-bit halves of H before building the table */
	for (i = 0; i < 2; i++)
		ctx->gcm.H.u[i] = bswap_64(ctx->gcm.H.u[i]);

	gcm_init_v8(ctx->gcm.Htable, ctx->gcm.H.u);

	return 0;
}
Exemplo n.º 12
0
/*
 * CTR-process the trailing partial block of a message.
 *
 * Copies `length` bytes starting at src[pos] into a block-sized scratch
 * buffer, runs one CTR block over it with the context's key and counter
 * block Yi, and writes the first `length` output bytes to dst[pos].
 *
 * Only the first `length` bytes of the scratch input are initialised; the
 * remaining output bytes are discarded.  `length` is assumed to be at
 * most GCM_BLOCK_SIZE (not checked here).
 */
static inline void
ctr_encrypt_last(struct aes_gcm_ctx *ctx, const uint8_t * src,
		 uint8_t * dst, size_t pos, size_t length)
{
	uint8_t in_block[GCM_BLOCK_SIZE];
	uint8_t out_block[GCM_BLOCK_SIZE];

	memcpy(in_block, src + pos, length);

	ctr32_encrypt_blocks(in_block, out_block, 1,
			     ALIGN16(&ctx->expanded_key),
			     ctx->gcm.Yi.c);

	memcpy(dst + pos, out_block, length);
}
Exemplo n.º 13
0
/*
 * Start a new GCM message with the given IV (ARMv8 implementation).
 *
 * iv_size must be exactly GCM_BLOCK_SIZE - 4 bytes; any other size
 * returns GNUTLS_E_INVALID_REQUEST (through gnutls_assert_val).
 *
 * On success the hash accumulator Xi and the length block are cleared,
 * Yi is set to IV || 00 00 00 01, EK0 receives the encryption of that
 * block, and the last byte of Yi is then set to 2 for the first data
 * block.  Returns 0.
 */
static int aes_gcm_setiv(void *_ctx, const void *iv, size_t iv_size)
{
	struct aes_gcm_ctx *ctx = _ctx;

	if (iv_size != GCM_BLOCK_SIZE - 4)
		return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);

	/* Reset per-message state */
	memset(ctx->gcm.Xi.c, 0, sizeof(ctx->gcm.Xi.c));
	memset(ctx->gcm.len.c, 0, sizeof(ctx->gcm.len.c));

	/* Yi = IV followed by a 4-byte counter whose last byte is 1 */
	memcpy(ctx->gcm.Yi.c, iv, GCM_BLOCK_SIZE - 4);
	memset(ctx->gcm.Yi.c + (GCM_BLOCK_SIZE - 4), 0, 3);
	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 1;

	/* EK0 = E(K, Yi) with the counter byte at 1 */
	aes_v8_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c,
			ALIGN16(&ctx->expanded_key));

	/* Data blocks start with the counter byte at 2 */
	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 2;

	return 0;
}
Exemplo n.º 14
0
static int open_audio(priv_t * ffmpeg, AVStream *st)
{
  AVCodecContext *c;
  AVCodec *codec;

  c = st->codec;

  /* find the audio encoder */
  codec = avcodec_find_encoder(c->codec_id);
  if (!codec) {
    lsx_fail("ffmpeg CODEC not found");
    return SOX_EOF;
  }

  /* open it */
  if (avcodec_open(c, codec) < 0) {
    lsx_fail("ffmpeg could not open CODEC");
    return SOX_EOF;
  }

  ffmpeg->audio_buf_raw = lsx_malloc((size_t)AVCODEC_MAX_AUDIO_FRAME_SIZE + 32);
  ffmpeg->audio_buf_aligned = ALIGN16(ffmpeg->audio_buf_raw);

  /* ugly hack for PCM codecs (will be removed ASAP with new PCM
     support to compute the input frame size in samples */
  if (c->frame_size <= 1) {
    ffmpeg->audio_input_frame_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / c->channels;
    switch(st->codec->codec_id) {
    case CODEC_ID_PCM_S16LE:
    case CODEC_ID_PCM_S16BE:
    case CODEC_ID_PCM_U16LE:
    case CODEC_ID_PCM_U16BE:
      ffmpeg->audio_input_frame_size >>= 1;
      break;
    default:
      break;
    }
  } else
Exemplo n.º 15
0
/*
 * Emit 4 bytes of per-pixel colour indices for one colour block, using
 * SSE2 intrinsics.
 *
 * colorBlock : pixel data; 64 bytes (offsets 0..63) are read from it.
 * minColor   : one endpoint colour; 4 bytes are read.
 * maxColor   : other endpoint colour; 4 bytes are read.
 * outData    : output cursor, passed by reference; 4 bytes are written
 *              and the pointer is advanced by 4.
 *
 * The routine first builds four 16-byte palette vectors (color0..color3)
 * from the two endpoints, then for each pixel computes the sum of absolute
 * differences against every palette entry and turns the comparison
 * results into a 2-bit index.
 *
 * The SIMD_SSE2_* tables are defined elsewhere and are loaded with aligned
 * loads.  The "mov ..." comments are left over from what appears to be an
 * earlier inline-assembly version.
 */
void EmitColorIndices_Intrinsics( const byte *colorBlock, const byte *minColor, const byte *maxColor, byte *&outData )
{
	// 16-byte aligned scratch: the four palette entries and the index accumulator
	ALIGN16( byte color0[16] );
	ALIGN16( byte color1[16] );
	ALIGN16( byte color2[16] );
	ALIGN16( byte color3[16] );
	ALIGN16( byte result[16] );

	// mov esi, maxColor
	// mov edi, minColor

	__m128i t0, t1, t2, t3, t4, t5, t6, t7;

	// t7 = 0; also clears the index accumulator
	t7 = _mm_setzero_si128();
	//t7 = _mm_xor_si128(t7, t7);
	_mm_store_si128 ( (__m128i*) &result, t7 );


	// Load the 4 bytes of maxColor into the low 32 bits of t0
	//t0 = _mm_load_si128 ( (__m128i*)  maxColor );
	t0 = _mm_cvtsi32_si128( *(int*)maxColor);

	// Bitwise AND with the colour mask table
	__m128i tt = _mm_load_si128 ( (__m128i*) SIMD_SSE2_byte_colorMask );
	t0 = _mm_and_si128(t0, tt);

	// Widen the bytes to 16-bit lanes (t7 is zero)
	t0 = _mm_unpacklo_epi8(t0, t7);

	// Shuffled copies of the channels, shifted right by 5 and 6 bits
	t4 = _mm_shufflelo_epi16( t0, R_SHUFFLE_D( 0, 3, 2, 3 ));
	t5 = _mm_shufflelo_epi16( t0, R_SHUFFLE_D( 3, 1, 3, 3 ));

	t4 = _mm_srli_epi16(t4, 5);
	t5 = _mm_srli_epi16(t5, 6);

	// Bitwise Logical OR
	t0 = _mm_or_si128(t0, t4);
	t0 = _mm_or_si128(t0, t5);   // t0 contains color0 in 565




	// Same sequence for minColor, producing t1
	//t1 = _mm_load_si128 ( (__m128i*)  minColor );
	t1 = _mm_cvtsi32_si128( *(int*)minColor);

	t1 = _mm_and_si128(t1, tt);

	t1 = _mm_unpacklo_epi8(t1, t7);

	t4 = _mm_shufflelo_epi16( t1, R_SHUFFLE_D( 0, 3, 2, 3 ));
	t5 = _mm_shufflelo_epi16( t1, R_SHUFFLE_D( 3, 1, 3, 3 ));

	t4 = _mm_srli_epi16(t4, 5);
	t5 = _mm_srli_epi16(t5, 6);

	t1 = _mm_or_si128(t1, t4);
	t1 = _mm_or_si128(t1, t5);  // t1 contains color1 in 565



	// color0: t0 packed back to bytes and replicated with a dword shuffle
	t2 = t0;

	t2 = _mm_packus_epi16(t2, t7);

	t2 = _mm_shuffle_epi32( t2, R_SHUFFLE_D( 0, 1, 0, 1 ));

	_mm_store_si128 ( (__m128i*) &color0, t2 );

	// color2: ( 2 * t0 + t1 ) scaled by the div_by_3 table
	t6 = t0;
	t6 = _mm_add_epi16(t6, t0);
	t6 = _mm_add_epi16(t6, t1);

	// Multiply Packed Signed Integers and Store High Result
	__m128i tw3 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_div_by_3 );
	t6 = _mm_mulhi_epi16(t6, tw3);
	t6 = _mm_packus_epi16(t6, t7);

	t6 = _mm_shuffle_epi32( t6, R_SHUFFLE_D( 0, 1, 0, 1 ));

	_mm_store_si128 ( (__m128i*) &color2, t6 );

	// color1: t1 packed back to bytes and replicated
	t3 = t1;
	t3 = _mm_packus_epi16(t3, t7);
	t3 = _mm_shuffle_epi32( t3, R_SHUFFLE_D( 0, 1, 0, 1 ));

	_mm_store_si128 ( (__m128i*) &color1, t3 );

	// color3: ( t0 + 2 * t1 ) scaled by the div_by_3 table
	t1 = _mm_add_epi16(t1, t1);
	t0 = _mm_add_epi16(t0, t1);

	t0 = _mm_mulhi_epi16(t0, tw3);
	t0 = _mm_packus_epi16(t0, t7);

	t0 = _mm_shuffle_epi32( t0, R_SHUFFLE_D( 0, 1, 0, 1 ));
	_mm_store_si128 ( (__m128i*) &color3, t0 );

	// Word constants used when assembling the index bits
	__m128i w0 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_0);
	__m128i w1 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_1);
	__m128i w2 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_2);

	    // mov eax, 32
	    // mov esi, colorBlock
	// Two passes: bytes 32..63 of colorBlock first (x == 32), then bytes 0..31 (x == 0)
	int x = 32;
	//const byte *c = colorBlock;
	while (x >= 0)
	  {
	    // First 16 bytes of this half, as two 8-byte loads
	    t3 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+0));
	    t3 = _mm_shuffle_epi32( t3, R_SHUFFLE_D( 0, 2, 1, 3 ));

	    t5 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+8));
	    t5 = _mm_shuffle_epi32( t5, R_SHUFFLE_D( 0, 2, 1, 3 ));

	    t0 = t3;
	    t6 = t5;
	    // Compute Sum of Absolute Difference
	    __m128i c0 = _mm_load_si128 ( (__m128i*)  color0 );
	    t0 = _mm_sad_epu8(t0, c0);
	    t6 = _mm_sad_epu8(t6, c0);
	    // Pack with Signed Saturation
	    t0 = _mm_packs_epi32 (t0, t6);

	    // Same distance computation against color1
	    t1 = t3;
	    t6 = t5;
	    __m128i c1 = _mm_load_si128 ( (__m128i*)  color1 );
	    t1 = _mm_sad_epu8(t1, c1);
	    t6 = _mm_sad_epu8(t6, c1);
	    t1 = _mm_packs_epi32 (t1, t6);

	    // ... against color2
	    t2 = t3;
	    t6 = t5;
	    __m128i c2 = _mm_load_si128 ( (__m128i*)  color2 );
	    t2 = _mm_sad_epu8(t2, c2);
	    t6 = _mm_sad_epu8(t6, c2);
	    t2 = _mm_packs_epi32 (t2, t6);

	    // ... against color3 (t3/t5 are consumed here)
	    __m128i c3 = _mm_load_si128 ( (__m128i*)  color3 );
	    t3 = _mm_sad_epu8(t3, c3);
	    t5 = _mm_sad_epu8(t5, c3);
	    t3 = _mm_packs_epi32 (t3, t5);


	    // Second 16 bytes of this half
	    t4 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+16));
	    t4 = _mm_shuffle_epi32( t4, R_SHUFFLE_D( 0, 2, 1, 3 ));

	    t5 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+24));
	    t5 = _mm_shuffle_epi32( t5, R_SHUFFLE_D( 0, 2, 1, 3 ));

	    // Distances for the second 16 bytes are packed together with the
	    // first 16 bytes' results into d0..d3
	    t6 = t4;
	    t7 = t5;
	    t6 = _mm_sad_epu8(t6, c0);
	    t7 = _mm_sad_epu8(t7, c0);
	    t6 = _mm_packs_epi32 (t6, t7);
	    t0 = _mm_packs_epi32 (t0, t6);  // d0

	    t6 = t4;
	    t7 = t5;
	    t6 = _mm_sad_epu8(t6, c1);
	    t7 = _mm_sad_epu8(t7, c1);
	    t6 = _mm_packs_epi32 (t6, t7);
	    t1 = _mm_packs_epi32 (t1, t6);  // d1

	    t6 = t4;
	    t7 = t5;
	    t6 = _mm_sad_epu8(t6, c2);
	    t7 = _mm_sad_epu8(t7, c2);
	    t6 = _mm_packs_epi32 (t6, t7);
	    t2 = _mm_packs_epi32 (t2, t6);  // d2

	    t4 = _mm_sad_epu8(t4, c3);
	    t5 = _mm_sad_epu8(t5, c3);
	    t4 = _mm_packs_epi32 (t4, t5);
	    t3 = _mm_packs_epi32 (t3, t4);  // d3

	    // Reload the accumulated indices (t7 was used as scratch above)
	    // and shift each 32-bit lane left by 16 to make room for this pass
	    t7 = _mm_load_si128 ( (__m128i*) result );

	    t7 = _mm_slli_epi32( t7, 16);

	    t4 = t0;
	    t5 = t1;
	    // Compare Packed Signed Integers for Greater Than
	    t0 = _mm_cmpgt_epi16(t0, t3); // b0
	    t1 = _mm_cmpgt_epi16(t1, t2); // b1
	    t4 = _mm_cmpgt_epi16(t4, t2); // b2
	    t5 = _mm_cmpgt_epi16(t5, t3); // b3
	    t2 = _mm_cmpgt_epi16(t2, t3); // b4

	    t4 = _mm_and_si128(t4, t1); // x0
	    t5 = _mm_and_si128(t5, t0); // x1
	    t2 = _mm_and_si128(t2, t0); // x2

	    // Combine the comparison masks into an index: t2 masked with w1,
	    // ( x0 | x1 ) masked with w2, then ORed together
	    t4 = _mm_or_si128(t4, t5);
	    t2 = _mm_and_si128(t2, w1);
	    t4 = _mm_and_si128(t4, w2);
	    t2 = _mm_or_si128(t2, t4);

	    t5 = _mm_shuffle_epi32( t2, R_SHUFFLE_D( 2, 3, 0, 1 ));

	    // Unpack Low Data
	    t2 = _mm_unpacklo_epi16 ( t2, w0);
	    t5 = _mm_unpacklo_epi16 ( t5, w0);

	    //t5 = _mm_slli_si128 ( t5, 8);
	    t5 = _mm_slli_epi32( t5, 8);

	    t7 = _mm_or_si128(t7, t5);
	    t7 = _mm_or_si128(t7, t2);

	    // Persist the accumulator for the next pass
	    _mm_store_si128 ( (__m128i*) &result, t7 );

	    x -=32;
	  }

	// Fold the four 32-bit lanes of t7 together: rotated copies shifted
	// left by 2, 4 and 6 bits are ORed into t7
	t4 = _mm_shuffle_epi32( t7, R_SHUFFLE_D( 1, 2, 3, 0 ));
	t5 = _mm_shuffle_epi32( t7, R_SHUFFLE_D( 2, 3, 0, 1 ));
	t6 = _mm_shuffle_epi32( t7, R_SHUFFLE_D( 3, 0, 1, 2 ));

	t4 = _mm_slli_epi32 ( t4, 2);
	t5 = _mm_slli_epi32 ( t5, 4);
	t6 = _mm_slli_epi32 ( t6, 6);

	t7 = _mm_or_si128(t7, t4);
	t7 = _mm_or_si128(t7, t5);
	t7 = _mm_or_si128(t7, t6);

	//_mm_store_si128 ( (__m128i*) outData, t7 );

	// Only the low 32 bits are emitted; memcpy is used for the store to outData
	int r = _mm_cvtsi128_si32 (t7);
	memcpy(outData, &r, 4);   // Anything better ?

	outData += 4;
}
Exemplo n.º 16
0
/*
 * Command-line front end: read raw frames from an input file (optionally
 * with a second "overlay" input), run each one through the SH VIO
 * hardware (resize, rotate or blend) and write the converted frame to an
 * output file.
 *
 * Options are parsed with getopt/getopt_long; colourspace and size that
 * are not given explicitly are guessed from the file names.  Frame
 * buffers are obtained from UIOMux.  When built with USE_MERAM_RA and/or
 * USE_MERAM_WB, MERAM read-ahead / write-back caches are set up around
 * the source / destination surfaces.
 *
 * The process terminates through exit(): status 0 after normal
 * completion or after -h/-v/-l, status 1 on usage or set-up errors.
 *
 * Relies on `rotation`, which is declared outside this function.
 *
 * Changes relative to the previous version:
 *  - size_t values are printed with %lu and an explicit cast; they used
 *    to be passed to %d, which is undefined behavior;
 *  - a hard read error (ferror) now ends the frame loop; previously only
 *    EOF did, so a persistent error repeated forever;
 *  - the write-failure message now says "output file".
 */
int main (int argc, char * argv[])
{
	UIOMux * uiomux;
	uiomux_resource_t uiores;

	char * infilename[2] = {NULL, NULL}, * outfilename = NULL;
	FILE * infile[2], * outfile = NULL;
	size_t nread;
	size_t input_size[2], output_size;
	SHVIO *vio;
	struct ren_vid_surface src[2];
	const struct ren_vid_surface *srclist[2] = {
		&src[0], &src[1]
	};
	struct ren_vid_surface dst;
	void *inbuf[2], *outbuf;
	int ret;
	int frameno=0;

	int show_version = 0;
	int show_help = 0;
	int show_list_vio = 0;
	char * progname;
	char * viodev = NULL;
	int error = 0;

	int c;
	char * optstring = "hvo:O:c:s:C:S:f:u:l";

#ifdef HAVE_GETOPT_LONG
	static struct option long_options[] = {
		{"help", no_argument, 0, 'h'},
		{"version", no_argument, 0, 'v'},
		{"output", required_argument, 0, 'o'},
		{"overlay", required_argument, 0, 'O'},
		{"input-colorspace", required_argument, 0, 'c'},
		{"input-size", required_argument, 0, 's'},
		{"output-colorspace", required_argument, 0, 'C'},
		{"output-size", required_argument, 0, 'S'},
		{"filter", required_argument, 0, 'f'},
		{"vio", required_argument, 0, 'u'},
		{"list", no_argument, 0, 'l'},
		{NULL,0,0,0}
	};
#endif

#if defined(USE_MERAM_RA) || defined(USE_MERAM_WB)
/* Round _x up to the next multiple of 16 */
#define ALIGN16(_x)	(((_x) + 15) / 16 * 16)
/* Set _p to the smallest value of the form 2^k that is >= 1024 and
 * covers _w, by smearing the high bits of (_w - 1) | 1023 and adding 1.
 * NOTE(review): only two smear steps are applied -- confirm this is
 * sufficient for the widths in use. */
#define ADJUST_PITCH(_p, _w)			\
	{					\
		(_p) = ((_w) - 1) | 1023;	\
		(_p) = (_p) | ((_p) >> 1);	\
		(_p) = (_p) | ((_p) >> 2);	\
		(_p) += 1;			\
	}

	unsigned long val;
	MERAM *meram = meram_open();
	MERAM_REG *regs = meram_lock_reg(meram);
	size_t sz;
	unsigned long mblock;
	ICB *icbr, *icbw;
#endif /* defined(USE_MERAM_RA) || defined(USE_MERAM_WB) */

	/* Start from "everything unknown": sizes -1, formats REN_UNKNOWN,
	 * byte pitches 0 */
	memset(src, 0, sizeof (src[0]) * 2);
	src[0].w = -1;
	src[0].h = -1;
	dst.w = -1;
	dst.h = -1;
	src[0].format = REN_UNKNOWN;
	dst.format = REN_UNKNOWN;
	src[0].bpitchy = src[0].bpitchc = src[0].bpitcha = 0;
	dst.bpitchy = dst.bpitchc = dst.bpitcha = 0;

	/* The overlay surface starts as a copy of the primary one ... */
	memcpy((void *)&src[1], (void *)&src[0], sizeof(src[0]));

	/* ... with a fixed blend rectangle */
	src[1].blend_out.x = 0;
	src[1].blend_out.y = 0;
	src[1].blend_out.w = 220;
	src[1].blend_out.h = 440;

	progname = argv[0];

	if (argc < 2) {
		usage (progname);
		return (1);
	}

	/* Option parsing */
	while (1) {
#ifdef HAVE_GETOPT_LONG
		c = getopt_long (argc, argv, optstring, long_options, NULL);
#else
		c = getopt (argc, argv, optstring);
#endif
		if (c == -1) break;
		if (c == ':') {
			usage (progname);
			goto exit_err;
		}

		switch (c) {
		case 'h': /* help */
			show_help = 1;
			break;
		case 'v': /* version */
			show_version = 1;
			break;
		case 'o': /* output */
			outfilename = optarg;
			break;
		case 'O': /* overlay */
			infilename[1] = optarg;
			break;
		case 'c': /* input colorspace */
			set_colorspace (optarg, &src[0].format);
			break;
		case 's': /* input size */
			set_size (optarg, &src[0].w, &src[0].h);
			break;
		case 'C': /* output colorspace */
			set_colorspace (optarg, &dst.format);
			break;
		case 'S': /* output size */
			set_size (optarg, &dst.w, &dst.h);
			break;
		case 'f': /* filter mode: stored in `rotation` */
			rotation = strtoul(optarg, NULL, 0);
			break;
		case 'l':
			show_list_vio = 1;
			break;
		case 'u':
			viodev = optarg;
			break;
		default:
			break;
		}
	}

	if (show_version) {
		printf ("%s version " VERSION "\n", progname);
	}

	if (show_help) {
		usage (progname);
	}
#if 0
	if (show_list_vio) {
		char **vio;
		int i, n;

		if (shvio_list_vio(&vio, &n) < 0) {
			printf ("Can't get a list of VIO available...\n");
		} else {
			for(i = 0; i < n; i++)
				printf("%s", vio[i]);
			printf("Total: %d VIOs available.\n", n);
		}
	}
#endif

	/* Informational options end the program here */
	if (show_version || show_help || show_list_vio) {
		goto exit_ok;
	}

	/* First positional argument: input file (mandatory) */
	if (optind >= argc) {
		usage (progname);
		goto exit_err;
	}

	infilename[0] = argv[optind++];

	/* Second positional argument, if any, overrides -o */
	if (optind < argc) {
		outfilename = argv[optind++];
	}

	printf ("Input file: %s\n", infilename[0]);
	if (infilename[1] != NULL)
		printf ("Overlay file: %s\n", infilename[1]);
	printf ("Output file: %s\n", outfilename);

	guess_colorspace (infilename[0], &src[0].format);
	if (infilename[1])
		guess_colorspace (infilename[1], &src[1].format);
	guess_colorspace (outfilename, &dst.format);
	/* If the output colorspace isn't given and can't be guessed, then default to
	 * the input colorspace (ie. no colorspace conversion) */
	if (dst.format == REN_UNKNOWN)
		dst.format = src[0].format;

	guess_size (infilename[0], src[0].format, &src[0].w, &src[0].h);
	if (rotation & 0xF) {
		/* Swap width/height for rotation */
		dst.w = src[0].h;
		dst.h = src[0].w;
	} else if (dst.w == -1 && dst.h == -1) {
		/* If the output size isn't given and can't be guessed, then default to
		 * the input size (ie. no rescaling) */
		dst.w = src[0].w;
		dst.h = src[0].h;
	}
	if (infilename[1])
		guess_size (infilename[1], src[1].format, &src[1].w, &src[1].h);

	/* Setup memory pitch */
	src[0].pitch = src[0].w;
	src[1].pitch = src[1].w;
	dst.pitch = dst.w;

	/* Check that all parameters are set; report every missing one
	 * before bailing out */
	if (src[0].format == REN_UNKNOWN) {
		fprintf (stderr, "ERROR: Input colorspace unspecified\n");
		error = 1;
	}
	if (src[0].w == -1) {
		fprintf (stderr, "ERROR: Input width unspecified\n");
		error = 1;
	}
	if (src[0].h == -1) {
		fprintf (stderr, "ERROR: Input height unspecified\n");
		error = 1;
	}

	if (dst.format == REN_UNKNOWN) {
		fprintf (stderr, "ERROR: Output colorspace unspecified\n");
		error = 1;
	}
	if (dst.w == -1) {
		fprintf (stderr, "ERROR: Output width unspecified\n");
		error = 1;
	}
	if (dst.h == -1) {
		fprintf (stderr, "ERROR: Output height unspecified\n");
		error = 1;
	}

	if (error) goto exit_err;

	printf ("Input colorspace:\t%s\n", show_colorspace (src[0].format));
	printf ("Input size:\t\t%dx%d %s\n", src[0].w, src[0].h, show_size (src[0].w, src[0].h));
	printf ("Output colorspace:\t%s\n", show_colorspace (dst.format));
	printf ("Output size:\t\t%dx%d %s\n", dst.w, dst.h, show_size (dst.w, dst.h));
	printf ("Rotation:\t\t%s\n", show_rotation (rotation));

	/* Bytes per frame for each surface */
	input_size[0] = imgsize (src[0].format, src[0].w, src[0].h);
	if (infilename[1] != NULL)
		input_size[1] = imgsize (src[1].format, src[1].w, src[1].h);
	output_size = imgsize (dst.format, dst.w, dst.h);

	/* NOTE(review): the condition is hard-wired to 1, so the named
	 * "VPU5" block is always used and the else branch is dead. */
	if (/*viodev*/ 1) {
		const char *blocks[2] = { "VPU5", NULL };
		uiomux = uiomux_open_named(blocks);
		uiores = 1 << 0;

	} else {
		uiomux = uiomux_open ();
		uiores = UIOMUX_SH_VEU;
	}

	/* Set up memory buffers.  The chroma plane pointers are derived
	 * from the luma pointer according to the pixel format.
	 * NOTE(review): the uiomux_malloc results are not checked. */
	src[0].py = inbuf[0] = uiomux_malloc (uiomux, uiores, input_size[0], 32);
	if (src[0].format == REN_RGB565) {
		src[0].pc = 0;
	} else if (src[0].format == REN_YV12) {
		src[0].pc2 = src[0].py + (src[0].w * src[0].h);	/* Cr(V) */
		src[0].pc = src[0].pc2 + (src[0].w * src[0].h) / 4;	/* Cb(U) */
	} else if (src[0].format == REN_YV16) {
		src[0].pc2 = src[0].py + (src[0].w * src[0].h);	/* Cr(V) */
		src[0].pc = src[0].pc2 + (src[0].w * src[0].h) / 2;	/* Cb(U) */
	} else {
		src[0].pc = src[0].py + (src[0].w * src[0].h);	/* CbCr(UV) */
	}

	if (infilename[1] != NULL) {
		src[1].py = inbuf[1] = uiomux_malloc (uiomux, uiores, input_size[1], 32);
		if (src[1].format == REN_RGB565) {
			src[1].pc = 0;
		} else if (src[1].format == REN_YV12) {
			src[1].pc2 = src[1].py + (src[1].w * src[1].h);	/* Cr(V) */
			src[1].pc = src[1].pc2 + (src[1].w * src[1].h) / 4;	/* Cb(U) */
		} else if (src[1].format == REN_YV16) {
			src[1].pc2 = src[1].py + (src[1].w * src[1].h);	/* Cr(V) */
			src[1].pc = src[1].pc2 + (src[1].w * src[1].h) / 2;	/* Cb(U) */
		} else {
			src[1].pc = src[1].py + (src[1].w * src[1].h);	/* CbCr(UV) */
		}
	}

	dst.py = outbuf = uiomux_malloc (uiomux, uiores, output_size, 32);
	if (dst.format == REN_RGB565) {
		dst.pc = 0;
	} else if (dst.format == REN_YV12) {
		dst.pc2 = dst.py + (dst.w * dst.h);	/* Cr(V) */
		dst.pc = dst.pc2 + (dst.w * dst.h) / 4;	/* Cb(U) */
	} else if (dst.format == REN_YV16) {
		dst.pc2 = dst.py + (dst.w * dst.h);	/* Cr(V) */
		dst.pc = dst.pc2 + (dst.w * dst.h) / 2;	/* Cb(U) */
	} else {
		dst.pc = dst.py + (dst.w * dst.h);	/* CbCr(UV) */
	}

	/* NOTE(review): the #error directives below make any MERAM-enabled
	 * build fail at compile time; they look like leftover markers. */
#if defined(USE_MERAM_RA) || defined(USE_MERAM_WB)
#error aaaa
	meram_read_reg(meram, regs, MEVCR1, &val);
	val |= 1 << 29;		/* use 0xc0000000-0xdfffffff */
	meram_write_reg(meram, regs, MEVCR1, val);
	meram_unlock_reg(meram, regs);
#endif /* defined(USE_MERAM_RA) || defined(USE_MERAM_WB) */

#if defined(USE_MERAM_RA)
#error bbbb
	/* calculate byte-pitch */
	src[0].bpitchy = size_y(src[0].format, src[0].pitch, 0);

	/* set up read-ahead cache for input */
	icbr = meram_lock_icb(meram, 0);
	val = (3 << 24) |		/* KRBNM: ((3+1) << 1) = 8 lines */
		((16 - 1) << 16);	/* BNM: 16 = KRBNM * 2 lines */
	ADJUST_PITCH(sz, src[0].bpitchy);
	sz *= 16;			/* 16 lines */
	if (src[0].format == REN_NV12) {
		val |= 2 << 12;	/* CPL: YCbCr420 */
		sz = sz * 3 / 2;
	} else if (src[0].format == REN_NV16) {
		val |= 3 << 12;	/* CPL: YCbCr422 */
		sz = sz * 2;
	}
	meram_write_icb(meram, icbr, MExxMCNF, val);

	/* Size in 1 KiB units, at least one */
	sz = (sz + 1023) / 1024;
	mblock = meram_alloc_icb_memory(meram, icbr,
					    (sz == 0) ? 1 : sz);
	val = (1 << 28) |		/* BSZ: 2^1 line/block */
		(mblock << 16) |	/* MSAR */
		(3 << 9) |		/* WD: (constant) */
		(1 << 8) |		/* WS: (constant) */
		(1 << 3) |		/* CM: address mode 1 */
		1;			/* MD: read buffer mode */
	meram_write_icb(meram, icbr, MExxCTRL, val);

	val = ((src[0].h - 1) << 16) |	/* YSZM1 */
		(src[0].bpitchy - 1);	/* XSZM1 */
	meram_write_icb(meram, icbr, MExxBSIZE, val);
	val = ALIGN16(src[0].bpitchy);	/* SBSIZE: 16 bytes aligned */
	meram_write_icb(meram, icbr, MExxSBSIZE, val);

	ADJUST_PITCH(src[0].bpitchy, src[0].bpitchy);
	src[0].bpitchc = src[0].bpitcha = src[0].bpitchy;

	/* Point the ICB at the real buffer, then redirect the surface to
	 * the ICB's address */
	val = uiomux_all_virt_to_phys(src[0].py);
	meram_write_icb(meram, icbr, MExxSSARA, val);

	src[0].py = (void *)meram_get_icb_address(meram, icbr, 0);
	uiomux_register(src[0].py, (unsigned long)src[0].py, 8 << 20);
	if (is_ycbcr(src[0].format)) {
		val = uiomux_all_virt_to_phys(src[0].pc);
		meram_write_icb(meram, icbr, MExxSSARB, val);
		src[0].pc = (void *)meram_get_icb_address(meram, icbr, 1);
		uiomux_register(src[0].pc, (unsigned long)src[0].pc, 8 << 20);
	} else {
		meram_write_icb(meram, icbr, MExxSSARB, 0);
	}
#endif /* defined(USE_MERAM_RA) */

#if defined(USE_MERAM_WB)
	/* calculate byte-pitch */
	dst.bpitchy = size_y(dst.format, dst.pitch, 0);

	/* set up write-back cache for output */
	icbw = meram_lock_icb(meram, 1);
	val = (3 << 28) |		/* KWBNM: ((3+1) << 1) = 8 lines */
		((16 - 1) << 16);	/* BNM: 16 = KWBNM * 2 lines */
	ADJUST_PITCH(sz, dst.bpitchy);
	sz *= 16;			/* 16 lines */
	if (dst.format == REN_NV12) {
		val |= 2 << 12;	/* CPL: YCbCr420 */
		sz = sz * 3 / 2;
	} else if (dst.format == REN_NV16) {
		val |= 3 << 12;	/* CPL: YCbCr422 */
		sz = sz * 2;
	}
	meram_write_icb(meram, icbw, MExxMCNF, val);
	sz = (sz + 1023) / 1024;
	mblock = meram_alloc_icb_memory(meram, icbw,
					(sz == 0) ? 1 : sz);
	val = (1 << 28) |		/* BSZ: 2^1 line/block */
		(mblock << 16) |	/* MSAR */
		(3 << 9) |		/* WD: (constant) */
		(1 << 8) |		/* WS: (constant) */
		(1 << 3) |		/* CM: address mode 1 */
		2;			/* MD: write buffer mode */
	meram_write_icb(meram, icbw, MExxCTRL, val);

	val = ((dst.h - 1) << 16) |	/* YSZM1 */
		(dst.bpitchy - 1);	/* XSZM1 */
	meram_write_icb(meram, icbw, MExxBSIZE, val);
	val = ALIGN16(dst.bpitchy);	/* SBSIZE: 16 bytes aligned */
	meram_write_icb(meram, icbw, MExxSBSIZE, val);

	ADJUST_PITCH(dst.bpitchy, dst.bpitchy);
	dst.bpitchc = dst.bpitcha = dst.bpitchy;

	val = uiomux_all_virt_to_phys(dst.py);
	meram_write_icb(meram, icbw, MExxSSARA, val);

	dst.py = (void *)meram_get_icb_address(meram, icbw, 0);
	uiomux_register(dst.py, (unsigned long)dst.py, 8 << 20);
	if (is_ycbcr(dst.format)) {
		val = uiomux_all_virt_to_phys(dst.pc);
		meram_write_icb(meram, icbw, MExxSSARB, val);
		dst.pc = (void *)meram_get_icb_address(meram, icbw, 1);
		uiomux_register(dst.pc, (unsigned long)dst.pc, 8 << 20);
	} else {
		meram_write_icb(meram, icbw, MExxSSARB, 0);
	}
#endif /* defined(USE_MERAM_WB) */

	/* Open the streams; "-" selects stdin / stdout */
	if (strcmp (infilename[0], "-") == 0) {
		infile[0] = stdin;
	} else {
		infile[0] = fopen (infilename[0], "rb");
		if (infile[0] == NULL) {
			fprintf (stderr, "%s: unable to open input file %s\n",
				 progname, infilename[0]);
			goto exit_err;
		}
	}

	if (infilename[1] != NULL) {
		infile[1] = fopen (infilename[1], "rb");
		if (infile[1] == NULL) {
			fprintf (stderr, "%s: unable to open input file %s\n",
				 progname, infilename[1]);
			goto exit_err;
		}
	}

	if (outfilename != NULL) {
		if (strcmp (outfilename, "-") == 0) {
			outfile = stdout;
		} else {
			outfile = fopen (outfilename, "wb");
			if (outfile == NULL) {
				fprintf (stderr, "%s: unable to open output file %s\n",
					 progname, outfilename);
				goto exit_err;
			}
		}
	}

	if (!viodev)
		vio = shvio_open();
	else
		vio = shvio_open_named(viodev);

	if (vio == 0) {
		fprintf (stderr, "Error opening VIO\n");
		goto exit_err;
	}

	/* Frame loop: one input frame (plus one overlay frame when given)
	 * per iteration, until either input reaches EOF */
	while (1) {
#ifdef DEBUG
		fprintf (stderr, "%s: Converting frame %d\n", progname, frameno);
#endif

		/* Read input */
		if ((nread = fread (inbuf[0], 1, input_size[0], infile[0])) != input_size[0]) {
			if (nread == 0 && feof (infile[0])) {
				break;
			} else {
				/* size_t has no %d conversion: cast and print as unsigned long */
				fprintf (stderr, "%p, %s: errors reading input file %s %lu %lu %d\n", inbuf[0],
					 progname, infilename[0], (unsigned long)nread,
					 (unsigned long)input_size[0], ferror(infile[0]));
				/* A hard I/O error will not clear by itself; stop
				 * instead of looping forever.  A short final frame
				 * without an error is still processed as before. */
				if (ferror (infile[0]))
					break;
			}
		}
#if 1
		if (infilename[1] != NULL) {
			if ((nread = fread (inbuf[1], 1, input_size[1], infile[1])) != input_size[1]) {
				if (nread == 0 && feof (infile[1])) {
					break;
				} else {
					fprintf (stderr, "%s: error reading input file %s\n",
						 progname, infilename[1]);
					/* Same reasoning as for the primary input */
					if (ferror (infile[1]))
						break;
				}
			}

			/* Blend both sources into dst */
			printf("invoke shvio_setup_blend()...\n");
			ret = shvio_setup_blend(vio, NULL, srclist, 2, &dst);
			shvio_start(vio);
			printf("shvio_start_blend() = %d\n", ret);
			ret = shvio_wait(vio);
		} else {
#endif
			/* Single source: rotate when a rotation was requested,
			 * otherwise resize */
			if (rotation) {
				ret = shvio_rotate(vio, &src[0], &dst, rotation);
			} else {
				ret = shvio_resize(vio, &src[0], &dst);
			}
		}

#if defined(USE_MERAM_WB)
		meram_read_icb(meram, icbw, MExxCTRL, &val);
		val |= 1 << 5;	/* WF: flush data */
		meram_write_icb(meram, icbw, MExxCTRL, val);
#endif
#if defined(USE_MERAM_RA)
		meram_read_icb(meram, icbr, MExxCTRL, &val);
		val |= 1 << 4;	/* RF: flush data */
		meram_write_icb(meram, icbr, MExxCTRL, val);
#endif

		/* Write output (skipped when no output file was given) */
		if (outfile && fwrite (outbuf, 1, output_size, outfile) != output_size) {
				fprintf (stderr, "%s: error writing output file %s\n",
					 progname, outfilename);
		}

		frameno++;
	}

	shvio_close (vio);

#if defined(USE_MERAM_RA)
	/* finalize the read-ahead cache */
	uiomux_unregister(src[0].py);
	if (is_ycbcr(src[0].format))
		uiomux_unregister(src[0].pc);
	meram_free_icb_memory(meram, icbr);
	meram_unlock_icb(meram, icbr);
#endif
#if defined(USE_MERAM_WB)
	/* finalize the write-back cache */
	uiomux_unregister(dst.py);
	if (is_ycbcr(dst.format))
		uiomux_unregister(dst.pc);
	meram_free_icb_memory(meram, icbw);
	meram_unlock_icb(meram, icbw);
#endif
#if defined(USE_MERAM_RA) || defined(USE_MERAM_WB)
	meram_close(meram);
#endif

	/* Release frame buffers and streams */
	uiomux_free (uiomux, uiores, src[0].py, input_size[0]);
	if (infilename[1] != NULL)
		uiomux_free (uiomux, uiores, src[1].py, input_size[1]);
	uiomux_free (uiomux, uiores, dst.py, output_size);
	uiomux_close (uiomux);

	if (infile[0] != stdin) fclose (infile[0]);
	if (infilename[1] != NULL)
		fclose (infile[1]);

	if (outfile == stdout) {
		fflush (stdout);
	} else if (outfile) {
		fclose (outfile);
	}

	printf ("Frames:\t\t%d\n", frameno);

exit_ok:
	exit (0);

exit_err:
	exit (1);
}
Exemplo n.º 17
0
/*
============
TestMinMax

Benchmarks p_generic->MinMax against p_simd->MinMax for every input flavor
( float[], idVec2[], idVec3[], idDrawVert[] and indexed idDrawVert[] ) on
COUNT pseudo-random elements. Each measurement is repeated NUMTESTS times
and reported through PrintClocks. For every flavor the bounds produced by
the SIMD path are compared with the generic ones and "ok" or a red "X" is
appended to the SIMD line.
============
*/
void TestMinMax() {
	int i;
	TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
	ALIGN16( float fsrc0[COUNT] );
	ALIGN16( idVec2 v2src0[COUNT] );
	ALIGN16( idVec3 v3src0[COUNT] );
	ALIGN16( idDrawVert drawVerts[COUNT] );
	ALIGN16( triIndex_t indexes[COUNT] );
	float min = 0.0f, max = 0.0f, min2 = 0.0f, max2 = 0.0f;
	idVec2 v2min, v2max, v2min2, v2max2;
	idVec3 vmin, vmax, vmin2, vmax2;
	const char *result;

	idRandom srnd( RANDOM_SEED );

	// fill every source array from the same seeded generator; drawVerts
	// reuse the idVec3 positions and indexes is the identity mapping
	for ( i = 0; i < COUNT; i++ ) {
		fsrc0[i] = srnd.CRandomFloat() * 10.0f;
		v2src0[i][0] = srnd.CRandomFloat() * 10.0f;
		v2src0[i][1] = srnd.CRandomFloat() * 10.0f;
		v3src0[i][0] = srnd.CRandomFloat() * 10.0f;
		v3src0[i][1] = srnd.CRandomFloat() * 10.0f;
		v3src0[i][2] = srnd.CRandomFloat() * 10.0f;
		drawVerts[i].xyz = v3src0[i];
		indexes[i] = i;
	}

	idLib::common->Printf("====================================\n" );

	// ---- float[] ----
	// GetBest is not visible here; presumably it keeps the best (lowest)
	// elapsed clock count across the NUMTESTS runs -- TODO confirm
	bestClocksGeneric = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		min = idMath::INFINITY;
		max = -idMath::INFINITY;
		StartRecordTime( start );
		p_generic->MinMax( min, max, fsrc0, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksGeneric );
	}
	PrintClocks( "generic->MinMax( float[] )", COUNT, bestClocksGeneric );

	// NOTE(review): unlike the generic pass above, min2/max2 are not reset
	// before each run; this assumes MinMax overwrites its outputs -- confirm
	bestClocksSIMD = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_simd->MinMax( min2, max2, fsrc0, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksSIMD );
	}

	result = ( min == min2 && max == max2 ) ? "ok" : S_COLOR_RED"X";
	PrintClocks( va( "   simd->MinMax( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );

	// ---- idVec2[] ----
	bestClocksGeneric = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_generic->MinMax( v2min, v2max, v2src0, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksGeneric );
	}
	PrintClocks( "generic->MinMax( idVec2[] )", COUNT, bestClocksGeneric );

	bestClocksSIMD = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_simd->MinMax( v2min2, v2max2, v2src0, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksSIMD );
	}

	result = ( v2min == v2min2 && v2max == v2max2 ) ? "ok" : S_COLOR_RED"X";
	PrintClocks( va( "   simd->MinMax( idVec2[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );

	// ---- idVec3[] ----
	// vmin/vmax and vmin2/vmax2 are reused by all remaining sections
	bestClocksGeneric = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_generic->MinMax( vmin, vmax, v3src0, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksGeneric );
	}
	PrintClocks( "generic->MinMax( idVec3[] )", COUNT, bestClocksGeneric );

	bestClocksSIMD = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_simd->MinMax( vmin2, vmax2, v3src0, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksSIMD );
	}

	result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
	PrintClocks( va( "   simd->MinMax( idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );

	// ---- idDrawVert[] ----
	bestClocksGeneric = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_generic->MinMax( vmin, vmax, drawVerts, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksGeneric );
	}
	PrintClocks( "generic->MinMax( idDrawVert[] )", COUNT, bestClocksGeneric );

	bestClocksSIMD = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_simd->MinMax( vmin2, vmax2, drawVerts, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksSIMD );
	}

	result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
	PrintClocks( va( "   simd->MinMax( idDrawVert[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );

	// ---- idDrawVert[] addressed through indexes[] ----
	bestClocksGeneric = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_generic->MinMax( vmin, vmax, drawVerts, indexes, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksGeneric );
	}
	PrintClocks( "generic->MinMax( idDrawVert[], indexes[] )", COUNT, bestClocksGeneric );

	bestClocksSIMD = 0;
	for ( i = 0; i < NUMTESTS; i++ ) {
		StartRecordTime( start );
		p_simd->MinMax( vmin2, vmax2, drawVerts, indexes, COUNT );
		StopRecordTime( end );
		GetBest( start, end, bestClocksSIMD );
	}

	result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
	PrintClocks( va( "   simd->MinMax( idDrawVert[], indexes[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
}
Exemplo n.º 18
0
/*
 * JNI callback: receives one captured camera frame from the Java side.
 *
 * env, obj   - JNI environment and calling Java object (obj is unused).
 * data       - Java byte array holding the raw frame.
 * length     - frame size in bytes as reported by the caller.
 * user_data  - the and_stream pointer, carried through Java as a jlong.
 *
 * The stream timestamp is advanced on every call, even when no capture
 * callback is installed. Depending on strm->convert_to_i420 the pixel data
 * is rearranged in place (1: NV21 -> I420, 2: YV12 -> I420), then passed
 * through pjmedia_vid_dev_conv_resize_and_rotate() and handed to the
 * capture callback. The Java array is released with JNI_ABORT, i.e. the
 * in-place modifications are not copied back to the Java array.
 */
static void JNICALL OnGetFrame(JNIEnv *env, jobject obj,
                               jbyteArray data, jint length,
			       jlong user_data)
{
    and_stream *strm = *(and_stream**)&user_data;
    pjmedia_frame f;
    pj_uint8_t *Y, *U, *V;
    pj_status_t status;
    void *frame_buf, *data_buf;

    strm->frame_ts.u64 += strm->ts_inc;
    if (!strm->vid_cb.capture_cb)
	return;

    /* The callback runs on a Java-owned thread; register it with PJLIB
     * before any other PJLIB call is made from it.
     */
    if (strm->thread_initialized == 0 || !pj_thread_is_registered()) {
	pj_bzero(strm->thread_desc, sizeof(pj_thread_desc));
	status = pj_thread_register("and_cam", strm->thread_desc,
				    &strm->thread);
	if (status != PJ_SUCCESS)
	    return;
	strm->thread_initialized = 1;
	PJ_LOG(5,(THIS_FILE, "Android camera thread registered"));
    }

    f.type = PJMEDIA_FRAME_TYPE_VIDEO;
    f.size = length;
    f.timestamp.u64 = strm->frame_ts.u64;
    f.buf = data_buf = (*env)->GetByteArrayElements(env, data, 0);

    /* GetByteArrayElements() returns NULL when it fails; without a buffer
     * there is nothing to convert or deliver, and nothing to release.
     */
    if (!data_buf)
	return;

    Y = (pj_uint8_t*)f.buf;
    U = Y + strm->vafp.plane_bytes[0];
    V = U + strm->vafp.plane_bytes[1];

    /* Convert NV21 -> I420, i.e: separate V/U interleaved data plane
     * into U & V planes.
     */
    if (strm->convert_to_i420 == 1) {
	pj_uint8_t *src = U;
	pj_uint8_t *dst_u = U;
	pj_uint8_t *end_u = U + strm->vafp.plane_bytes[1];
	pj_uint8_t *dst_v = strm->convert_buf;

	/* U is compacted in place (dst_u never overtakes src), V is
	 * collected in the conversion buffer and copied back afterwards.
	 */
	while (dst_u < end_u) {
	    *dst_v++ = *src++;
	    *dst_u++ = *src++;
	}
	pj_memcpy(V, strm->convert_buf, strm->vafp.plane_bytes[2]);
    }

    /* Convert YV12 -> I420, i.e: swap U & V planes. We also need to
     * strip out padding, if any.
     */
    else if (strm->convert_to_i420 == 2) {
	int y_stride  = ALIGN16(strm->vafp.size.w);
	int uv_stride = ALIGN16(strm->vafp.size.w/2);

	/* Strip out Y padding (row 0 is already in place) */
	if (y_stride > strm->vafp.size.w) {
	    int i;
	    pj_uint8_t *src = Y + y_stride;
	    pj_uint8_t *dst = Y + strm->vafp.size.w;

	    for (i = 1; i < strm->vafp.size.h; ++i) {
		memmove(dst, src, strm->vafp.size.w);
		src += y_stride;
		dst += strm->vafp.size.w;
	    }
	}

	/* Swap U & V planes */
	if (uv_stride == strm->vafp.size.w/2) {

	    /* No padding, note Y plane should be no padding too! */
	    pj_assert(y_stride == strm->vafp.size.w);
	    pj_memcpy(strm->convert_buf, U, strm->vafp.plane_bytes[1]);
	    pj_memmove(U, V, strm->vafp.plane_bytes[1]);
	    pj_memcpy(V, strm->convert_buf, strm->vafp.plane_bytes[1]);

	} else if (uv_stride > strm->vafp.size.w/2) {

	    /* Strip & copy V plane into conversion buffer */
	    pj_uint8_t *src = Y + y_stride*strm->vafp.size.h;
	    pj_uint8_t *dst = strm->convert_buf;
	    unsigned dst_stride = strm->vafp.size.w/2;
	    int i;
	    for (i = 0; i < strm->vafp.size.h/2; ++i) {
		memmove(dst, src, dst_stride);
		src += uv_stride;
		dst += dst_stride;
	    }

	    /* Strip U plane; src continues right after the padded V plane */
	    dst = U;
	    for (i = 0; i < strm->vafp.size.h/2; ++i) {
		memmove(dst, src, dst_stride);
		src += uv_stride;
		dst += dst_stride;
	    }

	    /* Get V plane data from conversion buffer */
	    pj_memcpy(V, strm->convert_buf, strm->vafp.plane_bytes[2]);

	}
    }

    /* When the converter succeeds, deliver its output buffer; otherwise
     * fall back to the unconverted frame.
     */
    status = pjmedia_vid_dev_conv_resize_and_rotate(&strm->conv,
						    f.buf,
						    &frame_buf);
    if (status == PJ_SUCCESS) {
        f.buf = frame_buf;
    }

    (*strm->vid_cb.capture_cb)(&strm->base, strm->user_data, &f);
    (*env)->ReleaseByteArrayElements(env, data, data_buf, JNI_ABORT);
}
Exemplo n.º 19
0
/*
 * Record an undo step for the rectangle (x1,y1)-(x2,y2) of the image shown
 * in gwnd.
 *
 * gwnd        window whose Extension is the VXIMAGE being modified
 * x1..y2      rectangle about to be modified (corners in any order);
 *             x1 < 0 means that no pixels are affected
 * type_modif  kind of modification, stored in the undo record
 * xundo       optional extra data; when nb_bytes_to_allocate_and_copy > 0
 *             a private copy of xundo->spec is attached to the record
 *
 * Returns 0 on success, -1 when undo is disabled or the window is invalid,
 * and -3 when a buffer could not be allocated.
 *
 * Every failure after AllocateNewUndoBuffer() releases what was acquired.
 * A failed image allocation is now reported through the (previously
 * unreachable) error branch instead of linking a half-initialised record
 * into the undo list.
 * NOTE(review): this relies on FreeUndoBuffer() accepting a record that has
 * not been linked yet, as the pre-existing error branch already did.
 */
int undo_put(GEM_WINDOW *gwnd, int x1, int y1, int x2, int y2, int type_modif, XUNDO *xundo)
{
  VXIMAGE  *vimage ;
  UNDO_DEF *undo_buf ;
  void     *spec = NULL ;
  long     taille = 0 ;
  int      xy[8] ;
  int      wx, wy ;
  int      larg, temp ;
  int      type_alloc ;
  int      err = 0 ;

  if ( config.nb_undo == 0 ) return( -1 ) ;
  if ( !GWIsWindowValid( gwnd ) ) return( -1 ) ;
  undo_buf = AllocateNewUndoBuffer( gwnd ) ;
  if ( undo_buf == NULL ) return( -3 ) ;

  vimage = (VXIMAGE *) gwnd->Extension ;

  /* Take a private copy of the caller's extra data, if any */
  if ( xundo && ( xundo->nb_bytes_to_allocate_and_copy > 0 ) )
  {
    spec = malloc( xundo->nb_bytes_to_allocate_and_copy ) ;
    if ( spec == NULL )
    {
      FreeUndoBuffer( undo_buf ) ;
      return( -3 ) ;
    }
    memcpy( spec, xundo->spec, xundo->nb_bytes_to_allocate_and_copy ) ;
  }

  /* Normalise the rectangle so that (x1,y1) is the top-left corner */
  if ( x1 > x2 )
  {
    temp = x1 ;
    x1   = x2 ;
    x2   = temp ;
  }

  if ( y1 > y2 )
  {
    temp = y1 ;
    y1   = y2 ;
    y2   = temp ;
  }

  wx = x2 - x1 + 1 ;
  wy = y2 - y1 + 1 ;

  /* Raster lines are stored with a width rounded up to 16 pixels */
  larg = ALIGN16( wx ) ;

  if ( need_palette( type_modif, spec ) )
  {
    size_t size ;

    undo_buf->nb_cpal = (int) vimage->inf_img.nb_cpal ;
    size              = undo_buf->nb_cpal * 3 * sizeof(int) ;
    undo_buf->palette = (int *) malloc( size ) ;
    if ( undo_buf->palette == NULL )
    {
      free( spec ) ; /* not attached to undo_buf yet */
      FreeUndoBuffer( undo_buf ) ;
      return( -3 ) ;
    }
    memcpy( undo_buf->palette, vimage->inf_img.palette, size ) ;
  }

  if ( x1 < 0 ) taille = 0 ; /* No change to the image itself (probably palette only) */
  else          taille = img_size( larg, wy, nb_plane ) ;

  undo_buf->original_width  = vimage->raster.fd_w ;
  undo_buf->original_height = vimage->raster.fd_h ;

  type_alloc = must_alloc( type_modif, taille, spec ) ;
  if ( type_alloc == 0 ) /* No image memory is needed for this record */
  {
    undo_buf->img.fd_addr = NULL ;
    undo_buf->x           = x1 ;
    undo_buf->y           = y1 ;
    undo_buf->w           = wx ;
    undo_buf->h           = wy ;
    undo_buf->mitem       = type_modif ;
    undo_buf->spec        = spec ;
  }
  else
  {
    /* type_alloc == 2 selects disk storage, anything else a memory copy */
    if ( type_alloc == 2 ) undo_buf->disk        = 1 ;
    else                   undo_buf->img.fd_addr = malloc( taille ) ;
    if ( undo_buf->disk || ( undo_buf->img.fd_addr != NULL ) )
    {
      undo_buf->img.fd_w       = larg ; /* Storage is available: describe */
      undo_buf->img.fd_h       = wy ;   /* the saved raster               */
      undo_buf->img.fd_wdwidth = larg/16 ;
      undo_buf->img.fd_nplanes = nb_plane ;
      undo_buf->img.fd_stand   = 0 ;
      undo_buf->gwindow        = gwnd ;
      undo_buf->x              = x1 ;
      undo_buf->y              = y1 ;
      undo_buf->w              = wx ;
      undo_buf->h              = wy ;
      undo_buf->mitem          = type_modif ;
      undo_buf->spec           = spec ;

      /* Source rectangle in the image, destination at the buffer origin */
      xy[0] = x1 ;   xy[1] = y1 ;
      xy[2] = x2 ;   xy[3] = y2 ;
      xy[4] = 0 ;    xy[5] = 0 ;
      xy[6] = wx-1 ; xy[7] = wy-1 ;
      if ( undo_buf->disk ) undo_disk( gwnd, undo_buf, xy, &vimage->raster ) ;
      else                  vro_cpyfm( handle, S_ONLY, xy, &vimage->raster, &undo_buf->img ) ;
    }
    else
    {
      /* Image buffer allocation failed: spec was never attached to the
         record, so it has to be released here */
      free( spec ) ;
      spec = NULL ;
      err  = -3 ;
    }
  }

  if ( !err )
  {
    LIST_ENTRY* entry = GET_LIST_ENTRY_FROM_UNDO_DEF( undo_buf ) ;

    InsertHeadList( &vimage->UndoListHead, entry ) ;
    if ( type_modif != REDO ) FreeUUndoBuffer( gwnd ) ;
  }
  else
  {
    FreeUndoBuffer( undo_buf ) ;
    form_stop( 1, msg[MSG_UNDOERROR] ) ;
  }

  return( err ) ;
}
Exemplo n.º 20
0
/*
 * SSE2 version of the per-channel exponent pre-processing for one block.
 *
 * exp          exponents, modified in place; exp[0] is the DC exponent
 * ncoefs       number of coefficients, used to look up the group count
 * exp_strategy exponent strategy; selects the group size
 *
 * For group sizes above 1 the minimum of each group is taken, neighbouring
 * group values are limited to a difference of at most 2 in both directions,
 * and every group value is written back to all exponents of its group.
 * NOTE(review): grpsize is presumably 1, 2 or 4 for D15/D25/D45 -- the
 * EXP_* constants are not visible here, confirm against their definition.
 */
void
encode_exp_blk_ch_sse2(uint8_t *exp, int ncoefs, int exp_strategy)
{
    int grpsize, ngrps, i, k, exp_min1, exp_min2;
    uint8_t v;

    ngrps = nexpgrptab[exp_strategy-1][ncoefs] * 3;
    grpsize = exp_strategy + (exp_strategy == EXP_D45);

    // for D15 strategy, there is no need to group/ungroup exponents
    switch (grpsize) {
    case 1: {
        // constraint for DC exponent
        exp[0] = MIN(exp[0], 15);

        // Decrease the delta between each groups to within 2
        // so that they can be differentially encoded
        for (i = 1; i <= ngrps; i++)
            exp[i] = MIN(exp[i], exp[i-1]+2);
        for (i = ngrps-1; i >= 0; i--)
            exp[i] = MIN(exp[i], exp[i+1]+2);

        return;
    }
    // for each group, compute the minimum exponent
    case 2: {
        ALIGN16(uint16_t) exp1[256];
        ALIGN16(const union __m128iui) vmask = {{0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff}};

        // 8 groups per iteration: a byte-wise min of the 16 loaded exponents
        // against themselves shifted down by one byte leaves min(pair) in
        // the low byte of each 16-bit lane; the mask keeps the high byte 0
        i=0; k=1;
        for(; i < (ngrps & ~7); i += 8, k += 16) {
            __m128i v1 = _mm_loadu_si128((__m128i*)&exp[k]);
            __m128i v2 = _mm_srli_si128(v1, 1);
            v1 = _mm_and_si128(v1, vmask.v);
            v1 = _mm_min_epu8(v1, v2);
            _mm_store_si128((__m128i*)&exp1[i], v1);
        }
        // scalar tail for the remaining (ngrps & 7) groups;
        // the cases fall through on purpose
        switch (ngrps & 7) {
        case 7:
            exp1[i] = MIN(exp[k], exp[k+1]);
            ++i;
            k += 2;
        case 6:
            exp1[i] = MIN(exp[k], exp[k+1]);
            ++i;
            k += 2;
        case 5:
            exp1[i] = MIN(exp[k], exp[k+1]);
            ++i;
            k += 2;
        case 4:
            exp1[i] = MIN(exp[k], exp[k+1]);
            ++i;
            k += 2;
        case 3:
            exp1[i] = MIN(exp[k], exp[k+1]);
            ++i;
            k += 2;
        case 2:
            exp1[i] = MIN(exp[k], exp[k+1]);
            ++i;
            k += 2;
        case 1:
            exp1[i] = MIN(exp[k], exp[k+1]);
        case 0:
            ;
        }
        // constraint for DC exponent
        exp[0] = MIN(exp[0], 15);
        // Decrease the delta between each groups to within 2
        // so that they can be differentially encoded
        exp1[0] = MIN(exp1[0], (uint16_t)exp[0]+2);
        for (i = 1; i < ngrps; i++)
            exp1[i] = MIN(exp1[i], exp1[i-1]+2);
        for (i = ngrps-2; i >= 0; i--)
            exp1[i] = MIN(exp1[i], exp1[i+1]+2);
        // now we have the exponent values the decoder will see
        exp[0] = MIN(exp[0], exp1[0]+2); // DC exponent is handled separately

        // expand back: OR-ing each 16-bit lane with itself shifted up one
        // byte copies the group value into both exponents of the group
        i=0; k=1;
        for (; i < (ngrps & ~7); i += 8, k += 16) {
            __m128i v1 = _mm_load_si128((__m128i*)&exp1[i]);
            __m128i v2 = _mm_slli_si128(v1, 1);
            v1 = _mm_or_si128(v1, v2);
            _mm_storeu_si128((__m128i*)&exp[k], v1);
        }
        // scalar tail of the expansion; the cases fall through on purpose
        switch (ngrps & 7) {
        case 7:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            ++i;
            k += 2;
        case 6:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            ++i;
            k += 2;
        case 5:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            ++i;
            k += 2;
        case 4:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            ++i;
            k += 2;
        case 3:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            ++i;
            k += 2;
        case 2:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            ++i;
            k += 2;
        case 1:
            v = (uint8_t)exp1[i];
            exp[k] = v;
            exp[k+1] = v;
        case 0:
            ;
        }
        return;
        }
    default: {
        ALIGN16(uint32_t) exp1[256];
        ALIGN16(const union __m128iui) vmask2 = {{0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff}};

        // 4 groups of 4 exponents per iteration: two shift-and-min steps
        // leave the minimum of each 4-byte group in the low byte of its
        // 32-bit lane; the mask clears the other three bytes
        i=0; k=1;
        for (; i < (ngrps & ~3); i += 4, k += 16) {
            __m128i v1 = _mm_loadu_si128((__m128i*)&exp[k]);
            __m128i v2 = _mm_srli_si128(v1, 1);
            v1 = _mm_min_epu8(v1, v2);
            v2 = _mm_srli_si128(v1, 2);
            v1 = _mm_min_epu8(v1, v2);
            v1 = _mm_and_si128(v1, vmask2.v);
            _mm_store_si128((__m128i*)&exp1[i], v1);
        }
        // scalar tail for the remaining (ngrps & 3) groups;
        // the cases fall through on purpose
        switch (ngrps & 3) {
        case 3:
            exp_min1 = MIN(exp[k  ], exp[k+1]);
            exp_min2 = MIN(exp[k+2], exp[k+3]);
            exp1[i]  = MIN(exp_min1, exp_min2);
            ++i;
            k += 4;
        case 2:
            exp_min1 = MIN(exp[k  ], exp[k+1]);
            exp_min2 = MIN(exp[k+2], exp[k+3]);
            exp1[i]  = MIN(exp_min1, exp_min2);
            ++i;
            k += 4;
        case 1:
            exp_min1 = MIN(exp[k  ], exp[k+1]);
            exp_min2 = MIN(exp[k+2], exp[k+3]);
            exp1[i]  = MIN(exp_min1, exp_min2);
        case 0:
            ;
        }
        // constraint for DC exponent
        exp[0] = MIN(exp[0], 15);
        // Decrease the delta between each groups to within 2
        // so that they can be differentially encoded
        exp1[0] = MIN(exp1[0], (uint32_t)exp[0]+2);
        for (i = 1; i < ngrps; i++)
            exp1[i] = MIN(exp1[i], exp1[i-1]+2);
        for (i = ngrps-2; i >= 0; i--)
            exp1[i] = MIN(exp1[i], exp1[i+1]+2);
        // now we have the exponent values the decoder will see
        exp[0] = MIN(exp[0], exp1[0]+2); // DC exponent is handled separately

        // expand back: shift-and-OR by 1 byte and then by 2 bytes copies
        // the low byte of each 32-bit lane into all four bytes of the lane
        i=0; k=1;
        for (; i < (ngrps & ~3); i += 4, k += 16) {
            __m128i v1 = _mm_load_si128((__m128i*)&exp1[i]);
            __m128i v2 = _mm_slli_si128(v1, 1);
            v1 = _mm_or_si128(v1, v2);
            v2 = _mm_slli_si128(v1, 2);
            v1 = _mm_or_si128(v1, v2);
            _mm_storeu_si128((__m128i*)&exp[k], v1);
        }
        // scalar tail of the expansion; the cases fall through on purpose
        switch (ngrps & 3) {
        case 3:
            v = exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            exp[k+2] = v;
            exp[k+3] = v;
            ++i;
            k += 4;
        case 2:
            v = exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            exp[k+2] = v;
            exp[k+3] = v;
            ++i;
            k += 4;
        case 1:
            v = exp1[i];
            exp[k] = v;
            exp[k+1] = v;
            exp[k+2] = v;
            exp[k+3] = v;
        case 0:
            ;
        }
        return;
    }
    }
}
Exemplo n.º 21
0
    /**
     * Sets up a Quick Sync H.264 encoder.
     *
     * Tries each entry of validImpl until the session initializes, fills in
     * the encoder parameters, initializes the encoder, sizes the bitstream
     * buffer (32-byte aligned) and finally fetches the stream headers.
     *
     * @param fps_         frame rate, converted to a FrameRateExtN/D pair
     * @param width        picture width; the surface width is ALIGN16(width)
     * @param height       picture height; the surface height is ALIGN16(height)
     * @param quality      not used by this constructor
     * @param preset       not used by this constructor
     * @param bUse444      selects YUV444 instead of YUV420 chroma format
     * @param maxBitrate   used for both TargetKbps and MaxKbps
     * @param bufferSize   configured buffer size; the larger of this and the
     *                     size reported by the encoder is used
     * @param bUseCFR_     stored in bUseCFR
     * @param bDupeFrames_ stored in bDupeFrames
     *
     * A failed session or encoder initialization does not abort
     * construction (unchanged behavior), but it is now written to the log
     * instead of being dropped silently.
     */
    QSVEncoder(int fps_, int width, int height, int quality, CTSTR preset, bool bUse444, int maxBitrate, int bufferSize, bool bUseCFR_, bool bDupeFrames_)
        : enc(nullptr)
    {
        Log(TEXT("------------------------------------------"));
        bool bSessionReady = false;
        for(size_t i = 0; i < sizeof(validImpl)/sizeof(validImpl[0]); i++)
        {
            mfxIMPL impl = validImpl[i];
            mfxVersion ver = version;
            auto result = session.Init(impl, &ver);
            if(result == MFX_ERR_NONE)
            {
                Log(TEXT("QSV version %u.%u using %s"), ver.Major, ver.Minor, implStr[impl]);
                bSessionReady = true;
                break;
            }
        }
        if(!bSessionReady)
            Log(TEXT("QSV: no implementation could initialize the session"));

        fps = fps_;

        bUseCBR = AppConfig->GetInt(TEXT("Video Encoding"), TEXT("UseCBR")) != 0;
        bUseCFR = bUseCFR_;
        bDupeFrames = bDupeFrames_;

        memset(&params, 0, sizeof(params));
        params.AsyncDepth = 1;
        params.mfx.CodecId = MFX_CODEC_AVC;
        params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;
        params.mfx.TargetKbps = maxBitrate;
        params.mfx.MaxKbps = maxBitrate;
        params.mfx.InitialDelayInKB = 1;
        //params.mfx.GopRefDist = 1;
        //params.mfx.NumRefFrame = 0;
        params.mfx.RateControlMethod = bUseCBR ? MFX_RATECONTROL_CBR : MFX_RATECONTROL_VBR;
        params.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;

        auto& fi = params.mfx.FrameInfo;
        ConvertFrameRate(fps, fi.FrameRateExtN, fi.FrameRateExtD);

        fi.FourCC = MFX_FOURCC_NV12;
        fi.ChromaFormat = bUse444 ? MFX_CHROMAFORMAT_YUV444 : MFX_CHROMAFORMAT_YUV420;
        fi.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

        // surface dimensions are rounded up to a multiple of 16; the crop
        // rectangle carries the real picture size
        fi.Width = ALIGN16(width);
        fi.Height = ALIGN16(height);

        fi.CropX = 0;
        fi.CropY = 0;
        fi.CropW = width;
        fi.CropH = height;

        this->width  = width;
        this->height = height;

        enc.reset(new MFXVideoENCODE(session));
        enc->Close();

        // Init may also return a non-fatal warning status, so the value is
        // only reported, not treated as an error
        auto result = enc->Init(&params);
        if(result != MFX_ERR_NONE)
            Log(TEXT("QSV: encoder Init returned status %d"), (int)result);

        memset(&enc_surf, 0, sizeof(enc_surf));
        memcpy(&enc_surf.Info, &params.mfx.FrameInfo, sizeof(enc_surf.Info));

        // read back the parameters the encoder actually uses
        decltype(params) query;
        memcpy(&query, &params, sizeof(params));
        enc->GetVideoParam(&query);

        // 31 spare bytes allow bs.Data to be rounded up to a 32-byte boundary
        unsigned size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8);
        bs_buff.SetSize(size+31);//.resize(size+31);
        bs.Data = (mfxU8*)(((size_t)bs_buff.Array() + 31) / 32 * 32);
        bs.MaxLength = size;
        params.mfx.BufferSizeInKB = size/1000;
        Log(TEXT("Buffer size: %u configured, %u suggested by QSV; using %u"),
            bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, size*8/1024);

        Log(TEXT("------------------------------------------"));
        Log(TEXT("%s"), GetInfoString().Array());
        Log(TEXT("------------------------------------------"));

        memset(&ctrl, 0, sizeof(ctrl));
        ctrl.FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR;

        DataPacket packet;
        GetHeaders(packet);
    }
Exemplo n.º 22
0
/*
 * Bump allocator over mainMemory.
 *
 * Reserves ALIGN16(size) bytes starting at the current offset and returns
 * their address, or (void*)-1 once the running total exceeds TOTAL_MEMORY.
 * The running total is advanced even when the request fails; nothing is
 * rolled back.
 */
void * alloc( int size ){
	void * const base = mainMemory + allocated;

	allocated += ALIGN16(size);
	return (allocated > TOTAL_MEMORY) ? (void*)-1 : base;
}
Exemplo n.º 23
0
/*
 * Fetch one SPECommand from main memory at program_data_ea and execute it.
 *
 *   SPE_CMD_INIT            reset state, store data type and gap
 *                           penalties, allocate the database buffers
 *   SPE_CMD_CREATE_PROFILE  download query and scoring matrix, build the
 *                           first profile block locally
 *   SPE_CMD_PUT_PROFILE     build the profile block by block and upload
 *                           it to main memory
 *   SPE_CMD_GET_PROFILE     download the first block of a profile that
 *                           lives in main memory
 *   SPE_CMD_ALIGN           download a database sequence and run the
 *                           local alignment
 *
 * PUT_PROFILE and ALIGN write the command structure back to
 * program_data_ea so the caller can read the result fields.
 *
 * Returns 0 on success and -1 for an unknown command, a command that is
 * not valid in the current state, a database sequence longer than the
 * length announced at INIT, or insufficient local memory.
 */
int handleCommand( ppu_addr_t program_data_ea ){
	SPECommand cmd __ALIGNED__;
	int i;
	/* Load the command descriptor and wait for the transfer (tag 0) */
	mfc_get(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0);
	mfc_write_tag_mask(1<<0);
	mfc_read_tag_status_all();

	switch(cmd.command) {
	case SPE_CMD_INIT: /* resets stored data */
		reset();
		datatype = cmd.data.INIT.datatype;
		if(datatype < 0 || datatype > 4) {
			datatype = -1;
			return -1;
		}
		fixedDel = cmd.data.INIT.fixedDel;
		incDel = cmd.data.INIT.incDel;
		maxDbLen = cmd.data.INIT.dbMaxLen;

		/* reset some variables */
		profile = NULL;
		remote_profile = 0;
		blockStart = 0;
		blockSize = 0;
		s1 = NULL;
		ls1 = 0;
		simi = NULL;

		/* allocate memory for database string and inter-block
		 * buffers */
		s2 = (char *)alloc( maxDbLen*sizeof(char) );
		maxS = alloc( maxDbLen*dataSize[datatype] );
		delS = alloc( maxDbLen*dataSize[datatype] );
		break;

	case SPE_CMD_CREATE_PROFILE: /* downloads query sequence and scoring matrix and initializes the profile */
		if(profile != NULL || datatype == -1) return -1;

		/* value range covers both the matrix entries and the gap penalties */
		mn = min(cmd.data.CREATE_PROFILE.matrix.min,min(fixedDel,incDel));
		mx = max(cmd.data.CREATE_PROFILE.matrix.max,max(fixedDel,incDel));
		ls1 = cmd.data.CREATE_PROFILE.query.len;

		/* allocate and load query sequence (tag 0), at most
		 * MAX_TRANSFER bytes per DMA request */
		s1 = alloc( ls1*sizeof(char) );
		for( i=0; i<ls1; i+=MAX_TRANSFER )
			mfc_get( s1+i, cmd.data.CREATE_PROFILE.query.addr+i, ALIGN16(min(ls1-i, MAX_TRANSFER)*sizeof(char)), 0, 0, 0 );

		/* allocate and load matrix (tag 1) */
		simi = alloc( MATRIX_DIM*MATRIX_DIM*dataSize[datatype] );
		mfc_get( simi, cmd.data.CREATE_PROFILE.matrix.addr, ALIGN16(MATRIX_DIM*MATRIX_DIM*dataSize[datatype]), 1, 0, 0 );

		/* wait for DMA to finish (both tags) */
		mfc_write_tag_mask((1<<0)|(1<<1));
		mfc_read_tag_status_all();

		/* compute block size and allocate memory: each query position
		 * needs MATRIX_DIM profile entries plus the three per-position
		 * buffers allocated below */
		if(memRemaining() <= 0) return -1;
		blockSize=(memRemaining() / ((MATRIX_DIM+3)*dataSize[datatype])) & -16;
		if (blockSize < 50) return -1;
		blockSize = ALIGN16(min(blockSize,ls1));

		/* allocate memory and initialize profile */
		profile  = alloc( blockSize * MATRIX_DIM * dataSize[datatype] );
		loadOpt  = alloc( blockSize * dataSize[datatype] );
		storeOpt = alloc( blockSize * dataSize[datatype] );
		rD       = alloc( blockSize * dataSize[datatype] );
		
		blockStart = 0;
#ifdef DEBUG_FETCH
		printf(">>>> creating profile\n");
#endif
		createProfile[datatype]();
		break;

	case SPE_CMD_PUT_PROFILE: /* upload profile to main memory */
		if(profile == NULL || s1 == NULL) return -1;

		/* normally we would expect the first block of the profile is
		 * already present in memory. If not generate it */
		if(blockStart != 0) {
			blockStart = 0;
			createProfile[datatype]();
		}
		cmd.data.PUT_PROFILE.blockSize = blockSize;

		/* create profile blockwise and upload it to main memory */
		for(blockStart=0; blockStart<ls1; blockStart+=blockSize ) {
			int64_t bs;
			int currentBlockSize = ALIGN16(min(ls1-blockStart,blockSize));
			if(blockStart != 0) createProfile[datatype]();

			for( bs=0; bs<currentBlockSize * MATRIX_DIM * dataSize[datatype]; bs+=MAX_TRANSFER ) {
				mfc_put( ((char*)profile)+bs, cmd.data.PUT_PROFILE.addr+blockStart*MATRIX_DIM*dataSize[datatype]+bs, ALIGN16(min(currentBlockSize*MATRIX_DIM*dataSize[datatype]-bs, (int64_t)MAX_TRANSFER)), 0, 0, 0 );

				/* wait for DMA to finish */
				mfc_write_tag_mask(1<<0);
				mfc_read_tag_status_all();
			}
		}

		/* Write back the data */
		mfc_put(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0);
		mfc_write_tag_mask(1<<0);
		mfc_read_tag_status_all();
		break;

	case SPE_CMD_GET_PROFILE: /* download profile from main memory */
		if(datatype == -1 || profile != NULL) return -1;
		remote_profile = cmd.data.GET_PROFILE.profile.addr;
		
		mn = min(cmd.data.GET_PROFILE.profile.min,min(fixedDel,incDel));
		mx = max(cmd.data.GET_PROFILE.profile.max,max(fixedDel,incDel));
		ls1 = cmd.data.GET_PROFILE.profile.len;
		blockSize = cmd.data.GET_PROFILE.profile.blockSize;

		profile  = alloc( blockSize * MATRIX_DIM * dataSize[datatype] );
		loadOpt  = alloc( blockSize * dataSize[datatype] );
		storeOpt = alloc( blockSize * dataSize[datatype] );
		rD       = alloc( blockSize * dataSize[datatype] );
		/* alloc() keeps counting past the limit, so a single check
		 * after all four requests detects an overflow */
		if(memRemaining() < 0) return -1;

		blockStart = 0;
#ifdef DEBUG_FETCH
		printf(">>>> fetching profile (%d bytes)\n",ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]));
#endif
		for( i=0; i<ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]); i+=MAX_TRANSFER ) {
			mfc_get( ((char*)profile)+i, remote_profile+i, ALIGN16(min(blockSize*MATRIX_DIM*dataSize[datatype]-i, (int64_t)MAX_TRANSFER)), 0, 0, 0 );

			/* wait for DMA to finish */
			mfc_write_tag_mask(1<<0);
			mfc_read_tag_status_all();
		}
		break;

	case SPE_CMD_ALIGN: /* perform a local alignment */
		if(profile == NULL) return -1;

		/* s2 was sized for maxDbLen characters by SPE_CMD_INIT; a
		 * longer sequence would be transferred past the end of that
		 * buffer, so refuse it before touching any state */
		if(cmd.data.ALIGN.db.len > maxDbLen) return -1;

		ls2 = cmd.data.ALIGN.db.len;

		/* download database sequence */
		for( i=0; i<ls2; i+=MAX_TRANSFER )
			mfc_get( s2+i, cmd.data.ALIGN.db.addr+i, ALIGN16(min(ls2-i, MAX_TRANSFER)*sizeof(char)), 0, 0, 0 );
		mfc_write_tag_mask(1<<0);
		mfc_read_tag_status_all();

		/* make sure the first profile block is the one held locally:
		 * rebuild it, or fetch it again when the profile is remote */
		if(blockStart != 0) {
			if(remote_profile == 0) {
				blockStart = 0;
#ifdef DEBUG_FETCH
				printf(">>>> creating profile\n");
#endif
				createProfile[datatype]();
			} else {
				blockStart = 0;
#ifdef DEBUG_FETCH
				printf(">>>> fetching profile (%d bytes)\n",ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]));
#endif
				for( i=0; i<ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]); i+=MAX_TRANSFER ) {
					mfc_get( ((char*)profile)+i, remote_profile+i, ALIGN16(min(blockSize*MATRIX_DIM*dataSize[datatype]-i, (int64_t)MAX_TRANSFER)), 0, 0, 0 );

					/* wait for DMA to finish */
					mfc_write_tag_mask(1<<0);
					mfc_read_tag_status_all();
				}
			}
		}

		cmd.data.ALIGN.result = dynProgLocal[datatype]();

		/* Write back the data */
		mfc_put(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0);
		mfc_write_tag_mask(1<<0);
		mfc_read_tag_status_all();
		break;

	default:
		return -1;
	}
	return 0;
}
Exemplo n.º 24
0
/*
 * Apply gamma correction to the image described by in.
 *
 * in      image to modify in place (palette or true-color raster)
 * params  Param[0] is the global gamma and Param[1..3] the red, green and
 *         blue gammas, all in hundredths; x1,y1,x2,y2 bound the area
 *         processed for true-color images
 * out     not used
 *
 * When the global gamma differs from the one last seen, it is copied into
 * the three per-component parameters. Images with up to 8 planes have
 * their palette entries corrected; deeper images are remapped line by line
 * through Vapi, with progress reported every 16th line.
 *
 * Returns ELDV_GENERALFAILURE if no palette is available for a palette
 * image, ELDV_CANCELLED if the progress callback requested a stop, and
 * ELDV_NOERROR otherwise.
 *
 * Differences from the previous version:
 *  - the true-color branch no longer declares its own 'cancel', so a
 *    cancellation is actually returned to the caller;
 *  - progress is computed without dividing by zero when y1 == y2;
 *  - each palette component is corrected from its own value; previously,
 *    equal gammas caused the corrected red value to be copied into green
 *    and blue, discarding their original values.
 */
LDV_STATUS cdecl Run(LDV_IMAGE *in, LDV_PARAMS *params, LDV_IMAGE *out)
{
  LDV_PALETTE    *vdi_palette = &in->Palette ;
  VDI_ELEMENTPAL *vdi_epal ;
  double         rgamma, ggamma, bgamma ;
  short          vdi_index ;
  short          cancel = 0 ;

  /* A new global gamma overrides the three per-component settings */
  if ( (params->Param[0].s / 100.0) != gamma_rgb )
  {
    gamma_rgb          = params->Param[0].s / 100.0 ;
    params->Param[1].s = params->Param[0].s ;
    params->Param[2].s = params->Param[0].s ;
    params->Param[3].s = params->Param[0].s ;
  }

  rgamma = params->Param[1].s / 100.0 ;
  ggamma = params->Param[2].s / 100.0 ;
  bgamma = params->Param[3].s / 100.0 ;
  if ( in->Raster.fd_nplanes <= 8 )
  {
    /* Palette image: correct every palette entry (components are passed
       to GammaFunc with a maximum of 1000) */
    if ( vdi_palette == NULL ) return( ELDV_GENERALFAILURE ) ;
    vdi_epal = vdi_palette->Pal ;
    if ( vdi_epal == NULL ) return( ELDV_GENERALFAILURE ) ;
    for ( vdi_index = 0; vdi_index < vdi_palette->NbColors; vdi_index++, vdi_epal++ )
    {
      vdi_epal->Red   = (short) ( 0.5 + GammaFunc( vdi_epal->Red,   1000.0, rgamma ) ) ;
      vdi_epal->Green = (short) ( 0.5 + GammaFunc( vdi_epal->Green, 1000.0, ggamma ) ) ;
      vdi_epal->Blue  = (short) ( 0.5 + GammaFunc( vdi_epal->Blue,  1000.0, bgamma ) ) ;
    }
  }
  else
  {
    REMAP_COLORS   rc ;
    MFDB           *img = &in->Raster ;
    long           *pt_line32, nb_pts_in_line ;
    long           nb_lines_span = (long)(params->y2 - params->y1) ;
    short          *pt_line16, y, pc, is_15bits ;
    unsigned short nb_bits_red=8, nb_bits_green=8, nb_bits_blue=8 ; /* 32-bit layout by default */

    CHECK_VAPI(Vapi) ;

    rc.red    = RedRemap ;
    rc.green  = GreenRemap ;
    rc.blue   = BlueRemap ;
    rc.nb_pts = (long) (1 + params->x2 - params->x1) ;
    is_15bits = Vapi->RaIs15Bits() ;
    if ( img->fd_nplanes == 16 )
    {
      nb_bits_red   = 5 ;
      nb_bits_green = is_15bits ? 5:6 ;
      nb_bits_blue  = 5 ;
    }
    /* Refresh the precomputed per-component remap tables when needed */
    if ( rgamma != gamma_red )   ChangeColors( rgamma, RedRemap,   nb_bits_red ) ;
    if ( ggamma != gamma_green ) ChangeColors( ggamma, GreenRemap, nb_bits_green ) ;
    if ( bgamma != gamma_blue )  ChangeColors( bgamma, BlueRemap,  nb_bits_blue ) ;

    /* Lines are stored with a width rounded up to 16 pixels; both pointers
       address pixel (x1,y1), only the one matching the depth is used */
    nb_pts_in_line = ALIGN16(img->fd_w) ;
    pt_line16  = (short *) img->fd_addr ;
    pt_line16 += (long)(params->y1) * nb_pts_in_line ;
    pt_line16 += params->x1 ;
    pt_line32  = (long *) img->fd_addr ;
    pt_line32 += (long)(params->y1) * nb_pts_in_line ;
    pt_line32 += params->x1 ;
    for ( y = params->y1; !cancel && (y <= params->y2); y++ )
    {
      if ( img->fd_nplanes == 16 )
      {
        rc.pt_img  = pt_line16 ;
        pt_line16 += nb_pts_in_line ;
        if ( is_15bits ) Vapi->RaTC15RemapColors( &rc ) ;
        else             Vapi->RaTC16RemapColors( &rc ) ;
      }
      else
      {
        rc.pt_img  = pt_line32 ;
        pt_line32 += nb_pts_in_line ;
        Vapi->RaTC32RemapColors( &rc ) ;
      }
      /* Report progress every 16th line; the callback may request a stop */
      if ( ( y & 0x0F ) == 0x0F )
      {
        if ( nb_lines_span > 0 ) pc = (short) ( ( 100L * (long)(y - params->y1) ) / nb_lines_span ) ;
        else                     pc = 100 ; /* single line: nothing left to do */
        cancel = Vapi->PrSetProgEx( pc ) ;
      }
    }
  }
  gamma_red   = rgamma ;
  gamma_green = ggamma ;
  gamma_blue  = bgamma ;

  return( cancel ? ELDV_CANCELLED : ELDV_NOERROR ) ;
}
Exemplo n.º 25
0
/*
 * Sets up the input slots of the application context.
 *
 * The number of slots is the larger of DEFAULT_NUM_INPUT_BUFS and the minimum
 * reported in s_get_buf_info_op, capped at DEFAULT_MAX_INPUT_BUFS.  Each slot
 * receives three allocations made through ih264a_aligned_malloc() with an
 * alignment argument of 16: the picture buffer, the per-MB info buffer and the
 * pic-info buffer.  A failed allocation is reported through codec_exit().
 * Slots that were populated are flagged with u4_is_free = 1; the whole slot
 * array is zeroed beforehand.
 */
void allocate_input(app_ctxt_t *ps_app_ctxt)
{
    ih264e_ctl_getbufinfo_op_t *ps_buf_info = &ps_app_ctxt->s_get_buf_info_op;
    CHAR ac_error[STRLENGTH];
    UWORD8 *pu1_mem;
    WORD32 buf_cnt;
    WORD32 buf_idx;
    WORD32 y_size;
    WORD32 uv_size;
    WORD32 frame_size;
    WORD32 mb_cnt;

    /* How many slots to populate */
    buf_cnt = MAX(DEFAULT_NUM_INPUT_BUFS, ps_buf_info->s_ive_op.u4_min_inp_bufs);
    buf_cnt = MIN(DEFAULT_MAX_INPUT_BUFS, buf_cnt);

    /* One picture: luma plane plus chroma worth half the luma size */
    y_size = ps_app_ctxt->u4_wd * ps_app_ctxt->u4_ht;
    uv_size = y_size >> 1;
    frame_size = y_size + uv_size;

    /* Count of 16x16 units in the 16-aligned maximum dimensions */
    mb_cnt = ALIGN16(ps_app_ctxt->u4_max_wd) *  ALIGN16(ps_app_ctxt->u4_max_ht);
    mb_cnt /= 256;

    /* Clear every slot first so that is_free starts out as 0 */
    memset(ps_app_ctxt->as_input_buf, 0, sizeof(input_buf_t) * DEFAULT_MAX_INPUT_BUFS);

    for(buf_idx = 0; buf_idx < buf_cnt; buf_idx++)
    {
        input_buf_t *ps_slot = &ps_app_ctxt->as_input_buf[buf_idx];

        /* Picture data */
        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, frame_size);
        if(NULL == pu1_mem)
        {
            sprintf(ac_error, "Allocation failed for input buffer of size %d\n",
                    frame_size);
            codec_exit(ac_error);
        }
        ps_slot->pu1_buf = pu1_mem;

        /* Per-macroblock info */
        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, mb_cnt * sizeof(ih264e_mb_info_t));
        if(NULL == pu1_mem)
        {
            sprintf(ac_error, "Allocation failed for mb info buffer of size %d\n",
                    (WORD32)(mb_cnt * sizeof(ih264e_mb_info_t)));
            codec_exit(ac_error);
        }
        ps_slot->pv_mb_info = pu1_mem;

        /* Per-picture info */
        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, sizeof(ih264e_pic_info2_t));
        if(NULL == pu1_mem)
        {
            sprintf(ac_error, "Allocation failed for pic info buffer of size %d\n",
                   (WORD32) sizeof(ih264e_pic_info2_t));
            codec_exit(ac_error);
        }
        ps_slot->pv_pic_info = pu1_mem;

        ps_slot->u4_buf_size = frame_size;
        ps_slot->u4_is_free = 1;
    }
}
Exemplo n.º 26
0
    // Constructs a QSV AVC encoder.
    //
    // Steps, in order: initialise `session` with the first entry of validImpl that
    // accepts API minor version 6 or 4; fill `params` for the requested bitrate, frame
    // rate and picture size; create and initialise the encoder object; then set up the
    // encode tasks (bitstream output) and the frame storage (input planes) out of two
    // flat, manually aligned buffers.
    //
    //   fps_         frame rate, converted with ConvertFrameRate()
    //   width/height requested picture size; stored as the crop rectangle, while the
    //                coded size is rounded with ALIGN16
    //   bUse444      selects MFX_CHROMAFORMAT_YUV444 instead of YUV420
    //   maxBitrate   becomes MaxKbps; TargetKbps is set to 90% of it
    //   bufferSize   lower bound for the per-task bitstream buffer (scaled by 1024/8)
    //   bUseCFR_, bDupeFrames_  copied into the matching members
    //
    // NOTE(review): `quality` and `preset` are not referenced anywhere in this body.
    QSVEncoder(int fps_, int width, int height, int quality, CTSTR preset, bool bUse444, int maxBitrate, int bufferSize, bool bUseCFR_, bool bDupeFrames_)
        : enc(nullptr)
    {
        Log(TEXT("------------------------------------------"));
        // Walk the candidate implementations; for each one try minor version 6, then 4,
        // and stop at the first successful session.Init().
        // NOTE(review): if every attempt fails, construction simply carries on; no
        // failure path is visible here.
        for(int i = 0; i < sizeof(validImpl)/sizeof(validImpl[0]); i++)
        {
            mfxIMPL impl = validImpl[i];
            ver = version;
            mfxStatus result = MFX_ERR_UNKNOWN;
            for(ver.Minor = 6; ver.Minor >= 4; ver.Minor -= 2)
            {
                result = session.Init(impl, &ver);
                if(result == MFX_ERR_NONE)
                {
                    Log(TEXT("QSV version %u.%u using %s"), ver.Major, ver.Minor, implStr[impl]);
                    break;
                }
            }
            if(result == MFX_ERR_NONE)
                break;
        }

        session.SetPriority(MFX_PRIORITY_HIGH);

        fps = fps_;

        bUseCBR = AppConfig->GetInt(TEXT("Video Encoding"), TEXT("UseCBR")) != 0;
        bUseCFR = bUseCFR_;
        bDupeFrames = bDupeFrames_;

        // Encoder configuration; everything not set explicitly below stays zero.
        memset(&params, 0, sizeof(params));
        //params.AsyncDepth = 0;
        params.mfx.CodecId = MFX_CODEC_AVC;
        params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;//SPEED;
        // Target is 90% of the configured maximum bitrate.
        params.mfx.TargetKbps = (mfxU16)(maxBitrate*0.9);
        params.mfx.MaxKbps = maxBitrate;
        //params.mfx.InitialDelayInKB = 1;
        //params.mfx.GopRefDist = 1;
        //params.mfx.NumRefFrame = 0;
        params.mfx.GopPicSize = 61;
        params.mfx.GopRefDist = 3;
        params.mfx.GopOptFlag = MFX_GOP_STRICT;
        params.mfx.IdrInterval = 2;
        params.mfx.NumSlice = 1;

        params.mfx.RateControlMethod = bUseCBR ? MFX_RATECONTROL_CBR : MFX_RATECONTROL_VBR;
        params.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;

        auto& fi = params.mfx.FrameInfo;
        ConvertFrameRate(fps, fi.FrameRateExtN, fi.FrameRateExtD);

        fi.FourCC = MFX_FOURCC_NV12;
        fi.ChromaFormat = bUse444 ? MFX_CHROMAFORMAT_YUV444 : MFX_CHROMAFORMAT_YUV420;
        fi.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

        // Coded dimensions are rounded with ALIGN16; the crop rectangle keeps the
        // size the caller actually asked for.
        fi.Width = ALIGN16(width);
        fi.Height = ALIGN16(height);

        fi.CropX = 0;
        fi.CropY = 0;
        fi.CropW = width;
        fi.CropH = height;

        this->width  = width;
        this->height = height;

        // NOTE(review): Close() is called on the freshly created encoder before Init();
        // the reason is not evident from this code.
        enc.reset(new MFXVideoENCODE(session));
        enc->Close();

        // NOTE(review): the status codes returned by QueryIOSurf(), Init() and
        // GetVideoParam() are not inspected.
        mfxFrameAllocRequest req;
        memset(&req, 0, sizeof(req));
        enc->QueryIOSurf(&params, &req);

        enc->Init(&params);

        // `query` starts as a copy of the requested parameters and is then handed to
        // GetVideoParam().
        decltype(params) query;
        memcpy(&query, &params, sizeof(params));
        enc->GetVideoParam(&query);

        // Never use fewer than 6 encode tasks.
        unsigned num_surf = max(6, req.NumFrameSuggested + params.AsyncDepth);

        encode_tasks.SetSize(num_surf);

        // Per-task bitstream size: the larger of the value reported in `query` and the
        // configured buffer size (presumably kilobits, given the *1024/8 -- confirm).
        // The extra 31 bytes leave room to round the start up to a multiple of 32.
        const unsigned bs_size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8);
        bs_buff.SetSize(bs_size * encode_tasks.Num() + 31);
        // Written after Init(); read again by the "Buffer size" log line below.
        params.mfx.BufferSizeInKB = bs_size/1000;

        mfxU8* bs_start = (mfxU8*)(((size_t)bs_buff.Array() + 31)/32*32);
        // Give every task a zeroed surface carrying the frame info, and its own
        // bs_size-byte slice of bs_buff; all tasks start out in idle_tasks.
        for(unsigned i = 0; i < encode_tasks.Num(); i++)
        {
            encode_tasks[i].sp = nullptr;

            mfxFrameSurface1& surf = encode_tasks[i].surf;
            memset(&surf, 0, sizeof(mfxFrameSurface1));
            memcpy(&surf.Info, &params.mfx.FrameInfo, sizeof(params.mfx.FrameInfo));
            
            mfxBitstream& bs = encode_tasks[i].bs;
            memset(&bs, 0, sizeof(mfxBitstream));
            bs.Data = bs_start + i*bs_size;
            bs.MaxLength = bs_size;

            idle_tasks << i;
        }

        frames.SetSize(num_surf+3); //+NUM_OUT_BUFFERS

        // Each frame reserves Width*Height bytes for luma and the same amount again for
        // chroma.
        // NOTE(review): that is more than 4:2:0 NV12 needs; possibly sized for the
        // bUse444 case -- confirm.
        const unsigned lum_channel_size = fi.Width*fi.Height,
                       uv_channel_size = fi.Width*fi.Height,
                       frame_size = lum_channel_size + uv_channel_size;
        // 15 spare bytes so the start can be rounded up to a multiple of 16.
        frame_buff.SetSize(frame_size * frames.Num() + 15);

        mfxU8* frame_start = (mfxU8*)(((size_t)frame_buff.Array() + 15)/16*16);
        memset(frame_start, 0, frame_size * frames.Num());
        // Point each frame at its slice: Y first, UV right after the luma bytes, and V
        // one byte past UV.
        for(unsigned i = 0; i < frames.Num(); i++)
        {
            mfxFrameData& frame = frames[i];
            memset(&frame, 0, sizeof(mfxFrameData));
            frame.Y = frame_start + i * frame_size;
            frame.UV = frame_start + i * frame_size + lum_channel_size;
            frame.V = frame.UV + 1;
            frame.Pitch = fi.Width;
        }

        Log(TEXT("Using %u encode tasks"), encode_tasks.Num());
        Log(TEXT("Buffer size: %u configured, %u suggested by QSV; using %u"),
            bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, params.mfx.BufferSizeInKB*1000*8/1024);

        Log(TEXT("------------------------------------------"));
        Log(TEXT("%s"), GetInfoString().Array());
        Log(TEXT("------------------------------------------"));

        // `ctrl` is zeroed and pre-filled with the I | REF | IDR frame-type flags.
        memset(&ctrl, 0, sizeof(ctrl));
        ctrl.FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR;

        deferredFrames = 0;

        // The leading `false &&` forces this to false regardless of the API version.
        bUsingDecodeTimestamp = false && ver.Minor >= 6;

        // The packet itself is discarded; presumably GetHeaders() is called for its
        // side effects -- confirm.
        DataPacket packet;
        GetHeaders(packet);
    }
Exemplo n.º 27
0
/*
========================
LZWJobInternal

Runs the object headers in parm through the LZW compressor to build the delta packet
that is sent to peers. Object payloads are normally zero-RLE compressed already; an
object whose csize is -1 is zero-RLE compressed here, on the way into the LZW stream.
========================
*/
void LZWJobInternal( lzwParm_t * parm, unsigned int dmaTag ) {
	(void)dmaTag;	// unused here

	assert( parm->numObjects > 0 );

#ifndef ALLOW_MULTIPLE_DELTAS
	// Fragmented deltas are not in use: once a delta exists nothing more is produced,
	// and a later full snap is relied upon to get the complete snap across.
	if ( parm->ioData->numlzwDeltas > 0 ) {
		assert( parm->ioData->numlzwDeltas == 1 );
		assert( !parm->ioData->fullSnap );
		return;
	}
#endif

	assert( parm->ioData->lzwBytes < parm->ioData->maxlzwMem );

	ALIGN16( idLZWCompressor lzwCompressor( parm->ioData->lzwData ) );

	if ( !parm->fragmented ) {
		// Fresh dictionary
		NewLZWStream( parm, &lzwCompressor );
	} else {
		// A previous packet was only partially written; keep going with the earlier dictionary values
		ContinueLZWStream( parm, &lzwCompressor );
	}

	int numChangedObjProcessed = 0;

	for ( int objIdx = 0; objIdx < parm->numObjects; objIdx++ ) {

		// SnapshotProcessor.cpp eventually deals with this gracefully, but catching it
		// at the moment it happens makes the situation easier to inspect.
		assert( !lzwCompressor.IsOverflowed() || numChangedObjProcessed > 1 );

		// Close the current stream when it overflowed or reached the optimal length
		const bool streamDone = lzwCompressor.IsOverflowed() || lzwCompressor.Length() >= parm->ioData->optimalLength;
		if ( streamDone ) {
			FinishLZWStream( parm, &lzwCompressor );
			// amount that has to be DMA'ed back out
			parm->ioData->lzwDmaOut = parm->ioData->lzwBytes;
#ifndef ALLOW_MULTIPLE_DELTAS
			// No fragmented deltas: only the first one is sent, a full snap covers the rest
			assert( !parm->ioData->fullSnap );
			assert( parm->ioData->numlzwDeltas == 1 );
			return;
#else
			NewLZWStream( parm, &lzwCompressor );
#endif
		}

		// With at least one object written and no overflow, this is a good point to checkpoint
		if ( numChangedObjProcessed > 0 ) {
			lzwCompressor.Save();
		}

		objHeader_t * header = &parm->headers[objIdx];

		// An id of -1 means the object is not sent at all (which acts as an ack)
		if ( header->objID == -1 ) {
			assert( header->flags & OBJ_SAME );
			continue;
		}

		numChangedObjProcessed++;

		// Object ids go into the stream as deltas against the previous id
		lzwCompressor.WriteAgnostic<uint16>( (uint16)( header->objID - parm->ioData->lastObjId ) );
		parm->ioData->lastObjId = (uint16)header->objID;

		// Stale / not-stale objects carry a marker in place of a size
		const bool isStale = ( header->flags & OBJ_VIS_STALE ) != 0;
		if ( isStale || ( header->flags & OBJ_VIS_NOT_STALE ) ) {
			objectSize_t marker = isStale ? SIZE_STALE : SIZE_NOT_STALE;
			lzwCompressor.WriteAgnostic<objectSize_t>( marker );
		}

		// No payload is written for stale objects
		if ( isStale ) {
			continue;
		}

		// A deleted object is written as size 0 with no payload
		if ( header->flags & OBJ_DELETED ) {
			lzwCompressor.WriteAgnostic<objectSize_t>( 0 );
			continue;
		}

		lzwCompressor.WriteAgnostic<objectSize_t>( (objectSize_t)header->size );

		uint8 * payload = header->data;

		if ( header->csize != -1 ) {
			// Payload is already zero-rle compressed, pass it straight through
			lzwCompressor.Write( payload, header->csize );
		} else {
			// Raw payload: zero-rle it on the fly, feeding the lzw compressor
			idZeroRunLengthCompressor zrle;
			zrle.Start( NULL, &lzwCompressor, 0xFFFF );
			zrle.WriteBytes( payload, header->size );
			zrle.End();
		}

#ifdef SNAPSHOT_CHECKSUMS
		lzwCompressor.WriteAgnostic( header->checksum );
#endif
		// Same early-warning check as at the top of the loop
		assert( !lzwCompressor.IsOverflowed() || numChangedObjProcessed > 1 );
	}

	if ( parm->saveDictionary ) {
		// Data went into lzwMem but FinishLZWStream was not called, so the amount to
		// DMA back out has to be worked out from the compressor itself.
		assert( parm->ioData->lzwBytes == 0 ); // not expected to be hit with lzwBytes != 0; kept as a safeguard
		parm->ioData->lzwDmaOut = parm->ioData->lzwBytes + lzwCompressor.Length();
	} else {
		// Terminator: the id delta that brings lastObjId up to 0xFFFF
		uint16 terminator = 0xFFFF - parm->ioData->lastObjId;
		lzwCompressor.WriteAgnostic( terminator );

		// This is the last stream
		FinishLZWStream( parm, &lzwCompressor );

		// amount that has to be DMA'ed back out
		parm->ioData->lzwDmaOut = parm->ioData->lzwBytes;

		// A full snap has been sent
		parm->ioData->fullSnap = true;
	}

	assert( parm->ioData->lzwBytes < parm->ioData->maxlzwMem );
}