/*
 * Open ft->filename for reading through ffmpeg.
 *
 * Allocates the decode buffer, opens the container, selects the first
 * stream whose codec type is CODEC_TYPE_AUDIO, opens it through
 * stream_component_open() and copies sample rate and channel count into
 * ft->signal.  The encoding is always reported as 16-bit SOX_ENCODING_SIGN2
 * and the length as 0.
 *
 * Returns SOX_SUCCESS, or SOX_EOF after reporting the problem via lsx_fail().
 *
 * NOTE(review): the failure paths return without releasing audio_buf_raw or
 * the opened format context; presumably the caller's teardown handles that --
 * confirm.
 */
static int startread(sox_format_t * ft)
{
  priv_t * ffmpeg = (priv_t *)ft->priv;
  AVFormatParameters params;
  int ret;
  int i;

  /* 32 spare bytes are requested on top of the maximum frame size; the
   * pointer actually used for decoding is the ALIGN16'd one. */
  ffmpeg->audio_buf_raw = lsx_calloc(1, (size_t)AVCODEC_MAX_AUDIO_FRAME_SIZE + 32);
  ffmpeg->audio_buf_aligned = ALIGN16(ffmpeg->audio_buf_raw);

  /* Signal audio stream not found */
  ffmpeg->audio_index = -1;

  /* register all CODECs, demux and protocols */
  av_register_all();

  /* Open file and get format */
  memset(&params, 0, sizeof(params));
  if ((ret = av_open_input_file(&ffmpeg->ctxt, ft->filename, NULL, 0, &params)) < 0) {
    lsx_fail("ffmpeg cannot open file for reading: %s (code %d)", ft->filename, ret);
    return SOX_EOF;
  }

  /* Get CODEC parameters */
  if ((ret = av_find_stream_info(ffmpeg->ctxt)) < 0) {
    lsx_fail("ffmpeg could not find CODEC parameters for %s", ft->filename);
    return SOX_EOF;
  }

  /* Now we can begin to play (RTSP stream only) */
  av_read_play(ffmpeg->ctxt);

  /* Find audio stream (FIXME: allow different stream to be selected) */
  for (i = 0; (unsigned)i < ffmpeg->ctxt->nb_streams; i++) {
    AVCodecContext *enc = ffmpeg->ctxt->streams[i]->codec;
    if (enc->codec_type == CODEC_TYPE_AUDIO && ffmpeg->audio_index < 0) {
      ffmpeg->audio_index = i;
      break;
    }
  }

  /* Open the stream */
  if (ffmpeg->audio_index < 0 ||
      stream_component_open(ffmpeg, ffmpeg->audio_index) < 0 ||
      ffmpeg->audio_stream < 0) {
    lsx_fail("ffmpeg could not open CODECs for %s", ft->filename);
    return SOX_EOF;
  }

  /* Copy format info */
  ft->signal.rate = ffmpeg->audio_st->codec->sample_rate;
  ft->encoding.bits_per_sample = 16;
  ft->encoding.encoding = SOX_ENCODING_SIGN2;
  ft->signal.channels = ffmpeg->audio_st->codec->channels;
  ft->signal.length = 0; /* Currently we can't seek; no idea how to get this
                            info from ffmpeg anyway (in time, yes, but not in
                            samples); but ffmpeg *can* seek */

  return SOX_SUCCESS;
}
/*
 * GCM decryption step: authenticate, then CTR-decrypt src_size bytes.
 *
 * The ciphertext in src is first folded into the GHASH state and added to
 * the running length; whole blocks are then processed by
 * ctr32_encrypt_blocks() and the 32-bit counter stored at Yi.c + 12 is
 * advanced by the number of blocks.  A trailing partial block, if any, is
 * handled by ctr_encrypt_last().
 *
 * Block counts and offsets are kept in size_t so that a large src_size is
 * not truncated when divided into blocks.
 *
 * dst_size is not examined here; the caller must provide at least src_size
 * bytes in dst.  Always returns 0.
 */
static int
aes_gcm_decrypt(void *_ctx, const void *src, size_t src_size,
		void *dst, size_t dst_size)
{
	struct aes_gcm_ctx *ctx = _ctx;
	size_t blocks = src_size / GCM_BLOCK_SIZE;
	size_t exp_blocks = blocks * GCM_BLOCK_SIZE;
	size_t rest = src_size - exp_blocks;
	uint32_t counter;

	/* Hash the ciphertext before it is (possibly in place) decrypted. */
	gcm_ghash(ctx, src, src_size);
	ctx->gcm.len.u[1] += src_size;

	if (blocks > 0) {
		ctr32_encrypt_blocks(src, dst, blocks,
				     ALIGN16(&ctx->expanded_key),
				     ctx->gcm.Yi.c);

		/* Advance the counter word; it wraps modulo 2^32. */
		counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
		counter += (uint32_t)blocks;
		_gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
	}

	if (rest > 0)	/* last incomplete block */
		ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);

	return 0;
}
/*
 * CBC-encrypt src_size bytes from src into dst with the SSSE3 (vpaes)
 * routine, using the 16-byte aligned key schedule and the IV stored in the
 * context.  dst_size is not consulted.  Always returns 0.
 */
static int
aes_ssse3_encrypt(void *_ctx, const void *src, size_t src_size,
		  void *dst, size_t dst_size)
{
	struct aes_ctx *state = _ctx;
	const int do_encrypt = 1;	/* direction flag passed to vpaes */

	vpaes_cbc_encrypt(src, dst, src_size,
			  ALIGN16(&state->expanded_key),
			  state->iv, do_encrypt);

	return 0;
}
/*
 * CBC-decrypt src_size bytes from src into dst with the ARMv8 routine.
 * aes_v8_cbc_encrypt() serves both directions; the final argument 0 selects
 * decryption.  dst_size is not consulted.  Always returns 0.
 */
static int
aes_aarch64_decrypt(void *_ctx, const void *src, size_t src_size,
		    void *dst, size_t dst_size)
{
	struct aes_ctx *state = _ctx;
	const int do_encrypt = 0;	/* 0: run the CBC routine in decrypt mode */

	aes_v8_cbc_encrypt(src, dst, src_size,
			   ALIGN16(&state->expanded_key),
			   state->iv, do_encrypt);

	return 0;
}
/*
 * Store a 16-byte IV in the aligned PadLock cipher data of the context.
 *
 * NOTE(review): iv_size is never checked; exactly 16 bytes are read from iv
 * regardless of its value -- callers presumably always pass 16, confirm.
 * Always returns 0.
 */
static int aes_setiv(void *_ctx, const void *iv, size_t iv_size)
{
	struct padlock_ctx *pctx = _ctx;
	struct padlock_cipher_data *cdata;

	cdata = ALIGN16(&pctx->expanded_key);
	memcpy(cdata->iv, iv, 16);

	return 0;
}
/*
 * Expand userkey (keysize bytes) into the aligned key schedule of the
 * context.  The encrypt or decrypt schedule is chosen from ctx->enc.
 *
 * Returns 0 on success, or GNUTLS_E_ENCRYPTION_FAILED (through
 * gnutls_assert_val) when the vpaes key setup reports a non-zero status.
 */
static int
aes_ssse3_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)
{
	struct aes_ctx *state = _ctx;
	int rc;

	/* vpaes expects the key length in bits. */
	rc = state->enc
	    ? vpaes_set_encrypt_key(userkey, keysize * 8,
				    ALIGN16(&state->expanded_key))
	    : vpaes_set_decrypt_key(userkey, keysize * 8,
				    ALIGN16(&state->expanded_key));

	if (rc == 0)
		return 0;

	return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);
}
int padlock_aes_cipher_setkey (void *_ctx, const void *userkey, size_t keysize) { struct padlock_ctx *ctx = _ctx; struct padlock_cipher_data *pce; #ifdef HAVE_LIBNETTLE struct aes_ctx nc; #endif memset (_ctx, 0, sizeof (struct padlock_cipher_data)); pce = ALIGN16 (&ctx->expanded_key); pce->cword.b.encdec = (ctx->enc == 0); switch (keysize) { case 16: pce->cword.b.ksize = 0; pce->cword.b.rounds = 10; memcpy (pce->ks.rd_key, userkey, 16); pce->cword.b.keygen = 0; break; #ifdef HAVE_LIBNETTLE case 24: pce->cword.b.ksize = 1; pce->cword.b.rounds = 12; goto common_24_32; case 32: pce->cword.b.ksize = 2; pce->cword.b.rounds = 14; common_24_32: /* expand key using nettle */ if (ctx->enc) aes_set_encrypt_key (&nc, keysize, userkey); else aes_set_decrypt_key (&nc, keysize, userkey); memcpy (pce->ks.rd_key, nc.keys, sizeof (nc.keys)); pce->ks.rounds = nc.nrounds; pce->cword.b.keygen = 1; break; #endif default: return gnutls_assert_val (GNUTLS_E_ENCRYPTION_FAILED); } padlock_reload_key (); return 0; }
/*
 * Read one frame's worth of macroblock info records from
 * ps_app_ctxt->fp_mb_info into pv_mb_info.
 *
 * The macroblock count is (width rounded up to 16) * (height rounded up to
 * 16) / 256.  The record layout is selected by u4_mb_info_type (1..4); for
 * those types u4_mb_info_size is updated to the record size.  Any other
 * type requests zero bytes and leaves u4_mb_info_size untouched.
 *
 * Returns IV_SUCCESS when exactly the expected number of bytes was read,
 * IV_FAIL otherwise.
 */
IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info)
{
    WORD32 mb_count;
    WORD32 expected;
    WORD32 got;

    mb_count = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht);
    mb_count /= 256;

    if(1 == ps_app_ctxt->u4_mb_info_type)
    {
        expected = sizeof(ih264e_mb_info1_t) * mb_count;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info1_t);
    }
    else if(2 == ps_app_ctxt->u4_mb_info_type)
    {
        expected = sizeof(ih264e_mb_info2_t) * mb_count;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info2_t);
    }
    else if(3 == ps_app_ctxt->u4_mb_info_type)
    {
        expected = sizeof(ih264e_mb_info3_t) * mb_count;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info3_t);
    }
    else if(4 == ps_app_ctxt->u4_mb_info_type)
    {
        expected = sizeof(ih264e_mb_info4_t) * mb_count;
        ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info4_t);
    }
    else
    {
        /* Unknown record type: nothing to read. */
        expected = 0;
    }

    got = fread(pv_mb_info, 1, expected, ps_app_ctxt->fp_mb_info);

    return (got != expected) ? IV_FAIL : IV_SUCCESS;
}
/*
 * CBC-encrypt src_size bytes from src into dst with AES-NI.
 *
 * src_size must be a multiple of 16; otherwise GNUTLS_E_INVALID_REQUEST is
 * returned (through gnutls_assert_val) and nothing is processed.
 * dst_size is not consulted.  Returns 0 on success.
 */
static int
aes_encrypt(void *_ctx, const void *src, size_t src_size,
	    void *dst, size_t dst_size)
{
	struct aes_ctx *state = _ctx;
	const int do_encrypt = 1;	/* direction flag for aesni_cbc_encrypt */

	/* Reject inputs that do not consist of whole 16-byte blocks. */
	if (unlikely(src_size % 16 != 0))
		return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);

	aesni_cbc_encrypt(src, dst, src_size,
			  ALIGN16(&state->expanded_key),
			  state->iv, do_encrypt);

	return 0;
}
/*
 * Run the PadLock CBC primitive over src_size bytes from src into dst using
 * the aligned cipher data of the context.  Note the (dst, src) argument
 * order of padlock_cbc_encrypt().  The same primitive is called for
 * decryption; presumably the direction comes from the control word set up
 * at key time -- confirm.  dst_size is not consulted.  Always returns 0.
 */
static int
padlock_aes_cbc_decrypt(void *_ctx, const void *src, size_t src_size,
			void *dst, size_t dst_size)
{
	struct padlock_ctx *pctx = _ctx;
	struct padlock_cipher_data *cdata;

	cdata = ALIGN16(&pctx->expanded_key);
	padlock_cbc_encrypt(dst, src, cdata, src_size);

	return 0;
}
/*
 * Install an AES key for GCM and derive the GHASH table.
 *
 * After CHECK_AES_KEYSIZE, the encryption key schedule is expanded into the
 * aligned key area; gcm.H is then encrypted in place with that key, both of
 * its 64-bit halves are byte-swapped, and gcm_init_v8() builds Htable from
 * the result.
 *
 * Returns 0 on success, or GNUTLS_E_ENCRYPTION_FAILED when key expansion
 * reports a non-zero status.
 */
static int
aes_gcm_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)
{
	struct aes_gcm_ctx *gctx = _ctx;
	unsigned half;
	int rc;

	CHECK_AES_KEYSIZE(keysize);

	/* The ARMv8 key setup takes the key length in bits. */
	rc = aes_v8_set_encrypt_key(userkey, keysize * 8,
				    ALIGN16(&gctx->expanded_key));
	if (rc != 0)
		return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);

	aes_v8_encrypt(gctx->gcm.H.c, gctx->gcm.H.c,
		       ALIGN16(&gctx->expanded_key));

	for (half = 0; half < 2; half++)
		gctx->gcm.H.u[half] = bswap_64(gctx->gcm.H.u[half]);

	gcm_init_v8(gctx->gcm.Htable, gctx->gcm.H.u);

	return 0;
}
/*
 * CTR-process the final partial block.
 *
 * Copies `length` bytes starting at src[pos] into a block-sized scratch
 * buffer, runs one block through ctr32_encrypt_blocks(), and writes the
 * first `length` output bytes to dst[pos].
 *
 * The scratch input is zero-initialised so that the bytes beyond `length`
 * handed to the cipher routine are defined rather than leftover stack
 * contents.  `length` is expected to be smaller than GCM_BLOCK_SIZE; it is
 * not checked here.
 */
static inline void
ctr_encrypt_last(struct aes_gcm_ctx *ctx, const uint8_t * src,
		 uint8_t * dst, size_t pos, size_t length)
{
	uint8_t tmp[GCM_BLOCK_SIZE] = { 0 };
	uint8_t out[GCM_BLOCK_SIZE];

	memcpy(tmp, &src[pos], length);
	ctr32_encrypt_blocks(tmp, out, 1,
			     ALIGN16(&ctx->expanded_key),
			     ctx->gcm.Yi.c);

	/* Only the bytes that correspond to real input are emitted. */
	memcpy(&dst[pos], out, length);
}
/*
 * Start a new GCM message with the given IV.
 *
 * Only IVs of GCM_BLOCK_SIZE - 4 bytes are accepted; anything else returns
 * GNUTLS_E_INVALID_REQUEST.  The hash accumulator Xi and the length block
 * are cleared, Yi is set to IV || 0x00000001 and encrypted into EK0, and
 * the low counter byte is then set to 2 for the first data block.
 *
 * Returns 0 on success.
 */
static int aes_gcm_setiv(void *_ctx, const void *iv, size_t iv_size)
{
	struct aes_gcm_ctx *gctx = _ctx;
	const size_t nonce_len = GCM_BLOCK_SIZE - 4;

	if (iv_size != nonce_len)
		return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);

	memset(gctx->gcm.Xi.c, 0, sizeof(gctx->gcm.Xi.c));
	memset(gctx->gcm.len.c, 0, sizeof(gctx->gcm.len.c));

	/* Yi = IV followed by a 32-bit big-endian counter starting at 1. */
	memcpy(gctx->gcm.Yi.c, iv, nonce_len);
	memset(&gctx->gcm.Yi.c[GCM_BLOCK_SIZE - 4], 0, 3);
	gctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 1;

	aes_v8_encrypt(gctx->gcm.Yi.c, gctx->gcm.EK0.c,
		       ALIGN16(&gctx->expanded_key));

	/* Counter value used for the first block of payload. */
	gctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 2;

	return 0;
}
static int open_audio(priv_t * ffmpeg, AVStream *st) { AVCodecContext *c; AVCodec *codec; c = st->codec; /* find the audio encoder */ codec = avcodec_find_encoder(c->codec_id); if (!codec) { lsx_fail("ffmpeg CODEC not found"); return SOX_EOF; } /* open it */ if (avcodec_open(c, codec) < 0) { lsx_fail("ffmpeg could not open CODEC"); return SOX_EOF; } ffmpeg->audio_buf_raw = lsx_malloc((size_t)AVCODEC_MAX_AUDIO_FRAME_SIZE + 32); ffmpeg->audio_buf_aligned = ALIGN16(ffmpeg->audio_buf_raw); /* ugly hack for PCM codecs (will be removed ASAP with new PCM support to compute the input frame size in samples */ if (c->frame_size <= 1) { ffmpeg->audio_input_frame_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / c->channels; switch(st->codec->codec_id) { case CODEC_ID_PCM_S16LE: case CODEC_ID_PCM_S16BE: case CODEC_ID_PCM_U16LE: case CODEC_ID_PCM_U16BE: ffmpeg->audio_input_frame_size >>= 1; break; default: break; } } else
void EmitColorIndices_Intrinsics( const byte *colorBlock, const byte *minColor, const byte *maxColor, byte *&outData ) { ALIGN16( byte color0[16] ); ALIGN16( byte color1[16] ); ALIGN16( byte color2[16] ); ALIGN16( byte color3[16] ); ALIGN16( byte result[16] ); // mov esi, maxColor // mov edi, minColor __m128i t0, t1, t2, t3, t4, t5, t6, t7; t7 = _mm_setzero_si128(); //t7 = _mm_xor_si128(t7, t7); _mm_store_si128 ( (__m128i*) &result, t7 ); //t0 = _mm_load_si128 ( (__m128i*) maxColor ); t0 = _mm_cvtsi32_si128( *(int*)maxColor); // Bitwise AND __m128i tt = _mm_load_si128 ( (__m128i*) SIMD_SSE2_byte_colorMask ); t0 = _mm_and_si128(t0, tt); t0 = _mm_unpacklo_epi8(t0, t7); t4 = _mm_shufflelo_epi16( t0, R_SHUFFLE_D( 0, 3, 2, 3 )); t5 = _mm_shufflelo_epi16( t0, R_SHUFFLE_D( 3, 1, 3, 3 )); t4 = _mm_srli_epi16(t4, 5); t5 = _mm_srli_epi16(t5, 6); // Bitwise Logical OR t0 = _mm_or_si128(t0, t4); t0 = _mm_or_si128(t0, t5); // t0 contains color0 in 565 //t1 = _mm_load_si128 ( (__m128i*) minColor ); t1 = _mm_cvtsi32_si128( *(int*)minColor); t1 = _mm_and_si128(t1, tt); t1 = _mm_unpacklo_epi8(t1, t7); t4 = _mm_shufflelo_epi16( t1, R_SHUFFLE_D( 0, 3, 2, 3 )); t5 = _mm_shufflelo_epi16( t1, R_SHUFFLE_D( 3, 1, 3, 3 )); t4 = _mm_srli_epi16(t4, 5); t5 = _mm_srli_epi16(t5, 6); t1 = _mm_or_si128(t1, t4); t1 = _mm_or_si128(t1, t5); // t1 contains color1 in 565 t2 = t0; t2 = _mm_packus_epi16(t2, t7); t2 = _mm_shuffle_epi32( t2, R_SHUFFLE_D( 0, 1, 0, 1 )); _mm_store_si128 ( (__m128i*) &color0, t2 ); t6 = t0; t6 = _mm_add_epi16(t6, t0); t6 = _mm_add_epi16(t6, t1); // Multiply Packed Signed Integers and Store High Result __m128i tw3 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_div_by_3 ); t6 = _mm_mulhi_epi16(t6, tw3); t6 = _mm_packus_epi16(t6, t7); t6 = _mm_shuffle_epi32( t6, R_SHUFFLE_D( 0, 1, 0, 1 )); _mm_store_si128 ( (__m128i*) &color2, t6 ); t3 = t1; t3 = _mm_packus_epi16(t3, t7); t3 = _mm_shuffle_epi32( t3, R_SHUFFLE_D( 0, 1, 0, 1 )); _mm_store_si128 ( (__m128i*) &color1, t3 ); t1 = 
_mm_add_epi16(t1, t1); t0 = _mm_add_epi16(t0, t1); t0 = _mm_mulhi_epi16(t0, tw3); t0 = _mm_packus_epi16(t0, t7); t0 = _mm_shuffle_epi32( t0, R_SHUFFLE_D( 0, 1, 0, 1 )); _mm_store_si128 ( (__m128i*) &color3, t0 ); __m128i w0 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_0); __m128i w1 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_1); __m128i w2 = _mm_load_si128 ( (__m128i*) SIMD_SSE2_word_2); // mov eax, 32 // mov esi, colorBlock int x = 32; //const byte *c = colorBlock; while (x >= 0) { t3 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+0)); t3 = _mm_shuffle_epi32( t3, R_SHUFFLE_D( 0, 2, 1, 3 )); t5 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+8)); t5 = _mm_shuffle_epi32( t5, R_SHUFFLE_D( 0, 2, 1, 3 )); t0 = t3; t6 = t5; // Compute Sum of Absolute Difference __m128i c0 = _mm_load_si128 ( (__m128i*) color0 ); t0 = _mm_sad_epu8(t0, c0); t6 = _mm_sad_epu8(t6, c0); // Pack with Signed Saturation t0 = _mm_packs_epi32 (t0, t6); t1 = t3; t6 = t5; __m128i c1 = _mm_load_si128 ( (__m128i*) color1 ); t1 = _mm_sad_epu8(t1, c1); t6 = _mm_sad_epu8(t6, c1); t1 = _mm_packs_epi32 (t1, t6); t2 = t3; t6 = t5; __m128i c2 = _mm_load_si128 ( (__m128i*) color2 ); t2 = _mm_sad_epu8(t2, c2); t6 = _mm_sad_epu8(t6, c2); t2 = _mm_packs_epi32 (t2, t6); __m128i c3 = _mm_load_si128 ( (__m128i*) color3 ); t3 = _mm_sad_epu8(t3, c3); t5 = _mm_sad_epu8(t5, c3); t3 = _mm_packs_epi32 (t3, t5); t4 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+16)); t4 = _mm_shuffle_epi32( t4, R_SHUFFLE_D( 0, 2, 1, 3 )); t5 = _mm_loadl_epi64( (__m128i*) (colorBlock+x+24)); t5 = _mm_shuffle_epi32( t5, R_SHUFFLE_D( 0, 2, 1, 3 )); t6 = t4; t7 = t5; t6 = _mm_sad_epu8(t6, c0); t7 = _mm_sad_epu8(t7, c0); t6 = _mm_packs_epi32 (t6, t7); t0 = _mm_packs_epi32 (t0, t6); // d0 t6 = t4; t7 = t5; t6 = _mm_sad_epu8(t6, c1); t7 = _mm_sad_epu8(t7, c1); t6 = _mm_packs_epi32 (t6, t7); t1 = _mm_packs_epi32 (t1, t6); // d1 t6 = t4; t7 = t5; t6 = _mm_sad_epu8(t6, c2); t7 = _mm_sad_epu8(t7, c2); t6 = _mm_packs_epi32 (t6, t7); t2 = _mm_packs_epi32 (t2, 
t6); // d2 t4 = _mm_sad_epu8(t4, c3); t5 = _mm_sad_epu8(t5, c3); t4 = _mm_packs_epi32 (t4, t5); t3 = _mm_packs_epi32 (t3, t4); // d3 t7 = _mm_load_si128 ( (__m128i*) result ); t7 = _mm_slli_epi32( t7, 16); t4 = t0; t5 = t1; // Compare Packed Signed Integers for Greater Than t0 = _mm_cmpgt_epi16(t0, t3); // b0 t1 = _mm_cmpgt_epi16(t1, t2); // b1 t4 = _mm_cmpgt_epi16(t4, t2); // b2 t5 = _mm_cmpgt_epi16(t5, t3); // b3 t2 = _mm_cmpgt_epi16(t2, t3); // b4 t4 = _mm_and_si128(t4, t1); // x0 t5 = _mm_and_si128(t5, t0); // x1 t2 = _mm_and_si128(t2, t0); // x2 t4 = _mm_or_si128(t4, t5); t2 = _mm_and_si128(t2, w1); t4 = _mm_and_si128(t4, w2); t2 = _mm_or_si128(t2, t4); t5 = _mm_shuffle_epi32( t2, R_SHUFFLE_D( 2, 3, 0, 1 )); // Unpack Low Data t2 = _mm_unpacklo_epi16 ( t2, w0); t5 = _mm_unpacklo_epi16 ( t5, w0); //t5 = _mm_slli_si128 ( t5, 8); t5 = _mm_slli_epi32( t5, 8); t7 = _mm_or_si128(t7, t5); t7 = _mm_or_si128(t7, t2); _mm_store_si128 ( (__m128i*) &result, t7 ); x -=32; } t4 = _mm_shuffle_epi32( t7, R_SHUFFLE_D( 1, 2, 3, 0 )); t5 = _mm_shuffle_epi32( t7, R_SHUFFLE_D( 2, 3, 0, 1 )); t6 = _mm_shuffle_epi32( t7, R_SHUFFLE_D( 3, 0, 1, 2 )); t4 = _mm_slli_epi32 ( t4, 2); t5 = _mm_slli_epi32 ( t5, 4); t6 = _mm_slli_epi32 ( t6, 6); t7 = _mm_or_si128(t7, t4); t7 = _mm_or_si128(t7, t5); t7 = _mm_or_si128(t7, t6); //_mm_store_si128 ( (__m128i*) outData, t7 ); int r = _mm_cvtsi128_si32 (t7); memcpy(outData, &r, 4); // Anything better ? outData += 4; }
int main (int argc, char * argv[]) { UIOMux * uiomux; uiomux_resource_t uiores; char * infilename[2] = {NULL, NULL}, * outfilename = NULL; FILE * infile[2], * outfile = NULL; size_t nread; size_t input_size[2], output_size; SHVIO *vio; struct ren_vid_surface src[2]; const struct ren_vid_surface *srclist[2] = { &src[0], &src[1] }; struct ren_vid_surface dst; void *inbuf[2], *outbuf; int ret; int frameno=0; int show_version = 0; int show_help = 0; int show_list_vio = 0; char * progname; char * viodev = NULL; int error = 0; int c; char * optstring = "hvo:O:c:s:C:S:f:u:l"; #ifdef HAVE_GETOPT_LONG static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, {"output", required_argument, 0, 'o'}, {"overlay", required_argument, 0, 'O'}, {"input-colorspace", required_argument, 0, 'c'}, {"input-size", required_argument, 0, 's'}, {"output-colorspace", required_argument, 0, 'C'}, {"output-size", required_argument, 0, 'S'}, {"filter", required_argument, 0, 'f'}, {"vio", required_argument, 0, 'u'}, {"list", no_argument, 0, 'l'}, {NULL,0,0,0} }; #endif #if defined(USE_MERAM_RA) || defined(USE_MERAM_WB) #define ALIGN16(_x) (((_x) + 15) / 16 * 16) #define ADJUST_PITCH(_p, _w) \ { \ (_p) = ((_w) - 1) | 1023; \ (_p) = (_p) | ((_p) >> 1); \ (_p) = (_p) | ((_p) >> 2); \ (_p) += 1; \ } unsigned long val; MERAM *meram = meram_open(); MERAM_REG *regs = meram_lock_reg(meram); size_t sz; unsigned long mblock; ICB *icbr, *icbw; #endif /* defined(USE_MERAM_RA) || defined(USE_MERAM_WB) */ memset(src, 0, sizeof (src[0]) * 2); src[0].w = -1; src[0].h = -1; dst.w = -1; dst.h = -1; src[0].format = REN_UNKNOWN; dst.format = REN_UNKNOWN; src[0].bpitchy = src[0].bpitchc = src[0].bpitcha = 0; dst.bpitchy = dst.bpitchc = dst.bpitcha = 0; memcpy((void *)&src[1], (void *)&src[0], sizeof(src[0])); src[1].blend_out.x = 0; src[1].blend_out.y = 0; src[1].blend_out.w = 220; src[1].blend_out.h = 440; progname = argv[0]; if (argc < 2) { usage (progname); return (1); 
} while (1) { #ifdef HAVE_GETOPT_LONG c = getopt_long (argc, argv, optstring, long_options, NULL); #else c = getopt (argc, argv, optstring); #endif if (c == -1) break; if (c == ':') { usage (progname); goto exit_err; } switch (c) { case 'h': /* help */ show_help = 1; break; case 'v': /* version */ show_version = 1; break; case 'o': /* output */ outfilename = optarg; break; case 'O': /* ovalery */ infilename[1] = optarg; break; case 'c': /* input colorspace */ set_colorspace (optarg, &src[0].format); break; case 's': /* input size */ set_size (optarg, &src[0].w, &src[0].h); break; case 'C': /* output colorspace */ set_colorspace (optarg, &dst.format); break; case 'S': /* output size */ set_size (optarg, &dst.w, &dst.h); break; case 'f': /* filter mode */ rotation = strtoul(optarg, NULL, 0); break; case 'l': show_list_vio = 1; break; case 'u': viodev = optarg; break; default: break; } } if (show_version) { printf ("%s version " VERSION "\n", progname); } if (show_help) { usage (progname); } #if 0 if (show_list_vio) { char **vio; int i, n; if (shvio_list_vio(&vio, &n) < 0) { printf ("Can't get a list of VIO available...\n"); } else { for(i = 0; i < n; i++) printf("%s", vio[i]); printf("Total: %d VIOs available.\n", n); } } #endif if (show_version || show_help || show_list_vio) { goto exit_ok; } if (optind >= argc) { usage (progname); goto exit_err; } infilename[0] = argv[optind++]; if (optind < argc) { outfilename = argv[optind++]; } printf ("Input file: %s\n", infilename[0]); if (infilename[1] != NULL) printf ("Overlay file: %s\n", infilename[1]); printf ("Output file: %s\n", outfilename); guess_colorspace (infilename[0], &src[0].format); if (infilename[1]) guess_colorspace (infilename[1], &src[1].format); guess_colorspace (outfilename, &dst.format); /* If the output colorspace isn't given and can't be guessed, then default to * the input colorspace (ie. 
no colorspace conversion) */ if (dst.format == REN_UNKNOWN) dst.format = src[0].format; guess_size (infilename[0], src[0].format, &src[0].w, &src[0].h); if (rotation & 0xF) { /* Swap width/height for rotation */ dst.w = src[0].h; dst.h = src[0].w; } else if (dst.w == -1 && dst.h == -1) { /* If the output size isn't given and can't be guessed, then default to * the input size (ie. no rescaling) */ dst.w = src[0].w; dst.h = src[0].h; } if (infilename[1]) guess_size (infilename[1], src[1].format, &src[1].w, &src[1].h); /* Setup memory pitch */ src[0].pitch = src[0].w; src[1].pitch = src[1].w; dst.pitch = dst.w; /* Check that all parameters are set */ if (src[0].format == REN_UNKNOWN) { fprintf (stderr, "ERROR: Input colorspace unspecified\n"); error = 1; } if (src[0].w == -1) { fprintf (stderr, "ERROR: Input width unspecified\n"); error = 1; } if (src[0].h == -1) { fprintf (stderr, "ERROR: Input height unspecified\n"); error = 1; } if (dst.format == REN_UNKNOWN) { fprintf (stderr, "ERROR: Output colorspace unspecified\n"); error = 1; } if (dst.w == -1) { fprintf (stderr, "ERROR: Output width unspecified\n"); error = 1; } if (dst.h == -1) { fprintf (stderr, "ERROR: Output height unspecified\n"); error = 1; } if (error) goto exit_err; printf ("Input colorspace:\t%s\n", show_colorspace (src[0].format)); printf ("Input size:\t\t%dx%d %s\n", src[0].w, src[0].h, show_size (src[0].w, src[0].h)); printf ("Output colorspace:\t%s\n", show_colorspace (dst.format)); printf ("Output size:\t\t%dx%d %s\n", dst.w, dst.h, show_size (dst.w, dst.h)); printf ("Rotation:\t\t%s\n", show_rotation (rotation)); input_size[0] = imgsize (src[0].format, src[0].w, src[0].h); if (infilename[1] != NULL) input_size[1] = imgsize (src[1].format, src[1].w, src[1].h); output_size = imgsize (dst.format, dst.w, dst.h); if (/*viodev*/ 1) { const char *blocks[2] = { "VPU5", NULL }; uiomux = uiomux_open_named(blocks); uiores = 1 << 0; } else { uiomux = uiomux_open (); uiores = UIOMUX_SH_VEU; } /* Set up 
memory buffers */ src[0].py = inbuf[0] = uiomux_malloc (uiomux, uiores, input_size[0], 32); if (src[0].format == REN_RGB565) { src[0].pc = 0; } else if (src[0].format == REN_YV12) { src[0].pc2 = src[0].py + (src[0].w * src[0].h); /* Cr(V) */ src[0].pc = src[0].pc2 + (src[0].w * src[0].h) / 4; /* Cb(U) */ } else if (src[0].format == REN_YV16) { src[0].pc2 = src[0].py + (src[0].w * src[0].h); /* Cr(V) */ src[0].pc = src[0].pc2 + (src[0].w * src[0].h) / 2; /* Cb(U) */ } else { src[0].pc = src[0].py + (src[0].w * src[0].h); /* CbCr(UV) */ } if (infilename[1] != NULL) { src[1].py = inbuf[1] = uiomux_malloc (uiomux, uiores, input_size[1], 32); if (src[1].format == REN_RGB565) { src[1].pc = 0; } else if (src[1].format == REN_YV12) { src[1].pc2 = src[1].py + (src[1].w * src[1].h); /* Cr(V) */ src[1].pc = src[1].pc2 + (src[1].w * src[1].h) / 4; /* Cb(U) */ } else if (src[1].format == REN_YV16) { src[1].pc2 = src[1].py + (src[1].w * src[1].h); /* Cr(V) */ src[1].pc = src[1].pc2 + (src[1].w * src[1].h) / 2; /* Cb(U) */ } else { src[1].pc = src[1].py + (src[1].w * src[1].h); /* CbCr(UV) */ } } dst.py = outbuf = uiomux_malloc (uiomux, uiores, output_size, 32); if (dst.format == REN_RGB565) { dst.pc = 0; } else if (dst.format == REN_YV12) { dst.pc2 = dst.py + (dst.w * dst.h); /* Cr(V) */ dst.pc = dst.pc2 + (dst.w * dst.h) / 4; /* Cb(U) */ } else if (dst.format == REN_YV16) { dst.pc2 = dst.py + (dst.w * dst.h); /* Cr(V) */ dst.pc = dst.pc2 + (dst.w * dst.h) / 2; /* Cb(U) */ } else { dst.pc = dst.py + (dst.w * dst.h); /* CbCr(UV) */ } #if defined(USE_MERAM_RA) || defined(USE_MERAM_WB) #error aaaa meram_read_reg(meram, regs, MEVCR1, &val); val |= 1 << 29; /* use 0xc0000000-0xdfffffff */ meram_write_reg(meram, regs, MEVCR1, val); meram_unlock_reg(meram, regs); #endif /* defined(USE_MERAM_RA) || defined(USE_MERAM_WB) */ #if defined(USE_MERAM_RA) #error bbbb /* calcurate byte-pitch */ src[0].bpitchy = size_y(src[0].format, src[0].pitch, 0); /* set up read-ahead cache for input */ icbr 
= meram_lock_icb(meram, 0); val = (3 << 24) | /* KRBNM: ((3+1) << 1) = 8 lines */ ((16 - 1) << 16); /* BNM: 16 = KRBNM * 2 lines */ ADJUST_PITCH(sz, src[0].bpitchy); sz *= 16; /* 16 lines */ if (src[0].format == REN_NV12) { val |= 2 << 12; /* CPL: YCbCr420 */ sz = sz * 3 / 2; } else if (src[0].format == REN_NV16) { val |= 3 << 12; /* CPL: YCbCr422 */ sz = sz * 2; } meram_write_icb(meram, icbr, MExxMCNF, val); sz = (sz + 1023) / 1024; mblock = meram_alloc_icb_memory(meram, icbr, (sz == 0) ? 1 : sz); val = (1 << 28) | /* BSZ: 2^1 line/block */ (mblock << 16) | /* MSAR */ (3 << 9) | /* WD: (constant) */ (1 << 8) | /* WS: (constant) */ (1 << 3) | /* CM: address mode 1 */ 1; /* MD: read buffer mode */ meram_write_icb(meram, icbr, MExxCTRL, val); val = ((src[0].h - 1) << 16) | /* YSZM1 */ (src[0].bpitchy - 1); /* XSZM1 */ meram_write_icb(meram, icbr, MExxBSIZE, val); val = ALIGN16(src[0].bpitchy); /* SBSIZE: 16 bytes aligned */ meram_write_icb(meram, icbr, MExxSBSIZE, val); ADJUST_PITCH(src[0].bpitchy, src[0].bpitchy); src[0].bpitchc = src[0].bpitcha = src[0].bpitchy; val = uiomux_all_virt_to_phys(src[0].py); meram_write_icb(meram, icbr, MExxSSARA, val); src[0].py = (void *)meram_get_icb_address(meram, icbr, 0); uiomux_register(src[0].py, (unsigned long)src[0].py, 8 << 20); if (is_ycbcr(src[0].format)) { val = uiomux_all_virt_to_phys(src[0].pc); meram_write_icb(meram, icbr, MExxSSARB, val); src[0].pc = (void *)meram_get_icb_address(meram, icbr, 1); uiomux_register(src[0].pc, (unsigned long)src[0].pc, 8 << 20); } else { meram_write_icb(meram, icbr, MExxSSARB, 0); } #endif /* defined(USE_MERAM_RA) */ #if defined(USE_MERAM_WB) /* calcurate byte-pitch */ dst.bpitchy = size_y(dst.format, dst.pitch, 0); /* set up write-back cache for input */ icbw = meram_lock_icb(meram, 1); val = (3 << 28) | /* KWBNM: ((3+1) << 1) = 8 lines */ ((16 - 1) << 16); /* BNM: 16 = KWBNM * 2 lines */ ADJUST_PITCH(sz, dst.bpitchy); sz *= 16; /* 16 lines */ if (dst.format == REN_NV12) { val |= 2 << 12; 
/* CPL: YCbCr420 */ sz = sz * 3 / 2; } else if (dst.format == REN_NV16) { val |= 3 << 12; /* CPL: YCbCr422 */ sz = sz * 2; } meram_write_icb(meram, icbw, MExxMCNF, val); sz = (sz + 1023) / 1024; mblock = meram_alloc_icb_memory(meram, icbw, (sz == 0) ? 1 : sz); val = (1 << 28) | /* BSZ: 2^1 line/block */ (mblock << 16) | /* MSAR */ (3 << 9) | /* WD: (constant) */ (1 << 8) | /* WS: (constant) */ (1 << 3) | /* CM: address mode 1 */ 2; /* MD: write buffer mode */ meram_write_icb(meram, icbw, MExxCTRL, val); val = ((dst.h - 1) << 16) | /* YSZM1 */ (dst.bpitchy - 1); /* XSZM1 */ meram_write_icb(meram, icbw, MExxBSIZE, val); val = ALIGN16(dst.bpitchy); /* SBSIZE: 16 bytes aligned */ meram_write_icb(meram, icbw, MExxSBSIZE, val); ADJUST_PITCH(dst.bpitchy, dst.bpitchy); dst.bpitchc = dst.bpitcha = dst.bpitchy; val = uiomux_all_virt_to_phys(dst.py); meram_write_icb(meram, icbw, MExxSSARA, val); dst.py = (void *)meram_get_icb_address(meram, icbw, 0); uiomux_register(dst.py, (unsigned long)dst.py, 8 << 20); if (is_ycbcr(dst.format)) { val = uiomux_all_virt_to_phys(dst.pc); meram_write_icb(meram, icbw, MExxSSARB, val); dst.pc = (void *)meram_get_icb_address(meram, icbw, 1); uiomux_register(dst.pc, (unsigned long)dst.pc, 8 << 20); } else { meram_write_icb(meram, icbw, MExxSSARB, 0); } #endif /* defined(USE_MERAM_WB) */ if (strcmp (infilename[0], "-") == 0) { infile[0] = stdin; } else { infile[0] = fopen (infilename[0], "rb"); if (infile[0] == NULL) { fprintf (stderr, "%s: unable to open input file %s\n", progname, infilename[0]); goto exit_err; } } if (infilename[1] != NULL) { infile[1] = fopen (infilename[1], "rb"); if (infile[1] == NULL) { fprintf (stderr, "%s: unable to open input file %s\n", progname, infilename[1]); goto exit_err; } } if (outfilename != NULL) { if (strcmp (outfilename, "-") == 0) { outfile = stdout; } else { outfile = fopen (outfilename, "wb"); if (outfile == NULL) { fprintf (stderr, "%s: unable to open output file %s\n", progname, outfilename); goto 
exit_err; } } } if (!viodev) vio = shvio_open(); else vio = shvio_open_named(viodev); if (vio == 0) { fprintf (stderr, "Error opening VIO\n"); goto exit_err; } while (1) { #ifdef DEBUG fprintf (stderr, "%s: Converting frame %d\n", progname, frameno); #endif /* Read input */ if ((nread = fread (inbuf[0], 1, input_size[0], infile[0])) != input_size[0]) { if (nread == 0 && feof (infile[0])) { break; } else { fprintf (stderr, "%p, %s: errors reading input file %s %d %d %d\n", inbuf[0], progname, infilename[0], nread, input_size[0], ferror(infile[0])); } } #if 1 if (infilename[1] != NULL) { if ((nread = fread (inbuf[1], 1, input_size[1], infile[1])) != input_size[1]) { if (nread == 0 && feof (infile[1])) { break; } else { fprintf (stderr, "%s: error reading input file %s\n", progname, infilename[1]); } } printf("invoke shvio_setup_blend()...\n"); ret = shvio_setup_blend(vio, NULL, srclist, 2, &dst); shvio_start(vio); printf("shvio_start_blend() = %d\n", ret); ret = shvio_wait(vio); } else { #endif if (rotation) { ret = shvio_rotate(vio, &src[0], &dst, rotation); } else { ret = shvio_resize(vio, &src[0], &dst); } } #if defined(USE_MERAM_WB) meram_read_icb(meram, icbw, MExxCTRL, &val); val |= 1 << 5; /* WF: flush data */ meram_write_icb(meram, icbw, MExxCTRL, val); #endif #if defined(USE_MERAM_RA) meram_read_icb(meram, icbr, MExxCTRL, &val); val |= 1 << 4; /* RF: flush data */ meram_write_icb(meram, icbr, MExxCTRL, val); #endif /* Write output */ if (outfile && fwrite (outbuf, 1, output_size, outfile) != output_size) { fprintf (stderr, "%s: error writing input file %s\n", progname, outfilename); } frameno++; } shvio_close (vio); #if defined(USE_MERAM_RA) /* finialize the read-ahead cache */ uiomux_unregister(src[0].py); if (is_ycbcr(src[0].format)) uiomux_unregister(src[0].pc); meram_free_icb_memory(meram, icbr); meram_unlock_icb(meram, icbr); #endif #if defined(USE_MERAM_WB) /* finialize the write-back cache */ uiomux_unregister(dst.py); if (is_ycbcr(dst.format)) 
uiomux_unregister(dst.pc); meram_free_icb_memory(meram, icbw); meram_unlock_icb(meram, icbw); #endif #if defined(USE_MERAM_RA) || defined(USE_MERAM_WB) meram_close(meram); #endif uiomux_free (uiomux, uiores, src[0].py, input_size[0]); if (infilename[1] != NULL) uiomux_free (uiomux, uiores, src[1].py, input_size[1]); uiomux_free (uiomux, uiores, dst.py, output_size); uiomux_close (uiomux); if (infile[0] != stdin) fclose (infile[0]); if (infilename[1] != NULL) fclose (infile[1]); if (outfile == stdout) { fflush (stdout); } else if (outfile) { fclose (outfile); } printf ("Frames:\t\t%d\n", frameno); exit_ok: exit (0); exit_err: exit (1); }
/* ============ TestMinMax ============ */ void TestMinMax() { int i; TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD; ALIGN16( float fsrc0[COUNT] ); ALIGN16( idVec2 v2src0[COUNT] ); ALIGN16( idVec3 v3src0[COUNT] ); ALIGN16( idDrawVert drawVerts[COUNT] ); ALIGN16( triIndex_t indexes[COUNT] ); float min = 0.0f, max = 0.0f, min2 = 0.0f, max2 = 0.0f; idVec2 v2min, v2max, v2min2, v2max2; idVec3 vmin, vmax, vmin2, vmax2; const char *result; idRandom srnd( RANDOM_SEED ); for ( i = 0; i < COUNT; i++ ) { fsrc0[i] = srnd.CRandomFloat() * 10.0f; v2src0[i][0] = srnd.CRandomFloat() * 10.0f; v2src0[i][1] = srnd.CRandomFloat() * 10.0f; v3src0[i][0] = srnd.CRandomFloat() * 10.0f; v3src0[i][1] = srnd.CRandomFloat() * 10.0f; v3src0[i][2] = srnd.CRandomFloat() * 10.0f; drawVerts[i].xyz = v3src0[i]; indexes[i] = i; } idLib::common->Printf("====================================\n" ); bestClocksGeneric = 0; for ( i = 0; i < NUMTESTS; i++ ) { min = idMath::INFINITY; max = -idMath::INFINITY; StartRecordTime( start ); p_generic->MinMax( min, max, fsrc0, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksGeneric ); } PrintClocks( "generic->MinMax( float[] )", COUNT, bestClocksGeneric ); bestClocksSIMD = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_simd->MinMax( min2, max2, fsrc0, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksSIMD ); } result = ( min == min2 && max == max2 ) ? 
"ok" : S_COLOR_RED"X"; PrintClocks( va( " simd->MinMax( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric ); bestClocksGeneric = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_generic->MinMax( v2min, v2max, v2src0, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksGeneric ); } PrintClocks( "generic->MinMax( idVec2[] )", COUNT, bestClocksGeneric ); bestClocksSIMD = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_simd->MinMax( v2min2, v2max2, v2src0, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksSIMD ); } result = ( v2min == v2min2 && v2max == v2max2 ) ? "ok" : S_COLOR_RED"X"; PrintClocks( va( " simd->MinMax( idVec2[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric ); bestClocksGeneric = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_generic->MinMax( vmin, vmax, v3src0, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksGeneric ); } PrintClocks( "generic->MinMax( idVec3[] )", COUNT, bestClocksGeneric ); bestClocksSIMD = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_simd->MinMax( vmin2, vmax2, v3src0, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksSIMD ); } result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X"; PrintClocks( va( " simd->MinMax( idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric ); bestClocksGeneric = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_generic->MinMax( vmin, vmax, drawVerts, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksGeneric ); } PrintClocks( "generic->MinMax( idDrawVert[] )", COUNT, bestClocksGeneric ); bestClocksSIMD = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_simd->MinMax( vmin2, vmax2, drawVerts, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksSIMD ); } result = ( vmin == vmin2 && vmax == vmax2 ) ? 
"ok" : S_COLOR_RED"X"; PrintClocks( va( " simd->MinMax( idDrawVert[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric ); bestClocksGeneric = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_generic->MinMax( vmin, vmax, drawVerts, indexes, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksGeneric ); } PrintClocks( "generic->MinMax( idDrawVert[], indexes[] )", COUNT, bestClocksGeneric ); bestClocksSIMD = 0; for ( i = 0; i < NUMTESTS; i++ ) { StartRecordTime( start ); p_simd->MinMax( vmin2, vmax2, drawVerts, indexes, COUNT ); StopRecordTime( end ); GetBest( start, end, bestClocksSIMD ); } result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X"; PrintClocks( va( " simd->MinMax( idDrawVert[], indexes[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric ); }
/*
 * JNI callback invoked with one captured camera frame.
 *
 * Advances the stream timestamp, registers the calling thread with pjlib
 * if needed, optionally converts the frame in place to I420 (depending on
 * strm->convert_to_i420), runs the resize/rotate converter and finally
 * hands the frame to the application's capture callback.
 *
 * env        JNI environment of the calling thread.
 * obj        Java object the native method was invoked on (unused).
 * data       Java byte array holding the frame.
 * length     Frame size in bytes, stored in the pjmedia frame.
 * user_data  The and_stream pointer, carried through Java as a jlong.
 */
static void JNICALL OnGetFrame(JNIEnv *env, jobject obj,
                               jbyteArray data, jint length,
                               jlong user_data)
{
    and_stream *strm = *(and_stream**)&user_data;
    pjmedia_frame f;
    pj_uint8_t *Y, *U, *V;
    pj_status_t status;
    void *frame_buf, *data_buf;

    strm->frame_ts.u64 += strm->ts_inc;
    if (!strm->vid_cb.capture_cb)
        return;

    if (strm->thread_initialized == 0 || !pj_thread_is_registered()) {
        pj_bzero(strm->thread_desc, sizeof(pj_thread_desc));
        status = pj_thread_register("and_cam", strm->thread_desc,
                                    &strm->thread);
        if (status != PJ_SUCCESS)
            return;
        strm->thread_initialized = 1;
        PJ_LOG(5,(THIS_FILE, "Android camera thread registered"));
    }

    /* GetByteArrayElements() may fail and return NULL; nothing has been
     * pinned in that case, so there is nothing to release either.
     */
    data_buf = (*env)->GetByteArrayElements(env, data, 0);
    if (data_buf == NULL)
        return;

    f.type = PJMEDIA_FRAME_TYPE_VIDEO;
    f.size = length;
    f.timestamp.u64 = strm->frame_ts.u64;
    f.buf = data_buf;

    Y = (pj_uint8_t*)f.buf;
    U = Y + strm->vafp.plane_bytes[0];
    V = U + strm->vafp.plane_bytes[1];

    /* Convert NV21 -> I420, i.e: separate V/U interleaved data plane
     * into U & V planes.
     */
    if (strm->convert_to_i420 == 1) {
        pj_uint8_t *src = U;
        pj_uint8_t *dst_u = U;
        pj_uint8_t *end_u = U + strm->vafp.plane_bytes[1];
        pj_uint8_t *dst_v = strm->convert_buf;

        /* U is compacted in place (dst_u never overtakes src), V is
         * collected in the conversion buffer and copied back afterwards.
         */
        while (dst_u < end_u) {
            *dst_v++ = *src++;
            *dst_u++ = *src++;
        }
        pj_memcpy(V, strm->convert_buf, strm->vafp.plane_bytes[2]);
    }

    /* Convert YV12 -> I420, i.e: swap U & V planes. We also need to
     * strip out padding, if any.
     */
    else if (strm->convert_to_i420 == 2) {
        int y_stride  = ALIGN16(strm->vafp.size.w);
        int uv_stride = ALIGN16(strm->vafp.size.w/2);

        /* Strip out Y padding */
        if (y_stride > strm->vafp.size.w) {
            int i;
            pj_uint8_t *src = Y + y_stride;
            pj_uint8_t *dst = Y + strm->vafp.size.w;

            /* Row 0 is already in place, start from row 1 */
            for (i = 1; i < strm->vafp.size.h; ++i) {
                memmove(dst, src, strm->vafp.size.w);
                src += y_stride;
                dst += strm->vafp.size.w;
            }
        }

        /* Swap U & V planes */
        if (uv_stride == strm->vafp.size.w/2) {

            /* No padding, note Y plane should be no padding too! */
            pj_assert(y_stride == strm->vafp.size.w);
            pj_memcpy(strm->convert_buf, U, strm->vafp.plane_bytes[1]);
            pj_memmove(U, V, strm->vafp.plane_bytes[1]);
            pj_memcpy(V, strm->convert_buf, strm->vafp.plane_bytes[1]);

        } else if (uv_stride > strm->vafp.size.w/2) {

            /* Strip & copy V plane into conversion buffer */
            pj_uint8_t *src = Y + y_stride*strm->vafp.size.h;
            pj_uint8_t *dst = strm->convert_buf;
            unsigned dst_stride = strm->vafp.size.w/2;
            int i;

            for (i = 0; i < strm->vafp.size.h/2; ++i) {
                memmove(dst, src, dst_stride);
                src += uv_stride;
                dst += dst_stride;
            }

            /* Strip U plane; src now points at the padded source U plane */
            dst = U;
            for (i = 0; i < strm->vafp.size.h/2; ++i) {
                memmove(dst, src, dst_stride);
                src += uv_stride;
                dst += dst_stride;
            }

            /* Get V plane data from conversion buffer */
            pj_memcpy(V, strm->convert_buf, strm->vafp.plane_bytes[2]);

        }
    }

    status = pjmedia_vid_dev_conv_resize_and_rotate(&strm->conv,
                                                    f.buf,
                                                    &frame_buf);
    if (status == PJ_SUCCESS) {
        f.buf = frame_buf;
    }

    (*strm->vid_cb.capture_cb)(&strm->base, strm->user_data, &f);

    /* JNI_ABORT: release without copying our in-place changes back */
    (*env)->ReleaseByteArrayElements(env, data, data_buf, JNI_ABORT);
}
/*
 * Records an undo entry for the window's image before a modification.
 *
 * gwnd            window whose image is about to be modified
 * x1,y1,x2,y2     rectangle concerned (corners may be given in any order);
 *                 x1 < 0 means the image itself is not modified
 * type_modif      kind of modification, stored in the entry's mitem field
 * xundo           optional extra data; when nb_bytes_to_allocate_and_copy
 *                 is > 0 a private copy of xundo->spec is attached
 *
 * Returns 0 on success, -1 when undo is disabled or the window is invalid,
 * -3 when a memory allocation fails.
 *
 * NOTE(review): the failure paths rely on AllocateNewUndoBuffer() returning
 * a zero-initialised buffer and on FreeUndoBuffer() releasing the buffer and
 * whatever is attached to it (palette, image) -- the original code already
 * depends on the former (it tests undo_buf->disk / img.fd_addr without
 * initialising them) and calls the latter in its error branch; confirm.
 */
int undo_put(GEM_WINDOW *gwnd, int x1, int y1, int x2, int y2, int type_modif, XUNDO *xundo)
{
  VXIMAGE  *vimage ;
  UNDO_DEF *undo_buf ;
  void     *spec = NULL ;
  long     taille = 0 ;
  int      xy[8] ;
  int      wx, wy ;
  int      larg, temp ;
  int      type_alloc ;
  int      err = 0 ;

  if ( config.nb_undo == 0 ) return( -1 ) ;
  if ( !GWIsWindowValid( gwnd ) ) return( -1 ) ;

  undo_buf = AllocateNewUndoBuffer( gwnd ) ;
  if ( undo_buf == NULL ) return( -3 ) ;

  vimage = (VXIMAGE *) gwnd->Extension ;

  /* Take a private copy of the caller's extra data, if any */
  if ( xundo && ( xundo->nb_bytes_to_allocate_and_copy > 0 ) )
  {
    spec = malloc( xundo->nb_bytes_to_allocate_and_copy ) ;
    if ( spec == NULL )
    {
      /* Do not leak the undo buffer we just obtained */
      FreeUndoBuffer( undo_buf ) ;
      return( -3 ) ;
    }
    memcpy( spec, xundo->spec, xundo->nb_bytes_to_allocate_and_copy ) ;
  }

  /* Normalise the rectangle so that (x1,y1) is the top-left corner */
  if ( x1 > x2 )
  {
    temp = x1 ;
    x1   = x2 ;
    x2   = temp ;
  }

  if ( y1 > y2 )
  {
    temp = y1 ;
    y1   = y2 ;
    y2   = temp ;
  }

  wx   = x2 - x1 + 1 ;
  wy   = y2 - y1 + 1 ;
  larg = ALIGN16( wx ) ;

  if ( need_palette( type_modif, spec ) )
  {
    size_t size ;

    undo_buf->nb_cpal = (int) vimage->inf_img.nb_cpal ;
    size              = undo_buf->nb_cpal * 3 * sizeof(int) ;
    undo_buf->palette = (int *) malloc( size ) ;
    if ( undo_buf->palette == NULL )
    {
      /* spec is not attached to undo_buf yet: release it ourselves */
      free( spec ) ;
      FreeUndoBuffer( undo_buf ) ;
      return( -3 ) ;
    }
    memcpy( undo_buf->palette, vimage->inf_img.palette, size ) ;
  }

  if ( x1 < 0 ) taille = 0 ; /* No change to the image (probably palette only) */
  else          taille = img_size( larg, wy, nb_plane ) ;

  undo_buf->original_width  = vimage->raster.fd_w ;
  undo_buf->original_height = vimage->raster.fd_h ;

  type_alloc = must_alloc( type_modif, taille, spec ) ;
  if ( type_alloc == 0 ) /* Is it necessary to allocate memory ? No */
  {
    undo_buf->img.fd_addr = NULL ;
    undo_buf->x           = x1 ;
    undo_buf->y           = y1 ;
    undo_buf->w           = wx ;
    undo_buf->h           = wy ;
    undo_buf->mitem       = type_modif ;
    undo_buf->spec        = spec ;
  }
  else
  {
    /* type_alloc == 2 : image saved on disk, otherwise kept in memory */
    if ( type_alloc == 2 ) undo_buf->disk = 1 ;
    else                   undo_buf->img.fd_addr = malloc( taille ) ;

    if ( undo_buf->disk || ( undo_buf->img.fd_addr != NULL ) )
    {
      undo_buf->img.fd_w       = larg ; /* If possible, store */
      undo_buf->img.fd_h       = wy ;   /* the new data       */
      undo_buf->img.fd_wdwidth = larg/16 ;
      undo_buf->img.fd_nplanes = nb_plane ;
      undo_buf->img.fd_stand   = 0 ;
      undo_buf->gwindow        = gwnd ;
      undo_buf->x              = x1 ;
      undo_buf->y              = y1 ;
      undo_buf->w              = wx ;
      undo_buf->h              = wy ;
      undo_buf->mitem          = type_modif ;
      undo_buf->spec           = spec ;

      /* Source rectangle in the image, destination at (0,0) in the copy */
      xy[0] = x1 ;   xy[1] = y1 ;
      xy[2] = x2 ;   xy[3] = y2 ;
      xy[4] = 0 ;    xy[5] = 0 ;
      xy[6] = wx-1 ; xy[7] = wy-1 ;
      if ( undo_buf->disk ) undo_disk( gwnd, undo_buf, xy, &vimage->raster ) ;
      else                  vro_cpyfm( handle, S_ONLY, xy, &vimage->raster, &undo_buf->img ) ;
    }
    else
    {
      /* Image buffer could not be allocated: previously this fell through
         with err == 0 and a half-initialised entry was put in the list.
         spec is not attached to undo_buf here, so release it ourselves. */
      free( spec ) ;
      err = -3 ;
    }
  }

  if ( !err )
  {
    LIST_ENTRY* entry = GET_LIST_ENTRY_FROM_UNDO_DEF( undo_buf ) ;

    InsertHeadList( &vimage->UndoListHead, entry ) ;
    if ( type_modif != REDO ) FreeUUndoBuffer( gwnd ) ;
  }
  else
  {
    FreeUndoBuffer( undo_buf ) ;
    form_stop( 1, msg[MSG_UNDOERROR] ) ;
  }

  return( err ) ;
}
void encode_exp_blk_ch_sse2(uint8_t *exp, int ncoefs, int exp_strategy) { int grpsize, ngrps, i, k, exp_min1, exp_min2; uint8_t v; ngrps = nexpgrptab[exp_strategy-1][ncoefs] * 3; grpsize = exp_strategy + (exp_strategy == EXP_D45); // for D15 strategy, there is no need to group/ungroup exponents switch (grpsize) { case 1: { // constraint for DC exponent exp[0] = MIN(exp[0], 15); // Decrease the delta between each groups to within 2 // so that they can be differentially encoded for (i = 1; i <= ngrps; i++) exp[i] = MIN(exp[i], exp[i-1]+2); for (i = ngrps-1; i >= 0; i--) exp[i] = MIN(exp[i], exp[i+1]+2); return; } // for each group, compute the minimum exponent case 2: { ALIGN16(uint16_t) exp1[256]; ALIGN16(const union __m128iui) vmask = {{0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff}}; i=0; k=1; for(; i < (ngrps & ~7); i += 8, k += 16) { __m128i v1 = _mm_loadu_si128((__m128i*)&exp[k]); __m128i v2 = _mm_srli_si128(v1, 1); v1 = _mm_and_si128(v1, vmask.v); v1 = _mm_min_epu8(v1, v2); _mm_store_si128((__m128i*)&exp1[i], v1); } switch (ngrps & 7) { case 7: exp1[i] = MIN(exp[k], exp[k+1]); ++i; k += 2; case 6: exp1[i] = MIN(exp[k], exp[k+1]); ++i; k += 2; case 5: exp1[i] = MIN(exp[k], exp[k+1]); ++i; k += 2; case 4: exp1[i] = MIN(exp[k], exp[k+1]); ++i; k += 2; case 3: exp1[i] = MIN(exp[k], exp[k+1]); ++i; k += 2; case 2: exp1[i] = MIN(exp[k], exp[k+1]); ++i; k += 2; case 1: exp1[i] = MIN(exp[k], exp[k+1]); case 0: ; } // constraint for DC exponent exp[0] = MIN(exp[0], 15); // Decrease the delta between each groups to within 2 // so that they can be differentially encoded exp1[0] = MIN(exp1[0], (uint16_t)exp[0]+2); for (i = 1; i < ngrps; i++) exp1[i] = MIN(exp1[i], exp1[i-1]+2); for (i = ngrps-2; i >= 0; i--) exp1[i] = MIN(exp1[i], exp1[i+1]+2); // now we have the exponent values the decoder will see exp[0] = MIN(exp[0], exp1[0]+2); // DC exponent is handled separately i=0; k=1; for (; i < (ngrps & ~7); i += 8, k += 16) { __m128i v1 = _mm_load_si128((__m128i*)&exp1[i]); 
__m128i v2 = _mm_slli_si128(v1, 1); v1 = _mm_or_si128(v1, v2); _mm_storeu_si128((__m128i*)&exp[k], v1); } switch (ngrps & 7) { case 7: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; ++i; k += 2; case 6: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; ++i; k += 2; case 5: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; ++i; k += 2; case 4: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; ++i; k += 2; case 3: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; ++i; k += 2; case 2: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; ++i; k += 2; case 1: v = (uint8_t)exp1[i]; exp[k] = v; exp[k+1] = v; case 0: ; } return; } default: { ALIGN16(uint32_t) exp1[256]; ALIGN16(const union __m128iui) vmask2 = {{0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff}}; i=0; k=1; for (; i < (ngrps & ~3); i += 4, k += 16) { __m128i v1 = _mm_loadu_si128((__m128i*)&exp[k]); __m128i v2 = _mm_srli_si128(v1, 1); v1 = _mm_min_epu8(v1, v2); v2 = _mm_srli_si128(v1, 2); v1 = _mm_min_epu8(v1, v2); v1 = _mm_and_si128(v1, vmask2.v); _mm_store_si128((__m128i*)&exp1[i], v1); } switch (ngrps & 3) { case 3: exp_min1 = MIN(exp[k ], exp[k+1]); exp_min2 = MIN(exp[k+2], exp[k+3]); exp1[i] = MIN(exp_min1, exp_min2); ++i; k += 4; case 2: exp_min1 = MIN(exp[k ], exp[k+1]); exp_min2 = MIN(exp[k+2], exp[k+3]); exp1[i] = MIN(exp_min1, exp_min2); ++i; k += 4; case 1: exp_min1 = MIN(exp[k ], exp[k+1]); exp_min2 = MIN(exp[k+2], exp[k+3]); exp1[i] = MIN(exp_min1, exp_min2); case 0: ; } // constraint for DC exponent exp[0] = MIN(exp[0], 15); // Decrease the delta between each groups to within 2 // so that they can be differentially encoded exp1[0] = MIN(exp1[0], (uint32_t)exp[0]+2); for (i = 1; i < ngrps; i++) exp1[i] = MIN(exp1[i], exp1[i-1]+2); for (i = ngrps-2; i >= 0; i--) exp1[i] = MIN(exp1[i], exp1[i+1]+2); // now we have the exponent values the decoder will see exp[0] = MIN(exp[0], exp1[0]+2); // DC exponent is handled separately i=0; k=1; for (; i < (ngrps & ~3); i += 4, k += 16) { __m128i v1 = 
_mm_load_si128((__m128i*)&exp1[i]); __m128i v2 = _mm_slli_si128(v1, 1); v1 = _mm_or_si128(v1, v2); v2 = _mm_slli_si128(v1, 2); v1 = _mm_or_si128(v1, v2); _mm_storeu_si128((__m128i*)&exp[k], v1); } switch (ngrps & 3) { case 3: v = exp1[i]; exp[k] = v; exp[k+1] = v; exp[k+2] = v; exp[k+3] = v; ++i; k += 4; case 2: v = exp1[i]; exp[k] = v; exp[k+1] = v; exp[k+2] = v; exp[k+3] = v; ++i; k += 4; case 1: v = exp1[i]; exp[k] = v; exp[k+1] = v; exp[k+2] = v; exp[k+3] = v; case 0: ; } return; } } }
QSVEncoder(int fps_, int width, int height, int quality, CTSTR preset, bool bUse444, int maxBitrate, int bufferSize, bool bUseCFR_, bool bDupeFrames_) : enc(nullptr) { Log(TEXT("------------------------------------------")); for(int i = 0; i < sizeof(validImpl)/sizeof(validImpl[0]); i++) { mfxIMPL impl = validImpl[i]; mfxVersion ver = version; auto result = session.Init(impl, &ver); if(result == MFX_ERR_NONE) { Log(TEXT("QSV version %u.%u using %s"), ver.Major, ver.Minor, implStr[impl]); break; } } fps = fps_; bUseCBR = AppConfig->GetInt(TEXT("Video Encoding"), TEXT("UseCBR")) != 0; bUseCFR = bUseCFR_; bDupeFrames = bDupeFrames_; memset(¶ms, 0, sizeof(params)); params.AsyncDepth = 1; params.mfx.CodecId = MFX_CODEC_AVC; params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY; params.mfx.TargetKbps = maxBitrate; params.mfx.MaxKbps = maxBitrate; params.mfx.InitialDelayInKB = 1; //params.mfx.GopRefDist = 1; //params.mfx.NumRefFrame = 0; params.mfx.RateControlMethod = bUseCBR ? MFX_RATECONTROL_CBR : MFX_RATECONTROL_VBR; params.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY; auto& fi = params.mfx.FrameInfo; ConvertFrameRate(fps, fi.FrameRateExtN, fi.FrameRateExtD); fi.FourCC = MFX_FOURCC_NV12; fi.ChromaFormat = bUse444 ? 
MFX_CHROMAFORMAT_YUV444 : MFX_CHROMAFORMAT_YUV420; fi.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; fi.Width = ALIGN16(width); fi.Height = ALIGN16(height); fi.CropX = 0; fi.CropY = 0; fi.CropW = width; fi.CropH = height; this->width = width; this->height = height; enc.reset(new MFXVideoENCODE(session)); enc->Close(); auto result = enc->Init(¶ms); memset(&enc_surf, 0, sizeof(enc_surf)); memcpy(&enc_surf.Info, ¶ms.mfx.FrameInfo, sizeof(enc_surf.Info)); decltype(params) query; memcpy(&query, ¶ms, sizeof(params)); enc->GetVideoParam(&query); unsigned size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8); bs_buff.SetSize(size+31);//.resize(size+31); bs.Data = (mfxU8*)(((size_t)bs_buff.Array() + 31) / 32 * 32); bs.MaxLength = size; params.mfx.BufferSizeInKB = size/1000; Log(TEXT("Buffer size: %u configured, %u suggested by QSV; using %u"), bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, size*8/1024); Log(TEXT("------------------------------------------")); Log(TEXT("%s"), GetInfoString().Array()); Log(TEXT("------------------------------------------")); memset(&ctrl, 0, sizeof(ctrl)); ctrl.FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR; DataPacket packet; GetHeaders(packet); }
/*
 * Bump allocator over the mainMemory pool.
 *
 * Hands out the current top of the pool and advances the allocation
 * counter by ALIGN16(size). Returns (void*)-1 when the counter has grown
 * past TOTAL_MEMORY. The counter is advanced before the check, so it stays
 * past the limit after a failed call.
 */
void * alloc( int size )
{
	void * const block = mainMemory + allocated;

	allocated += ALIGN16(size);

	return (allocated > TOTAL_MEMORY) ? (void*)-1 : block;
}
int handleCommand( ppu_addr_t program_data_ea ){ SPECommand cmd __ALIGNED__; int i; /* Load the type */ mfc_get(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0); mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); switch(cmd.command) { case SPE_CMD_INIT: /* resets stored data */ reset(); datatype = cmd.data.INIT.datatype; if(datatype < 0 || datatype > 4) { datatype = -1; return -1; } fixedDel = cmd.data.INIT.fixedDel; incDel = cmd.data.INIT.incDel; maxDbLen = cmd.data.INIT.dbMaxLen; /* reset some variables */ profile = NULL; remote_profile = 0; blockStart = 0; blockSize = 0; s1 = NULL; ls1 = 0; simi = NULL; /* allocate memory for database string and inter-block * buffers */ s2 = (char *)alloc( maxDbLen*sizeof(char) ); maxS = alloc( maxDbLen*dataSize[datatype] ); delS = alloc( maxDbLen*dataSize[datatype] ); break; case SPE_CMD_CREATE_PROFILE: /* downloads query sequence and scoring matrix and initializes the profile */ if(profile != NULL || datatype == -1) return -1; mn = min(cmd.data.CREATE_PROFILE.matrix.min,min(fixedDel,incDel)); mx = max(cmd.data.CREATE_PROFILE.matrix.max,max(fixedDel,incDel)); ls1 = cmd.data.CREATE_PROFILE.query.len; /* allocate and load query sequence */ s1 = alloc( ls1*sizeof(char) ); for( i=0; i<ls1; i+=MAX_TRANSFER ) mfc_get( s1+i, cmd.data.CREATE_PROFILE.query.addr+i, ALIGN16(min(ls1-i, MAX_TRANSFER)*sizeof(char)), 0, 0, 0 ); /* allocate and load matrix */ simi = alloc( MATRIX_DIM*MATRIX_DIM*dataSize[datatype] ); mfc_get( simi, cmd.data.CREATE_PROFILE.matrix.addr, ALIGN16(MATRIX_DIM*MATRIX_DIM*dataSize[datatype]), 1, 0, 0 ); /* wait for DMA to finish */ mfc_write_tag_mask((1<<0)|(1<<1)); mfc_read_tag_status_all(); /* compute block size and allocate memory */ if(memRemaining() <= 0) return -1; blockSize=(memRemaining() / ((MATRIX_DIM+3)*dataSize[datatype])) & -16; if (blockSize < 50) return -1; blockSize = ALIGN16(min(blockSize,ls1)); /* allocate memory and initialize profile */ profile = alloc( blockSize * MATRIX_DIM * dataSize[datatype] ); 
loadOpt = alloc( blockSize * dataSize[datatype] ); storeOpt = alloc( blockSize * dataSize[datatype] ); rD = alloc( blockSize * dataSize[datatype] ); blockStart = 0; #ifdef DEBUG_FETCH printf(">>>> creating profile\n"); #endif createProfile[datatype](); break; case SPE_CMD_PUT_PROFILE: /* upload profile to main memory */ if(profile == NULL || s1 == NULL) return -1; /* normally we would expect the first block of the profile is * already present in memory. If not generate it */ if(blockStart != 0) { blockStart = 0; createProfile[datatype](); } cmd.data.PUT_PROFILE.blockSize = blockSize; /* create profile blockwise and upload it to main memory */ for(blockStart=0; blockStart<ls1; blockStart+=blockSize ) { int64_t bs; int currentBlockSize = ALIGN16(min(ls1-blockStart,blockSize)); if(blockStart != 0) createProfile[datatype](); for( bs=0; bs<currentBlockSize * MATRIX_DIM * dataSize[datatype]; bs+=MAX_TRANSFER ) { mfc_put( ((char*)profile)+bs, cmd.data.PUT_PROFILE.addr+blockStart*MATRIX_DIM*dataSize[datatype]+bs, ALIGN16(min(currentBlockSize*MATRIX_DIM*dataSize[datatype]-bs, (int64_t)MAX_TRANSFER)), 0, 0, 0 ); /* wait for DMA to finish */ mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); } } /* Write back the data */ mfc_put(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0); mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); break; case SPE_CMD_GET_PROFILE: /* download profile from main memory */ if(datatype == -1 || profile != NULL) return -1; remote_profile = cmd.data.GET_PROFILE.profile.addr; mn = min(cmd.data.GET_PROFILE.profile.min,min(fixedDel,incDel)); mx = max(cmd.data.GET_PROFILE.profile.max,max(fixedDel,incDel)); ls1 = cmd.data.GET_PROFILE.profile.len; blockSize = cmd.data.GET_PROFILE.profile.blockSize; profile = alloc( blockSize * MATRIX_DIM * dataSize[datatype] ); loadOpt = alloc( blockSize * dataSize[datatype] ); storeOpt = alloc( blockSize * dataSize[datatype] ); rD = alloc( blockSize * dataSize[datatype] ); if(memRemaining() < 0) return -1; blockStart = 0; 
#ifdef DEBUG_FETCH printf(">>>> fetching profile (%d bytes)\n",ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype])); #endif for( i=0; i<ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]); i+=MAX_TRANSFER ) { mfc_get( ((char*)profile)+i, remote_profile+i, ALIGN16(min(blockSize*MATRIX_DIM*dataSize[datatype]-i, (int64_t)MAX_TRANSFER)), 0, 0, 0 ); /* wait for DMA to finish */ mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); } break; case SPE_CMD_ALIGN: /* perform a local alignment */ if(profile == NULL) return -1; ls2 = cmd.data.ALIGN.db.len; /* download database sequence */ for( i=0; i<ls2; i+=MAX_TRANSFER ) mfc_get( s2+i, cmd.data.ALIGN.db.addr+i, ALIGN16(min(ls2-i, MAX_TRANSFER)*sizeof(char)), 0, 0, 0 ); mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); /* initialize the profile if it has not been initialized yet */ if(blockStart != 0) { if(remote_profile == 0) { blockStart = 0; #ifdef DEBUG_FETCH printf(">>>> creating profile\n"); #endif createProfile[datatype](); } else { blockStart = 0; #ifdef DEBUG_FETCH printf(">>>> fetching profile (%d bytes)\n",ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype])); #endif for( i=0; i<ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]); i+=MAX_TRANSFER ) { mfc_get( ((char*)profile)+i, remote_profile+i, ALIGN16(min(blockSize*MATRIX_DIM*dataSize[datatype]-i, (int64_t)MAX_TRANSFER)), 0, 0, 0 ); /* wait for DMA to finish */ mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); } } } cmd.data.ALIGN.result = dynProgLocal[datatype](); /* Write back the data */ mfc_put(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0); mfc_write_tag_mask(1<<0); mfc_read_tag_status_all(); break; default: return -1; } return 0; }
/*
 * Applies gamma correction to the input image, in place.
 *
 * in      image to modify: its palette when the raster has 8 planes or
 *         fewer, otherwise the 16/32-bit pixels of the selected rectangle
 * params  Param[0] is the global gamma (in 1/100); when it changes, it is
 *         copied to Param[1..3], the red/green/blue gammas (in 1/100).
 *         x1,y1,x2,y2 give the rectangle to process.
 * out     not used by this function
 *
 * Returns ELDV_NOERROR, ELDV_CANCELLED when the progress callback asked
 * to stop, or ELDV_GENERALFAILURE when no palette is available.
 */
LDV_STATUS cdecl Run(LDV_IMAGE *in, LDV_PARAMS *params, LDV_IMAGE *out)
{
  LDV_PALETTE    *vdi_palette = &in->Palette ;
  VDI_ELEMENTPAL *vdi_epal ;
  double         rgamma, ggamma, bgamma ;
  short          vdi_index ;
  short          cancel = 0 ;

  /* A change of the global gamma overrides the three component gammas */
  if ( (params->Param[0].s / 100.0) != gamma_rgb )
  {
    gamma_rgb = params->Param[0].s / 100.0 ;
    params->Param[1].s = params->Param[0].s ;
    params->Param[2].s = params->Param[0].s ;
    params->Param[3].s = params->Param[0].s ;
  }
  rgamma = params->Param[1].s / 100.0 ;
  ggamma = params->Param[2].s / 100.0 ;
  bgamma = params->Param[3].s / 100.0 ;

  if ( in->Raster.fd_nplanes <= 8 )
  {
    /* Palette-based image: correct the palette entries only */
    if ( vdi_palette == NULL ) return( ELDV_GENERALFAILURE ) ;
    vdi_epal = vdi_palette->Pal ;
    if ( vdi_epal == NULL ) return( ELDV_GENERALFAILURE ) ;

    for ( vdi_index = 0; vdi_index < vdi_palette->NbColors; vdi_index++, vdi_epal++ )
    {
      /* Each component is corrected from its own value. Reusing the
         corrected red for green/blue when the gammas are equal is only
         valid for grey entries and would turn coloured ones grey. */
      vdi_epal->Red   = (short) ( 0.5 + GammaFunc( vdi_epal->Red,   1000.0, rgamma ) ) ;
      vdi_epal->Green = (short) ( 0.5 + GammaFunc( vdi_epal->Green, 1000.0, ggamma ) ) ;
      vdi_epal->Blue  = (short) ( 0.5 + GammaFunc( vdi_epal->Blue,  1000.0, bgamma ) ) ;
    }
  }
  else
  {
    REMAP_COLORS   rc ;
    MFDB           *img = &in->Raster ;
    long           *pt_line32, nb_pts_in_line ;
    long           nb_lines ;
    /* No local 'cancel' here: it would hide the one tested by the
       final return and ELDV_CANCELLED could never be reported */
    short          *pt_line16, y, pc, is_15bits ;
    unsigned short nb_bits_red=8, nb_bits_green=8, nb_bits_blue=8 ; /* 32 bits by default */

    CHECK_VAPI(Vapi) ;

    rc.red    = RedRemap ;
    rc.green  = GreenRemap ;
    rc.blue   = BlueRemap ;
    rc.nb_pts = (long) (1 + params->x2 - params->x1) ;
    is_15bits = Vapi->RaIs15Bits() ;
    if ( img->fd_nplanes == 16 )
    {
      nb_bits_red   = 5 ;
      nb_bits_green = is_15bits ? 5:6 ;
      nb_bits_blue  = 5 ;
    }

    /* Update the precomputed component tables if necessary */
    if ( rgamma != gamma_red )   ChangeColors( rgamma, RedRemap, nb_bits_red ) ;
    if ( ggamma != gamma_green ) ChangeColors( ggamma, GreenRemap, nb_bits_green ) ;
    if ( bgamma != gamma_blue )  ChangeColors( bgamma, BlueRemap, nb_bits_blue ) ;

    /* Line length in pixels, rounded up with ALIGN16 */
    nb_pts_in_line = ALIGN16(img->fd_w) ;
    pt_line16 = (short *) img->fd_addr ;
    pt_line16 += (long)(params->y1) * nb_pts_in_line ;
    pt_line16 += params->x1 ;
    pt_line32 = (long *) img->fd_addr ;
    pt_line32 += (long)(params->y1) * nb_pts_in_line ;
    pt_line32 += params->x1 ;
    nb_lines   = (long)(params->y2 - params->y1) ;
    for ( y = params->y1; !cancel && (y <= params->y2); y++ )
    {
      if ( img->fd_nplanes == 16 )
      {
        rc.pt_img  = pt_line16 ;
        pt_line16 += nb_pts_in_line ;
        if ( is_15bits ) Vapi->RaTC15RemapColors( &rc ) ;
        else             Vapi->RaTC16RemapColors( &rc ) ;
      }
      else
      {
        rc.pt_img  = pt_line32 ;
        pt_line32 += nb_pts_in_line ;
        Vapi->RaTC32RemapColors( &rc ) ;
      }
      /* Report progress every 16 lines */
      if ( ( y & 0x0F ) == 0x0F )
      {
        /* nb_lines is 0 for a one-line rectangle: avoid dividing by it */
        if ( nb_lines != 0 ) pc = (short) ( ( 100L * (long)(y - params->y1) ) / nb_lines ) ;
        else                 pc = 100 ;
        cancel = Vapi->PrSetProgEx( pc ) ;
      }
    }
  }

  /* Remember the gammas the remap tables were last computed for */
  gamma_red   = rgamma ;
  gamma_green = ggamma ;
  gamma_blue  = bgamma ;

  return( cancel ? ELDV_CANCELLED : ELDV_NOERROR ) ;
}
/*
 * Allocates the encoder input buffers of the application context.
 *
 * The number of buffers is the larger of DEFAULT_NUM_INPUT_BUFS and the
 * minimum reported in s_get_buf_info_op, capped at DEFAULT_MAX_INPUT_BUFS.
 * Each buffer gets a picture area of u4_wd * u4_ht * 3/2 bytes, an MB info
 * array with one entry per 16x16 block of the ALIGN16-rounded maximum
 * picture size, and one pic info structure. All are 16-byte aligned
 * allocations. Any allocation failure ends in codec_exit().
 */
void allocate_input(app_ctxt_t *ps_app_ctxt)
{
    ih264e_ctl_getbufinfo_op_t *ps_buf_info = &ps_app_ctxt->s_get_buf_info_op;
    WORD32 buf_cnt;
    WORD32 y_bytes;
    WORD32 uv_bytes;
    WORD32 frame_bytes;
    WORD32 mb_cnt;
    WORD32 idx;
    UWORD8 *pu1_mem;

    /* Number of buffers: at least the default, at most the array size */
    buf_cnt = MAX(DEFAULT_NUM_INPUT_BUFS, ps_buf_info->s_ive_op.u4_min_inp_bufs);
    buf_cnt = MIN(DEFAULT_MAX_INPUT_BUFS, buf_cnt);

    /* Size of one picture: luma plus half as much chroma */
    y_bytes = ps_app_ctxt->u4_wd * ps_app_ctxt->u4_ht;
    uv_bytes = y_bytes >> 1;
    frame_bytes = y_bytes + uv_bytes;

    /* Number of 16x16 blocks in the largest picture */
    mb_cnt = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht);
    mb_cnt /= 256;

    /* Memset the input buffer array to set is_free to 0 */
    memset(ps_app_ctxt->as_input_buf, 0, sizeof(input_buf_t) * DEFAULT_MAX_INPUT_BUFS);

    idx = 0;
    while(idx < buf_cnt)
    {
        /* Picture data */
        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, frame_bytes);
        if(NULL == pu1_mem)
        {
            CHAR ac_error[STRLENGTH];
            sprintf(ac_error, "Allocation failed for input buffer of size %d\n",
                    frame_bytes);
            codec_exit(ac_error);
        }
        ps_app_ctxt->as_input_buf[idx].pu1_buf = pu1_mem;

        /* Per-MB info */
        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, mb_cnt * sizeof(ih264e_mb_info_t));
        if(NULL == pu1_mem)
        {
            CHAR ac_error[STRLENGTH];
            sprintf(ac_error, "Allocation failed for mb info buffer of size %d\n",
                    (WORD32)(mb_cnt * sizeof(ih264e_mb_info_t)));
            codec_exit(ac_error);
        }
        ps_app_ctxt->as_input_buf[idx].pv_mb_info = pu1_mem;

        /* Per-picture info */
        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, sizeof(ih264e_pic_info2_t));
        if(NULL == pu1_mem)
        {
            CHAR ac_error[STRLENGTH];
            sprintf(ac_error, "Allocation failed for pic info buffer of size %d\n",
                    (WORD32) sizeof(ih264e_pic_info2_t));
            codec_exit(ac_error);
        }
        ps_app_ctxt->as_input_buf[idx].pv_pic_info = pu1_mem;

        ps_app_ctxt->as_input_buf[idx].u4_buf_size = frame_bytes;
        ps_app_ctxt->as_input_buf[idx].u4_is_free = 1;

        idx++;
    }
    return;
}
QSVEncoder(int fps_, int width, int height, int quality, CTSTR preset, bool bUse444, int maxBitrate, int bufferSize, bool bUseCFR_, bool bDupeFrames_) : enc(nullptr) { Log(TEXT("------------------------------------------")); for(int i = 0; i < sizeof(validImpl)/sizeof(validImpl[0]); i++) { mfxIMPL impl = validImpl[i]; ver = version; mfxStatus result = MFX_ERR_UNKNOWN; for(ver.Minor = 6; ver.Minor >= 4; ver.Minor -= 2) { result = session.Init(impl, &ver); if(result == MFX_ERR_NONE) { Log(TEXT("QSV version %u.%u using %s"), ver.Major, ver.Minor, implStr[impl]); break; } } if(result == MFX_ERR_NONE) break; } session.SetPriority(MFX_PRIORITY_HIGH); fps = fps_; bUseCBR = AppConfig->GetInt(TEXT("Video Encoding"), TEXT("UseCBR")) != 0; bUseCFR = bUseCFR_; bDupeFrames = bDupeFrames_; memset(¶ms, 0, sizeof(params)); //params.AsyncDepth = 0; params.mfx.CodecId = MFX_CODEC_AVC; params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;//SPEED; params.mfx.TargetKbps = (mfxU16)(maxBitrate*0.9); params.mfx.MaxKbps = maxBitrate; //params.mfx.InitialDelayInKB = 1; //params.mfx.GopRefDist = 1; //params.mfx.NumRefFrame = 0; params.mfx.GopPicSize = 61; params.mfx.GopRefDist = 3; params.mfx.GopOptFlag = MFX_GOP_STRICT; params.mfx.IdrInterval = 2; params.mfx.NumSlice = 1; params.mfx.RateControlMethod = bUseCBR ? MFX_RATECONTROL_CBR : MFX_RATECONTROL_VBR; params.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY; auto& fi = params.mfx.FrameInfo; ConvertFrameRate(fps, fi.FrameRateExtN, fi.FrameRateExtD); fi.FourCC = MFX_FOURCC_NV12; fi.ChromaFormat = bUse444 ? 
MFX_CHROMAFORMAT_YUV444 : MFX_CHROMAFORMAT_YUV420; fi.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; fi.Width = ALIGN16(width); fi.Height = ALIGN16(height); fi.CropX = 0; fi.CropY = 0; fi.CropW = width; fi.CropH = height; this->width = width; this->height = height; enc.reset(new MFXVideoENCODE(session)); enc->Close(); mfxFrameAllocRequest req; memset(&req, 0, sizeof(req)); enc->QueryIOSurf(¶ms, &req); enc->Init(¶ms); decltype(params) query; memcpy(&query, ¶ms, sizeof(params)); enc->GetVideoParam(&query); unsigned num_surf = max(6, req.NumFrameSuggested + params.AsyncDepth); encode_tasks.SetSize(num_surf); const unsigned bs_size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8); bs_buff.SetSize(bs_size * encode_tasks.Num() + 31); params.mfx.BufferSizeInKB = bs_size/1000; mfxU8* bs_start = (mfxU8*)(((size_t)bs_buff.Array() + 31)/32*32); for(unsigned i = 0; i < encode_tasks.Num(); i++) { encode_tasks[i].sp = nullptr; mfxFrameSurface1& surf = encode_tasks[i].surf; memset(&surf, 0, sizeof(mfxFrameSurface1)); memcpy(&surf.Info, ¶ms.mfx.FrameInfo, sizeof(params.mfx.FrameInfo)); mfxBitstream& bs = encode_tasks[i].bs; memset(&bs, 0, sizeof(mfxBitstream)); bs.Data = bs_start + i*bs_size; bs.MaxLength = bs_size; idle_tasks << i; } frames.SetSize(num_surf+3); //+NUM_OUT_BUFFERS const unsigned lum_channel_size = fi.Width*fi.Height, uv_channel_size = fi.Width*fi.Height, frame_size = lum_channel_size + uv_channel_size; frame_buff.SetSize(frame_size * frames.Num() + 15); mfxU8* frame_start = (mfxU8*)(((size_t)frame_buff.Array() + 15)/16*16); memset(frame_start, 0, frame_size * frames.Num()); for(unsigned i = 0; i < frames.Num(); i++) { mfxFrameData& frame = frames[i]; memset(&frame, 0, sizeof(mfxFrameData)); frame.Y = frame_start + i * frame_size; frame.UV = frame_start + i * frame_size + lum_channel_size; frame.V = frame.UV + 1; frame.Pitch = fi.Width; } Log(TEXT("Using %u encode tasks"), encode_tasks.Num()); Log(TEXT("Buffer size: %u configured, %u suggested by QSV; using %u"), 
bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, params.mfx.BufferSizeInKB*1000*8/1024); Log(TEXT("------------------------------------------")); Log(TEXT("%s"), GetInfoString().Array()); Log(TEXT("------------------------------------------")); memset(&ctrl, 0, sizeof(ctrl)); ctrl.FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR; deferredFrames = 0; bUsingDecodeTimestamp = false && ver.Minor >= 6; DataPacket packet; GetHeaders(packet); }
/* ======================== LZWJobInternal This job takes a stream of objects, which should already be zrle compressed, and then lzw compresses them and builds a final delta packet ready to be sent to peers. ======================== */ void LZWJobInternal( lzwParm_t * parm, unsigned int dmaTag ) { assert( parm->numObjects > 0 ); #ifndef ALLOW_MULTIPLE_DELTAS if ( parm->ioData->numlzwDeltas > 0 ) { // Currently, we don't use fragmented deltas. // We only send the first one and rely on a full snap being sent to get the whole snap across assert( parm->ioData->numlzwDeltas == 1 ); assert( !parm->ioData->fullSnap ); return; } #endif assert( parm->ioData->lzwBytes < parm->ioData->maxlzwMem ); dmaTag = dmaTag; ALIGN16( idLZWCompressor lzwCompressor( parm->ioData->lzwData ) ); if ( parm->fragmented ) { // This packet was partially written out, we need to continue writing, using previous lzw dictionary values ContinueLZWStream( parm, &lzwCompressor ); } else { // We can start a new lzw dictionary NewLZWStream( parm, &lzwCompressor ); } int numChangedObjProcessed = 0; for ( int i = 0; i < parm->numObjects; i++ ) { // This will eventually be gracefully caught in SnapshotProcessor.cpp. // It's nice to know right when it happens though, so you can inspect the situation. assert( !lzwCompressor.IsOverflowed() || numChangedObjProcessed > 1 ); // First, see if we need to finish the current lzw stream if ( lzwCompressor.IsOverflowed() || lzwCompressor.Length() >= parm->ioData->optimalLength ) { FinishLZWStream( parm, &lzwCompressor ); // indicate how much needs to be DMA'ed back out parm->ioData->lzwDmaOut = parm->ioData->lzwBytes; #ifdef ALLOW_MULTIPLE_DELTAS NewLZWStream( parm, &lzwCompressor ); #else // Currently, we don't use fragmented deltas. 
// We only send the first one and rely on a full snap being sent to get the whole snap across assert( !parm->ioData->fullSnap ); assert( parm->ioData->numlzwDeltas == 1 ); return; #endif } if ( numChangedObjProcessed > 0 ) { // We should be at a good spot in the stream if we've written at least one obj without overflowing, so save it lzwCompressor.Save(); } // Get header objHeader_t * header = &parm->headers[i]; if ( header->objID == -1 ) { assert( header->flags & OBJ_SAME ); continue; // Don't send object (which means ack) } numChangedObjProcessed++; // Write obj id as delta into stream lzwCompressor.WriteAgnostic<uint16>( (uint16)( header->objID - parm->ioData->lastObjId ) ); parm->ioData->lastObjId = (uint16)header->objID; // Check special stale/notstale flags if ( header->flags & ( OBJ_VIS_STALE | OBJ_VIS_NOT_STALE ) ) { // Write stale/notstale flag objectSize_t value = ( header->flags & OBJ_VIS_STALE ) ? SIZE_STALE : SIZE_NOT_STALE; lzwCompressor.WriteAgnostic<objectSize_t>( value ); } if ( header->flags & OBJ_VIS_STALE ) { continue; // Don't write out data for stale objects } if ( header->flags & OBJ_DELETED ) { // Object was deleted lzwCompressor.WriteAgnostic<objectSize_t>( 0 ); continue; } // Write size lzwCompressor.WriteAgnostic<objectSize_t>( (objectSize_t)header->size ); // Get compressed data area uint8 * compressedData = header->data; if ( header->csize == -1 ) { // Wasn't zrle compressed, zrle now while lzw'ing idZeroRunLengthCompressor rleCompressor; rleCompressor.Start( NULL, &lzwCompressor, 0xFFFF ); rleCompressor.WriteBytes( compressedData, header->size ); rleCompressor.End(); } else { // Write out zero-rle compressed data lzwCompressor.Write( compressedData, header->csize ); } #ifdef SNAPSHOT_CHECKSUMS // Write checksum lzwCompressor.WriteAgnostic( header->checksum ); #endif // This will eventually be gracefully caught in SnapshotProcessor.cpp. // It's nice to know right when it happens though, so you can inspect the situation. 
assert( !lzwCompressor.IsOverflowed() || numChangedObjProcessed > 1 ); } if ( !parm->saveDictionary ) { // Write out terminator uint16 objectDelta = 0xFFFF - parm->ioData->lastObjId; lzwCompressor.WriteAgnostic( objectDelta ); // Last stream FinishLZWStream( parm, &lzwCompressor ); // indicate how much needs to be DMA'ed back out parm->ioData->lzwDmaOut = parm->ioData->lzwBytes; parm->ioData->fullSnap = true; // We sent a full snap } else { // the compressor did some work, wrote data to lzwMem, but since we didn't call FinishLZWStream to end the compression, // we need to figure how much needs to be DMA'ed back out assert( parm->ioData->lzwBytes == 0 ); // I don't think we ever hit this with lzwBytes != 0, but adding it just in case parm->ioData->lzwDmaOut = parm->ioData->lzwBytes + lzwCompressor.Length(); } assert( parm->ioData->lzwBytes < parm->ioData->maxlzwMem ); }