/*
 * Downscale a frame by a factor of two in each dimension (plain C version).
 *
 * Every output sample is derived from the 2x2 group of input samples whose
 * top-left corner is at (2*row, 2*col): each of the two columns is averaged
 * vertically with rounding ((top + bottom + 1) >> 1), and the two rounded
 * column values are then averaged with truncation ((left + right) >> 1).
 *
 * The luma plane is written at sout->width x sout->height; the U and V
 * planes are written at half of that (integer division) in each direction.
 * Finally sout is handed to pad_yuv_frame().
 *
 *   sin   source frame, only read
 *   sout  destination frame; its width/height determine how much is written
 */
static void scale_frame_down2x2(yuv_frame_t* sin, yuv_frame_t* sout)
{
  const int luma_w = sout->width;
  const int luma_h = sout->height;
  const int dst_stride_y = sout->stride_y;
  const int src_stride_y = sin->stride_y;
  const int dst_stride_c = sout->stride_c;
  const int src_stride_c = sin->stride_c;
  const int chroma_w = luma_w / 2;
  const int chroma_h = luma_h / 2;
  int row, col;

  /* Luma plane */
  for (row = 0; row < luma_h; ++row) {
    const int top = 2 * row * src_stride_y;   /* start of upper source row */
    const int bot = top + src_stride_y;       /* start of lower source row */
    const int dst = row * dst_stride_y;

    for (col = 0; col < luma_w; ++col) {
      const int x = 2 * col;
      const int left  = (sin->y[top + x]     + sin->y[bot + x]     + 1) >> 1;
      const int right = (sin->y[top + x + 1] + sin->y[bot + x + 1] + 1) >> 1;
      sout->y[dst + col] = (left + right) >> 1;
    }
  }

  /* Chroma planes: for each output row, U is written first, then V */
  for (row = 0; row < chroma_h; ++row) {
    const int top = 2 * row * src_stride_c;
    const int bot = top + src_stride_c;
    const int dst = row * dst_stride_c;

    for (col = 0; col < chroma_w; ++col) {
      const int x = 2 * col;
      const int left  = (sin->u[top + x]     + sin->u[bot + x]     + 1) >> 1;
      const int right = (sin->u[top + x + 1] + sin->u[bot + x + 1] + 1) >> 1;
      sout->u[dst + col] = (left + right) >> 1;
    }

    for (col = 0; col < chroma_w; ++col) {
      const int x = 2 * col;
      const int left  = (sin->v[top + x]     + sin->v[bot + x]     + 1) >> 1;
      const int right = (sin->v[top + x + 1] + sin->v[bot + x + 1] + 1) >> 1;
      sout->v[dst + col] = (left + right) >> 1;
    }
  }

  pad_yuv_frame(sout);
}
/*
 * Downscale a frame by a factor of two in each dimension (SIMD version).
 *
 * Each output row is produced in two stages:
 *   1. a vector loop that emits 8 output samples per iteration from 16
 *      samples of each of the two corresponding source rows;
 *   2. a scalar loop for the remaining (< 8) columns, which computes
 *      ((top + bottom + 1) >> 1) for the left and right column of every
 *      2x2 group and then averages those two values with truncation.
 *
 * NOTE(review): the vector loop is presumably meant to yield exactly the
 * scalar result (v128_avg_u8 as the rounding vertical average, v128_madd_us8
 * against all-ones as the horizontal pair sum) - confirm against the
 * intrinsic definitions.  It uses the *_aligned load/store intrinsics, so
 * the addressed rows are presumably required to be suitably aligned.
 *
 * The luma plane is always processed.  The U and V planes (half the output
 * width and height, integer division) are only processed when the
 * preprocessor symbol USE_CHROMA evaluates to non-zero.  Finally sout is
 * handed to pad_yuv_frame().
 *
 *   sin   source frame, only read
 *   sout  destination frame; its width/height determine how much is written
 */
void scale_frame_down2x2_simd(yuv_frame_t* sin, yuv_frame_t* sout)
{
  const int out_w = sout->width;
  const int out_h = sout->height;
  const int dst_stride_y = sout->stride_y;
  const int src_stride_y = sin->stride_y;
  int row, col;
  v128 ones = v128_dup_8(1);
  v128 zero = v128_dup_8(0);

  /* Luma plane */
  for (row = 0; row < out_h; ++row) {
    const int top = 2 * row * src_stride_y;   /* start of upper source row */
    const int bot = top + src_stride_y;       /* start of lower source row */
    const int dst = row * dst_stride_y;

    /* Vector part: 8 output samples per iteration */
    for (col = 0; col <= out_w - 8; col += 8) {
      v128 upper = v128_load_aligned(&sin->y[top + 2 * col]);
      v128 lower = v128_load_aligned(&sin->y[bot + 2 * col]);
      v128 vert = v128_avg_u8(upper, lower);
      v128 horz = v128_shr_s16(v128_madd_us8(vert, ones), 1);
      v64_store_aligned(&sout->y[dst + col],
                        v128_low_v64(v128_pack_s16_u8(zero, horz)));
    }

    /* Scalar tail for the columns the vector loop did not cover */
    for (; col < out_w; ++col) {
      const int x = 2 * col;
      const int left  = (sin->y[top + x]     + sin->y[bot + x]     + 1) >> 1;
      const int right = (sin->y[top + x + 1] + sin->y[bot + x + 1] + 1) >> 1;
      sout->y[dst + col] = (left + right) >> 1;
    }
  }

#if USE_CHROMA
  {
    const int dst_stride_c = sout->stride_c;
    const int src_stride_c = sin->stride_c;
    const int chroma_w = out_w / 2;
    const int chroma_h = out_h / 2;

    /* Chroma planes: for each output row, U is written first, then V */
    for (row = 0; row < chroma_h; ++row) {
      const int top = 2 * row * src_stride_c;
      const int bot = top + src_stride_c;
      const int dst = row * dst_stride_c;

      for (col = 0; col <= chroma_w - 8; col += 8) {
        v128 upper = v128_load_aligned(&sin->u[top + 2 * col]);
        v128 lower = v128_load_aligned(&sin->u[bot + 2 * col]);
        v128 vert = v128_avg_u8(upper, lower);
        v128 horz = v128_shr_s16(v128_madd_us8(vert, ones), 1);
        v64_store_aligned(&sout->u[dst + col],
                          v128_low_v64(v128_pack_s16_u8(zero, horz)));
      }
      for (; col < chroma_w; ++col) {
        const int x = 2 * col;
        const int left  = (sin->u[top + x]     + sin->u[bot + x]     + 1) >> 1;
        const int right = (sin->u[top + x + 1] + sin->u[bot + x + 1] + 1) >> 1;
        sout->u[dst + col] = (left + right) >> 1;
      }

      for (col = 0; col <= chroma_w - 8; col += 8) {
        v128 upper = v128_load_aligned(&sin->v[top + 2 * col]);
        v128 lower = v128_load_aligned(&sin->v[bot + 2 * col]);
        v128 vert = v128_avg_u8(upper, lower);
        v128 horz = v128_shr_s16(v128_madd_us8(vert, ones), 1);
        v64_store_aligned(&sout->v[dst + col],
                          v128_low_v64(v128_pack_s16_u8(zero, horz)));
      }
      for (; col < chroma_w; ++col) {
        const int x = 2 * col;
        const int left  = (sin->v[top + x]     + sin->v[bot + x]     + 1) >> 1;
        const int right = (sin->v[top + x + 1] + sin->v[bot + x + 1] + 1) >> 1;
        sout->v[dst + col] = (left + right) >> 1;
      }
    }
  }
#endif

  pad_yuv_frame(sout);
}
int main(int argc, char **argv) { FILE *infile, *strfile, *reconfile; uint32_t input_file_size; //TODO: Support file size values larger than 32 bits yuv_frame_t orig,ref[MAX_REF_FRAMES]; yuv_frame_t rec[MAX_REORDER_BUFFER]; int rec_available[MAX_REORDER_BUFFER] = {0}; int last_frame_output=-1; int num_encoded_frames,num_bits,start_bits,end_bits; int sub_gop=1; int rec_buffer_idx; int frame_num,frame_num0,k,r; int frame_offset; int ysize,csize,frame_size; int width,height; int min_interp_depth; int last_intra_frame_num = 0; uint32_t acc_num_bits; snrvals psnr; snrvals accsnr; double bit_rate_in_kbps; enc_params *params; encoder_info_t encoder_info; int y4m_output; // Keep track of last P frame for using the right references for the tail of a sequence in re-ordered modes int last_PorI_frame; init_use_simd(); /* Read commands from command line and from configuration file(s) */ if (argc < 3) { fprintf(stdout,"usage: %s <parameters>\n",argv[0]); fatalerror(""); } params = parse_config_params(argc, argv); if (params == NULL) { fatalerror("Error while reading encoder paramaters."); } check_parameters(params); /* Open files */ if (!(infile = fopen(params->infilestr,"rb"))) { fatalerror("Could not open in-file for reading."); } if (!(strfile = fopen(params->outfilestr,"wb"))) { fatalerror("Could not open out-file for writing."); } reconfile = NULL; y4m_output = 0; if (params->reconfilestr) { char *p; if (!(reconfile = fopen(params->reconfilestr,"wb"))) { fatalerror("Could not open recon-file for reading."); } p = strrchr(params->reconfilestr,'.'); y4m_output = p != NULL && strcmp(p,".y4m") == 0; } fseek(infile, 0, SEEK_END); input_file_size = ftell(infile); fseek(infile, 0, SEEK_SET); if (y4m_output) { fprintf(reconfile, "YUV4MPEG2 W%d H%d F%d:1 Ip A0:0 C420jpeg XYSCSS=420JPEG\x0a", params->width, params->height, (int)params->frame_rate); } accsnr.y = 0; accsnr.u = 0; accsnr.v = 0; acc_num_bits = 0; height = params->height; width = params->width; ysize = height * width; 
csize = ysize / 4; frame_size = ysize + 2*csize; /* Create frames*/ create_yuv_frame(&orig,width,height,0,0,0,0); for (r=0;r<MAX_REORDER_BUFFER;r++){ create_yuv_frame(&rec[r],width,height,0,0,0,0); } for (r=0;r<MAX_REF_FRAMES;r++){ //TODO: Use Long-term frame instead of a large sliding window create_yuv_frame(&ref[r],width,height,PADDING_Y,PADDING_Y,PADDING_Y/2,PADDING_Y/2); } if (params->interp_ref) { for (r=0;r<MAX_SKIP_FRAMES;r++){ encoder_info.interp_frames[r] = malloc(sizeof(yuv_frame_t)); create_yuv_frame(encoder_info.interp_frames[r],width,height,PADDING_Y,PADDING_Y,PADDING_Y/2,PADDING_Y/2); } } /* Initialize main bit stream */ stream_t stream; stream.bitstream = (uint8_t *)malloc(MAX_BUFFER_SIZE * sizeof(uint8_t)); stream.bitbuf = 0; stream.bitrest = 32; stream.bytepos = 0; stream.bytesize = MAX_BUFFER_SIZE; /* Configure encoder */ encoder_info.params = params; encoder_info.orig = &orig; for (r=0;r<MAX_REF_FRAMES;r++){ encoder_info.ref[r] = &ref[r]; } encoder_info.stream = &stream; encoder_info.width = width; encoder_info.height = height; encoder_info.deblock_data = (deblock_data_t *)malloc((height/MIN_PB_SIZE) * (width/MIN_PB_SIZE) * sizeof(deblock_data_t)); /* Write sequence header */ //TODO: Separate function for sequence header start_bits = get_bit_pos(&stream); putbits(16,width,&stream); putbits(16,height,&stream); putbits(1,params->enable_pb_split,&stream); putbits(1,params->enable_tb_split,&stream); putbits(2,params->max_num_ref-1,&stream); //TODO: Support more than 4 reference frames putbits(1,params->interp_ref,&stream);// Use an interpolated reference frame putbits(1, (params->max_delta_qp || params->bitrate), &stream); putbits(1,params->deblocking,&stream); putbits(1,params->clpf,&stream); putbits(1,params->use_block_contexts,&stream); putbits(1,params->enable_bipred,&stream); end_bits = get_bit_pos(&stream); num_bits = end_bits-start_bits; acc_num_bits += num_bits; printf("SH: %4d bits\n",num_bits); /* Start encoding sequence */ 
num_encoded_frames = 0; sub_gop = max(1,params->num_reorder_pics+1); min_interp_depth = log2i(params->num_reorder_pics+1)-3; if (params->frame_rate > 30) min_interp_depth--; last_PorI_frame = -1; rate_control_t rc; encoder_info.rc = &rc; if (params->bitrate > 0) { int target_bits = params->bitrate / params->frame_rate; int num_sb = ((width + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE) * ((height + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE); init_rate_control_per_sequence(&rc, target_bits, num_sb); } for (frame_num0 = params->skip; frame_num0 < (params->skip + params->num_frames) && (frame_num0+1)*frame_size <= input_file_size; frame_num0+=sub_gop) { for (k=0; k<sub_gop; k++) { int r,r1,r2,r3; /* Initialize frame info */ frame_offset = reorder_frame_offset(k,sub_gop,params->dyadic_coding); frame_num = frame_num0 + frame_offset; // If there is an initial I frame and reordering need to jump to the next P frame if (frame_num<params->skip) continue; encoder_info.frame_info.frame_num = frame_num - params->skip; rec_buffer_idx = encoder_info.frame_info.frame_num%MAX_REORDER_BUFFER; encoder_info.rec = &rec[rec_buffer_idx]; encoder_info.rec->frame_num = encoder_info.frame_info.frame_num; if (params->num_reorder_pics==0) { if (params->intra_period > 0) encoder_info.frame_info.frame_type = ((num_encoded_frames%params->intra_period) == 0 ? I_FRAME : P_FRAME); else encoder_info.frame_info.frame_type = (num_encoded_frames == 0 ? I_FRAME : P_FRAME); } else { if (params->intra_period > 0) encoder_info.frame_info.frame_type = ((encoder_info.frame_info.frame_num%params->intra_period) == 0 ? I_FRAME : ((encoder_info.frame_info.frame_num%sub_gop)==0 ? P_FRAME : B_FRAME)); else encoder_info.frame_info.frame_type = (encoder_info.frame_info.frame_num == 0 ? I_FRAME : ((encoder_info.frame_info.frame_num%sub_gop)==0 ? 
P_FRAME : B_FRAME)); } int coded_phase = (num_encoded_frames + sub_gop - 2) % sub_gop + 1; int b_level = log2i(coded_phase); encoder_info.frame_info.b_level = b_level; if (encoder_info.frame_info.frame_type == I_FRAME){ encoder_info.frame_info.qp = params->qp + params->dqpI; last_intra_frame_num = encoder_info.frame_info.frame_num; } else if (params->num_reorder_pics==0) { if (num_encoded_frames % params->HQperiod) encoder_info.frame_info.qp = (int)(params->mqpP*(float)params->qp) + params->dqpP; else encoder_info.frame_info.qp = params->qp; } else { if (encoder_info.frame_info.frame_num % sub_gop) { if (params->dyadic_coding){ if (b_level == 0) encoder_info.frame_info.qp = (int)(params->mqpB0*(float)params->qp) + params->dqpB0; else if (b_level == 1) encoder_info.frame_info.qp = (int)(params->mqpB1*(float)params->qp) + params->dqpB1; else if (b_level == 2) encoder_info.frame_info.qp = (int)(params->mqpB2*(float)params->qp) + params->dqpB2; else if (b_level == 3) encoder_info.frame_info.qp = (int)(params->mqpB3*(float)params->qp) + params->dqpB3; else encoder_info.frame_info.qp = (int)(params->mqpB*(float)params->qp) + params->dqpB; } else { encoder_info.frame_info.qp = (int)(params->mqpB*(float)params->qp) + params->dqpB; } } else { if (encoder_info.frame_info.frame_num % params->HQperiod) { encoder_info.frame_info.qp = (int)(params->mqpP*(float)params->qp) + params->dqpP; } else encoder_info.frame_info.qp = params->qp; } } encoder_info.frame_info.qp = clip(encoder_info.frame_info.qp, 0, MAX_QP); encoder_info.frame_info.num_ref = encoder_info.frame_info.frame_type == I_FRAME ? 
0 : min(num_encoded_frames,params->max_num_ref); encoder_info.frame_info.interp_ref = 0; if (encoder_info.frame_info.num_ref > 0) { if (params->num_reorder_pics > 0) { if (params->dyadic_coding) { /* if we have a P frame then use the previous P frame as a reference */ if ((num_encoded_frames-1) % sub_gop == 0) { if (num_encoded_frames==1) encoder_info.frame_info.ref_array[0] = 0; else encoder_info.frame_info.ref_array[0] = sub_gop-1; if (encoder_info.frame_info.num_ref>1 ) encoder_info.frame_info.ref_array[1] = min(MAX_REF_FRAMES-1,min(num_encoded_frames-1,2*sub_gop-1)); for (r=2;r<encoder_info.frame_info.num_ref;r++){ encoder_info.frame_info.ref_array[r] = r-2; } } else if (encoder_info.frame_info.num_ref>0){ int display_phase = (encoder_info.frame_info.frame_num-1) % sub_gop; int ref_offset=sub_gop>>(b_level+1); if (b_level >= min_interp_depth && params->interp_ref) { // Need to add another reference if we are at the beginning if (encoder_info.frame_info.num_ref==2) encoder_info.frame_info.num_ref++; encoder_info.frame_info.interp_ref = 1; encoder_info.frame_info.ref_array[1]=min(num_encoded_frames-1,coded_phase-dyadic_reorder_display_to_code[log2i(sub_gop)][display_phase-ref_offset+1]-1); encoder_info.frame_info.ref_array[2]=min(num_encoded_frames-1,coded_phase-dyadic_reorder_display_to_code[log2i(sub_gop)][display_phase+ref_offset+1]-1); // Interpolate these two reference frames to make a new frame encoder_info.frame_info.ref_array[0]=-1; // Add this interpolated frame to the reference buffer and use it as the first reference yuv_frame_t* ref1=encoder_info.ref[encoder_info.frame_info.ref_array[1]]; yuv_frame_t* ref2=encoder_info.ref[encoder_info.frame_info.ref_array[2]]; interpolate_frames(encoder_info.interp_frames[0], ref1, ref2, 2, 1); pad_yuv_frame(encoder_info.interp_frames[0]); encoder_info.interp_frames[0]->frame_num = encoder_info.frame_info.frame_num; /* use most recent frames for the last ref(s)*/ for (r=3;r<encoder_info.frame_info.num_ref;r++){ 
encoder_info.frame_info.ref_array[r] = r-3; } } else { encoder_info.frame_info.ref_array[0]=min(num_encoded_frames-1,coded_phase-dyadic_reorder_display_to_code[log2i(sub_gop)][display_phase-ref_offset+1]-1); encoder_info.frame_info.ref_array[1]=min(num_encoded_frames-1,coded_phase-dyadic_reorder_display_to_code[log2i(sub_gop)][display_phase+ref_offset+1]-1); /* use most recent frames for the last ref(s)*/ for (r=2;r<encoder_info.frame_info.num_ref;r++){ encoder_info.frame_info.ref_array[r] = r-2; } } } } else { /* if we have a P frame then use the previous P frame as a reference */ if ((num_encoded_frames-1) % sub_gop == 0) { if (num_encoded_frames==1) encoder_info.frame_info.ref_array[0] = 0; else encoder_info.frame_info.ref_array[0] = sub_gop-1; if (encoder_info.frame_info.num_ref>1 ) encoder_info.frame_info.ref_array[1] = min(MAX_REF_FRAMES-1,min(num_encoded_frames-1,2*sub_gop-1)); for (r=2;r<encoder_info.frame_info.num_ref;r++){ encoder_info.frame_info.ref_array[r] = r-1; } } else { if (params->interp_ref && encoder_info.frame_info.num_ref>0) { // Need to add another reference if we are at the beginning if (encoder_info.frame_info.num_ref==2) encoder_info.frame_info.num_ref++; encoder_info.frame_info.interp_ref = 1; // Use the last encoded frame as the first true ref if (encoder_info.frame_info.num_ref>0) { encoder_info.frame_info.ref_array[1] = 0; } /* Use the subsequent P frame as the 2nd ref */ int phase = (num_encoded_frames + sub_gop - 2) % sub_gop; if (encoder_info.frame_info.num_ref>1) { if (phase==0) encoder_info.frame_info.ref_array[2] = min(sub_gop, num_encoded_frames-1); else encoder_info.frame_info.ref_array[2] = min(phase, num_encoded_frames-1); } // Interpolate these two reference frames to make a new frame encoder_info.frame_info.ref_array[0]=-1; // Add this interpolated frame to the reference buffer and use it as the first reference yuv_frame_t* ref1=encoder_info.ref[encoder_info.frame_info.ref_array[1]]; yuv_frame_t* 
ref2=encoder_info.ref[encoder_info.frame_info.ref_array[2]]; interpolate_frames(encoder_info.interp_frames[0], ref1, ref2, sub_gop-phase,phase!=0 ? 1 : sub_gop-phase-1); pad_yuv_frame(encoder_info.interp_frames[0]); encoder_info.interp_frames[0]->frame_num = encoder_info.frame_info.frame_num; /* Use the prior P frame as the 4th ref */ if (encoder_info.frame_info.num_ref>2) { encoder_info.frame_info.ref_array[3] = min(phase ? phase + sub_gop : 2*sub_gop, num_encoded_frames-1); } /* use most recent frames for the last ref(s)*/ for (r=4;r<encoder_info.frame_info.num_ref;r++){ encoder_info.frame_info.ref_array[r] = r-4+1; } } else { // Use the last encoded frame as the first ref if (encoder_info.frame_info.num_ref>0) { encoder_info.frame_info.ref_array[0] = 0; } /* Use the subsequent P frame as the 2nd ref */ int phase = (num_encoded_frames + sub_gop - 2) % sub_gop; if (encoder_info.frame_info.num_ref>1) { if (phase==0) encoder_info.frame_info.ref_array[1] = min(sub_gop, num_encoded_frames-1); else encoder_info.frame_info.ref_array[1] = min(phase, num_encoded_frames-1); } /* Use the prior P frame as the 3rd ref */ if (encoder_info.frame_info.num_ref>2) { encoder_info.frame_info.ref_array[2] = min(phase ? phase + sub_gop : 2*sub_gop, num_encoded_frames-1); } /* use most recent frames for the last ref(s)*/ for (r=3;r<encoder_info.frame_info.num_ref;r++){ encoder_info.frame_info.ref_array[r] = r-3+1; } } } } } else { if (encoder_info.frame_info.num_ref>=1){ /* If num_ref==1 always use most recent frame */ encoder_info.frame_info.ref_array[0] = last_PorI_frame; } if (encoder_info.frame_info.num_ref==2){ /* If num_ref==2 use most recent LQ frame and most recent HQ frame */ r1 = ((num_encoded_frames + params->HQperiod - 2) % params->HQperiod) + 1; encoder_info.frame_info.ref_array[1] = r1; } else if (encoder_info.frame_info.num_ref==3){ r1 = ((num_encoded_frames + params->HQperiod - 2) % params->HQperiod) + 1; r2 = r1==1 ? 
2 : 1; encoder_info.frame_info.ref_array[1] = r1; encoder_info.frame_info.ref_array[2] = r2; } else if (encoder_info.frame_info.num_ref==4){ r1 = ((num_encoded_frames + params->HQperiod - 2) % params->HQperiod) + 1; r2 = r1==1 ? 2 : 1; r3 = r2+1; if (r3==r1) r3 += 1; encoder_info.frame_info.ref_array[1] = r1; encoder_info.frame_info.ref_array[2] = r2; encoder_info.frame_info.ref_array[3] = r3; } else{ for (r=1;r<encoder_info.frame_info.num_ref;r++){ encoder_info.frame_info.ref_array[r] = r; } } } }