/*****************************************************************************
* analyse.c: h264 encoder library
*****************************************************************************
* Copyright (C) 2003 x264 project
* $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#ifndef _MSC_VER
//#include <unistd.h>
#endif
#include "common.h"
#include "encoder_macroblock.h"
#include "me.h"
#include "ratecontrol.h"
#include "analyse.h"
#include "rdo.c"
/* Inter analysis results for a single reference list.
 * x264_mb_analysis_t embeds two of these (l0 and l1).  The fields are grouped
 * by partition shape; each group pairs a cost with the motion-estimation
 * state (x264_me_t) of the partitions it covers. */
typedef struct
{
/* 16x16 */
int i_ref; /* NOTE(review): presumably the reference index used for the 16x16 search -- confirm against the users of this field */
int i_rd16x16; /* NOTE(review): by its name an RD score for the 16x16 mode -- confirm */
x264_me_t me16x16;
/* 8x8 */
int i_cost8x8;
int mvc[32][5][2]; /* [ref][0] is 16x16 mv,
[ref][1..4] are 8x8 mv from partition [0..3] */
x264_me_t me8x8[4]; /* one entry per 8x8 partition */
/* Sub 4x4 */
int i_cost4x4[4]; /* cost per 8x8 partition */
x264_me_t me4x4[4][4]; /* [8x8 partition][4x4 block inside it] */
/* Sub 8x4 */
int i_cost8x4[4]; /* cost per 8x8 partition */
x264_me_t me8x4[4][2]; /* [8x8 partition][8x4 block inside it] */
/* Sub 4x8 */
int i_cost4x8[4]; /* cost per 8x8 partition */
x264_me_t me4x8[4][2]; /* [8x8 partition][4x8 block inside it] */
/* 16x8 */
int i_cost16x8;
x264_me_t me16x8[2]; /* one entry per 16x8 partition */
/* 8x16 */
int i_cost8x16;
x264_me_t me8x16[2]; /* one entry per 8x16 partition */
} x264_mb_analysis_list_t;
/* Working state for the analysis of one macroblock.
 * x264_mb_analyse_init() zeroes the whole structure and then fills in the
 * QP-dependent fields; the intra section holds SATD costs and chosen
 * prediction modes, the inter section holds per-list results (l0/l1) plus the
 * costs of the bi-predicted and direct candidates. */
typedef struct
{
/* conduct the analysis using this lambda and QP */
int i_lambda; /* looked up from i_qp0_cost_table[i_qp] */
int i_lambda2; /* looked up from i_qp0_cost2_table[i_qp] */
int i_qp;
int16_t *p_cost_mv; /* set by x264_mb_analyse_load_costs(); points at the centre of a lambda-scaled mv cost table, so negative indices are valid */
int b_mbrd; /* set in x264_mb_analyse_init() from i_subpel_refine >= 6 (and b_bframe_rdo for B slices) */
/* I: Intra part */
/* Take some shortcuts in intra search if intra is deemed unlikely */
int b_fast_intra;
int b_try_pskip;
/* Luma part */
int i_satd_i16x16; /* initialised to COST_MAX */
int i_satd_i16x16_dir[7];
int i_predict16x16;
int i_satd_i8x8; /* initialised to COST_MAX */
int i_satd_i8x8_dir[12][4];
int i_predict8x8[4];
int i_satd_i4x4; /* initialised to COST_MAX */
int i_predict4x4[16];
/* Chroma part */
int i_satd_i8x8chroma; /* initialised to COST_MAX */
int i_satd_i8x8chroma_dir[4];
int i_predict8x8chroma;
/* II: Inter part P/B frame */
x264_mb_analysis_list_t l0;
x264_mb_analysis_list_t l1;
int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
int i_cost16x16direct;
int i_cost8x8bi;
int i_cost8x8direct[4];
int i_cost16x8bi;
int i_cost8x16bi;
/* NOTE(review): the i_rd* fields below look like RD-refined counterparts of the i_cost* fields above -- confirm against their users */
int i_rd16x16bi;
int i_rd16x16direct;
int i_rd16x8bi;
int i_rd8x16bi;
int i_rd8x8bi;
int i_mb_partition16x8[2]; /* mb_partition_e */
int i_mb_partition8x16[2]; /* NOTE(review): presumably mb_partition_e as well, like the 16x8 field above -- confirm */
int i_mb_type16x8; /* mb_class_e */
int i_mb_type8x16; /* NOTE(review): presumably mb_class_e as well, like the 16x8 field above -- confirm */
int b_direct_available;
} x264_mb_analysis_t;
/* Lagrange multiplier per QP: lambda = pow(2,qp/6-2).
 * Indexed by QP (0..51).  One row per six QP steps, i.e. per doubling of
 * lambda; entries never go below 1. */
static const int i_qp0_cost_table[52] = {
     1,  1,  1,  1,  1,  1,   /*  0- 5 */
     1,  1,  1,  1,  1,  1,   /*  6-11 */
     1,  1,  1,  1,  2,  2,   /* 12-17 */
     2,  2,  3,  3,  3,  4,   /* 18-23 */
     4,  4,  5,  6,  6,  7,   /* 24-29 */
     8,  9, 10, 11, 13, 14,   /* 30-35 */
    16, 18, 20, 23, 25, 29,   /* 36-41 */
    32, 36, 40, 45, 51, 57,   /* 42-47 */
    64, 72, 81, 91            /* 48-51 */
};
/* Squared Lagrange multiplier per QP: pow(lambda,2) * .9.
 * Indexed by QP (0..51), four entries per row; entries never go below 1. */
static const int i_qp0_cost2_table[52] = {
       1,    1,    1,    1,   /*  0- 3 */
       1,    1,    1,    1,   /*  4- 7 */
       1,    1,    1,    1,   /*  8-11 */
       1,    1,    1,    2,   /* 12-15 */
       2,    3,    4,    5,   /* 16-19 */
       6,    7,    9,   11,   /* 20-23 */
      14,   18,   23,   29,   /* 24-27 */
      36,   46,   58,   73,   /* 28-31 */
      91,  115,  145,  183,   /* 32-35 */
     230,  290,  366,  461,   /* 36-39 */
     581,  731,  922, 1161,   /* 40-43 */
    1463, 1843, 2322, 2926,   /* 44-47 */
    3686, 4645, 5852, 7373    /* 48-51 */
};
/* TODO: calculate CABAC costs */
/* Fixed cost per macroblock type, one entry per type index below
 * X264_MBTYPE_MAX.
 * NOTE(review): by its name this is meant for B-frame mb types -- confirm
 * against the enum that defines the index order. */
static const int i_mb_b_cost_table[X264_MBTYPE_MAX] = {
    9, 9, 9, 9, 0, 0, 0, 1,   /*  0- 7 */
    3, 7, 7, 7, 3, 7, 7, 7,   /*  8-15 */
    5, 9, 0                   /* 16-18 */
};
/* Fixed cost table with 17 entries; the first eight are zero.
 * NOTE(review): by its name this covers B-frame 16x8 partition types and is
 * presumably indexed by the same type values as i_mb_b_cost_table -- confirm
 * at the point of use. */
static const int i_mb_b16x8_cost_table[17] = {
    0, 0, 0, 0, 0, 0, 0, 0,      /*  0- 7 */
    5, 7, 7, 7,                  /*  8-11 */
    5, 7, 9, 9, 9                /* 12-16 */
};
/* Fixed cost table with 13 entries.
 * NOTE(review): by its name, one cost per B-frame sub-macroblock type --
 * confirm the index order against the sub-partition enum. */
static const int i_sub_mb_b_cost_table[13] = {
    7, 5, 5, 3,   /*  0- 3 */
    7, 5, 7, 3,   /*  4- 7 */
    7, 7, 7, 5,   /*  8-11 */
    1             /* 12    */
};
/* Fixed cost table with 4 entries.
 * NOTE(review): by its name, one cost per P-frame sub-macroblock type --
 * confirm the index order against the sub-partition enum. */
static const int i_sub_mb_p_cost_table[4] = {
    5,   /* 0 */
    3,   /* 1 */
    3,   /* 2 */
    1    /* 3 */
};
/* Prototype only: the function is file-local (static) and is declared here
 * ahead of its definition so that code above the definition can call it. */
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
/* initialize an array of lambda*nbits for all possible mvs
 *
 * Points a->p_cost_mv at a table, cached per QP in a function-local static
 * array, whose entry [d] is a->i_lambda * bs_size_se( |d| ) for every d in
 * [-2*4*2048, 2*4*2048].  The pointer refers to the centre of the
 * allocation, so negative indices are valid.  The table for a given QP is
 * built on first use and is never freed.
 *
 * h is unused here.
 * a supplies i_qp and i_lambda and receives p_cost_mv.
 *
 * If the allocation fails, a->p_cost_mv is set to NULL and nothing is cached,
 * so a later call will try again.
 *
 * NOTE(review): the cache is not synchronised.  The table is now filled
 * completely before it is stored in the cache, so another caller can no
 * longer pick up a half-written table, but two concurrent first calls for the
 * same QP can still both allocate -- add locking if this is reachable from
 * more than one thread. */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    static int16_t *p_cost_mv[52];

    if( !p_cost_mv[a->i_qp] )
    {
        /* could be faster, but isn't called many times */
        /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
        const int i_max = 2*4*2048;
        int16_t *p_table = x264_malloc( (2*i_max + 1) * sizeof(int16_t) );
        int i;

        /* Offsetting or writing through a null pointer is undefined: bail out
         * before touching the buffer and leave the cache slot empty. */
        if( !p_table )
        {
            a->p_cost_mv = NULL;
            return;
        }

        /* Move to the centre so the table can be indexed by a signed delta. */
        p_table += i_max;
        for( i = 0; i <= i_max; i++ )
        {
            p_table[-i] =
            p_table[i] = a->i_lambda * bs_size_se( i );
        }

        /* Publish only once every entry has been written. */
        p_cost_mv[a->i_qp] = p_table;
    }
    a->p_cost_mv = p_cost_mv[a->i_qp];
}
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
{
memset( a, 0, sizeof( x264_mb_analysis_t ) );
/* conduct the analysis using this lamda and QP */
a->i_qp = h->mb.i_qp = i_qp;
h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
a->i_lambda = i_qp0_cost_table[i_qp];
a->i_lambda2 = i_qp0_cost2_table[i_qp];
a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 &&
( h->sh.i_type != SLICE_TYPE_B || h->param.analyse.b_bframe_rdo );
h->mb.i_me_method = h->param.analyse.i_me_method;
h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
&& h->mb.i_subpel_refine >= 5;
h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->b_mbrd;
h->mb.b_transform_8x8 = 0;
h->mb.b_noise_reduction = 0;
/* I: Intra part */
a->i_satd_i16x16 =
a->i_satd_i8x8 =
a->i_satd_i4x4 =
a->i_satd_i8x8chroma = COST_MAX;
a->b_fast_intra = 0;
/* II: Inter part P/B frame */
if( h->sh.i_type != SLICE_TYPE_I )
{
int i, j;
int i_fmv_range = 4 * h->param.analyse.i_mv_range;
// limit motion search to a slightly smaller range than the theoretical limit,
// since the search may go a few iterations past its given range
int i_fpel_border = 5; // umh unconditional radius
int i_spel_border = 8; // 1.5 for subpel_satd, 1.5 for subpel_rd, 2 for bime, round up
/* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range )
h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
h->mb.mv_max[0] = 4*( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
if( h->mb.i_mb_x == 0)
{
int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
int thread_mvy_range = i_fmv_range;
if( h->param.i_threads > 1 )
{
int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
int thresh = pix_y + h->param.analyse.i_mv_range_thread;
for( i = (h->sh.i_type == SLICE_TYPE_B);
- 1
- 2
前往页