|
@@ -11,17 +11,13 @@
|
|
|
********************************************************************
|
|
|
|
|
|
function:
|
|
|
- last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $
|
|
|
+ last mod: $Id$
|
|
|
|
|
|
********************************************************************/
|
|
|
#if !defined(_encint_H)
|
|
|
# define _encint_H (1)
|
|
|
-# if defined(HAVE_CONFIG_H)
|
|
|
-# include "config.h"
|
|
|
-# endif
|
|
|
# include "theora/theoraenc.h"
|
|
|
-# include "internal.h"
|
|
|
-# include "ocintrin.h"
|
|
|
+# include "state.h"
|
|
|
# include "mathops.h"
|
|
|
# include "enquant.h"
|
|
|
# include "huffenc.h"
|
|
@@ -32,8 +28,13 @@
|
|
|
typedef oc_mv oc_mv2[2];
|
|
|
|
|
|
typedef struct oc_enc_opt_vtable oc_enc_opt_vtable;
|
|
|
+typedef struct oc_enc_opt_data oc_enc_opt_data;
|
|
|
typedef struct oc_mb_enc_info oc_mb_enc_info;
|
|
|
typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
|
|
|
+typedef struct oc_fr_state oc_fr_state;
|
|
|
+typedef struct oc_qii_state oc_qii_state;
|
|
|
+typedef struct oc_enc_pipeline_state oc_enc_pipeline_state;
|
|
|
+typedef struct oc_mode_rd oc_mode_rd;
|
|
|
typedef struct oc_iir_filter oc_iir_filter;
|
|
|
typedef struct oc_frame_metrics oc_frame_metrics;
|
|
|
typedef struct oc_rc_state oc_rc_state;
|
|
@@ -42,6 +43,170 @@ typedef struct oc_token_checkpoint oc_token_checkpoint;
|
|
|
|
|
|
|
|
|
|
|
|
+/*Encoder-specific accelerated functions.*/
|
|
|
+# if defined(OC_X86_ASM)
|
|
|
+# if defined(_MSC_VER)
|
|
|
+# include "x86_vc/x86enc.h"
|
|
|
+# else
|
|
|
+# include "x86/x86enc.h"
|
|
|
+# endif
|
|
|
+# endif
|
|
|
+# if defined(OC_ARM_ASM)
|
|
|
+# include "arm/armenc.h"
|
|
|
+# endif
|
|
|
+
|
|
|
+# if !defined(oc_enc_accel_init)
|
|
|
+# define oc_enc_accel_init oc_enc_accel_init_c
|
|
|
+# endif
|
|
|
+# if defined(OC_ENC_USE_VTABLE)
|
|
|
+# if !defined(oc_enc_frag_sub)
|
|
|
+# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sub_128)
|
|
|
+# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sad)
|
|
|
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sad_thresh)
|
|
|
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sad2_thresh)
|
|
|
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_intra_sad)
|
|
|
+# define oc_enc_frag_intra_sad(_enc,_src,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_intra_sad)(_src,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_satd)
|
|
|
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_satd2)
|
|
|
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_intra_satd)
|
|
|
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_ssd)
|
|
|
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_border_ssd)
|
|
|
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_copy2)
|
|
|
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_enquant_table_init)
|
|
|
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
|
|
|
+ ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_enquant_table_fixup)
|
|
|
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
|
|
|
+ ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_quantize)
|
|
|
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
|
|
|
+ ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_recon_intra)
|
|
|
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_recon_inter)
|
|
|
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
|
|
|
+ ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_fdct8x8)
|
|
|
+# define oc_enc_fdct8x8(_enc,_y,_x) \
|
|
|
+ ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
|
|
|
+# endif
|
|
|
+# else
|
|
|
+# if !defined(oc_enc_frag_sub)
|
|
|
+# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
|
|
|
+ oc_enc_frag_sub_c(_diff,_src,_ref,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sub_128)
|
|
|
+# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
|
|
|
+ oc_enc_frag_sub_128_c(_diff,_src,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sad)
|
|
|
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
|
|
|
+ oc_enc_frag_sad_c(_src,_ref,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sad_thresh)
|
|
|
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
|
|
|
+ oc_enc_frag_sad_thresh_c(_src,_ref,_ystride,_thresh)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_sad2_thresh)
|
|
|
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
|
|
|
+ oc_enc_frag_sad2_thresh_c(_src,_ref1,_ref2,_ystride,_thresh)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_intra_sad)
|
|
|
+# define oc_enc_frag_intra_sad(_enc,_src,_ystride) \
|
|
|
+ oc_enc_frag_intra_sad_c(_src,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_satd)
|
|
|
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
|
|
|
+ oc_enc_frag_satd_c(_dc,_src,_ref,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_satd2)
|
|
|
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
|
|
|
+ oc_enc_frag_satd2_c(_dc,_src,_ref1,_ref2,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_intra_satd)
|
|
|
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
|
|
|
+ oc_enc_frag_intra_satd_c(_dc,_src,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_ssd)
|
|
|
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
|
|
|
+ oc_enc_frag_ssd_c(_src,_ref,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_border_ssd)
|
|
|
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
|
|
|
+ oc_enc_frag_border_ssd_c(_src,_ref,_ystride,_mask)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_copy2)
|
|
|
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
|
|
|
+ oc_enc_frag_copy2_c(_dst,_src1,_src2,_ystride)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_enquant_table_init)
|
|
|
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
|
|
|
+ oc_enc_enquant_table_init_c(_enquant,_dequant)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_enquant_table_fixup)
|
|
|
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
|
|
|
+ oc_enc_enquant_table_fixup_c(_enquant,_nqis)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_quantize)
|
|
|
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
|
|
|
+ oc_enc_quantize_c(_qdct,_dct,_dequant,_enquant)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_recon_intra)
|
|
|
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
|
|
|
+ oc_frag_recon_intra_c(_dst,_ystride,_residue)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_frag_recon_inter)
|
|
|
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
|
|
|
+ oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
|
|
|
+# endif
|
|
|
+# if !defined(oc_enc_fdct8x8)
|
|
|
+# define oc_enc_fdct8x8(_enc,_y,_x) oc_enc_fdct8x8_c(_y,_x)
|
|
|
+# endif
|
|
|
+# endif
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
/*Constants for the packet-out state machine specific to the encoder.*/
|
|
|
|
|
|
/*Next packet to emit: Data packet, but none are ready yet.*/
|
|
@@ -50,13 +215,61 @@ typedef struct oc_token_checkpoint oc_token_checkpoint;
|
|
|
#define OC_PACKET_READY (1)
|
|
|
|
|
|
/*All features enabled.*/
|
|
|
-#define OC_SP_LEVEL_SLOW (0)
|
|
|
+#define OC_SP_LEVEL_SLOW (0)
|
|
|
/*Enable early skip.*/
|
|
|
-#define OC_SP_LEVEL_EARLY_SKIP (1)
|
|
|
+#define OC_SP_LEVEL_EARLY_SKIP (1)
|
|
|
+/*Use analysis shortcuts, single quantizer, and faster tokenization.*/
|
|
|
+#define OC_SP_LEVEL_FAST_ANALYSIS (2)
|
|
|
+/*Use SAD instead of SATD*/
|
|
|
+#define OC_SP_LEVEL_NOSATD (3)
|
|
|
/*Disable motion compensation.*/
|
|
|
-#define OC_SP_LEVEL_NOMC (2)
|
|
|
+#define OC_SP_LEVEL_NOMC (4)
|
|
|
/*Maximum valid speed level.*/
|
|
|
-#define OC_SP_LEVEL_MAX (2)
|
|
|
+#define OC_SP_LEVEL_MAX (4)
|
|
|
+
|
|
|
+
|
|
|
+/*The number of extra bits of precision at which to store rate metrics.*/
|
|
|
+# define OC_BIT_SCALE (6)
|
|
|
+/*The number of extra bits of precision at which to store RMSE metrics.
|
|
|
+ This must be at least half OC_BIT_SCALE (rounded up).*/
|
|
|
+# define OC_RMSE_SCALE (5)
|
|
|
+/*The number of quantizer bins to partition statistics into.*/
|
|
|
+# define OC_LOGQ_BINS (8)
|
|
|
+/*The number of SAD/SATD bins to partition statistics into.*/
|
|
|
+# define OC_COMP_BINS (24)
|
|
|
+/*The number of bits of precision to drop from SAD and SATD scores
|
|
|
+ to assign them to a bin.*/
|
|
|
+# define OC_SAD_SHIFT (6)
|
|
|
+# define OC_SATD_SHIFT (9)
|
|
|
+
|
|
|
+/*Masking is applied by scaling the D used in R-D optimization (via rd_scale)
|
|
|
+ or the lambda parameter (via rd_iscale).
|
|
|
+ These are only equivalent within a single block; when more than one block is
|
|
|
+ being considered, the former is the interpretation used.*/
|
|
|
+
|
|
|
+/*This must be at least 4 for OC_RD_SKIP_SCALE() to work below.*/
|
|
|
+# define OC_RD_SCALE_BITS (12-OC_BIT_SCALE)
|
|
|
+# define OC_RD_ISCALE_BITS (11)
|
|
|
+
|
|
|
+/*This macro is applied to _ssd values with just 4 bits of headroom
|
|
|
+ ((15-OC_RMSE_SCALE)*2+OC_BIT_SCALE+2); since we want to allow rd_scales as
|
|
|
+ large as 16, and need additional fractional bits, our only recourse that
|
|
|
+ doesn't lose precision on blocks with very small SSDs is to use a wider
|
|
|
+ multiply.*/
|
|
|
+# if LONG_MAX>2147483647
|
|
|
+# define OC_RD_SCALE(_ssd,_rd_scale) \
|
|
|
+ ((unsigned)((unsigned long)(_ssd)*(_rd_scale) \
|
|
|
+ +((1<<OC_RD_SCALE_BITS)>>1)>>OC_RD_SCALE_BITS))
|
|
|
+# else
|
|
|
+# define OC_RD_SCALE(_ssd,_rd_scale) \
|
|
|
+ (((_ssd)>>OC_RD_SCALE_BITS)*(_rd_scale) \
|
|
|
+ +(((_ssd)&(1<<OC_RD_SCALE_BITS)-1)*(_rd_scale) \
|
|
|
+ +((1<<OC_RD_SCALE_BITS)>>1)>>OC_RD_SCALE_BITS))
|
|
|
+# endif
|
|
|
+# define OC_RD_SKIP_SCALE(_ssd,_rd_scale) \
|
|
|
+ ((_ssd)*(_rd_scale)+((1<<OC_RD_SCALE_BITS-4)>>1)>>OC_RD_SCALE_BITS-4)
|
|
|
+# define OC_RD_ISCALE(_lambda,_rd_iscale) \
|
|
|
+ ((_lambda)*(_rd_iscale)+((1<<OC_RD_ISCALE_BITS)>>1)>>OC_RD_ISCALE_BITS)
|
|
|
|
|
|
|
|
|
/*The bits used for each of the MB mode codebooks.*/
|
|
@@ -78,6 +291,10 @@ extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
|
|
|
|
|
|
/*Encoder specific functions with accelerated variants.*/
|
|
|
struct oc_enc_opt_vtable{
|
|
|
+ void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
|
|
|
+ const unsigned char *_ref,int _ystride);
|
|
|
+ void (*frag_sub_128)(ogg_int16_t _diff[64],
|
|
|
+ const unsigned char *_src,int _ystride);
|
|
|
unsigned (*frag_sad)(const unsigned char *_src,
|
|
|
const unsigned char *_ref,int _ystride);
|
|
|
unsigned (*frag_sad_thresh)(const unsigned char *_src,
|
|
@@ -85,18 +302,23 @@ struct oc_enc_opt_vtable{
|
|
|
unsigned (*frag_sad2_thresh)(const unsigned char *_src,
|
|
|
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
|
|
|
unsigned _thresh);
|
|
|
- unsigned (*frag_satd_thresh)(const unsigned char *_src,
|
|
|
- const unsigned char *_ref,int _ystride,unsigned _thresh);
|
|
|
- unsigned (*frag_satd2_thresh)(const unsigned char *_src,
|
|
|
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
|
|
|
- unsigned _thresh);
|
|
|
- unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
|
|
|
- void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
|
|
|
+ unsigned (*frag_intra_sad)(const unsigned char *_src,int _ystride);
|
|
|
+ unsigned (*frag_satd)(int *_dc,const unsigned char *_src,
|
|
|
const unsigned char *_ref,int _ystride);
|
|
|
- void (*frag_sub_128)(ogg_int16_t _diff[64],
|
|
|
- const unsigned char *_src,int _ystride);
|
|
|
+ unsigned (*frag_satd2)(int *_dc,const unsigned char *_src,
|
|
|
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
|
|
|
+ unsigned (*frag_intra_satd)(int *_dc,const unsigned char *_src,int _ystride);
|
|
|
+ unsigned (*frag_ssd)(const unsigned char *_src,
|
|
|
+ const unsigned char *_ref,int _ystride);
|
|
|
+ unsigned (*frag_border_ssd)(const unsigned char *_src,
|
|
|
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
|
|
|
void (*frag_copy2)(unsigned char *_dst,
|
|
|
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
|
|
|
+ void (*enquant_table_init)(void *_enquant,
|
|
|
+ const ogg_uint16_t _dequant[64]);
|
|
|
+ void (*enquant_table_fixup)(void *_enquant[3][3][2],int _nqis);
|
|
|
+ int (*quantize)(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
|
|
|
+ const ogg_uint16_t _dequant[64],const void *_enquant);
|
|
|
void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
|
|
|
const ogg_int16_t _residue[64]);
|
|
|
void (*frag_recon_inter)(unsigned char *_dst,
|
|
@@ -105,7 +327,19 @@ struct oc_enc_opt_vtable{
|
|
|
};
|
|
|
|
|
|
|
|
|
-void oc_enc_vtable_init(oc_enc_ctx *_enc);
|
|
|
+/*Encoder specific data that varies according to which variants of the above
|
|
|
+ functions are used.*/
|
|
|
+struct oc_enc_opt_data{
|
|
|
+ /*The size of a single quantizer table.
|
|
|
+ This must be a multiple of enquant_table_alignment.*/
|
|
|
+ size_t enquant_table_size;
|
|
|
+ /*The alignment required for the quantizer tables.
|
|
|
+ This must be a positive power of two.*/
|
|
|
+ int enquant_table_alignment;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+void oc_enc_accel_init(oc_enc_ctx *_enc);
|
|
|
|
|
|
|
|
|
|
|
@@ -158,7 +392,7 @@ struct oc_mode_scheme_chooser{
|
|
|
corresponds to the ranks above.*/
|
|
|
unsigned char scheme0_list[OC_NMODES];
|
|
|
/*The number of times each mode has been chosen so far.*/
|
|
|
- int mode_counts[OC_NMODES];
|
|
|
+ unsigned mode_counts[OC_NMODES];
|
|
|
/*The list of mode coding schemes, sorted in ascending order of bit cost.*/
|
|
|
unsigned char scheme_list[8];
|
|
|
/*The number of bits used by each mode coding scheme.*/
|
|
@@ -170,6 +404,106 @@ void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
|
|
|
|
|
|
|
|
|
|
|
|
+/*State to track coded block flags and their bit cost.
|
|
|
+ We use opportunity cost to measure the bits required to code or skip the next
|
|
|
+ block, using the cheaper of the cost to code it fully or partially, so long
|
|
|
+ as both are possible.*/
|
|
|
+struct oc_fr_state{
|
|
|
+ /*The number of bits required for the coded block flags so far this frame.*/
|
|
|
+ ptrdiff_t bits;
|
|
|
+ /*The length of the current run for the partial super block flag, not
|
|
|
+ including the current super block.*/
|
|
|
+ unsigned sb_partial_count:16;
|
|
|
+ /*The length of the current run for the full super block flag, not
|
|
|
+ including the current super block.*/
|
|
|
+ unsigned sb_full_count:16;
|
|
|
+ /*The length of the coded block flag run when the current super block
|
|
|
+ started.*/
|
|
|
+ unsigned b_coded_count_prev:6;
|
|
|
+ /*The coded block flag when the current super block started.*/
|
|
|
+ signed int b_coded_prev:2;
|
|
|
+ /*The length of the current coded block flag run.*/
|
|
|
+ unsigned b_coded_count:6;
|
|
|
+ /*The current coded block flag.*/
|
|
|
+ signed int b_coded:2;
|
|
|
+ /*The number of blocks processed in the current super block.*/
|
|
|
+ unsigned b_count:5;
|
|
|
+ /*Whether or not it is cheaper to code the current super block partially,
|
|
|
+ even if it could still be coded fully.*/
|
|
|
+ unsigned sb_prefer_partial:1;
|
|
|
+ /*Whether the last super block was coded partially.*/
|
|
|
+ signed int sb_partial:2;
|
|
|
+ /*The number of bits required for the flags for the current super block.*/
|
|
|
+ unsigned sb_bits:6;
|
|
|
+ /*Whether the last non-partial super block was coded fully.*/
|
|
|
+ signed int sb_full:2;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+struct oc_qii_state{
|
|
|
+ ptrdiff_t bits;
|
|
|
+ unsigned qi01_count:14;
|
|
|
+ signed int qi01:2;
|
|
|
+ unsigned qi12_count:14;
|
|
|
+ signed int qi12:2;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/*Temporary encoder state for the analysis pipeline.*/
|
|
|
+struct oc_enc_pipeline_state{
|
|
|
+ /*DCT coefficient storage.
|
|
|
+ This is kept off the stack because a) gcc can't align things on the stack
|
|
|
+ reliably on ARM, and b) it avoids (unintentional) data hazards between
|
|
|
+ ARM and NEON code.*/
|
|
|
+ OC_ALIGN16(ogg_int16_t dct_data[64*3]);
|
|
|
+ OC_ALIGN16(signed char bounding_values[256]);
|
|
|
+ oc_fr_state fr[3];
|
|
|
+ oc_qii_state qs[3];
|
|
|
+ /*Skip SSD storage for the current MCU in each plane.*/
|
|
|
+ unsigned *skip_ssd[3];
|
|
|
+ /*Coded/uncoded fragment lists for each plane for the current MCU.*/
|
|
|
+ ptrdiff_t *coded_fragis[3];
|
|
|
+ ptrdiff_t *uncoded_fragis[3];
|
|
|
+ ptrdiff_t ncoded_fragis[3];
|
|
|
+ ptrdiff_t nuncoded_fragis[3];
|
|
|
+ /*The starting fragment for the current MCU in each plane.*/
|
|
|
+ ptrdiff_t froffset[3];
|
|
|
+ /*The starting row for the current MCU in each plane.*/
|
|
|
+ int fragy0[3];
|
|
|
+ /*The ending row for the current MCU in each plane.*/
|
|
|
+ int fragy_end[3];
|
|
|
+ /*The starting superblock for the current MCU in each plane.*/
|
|
|
+ unsigned sbi0[3];
|
|
|
+ /*The ending superblock for the current MCU in each plane.*/
|
|
|
+ unsigned sbi_end[3];
|
|
|
+ /*The number of tokens for zzi=1 for each color plane.*/
|
|
|
+ int ndct_tokens1[3];
|
|
|
+ /*The outstanding eob_run count for zzi=1 for each color plane.*/
|
|
|
+ int eob_run1[3];
|
|
|
+ /*Whether or not the loop filter is enabled.*/
|
|
|
+ int loop_filter;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/*Statistics used to estimate R-D cost of a block in a given coding mode.
|
|
|
+ See modedec.h for more details.*/
|
|
|
+struct oc_mode_rd{
|
|
|
+ /*The expected bits used by the DCT tokens, shifted by OC_BIT_SCALE.*/
|
|
|
+ ogg_int16_t rate;
|
|
|
+ /*The expected square root of the sum of squared errors, shifted by
|
|
|
+ OC_RMSE_SCALE.*/
|
|
|
+ ogg_int16_t rmse;
|
|
|
+};
|
|
|
+
|
|
|
+# if defined(OC_COLLECT_METRICS)
|
|
|
+# include "collect.h"
|
|
|
+# endif
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
/*A 2nd order low-pass Bessel follower.
|
|
|
We use this for rate control because it has fast reaction time, but is
|
|
|
critically damped.*/
|
|
@@ -190,6 +524,8 @@ struct oc_frame_metrics{
|
|
|
unsigned dup_count:31;
|
|
|
/*The frame type from pass 1.*/
|
|
|
unsigned frame_type:1;
|
|
|
+ /*The frame activity average from pass 1.*/
|
|
|
+ unsigned activity_avg;
|
|
|
};
|
|
|
|
|
|
|
|
@@ -335,10 +671,15 @@ struct th_enc_ctx{
|
|
|
size_t mv_bits[2];
|
|
|
/*The mode scheme chooser for estimating mode coding costs.*/
|
|
|
oc_mode_scheme_chooser chooser;
|
|
|
+ /*Temporary encoder state for the analysis pipeline.*/
|
|
|
+ oc_enc_pipeline_state pipe;
|
|
|
/*The number of vertical super blocks in an MCU.*/
|
|
|
int mcu_nvsbs;
|
|
|
/*The SSD error for skipping each fragment in the current MCU.*/
|
|
|
unsigned *mcu_skip_ssd;
|
|
|
+ /*The masking scale factors for chroma blocks in the current MCU.*/
|
|
|
+ ogg_uint16_t *mcu_rd_scale;
|
|
|
+ ogg_uint16_t *mcu_rd_iscale;
|
|
|
/*The DCT token lists for each coefficient and each plane.*/
|
|
|
unsigned char **dct_tokens[3];
|
|
|
/*The extra bits associated with each DCT token.*/
|
|
@@ -350,8 +691,10 @@ struct th_enc_ctx{
|
|
|
/*The offset of the first DCT token for each coefficient for each plane.*/
|
|
|
unsigned char dct_token_offs[3][64];
|
|
|
/*The last DC coefficient for each plane and reference frame.*/
|
|
|
- int dc_pred_last[3][3];
|
|
|
+ int dc_pred_last[3][4];
|
|
|
#if defined(OC_COLLECT_METRICS)
|
|
|
+ /*Fragment SAD statistics for MB mode estimation metrics.*/
|
|
|
+ unsigned *frag_sad;
|
|
|
/*Fragment SATD statistics for MB mode estimation metrics.*/
|
|
|
unsigned *frag_satd;
|
|
|
/*Fragment SSD statistics for MB mode estimation metrics.*/
|
|
@@ -359,32 +702,56 @@ struct th_enc_ctx{
|
|
|
#endif
|
|
|
/*The R-D optimization parameter.*/
|
|
|
int lambda;
|
|
|
+ /*The average block "activity" of the previous frame.*/
|
|
|
+ unsigned activity_avg;
|
|
|
+ /*The average MB luma of the previous frame.*/
|
|
|
+ unsigned luma_avg;
|
|
|
/*The huffman tables in use.*/
|
|
|
th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
|
|
|
/*The quantization parameters in use.*/
|
|
|
th_quant_info qinfo;
|
|
|
- oc_iquant *enquant_tables[64][3][2];
|
|
|
- oc_iquant_table enquant_table_data[64][3][2];
|
|
|
- /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi
|
|
|
- value.
|
|
|
- This is used to paramterize the rate control decisions.
|
|
|
+ /*The original DC coefficients saved off from the dequatization tables.*/
|
|
|
+ ogg_uint16_t dequant_dc[64][3][2];
|
|
|
+ /*Condensed dequantization tables.*/
|
|
|
+ const ogg_uint16_t *dequant[3][3][2];
|
|
|
+ /*Condensed quantization tables.*/
|
|
|
+ void *enquant[3][3][2];
|
|
|
+ /*The full set of quantization tables.*/
|
|
|
+ void *enquant_tables[64][3][2];
|
|
|
+ /*Storage for the quantization tables.*/
|
|
|
+ unsigned char *enquant_table_data;
|
|
|
+ /*An "average" quantizer for each frame type (INTRA or INTER) and qi value.
|
|
|
+ This is used to parameterize the rate control decisions.
|
|
|
They are kept in the log domain to simplify later processing.
|
|
|
- Keep in mind these are DCT domain quantizers, and so are scaled by an
|
|
|
- additional factor of 4 from the pixel domain.*/
|
|
|
+ These are DCT domain quantizers, and so are scaled by an additional factor
|
|
|
+ of 4 from the pixel domain.*/
|
|
|
ogg_int64_t log_qavg[2][64];
|
|
|
+ /*The "average" quantizer futher partitioned by color plane.
|
|
|
+ This is used to parameterize mode decision.
|
|
|
+ These are DCT domain quantizers, and so are scaled by an additional factor
|
|
|
+ of 4 from the pixel domain.*/
|
|
|
+ ogg_int16_t log_plq[64][3][2];
|
|
|
+ /*The R-D scale factors to apply to chroma blocks for a given frame type
|
|
|
+ (INTRA or INTER) and qi value.
|
|
|
+ The first is the "D" modifier (rd_scale), while the second is the "lambda"
|
|
|
+ modifier (rd_iscale).*/
|
|
|
+ ogg_uint16_t chroma_rd_scale[2][64][2];
|
|
|
+ /*The interpolated mode decision R-D lookup tables for the current
|
|
|
+ quantizers, color plane, and quantization type.*/
|
|
|
+ oc_mode_rd mode_rd[3][3][2][OC_COMP_BINS];
|
|
|
/*The buffer state used to drive rate control.*/
|
|
|
oc_rc_state rc;
|
|
|
+# if defined(OC_ENC_USE_VTABLE)
|
|
|
/*Table for encoder acceleration functions.*/
|
|
|
oc_enc_opt_vtable opt_vtable;
|
|
|
+# endif
|
|
|
+ /*Table for encoder data used by accelerated functions.*/
|
|
|
+ oc_enc_opt_data opt_data;
|
|
|
};
|
|
|
|
|
|
|
|
|
void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
|
|
|
int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
|
|
|
-#if defined(OC_COLLECT_METRICS)
|
|
|
-void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
|
|
|
-void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc);
|
|
|
-#endif
|
|
|
|
|
|
|
|
|
|
|
@@ -415,8 +782,13 @@ struct oc_token_checkpoint{
|
|
|
|
|
|
void oc_enc_tokenize_start(oc_enc_ctx *_enc);
|
|
|
int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
|
|
|
- ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
|
|
|
- int _zzi,oc_token_checkpoint **_stack,int _acmin);
|
|
|
+ ogg_int16_t *_qdct_out,const ogg_int16_t *_qdct_in,
|
|
|
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
|
|
|
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
|
|
|
+int oc_enc_tokenize_ac_fast(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
|
|
|
+ ogg_int16_t *_qdct_out,const ogg_int16_t *_qdct_in,
|
|
|
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
|
|
|
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
|
|
|
void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
|
|
|
const oc_token_checkpoint *_stack,int _n);
|
|
|
void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
|
|
@@ -436,45 +808,13 @@ int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
|
|
|
|
|
|
|
|
|
|
|
|
-/*Encoder-specific accelerated functions.*/
|
|
|
-void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
|
|
|
- const unsigned char *_src,const unsigned char *_ref,int _ystride);
|
|
|
-void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
|
|
|
- const unsigned char *_src,int _ystride);
|
|
|
-unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src,
|
|
|
- const unsigned char *_ref,int _ystride);
|
|
|
-unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
|
|
|
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
|
|
|
- unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
|
|
|
- const unsigned char *_src,const unsigned char *_ref1,
|
|
|
- const unsigned char *_ref2,int _ystride,unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
|
|
|
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
|
|
|
- unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
|
|
|
- const unsigned char *_src,const unsigned char *_ref1,
|
|
|
- const unsigned char *_ref2,int _ystride,unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
|
|
|
- const unsigned char *_src,int _ystride);
|
|
|
-void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
|
|
|
- const unsigned char *_src1,const unsigned char *_src2,int _ystride);
|
|
|
-void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
|
|
|
- unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]);
|
|
|
-void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
|
|
|
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
|
|
|
-void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
|
|
|
- const ogg_int16_t _x[64]);
|
|
|
-
|
|
|
-/*Default pure-C implementations.*/
|
|
|
-void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
|
|
|
+/*Default pure-C implementations of encoder-specific accelerated functions.*/
|
|
|
+void oc_enc_accel_init_c(oc_enc_ctx *_enc);
|
|
|
|
|
|
void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
|
|
|
const unsigned char *_src,const unsigned char *_ref,int _ystride);
|
|
|
void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
|
|
|
const unsigned char *_src,int _ystride);
|
|
|
-void oc_enc_frag_copy2_c(unsigned char *_dst,
|
|
|
- const unsigned char *_src1,const unsigned char *_src2,int _ystride);
|
|
|
unsigned oc_enc_frag_sad_c(const unsigned char *_src,
|
|
|
const unsigned char *_ref,int _ystride);
|
|
|
unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
|
|
@@ -482,12 +822,24 @@ unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
|
|
|
unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
|
|
|
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
|
|
|
unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
|
|
|
- const unsigned char *_ref,int _ystride,unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
|
|
|
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
|
|
|
- unsigned _thresh);
|
|
|
-unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
|
|
|
+unsigned oc_enc_frag_intra_sad_c(const unsigned char *_src, int _ystride);
|
|
|
+unsigned oc_enc_frag_satd_c(int *_dc,const unsigned char *_src,
|
|
|
+ const unsigned char *_ref,int _ystride);
|
|
|
+unsigned oc_enc_frag_satd2_c(int *_dc,const unsigned char *_src,
|
|
|
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
|
|
|
+unsigned oc_enc_frag_intra_satd_c(int *_dc,
|
|
|
+ const unsigned char *_src,int _ystride);
|
|
|
+unsigned oc_enc_frag_ssd_c(const unsigned char *_src,
|
|
|
+ const unsigned char *_ref,int _ystride);
|
|
|
+unsigned oc_enc_frag_border_ssd_c(const unsigned char *_src,
|
|
|
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
|
|
|
+void oc_enc_frag_copy2_c(unsigned char *_dst,
|
|
|
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
|
|
|
+void oc_enc_enquant_table_init_c(void *_enquant,
|
|
|
+ const ogg_uint16_t _dequant[64]);
|
|
|
+void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis);
|
|
|
+int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
|
|
|
+ const ogg_uint16_t _dequant[64],const void *_enquant);
|
|
|
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
|
|
|
|
|
|
#endif
|