Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libtheora/mcenc.c
9903 views
1
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
 * by the Xiph.Org Foundation https://www.xiph.org/                 *
 *                                                                  *
 ********************************************************************

  function:

 ********************************************************************/
16
#include <stdlib.h>
17
#include <limits.h>
18
#include <string.h>
19
#include "encint.h"
20
21
22
23
typedef struct oc_mcenc_ctx oc_mcenc_ctx;



/*Temporary state used for motion estimation.
  The candidate list is laid out as follows:
   slot 0:                     the median predictor,
   slots 1...setb0-1:          the Set A candidates,
   slots setb0...ncandidates-1: the Set B candidates.*/
struct oc_mcenc_ctx{
  /*The candidate motion vectors, stored as (x,y) pairs.
    The search halves each component before using it as a full-pel offset.*/
  int candidates[13][2];
  /*The start of the Set B candidates.*/
  int setb0;
  /*The total number of candidates.*/
  int ncandidates;
};
36
37
38
39
/*The maximum Y plane SAD value for accepting the median predictor.*/
#define OC_YSAD_THRESH1            (256)
/*The amount to right shift the minimum error by when inflating it for
   computing the second maximum Y plane SAD threshold.*/
#define OC_YSAD_THRESH2_SCALE_BITS (4)
/*The amount to add to the second maximum Y plane threshold when inflating
   it.*/
#define OC_YSAD_THRESH2_OFFSET     (64)
47
48
/*The vector offsets in the X direction for each search site in the square
   pattern.
  Sites are numbered row by row over a 3x3 grid; site 4 is the center.*/
static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
/*The vector offsets in the Y direction for each search site in the square
   pattern.*/
static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
/*The number of sites to search for each boundary condition in the square
   pattern.
  Bit flags for the boundary conditions are as follows:
  1: -16==dx
  2:      dx==15(.5)
  4: -16==dy
  8:      dy==15(.5)
  Flag combinations that name both edges of the same axis cannot occur and
   have zero sites.*/
static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
/*The list of sites to search for each boundary condition in the square
   pattern.
  Only the first OC_SQUARE_NSITES[b] entries of row b are meaningful.*/
static const int OC_SQUARE_SITES[11][8]={
  /* -15.5<dx<15(.5),  -15.5<dy<15(.5)*/
  {0,1,2,3,5,6,7,8},
  /*-15.5==dx,         -15.5<dy<15(.5)*/
  {1,2,5,7,8},
  /*       dx==15(.5), -15.5<dy<15(.5)*/
  {0,1,3,6,7},
  /*-15.5==dx==15(.5), -15.5<dy<15(.5)*/
  {-1},
  /* -15.5<dx<15(.5),  -15.5==dy*/
  {3,5,6,7,8},
  /*-15.5==dx,         -15.5==dy*/
  {5,7,8},
  /*       dx==15(.5), -15.5==dy*/
  {3,6,7},
  /*-15.5==dx==15(.5), -15.5==dy*/
  {-1},
  /* -15.5<dx<15(.5),         dy==15(.5)*/
  {0,1,2,3,5},
  /*-15.5==dx,                dy==15(.5)*/
  {1,2,5},
  /*       dx==15(.5),        dy==15(.5)*/
  {0,1,3}
};
88
89
90
static void oc_mcenc_find_candidates_a(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
91
oc_mv _accum,int _mbi,int _frame){
92
oc_mb_enc_info *embs;
93
int accum_x;
94
int accum_y;
95
int a[3][2];
96
int ncandidates;
97
unsigned nmbi;
98
int i;
99
embs=_enc->mb_info;
100
/*Skip a position to store the median predictor in.*/
101
ncandidates=1;
102
if(embs[_mbi].ncneighbors>0){
103
/*Fill in the first part of set A: the vectors from adjacent blocks.*/
104
for(i=0;i<embs[_mbi].ncneighbors;i++){
105
nmbi=embs[_mbi].cneighbors[i];
106
_mcenc->candidates[ncandidates][0]=
107
OC_MV_X(embs[nmbi].analysis_mv[0][_frame]);
108
_mcenc->candidates[ncandidates][1]=
109
OC_MV_Y(embs[nmbi].analysis_mv[0][_frame]);
110
ncandidates++;
111
}
112
}
113
accum_x=OC_MV_X(_accum);
114
accum_y=OC_MV_Y(_accum);
115
/*Add a few additional vectors to set A: the vectors used in the previous
116
frames and the (0,0) vector.*/
117
_mcenc->candidates[ncandidates][0]=accum_x;
118
_mcenc->candidates[ncandidates][1]=accum_y;
119
ncandidates++;
120
_mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
121
OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+accum_x,31);
122
_mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
123
OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+accum_y,31);
124
ncandidates++;
125
_mcenc->candidates[ncandidates][0]=0;
126
_mcenc->candidates[ncandidates][1]=0;
127
ncandidates++;
128
/*Use the first three vectors of set A to find our best predictor: their
129
median.*/
130
memcpy(a,_mcenc->candidates+1,sizeof(a));
131
OC_SORT2I(a[0][0],a[1][0]);
132
OC_SORT2I(a[0][1],a[1][1]);
133
OC_SORT2I(a[1][0],a[2][0]);
134
OC_SORT2I(a[1][1],a[2][1]);
135
OC_SORT2I(a[0][0],a[1][0]);
136
OC_SORT2I(a[0][1],a[1][1]);
137
_mcenc->candidates[0][0]=a[1][0];
138
_mcenc->candidates[0][1]=a[1][1];
139
_mcenc->setb0=ncandidates;
140
}
141
142
static void oc_mcenc_find_candidates_b(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
143
oc_mv _accum,int _mbi,int _frame){
144
oc_mb_enc_info *embs;
145
int accum_x;
146
int accum_y;
147
int ncandidates;
148
embs=_enc->mb_info;
149
accum_x=OC_MV_X(_accum);
150
accum_y=OC_MV_Y(_accum);
151
/*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/
152
ncandidates=_mcenc->setb0;
153
/*Use only the current block. Using more did not appear to be helpful
154
with the current selection logic due to escaping the local search too
155
quickly.*/
156
_mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
157
2*OC_MV_X(embs[_mbi].analysis_mv[1][_frame])
158
-OC_MV_X(embs[_mbi].analysis_mv[2][_frame])+accum_x,31);
159
_mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
160
2*OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])
161
-OC_MV_Y(embs[_mbi].analysis_mv[2][_frame])+accum_y,31);
162
ncandidates++;
163
_mcenc->ncandidates=ncandidates;
164
}
165
166
/*Sums the two-reference SAD of the four luma blocks of a macro block.
  Each block is scored by oc_enc_frag_sad2_thresh() against the reference at
   _mvoffset0 and _mvoffset1, with the remaining budget (_best_err minus the
   running total) passed as that call's threshold.
  Returns the accumulated error.*/
static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  unsigned total;
  int      blk;
  total=0;
  for(blk=0;blk<4;blk++){
    const unsigned char *ref_blk;
    ptrdiff_t            offs;
    offs=_frag_buf_offs[_fragis[blk]];
    ref_blk=_ref+offs;
    total+=oc_enc_frag_sad2_thresh(_enc,_src+offs,ref_blk+_mvoffset0,
     ref_blk+_mvoffset1,_ystride,_best_err-total);
  }
  return total;
}
181
182
/*Sums the two-reference SATD of the four luma blocks of a macro block.
  For each block the value returned by oc_enc_frag_satd2() and the magnitude
   of the DC term it reports are both added to the total.
  _best_err is accepted so the signature matches oc_sad16_halfpel(), but it is
   not used here.
  Returns the accumulated error.*/
static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  unsigned total;
  int      blk_dc;
  int      blk;
  total=0;
  for(blk=0;blk<4;blk++){
    const unsigned char *ref_blk;
    ptrdiff_t            offs;
    offs=_frag_buf_offs[_fragis[blk]];
    ref_blk=_ref+offs;
    total+=oc_enc_frag_satd2(_enc,&blk_dc,_src+offs,
     ref_blk+_mvoffset0,ref_blk+_mvoffset1,_ystride);
    /*The DC magnitude is added in a separate statement so that it is read
       only after the call above has written it.*/
    total+=abs(blk_dc);
  }
  return total;
}
199
200
/*Computes the luma SAD of a macro block for one full-pel candidate vector.
  _dx,_dy:    The candidate displacement in whole pixels.
  _block_err: Receives the SAD of each of the four blocks.
  Returns the sum of the four block SADs.*/
static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride,
 unsigned _block_err[4]){
  unsigned total;
  int      displacement;
  int      blk;
  /*The candidate vector expressed as a byte offset into the reference.*/
  displacement=_dx+_dy*_ystride;
  total=0;
  for(blk=0;blk<4;blk++){
    ptrdiff_t offs;
    offs=_frag_buf_offs[_fragis[blk]];
    _block_err[blk]=oc_enc_frag_sad(_enc,
     _src+offs,_ref+offs+displacement,_ystride);
    total+=_block_err[blk];
  }
  return total;
}
220
221
static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
222
const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
223
const unsigned char *_src,const unsigned char *_ref,int _ystride){
224
int mvoffset;
225
int err;
226
int bi;
227
mvoffset=_dx+_dy*_ystride;
228
err=0;
229
for(bi=0;bi<4;bi++){
230
ptrdiff_t frag_offs;
231
int dc;
232
frag_offs=_frag_buf_offs[_fragis[bi]];
233
if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
234
err+=oc_enc_frag_satd(_enc,&dc,
235
_src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
236
err+=abs(dc);
237
}
238
else{
239
err+=oc_enc_frag_sad(_enc,
240
_src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
241
}
242
}
243
return err;
244
}
245
246
/*Scores a single block for one full-pel candidate vector.
  _frag_offs: The offset of the block within both frames.
  _dx,_dy:    The candidate displacement in whole pixels.
  Returns the block's SATD plus the magnitude of its DC term.*/
static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
 ptrdiff_t _frag_offs,int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  unsigned satd;
  int      blk_dc;
  satd=oc_enc_frag_satd(_enc,&blk_dc,
   _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride);
  return satd+abs(blk_dc);
}
255
256
/*Perform a motion vector search for this macro block against a single
257
reference frame.
258
As a bonus, individual block motion vectors are computed as well, as much of
259
the work can be shared.
260
The actual motion vector is stored in the appropriate place in the
261
oc_mb_enc_info structure.
262
_accum: Drop frame/golden MV accumulators.
263
_mbi: The macro block index.
264
_frame: The frame to use for SATD calculations and refinement,
265
either OC_FRAME_PREV or OC_FRAME_GOLD.
266
_frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV,
267
OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/
268
void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame,
269
int _frame_full){
270
/*Note: Traditionally this search is done using a rate-distortion objective
271
function of the form D+lambda*R.
272
However, xiphmont tested this and found it produced a small degradation,
273
while requiring extra computation.
274
This is most likely due to Theora's peculiar MV encoding scheme: MVs are
275
not coded relative to a predictor, and the only truly cheap way to use a
276
MV is in the LAST or LAST2 MB modes, which are not being considered here.
277
Therefore if we use the MV found here, it's only because both LAST and
278
LAST2 performed poorly, and therefore the MB is not likely to be uniform
279
or suffer from the aperture problem.
280
Furthermore we would like to reuse the MV found here for as many MBs as
281
possible, so picking a slightly sub-optimal vector to save a bit or two
282
may cause increased degradation in many blocks to come.
283
We could artificially reduce lambda to compensate, but it's faster to just
284
disable it entirely, and use D (the distortion) as the sole criterion.*/
285
oc_mcenc_ctx mcenc;
286
const ptrdiff_t *frag_buf_offs;
287
const ptrdiff_t *fragis;
288
const unsigned char *src;
289
const unsigned char *ref;
290
const unsigned char *satd_ref;
291
int ystride;
292
oc_mb_enc_info *embs;
293
ogg_int32_t hit_cache[31];
294
ogg_int32_t hitbit;
295
unsigned best_block_err[4];
296
unsigned block_err[4];
297
unsigned best_err;
298
int best_vec[2];
299
int best_block_vec[4][2];
300
int candx;
301
int candy;
302
int bi;
303
embs=_enc->mb_info;
304
/*Find some candidate motion vectors.*/
305
oc_mcenc_find_candidates_a(_enc,&mcenc,_accum,_mbi,_frame);
306
/*Clear the cache of locations we've examined.*/
307
memset(hit_cache,0,sizeof(hit_cache));
308
/*Start with the median predictor.*/
309
candx=OC_DIV2(mcenc.candidates[0][0]);
310
candy=OC_DIV2(mcenc.candidates[0][1]);
311
hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
312
frag_buf_offs=_enc->state.frag_buf_offs;
313
fragis=_enc->state.mb_maps[_mbi][0];
314
src=_enc->state.ref_frame_data[OC_FRAME_IO];
315
ref=_enc->state.ref_frame_data[_frame_full];
316
satd_ref=_enc->state.ref_frame_data[_frame];
317
ystride=_enc->state.ref_ystride[0];
318
/*TODO: customize error function for speed/(quality+size) tradeoff.*/
319
best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
320
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
321
best_vec[0]=candx;
322
best_vec[1]=candy;
323
if(_frame==OC_FRAME_PREV){
324
for(bi=0;bi<4;bi++){
325
best_block_err[bi]=block_err[bi];
326
best_block_vec[bi][0]=candx;
327
best_block_vec[bi][1]=candy;
328
}
329
}
330
/*If this predictor fails, move on to set A.*/
331
if(best_err>OC_YSAD_THRESH1){
332
unsigned err;
333
unsigned t2;
334
int ncs;
335
int ci;
336
/*Compute the early termination threshold for set A.*/
337
t2=embs[_mbi].error[_frame];
338
ncs=OC_MINI(3,embs[_mbi].ncneighbors);
339
for(ci=0;ci<ncs;ci++){
340
t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
341
}
342
t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
343
/*Examine the candidates in set A.*/
344
for(ci=1;ci<mcenc.setb0;ci++){
345
candx=OC_DIV2(mcenc.candidates[ci][0]);
346
candy=OC_DIV2(mcenc.candidates[ci][1]);
347
/*If we've already examined this vector, then we would be using it if it
348
was better than what we are using.*/
349
hitbit=(ogg_int32_t)1<<candx+15;
350
if(hit_cache[candy+15]&hitbit)continue;
351
hit_cache[candy+15]|=hitbit;
352
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
353
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
354
if(err<best_err){
355
best_err=err;
356
best_vec[0]=candx;
357
best_vec[1]=candy;
358
}
359
if(_frame==OC_FRAME_PREV){
360
for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
361
best_block_err[bi]=block_err[bi];
362
best_block_vec[bi][0]=candx;
363
best_block_vec[bi][1]=candy;
364
}
365
}
366
}
367
if(best_err>t2){
368
oc_mcenc_find_candidates_b(_enc,&mcenc,_accum,_mbi,_frame);
369
/*Examine the candidates in set B.*/
370
for(;ci<mcenc.ncandidates;ci++){
371
candx=OC_DIV2(mcenc.candidates[ci][0]);
372
candy=OC_DIV2(mcenc.candidates[ci][1]);
373
hitbit=(ogg_int32_t)1<<candx+15;
374
if(hit_cache[candy+15]&hitbit)continue;
375
hit_cache[candy+15]|=hitbit;
376
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
377
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
378
if(err<best_err){
379
best_err=err;
380
best_vec[0]=candx;
381
best_vec[1]=candy;
382
}
383
if(_frame==OC_FRAME_PREV){
384
for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
385
best_block_err[bi]=block_err[bi];
386
best_block_vec[bi][0]=candx;
387
best_block_vec[bi][1]=candy;
388
}
389
}
390
}
391
/*Use the same threshold for set B as in set A.*/
392
if(best_err>t2){
393
int best_site;
394
int nsites;
395
int sitei;
396
int site;
397
int b;
398
/*Square pattern search.*/
399
for(;;){
400
best_site=4;
401
/*Compose the bit flags for boundary conditions.*/
402
b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
403
OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
404
nsites=OC_SQUARE_NSITES[b];
405
for(sitei=0;sitei<nsites;sitei++){
406
site=OC_SQUARE_SITES[b][sitei];
407
candx=best_vec[0]+OC_SQUARE_DX[site];
408
candy=best_vec[1]+OC_SQUARE_DY[site];
409
hitbit=(ogg_int32_t)1<<candx+15;
410
if(hit_cache[candy+15]&hitbit)continue;
411
hit_cache[candy+15]|=hitbit;
412
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
413
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
414
if(err<best_err){
415
best_err=err;
416
best_site=site;
417
}
418
if(_frame==OC_FRAME_PREV){
419
for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
420
best_block_err[bi]=block_err[bi];
421
best_block_vec[bi][0]=candx;
422
best_block_vec[bi][1]=candy;
423
}
424
}
425
}
426
if(best_site==4)break;
427
best_vec[0]+=OC_SQUARE_DX[best_site];
428
best_vec[1]+=OC_SQUARE_DY[best_site];
429
}
430
/*Final 4-MV search.*/
431
/*Simply use 1/4 of the macro block set A and B threshold as the
432
individual block threshold.*/
433
if(_frame==OC_FRAME_PREV){
434
t2>>=2;
435
for(bi=0;bi<4;bi++){
436
if(best_block_err[bi]>t2){
437
/*Square pattern search.
438
We do this in a slightly interesting manner.
439
We continue to check the SAD of all four blocks in the
440
macro block.
441
This gives us two things:
442
1) We can continue to use the hit_cache to avoid duplicate
443
checks.
444
Otherwise we could continue to read it, but not write to it
445
without saving and restoring it for each block.
446
Note that we could still eliminate a large number of
447
duplicate checks by taking into account the site we came
448
from when choosing the site list.
449
We can still do that to avoid extra hit_cache queries, and
450
it might even be a speed win.
451
2) It gives us a slightly better chance of escaping local
452
minima.
453
We would not be here if we weren't doing a fairly bad job
454
in finding a good vector, and checking these vectors can
455
save us from 100 to several thousand points off our SAD 1
456
in 15 times.
457
TODO: Is this a good idea?
458
Who knows.
459
It needs more testing.*/
460
for(;;){
461
int bestx;
462
int besty;
463
int bj;
464
bestx=best_block_vec[bi][0];
465
besty=best_block_vec[bi][1];
466
/*Compose the bit flags for boundary conditions.*/
467
b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
468
OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
469
nsites=OC_SQUARE_NSITES[b];
470
for(sitei=0;sitei<nsites;sitei++){
471
site=OC_SQUARE_SITES[b][sitei];
472
candx=bestx+OC_SQUARE_DX[site];
473
candy=besty+OC_SQUARE_DY[site];
474
hitbit=(ogg_int32_t)1<<candx+15;
475
if(hit_cache[candy+15]&hitbit)continue;
476
hit_cache[candy+15]|=hitbit;
477
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
478
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
479
if(err<best_err){
480
best_err=err;
481
best_vec[0]=candx;
482
best_vec[1]=candy;
483
}
484
for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
485
best_block_err[bj]=block_err[bj];
486
best_block_vec[bj][0]=candx;
487
best_block_vec[bj][1]=candy;
488
}
489
}
490
if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
491
break;
492
}
493
}
494
}
495
}
496
}
497
}
498
}
499
}
500
embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
501
candx=best_vec[0];
502
candy=best_vec[1];
503
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
504
frag_buf_offs,fragis,candx,candy,src,satd_ref,ystride);
505
embs[_mbi].analysis_mv[0][_frame]=OC_MV(candx<<1,candy<<1);
506
if(_frame==OC_FRAME_PREV&&_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
507
for(bi=0;bi<4;bi++){
508
candx=best_block_vec[bi][0];
509
candy=best_block_vec[bi][1];
510
embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
511
frag_buf_offs[fragis[bi]],candx,candy,src,satd_ref,ystride);
512
embs[_mbi].block_mv[bi]=OC_MV(candx<<1,candy<<1);
513
}
514
}
515
}
516
517
/*Runs the motion search for one macro block against both the previous and
   the golden reference frames.
  The stored analysis vectors are first aged by one frame, then each frame is
   searched with its own accumulator, and finally the golden vectors are
   restored to absolute offset form.
  _enc: The encoder context.
  _mbi: The macro block index.*/
void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
  oc_mv2 *hist;
  oc_mv   prev_accum;
  oc_mv   gold_accum;
  oc_mv   oldest_prev;
  hist=_enc->mb_info[_mbi].analysis_mv;
  /*The previous-frame accumulator is only non-zero after a dropped frame.*/
  prev_accum=_enc->prevframe_dropped?hist[0][OC_FRAME_PREV]:0;
  gold_accum=hist[2][OC_FRAME_GOLD];
  /*Move the motion vector predictors back a frame.*/
  oldest_prev=hist[2][OC_FRAME_PREV];
  hist[2][OC_FRAME_GOLD]=hist[1][OC_FRAME_GOLD];
  hist[2][OC_FRAME_PREV]=hist[1][OC_FRAME_PREV];
  hist[1][OC_FRAME_GOLD]=hist[0][OC_FRAME_GOLD];
  hist[1][OC_FRAME_PREV]=OC_MV_SUB(hist[0][OC_FRAME_PREV],oldest_prev);
  /*Search the last frame.*/
  oc_mcenc_search_frame(_enc,prev_accum,_mbi,
   OC_FRAME_PREV,OC_FRAME_PREV_ORIG);
  hist[2][OC_FRAME_PREV]=prev_accum;
  /*GOLDEN MVs are different from PREV MVs in that they're each absolute
     offsets from some frame in the past rather than relative offsets from the
     frame before.
    For predictor calculation to make sense, we need them to be in the same
     form as PREV MVs.*/
  hist[1][OC_FRAME_GOLD]=
   OC_MV_SUB(hist[1][OC_FRAME_GOLD],hist[2][OC_FRAME_GOLD]);
  hist[2][OC_FRAME_GOLD]=OC_MV_SUB(hist[2][OC_FRAME_GOLD],gold_accum);
  /*Search the golden frame.*/
  oc_mcenc_search_frame(_enc,gold_accum,_mbi,
   OC_FRAME_GOLD,OC_FRAME_GOLD_ORIG);
  /*Put GOLDEN MVs back into absolute offset form.
    The newest MV is already an absolute offset.*/
  hist[2][OC_FRAME_GOLD]=OC_MV_ADD(hist[2][OC_FRAME_GOLD],gold_accum);
  hist[1][OC_FRAME_GOLD]=
   OC_MV_ADD(hist[1][OC_FRAME_GOLD],hist[2][OC_FRAME_GOLD]);
}
549
550
#if 0
/*Disabled SAD-only variant of oc_mcenc_ysatd_halfpel_mbrefine().
  Tries the eight half-pel sites around the full-pel vector _vec and keeps the
   one with the lowest SAD, if any beats _best_err.
  _vec:      On entry, the full-pel vector; on return, the chosen vector
              doubled, plus the offset of the winning site.
  _best_err: The error of the full-pel vector.
  _frame:    The reference frame to refine against.
  Returns the best error found.*/
static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
 int _vec[2],int _best_err,int _frame){
  const unsigned char *src;
  const unsigned char *ref;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *fragis;
  int                  offset_y[9];
  int                  ystride;
  int                  mvoffset_base;
  int                  best_site;
  int                  sitei;
  int                  err;
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  /*This used to index with an undeclared name; the parameter is _frame.*/
  ref=_enc->state.ref_frame_data[_frame];
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  ystride=_enc->state.ref_ystride[0];
  mvoffset_base=_vec[0]+_vec[1]*ystride;
  /*The row displacement of each site; the center (site 4) is never used.*/
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
  offset_y[3]=offset_y[5]=0;
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    int site;
    int xmask;
    int ymask;
    int dx;
    int dy;
    int mvoffset0;
    int mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.
      The doubling is written as *2 because _vec[] may be negative, and left
       shifting a negative value is undefined behavior in C.*/
    xmask=OC_SIGNMASK((_vec[0]*2+dx)^dx);
    ymask=OC_SIGNMASK((_vec[1]*2+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  _vec[0]=_vec[0]*2+OC_SQUARE_DX[best_site];
  _vec[1]=_vec[1]*2+OC_SQUARE_DY[best_site];
  return _best_err;
}
#endif
605
606
static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
607
int _mbi,int _vec[2],unsigned _best_err,int _frame){
608
const unsigned char *src;
609
const unsigned char *ref;
610
const ptrdiff_t *frag_buf_offs;
611
const ptrdiff_t *fragis;
612
int offset_y[9];
613
int ystride;
614
int mvoffset_base;
615
int best_site;
616
int sitei;
617
int err;
618
src=_enc->state.ref_frame_data[OC_FRAME_IO];
619
ref=_enc->state.ref_frame_data[_frame];
620
frag_buf_offs=_enc->state.frag_buf_offs;
621
fragis=_enc->state.mb_maps[_mbi][0];
622
ystride=_enc->state.ref_ystride[0];
623
mvoffset_base=_vec[0]+_vec[1]*ystride;
624
offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
625
offset_y[3]=offset_y[5]=0;
626
offset_y[6]=offset_y[7]=offset_y[8]=ystride;
627
best_site=4;
628
for(sitei=0;sitei<8;sitei++){
629
int site;
630
int xmask;
631
int ymask;
632
int dx;
633
int dy;
634
int mvoffset0;
635
int mvoffset1;
636
site=OC_SQUARE_SITES[0][sitei];
637
dx=OC_SQUARE_DX[site];
638
dy=OC_SQUARE_DY[site];
639
/*The following code SHOULD be equivalent to
640
oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
641
(_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
642
However, it should also be much faster, as it involves no multiplies and
643
doesn't have to handle chroma vectors.*/
644
xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
645
ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
646
mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
647
mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
648
if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
649
err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
650
mvoffset0,mvoffset1,src,ref,ystride,_best_err);
651
}
652
else{
653
err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
654
mvoffset0,mvoffset1,src,ref,ystride,_best_err);
655
}
656
if(err<_best_err){
657
_best_err=err;
658
best_site=site;
659
}
660
}
661
_vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
662
_vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
663
return _best_err;
664
}
665
666
/*Refines the stored macro block motion vector for one reference frame to
   half-pel precision.
  The stored vector is halved, handed to oc_mcenc_ysatd_halfpel_mbrefine(), and
   the refined vector and its error are written back in place.
  _enc:   The encoder context.
  _mbi:   The macro block index.
  _frame: The reference frame whose vector is refined.*/
void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
  oc_mb_enc_info *emb;
  int             mv[2];
  emb=_enc->mb_info+_mbi;
  mv[0]=OC_DIV2(OC_MV_X(emb->analysis_mv[0][_frame]));
  mv[1]=OC_DIV2(OC_MV_Y(emb->analysis_mv[0][_frame]));
  emb->satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
   _mbi,mv,emb->satd[_frame],_frame);
  emb->analysis_mv[0][_frame]=OC_MV(mv[0],mv[1]);
}
676
677
#if 0
/*Disabled SAD-only variant of oc_mcenc_ysatd_halfpel_brefine().
  Tries the eight half-pel sites around the full-pel vector _vec for a single
   block and keeps the one with the lowest SAD, if any beats _best_err.
  _vec:      On entry, the full-pel vector; on return, the chosen vector
              doubled, plus the offset of the winning site.
  _src,_ref: The block in the source and reference frames.
  _offset_y: The row displacement of each square-pattern site.
  _best_err: The error of the full-pel vector.
  Returns the best error found.*/
static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
 int _offset_y[9],unsigned _best_err){
  int mvoffset_base;
  int best_site;
  int sitei;
  mvoffset_base=_vec[0]+_vec[1]*_ystride;
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    unsigned err;
    int      site;
    int      xmask;
    int      ymask;
    int      dx;
    int      dy;
    int      mvoffset0;
    int      mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.
      The doubling is written as *2 because _vec[] may be negative, and left
       shifting a negative value is undefined behavior in C.*/
    xmask=OC_SIGNMASK((_vec[0]*2+dx)^dx);
    ymask=OC_SIGNMASK((_vec[1]*2+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
    /*This used to pass an undeclared name as the stride; the parameter is
       _ystride.*/
    err=oc_enc_frag_sad2_thresh(_enc,_src,
     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  _vec[0]=_vec[0]*2+OC_SQUARE_DX[best_site];
  _vec[1]=_vec[1]*2+OC_SQUARE_DY[best_site];
  return _best_err;
}
#endif
719
720
static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
721
int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
722
int _offset_y[9],unsigned _best_err){
723
int mvoffset_base;
724
int best_site;
725
int sitei;
726
mvoffset_base=_vec[0]+_vec[1]*_ystride;
727
best_site=4;
728
for(sitei=0;sitei<8;sitei++){
729
unsigned err;
730
int dc;
731
int site;
732
int xmask;
733
int ymask;
734
int dx;
735
int dy;
736
int mvoffset0;
737
int mvoffset1;
738
site=OC_SQUARE_SITES[0][sitei];
739
dx=OC_SQUARE_DX[site];
740
dy=OC_SQUARE_DY[site];
741
/*The following code SHOULD be equivalent to
742
oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
743
(_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
744
However, it should also be much faster, as it involves no multiplies and
745
doesn't have to handle chroma vectors.*/
746
xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
747
ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
748
mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
749
mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
750
err=oc_enc_frag_satd2(_enc,&dc,_src,
751
_ref+mvoffset0,_ref+mvoffset1,_ystride);
752
err+=abs(dc);
753
if(err<_best_err){
754
_best_err=err;
755
best_site=site;
756
}
757
}
758
_vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
759
_vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
760
return _best_err;
761
}
762
763
/*Refines the four per-block motion vectors of a macro block to half-pel
   precision against the OC_FRAME_PREV reference.
  Each stored block vector is halved and handed to
   oc_mcenc_ysatd_halfpel_brefine(); the refined error replaces block_satd[]
   and the refined vector is written to ref_mv[].
  _enc: The encoder context.
  _mbi: The macro block index.*/
void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
  oc_mb_enc_info      *emb;
  const ptrdiff_t     *offs_table;
  const ptrdiff_t     *mb_frags;
  const unsigned char *cur_frame;
  const unsigned char *prev_frame;
  int                  row_step[9];
  int                  stride;
  int                  blk;
  stride=_enc->state.ref_ystride[0];
  offs_table=_enc->state.frag_buf_offs;
  mb_frags=_enc->state.mb_maps[_mbi][0];
  cur_frame=_enc->state.ref_frame_data[OC_FRAME_IO];
  prev_frame=_enc->state.ref_frame_data[OC_FRAME_PREV];
  /*The row displacement of each square-pattern site: one row up for sites
     0-2, none for sites 3 and 5, one row down for sites 6-8.
    The center (site 4) is left unset, as it was before.*/
  row_step[2]=row_step[1]=row_step[0]=-stride;
  row_step[5]=row_step[3]=0;
  row_step[8]=row_step[7]=row_step[6]=stride;
  emb=_enc->mb_info+_mbi;
  for(blk=0;blk<4;blk++){
    ptrdiff_t offs;
    int       mv[2];
    offs=offs_table[mb_frags[blk]];
    mv[0]=OC_DIV2(OC_MV_X(emb->block_mv[blk]));
    mv[1]=OC_DIV2(OC_MV_Y(emb->block_mv[blk]));
    emb->block_satd[blk]=oc_mcenc_ysatd_halfpel_brefine(_enc,mv,
     cur_frame+offs,prev_frame+offs,stride,row_step,emb->block_satd[blk]);
    emb->ref_mv[blk]=OC_MV(mv[0],mv[1]);
  }
}
792
793