Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libtheora/collect.c
9906 views
1
/********************************************************************
2
* *
3
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7
* *
8
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2011 *
9
* by the Xiph.Org Foundation and contributors *
10
* https://www.xiph.org/ *
11
* *
12
********************************************************************
13
14
function: mode selection code
15
16
********************************************************************/
17
#include <stdio.h>
18
#include <limits.h>
19
#include <math.h>
20
#include <string.h>
21
#include "collect.h"
22
23
#if defined(OC_COLLECT_METRICS)
24
25
int OC_HAS_MODE_METRICS;
26
double OC_MODE_RD_WEIGHT_SATD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
27
double OC_MODE_RD_WEIGHT_SAD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
28
oc_mode_metrics OC_MODE_METRICS_SATD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
29
oc_mode_metrics OC_MODE_METRICS_SAD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
30
const char *OC_MODE_METRICS_FILENAME="modedec.stats";
31
32
void oc_mode_metrics_add(oc_mode_metrics *_metrics,
33
double _w,int _s,int _q,int _r,double _d){
34
if(_metrics->w>0){
35
double ds;
36
double dq;
37
double dr;
38
double dd;
39
double ds2;
40
double dq2;
41
double s2;
42
double sq;
43
double q2;
44
double sr;
45
double qr;
46
double sd;
47
double qd;
48
double s2q;
49
double sq2;
50
double w;
51
double wa;
52
double rwa;
53
double rwa2;
54
double rwb;
55
double rwb2;
56
double rw2;
57
double rw3;
58
double rw4;
59
wa=_metrics->w;
60
ds=_s-_metrics->s/wa;
61
dq=_q-_metrics->q/wa;
62
dr=_r-_metrics->r/wa;
63
dd=_d-_metrics->d/wa;
64
ds2=ds*ds;
65
dq2=dq*dq;
66
s2=_metrics->s2;
67
sq=_metrics->sq;
68
q2=_metrics->q2;
69
sr=_metrics->sr;
70
qr=_metrics->qr;
71
sd=_metrics->sd;
72
qd=_metrics->qd;
73
s2q=_metrics->s2q;
74
sq2=_metrics->sq2;
75
w=wa+_w;
76
rwa=wa/w;
77
rwb=_w/w;
78
rwa2=rwa*rwa;
79
rwb2=rwb*rwb;
80
rw2=wa*rwb;
81
rw3=rw2*(rwa2-rwb2);
82
rw4=_w*rwa2*rwa2+wa*rwb2*rwb2;
83
_metrics->s2q2+=-2*(ds*sq2+dq*s2q)*rwb
84
+(ds2*q2+4*ds*dq*sq+dq2*s2)*rwb2+ds2*dq2*rw4;
85
_metrics->s2q+=(-2*ds*sq-dq*s2)*rwb+ds2*dq*rw3;
86
_metrics->sq2+=(-ds*q2-2*dq*sq)*rwb+ds*dq2*rw3;
87
_metrics->sqr+=(-ds*qr-dq*sr-dr*sq)*rwb+ds*dq*dr*rw3;
88
_metrics->sqd+=(-ds*qd-dq*sd-dd*sq)*rwb+ds*dq*dd*rw3;
89
_metrics->s2+=ds2*rw2;
90
_metrics->sq+=ds*dq*rw2;
91
_metrics->q2+=dq2*rw2;
92
_metrics->sr+=ds*dr*rw2;
93
_metrics->qr+=dq*dr*rw2;
94
_metrics->r2+=dr*dr*rw2;
95
_metrics->sd+=ds*dd*rw2;
96
_metrics->qd+=dq*dd*rw2;
97
_metrics->d2+=dd*dd*rw2;
98
}
99
_metrics->w+=_w;
100
_metrics->s+=_s*_w;
101
_metrics->q+=_q*_w;
102
_metrics->r+=_r*_w;
103
_metrics->d+=_d*_w;
104
}
105
106
void oc_mode_metrics_merge(oc_mode_metrics *_dst,
107
const oc_mode_metrics *_src,int _n){
108
int i;
109
/*Find a non-empty set of metrics.*/
110
for(i=0;i<_n&&_src[i].w==0;i++);
111
if(i>=_n){
112
memset(_dst,0,sizeof(*_dst));
113
return;
114
}
115
memcpy(_dst,_src+i,sizeof(*_dst));
116
/*And iterate over the remaining non-empty sets of metrics.*/
117
for(i++;i<_n;i++)if(_src[i].w!=0){
118
double ds;
119
double dq;
120
double dr;
121
double dd;
122
double ds2;
123
double dq2;
124
double s2a;
125
double s2b;
126
double sqa;
127
double sqb;
128
double q2a;
129
double q2b;
130
double sra;
131
double srb;
132
double qra;
133
double qrb;
134
double sda;
135
double sdb;
136
double qda;
137
double qdb;
138
double s2qa;
139
double s2qb;
140
double sq2a;
141
double sq2b;
142
double w;
143
double wa;
144
double wb;
145
double rwa;
146
double rwb;
147
double rwa2;
148
double rwb2;
149
double rw2;
150
double rw3;
151
double rw4;
152
wa=_dst->w;
153
wb=_src[i].w;
154
ds=_src[i].s/wb-_dst->s/wa;
155
dq=_src[i].q/wb-_dst->q/wa;
156
dr=_src[i].r/wb-_dst->r/wa;
157
dd=_src[i].d/wb-_dst->d/wa;
158
ds2=ds*ds;
159
dq2=dq*dq;
160
s2a=_dst->s2;
161
sqa=_dst->sq;
162
q2a=_dst->q2;
163
sra=_dst->sr;
164
qra=_dst->qr;
165
sda=_dst->sd;
166
qda=_dst->qd;
167
s2qa=_dst->s2q;
168
sq2a=_dst->sq2;
169
s2b=_src[i].s2;
170
sqb=_src[i].sq;
171
q2b=_src[i].q2;
172
srb=_src[i].sr;
173
qrb=_src[i].qr;
174
sdb=_src[i].sd;
175
qdb=_src[i].qd;
176
s2qb=_src[i].s2q;
177
sq2b=_src[i].sq2;
178
w=wa+wb;
179
if(w==0)rwa=rwb=0;
180
else{
181
rwa=wa/w;
182
rwb=wb/w;
183
}
184
rwa2=rwa*rwa;
185
rwb2=rwb*rwb;
186
rw2=wa*rwb;
187
rw3=rw2*(rwa2-rwb2);
188
rw4=wb*rwa2*rwa2+wa*rwb2*rwb2;
189
/*
190
(1,1,1) ->
191
(0,0,0)#
192
(1,0,0) C(1,1)*C(1,0)*C(1,0)-> d^{1,0,0}*(rwa*B_{0,1,1}-rwb*A_{0,1,1})
193
(0,1,0) C(1,0)*C(1,1)*C(1,0)-> d^{0,1,0}*(rwa*B_{1,0,1}-rwb*A_{1,0,1})
194
(0,0,1) C(1,0)*C(1,0)*C(1,1)-> d^{0,0,1}*(rwa*B_{1,1,0}-rwb*A_{1,1,0})
195
(1,1,0)*
196
(1,0,1)*
197
(0,1,1)*
198
(1,1,1) C(1,1)*C(1,1)*C(1,1)-> d^{1,1,1}*(rwa^3*wb-rwb^3*wa)
199
(2,1) ->
200
(0,0)#
201
(1,0) C(2,1)*C(1,1)->2*d^{1,0}*(rwa*B_{1,1}-rwb*A_{1,1})
202
(0,1) C(2,0)*C(1,1)-> d^{0,1}*(rwa*B_{2,0}-rwb*A_{2,0})
203
(2,0)*
204
(1,1)*
205
(2,1) C(2,2)*C(1,1)-> d^{2,1}*(rwa^3*wb-rwb^3*wa)
206
(2,2) ->
207
(0,0)#
208
(1,0) C(2,1)*C(2,0)->2*d^{1,0}*(rwa*B_{1,2}-rwb*A_{1,2})
209
(0,1) C(2,0)*C(2,1)->2*d^{0,1}*(rwa*B_{2,1}-rwb*A_{2,1})
210
(2,0) C(2,2)*C(2,0)-> d^{2,0}*(rwa^2*B_{0,2}+rwb^2*A_{0,2})
211
(1,1) C(2,1)*C(2,1)->4*d^{1,1}*(rwa^2*B_{1,1}+rwb^2*A_{1,1})
212
(0,2) C(2,0)*C(2,2)-> d^{0,2}*(rwa^2*B_{2,0}+rwb^2*A_{2,0})
213
(1,2)*
214
(2,1)*
215
(2,2) C(2,2)*C(2,2)*d^{2,2}*(rwa^4*wb+rwb^4*wa)
216
*/
217
_dst->s2q2+=_src[i].s2q2+2*(ds*(rwa*sq2b-rwb*sq2a)+dq*(rwa*s2qb-rwb*s2qa))
218
+ds2*(rwa2*q2b+rwb2*q2a)+4*ds*dq*(rwa2*sqb+rwb2*sqa)
219
+dq2*(rwa2*s2b+rwb2*s2a)+ds2*dq2*rw4;
220
_dst->s2q+=_src[i].s2q+2*ds*(rwa*sqb-rwb*sqa)
221
+dq*(rwa*s2b-rwb*s2a)+ds2*dq*rw3;
222
_dst->sq2+=_src[i].sq2+ds*(rwa*q2b-rwb*q2a)
223
+2*dq*(rwa*sqb-rwb*sqa)+ds*dq2*rw3;
224
_dst->sqr+=_src[i].sqr+ds*(rwa*qrb-rwb*qra)+dq*(rwa*srb-rwb*sra)
225
+dr*(rwa*sqb-rwb*sqa)+ds*dq*dr*rw3;
226
_dst->sqd+=_src[i].sqd+ds*(rwa*qdb-rwb*qda)+dq*(rwa*sdb-rwb*sda)
227
+dd*(rwa*sqb-rwb*sqa)+ds*dq*dd*rw3;
228
_dst->s2+=_src[i].s2+ds2*rw2;
229
_dst->sq+=_src[i].sq+ds*dq*rw2;
230
_dst->q2+=_src[i].q2+dq2*rw2;
231
_dst->sr+=_src[i].sr+ds*dr*rw2;
232
_dst->qr+=_src[i].qr+dq*dr*rw2;
233
_dst->r2+=_src[i].r2+dr*dr*rw2;
234
_dst->sd+=_src[i].sd+ds*dd*rw2;
235
_dst->qd+=_src[i].qd+dq*dd*rw2;
236
_dst->d2+=_src[i].d2+dd*dd*rw2;
237
_dst->w+=_src[i].w;
238
_dst->s+=_src[i].s;
239
_dst->q+=_src[i].q;
240
_dst->r+=_src[i].r;
241
_dst->d+=_src[i].d;
242
}
243
}
244
245
/*Adjust a single corner of a set of metric bins to minimize the squared
246
prediction error of R and D.
247
Each bin is assumed to cover a quad like so:
248
(s0,q0) (s1,q0)
249
A----------B
250
| |
251
| |
252
| |
253
| |
254
C----------Z
255
(s0,q1) (s1,q1)
256
The values A, B, and C are fixed, and Z is the free parameter.
257
Then, for example, R_i is predicted via bilinear interpolation as
258
x_i=(s_i-s0)/(s1-s0)
259
y_i=(q_i-q0)/(q1-q0)
260
dRds1_i=A+(B-A)*x_i
261
dRds2_i=C+(Z-C)*x_i
262
R_i=dRds1_i+(dRds2_i-dRds1_i)*y_i
263
To find the Z that minimizes the squared prediction error over i, this can
264
be rewritten as
265
R_i-(A+(B-A)*x_i+(C-A)*y_i+(A-B-C)*x_i*y_i)=x_i*y_i*Z
266
Letting X={...,x_i*y_i,...}^T and
267
Y={...,R_i-(A+(B-A)*x_i+(C-A)*y_i+(A-B-C)*x_i*y_i),...}^T,
268
the optimal Z is given by Z=(X^T.Y)/(X^T.X).
269
Now, we need to compute these dot products without actually storing data for
270
each sample.
271
Starting with X^T.X, we have
272
X^T.X = sum(x_i^2*y_i^2) = sum((s_i-s0)^2*(q_i-q0)^2)/((s1-s0)^2*(q1-q0)^2).
273
Expanding the interior of the sum in a monomial basis of s_i and q_i gives
274
s0^2*q0^2 *(1)
275
-2*s0*q0^2*(s_i)
276
-2*s0^2*q0*(q_i)
277
+q0^2 *(s_i^2)
278
+4*s0*q0 *(s_i*q_i)
279
+s0^2 *(q_i^2)
280
-2*q0 *(s_i^2*q_i)
281
-2*s0 *(s_i*q_i^2)
282
+1 *(s_i^2*q_i^2).
283
However, computing things directly in this basis leads to gross numerical
284
errors, as most of the terms will have similar size and destructive
285
cancellation results.
286
A much better basis is the central (co-)moment basis:
287
{1,s_i-sbar,q_i-qbar,(s_i-sbar)^2,(s_i-sbar)*(q_i-qbar),(q_i-qbar)^2,
288
(s_i-sbar)^2*(q_i-qbar),(s_i-sbar)*(q_i-qbar)^2,(s_i-sbar)^2*(q_i-qbar)^2},
289
where sbar and qbar are the average s and q values over the bin,
290
respectively.
291
In that basis, letting ds=sbar-s0 and dq=qbar-q0, (s_i-s0)^2*(q_i-q0)^2 is
292
ds^2*dq^2*(1)
293
+dq^2 *((s_i-sbar)^2)
294
+4*ds*dq*((s_i-sbar)*(q_i-qbar))
295
+ds^2 *((q_i-qbar)^2)
296
+2*dq *((s_i-sbar)^2*(q_i-qbar))
297
+2*ds *((s_i-sbar)*(q_i-qbar)^2)
298
+1 *((s_i-sbar)^2*(q_i-qbar)^2).
299
With these expressions in the central (co-)moment bases, all we need to do
300
is compute sums over the (co-)moment terms, which can be done
301
incrementally (see oc_mode_metrics_add() and oc_mode_metrics_merge()),
302
with no need to store the individual samples.
303
Now, for X^T.Y, we have
304
X^T.Y = sum((R_i-A-((B-A)/(s1-s0))*(s_i-s0)-((C-A)/(q1-q0))*(q_i-q0)
305
-((A-B-C)/((s1-s0)*(q1-q0)))*(s_i-s0)*(q_i-q0))*(s_i-s0)*(q_i-q0))/
306
((s1-s0)*(q1-q0)),
307
or, rewriting the constants to simplify notation,
308
X^T.Y = sum((C0+C1*(s_i-s0)+C2*(q_i-q0)
309
+C3*(s_i-s0)*(q_i-q0)+R_i)*(s_i-s0)*(q_i-q0))/((s1-s0)*(q1-q0)).
310
Again, converting to the central (co-)moment basis, the interior of the
311
above sum is
312
ds*dq*(rbar+C0+C1*ds+C2*dq+C3*ds*dq) *(1)
313
+(C1*dq+C3*dq^2) *((s_i-sbar)^2)
314
+(rbar+C0+2*C1*ds+2*C2*dq+4*C3*ds*dq)*((s_i-sbar)*(q_i-qbar))
315
+(C2*ds+C3*ds^2) *((q_i-qbar)^2)
316
+dq *((s_i-sbar)*(r_i-rbar))
317
+ds *((q_i-qbar)*(r_i-rbar))
318
+(C1+2*C3*dq) *((s_i-sbar)^2*(q_i-qbar))
319
+(C2+2*C3*ds) *((s_i-sbar)*(q_i-qbar)^2)
320
+1 *((s_i-sbar)*(q_i-qbar)*(r_i-rbar))
321
+C3 *((s_i-sbar)^2*(q_i-qbar)^2).
322
You might think it would be easier (if perhaps slightly less robust) to
323
accumulate terms directly around s0 and q0.
324
However, we update each corner of the bins in turn, so we would have to
325
change basis to move the sums from corner to corner anyway.*/
326
double oc_mode_metrics_solve(double *_r,double *_d,
327
const oc_mode_metrics *_metrics,const int *_s0,const int *_s1,
328
const int *_q0,const int *_q1,
329
const double *_ra,const double *_rb,const double *_rc,
330
const double *_da,const double *_db,const double *_dc,int _n){
331
double xx;
332
double rxy;
333
double dxy;
334
double wt;
335
int i;
336
xx=rxy=dxy=wt=0;
337
for(i=0;i<_n;i++)if(_metrics[i].w>0){
338
double s10;
339
double q10;
340
double sq10;
341
double ds;
342
double dq;
343
double ds2;
344
double dq2;
345
double r;
346
double d;
347
double s2;
348
double sq;
349
double q2;
350
double sr;
351
double qr;
352
double sd;
353
double qd;
354
double s2q;
355
double sq2;
356
double sqr;
357
double sqd;
358
double s2q2;
359
double c0;
360
double c1;
361
double c2;
362
double c3;
363
double w;
364
w=_metrics[i].w;
365
wt+=w;
366
s10=_s1[i]-_s0[i];
367
q10=_q1[i]-_q0[i];
368
sq10=s10*q10;
369
ds=_metrics[i].s/w-_s0[i];
370
dq=_metrics[i].q/w-_q0[i];
371
ds2=ds*ds;
372
dq2=dq*dq;
373
s2=_metrics[i].s2;
374
sq=_metrics[i].sq;
375
q2=_metrics[i].q2;
376
s2q=_metrics[i].s2q;
377
sq2=_metrics[i].sq2;
378
s2q2=_metrics[i].s2q2;
379
xx+=(dq2*(ds2*w+s2)+4*ds*dq*sq+ds2*q2+2*(dq*s2q+ds*sq2)+s2q2)/(sq10*sq10);
380
r=_metrics[i].r/w;
381
sr=_metrics[i].sr;
382
qr=_metrics[i].qr;
383
sqr=_metrics[i].sqr;
384
c0=-_ra[i];
385
c1=-(_rb[i]-_ra[i])/s10;
386
c2=-(_rc[i]-_ra[i])/q10;
387
c3=-(_ra[i]-_rb[i]-_rc[i])/sq10;
388
rxy+=(ds*dq*(r+c0+c1*ds+c2*dq+c3*ds*dq)*w+(c1*dq+c3*dq2)*s2
389
+(r+c0+2*(c1*ds+(c2+2*c3*ds)*dq))*sq+(c2*ds+c3*ds2)*q2+dq*sr+ds*qr
390
+(c1+2*c3*dq)*s2q+(c2+2*c3*ds)*sq2+sqr+c3*s2q2)/sq10;
391
d=_metrics[i].d/w;
392
sd=_metrics[i].sd;
393
qd=_metrics[i].qd;
394
sqd=_metrics[i].sqd;
395
c0=-_da[i];
396
c1=-(_db[i]-_da[i])/s10;
397
c2=-(_dc[i]-_da[i])/q10;
398
c3=-(_da[i]-_db[i]-_dc[i])/sq10;
399
dxy+=(ds*dq*(d+c0+c1*ds+c2*dq+c3*ds*dq)*w+(c1*dq+c3*dq2)*s2
400
+(d+c0+2*(c1*ds+(c2+2*c3*ds)*dq))*sq+(c2*ds+c3*ds2)*q2+dq*sd+ds*qd
401
+(c1+2*c3*dq)*s2q+(c2+2*c3*ds)*sq2+sqd+c3*s2q2)/sq10;
402
}
403
if(xx>1E-3){
404
*_r=rxy/xx;
405
*_d=dxy/xx;
406
}
407
else{
408
*_r=0;
409
*_d=0;
410
}
411
return wt;
412
}
413
414
/*Compile collected SATD/logq/rate/RMSE metrics into a form that's immediately
415
useful for mode decision.*/
416
void oc_mode_metrics_update(oc_mode_metrics (*_metrics)[3][2][OC_COMP_BINS],
417
int _niters_min,int _reweight,oc_mode_rd (*_table)[3][2][OC_COMP_BINS],
418
int _shift,double (*_weight)[3][2][OC_COMP_BINS]){
419
int niters;
420
int prevdr;
421
int prevdd;
422
int dr;
423
int dd;
424
int pli;
425
int qti;
426
int qi;
427
int si;
428
dd=dr=INT_MAX;
429
niters=0;
430
/*The encoder interpolates rate and RMSE terms bilinearly from an
431
OC_LOGQ_BINS by OC_COMP_BINS grid of sample points in _table.
432
To find the sample values at the grid points that minimize the total
433
squared prediction error actually requires solving a relatively sparse
434
linear system with a number of variables equal to the number of grid
435
points.
436
Instead of writing a general sparse linear system solver, we just use
437
Gauss-Seidel iteration, i.e., we update one grid point at time until
438
they stop changing.*/
439
do{
440
prevdr=dr;
441
prevdd=dd;
442
dd=dr=0;
443
for(pli=0;pli<3;pli++){
444
for(qti=0;qti<2;qti++){
445
for(qi=0;qi<OC_LOGQ_BINS;qi++){
446
for(si=0;si<OC_COMP_BINS;si++){
447
oc_mode_metrics m[4];
448
int s0[4];
449
int s1[4];
450
int q0[4];
451
int q1[4];
452
double ra[4];
453
double rb[4];
454
double rc[4];
455
double da[4];
456
double db[4];
457
double dc[4];
458
double r;
459
double d;
460
int rate;
461
int rmse;
462
int ds;
463
int n;
464
n=0;
465
/*Collect the statistics for the (up to) four bins grid point
466
(si,qi) touches.*/
467
if(qi>0&&si>0){
468
q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
469
q1[n]=OC_MODE_LOGQ[qi][pli][qti];
470
s0[n]=si-1<<_shift;
471
s1[n]=si<<_shift;
472
ra[n]=ldexp(_table[qi-1][pli][qti][si-1].rate,-OC_BIT_SCALE);
473
da[n]=ldexp(_table[qi-1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
474
rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
475
db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
476
rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
477
dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
478
*(m+n++)=*(_metrics[qi-1][pli][qti]+si-1);
479
}
480
if(qi>0){
481
ds=si+1<OC_COMP_BINS?1:-1;
482
q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
483
q1[n]=OC_MODE_LOGQ[qi][pli][qti];
484
s0[n]=si+ds<<_shift;
485
s1[n]=si<<_shift;
486
ra[n]=ldexp(_table[qi-1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
487
da[n]=
488
ldexp(_table[qi-1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
489
rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
490
db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
491
rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
492
dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
493
*(m+n++)=*(_metrics[qi-1][pli][qti]+si);
494
}
495
if(qi+1<OC_LOGQ_BINS&&si>0){
496
q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
497
q1[n]=OC_MODE_LOGQ[qi][pli][qti];
498
s0[n]=si-1<<_shift;
499
s1[n]=si<<_shift;
500
ra[n]=ldexp(_table[qi+1][pli][qti][si-1].rate,-OC_BIT_SCALE);
501
da[n]=ldexp(_table[qi+1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
502
rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
503
db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
504
rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
505
dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
506
*(m+n++)=*(_metrics[qi][pli][qti]+si-1);
507
}
508
if(qi+1<OC_LOGQ_BINS){
509
ds=si+1<OC_COMP_BINS?1:-1;
510
q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
511
q1[n]=OC_MODE_LOGQ[qi][pli][qti];
512
s0[n]=si+ds<<_shift;
513
s1[n]=si<<_shift;
514
ra[n]=ldexp(_table[qi+1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
515
da[n]=
516
ldexp(_table[qi+1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
517
rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
518
db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
519
rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
520
dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
521
*(m+n++)=*(_metrics[qi][pli][qti]+si);
522
}
523
/*On the first pass, initialize with a simple weighted average of
524
the neighboring bins.*/
525
if(!OC_HAS_MODE_METRICS&&niters==0){
526
double w;
527
w=r=d=0;
528
while(n-->0){
529
w+=m[n].w;
530
r+=m[n].r;
531
d+=m[n].d;
532
}
533
r=w>1E-3?r/w:0;
534
d=w>1E-3?d/w:0;
535
_weight[qi][pli][qti][si]=w;
536
}
537
else{
538
/*Update the grid point and save the weight for later.*/
539
_weight[qi][pli][qti][si]=
540
oc_mode_metrics_solve(&r,&d,m,s0,s1,q0,q1,ra,rb,rc,da,db,dc,n);
541
}
542
rate=OC_CLAMPI(-32768,(int)(ldexp(r,OC_BIT_SCALE)+0.5),32767);
543
rmse=OC_CLAMPI(-32768,(int)(ldexp(d,OC_RMSE_SCALE)+0.5),32767);
544
dr+=abs(rate-_table[qi][pli][qti][si].rate);
545
dd+=abs(rmse-_table[qi][pli][qti][si].rmse);
546
_table[qi][pli][qti][si].rate=(ogg_int16_t)rate;
547
_table[qi][pli][qti][si].rmse=(ogg_int16_t)rmse;
548
}
549
}
550
}
551
}
552
}
553
/*After a fixed number of initial iterations, only iterate so long as the
554
total change is decreasing.
555
This ensures we don't oscillate forever, which is a danger, as all of our
556
results are rounded fairly coarsely.*/
557
while((dr>0||dd>0)&&(niters++<_niters_min||(dr<prevdr&&dd<prevdd)));
558
if(_reweight){
559
/*Now, reduce the values of the optimal solution until we get enough
560
samples in each bin to overcome the constant OC_ZWEIGHT factor.
561
This encourages sampling under-populated bins and prevents a single large
562
sample early on from discouraging coding in that bin ever again.*/
563
for(pli=0;pli<3;pli++){
564
for(qti=0;qti<2;qti++){
565
for(qi=0;qi<OC_LOGQ_BINS;qi++){
566
for(si=0;si<OC_COMP_BINS;si++){
567
double wt;
568
wt=_weight[qi][pli][qti][si];
569
wt/=OC_ZWEIGHT+wt;
570
_table[qi][pli][qti][si].rate=(ogg_int16_t)
571
(_table[qi][pli][qti][si].rate*wt+0.5);
572
_table[qi][pli][qti][si].rmse=(ogg_int16_t)
573
(_table[qi][pli][qti][si].rmse*wt+0.5);
574
}
575
}
576
}
577
}
578
}
579
}
580
581
/*Dump the in memory mode metrics to a file.
582
Note this data format isn't portable between different platforms.*/
583
void oc_mode_metrics_dump(void){
584
FILE *fmetrics;
585
fmetrics=fopen(OC_MODE_METRICS_FILENAME,"wb");
586
if(fmetrics!=NULL){
587
(void)fwrite(OC_MODE_LOGQ,sizeof(OC_MODE_LOGQ),1,fmetrics);
588
(void)fwrite(OC_MODE_METRICS_SATD,sizeof(OC_MODE_METRICS_SATD),1,fmetrics);
589
(void)fwrite(OC_MODE_METRICS_SAD,sizeof(OC_MODE_METRICS_SAD),1,fmetrics);
590
fclose(fmetrics);
591
}
592
}
593
594
void oc_mode_metrics_print_rd(FILE *_fout,const char *_table_name,
595
#if !defined(OC_COLLECT_METRICS)
596
const oc_mode_rd (*_mode_rd_table)[3][2][OC_COMP_BINS]){
597
#else
598
oc_mode_rd (*_mode_rd_table)[3][2][OC_COMP_BINS]){
599
#endif
600
int qii;
601
fprintf(_fout,
602
"# if !defined(OC_COLLECT_METRICS)\n"
603
"static const\n"
604
"# endif\n"
605
"oc_mode_rd %s[OC_LOGQ_BINS][3][2][OC_COMP_BINS]={\n",_table_name);
606
for(qii=0;qii<OC_LOGQ_BINS;qii++){
607
int pli;
608
fprintf(_fout," {\n");
609
for(pli=0;pli<3;pli++){
610
int qti;
611
fprintf(_fout," {\n");
612
for(qti=0;qti<2;qti++){
613
int bin;
614
int qi;
615
static const char *pl_names[3]={"Y'","Cb","Cr"};
616
static const char *qti_names[2]={"INTRA","INTER"};
617
qi=(63*qii+(OC_LOGQ_BINS-1>>1))/(OC_LOGQ_BINS-1);
618
fprintf(_fout," /*%s qi=%i %s*/\n",
619
pl_names[pli],qi,qti_names[qti]);
620
fprintf(_fout," {\n");
621
fprintf(_fout," ");
622
for(bin=0;bin<OC_COMP_BINS;bin++){
623
if(bin&&!(bin&0x3))fprintf(_fout,"\n ");
624
fprintf(_fout,"{%5i,%5i}",
625
_mode_rd_table[qii][pli][qti][bin].rate,
626
_mode_rd_table[qii][pli][qti][bin].rmse);
627
if(bin+1<OC_COMP_BINS)fprintf(_fout,",");
628
}
629
fprintf(_fout,"\n }");
630
if(qti<1)fprintf(_fout,",");
631
fprintf(_fout,"\n");
632
}
633
fprintf(_fout," }");
634
if(pli<2)fprintf(_fout,",");
635
fprintf(_fout,"\n");
636
}
637
fprintf(_fout," }");
638
if(qii+1<OC_LOGQ_BINS)fprintf(_fout,",");
639
fprintf(_fout,"\n");
640
}
641
fprintf(_fout,
642
"};\n"
643
"\n");
644
}
645
646
void oc_mode_metrics_print(FILE *_fout){
647
int qii;
648
fprintf(_fout,
649
"/*File generated by libtheora with OC_COLLECT_METRICS"
650
" defined at compile time.*/\n"
651
"#if !defined(_modedec_H)\n"
652
"# define _modedec_H (1)\n"
653
"# include \"encint.h\"\n"
654
"\n"
655
"\n"
656
"\n"
657
"/*The log of the average quantizer for each of the OC_MODE_RD table rows\n"
658
" (e.g., for the represented qi's, and each pli and qti), in Q10 format.\n"
659
" The actual statistics used by the encoder will be interpolated from\n"
660
" that table based on log_plq for the actual quantization matrix used.*/\n"
661
"# if !defined(OC_COLLECT_METRICS)\n"
662
"static const\n"
663
"# endif\n"
664
"ogg_int16_t OC_MODE_LOGQ[OC_LOGQ_BINS][3][2]={\n");
665
for(qii=0;qii<OC_LOGQ_BINS;qii++){
666
fprintf(_fout," { {0x%04X,0x%04X},{0x%04X,0x%04X},{0x%04X,0x%04X} }%s\n",
667
OC_MODE_LOGQ[qii][0][0],OC_MODE_LOGQ[qii][0][1],OC_MODE_LOGQ[qii][1][0],
668
OC_MODE_LOGQ[qii][1][1],OC_MODE_LOGQ[qii][2][0],OC_MODE_LOGQ[qii][2][1],
669
qii+1<OC_LOGQ_BINS?",":"");
670
}
671
fprintf(_fout,
672
"};\n"
673
"\n");
674
oc_mode_metrics_print_rd(_fout,"OC_MODE_RD_SATD",OC_MODE_RD_SATD);
675
oc_mode_metrics_print_rd(_fout,"OC_MODE_RD_SAD",OC_MODE_RD_SAD);
676
fprintf(_fout,
677
"#endif\n");
678
}
679
680
681
# if !defined(OC_COLLECT_NO_ENC_FUNCS)
682
void oc_enc_mode_metrics_load(oc_enc_ctx *_enc){
683
oc_restore_fpu(&_enc->state);
684
/*Load any existing mode metrics if we haven't already.*/
685
if(!OC_HAS_MODE_METRICS){
686
FILE *fmetrics;
687
memset(OC_MODE_METRICS_SATD,0,sizeof(OC_MODE_METRICS_SATD));
688
memset(OC_MODE_METRICS_SAD,0,sizeof(OC_MODE_METRICS_SAD));
689
fmetrics=fopen(OC_MODE_METRICS_FILENAME,"rb");
690
if(fmetrics!=NULL){
691
/*Read in the binary structures as written my oc_mode_metrics_dump().
692
Note this format isn't portable between different platforms.*/
693
(void)fread(OC_MODE_LOGQ,sizeof(OC_MODE_LOGQ),1,fmetrics);
694
(void)fread(OC_MODE_METRICS_SATD,sizeof(OC_MODE_METRICS_SATD),1,fmetrics);
695
(void)fread(OC_MODE_METRICS_SAD,sizeof(OC_MODE_METRICS_SAD),1,fmetrics);
696
fclose(fmetrics);
697
}
698
else{
699
int qii;
700
int qi;
701
int pli;
702
int qti;
703
for(qii=0;qii<OC_LOGQ_BINS;qii++){
704
qi=(63*qii+(OC_LOGQ_BINS-1>>1))/(OC_LOGQ_BINS-1);
705
for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
706
OC_MODE_LOGQ[qii][pli][qti]=_enc->log_plq[qi][pli][qti];
707
}
708
}
709
}
710
oc_mode_metrics_update(OC_MODE_METRICS_SATD,100,1,
711
OC_MODE_RD_SATD,OC_SATD_SHIFT,OC_MODE_RD_WEIGHT_SATD);
712
oc_mode_metrics_update(OC_MODE_METRICS_SAD,100,1,
713
OC_MODE_RD_SAD,OC_SAD_SHIFT,OC_MODE_RD_WEIGHT_SAD);
714
OC_HAS_MODE_METRICS=1;
715
}
716
}
717
718
/*The following token skipping code used to also be used in the decoder (and
719
even at one point other places in the encoder).
720
However, it was obsoleted by other optimizations, and is now only used here.
721
It has been moved here to avoid generating the code when it's not needed.*/
722
723
/*Determines the number of blocks or coefficients to be skipped for a given
724
token value.
725
_token: The token value to skip.
726
_extra_bits: The extra bits attached to this token.
727
Return: A positive value indicates that number of coefficients are to be
728
skipped in the current block.
729
Otherwise, the negative of the return value indicates that number of
730
blocks are to be ended.*/
731
typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
732
733
/*Handles the simple end of block tokens.*/
734
static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
735
int nblocks_adjust;
736
nblocks_adjust=OC_UNIBBLE_TABLE32(0,1,2,3,7,15,0,0,_token)+1;
737
return -_extra_bits-nblocks_adjust;
738
}
739
740
/*The last EOB token has a special case, where an EOB run of size zero ends all
741
the remaining blocks in the frame.*/
742
static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
743
/*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
744
yet available everywhere; this should be equivalent.*/
745
if(!_extra_bits)return -(~(size_t)0>>1);
746
return -_extra_bits;
747
}
748
749
/*Handles the pure zero run tokens.*/
750
static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
751
return _extra_bits+1;
752
}
753
754
/*Handles a normal coefficient value token.*/
755
static ptrdiff_t oc_token_skip_val(void){
756
return 1;
757
}
758
759
/*Handles a category 1A zero run/coefficient value combo token.*/
760
static ptrdiff_t oc_token_skip_run_cat1a(int _token){
761
return _token-OC_DCT_RUN_CAT1A+2;
762
}
763
764
/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.*/
765
static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
766
int run_cati;
767
int ncoeffs_mask;
768
int ncoeffs_adjust;
769
run_cati=_token-OC_DCT_RUN_CAT1B;
770
ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati);
771
ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati);
772
return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust;
773
}
774
775
/*A jump table for computing the number of coefficients or blocks to skip for
776
a given token value.
777
This reduces all the conditional branches, etc., needed to parse these token
778
values down to one indirect jump.*/
779
static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
780
oc_token_skip_eob,
781
oc_token_skip_eob,
782
oc_token_skip_eob,
783
oc_token_skip_eob,
784
oc_token_skip_eob,
785
oc_token_skip_eob,
786
oc_token_skip_eob6,
787
oc_token_skip_zrl,
788
oc_token_skip_zrl,
789
(oc_token_skip_func)oc_token_skip_val,
790
(oc_token_skip_func)oc_token_skip_val,
791
(oc_token_skip_func)oc_token_skip_val,
792
(oc_token_skip_func)oc_token_skip_val,
793
(oc_token_skip_func)oc_token_skip_val,
794
(oc_token_skip_func)oc_token_skip_val,
795
(oc_token_skip_func)oc_token_skip_val,
796
(oc_token_skip_func)oc_token_skip_val,
797
(oc_token_skip_func)oc_token_skip_val,
798
(oc_token_skip_func)oc_token_skip_val,
799
(oc_token_skip_func)oc_token_skip_val,
800
(oc_token_skip_func)oc_token_skip_val,
801
(oc_token_skip_func)oc_token_skip_val,
802
(oc_token_skip_func)oc_token_skip_val,
803
(oc_token_skip_func)oc_token_skip_run_cat1a,
804
(oc_token_skip_func)oc_token_skip_run_cat1a,
805
(oc_token_skip_func)oc_token_skip_run_cat1a,
806
(oc_token_skip_func)oc_token_skip_run_cat1a,
807
(oc_token_skip_func)oc_token_skip_run_cat1a,
808
oc_token_skip_run,
809
oc_token_skip_run,
810
oc_token_skip_run,
811
oc_token_skip_run
812
};
813
814
/*Determines the number of blocks or coefficients to be skipped for a given
815
token value.
816
_token: The token value to skip.
817
_extra_bits: The extra bits attached to this token.
818
Return: A positive value indicates that number of coefficients are to be
819
skipped in the current block.
820
Otherwise, the negative of the return value indicates that number of
821
blocks are to be ended.
822
0 will never be returned, so that at least one coefficient in one
823
block will always be decoded for every token.*/
824
static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
825
return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits);
826
}
827
828
829
void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
830
static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
831
0,16,16,16,16,16,32,32,
832
32,32,32,32,32,32,32,48,
833
48,48,48,48,48,48,48,48,
834
48,48,48,48,64,64,64,64,
835
64,64,64,64,64,64,64,64,
836
64,64,64,64,64,64,64,64,
837
64,64,64,64,64,64,64,64
838
};
839
const oc_fragment *frags;
840
const unsigned *frag_sad;
841
const unsigned *frag_satd;
842
const unsigned *frag_ssd;
843
const ptrdiff_t *coded_fragis;
844
ptrdiff_t ncoded_fragis;
845
ptrdiff_t fragii;
846
double fragw;
847
int modelines[3][3][2];
848
int qti;
849
int qii;
850
int qi;
851
int pli;
852
int zzi;
853
int token;
854
int eb;
855
oc_restore_fpu(&_enc->state);
856
/*Figure out which metric bins to use for this frame's quantizers.*/
857
for(qii=0;qii<_enc->state.nqis;qii++){
858
for(pli=0;pli<3;pli++){
859
for(qti=0;qti<2;qti++){
860
int log_plq;
861
int modeline;
862
log_plq=_enc->log_plq[_enc->state.qis[qii]][pli][qti];
863
for(modeline=0;modeline<OC_LOGQ_BINS-1&&
864
OC_MODE_LOGQ[modeline+1][pli][qti]>log_plq;modeline++);
865
modelines[qii][pli][qti]=modeline;
866
}
867
}
868
}
869
qti=_enc->state.frame_type;
870
frags=_enc->state.frags;
871
frag_sad=_enc->frag_sad;
872
frag_satd=_enc->frag_satd;
873
frag_ssd=_enc->frag_ssd;
874
coded_fragis=_enc->state.coded_fragis;
875
ncoded_fragis=fragii=0;
876
/*Weight the fragments by the inverse frame size; this prevents HD content
877
from dominating the statistics.*/
878
fragw=1.0/_enc->state.nfrags;
879
for(pli=0;pli<3;pli++){
880
ptrdiff_t ti[64];
881
int eob_token[64];
882
int eob_run[64];
883
/*Set up token indices and eob run counts.
884
We don't bother trying to figure out the real cost of the runs that span
885
coefficients; instead we use the costs that were available when R-D
886
token optimization was done.*/
887
for(zzi=0;zzi<64;zzi++){
888
ti[zzi]=_enc->dct_token_offs[pli][zzi];
889
if(ti[zzi]>0){
890
token=_enc->dct_tokens[pli][zzi][0];
891
eb=_enc->extra_bits[pli][zzi][0];
892
eob_token[zzi]=token;
893
eob_run[zzi]=-oc_dct_token_skip(token,eb);
894
}
895
else{
896
eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
897
eob_run[zzi]=0;
898
}
899
}
900
/*Scan the list of coded fragments for this plane.*/
901
ncoded_fragis+=_enc->state.ncoded_fragis[pli];
902
for(;fragii<ncoded_fragis;fragii++){
903
ptrdiff_t fragi;
904
int frag_bits;
905
int huffi;
906
int skip;
907
int mb_mode;
908
unsigned sad;
909
unsigned satd;
910
double sqrt_ssd;
911
int bin;
912
int qtj;
913
fragi=coded_fragis[fragii];
914
frag_bits=0;
915
for(zzi=0;zzi<64;){
916
if(eob_run[zzi]>0){
917
/*We've reached the end of the block.*/
918
eob_run[zzi]--;
919
break;
920
}
921
huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
922
+OC_ZZI_HUFF_OFFSET[zzi];
923
if(eob_token[zzi]<OC_NDCT_EOB_TOKEN_MAX){
924
/*This token caused an EOB run to be flushed.
925
Therefore it gets the bits associated with it.*/
926
frag_bits+=_enc->huff_codes[huffi][eob_token[zzi]].nbits
927
+OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
928
eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
929
}
930
token=_enc->dct_tokens[pli][zzi][ti[zzi]];
931
eb=_enc->extra_bits[pli][zzi][ti[zzi]];
932
ti[zzi]++;
933
skip=oc_dct_token_skip(token,eb);
934
if(skip<0){
935
eob_token[zzi]=token;
936
eob_run[zzi]=-skip;
937
}
938
else{
939
/*A regular DCT value token; accumulate the bits for it.*/
940
frag_bits+=_enc->huff_codes[huffi][token].nbits
941
+OC_DCT_TOKEN_EXTRA_BITS[token];
942
zzi+=skip;
943
}
944
}
945
mb_mode=frags[fragi].mb_mode;
946
qii=frags[fragi].qii;
947
qi=_enc->state.qis[qii];
948
sad=frag_sad[fragi]<<(pli+1&2);
949
satd=frag_satd[fragi]<<(pli+1&2);
950
sqrt_ssd=sqrt(frag_ssd[fragi]);
951
qtj=mb_mode!=OC_MODE_INTRA;
952
/*Accumulate statistics.
953
The rate (frag_bits) and RMSE (sqrt(frag_ssd)) are not scaled by
954
OC_BIT_SCALE and OC_RMSE_SCALE; this lets us change the scale factor
955
yet still use old data.*/
956
bin=OC_MINI(satd>>OC_SATD_SHIFT,OC_COMP_BINS-1);
957
oc_mode_metrics_add(
958
OC_MODE_METRICS_SATD[modelines[qii][pli][qtj]][pli][qtj]+bin,
959
fragw,satd,_enc->log_plq[qi][pli][qtj],frag_bits,sqrt_ssd);
960
bin=OC_MINI(sad>>OC_SAD_SHIFT,OC_COMP_BINS-1);
961
oc_mode_metrics_add(
962
OC_MODE_METRICS_SAD[modelines[qii][pli][qtj]][pli][qtj]+bin,
963
fragw,sad,_enc->log_plq[qi][pli][qtj],frag_bits,sqrt_ssd);
964
}
965
}
966
/*Update global SA(T)D/logq/rate/RMSE estimation matrix.*/
967
oc_mode_metrics_update(OC_MODE_METRICS_SATD,4,1,
968
OC_MODE_RD_SATD,OC_SATD_SHIFT,OC_MODE_RD_WEIGHT_SATD);
969
oc_mode_metrics_update(OC_MODE_METRICS_SAD,4,1,
970
OC_MODE_RD_SAD,OC_SAD_SHIFT,OC_MODE_RD_WEIGHT_SAD);
971
}
972
# endif
973
974
#endif
975
976