Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/videoio/src/cap_mjpeg_encoder.cpp
16354 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
15
//
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
18
//
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
21
//
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
25
//
26
// * The name of Intel Corporation may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
28
//
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
39
//
40
//M*/
41
42
#include "precomp.hpp"
43
#include "opencv2/videoio/container_avi.private.hpp"
44
45
#include <vector>
46
#include <deque>
47
#include <iostream>
48
#include <cstdlib>
49
50
#if CV_NEON
51
#define WITH_NEON
52
#endif
53
54
namespace cv
55
{
56
57
// Lookup table of low-bit masks: bit_mask[n] has the lowest n bits set
// (bit_mask[0] == 0, bit_mask[32] == 0xFFFFFFFF). Used by mjpeg_buffer::put_bits
// to truncate a value to its significant bits before packing.
static const unsigned bit_mask[] =
{
    0,
    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};

// Internal Huffman-table entries pack (symbol_value << huff_val_shift) | code,
// so the code occupies the low 20 bits and the symbol value the high bits.
static const uchar huff_val_shift = 20;
static const int huff_code_mask = (1 << huff_val_shift) - 1;
72
73
static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
74
{
75
int i, k;
76
int min_val = INT_MAX, max_val = INT_MIN;
77
int size;
78
79
/* calc min and max values in the table */
80
for( i = 1, k = 1; src[k] >= 0; i++ )
81
{
82
int code_count = src[k++];
83
84
for( code_count += k; k < code_count; k++ )
85
{
86
int val = src[k] >> huff_val_shift;
87
if( val < min_val )
88
min_val = val;
89
if( val > max_val )
90
max_val = val;
91
}
92
}
93
94
size = max_val - min_val + 3;
95
96
if( size > max_size )
97
{
98
CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");
99
}
100
101
memset( table, 0, size*sizeof(table[0]));
102
103
table[0] = min_val;
104
table[1] = size - 2;
105
106
for( i = 1, k = 1; src[k] >= 0; i++ )
107
{
108
int code_count = src[k++];
109
110
for( code_count += k; k < code_count; k++ )
111
{
112
int val = src[k] >> huff_val_shift;
113
int code = src[k] & huff_code_mask;
114
115
table[val - min_val + 2] = (code << 8) | i;
116
}
117
}
118
return true;
119
}
120
121
static int* createSourceHuffmanTable(const uchar* src, int* dst,
122
int max_bits, int first_bits)
123
{
124
int i, val_idx, code = 0;
125
int* table = dst;
126
*dst++ = first_bits;
127
for (i = 1, val_idx = max_bits; i <= max_bits; i++)
128
{
129
int code_count = src[i - 1];
130
dst[0] = code_count;
131
code <<= 1;
132
for (int k = 0; k < code_count; k++)
133
{
134
dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
135
}
136
code += code_count;
137
dst += code_count + 1;
138
val_idx += code_count;
139
}
140
dst[0] = -1;
141
return table;
142
}
143
144
145
namespace mjpeg
146
{
147
148
// Growable bitstream accumulator for the MJPEG encoder. Bits are packed
// MSB-first into 32-bit words stored in 'data'; 'bits_free' counts the unused
// low bits of the current word, 'm_pos' indexes that word. One instance is
// used per encoding thread and the results are later stitched together by
// mjpeg_buffer_keeper.
class mjpeg_buffer
{
public:
    mjpeg_buffer()
    {
        reset();
    }

    // Resize the backing word array (size is in 32-bit words, not bytes).
    void resize(int size)
    {
        data.resize(size);
    }

    // Append the low 'len' bits of 'bits' to the stream, MSB-first.
    // len must be in [0, 31]; the buffer doubles itself when it may overflow.
    inline void put_bits(unsigned bits, int len)
    {
        CV_Assert(len >=0 && len < 32);
        // Grow if the incoming bits could spill past the last allocated word.
        if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())
        {
            resize(int(2*data.size()));
        }

        bits_free -= (len);
        unsigned int tempval = (bits) & bit_mask[(len)];

        if( bits_free <= 0 )
        {
            // Value straddles the word boundary: high part into the current
            // word (-bits_free is the overflow count, >= 0 here), low part
            // seeds the next word.
            data[m_pos] |= ((unsigned)tempval >> -bits_free);

            bits_free += 32;
            ++m_pos;
            data[m_pos] = bits_free < 32 ? (tempval << bits_free) : 0;
        }
        else
        {
            data[m_pos] |= (bits_free == 32) ? tempval : (tempval << bits_free);
        }
    }

    // Emit the Huffman code for 'val' using a table produced by
    // createEncodeHuffmanTable(): high bytes carry the code, low byte its length.
    inline void put_val(int val, const unsigned * table)
    {
        unsigned code = table[(val) + 2];
        put_bits(code >> 8, (int)(code & 255));
    }

    // Freeze the stream: record its length in words. A partially filled last
    // word is included in the count; bits_free afterwards reports how many
    // bits of the final word are unused (0 when it ended exactly on a word).
    void finish()
    {
        if(bits_free == 32)
        {
            bits_free = 0;
            m_data_len = m_pos;
        }
        else
        {
            m_data_len = m_pos + 1;
        }
    }

    // Rewind the write position; does not release or clear storage.
    void reset()
    {
        bits_free = 32;
        m_pos = 0;
        m_data_len = 0;
    }

    void clear()
    {
        //we need to clear only first element, the rest would be overwritten
        data[0] = 0;
    }

    int get_bits_free()
    {
        return bits_free;
    }

    unsigned* get_data()
    {
        return &data[0];
    }

    // Length of the finished stream in 32-bit words (valid after finish()).
    unsigned get_len()
    {
        return m_data_len;
    }

private:
    std::vector<unsigned> data;   // packed bitstream words
    int bits_free;                // unused low bits in data[m_pos]
    unsigned m_pos;               // index of the word currently being filled
    unsigned m_data_len;          // word count recorded by finish()
};
239
240
241
// Owns one mjpeg_buffer per worker thread and concatenates their bitstreams
// into a single contiguous output. Because each per-thread stream may end
// mid-word, concatenation requires re-aligning every subsequent stream by a
// running bit offset.
class mjpeg_buffer_keeper
{
public:
    mjpeg_buffer_keeper()
    {
        reset();
    }

    mjpeg_buffer& operator[](int i)
    {
        return m_buffer_list[i];
    }

    // Ensure at least 'count' per-thread buffers exist, each pre-sized to
    // 'size' words. Existing buffers are kept (deque => stable references).
    void allocate_buffers(int count, int size)
    {
        for(int i = (int)m_buffer_list.size(); i < count; ++i)
        {
            m_buffer_list.push_back(mjpeg_buffer());
            m_buffer_list.back().resize(size);
        }
    }

    // Finish all per-thread buffers and return a pointer to the merged
    // bitstream. Afterwards get_data_size()/get_last_bit_len() describe the
    // merged length and the number of valid bits in its final word.
    unsigned* get_data()
    {
        //if there is only one buffer (single thread) there is no need to stack buffers
        if(m_buffer_list.size() == 1)
        {
            m_buffer_list[0].finish();

            m_data_len = m_buffer_list[0].get_len();
            m_last_bit_len = m_buffer_list[0].get_bits_free() ? 32 - m_buffer_list[0].get_bits_free() : 0;

            return m_buffer_list[0].get_data();
        }

        allocate_output_buffer();

        // 'bits' tracks the running misalignment; it is kept in (-32, 0]:
        // 0 means word-aligned, a negative value means -bits valid bits are
        // already parked in 'currval' awaiting completion.
        int bits = 0;
        unsigned currval = 0;
        m_data_len = 0;

        for(unsigned j = 0; j < m_buffer_list.size(); ++j)
        {
            mjpeg_buffer& buffer = m_buffer_list[j];

            //if no bit shift required we could use memcpy
            if(bits == 0)
            {
                size_t current_pos = m_data_len;

                if(buffer.get_bits_free() == 0)
                {
                    // Stream ended exactly on a word: copy whole thing.
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*buffer.get_len());
                    m_data_len += buffer.get_len();
                    currval = 0;
                }
                else
                {
                    // Copy all full words; hold the partial last word in currval.
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*(buffer.get_len() - 1 ));
                    m_data_len += buffer.get_len() - 1;
                    currval = buffer.get_data()[buffer.get_len() - 1];
                }
            }
            else
            {
                // Misaligned: merge each incoming word with the pending bits.
                // bits is negative here, so (31 & (-bits)) == -bits in [1,31]
                // and (bits + 32) is the complementary shift in [1,31].
                for(unsigned i = 0; i < buffer.get_len() - 1; ++i)
                {
                    currval |= ( (unsigned)buffer.get_data()[i] >> (31 & (-bits)) );

                    m_output_buffer[m_data_len++] = currval;

                    currval = buffer.get_data()[i] << (bits + 32);
                }

                currval |= ( (unsigned)buffer.get_data()[buffer.get_len() - 1] >> (31 & (-bits)) );

                // Flush only if the combined pending bits fill a whole word.
                if( buffer.get_bits_free() <= -bits)
                {
                    m_output_buffer[m_data_len++] = currval;

                    currval = buffer.get_data()[buffer.get_len() - 1] << (bits + 32);
                }
            }

            // Fold this stream's trailing free bits into the running offset,
            // renormalizing into (-32, 0].
            bits += buffer.get_bits_free();

            if(bits > 0)
            {
                bits -= 32;
            }
        }

        //bits == 0 means that last element shouldn't be used.
        m_output_buffer[m_data_len++] = currval;

        m_last_bit_len = -bits;

        return &m_output_buffer[0];
    }

    // Number of valid bits in the final word of the merged stream (0..31).
    int get_last_bit_len()
    {
        return m_last_bit_len;
    }

    // Merged stream length in 32-bit words (valid after get_data()).
    int get_data_size()
    {
        return m_data_len;
    }

    // Rewind all per-thread buffers for the next frame; storage is reused.
    void reset()
    {
        m_last_bit_len = 0;
        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
        {
            m_buffer_list[i].reset();
        }

        //there is no need to erase output buffer since it would be overwritten
        m_data_len = 0;
    }

private:

    // Finish every buffer and grow the output buffer to hold their sum.
    void allocate_output_buffer()
    {
        unsigned total_size = 0;

        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
        {
            m_buffer_list[i].finish();
            total_size += m_buffer_list[i].get_len();
        }

        if(total_size > m_output_buffer.size())
        {
            m_output_buffer.clear();
            m_output_buffer.resize(total_size);
        }
    }

    std::deque<mjpeg_buffer> m_buffer_list;   // one per worker thread
    std::vector<unsigned> m_output_buffer;    // merged result
    int m_data_len;                           // merged length in words
    int m_last_bit_len;                       // valid bits in the last word
};
387
388
class MotionJpegWriter : public IVideoWriter
389
{
390
public:
391
MotionJpegWriter()
392
{
393
rawstream = false;
394
nstripes = -1;
395
quality = 0;
396
}
397
398
MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
399
{
400
rawstream = false;
401
open(filename, fps, size, iscolor);
402
nstripes = -1;
403
}
404
~MotionJpegWriter() { close(); }
405
406
virtual int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_OPENCV_MJPEG; }
407
408
void close()
409
{
410
if( !container.isOpenedStream() )
411
return;
412
413
if( !container.isEmptyFrameOffset() && !rawstream )
414
{
415
container.endWriteChunk(); // end LIST 'movi'
416
container.writeIndex(0, dc);
417
container.finishWriteAVI();
418
}
419
}
420
421
bool open(const String& filename, double fps, Size size, bool iscolor)
422
{
423
close();
424
425
if( filename.empty() )
426
return false;
427
const char* ext = strrchr(filename.c_str(), '.');
428
if( !ext )
429
return false;
430
if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
431
return false;
432
433
if( !container.initContainer(filename, fps, size, iscolor) )
434
return false;
435
436
CV_Assert(fps >= 1);
437
quality = 75;
438
rawstream = false;
439
440
if( !rawstream )
441
{
442
container.startWriteAVI(1); // count stream
443
container.writeStreamHeader(MJPEG);
444
}
445
//printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
446
return true;
447
}
448
449
bool isOpened() const CV_OVERRIDE { return container.isOpenedStream(); }
450
451
void write(InputArray _img) CV_OVERRIDE
452
{
453
Mat img = _img.getMat();
454
size_t chunkPointer = container.getStreamPos();
455
int input_channels = img.channels();
456
int colorspace = -1;
457
int imgWidth = img.cols;
458
int frameWidth = container.getWidth();
459
int imgHeight = img.rows;
460
int frameHeight = container.getHeight();
461
int channels = container.getChannels();
462
463
464
if( input_channels == 1 && channels == 1 )
465
{
466
CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight );
467
colorspace = COLORSPACE_GRAY;
468
}
469
else if( input_channels == 4 )
470
{
471
CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
472
colorspace = COLORSPACE_RGBA;
473
}
474
else if( input_channels == 3 )
475
{
476
CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
477
colorspace = COLORSPACE_BGR;
478
}
479
else if( input_channels == 1 && channels == 3 )
480
{
481
CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight*3 );
482
colorspace = COLORSPACE_YUV444P;
483
}
484
else
485
CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");
486
487
if( !rawstream ) {
488
int avi_index = container.getAVIIndex(0, dc);
489
container.startWriteChunk(avi_index);
490
}
491
492
writeFrameData(img.data, (int)img.step, colorspace, input_channels);
493
494
if( !rawstream )
495
{
496
size_t tempChunkPointer = container.getStreamPos();
497
size_t moviPointer = container.getMoviPointer();
498
container.pushFrameOffset(chunkPointer - moviPointer);
499
container.pushFrameSize(tempChunkPointer - chunkPointer - 8); // Size excludes '00dc' and size field
500
container.endWriteChunk(); // end '00dc'
501
}
502
}
503
504
double getProperty(int propId) const CV_OVERRIDE
505
{
506
if( propId == VIDEOWRITER_PROP_QUALITY )
507
return quality;
508
if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
509
{
510
bool isEmpty = container.isEmptyFrameSize();
511
return isEmpty ? 0. : container.atFrameSize(container.countFrameSize() - 1);
512
}
513
if( propId == VIDEOWRITER_PROP_NSTRIPES )
514
return nstripes;
515
return 0.;
516
}
517
518
bool setProperty(int propId, double value) CV_OVERRIDE
519
{
520
if( propId == VIDEOWRITER_PROP_QUALITY )
521
{
522
quality = value;
523
return true;
524
}
525
526
if( propId == VIDEOWRITER_PROP_NSTRIPES)
527
{
528
nstripes = value;
529
return true;
530
}
531
532
return false;
533
}
534
535
void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );
536
537
protected:
538
double quality;
539
bool rawstream;
540
mjpeg_buffer_keeper buffers_list;
541
double nstripes;
542
543
AVIWriteContainer container;
544
};
545
546
// Rounding right-shift used throughout the fixed-point DCT: adds half of the
// divisor (2^(n-1)) before shifting n bits, i.e. round-to-nearest descale.
#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
// Convert a floating-point coefficient to fixed point with n fractional bits.
// Fix: removed the stray trailing ';' from the original definition (which
// expanded to a double statement at every use) and added outer parentheses so
// the macro is safe inside larger expressions such as -fix(...).
#define fix(x, n) ((int)((x)*(1 << (n)) + .5))

// Fixed-point precision settings.
enum
{
    fixb = 14,       // fractional bits for the DCT butterfly constants
    fixc = 12,       // fractional bits for the color-conversion constants
    postshift = 14   // descale shift applied after DCT post-scaling
};

// AAN fast-DCT butterfly constants, scaled by 2^fixb.
static const int C0_707 = fix(0.707106781f, fixb);
static const int C0_541 = fix(0.541196100f, fixb);
static const int C0_382 = fix(0.382683432f, fixb);
static const int C1_306 = fix(1.306562965f, fixb);

// RGB -> YCbCr conversion weights (JPEG/BT.601 coefficients), scaled by 2^fixc.
static const int y_r = fix(0.299, fixc);
static const int y_g = fix(0.587, fixc);
static const int y_b = fix(0.114, fixc);

static const int cb_r = -fix(0.1687, fixc);
static const int cb_g = -fix(0.3313, fixc);
static const int cb_b = fix(0.5, fixc);

static const int cr_r = fix(0.5, fixc);
static const int cr_g = -fix(0.4187, fixc);
static const int cr_b = -fix(0.0813, fixc);
572
573
// Standard JPEG quantization tables
// (Annex K of the JPEG standard; the _T suffix suggests these are stored
// transposed relative to the spec's row-major layout — confirm against use.)

// Luminance quantization table.
static const uchar jpegTableK1_T[] =
{
    16, 12, 14, 14, 18, 24, 49, 72,
    11, 12, 13, 17, 22, 35, 64, 92,
    10, 14, 16, 22, 37, 55, 78, 95,
    16, 19, 24, 29, 56, 64, 87, 98,
    24, 26, 40, 51, 68, 81, 103, 112,
    40, 58, 57, 87, 109, 104, 121, 100,
    51, 60, 69, 80, 103, 113, 120, 103,
    61, 55, 56, 62, 77, 92, 101, 99
};

// Chrominance quantization table.
static const uchar jpegTableK2_T[] =
{
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};

// Standard Huffman tables
// Layout: 16 counts of codes per bit length (1..16), then the symbol values.

// ... for luma DCs.
static const uchar jpegTableK3[] =
{
    0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for chroma DCs.
static const uchar jpegTableK4[] =
{
    0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
613
614
// ... for luma ACs.
// Same layout as the DC tables: 16 per-length counts, then symbol values
// (each symbol packs run-length in the high nibble, size in the low nibble).
static const uchar jpegTableK5[] =
{
    0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};

// ... for chroma ACs
static const uchar jpegTableK6[] =
{
    0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
667
668
// Zig-zag scan order mapping scan position -> 8x8 block index.
// The table has 80 entries: 64 real positions plus 16 trailing 63s,
// presumably a guard against reading past the end during run-length
// scanning — confirm against the encoder loop that consumes it.
static const uchar zigzag[] =
{
    0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
    33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
};


// Per-coefficient prescale factors for the AAN DCT (8x8, row-major).
// NOTE(review): named "idct" but this file only contains a forward DCT;
// the table appears to serve the fdct post-scaling path — confirm.
static const int idct_prescale[] =
{
    16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
    16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
    12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
    8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
    4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
};

// Fixed JPEG/JFIF file header emitted at the start of every frame.
static const char jpegHeader[] =
    "\xFF\xD8" // SOI - start of image
    "\xFF\xE0" // APP0 - jfif extension
    "\x00\x10" // 2 bytes: length of APP0 segment
    "JFIF\x00" // JFIF signature
    "\x01\x02" // version of JFIF
    "\x00" // units = pixels ( 1 - inch, 2 - cm )
    "\x00\x01\x00\x01" // 2 2-bytes values: x density & y density
    "\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)
699
700
#ifdef WITH_NEON
701
// FDCT with postscaling
702
// NEON implementation of the AAN (Arai/Agui/Nakajima) forward 8x8 DCT with
// fused post-scaling. Processes all eight rows at once (one int16x8 lane per
// column), transposes, then processes columns, writing the descaled result
// to 'dst'. 'step' is the row stride of 'src' in shorts; 'postscale' is the
// 64-entry per-coefficient scale table.
static void aan_fdct8x8( const short *src, short *dst,
                         int step, const short *postscale )
{
    // Pass 1: process rows
    int16x8_t x0 = vld1q_s16(src); int16x8_t x1 = vld1q_s16(src + step*7);
    int16x8_t x2 = vld1q_s16(src + step*3); int16x8_t x3 = vld1q_s16(src + step*4);

    int16x8_t x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    int16x8_t t1 = x0; int16x8_t t2 = x2;

    x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);

    x0 = vld1q_s16(src + step); x3 = vld1q_s16(src + step*6);

    x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);
    int16x8_t t3 = x0;

    x0 = vld1q_s16(src + step*2); x3 = vld1q_s16(src + step*5);

    int16x8_t t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);
    x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    int16x8_t res0 = x1;
    int16x8_t res4 = x2;
    // vqdmulhq_n_s16 computes (a*b*2)>>16, so constants are passed as C*2
    // to realize the C/2^15 fixed-point multiply.
    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
    x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);

    int16x8_t res2 = x4;
    int16x8_t res6 = x1;

    // Odd-part butterflies.
    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;
    x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);
    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);
    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    // C1_306 > 1.0 would overflow as C*2 in int16, so multiply by C1_306 and
    // shift left by 1 instead.
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);

    int16x8_t res1 = x0;
    int16x8_t res3 = x3;
    int16x8_t res5 = x1;
    int16x8_t res7 = x4;

    //transpose a matrix
    /*
     res0 00 01 02 03 04 05 06 07
     res1 10 11 12 13 14 15 16 17
     res2 20 21 22 23 24 25 26 27
     res3 30 31 32 33 34 35 36 37
     res4 40 41 42 43 44 45 46 47
     res5 50 51 52 53 54 55 56 57
     res6 60 61 62 63 64 65 66 67
     res7 70 71 72 73 74 75 76 77
     */

    //transpose elements 00-33
    int16x4_t res0_0 = vget_low_s16(res0);
    int16x4_t res1_0 = vget_low_s16(res1);
    int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res2);
    res1_0 = vget_low_s16(res3);
    tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres1 = vtrnq_s32(l0, l1);

    // transpose elements 40-73
    res0_0 = vget_low_s16(res4);
    res1_0 = vget_low_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res6);
    res1_0 = vget_low_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres2 = vtrnq_s32(l0, l1);

    //combine into 0-3
    int16x8_t transp_res0 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res1 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res2 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res3 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    // transpose elements 04-37
    res0_0 = vget_high_s16(res0);
    res1_0 = vget_high_s16(res1);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res2);
    res1_0 = vget_high_s16(res3);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    tres1 = vtrnq_s32(l0, l1);

    // transpose elements 44-77
    res0_0 = vget_high_s16(res4);
    res1_0 = vget_high_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res6);
    res1_0 = vget_high_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    tres2 = vtrnq_s32(l0, l1);

    //combine into 4-7
    int16x8_t transp_res4 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res5 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res6 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res7 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    //special hack for vqdmulhq_s16 command that is producing -1 instead of 0
    // Multiply |reg| by the postscale factor (via saturating doubling high
    // half), then restore the sign using the precomputed negative mask.
#define STORE_DESCALED(addr, reg, mul_addr) postscale_line = vld1q_s16((mul_addr)); \
mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
reg = vabsq_s16(reg); \
reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
reg = vsubq_s16(veorq_s16(reg, mask), mask); \
vst1q_s16((addr), reg);

    int16x8_t z = vdupq_n_s16(0), postscale_line, mask;

    // pass 2: process columns
    x0 = transp_res0; x1 = transp_res7;
    x2 = transp_res3; x3 = transp_res4;

    x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    t1 = x0; t2 = x2;

    x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);

    x0 = transp_res1;
    x3 = transp_res6;

    x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);

    t3 = x0;

    x0 = transp_res2; x3 = transp_res5;

    t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);

    x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);

    STORE_DESCALED(dst, x1, postscale);
    STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);

    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));

    x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);

    STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);
    STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);

    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;

    x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);

    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);

    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);

    STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);
    STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);
    STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);
    STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);
}
902
903
#else
904
// FDCT with postscaling
905
// Scalar AAN (Arai/Agui/Nakajima) forward 8x8 DCT with fused post-scaling.
// Pass 1 transforms each row of 'src' (row stride 'step', in shorts) into a
// temporary workspace; pass 2 transforms the workspace columns and writes the
// result, scaled by the per-coefficient 'postscale' table and descaled by
// 'postshift', into 'dst' (contiguous 8x8). Statement order is significant:
// workspace slots are deliberately reused within each butterfly sequence.
static void aan_fdct8x8( const short *src, short *dst,
                         int step, const short *postscale )
{
    int workspace[64], *work = workspace;
    int i;

    // Pass 1: process rows
    for( i = 8; i > 0; i--, src += step, work += 8 )
    {
        // Even part: butterflies on the symmetric input pairs.
        int x0 = src[0], x1 = src[7];
        int x2 = src[3], x3 = src[4];

        int x4 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[7] = x0; work[1] = x2;
        x2 = x4 + x1; x4 -= x1;

        x0 = src[1]; x3 = src[6];
        x1 = x0 + x3; x0 -= x3;
        work[5] = x0;

        x0 = src[2]; x3 = src[5];
        work[3] = x0 - x3; x0 += x3;

        x3 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[0] = x1; work[4] = x2;

        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
        x1 = x4 + x0; x4 -= x0;
        work[2] = x4; work[6] = x1;

        // Odd part: rotations using the AAN constants.
        x0 = work[1]; x1 = work[3];
        x2 = work[5]; x3 = work[7];

        x0 += x1; x1 += x2; x2 += x3;
        x1 = DCT_DESCALE(x1*C0_707, fixb);

        x4 = x1 + x3; x3 -= x1;
        x1 = (x0 - x2)*C0_382;
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);

        x1 = x0 + x3; x3 -= x0;
        x0 = x4 + x2; x4 -= x2;

        work[5] = x1; work[1] = x0;
        work[7] = x4; work[3] = x3;
    }

    work = workspace;
    // pass 2: process columns
    for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
    {
        int x0 = work[8*0], x1 = work[8*7];
        int x2 = work[8*3], x3 = work[8*4];

        int x4 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        work[8*7] = x0; work[8*0] = x2;
        x2 = x4 + x1; x4 -= x1;

        x0 = work[8*1]; x3 = work[8*6];
        x1 = x0 + x3; x0 -= x3;
        work[8*4] = x0;

        x0 = work[8*2]; x3 = work[8*5];
        work[8*3] = x0 - x3; x0 += x3;

        x3 = x0 + x1; x0 -= x1;
        x1 = x2 + x3; x2 -= x3;

        // Outputs are post-scaled per coefficient and rounded down to short.
        dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
        dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);

        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
        x1 = x4 + x0; x4 -= x0;

        dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
        dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);

        x0 = work[8*0]; x1 = work[8*3];
        x2 = work[8*4]; x3 = work[8*7];

        x0 += x1; x1 += x2; x2 += x3;
        x1 = DCT_DESCALE(x1*C0_707, fixb);

        x4 = x1 + x3; x3 -= x1;
        x1 = (x0 - x2)*C0_382;
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);

        x1 = x0 + x3; x3 -= x0;
        x0 = x4 + x2; x4 -= x2;

        dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
        dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
        dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
        dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
    }
}
1009
#endif
1010
1011
1012
inline void convertToYUV(int colorspace, int channels, int input_channels, short* UV_data, short* Y_data, const uchar* pix_data, int y_limit, int x_limit, int step, int u_plane_ofs, int v_plane_ofs)
1013
{
1014
int i, j;
1015
const int UV_step = 16;
1016
int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1017
int Y_step = x_scale*8;
1018
1019
if( channels > 1 )
1020
{
1021
if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
1022
{
1023
for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
1024
{
1025
#ifdef WITH_NEON
1026
{
1027
uint16x8_t masklo = vdupq_n_u16(255);
1028
uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
1029
uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1030
lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
1031
uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1032
t1 = vaddq_u16(t1, t2);
1033
vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1034
1035
lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
1036
t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1037
lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
1038
t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1039
t1 = vaddq_u16(t1, t2);
1040
vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1041
}
1042
1043
{
1044
int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
1045
int16x8_t delta = vdupq_n_s16(128);
1046
lane = vsubq_s16(lane, delta);
1047
vst1q_s16(Y_data, lane);
1048
1049
lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
1050
lane = vsubq_s16(lane, delta);
1051
vst1q_s16(Y_data + 8, lane);
1052
1053
lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
1054
lane = vsubq_s16(lane, delta);
1055
vst1q_s16(Y_data+Y_step, lane);
1056
1057
lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
1058
lane = vsubq_s16(lane, delta);
1059
vst1q_s16(Y_data+Y_step + 8, lane);
1060
}
1061
#else
1062
for( j = 0; j < x_limit; j += 2, pix_data += 2 )
1063
{
1064
Y_data[j] = pix_data[0] - 128;
1065
Y_data[j+1] = pix_data[1] - 128;
1066
Y_data[j+Y_step] = pix_data[step] - 128;
1067
Y_data[j+Y_step+1] = pix_data[step+1] - 128;
1068
1069
UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
1070
pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
1071
UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
1072
pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
1073
1074
}
1075
1076
pix_data -= x_limit*input_channels;
1077
#endif
1078
}
1079
}
1080
else
1081
{
1082
for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1083
{
1084
for( j = 0; j < x_limit; j++, pix_data += input_channels )
1085
{
1086
int Y, U, V;
1087
1088
if( colorspace == COLORSPACE_BGR )
1089
{
1090
int r = pix_data[2];
1091
int g = pix_data[1];
1092
int b = pix_data[0];
1093
1094
Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1095
U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1096
V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1097
}
1098
else if( colorspace == COLORSPACE_RGBA )
1099
{
1100
int r = pix_data[0];
1101
int g = pix_data[1];
1102
int b = pix_data[2];
1103
1104
Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1105
U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1106
V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1107
}
1108
else
1109
{
1110
Y = pix_data[0] - 128;
1111
U = pix_data[v_plane_ofs] - 128;
1112
V = pix_data[u_plane_ofs] - 128;
1113
}
1114
1115
int j2 = j >> (x_scale - 1);
1116
Y_data[j] = (short)Y;
1117
UV_data[j2] = (short)(UV_data[j2] + U);
1118
UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
1119
}
1120
1121
pix_data -= x_limit*input_channels;
1122
if( ((i+1) & (y_scale - 1)) == 0 )
1123
{
1124
UV_data += UV_step;
1125
}
1126
}
1127
}
1128
1129
}
1130
else
1131
{
1132
for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1133
{
1134
for( j = 0; j < x_limit; j++ )
1135
Y_data[j] = (short)(pix_data[j]*4 - 128*4);
1136
}
1137
}
1138
}
1139
1140
class MjpegEncoder : public ParallelLoopBody
1141
{
1142
public:
1143
MjpegEncoder(int _height,
1144
int _width,
1145
int _step,
1146
const uchar* _data,
1147
int _input_channels,
1148
int _channels,
1149
int _colorspace,
1150
unsigned (&_huff_dc_tab)[2][16],
1151
unsigned (&_huff_ac_tab)[2][256],
1152
short (&_fdct_qtab)[2][64],
1153
uchar* _cat_table,
1154
mjpeg_buffer_keeper& _buffer_list,
1155
double nstripes
1156
) :
1157
m_buffer_list(_buffer_list),
1158
height(_height),
1159
width(_width),
1160
step(_step),
1161
in_data(_data),
1162
input_channels(_input_channels),
1163
channels(_channels),
1164
colorspace(_colorspace),
1165
huff_dc_tab(_huff_dc_tab),
1166
huff_ac_tab(_huff_ac_tab),
1167
fdct_qtab(_fdct_qtab),
1168
cat_table(_cat_table)
1169
{
1170
//empirically found value. if number of pixels is less than that value there is no sense to parallelize it.
1171
const int min_pixels_count = 96*96;
1172
1173
stripes_count = 1;
1174
1175
if(nstripes < 0)
1176
{
1177
if(height*width > min_pixels_count)
1178
{
1179
stripes_count = default_stripes_count;
1180
}
1181
}
1182
else
1183
{
1184
stripes_count = cvCeil(nstripes);
1185
}
1186
1187
int y_scale = channels > 1 ? 2 : 1;
1188
int y_step = y_scale * 8;
1189
1190
int max_stripes = (height - 1)/y_step + 1;
1191
1192
stripes_count = std::min(stripes_count, max_stripes);
1193
1194
m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count);
1195
}
1196
1197
void operator()( const cv::Range& range ) const CV_OVERRIDE
1198
{
1199
const int CAT_TAB_SIZE = 4096;
1200
1201
int x, y;
1202
int i, j;
1203
1204
short buffer[4096];
1205
int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1206
int dc_pred[] = { 0, 0, 0 };
1207
int x_step = x_scale * 8;
1208
int y_step = y_scale * 8;
1209
short block[6][64];
1210
int luma_count = x_scale*y_scale;
1211
int block_count = luma_count + channels - 1;
1212
int u_plane_ofs = step*height;
1213
int v_plane_ofs = u_plane_ofs + step*height;
1214
const uchar* data = in_data;
1215
const uchar* init_data = data;
1216
1217
int num_steps = (height - 1)/y_step + 1;
1218
1219
//if this is not first stripe we need to calculate dc_pred from previous step
1220
if(range.start > 0)
1221
{
1222
y = y_step*int(num_steps*range.start/stripes_count - 1);
1223
data = init_data + y*step;
1224
1225
for( x = 0; x < width; x += x_step )
1226
{
1227
int x_limit = x_step;
1228
int y_limit = y_step;
1229
const uchar* pix_data = data + x*input_channels;
1230
short* Y_data = block[0];
1231
short* UV_data = block[luma_count];
1232
1233
if( x + x_limit > width ) x_limit = width - x;
1234
if( y + y_limit > height ) y_limit = height - y;
1235
1236
memset( block, 0, block_count*64*sizeof(block[0][0]));
1237
1238
convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);
1239
1240
for( i = 0; i < block_count; i++ )
1241
{
1242
int is_chroma = i >= luma_count;
1243
int src_step = x_scale * 8;
1244
const short* src_ptr = block[i & -2] + (i & 1)*8;
1245
1246
aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
1247
1248
j = is_chroma + (i > luma_count);
1249
dc_pred[j] = buffer[0];
1250
}
1251
}
1252
}
1253
1254
for(int k = range.start; k < range.end; ++k)
1255
{
1256
mjpeg_buffer& output_buffer = m_buffer_list[k];
1257
output_buffer.clear();
1258
1259
int y_min = y_step*int(num_steps*k/stripes_count);
1260
int y_max = y_step*int(num_steps*(k+1)/stripes_count);
1261
1262
if(k == stripes_count - 1)
1263
{
1264
y_max = height;
1265
}
1266
1267
1268
data = init_data + y_min*step;
1269
1270
for( y = y_min; y < y_max; y += y_step, data += y_step*step )
1271
{
1272
for( x = 0; x < width; x += x_step )
1273
{
1274
int x_limit = x_step;
1275
int y_limit = y_step;
1276
const uchar* pix_data = data + x*input_channels;
1277
short* Y_data = block[0];
1278
short* UV_data = block[luma_count];
1279
1280
if( x + x_limit > width ) x_limit = width - x;
1281
if( y + y_limit > height ) y_limit = height - y;
1282
1283
memset( block, 0, block_count*64*sizeof(block[0][0]));
1284
1285
convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);
1286
1287
for( i = 0; i < block_count; i++ )
1288
{
1289
int is_chroma = i >= luma_count;
1290
int src_step = x_scale * 8;
1291
int run = 0, val;
1292
const short* src_ptr = block[i & -2] + (i & 1)*8;
1293
const unsigned* htable = huff_ac_tab[is_chroma];
1294
1295
aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
1296
1297
j = is_chroma + (i > luma_count);
1298
val = buffer[0] - dc_pred[j];
1299
dc_pred[j] = buffer[0];
1300
1301
{
1302
int cat = cat_table[val + CAT_TAB_SIZE];
1303
1304
//CV_Assert( cat <= 11 );
1305
output_buffer.put_val(cat, huff_dc_tab[is_chroma] );
1306
output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
1307
}
1308
1309
for( j = 1; j < 64; j++ )
1310
{
1311
val = buffer[zigzag[j]];
1312
1313
if( val == 0 )
1314
{
1315
run++;
1316
}
1317
else
1318
{
1319
while( run >= 16 )
1320
{
1321
output_buffer.put_val( 0xF0, htable ); // encode 16 zeros
1322
run -= 16;
1323
}
1324
1325
{
1326
int cat = cat_table[val + CAT_TAB_SIZE];
1327
//CV_Assert( cat <= 10 );
1328
output_buffer.put_val( cat + run*16, htable );
1329
output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
1330
}
1331
1332
run = 0;
1333
}
1334
}
1335
1336
if( run )
1337
{
1338
output_buffer.put_val( 0x00, htable ); // encode EOB
1339
}
1340
}
1341
}
1342
}
1343
}
1344
}
1345
1346
cv::Range getRange()
1347
{
1348
return cv::Range(0, stripes_count);
1349
}
1350
1351
double getNStripes()
1352
{
1353
return stripes_count;
1354
}
1355
1356
mjpeg_buffer_keeper& m_buffer_list;
1357
private:
1358
1359
MjpegEncoder& operator=( const MjpegEncoder & ) { return *this; }
1360
1361
const int height;
1362
const int width;
1363
const int step;
1364
const uchar* in_data;
1365
const int input_channels;
1366
const int channels;
1367
const int colorspace;
1368
const unsigned (&huff_dc_tab)[2][16];
1369
const unsigned (&huff_ac_tab)[2][256];
1370
const short (&fdct_qtab)[2][64];
1371
const uchar* cat_table;
1372
int stripes_count;
1373
static const int default_stripes_count;
1374
};
1375
1376
const int MjpegEncoder::default_stripes_count = 4;
1377
1378
void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
1379
{
1380
//double total_cvt = 0, total_dct = 0;
1381
static bool init_cat_table = false;
1382
const int CAT_TAB_SIZE = 4096;
1383
static uchar cat_table[CAT_TAB_SIZE*2+1];
1384
if( !init_cat_table )
1385
{
1386
for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
1387
{
1388
Cv32suf a;
1389
a.f = (float)i;
1390
cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
1391
}
1392
init_cat_table = true;
1393
}
1394
1395
//double total_dct = 0, total_cvt = 0;
1396
int width = container.getWidth();
1397
int height = container.getHeight();
1398
int channels = container.getChannels();
1399
1400
CV_Assert( data && width > 0 && height > 0 );
1401
1402
// encode the header and tables
1403
// for each mcu:
1404
// convert rgb to yuv with downsampling (if color).
1405
// for every block:
1406
// calc dct and quantize
1407
// encode block.
1408
int i, j;
1409
const int max_quality = 12;
1410
short fdct_qtab[2][64];
1411
unsigned huff_dc_tab[2][16];
1412
unsigned huff_ac_tab[2][256];
1413
1414
int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1415
short buffer[4096];
1416
int* hbuffer = (int*)buffer;
1417
int luma_count = x_scale*y_scale;
1418
double _quality = quality*0.01*max_quality;
1419
1420
if( _quality < 1. ) _quality = 1.;
1421
if( _quality > max_quality ) _quality = max_quality;
1422
1423
double inv_quality = 1./_quality;
1424
1425
// Encode header
1426
container.putStreamBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );
1427
1428
// Encode quantization tables
1429
for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
1430
{
1431
const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
1432
int chroma_scale = i > 0 ? luma_count : 1;
1433
1434
container.jputStreamShort( 0xffdb ); // DQT marker
1435
container.jputStreamShort( 2 + 65*1 ); // put single qtable
1436
container.putStreamByte( 0*16 + i ); // 8-bit table
1437
1438
// put coefficients
1439
for( j = 0; j < 64; j++ )
1440
{
1441
int idx = zigzag[j];
1442
int qval = cvRound(qtable[idx]*inv_quality);
1443
if( qval < 1 )
1444
qval = 1;
1445
if( qval > 255 )
1446
qval = 255;
1447
fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
1448
(qval*chroma_scale*idct_prescale[idx]));
1449
container.putStreamByte( qval );
1450
}
1451
}
1452
1453
// Encode huffman tables
1454
for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
1455
{
1456
const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
1457
i == 2 ? jpegTableK4 : jpegTableK6;
1458
int is_ac_tab = i & 1;
1459
int idx = i >= 2;
1460
int tableSize = 16 + (is_ac_tab ? 162 : 12);
1461
1462
container.jputStreamShort( 0xFFC4 ); // DHT marker
1463
container.jputStreamShort( 3 + tableSize ); // define one huffman table
1464
container.putStreamByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
1465
container.putStreamBytes( htable, tableSize ); // put table
1466
1467
createEncodeHuffmanTable(createSourceHuffmanTable( htable, hbuffer, 16, 9 ),
1468
is_ac_tab ? huff_ac_tab[idx] : huff_dc_tab[idx],
1469
is_ac_tab ? 256 : 16 );
1470
}
1471
1472
// put frame header
1473
container.jputStreamShort( 0xFFC0 ); // SOF0 marker
1474
container.jputStreamShort( 8 + 3*channels ); // length of frame header
1475
container.putStreamByte( 8 ); // sample precision
1476
container.jputStreamShort( height );
1477
container.jputStreamShort( width );
1478
container.putStreamByte( channels ); // number of components
1479
1480
for( i = 0; i < channels; i++ )
1481
{
1482
container.putStreamByte( i + 1 ); // (i+1)-th component id (Y,U or V)
1483
if( i == 0 )
1484
container.putStreamByte(x_scale*16 + y_scale); // chroma scale factors
1485
else
1486
container.putStreamByte(1*16 + 1);
1487
container.putStreamByte( i > 0 ); // quantization table idx
1488
}
1489
1490
// put scan header
1491
container.jputStreamShort( 0xFFDA ); // SOS marker
1492
container.jputStreamShort( 6 + 2*channels ); // length of scan header
1493
container.putStreamByte( channels ); // number of components in the scan
1494
1495
for( i = 0; i < channels; i++ )
1496
{
1497
container.putStreamByte( i+1 ); // component id
1498
container.putStreamByte( (i>0)*16 + (i>0) );// selection of DC & AC tables
1499
}
1500
1501
container.jputStreamShort(0*256 + 63); // start and end of spectral selection - for
1502
// sequential DCT start is 0 and end is 63
1503
1504
container.putStreamByte( 0 ); // successive approximation bit position
1505
// high & low - (0,0) for sequential DCT
1506
1507
buffers_list.reset();
1508
1509
MjpegEncoder parallel_encoder(height, width, step, data, input_channels, channels, colorspace, huff_dc_tab, huff_ac_tab, fdct_qtab, cat_table, buffers_list, nstripes);
1510
1511
cv::parallel_for_(parallel_encoder.getRange(), parallel_encoder, parallel_encoder.getNStripes());
1512
1513
//std::vector<unsigned>& v = parallel_encoder.m_buffer_list.get_data();
1514
unsigned* v = buffers_list.get_data();
1515
unsigned last_data_elem = buffers_list.get_data_size() - 1;
1516
1517
for(unsigned k = 0; k < last_data_elem; ++k)
1518
{
1519
container.jputStream(v[k]);
1520
}
1521
container.jflushStream(v[last_data_elem], 32 - buffers_list.get_last_bit_len());
1522
container.jputStreamShort( 0xFFD9 ); // EOI marker
1523
/*printf("total dct = %.1fms, total cvt = %.1fms\n",
1524
total_dct*1000./cv::getTickFrequency(),
1525
total_cvt*1000./cv::getTickFrequency());*/
1526
1527
size_t pos = container.getStreamPos();
1528
size_t pos1 = (pos + 3) & ~3;
1529
for( ; pos < pos1; pos++ )
1530
container.putStreamByte(0);
1531
}
1532
1533
}
// Factory for the built-in MJPEG AVI writer. Returns an empty pointer when the
// requested fourcc is not 'MJPG' or when the writer fails to open the file,
// letting the videoio dispatcher fall through to other backends.
Ptr<IVideoWriter> createMotionJpegWriter(const String& filename, int fourcc, double fps, Size frameSize, bool iscolor)
{
    if (fourcc != CV_FOURCC('M', 'J', 'P', 'G'))
        return Ptr<IVideoWriter>();

    Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);
    if( !iwriter->isOpened() )
        iwriter.release();
    return iwriter;
}
}