CoCalc -- cap_mjpeg

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/videoio/src/cap_mjpeg_encoder.cpp
¹⁶³⁵⁴ views
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

42
#include "precomp.hpp"
43
#include "opencv2/videoio/container_avi.private.hpp"
44

45
#include <vector>
46
#include <deque>
47
#include <iostream>
48
#include <cstdlib>
49

50
#if CV_NEON
51
#define WITH_NEON
52
#endif
53

54
namespace cv
55
{
56

57
// bit_mask[n] has the n least significant bits set, for n = 0..32.
// mjpeg_buffer::put_bits() uses it to keep only the low `len` bits of a value.
static const unsigned bit_mask[] =
{
    0,
    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};
69

70
static const uchar huff_val_shift = 20;
71
static const int huff_code_mask = (1 << huff_val_shift) - 1;
72

73
static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
74
{
75
    int  i, k;
76
    int  min_val = INT_MAX, max_val = INT_MIN;
77
    int  size;
78

79
    /* calc min and max values in the table */
80
    for( i = 1, k = 1; src[k] >= 0; i++ )
81
    {
82
        int code_count = src[k++];
83

84
        for( code_count += k; k < code_count; k++ )
85
        {
86
            int  val = src[k] >> huff_val_shift;
87
            if( val < min_val )
88
                min_val = val;
89
            if( val > max_val )
90
                max_val = val;
91
        }
92
    }
93

94
    size = max_val - min_val + 3;
95

96
    if( size > max_size )
97
    {
98
        CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");
99
    }
100

101
    memset( table, 0, size*sizeof(table[0]));
102

103
    table[0] = min_val;
104
    table[1] = size - 2;
105

106
    for( i = 1, k = 1; src[k] >= 0; i++ )
107
    {
108
        int code_count = src[k++];
109

110
        for( code_count += k; k < code_count; k++ )
111
        {
112
            int  val = src[k] >> huff_val_shift;
113
            int  code = src[k] & huff_code_mask;
114

115
            table[val - min_val + 2] = (code << 8) | i;
116
        }
117
    }
118
    return true;
119
}
120

121
static int* createSourceHuffmanTable(const uchar* src, int* dst,
122
                                         int max_bits, int first_bits)
123
{
124
    int   i, val_idx, code = 0;
125
    int*  table = dst;
126
    *dst++ = first_bits;
127
    for (i = 1, val_idx = max_bits; i <= max_bits; i++)
128
    {
129
        int code_count = src[i - 1];
130
        dst[0] = code_count;
131
        code <<= 1;
132
        for (int k = 0; k < code_count; k++)
133
        {
134
            dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
135
        }
136
        code += code_count;
137
        dst += code_count + 1;
138
        val_idx += code_count;
139
    }
140
    dst[0] = -1;
141
    return  table;
142
}
143

144

145
namespace mjpeg
146
{
147

148
class mjpeg_buffer
149
{
150
public:
151
    mjpeg_buffer()
152
    {
153
        reset();
154
    }
155

156
    void resize(int size)
157
    {
158
        data.resize(size);
159
    }
160

161
    inline void put_bits(unsigned bits, int len)
162
    {
163
        CV_Assert(len >=0 && len < 32);
164
        if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())
165
        {
166
            resize(int(2*data.size()));
167
        }
168

169
        bits_free -= (len);
170
        unsigned int tempval = (bits) & bit_mask[(len)];
171

172
        if( bits_free <= 0 )
173
        {
174
            data[m_pos] |= ((unsigned)tempval >> -bits_free);
175

176
            bits_free += 32;
177
            ++m_pos;
178
            data[m_pos] = bits_free < 32 ? (tempval << bits_free) : 0;
179
        }
180
        else
181
        {
182
            data[m_pos] |= (bits_free == 32) ? tempval : (tempval << bits_free);
183
        }
184
    }
185

186
    inline void put_val(int val, const unsigned * table)
187
    {
188
        unsigned code = table[(val) + 2];
189
        put_bits(code >> 8, (int)(code & 255));
190
    }
191

192
    void finish()
193
    {
194
        if(bits_free == 32)
195
        {
196
            bits_free = 0;
197
            m_data_len = m_pos;
198
        }
199
        else
200
        {
201
            m_data_len = m_pos + 1;
202
        }
203
    }
204

205
    void reset()
206
    {
207
        bits_free = 32;
208
        m_pos = 0;
209
        m_data_len = 0;
210
    }
211

212
    void clear()
213
    {
214
        //we need to clear only first element, the rest would be overwritten
215
        data[0] = 0;
216
    }
217

218
    int get_bits_free()
219
    {
220
        return bits_free;
221
    }
222

223
    unsigned* get_data()
224
    {
225
        return &data[0];
226
    }
227

228
    unsigned get_len()
229
    {
230
        return m_data_len;
231
    }
232

233
private:
234
    std::vector<unsigned> data;
235
    int bits_free;
236
    unsigned m_pos;
237
    unsigned m_data_len;
238
};
239

240

241
class mjpeg_buffer_keeper
242
{
243
public:
244
    mjpeg_buffer_keeper()
245
    {
246
        reset();
247
    }
248

249
    mjpeg_buffer& operator[](int i)
250
    {
251
        return m_buffer_list[i];
252
    }
253

254
    void allocate_buffers(int count, int size)
255
    {
256
        for(int i = (int)m_buffer_list.size(); i < count; ++i)
257
        {
258
            m_buffer_list.push_back(mjpeg_buffer());
259
            m_buffer_list.back().resize(size);
260
        }
261
    }
262

263
    unsigned* get_data()
264
    {
265
        //if there is only one buffer (single thread) there is no need to stack buffers
266
        if(m_buffer_list.size() == 1)
267
        {
268
            m_buffer_list[0].finish();
269

270
            m_data_len = m_buffer_list[0].get_len();
271
            m_last_bit_len = m_buffer_list[0].get_bits_free() ? 32 - m_buffer_list[0].get_bits_free() : 0;
272

273
            return m_buffer_list[0].get_data();
274
        }
275

276
        allocate_output_buffer();
277

278
        int bits = 0;
279
        unsigned currval = 0;
280
        m_data_len = 0;
281

282
        for(unsigned j = 0; j < m_buffer_list.size(); ++j)
283
        {
284
            mjpeg_buffer& buffer = m_buffer_list[j];
285

286
            //if no bit shift required we could use memcpy
287
            if(bits == 0)
288
            {
289
                size_t current_pos = m_data_len;
290

291
                if(buffer.get_bits_free() == 0)
292
                {
293
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*buffer.get_len());
294
                    m_data_len += buffer.get_len();
295
                    currval = 0;
296
                }
297
                else
298
                {
299
                    memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*(buffer.get_len() - 1 ));
300
                    m_data_len += buffer.get_len() - 1;
301
                    currval = buffer.get_data()[buffer.get_len() - 1];
302
                }
303
            }
304
            else
305
            {
306
                for(unsigned i = 0; i < buffer.get_len() - 1; ++i)
307
                {
308
                    currval |= ( (unsigned)buffer.get_data()[i] >> (31 & (-bits)) );
309

310
                    m_output_buffer[m_data_len++] = currval;
311

312
                    currval = buffer.get_data()[i] << (bits + 32);
313
                }
314

315
                currval |= ( (unsigned)buffer.get_data()[buffer.get_len() - 1] >> (31 & (-bits)) );
316

317
                if( buffer.get_bits_free() <= -bits)
318
                {
319
                    m_output_buffer[m_data_len++] = currval;
320

321
                    currval = buffer.get_data()[buffer.get_len() - 1] << (bits + 32);
322
                }
323
            }
324

325
            bits += buffer.get_bits_free();
326

327
            if(bits > 0)
328
            {
329
                bits -= 32;
330
            }
331
        }
332

333
        //bits == 0 means that last element shouldn't be used.
334
        m_output_buffer[m_data_len++] = currval;
335

336
        m_last_bit_len = -bits;
337

338
        return &m_output_buffer[0];
339
    }
340

341
    int get_last_bit_len()
342
    {
343
        return m_last_bit_len;
344
    }
345

346
    int get_data_size()
347
    {
348
        return m_data_len;
349
    }
350

351
    void reset()
352
    {
353
        m_last_bit_len = 0;
354
        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
355
        {
356
            m_buffer_list[i].reset();
357
        }
358

359
        //there is no need to erase output buffer since it would be overwritten
360
        m_data_len = 0;
361
    }
362

363
private:
364

365
    void allocate_output_buffer()
366
    {
367
        unsigned total_size = 0;
368

369
        for(unsigned i = 0; i < m_buffer_list.size(); ++i)
370
        {
371
            m_buffer_list[i].finish();
372
            total_size += m_buffer_list[i].get_len();
373
        }
374

375
        if(total_size > m_output_buffer.size())
376
        {
377
            m_output_buffer.clear();
378
            m_output_buffer.resize(total_size);
379
        }
380
    }
381

382
    std::deque<mjpeg_buffer> m_buffer_list;
383
    std::vector<unsigned> m_output_buffer;
384
    int m_data_len;
385
    int m_last_bit_len;
386
};
387

388
class MotionJpegWriter : public IVideoWriter
389
{
390
public:
391
    MotionJpegWriter()
392
    {
393
        rawstream = false;
394
        nstripes = -1;
395
        quality = 0;
396
    }
397

398
    MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
399
    {
400
        rawstream = false;
401
        open(filename, fps, size, iscolor);
402
        nstripes = -1;
403
    }
404
    ~MotionJpegWriter() { close(); }
405

406
    virtual int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_OPENCV_MJPEG; }
407

408
    void close()
409
    {
410
        if( !container.isOpenedStream() )
411
            return;
412

413
        if( !container.isEmptyFrameOffset() && !rawstream )
414
        {
415
            container.endWriteChunk(); // end LIST 'movi'
416
            container.writeIndex(0, dc);
417
            container.finishWriteAVI();
418
        }
419
    }
420

421
    bool open(const String& filename, double fps, Size size, bool iscolor)
422
    {
423
        close();
424

425
        if( filename.empty() )
426
            return false;
427
        const char* ext = strrchr(filename.c_str(), '.');
428
        if( !ext )
429
            return false;
430
        if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
431
            return false;
432

433
        if( !container.initContainer(filename, fps, size, iscolor) )
434
            return false;
435

436
        CV_Assert(fps >= 1);
437
        quality = 75;
438
        rawstream = false;
439

440
        if( !rawstream )
441
        {
442
            container.startWriteAVI(1); // count stream
443
            container.writeStreamHeader(MJPEG);
444
        }
445
        //printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
446
        return true;
447
    }
448

449
    bool isOpened() const CV_OVERRIDE { return container.isOpenedStream(); }
450

451
    void write(InputArray _img) CV_OVERRIDE
452
    {
453
        Mat img = _img.getMat();
454
        size_t chunkPointer = container.getStreamPos();
455
        int input_channels = img.channels();
456
        int colorspace = -1;
457
        int imgWidth = img.cols;
458
        int frameWidth = container.getWidth();
459
        int imgHeight = img.rows;
460
        int frameHeight = container.getHeight();
461
        int channels = container.getChannels();
462

463

464
        if( input_channels == 1 && channels == 1 )
465
        {
466
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight );
467
            colorspace = COLORSPACE_GRAY;
468
        }
469
        else if( input_channels == 4 )
470
        {
471
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
472
            colorspace = COLORSPACE_RGBA;
473
        }
474
        else if( input_channels == 3 )
475
        {
476
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );
477
            colorspace = COLORSPACE_BGR;
478
        }
479
        else if( input_channels == 1 && channels == 3 )
480
        {
481
            CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight*3 );
482
            colorspace = COLORSPACE_YUV444P;
483
        }
484
        else
485
            CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");
486

487
        if( !rawstream ) {
488
            int avi_index = container.getAVIIndex(0, dc);
489
            container.startWriteChunk(avi_index);
490
        }
491

492
        writeFrameData(img.data, (int)img.step, colorspace, input_channels);
493

494
        if( !rawstream )
495
        {
496
            size_t tempChunkPointer = container.getStreamPos();
497
            size_t moviPointer = container.getMoviPointer();
498
            container.pushFrameOffset(chunkPointer - moviPointer);
499
            container.pushFrameSize(tempChunkPointer - chunkPointer - 8);       // Size excludes '00dc' and size field
500
            container.endWriteChunk(); // end '00dc'
501
        }
502
    }
503

504
    double getProperty(int propId) const CV_OVERRIDE
505
    {
506
        if( propId == VIDEOWRITER_PROP_QUALITY )
507
            return quality;
508
        if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
509
        {
510
            bool isEmpty = container.isEmptyFrameSize();
511
            return isEmpty ? 0. : container.atFrameSize(container.countFrameSize() - 1);
512
        }
513
        if( propId == VIDEOWRITER_PROP_NSTRIPES )
514
            return nstripes;
515
        return 0.;
516
    }
517

518
    bool setProperty(int propId, double value) CV_OVERRIDE
519
    {
520
        if( propId == VIDEOWRITER_PROP_QUALITY )
521
        {
522
            quality = value;
523
            return true;
524
        }
525

526
        if( propId == VIDEOWRITER_PROP_NSTRIPES)
527
        {
528
            nstripes = value;
529
            return true;
530
        }
531

532
        return false;
533
    }
534

535
    void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );
536

537
protected:
538
    double quality;
539
    bool rawstream;
540
    mjpeg_buffer_keeper buffers_list;
541
    double nstripes;
542

543
    AVIWriteContainer container;
544
};
545

546
// Divides x by 2^n with rounding to nearest (adds half before shifting).
#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
// Converts the real constant x to fixed point with n fractional bits, rounded.
// Fully parenthesised and without a trailing ';' so it can be used inside
// expressions as well as in initialisers.
#define fix(x, n)   ((int)((x)*(1 << (n)) + .5))

// Number of fractional bits of the fixed-point constants below.
enum
{
    fixb = 14,
    fixc = 12,
    postshift = 14
};

// Multipliers used by the DCT (fixb fractional bits).
static const int C0_707 = fix(0.707106781f, fixb);
static const int C0_541 = fix(0.541196100f, fixb);
static const int C0_382 = fix(0.382683432f, fixb);
static const int C1_306 = fix(1.306562965f, fixb);

// Weights of the R, G, B components for Y, Cb and Cr (fixc fractional bits).
static const int y_r = fix(0.299, fixc);
static const int y_g = fix(0.587, fixc);
static const int y_b = fix(0.114, fixc);

static const int cb_r = -fix(0.1687, fixc);
static const int cb_g = -fix(0.3313, fixc);
static const int cb_b = fix(0.5, fixc);

static const int cr_r = fix(0.5, fixc);
static const int cr_g = -fix(0.4187, fixc);
static const int cr_b = -fix(0.0813, fixc);
572

573
// Standard JPEG quantization tables
574
// 8x8 luminance quantization table.
// NOTE(review): the values match Table K.1 of the JPEG specification stored
// column by column (transposed), as the _T suffix suggests - confirm against
// the code that consumes it.
static const uchar jpegTableK1_T[] =
{
    16, 12, 14, 14,  18,  24,  49,  72,
    11, 12, 13, 17,  22,  35,  64,  92,
    10, 14, 16, 22,  37,  55,  78,  95,
    16, 19, 24, 29,  56,  64,  87,  98,
    24, 26, 40, 51,  68,  81, 103, 112,
    40, 58, 57, 87, 109, 104, 121, 100,
    51, 60, 69, 80, 103, 113, 120, 103,
    61, 55, 56, 62,  77,  92, 101,  99
};

// 8x8 chrominance quantization table (symmetric, so the same in either
// orientation).
static const uchar jpegTableK2_T[] =
{
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};
597

598
// Standard Huffman tables
599

600
// ... for luma DCs.
601
// Each Huffman table below starts with 16 counts (first row), followed by the
// coded values; the counts of every table add up to the number of values.
// NOTE(review): presumably this is the layout createSourceHuffmanTable reads
// with max_bits = 16 - confirm at the call sites.

// ... for luma DCs.
static const uchar jpegTableK3[] =
{
    0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for chroma DCs.
static const uchar jpegTableK4[] =
{
    0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// ... for luma ACs.
static const uchar jpegTableK5[] =
{
    0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};

// ... for chroma ACs
static const uchar jpegTableK6[] =
{
    0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
667

668
// Maps a position in zigzag scan order to an index inside the 8x8 block.
// The 64 real entries are followed by 16 extra entries that all point at 63.
// NOTE(review): the padding presumably keeps a run that overshoots position 63
// inside the block - confirm against the reader of this table.
static const uchar zigzag[] =
{
    0,  8,  1,  2,  9, 16, 24, 17, 10,  3,  4, 11, 18, 25, 32, 40,
    33, 26, 19, 12,  5,  6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    28, 21, 14,  7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
};
676

677

678
// 8x8 table of per-coefficient scale factors; entry [0][0] is 16384 (1 << 14)
// and the table is symmetric.
// NOTE(review): presumably combined with the quantization tables to build the
// `postscale` array passed to aan_fdct8x8 - confirm where it is used.
static const int idct_prescale[] =
{
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
689

690
// SOI marker followed by a JFIF APP0 segment (20 bytes, plus the terminating
// NUL the string literal adds). The pieces are kept as separate literals so
// that no hex escape runs into the characters that follow it.
static const char jpegHeader[] =
"\xFF\xD8"  // SOI  - start of image
"\xFF\xE0"  // APP0 - jfif extension
"\x00\x10"  // 2 bytes: length of APP0 segment
"JFIF\x00"  // JFIF signature
"\x01\x02"  // version of JFIF
"\x00"      // units: 0 - none (densities give the pixel aspect ratio), 1 - inch, 2 - cm
"\x00\x01\x00\x01" // 2 2-bytes values: x density & y density
"\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)
699

700
#ifdef WITH_NEON
701
// FDCT with postscaling
702
static void aan_fdct8x8( const short *src, short *dst,
703
                        int step, const short *postscale )
704
{
705
    // Pass 1: process rows
706
    int16x8_t x0 = vld1q_s16(src);    int16x8_t x1 = vld1q_s16(src + step*7);
707
    int16x8_t x2 = vld1q_s16(src + step*3);    int16x8_t x3 = vld1q_s16(src + step*4);
708

709
    int16x8_t x4 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
710
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
711

712
    int16x8_t t1 = x0; int16x8_t t2 = x2;
713

714
    x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
715

716
    x0 = vld1q_s16(src + step);    x3 = vld1q_s16(src + step*6);
717

718
    x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
719
    int16x8_t t3 = x0;
720

721
    x0 = vld1q_s16(src + step*2);    x3 = vld1q_s16(src + step*5);
722

723
    int16x8_t t4 = vsubq_s16(x0, x3);
724

725
    x0 = vaddq_s16(x0, x3);
726
    x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
727
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
728

729
    int16x8_t res0 = x1;
730
    int16x8_t res4 = x2;
731
    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
732
    x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
733

734
    int16x8_t res2 = x4;
735
    int16x8_t res6 = x1;
736

737
    x0 = t2;    x1 = t4;
738
    x2 = t3;    x3 = t1;
739
    x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
740
    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
741

742
    x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
743
    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
744
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
745
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
746

747
    x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
748
    x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
749

750
    int16x8_t res1 = x0;
751
    int16x8_t res3 = x3;
752
    int16x8_t res5 = x1;
753
    int16x8_t res7 = x4;
754

755
    //transpose a matrix
756
    /*
757
     res0 00 01 02 03 04 05 06 07
758
     res1 10 11 12 13 14 15 16 17
759
     res2 20 21 22 23 24 25 26 27
760
     res3 30 31 32 33 34 35 36 37
761
     res4 40 41 42 43 44 45 46 47
762
     res5 50 51 52 53 54 55 56 57
763
     res6 60 61 62 63 64 65 66 67
764
     res7 70 71 72 73 74 75 76 77
765
     */
766

767
    //transpose elements 00-33
768
    int16x4_t res0_0 = vget_low_s16(res0);
769
    int16x4_t res1_0 = vget_low_s16(res1);
770
    int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
771
    int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
772

773
    res0_0 = vget_low_s16(res2);
774
    res1_0 = vget_low_s16(res3);
775
    tres = vtrn_s16(res0_0, res1_0);
776
    int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
777

778
    int32x4x2_t tres1 = vtrnq_s32(l0, l1);
779

780
    // transpose elements 40-73
781
    res0_0 = vget_low_s16(res4);
782
    res1_0 = vget_low_s16(res5);
783
    tres = vtrn_s16(res0_0, res1_0);
784
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
785

786
    res0_0 = vget_low_s16(res6);
787
    res1_0 = vget_low_s16(res7);
788

789
    tres = vtrn_s16(res0_0, res1_0);
790
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
791

792
    int32x4x2_t tres2 = vtrnq_s32(l0, l1);
793

794
    //combine into 0-3
795
    int16x8_t transp_res0 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
796
    int16x8_t transp_res1 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
797
    int16x8_t transp_res2 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
798
    int16x8_t transp_res3 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
799

800
    // transpose elements 04-37
801
    res0_0 = vget_high_s16(res0);
802
    res1_0 = vget_high_s16(res1);
803
    tres = vtrn_s16(res0_0, res1_0);
804
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
805

806
    res0_0 = vget_high_s16(res2);
807
    res1_0 = vget_high_s16(res3);
808

809
    tres = vtrn_s16(res0_0, res1_0);
810
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
811

812
    tres1 = vtrnq_s32(l0, l1);
813

814
    // transpose elements 44-77
815
    res0_0 = vget_high_s16(res4);
816
    res1_0 = vget_high_s16(res5);
817
    tres = vtrn_s16(res0_0, res1_0);
818
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
819

820
    res0_0 = vget_high_s16(res6);
821
    res1_0 = vget_high_s16(res7);
822

823
    tres = vtrn_s16(res0_0, res1_0);
824
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
825

826
    tres2 = vtrnq_s32(l0, l1);
827

828
    //combine into 4-7
829
    int16x8_t transp_res4 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
830
    int16x8_t transp_res5 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
831
    int16x8_t transp_res6 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
832
    int16x8_t transp_res7 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
833

834
    //special hack for vqdmulhq_s16 command that is producing -1 instead of 0
835
#define STORE_DESCALED(addr, reg, mul_addr)            postscale_line = vld1q_s16((mul_addr)); \
836
mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
837
reg = vabsq_s16(reg); \
838
reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
839
reg = vsubq_s16(veorq_s16(reg, mask), mask); \
840
vst1q_s16((addr), reg);
841

842
    int16x8_t z = vdupq_n_s16(0), postscale_line, mask;
843

844
    // pass 2: process columns
845
    x0 = transp_res0;    x1 = transp_res7;
846
    x2 = transp_res3;    x3 = transp_res4;
847

848
    x4 = vaddq_s16(x0, x1);   x0 = vsubq_s16(x0, x1);
849
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
850

851
    t1 = x0; t2 = x2;
852

853
    x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
854

855
    x0 = transp_res1;
856
    x3 = transp_res6;
857

858
    x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
859

860
    t3 = x0;
861

862
    x0 = transp_res2; x3 = transp_res5;
863

864
    t4 = vsubq_s16(x0, x3);
865

866
    x0 = vaddq_s16(x0, x3);
867

868
    x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
869
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
870

871
    STORE_DESCALED(dst, x1, postscale);
872
    STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);
873

874
    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
875

876
    x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
877

878
    STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);
879
    STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);
880

881
    x0 = t2; x1 = t4;
882
    x2 = t3; x3 = t1;
883

884
    x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
885

886
    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
887

888
    x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
889

890
    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
891
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
892
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
893

894
    x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
895
    x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
896

897
    STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);
898
    STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);
899
    STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);
900
    STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);
901
}
902

903
#else
904
// FDCT with postscaling
905
static void aan_fdct8x8( const short *src, short *dst,
906
                        int step, const short *postscale )
907
{
908
    int workspace[64], *work = workspace;
909
    int  i;
910

911
    // Pass 1: process rows
912
    for( i = 8; i > 0; i--, src += step, work += 8 )
913
    {
914
        int x0 = src[0], x1 = src[7];
915
        int x2 = src[3], x3 = src[4];
916

917
        int x4 = x0 + x1; x0 -= x1;
918
        x1 = x2 + x3; x2 -= x3;
919

920
        work[7] = x0; work[1] = x2;
921
        x2 = x4 + x1; x4 -= x1;
922

923
        x0 = src[1]; x3 = src[6];
924
        x1 = x0 + x3; x0 -= x3;
925
        work[5] = x0;
926

927
        x0 = src[2]; x3 = src[5];
928
        work[3] = x0 - x3; x0 += x3;
929

930
        x3 = x0 + x1; x0 -= x1;
931
        x1 = x2 + x3; x2 -= x3;
932

933
        work[0] = x1; work[4] = x2;
934

935
        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
936
        x1 = x4 + x0; x4 -= x0;
937
        work[2] = x4; work[6] = x1;
938

939
        x0 = work[1]; x1 = work[3];
940
        x2 = work[5]; x3 = work[7];
941

942
        x0 += x1; x1 += x2; x2 += x3;
943
        x1 = DCT_DESCALE(x1*C0_707, fixb);
944

945
        x4 = x1 + x3; x3 -= x1;
946
        x1 = (x0 - x2)*C0_382;
947
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
948
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
949

950
        x1 = x0 + x3; x3 -= x0;
951
        x0 = x4 + x2; x4 -= x2;
952

953
        work[5] = x1; work[1] = x0;
954
        work[7] = x4; work[3] = x3;
955
    }
956

957
    work = workspace;
958
    // pass 2: process columns
959
    for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
960
    {
961
        int  x0 = work[8*0], x1 = work[8*7];
962
        int  x2 = work[8*3], x3 = work[8*4];
963

964
        int  x4 = x0 + x1; x0 -= x1;
965
        x1 = x2 + x3; x2 -= x3;
966

967
        work[8*7] = x0; work[8*0] = x2;
968
        x2 = x4 + x1; x4 -= x1;
969

970
        x0 = work[8*1]; x3 = work[8*6];
971
        x1 = x0 + x3; x0 -= x3;
972
        work[8*4] = x0;
973

974
        x0 = work[8*2]; x3 = work[8*5];
975
        work[8*3] = x0 - x3; x0 += x3;
976

977
        x3 = x0 + x1; x0 -= x1;
978
        x1 = x2 + x3; x2 -= x3;
979

980
        dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
981
        dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);
982

983
        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
984
        x1 = x4 + x0; x4 -= x0;
985

986
        dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
987
        dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);
988

989
        x0 = work[8*0]; x1 = work[8*3];
990
        x2 = work[8*4]; x3 = work[8*7];
991

992
        x0 += x1; x1 += x2; x2 += x3;
993
        x1 = DCT_DESCALE(x1*C0_707, fixb);
994

995
        x4 = x1 + x3; x3 -= x1;
996
        x1 = (x0 - x2)*C0_382;
997
        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
998
        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
999

1000
        x1 = x0 + x3; x3 -= x0;
1001
        x0 = x4 + x2; x4 -= x2;
1002

1003
        dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
1004
        dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
1005
        dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
1006
        dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
1007
    }
1008
}
1009
#endif
1010

1011

1012
inline void convertToYUV(int colorspace, int channels, int input_channels, short* UV_data, short* Y_data, const uchar* pix_data, int y_limit, int x_limit, int step, int u_plane_ofs, int v_plane_ofs)
1013
{
1014
    int i, j;
1015
    const int UV_step = 16;
1016
    int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1017
    int  Y_step = x_scale*8;
1018

1019
    if( channels > 1 )
1020
    {
1021
        if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
1022
        {
1023
            for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
1024
            {
1025
#ifdef WITH_NEON
1026
                {
1027
                    uint16x8_t masklo = vdupq_n_u16(255);
1028
                    uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
1029
                    uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1030
                    lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
1031
                    uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1032
                    t1 = vaddq_u16(t1, t2);
1033
                    vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1034

1035
                    lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
1036
                    t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1037
                    lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
1038
                    t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1039
                    t1 = vaddq_u16(t1, t2);
1040
                    vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1041
                }
1042

1043
                {
1044
                    int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
1045
                    int16x8_t delta = vdupq_n_s16(128);
1046
                    lane = vsubq_s16(lane, delta);
1047
                    vst1q_s16(Y_data, lane);
1048

1049
                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
1050
                    lane = vsubq_s16(lane, delta);
1051
                    vst1q_s16(Y_data + 8, lane);
1052

1053
                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
1054
                    lane = vsubq_s16(lane, delta);
1055
                    vst1q_s16(Y_data+Y_step, lane);
1056

1057
                    lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
1058
                    lane = vsubq_s16(lane, delta);
1059
                    vst1q_s16(Y_data+Y_step + 8, lane);
1060
                }
1061
#else
1062
                for( j = 0; j < x_limit; j += 2, pix_data += 2 )
1063
                {
1064
                    Y_data[j] = pix_data[0] - 128;
1065
                    Y_data[j+1] = pix_data[1] - 128;
1066
                    Y_data[j+Y_step] = pix_data[step] - 128;
1067
                    Y_data[j+Y_step+1] = pix_data[step+1] - 128;
1068

1069
                    UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
1070
                        pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
1071
                    UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
1072
                        pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
1073

1074
                }
1075

1076
                pix_data -= x_limit*input_channels;
1077
#endif
1078
            }
1079
        }
1080
        else
1081
        {
1082
            for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1083
            {
1084
                for( j = 0; j < x_limit; j++, pix_data += input_channels )
1085
                {
1086
                    int Y, U, V;
1087

1088
                    if( colorspace == COLORSPACE_BGR )
1089
                    {
1090
                        int r = pix_data[2];
1091
                        int g = pix_data[1];
1092
                        int b = pix_data[0];
1093

1094
                        Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1095
                        U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1096
                        V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1097
                    }
1098
                    else if( colorspace == COLORSPACE_RGBA )
1099
                    {
1100
                        int r = pix_data[0];
1101
                        int g = pix_data[1];
1102
                        int b = pix_data[2];
1103

1104
                        Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1105
                        U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1106
                        V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1107
                    }
1108
                    else
1109
                    {
1110
                        Y = pix_data[0] - 128;
1111
                        U = pix_data[v_plane_ofs] - 128;
1112
                        V = pix_data[u_plane_ofs] - 128;
1113
                    }
1114

1115
                    int j2 = j >> (x_scale - 1);
1116
                    Y_data[j] = (short)Y;
1117
                    UV_data[j2] = (short)(UV_data[j2] + U);
1118
                    UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
1119
                }
1120

1121
                pix_data -= x_limit*input_channels;
1122
                if( ((i+1) & (y_scale - 1)) == 0 )
1123
                {
1124
                    UV_data += UV_step;
1125
                }
1126
            }
1127
        }
1128

1129
    }
1130
    else
1131
    {
1132
        for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1133
        {
1134
            for( j = 0; j < x_limit; j++ )
1135
                Y_data[j] = (short)(pix_data[j]*4 - 128*4);
1136
        }
1137
    }
1138
}
1139

1140
class MjpegEncoder : public ParallelLoopBody
1141
{
1142
public:
1143
    MjpegEncoder(int _height,
1144
        int _width,
1145
        int _step,
1146
        const uchar* _data,
1147
        int _input_channels,
1148
        int _channels,
1149
        int _colorspace,
1150
        unsigned (&_huff_dc_tab)[2][16],
1151
        unsigned (&_huff_ac_tab)[2][256],
1152
        short (&_fdct_qtab)[2][64],
1153
        uchar* _cat_table,
1154
        mjpeg_buffer_keeper& _buffer_list,
1155
        double nstripes
1156
    ) :
1157
        m_buffer_list(_buffer_list),
1158
        height(_height),
1159
        width(_width),
1160
        step(_step),
1161
        in_data(_data),
1162
        input_channels(_input_channels),
1163
        channels(_channels),
1164
        colorspace(_colorspace),
1165
        huff_dc_tab(_huff_dc_tab),
1166
        huff_ac_tab(_huff_ac_tab),
1167
        fdct_qtab(_fdct_qtab),
1168
        cat_table(_cat_table)
1169
    {
1170
        //empirically found value. if number of pixels is less than that value there is no sense to parallelize it.
1171
        const int min_pixels_count = 96*96;
1172

1173
        stripes_count = 1;
1174

1175
        if(nstripes < 0)
1176
        {
1177
            if(height*width > min_pixels_count)
1178
            {
1179
                stripes_count = default_stripes_count;
1180
            }
1181
        }
1182
        else
1183
        {
1184
            stripes_count = cvCeil(nstripes);
1185
        }
1186

1187
        int y_scale = channels > 1 ? 2 : 1;
1188
        int y_step = y_scale * 8;
1189

1190
        int max_stripes = (height - 1)/y_step + 1;
1191

1192
        stripes_count = std::min(stripes_count, max_stripes);
1193

1194
        m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count);
1195
    }
1196

1197
    void operator()( const cv::Range& range ) const CV_OVERRIDE
1198
    {
1199
        const int CAT_TAB_SIZE = 4096;
1200

1201
        int x, y;
1202
        int i, j;
1203

1204
        short  buffer[4096];
1205
        int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1206
        int  dc_pred[] = { 0, 0, 0 };
1207
        int  x_step = x_scale * 8;
1208
        int  y_step = y_scale * 8;
1209
        short  block[6][64];
1210
        int  luma_count = x_scale*y_scale;
1211
        int  block_count = luma_count + channels - 1;
1212
        int u_plane_ofs = step*height;
1213
        int v_plane_ofs = u_plane_ofs + step*height;
1214
        const uchar* data = in_data;
1215
        const uchar* init_data = data;
1216

1217
        int num_steps = (height - 1)/y_step + 1;
1218

1219
        //if this is not first stripe we need to calculate dc_pred from previous step
1220
        if(range.start > 0)
1221
        {
1222
            y = y_step*int(num_steps*range.start/stripes_count - 1);
1223
            data = init_data + y*step;
1224

1225
            for( x = 0; x < width; x += x_step )
1226
            {
1227
                int x_limit = x_step;
1228
                int y_limit = y_step;
1229
                const uchar* pix_data = data + x*input_channels;
1230
                short* Y_data = block[0];
1231
                short* UV_data = block[luma_count];
1232

1233
                if( x + x_limit > width ) x_limit = width - x;
1234
                if( y + y_limit > height ) y_limit = height - y;
1235

1236
                memset( block, 0, block_count*64*sizeof(block[0][0]));
1237

1238
                convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);
1239

1240
                for( i = 0; i < block_count; i++ )
1241
                {
1242
                    int is_chroma = i >= luma_count;
1243
                    int src_step = x_scale * 8;
1244
                    const short* src_ptr = block[i & -2] + (i & 1)*8;
1245

1246
                    aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
1247

1248
                    j = is_chroma + (i > luma_count);
1249
                    dc_pred[j] = buffer[0];
1250
                }
1251
            }
1252
        }
1253

1254
        for(int k = range.start; k < range.end; ++k)
1255
        {
1256
            mjpeg_buffer& output_buffer = m_buffer_list[k];
1257
            output_buffer.clear();
1258

1259
            int y_min = y_step*int(num_steps*k/stripes_count);
1260
            int y_max = y_step*int(num_steps*(k+1)/stripes_count);
1261

1262
            if(k == stripes_count - 1)
1263
            {
1264
                y_max = height;
1265
            }
1266

1267

1268
            data = init_data + y_min*step;
1269

1270
            for( y = y_min; y < y_max; y += y_step, data += y_step*step )
1271
            {
1272
                for( x = 0; x < width; x += x_step )
1273
                {
1274
                    int x_limit = x_step;
1275
                    int y_limit = y_step;
1276
                    const uchar* pix_data = data + x*input_channels;
1277
                    short* Y_data = block[0];
1278
                    short* UV_data = block[luma_count];
1279

1280
                    if( x + x_limit > width ) x_limit = width - x;
1281
                    if( y + y_limit > height ) y_limit = height - y;
1282

1283
                    memset( block, 0, block_count*64*sizeof(block[0][0]));
1284

1285
                    convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);
1286

1287
                    for( i = 0; i < block_count; i++ )
1288
                    {
1289
                        int is_chroma = i >= luma_count;
1290
                        int src_step = x_scale * 8;
1291
                        int run = 0, val;
1292
                        const short* src_ptr = block[i & -2] + (i & 1)*8;
1293
                        const unsigned* htable = huff_ac_tab[is_chroma];
1294

1295
                        aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
1296

1297
                        j = is_chroma + (i > luma_count);
1298
                        val = buffer[0] - dc_pred[j];
1299
                        dc_pred[j] = buffer[0];
1300

1301
                        {
1302
                            int cat = cat_table[val + CAT_TAB_SIZE];
1303

1304
                            //CV_Assert( cat <= 11 );
1305
                            output_buffer.put_val(cat, huff_dc_tab[is_chroma] );
1306
                            output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
1307
                        }
1308

1309
                        for( j = 1; j < 64; j++ )
1310
                        {
1311
                            val = buffer[zigzag[j]];
1312

1313
                            if( val == 0 )
1314
                            {
1315
                                run++;
1316
                            }
1317
                            else
1318
                            {
1319
                                while( run >= 16 )
1320
                                {
1321
                                    output_buffer.put_val( 0xF0, htable ); // encode 16 zeros
1322
                                    run -= 16;
1323
                                }
1324

1325
                                {
1326
                                    int cat = cat_table[val + CAT_TAB_SIZE];
1327
                                    //CV_Assert( cat <= 10 );
1328
                                    output_buffer.put_val( cat + run*16, htable );
1329
                                    output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
1330
                                }
1331

1332
                                run = 0;
1333
                            }
1334
                        }
1335

1336
                        if( run )
1337
                        {
1338
                            output_buffer.put_val( 0x00, htable ); // encode EOB
1339
                        }
1340
                    }
1341
                }
1342
            }
1343
        }
1344
    }
1345

1346
    cv::Range getRange()
1347
    {
1348
        return cv::Range(0, stripes_count);
1349
    }
1350

1351
    double getNStripes()
1352
    {
1353
        return stripes_count;
1354
    }
1355

1356
    mjpeg_buffer_keeper& m_buffer_list;
1357
private:
1358

1359
    MjpegEncoder& operator=( const MjpegEncoder & ) { return *this; }
1360

1361
    const int height;
1362
    const int width;
1363
    const int step;
1364
    const uchar* in_data;
1365
    const int input_channels;
1366
    const int channels;
1367
    const int colorspace;
1368
    const unsigned (&huff_dc_tab)[2][16];
1369
    const unsigned (&huff_ac_tab)[2][256];
1370
    const short (&fdct_qtab)[2][64];
1371
    const uchar* cat_table;
1372
    int stripes_count;
1373
    static const int default_stripes_count;
1374
};
1375

1376
const int MjpegEncoder::default_stripes_count = 4;
1377

1378
void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
1379
{
1380
    //double total_cvt = 0, total_dct = 0;
1381
    static bool init_cat_table = false;
1382
    const int CAT_TAB_SIZE = 4096;
1383
    static uchar cat_table[CAT_TAB_SIZE*2+1];
1384
    if( !init_cat_table )
1385
    {
1386
        for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
1387
        {
1388
            Cv32suf a;
1389
            a.f = (float)i;
1390
            cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
1391
        }
1392
        init_cat_table = true;
1393
    }
1394

1395
    //double total_dct = 0, total_cvt = 0;
1396
    int width = container.getWidth();
1397
    int height = container.getHeight();
1398
    int channels = container.getChannels();
1399

1400
    CV_Assert( data && width > 0 && height > 0 );
1401

1402
    // encode the header and tables
1403
    // for each mcu:
1404
    //   convert rgb to yuv with downsampling (if color).
1405
    //   for every block:
1406
    //     calc dct and quantize
1407
    //     encode block.
1408
    int i, j;
1409
    const int max_quality = 12;
1410
    short fdct_qtab[2][64];
1411
    unsigned huff_dc_tab[2][16];
1412
    unsigned huff_ac_tab[2][256];
1413

1414
    int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1415
    short  buffer[4096];
1416
    int*   hbuffer = (int*)buffer;
1417
    int  luma_count = x_scale*y_scale;
1418
    double _quality = quality*0.01*max_quality;
1419

1420
    if( _quality < 1. ) _quality = 1.;
1421
    if( _quality > max_quality ) _quality = max_quality;
1422

1423
    double inv_quality = 1./_quality;
1424

1425
    // Encode header
1426
    container.putStreamBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );
1427

1428
    // Encode quantization tables
1429
    for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
1430
    {
1431
        const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
1432
        int chroma_scale = i > 0 ? luma_count : 1;
1433

1434
        container.jputStreamShort( 0xffdb );   // DQT marker
1435
        container.jputStreamShort( 2 + 65*1 ); // put single qtable
1436
        container.putStreamByte( 0*16 + i );   // 8-bit table
1437

1438
        // put coefficients
1439
        for( j = 0; j < 64; j++ )
1440
        {
1441
            int idx = zigzag[j];
1442
            int qval = cvRound(qtable[idx]*inv_quality);
1443
            if( qval < 1 )
1444
                qval = 1;
1445
            if( qval > 255 )
1446
                qval = 255;
1447
            fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
1448
                                (qval*chroma_scale*idct_prescale[idx]));
1449
            container.putStreamByte( qval );
1450
        }
1451
    }
1452

1453
    // Encode huffman tables
1454
    for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
1455
    {
1456
        const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
1457
        i == 2 ? jpegTableK4 : jpegTableK6;
1458
        int is_ac_tab = i & 1;
1459
        int idx = i >= 2;
1460
        int tableSize = 16 + (is_ac_tab ? 162 : 12);
1461

1462
        container.jputStreamShort( 0xFFC4 );      // DHT marker
1463
        container.jputStreamShort( 3 + tableSize ); // define one huffman table
1464
        container.putStreamByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
1465
        container.putStreamBytes( htable, tableSize ); // put table
1466

1467
        createEncodeHuffmanTable(createSourceHuffmanTable( htable, hbuffer, 16, 9 ),
1468
                                 is_ac_tab ? huff_ac_tab[idx] : huff_dc_tab[idx],
1469
                                 is_ac_tab ? 256 : 16 );
1470
    }
1471

1472
    // put frame header
1473
    container.jputStreamShort( 0xFFC0 );          // SOF0 marker
1474
    container.jputStreamShort( 8 + 3*channels );  // length of frame header
1475
    container.putStreamByte( 8 );               // sample precision
1476
    container.jputStreamShort( height );
1477
    container.jputStreamShort( width );
1478
    container.putStreamByte( channels );        // number of components
1479

1480
    for( i = 0; i < channels; i++ )
1481
    {
1482
        container.putStreamByte( i + 1 );  // (i+1)-th component id (Y,U or V)
1483
        if( i == 0 )
1484
            container.putStreamByte(x_scale*16 + y_scale); // chroma scale factors
1485
        else
1486
            container.putStreamByte(1*16 + 1);
1487
        container.putStreamByte( i > 0 ); // quantization table idx
1488
    }
1489

1490
    // put scan header
1491
    container.jputStreamShort( 0xFFDA );          // SOS marker
1492
    container.jputStreamShort( 6 + 2*channels );  // length of scan header
1493
    container.putStreamByte( channels );          // number of components in the scan
1494

1495
    for( i = 0; i < channels; i++ )
1496
    {
1497
        container.putStreamByte( i+1 );             // component id
1498
        container.putStreamByte( (i>0)*16 + (i>0) );// selection of DC & AC tables
1499
    }
1500

1501
    container.jputStreamShort(0*256 + 63); // start and end of spectral selection - for
1502
    // sequential DCT start is 0 and end is 63
1503

1504
    container.putStreamByte( 0 );  // successive approximation bit position
1505
    // high & low - (0,0) for sequential DCT
1506

1507
    buffers_list.reset();
1508

1509
    MjpegEncoder parallel_encoder(height, width, step, data, input_channels, channels, colorspace, huff_dc_tab, huff_ac_tab, fdct_qtab, cat_table, buffers_list, nstripes);
1510

1511
    cv::parallel_for_(parallel_encoder.getRange(), parallel_encoder, parallel_encoder.getNStripes());
1512

1513
    //std::vector<unsigned>& v = parallel_encoder.m_buffer_list.get_data();
1514
    unsigned* v = buffers_list.get_data();
1515
    unsigned last_data_elem = buffers_list.get_data_size() - 1;
1516

1517
    for(unsigned k = 0; k < last_data_elem; ++k)
1518
    {
1519
        container.jputStream(v[k]);
1520
    }
1521
    container.jflushStream(v[last_data_elem], 32 - buffers_list.get_last_bit_len());
1522
    container.jputStreamShort( 0xFFD9 ); // EOI marker
1523
    /*printf("total dct = %.1fms, total cvt = %.1fms\n",
1524
     total_dct*1000./cv::getTickFrequency(),
1525
     total_cvt*1000./cv::getTickFrequency());*/
1526

1527
    size_t pos = container.getStreamPos();
1528
    size_t pos1 = (pos + 3) & ~3;
1529
    for( ; pos < pos1; pos++ )
1530
        container.putStreamByte(0);
1531
}
1532

1533
}
1534

1535
// Factory for the built-in Motion JPEG writer.
// Returns an empty pointer when fourcc is not 'MJPG' or when the writer
// could not be opened; otherwise returns the opened writer.
Ptr<IVideoWriter> createMotionJpegWriter(const String& filename, int fourcc, double fps, Size frameSize, bool iscolor)
{
    Ptr<IVideoWriter> writer;

    if (fourcc == CV_FOURCC('M', 'J', 'P', 'G'))
    {
        writer = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);
        if( !writer->isOpened() )
            writer.release();
    }

    return writer;
}
1545

1546
}
1547

1548
Product

Resources

Company