Path: blob/master/modules/videoio/src/cap_mjpeg_encoder.cpp
16354 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2015, OpenCV Foundation, all rights reserved.13// Third party copyrights are property of their respective owners.14//15// Redistribution and use in source and binary forms, with or without modification,16// are permitted provided that the following conditions are met:17//18// * Redistribution's of source code must retain the above copyright notice,19// this list of conditions and the following disclaimer.20//21// * Redistribution's in binary form must reproduce the above copyright notice,22// this list of conditions and the following disclaimer in the documentation23// and/or other materials provided with the distribution.24//25// * The name of Intel Corporation may not be used to endorse or promote products26// derived from this software without specific prior written permission.27//28// This software is provided by the copyright holders and contributors "as is" and29// any express or implied warranties, including, but not limited to, the implied30// warranties of merchantability and fitness for a particular purpose are disclaimed.31// In no event shall the Intel Corporation or contributors be liable for any direct,32// indirect, incidental, special, exemplary, or consequential damages33// (including, but not limited to, procurement of substitute goods or services;34// loss of use, data, or profits; or business interruption) however caused35// and on any theory of liability, whether in contract, strict liability,36// or tort (including negligence or otherwise) arising in any way out of37// the use of this software, even if advised of the possibility of such damage.38//39//M*/4041#include "precomp.hpp"42#include "opencv2/videoio/container_avi.private.hpp"4344#include <vector>45#include <deque>46#include <iostream>47#include <cstdlib>4849#if CV_NEON50#define WITH_NEON51#endif5253namespace cv54{5556static const unsigned bit_mask[] =57{580,590x00000001, 0x00000003, 0x00000007, 0x0000000F,600x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,610x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,620x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,630x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,640x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,650x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,660x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF67};6869static const uchar huff_val_shift = 20;70static const int huff_code_mask = (1 << huff_val_shift) - 1;7172static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )73{74int i, k;75int min_val = INT_MAX, max_val = INT_MIN;76int size;7778/* calc min and max values in the table */79for( i = 1, k = 1; src[k] >= 0; i++ )80{81int code_count = src[k++];8283for( code_count += k; k < code_count; k++ )84{85int val = src[k] >> huff_val_shift;86if( val < min_val )87min_val = val;88if( val > max_val )89max_val = val;90}91}9293size = max_val - min_val + 3;9495if( size > max_size )96{97CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");98}99100memset( table, 0, size*sizeof(table[0]));101102table[0] = min_val;103table[1] = size - 2;104105for( i = 1, k = 1; src[k] >= 0; i++ )106{107int code_count = src[k++];108109for( code_count += k; k < code_count; k++ )110{111int val = src[k] >> huff_val_shift;112int code = src[k] & huff_code_mask;113114table[val - min_val + 2] = (code << 8) | i;115}116}117return true;118}119120static int* createSourceHuffmanTable(const uchar* src, int* dst,121int max_bits, int first_bits)122{123int i, val_idx, code = 0;124int* table = dst;125*dst++ = first_bits;126for (i = 1, val_idx = max_bits; i <= max_bits; i++)127{128int code_count = src[i - 1];129dst[0] = code_count;130code <<= 1;131for (int k = 0; k < code_count; k++)132{133dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);134}135code += code_count;136dst += code_count + 1;137val_idx += code_count;138}139dst[0] = -1;140return table;141}142143144namespace mjpeg145{146147class mjpeg_buffer148{149public:150mjpeg_buffer()151{152reset();153}154155void resize(int size)156{157data.resize(size);158}159160inline void put_bits(unsigned bits, int len)161{162CV_Assert(len >=0 && len < 32);163if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())164{165resize(int(2*data.size()));166}167168bits_free -= (len);169unsigned int tempval = (bits) & bit_mask[(len)];170171if( bits_free <= 0 )172{173data[m_pos] |= ((unsigned)tempval >> -bits_free);174175bits_free += 32;176++m_pos;177data[m_pos] = bits_free < 32 ? (tempval << bits_free) : 0;178}179else180{181data[m_pos] |= (bits_free == 32) ? tempval : (tempval << bits_free);182}183}184185inline void put_val(int val, const unsigned * table)186{187unsigned code = table[(val) + 2];188put_bits(code >> 8, (int)(code & 255));189}190191void finish()192{193if(bits_free == 32)194{195bits_free = 0;196m_data_len = m_pos;197}198else199{200m_data_len = m_pos + 1;201}202}203204void reset()205{206bits_free = 32;207m_pos = 0;208m_data_len = 0;209}210211void clear()212{213//we need to clear only first element, the rest would be overwritten214data[0] = 0;215}216217int get_bits_free()218{219return bits_free;220}221222unsigned* get_data()223{224return &data[0];225}226227unsigned get_len()228{229return m_data_len;230}231232private:233std::vector<unsigned> data;234int bits_free;235unsigned m_pos;236unsigned m_data_len;237};238239240class mjpeg_buffer_keeper241{242public:243mjpeg_buffer_keeper()244{245reset();246}247248mjpeg_buffer& operator[](int i)249{250return m_buffer_list[i];251}252253void allocate_buffers(int count, int size)254{255for(int i = (int)m_buffer_list.size(); i < count; ++i)256{257m_buffer_list.push_back(mjpeg_buffer());258m_buffer_list.back().resize(size);259}260}261262unsigned* get_data()263{264//if there is only one buffer (single thread) there is no need to stack buffers265if(m_buffer_list.size() == 1)266{267m_buffer_list[0].finish();268269m_data_len = m_buffer_list[0].get_len();270m_last_bit_len = m_buffer_list[0].get_bits_free() ? 32 - m_buffer_list[0].get_bits_free() : 0;271272return m_buffer_list[0].get_data();273}274275allocate_output_buffer();276277int bits = 0;278unsigned currval = 0;279m_data_len = 0;280281for(unsigned j = 0; j < m_buffer_list.size(); ++j)282{283mjpeg_buffer& buffer = m_buffer_list[j];284285//if no bit shift required we could use memcpy286if(bits == 0)287{288size_t current_pos = m_data_len;289290if(buffer.get_bits_free() == 0)291{292memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*buffer.get_len());293m_data_len += buffer.get_len();294currval = 0;295}296else297{298memcpy(&m_output_buffer[current_pos], buffer.get_data(), sizeof(buffer.get_data()[0])*(buffer.get_len() - 1 ));299m_data_len += buffer.get_len() - 1;300currval = buffer.get_data()[buffer.get_len() - 1];301}302}303else304{305for(unsigned i = 0; i < buffer.get_len() - 1; ++i)306{307currval |= ( (unsigned)buffer.get_data()[i] >> (31 & (-bits)) );308309m_output_buffer[m_data_len++] = currval;310311currval = buffer.get_data()[i] << (bits + 32);312}313314currval |= ( (unsigned)buffer.get_data()[buffer.get_len() - 1] >> (31 & (-bits)) );315316if( buffer.get_bits_free() <= -bits)317{318m_output_buffer[m_data_len++] = currval;319320currval = buffer.get_data()[buffer.get_len() - 1] << (bits + 32);321}322}323324bits += buffer.get_bits_free();325326if(bits > 0)327{328bits -= 32;329}330}331332//bits == 0 means that last element shouldn't be used.333m_output_buffer[m_data_len++] = currval;334335m_last_bit_len = -bits;336337return &m_output_buffer[0];338}339340int get_last_bit_len()341{342return m_last_bit_len;343}344345int get_data_size()346{347return m_data_len;348}349350void reset()351{352m_last_bit_len = 0;353for(unsigned i = 0; i < m_buffer_list.size(); ++i)354{355m_buffer_list[i].reset();356}357358//there is no need to erase output buffer since it would be overwritten359m_data_len = 0;360}361362private:363364void allocate_output_buffer()365{366unsigned total_size = 0;367368for(unsigned i = 0; i < m_buffer_list.size(); ++i)369{370m_buffer_list[i].finish();371total_size += m_buffer_list[i].get_len();372}373374if(total_size > m_output_buffer.size())375{376m_output_buffer.clear();377m_output_buffer.resize(total_size);378}379}380381std::deque<mjpeg_buffer> m_buffer_list;382std::vector<unsigned> m_output_buffer;383int m_data_len;384int m_last_bit_len;385};386387class MotionJpegWriter : public IVideoWriter388{389public:390MotionJpegWriter()391{392rawstream = false;393nstripes = -1;394quality = 0;395}396397MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)398{399rawstream = false;400open(filename, fps, size, iscolor);401nstripes = -1;402}403~MotionJpegWriter() { close(); }404405virtual int getCaptureDomain() const CV_OVERRIDE { return cv::CAP_OPENCV_MJPEG; }406407void close()408{409if( !container.isOpenedStream() )410return;411412if( !container.isEmptyFrameOffset() && !rawstream )413{414container.endWriteChunk(); // end LIST 'movi'415container.writeIndex(0, dc);416container.finishWriteAVI();417}418}419420bool open(const String& filename, double fps, Size size, bool iscolor)421{422close();423424if( filename.empty() )425return false;426const char* ext = strrchr(filename.c_str(), '.');427if( !ext )428return false;429if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )430return false;431432if( !container.initContainer(filename, fps, size, iscolor) )433return false;434435CV_Assert(fps >= 1);436quality = 75;437rawstream = false;438439if( !rawstream )440{441container.startWriteAVI(1); // count stream442container.writeStreamHeader(MJPEG);443}444//printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());445return true;446}447448bool isOpened() const CV_OVERRIDE { return container.isOpenedStream(); }449450void write(InputArray _img) CV_OVERRIDE451{452Mat img = _img.getMat();453size_t chunkPointer = container.getStreamPos();454int input_channels = img.channels();455int colorspace = -1;456int imgWidth = img.cols;457int frameWidth = container.getWidth();458int imgHeight = img.rows;459int frameHeight = container.getHeight();460int channels = container.getChannels();461462463if( input_channels == 1 && channels == 1 )464{465CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight );466colorspace = COLORSPACE_GRAY;467}468else if( input_channels == 4 )469{470CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );471colorspace = COLORSPACE_RGBA;472}473else if( input_channels == 3 )474{475CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight && channels == 3 );476colorspace = COLORSPACE_BGR;477}478else if( input_channels == 1 && channels == 3 )479{480CV_Assert( imgWidth == frameWidth && imgHeight == frameHeight*3 );481colorspace = COLORSPACE_YUV444P;482}483else484CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");485486if( !rawstream ) {487int avi_index = container.getAVIIndex(0, dc);488container.startWriteChunk(avi_index);489}490491writeFrameData(img.data, (int)img.step, colorspace, input_channels);492493if( !rawstream )494{495size_t tempChunkPointer = container.getStreamPos();496size_t moviPointer = container.getMoviPointer();497container.pushFrameOffset(chunkPointer - moviPointer);498container.pushFrameSize(tempChunkPointer - chunkPointer - 8); // Size excludes '00dc' and size field499container.endWriteChunk(); // end '00dc'500}501}502503double getProperty(int propId) const CV_OVERRIDE504{505if( propId == VIDEOWRITER_PROP_QUALITY )506return quality;507if( propId == VIDEOWRITER_PROP_FRAMEBYTES )508{509bool isEmpty = container.isEmptyFrameSize();510return isEmpty ? 0. : container.atFrameSize(container.countFrameSize() - 1);511}512if( propId == VIDEOWRITER_PROP_NSTRIPES )513return nstripes;514return 0.;515}516517bool setProperty(int propId, double value) CV_OVERRIDE518{519if( propId == VIDEOWRITER_PROP_QUALITY )520{521quality = value;522return true;523}524525if( propId == VIDEOWRITER_PROP_NSTRIPES)526{527nstripes = value;528return true;529}530531return false;532}533534void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );535536protected:537double quality;538bool rawstream;539mjpeg_buffer_keeper buffers_list;540double nstripes;541542AVIWriteContainer container;543};544545#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))546#define fix(x, n) (int)((x)*(1 << (n)) + .5);547548enum549{550fixb = 14,551fixc = 12,552postshift = 14553};554555static const int C0_707 = fix(0.707106781f, fixb);556static const int C0_541 = fix(0.541196100f, fixb);557static const int C0_382 = fix(0.382683432f, fixb);558static const int C1_306 = fix(1.306562965f, fixb);559560static const int y_r = fix(0.299, fixc);561static const int y_g = fix(0.587, fixc);562static const int y_b = fix(0.114, fixc);563564static const int cb_r = -fix(0.1687, fixc);565static const int cb_g = -fix(0.3313, fixc);566static const int cb_b = fix(0.5, fixc);567568static const int cr_r = fix(0.5, fixc);569static const int cr_g = -fix(0.4187, fixc);570static const int cr_b = -fix(0.0813, fixc);571572// Standard JPEG quantization tables573static const uchar jpegTableK1_T[] =574{57516, 12, 14, 14, 18, 24, 49, 72,57611, 12, 13, 17, 22, 35, 64, 92,57710, 14, 16, 22, 37, 55, 78, 95,57816, 19, 24, 29, 56, 64, 87, 98,57924, 26, 40, 51, 68, 81, 103, 112,58040, 58, 57, 87, 109, 104, 121, 100,58151, 60, 69, 80, 103, 113, 120, 103,58261, 55, 56, 62, 77, 92, 101, 99583};584585static const uchar jpegTableK2_T[] =586{58717, 18, 24, 47, 99, 99, 99, 99,58818, 21, 26, 66, 99, 99, 99, 99,58924, 26, 56, 99, 99, 99, 99, 99,59047, 66, 99, 99, 99, 99, 99, 99,59199, 99, 99, 99, 99, 99, 99, 99,59299, 99, 99, 99, 99, 99, 99, 99,59399, 99, 99, 99, 99, 99, 99, 99,59499, 99, 99, 99, 99, 99, 99, 99595};596597// Standard Huffman tables598599// ... for luma DCs.600static const uchar jpegTableK3[] =601{6020, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,6030, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11604};605606// ... for chroma DCs.607static const uchar jpegTableK4[] =608{6090, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,6100, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11611};612613// ... for luma ACs.614static const uchar jpegTableK5[] =615{6160, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,6170x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,6180x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,6190x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,6200x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,6210x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,6220x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,6230x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,6240x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,6250x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,6260x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,6270x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,6280x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,6290x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,6300x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,6310xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,6320xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,6330xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,6340xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,6350xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,6360xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,6370xf9, 0xfa638};639640// ... for chroma ACs641static const uchar jpegTableK6[] =642{6430, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,6440x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,6450x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,6460x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,6470xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,6480x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,6490xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,6500x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,6510x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,6520x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,6530x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,6540x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,6550x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,6560x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,6570x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,6580xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,6590xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,6600xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,6610xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,6620xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,6630xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,6640xf9, 0xfa665};666667static const uchar zigzag[] =668{6690, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,67033, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,67128, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,67223, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,67363, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63674};675676677static const int idct_prescale[] =678{67916384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,68022725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,68121407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,68219266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,68316384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,68412873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,6858867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,6864520, 6270, 5906, 5315, 4520, 3552, 2446, 1247687};688689static const char jpegHeader[] =690"\xFF\xD8" // SOI - start of image691"\xFF\xE0" // APP0 - jfif extension692"\x00\x10" // 2 bytes: length of APP0 segment693"JFIF\x00" // JFIF signature694"\x01\x02" // version of JFIF695"\x00" // units = pixels ( 1 - inch, 2 - cm )696"\x00\x01\x00\x01" // 2 2-bytes values: x density & y density697"\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)698699#ifdef WITH_NEON700// FDCT with postscaling701static void aan_fdct8x8( const short *src, short *dst,702int step, const short *postscale )703{704// Pass 1: process rows705int16x8_t x0 = vld1q_s16(src); int16x8_t x1 = vld1q_s16(src + step*7);706int16x8_t x2 = vld1q_s16(src + step*3); int16x8_t x3 = vld1q_s16(src + step*4);707708int16x8_t x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);709x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);710711int16x8_t t1 = x0; int16x8_t t2 = x2;712713x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);714715x0 = vld1q_s16(src + step); x3 = vld1q_s16(src + step*6);716717x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);718int16x8_t t3 = x0;719720x0 = vld1q_s16(src + step*2); x3 = vld1q_s16(src + step*5);721722int16x8_t t4 = vsubq_s16(x0, x3);723724x0 = vaddq_s16(x0, x3);725x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);726x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);727728int16x8_t res0 = x1;729int16x8_t res4 = x2;730x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));731x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);732733int16x8_t res2 = x4;734int16x8_t res6 = x1;735736x0 = t2; x1 = t4;737x2 = t3; x3 = t1;738x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);739x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));740741x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);742x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));743x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);744x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);745746x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);747x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);748749int16x8_t res1 = x0;750int16x8_t res3 = x3;751int16x8_t res5 = x1;752int16x8_t res7 = x4;753754//transpose a matrix755/*756res0 00 01 02 03 04 05 06 07757res1 10 11 12 13 14 15 16 17758res2 20 21 22 23 24 25 26 27759res3 30 31 32 33 34 35 36 37760res4 40 41 42 43 44 45 46 47761res5 50 51 52 53 54 55 56 57762res6 60 61 62 63 64 65 66 67763res7 70 71 72 73 74 75 76 77764*/765766//transpose elements 00-33767int16x4_t res0_0 = vget_low_s16(res0);768int16x4_t res1_0 = vget_low_s16(res1);769int16x4x2_t tres = vtrn_s16(res0_0, res1_0);770int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));771772res0_0 = vget_low_s16(res2);773res1_0 = vget_low_s16(res3);774tres = vtrn_s16(res0_0, res1_0);775int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));776777int32x4x2_t tres1 = vtrnq_s32(l0, l1);778779// transpose elements 40-73780res0_0 = vget_low_s16(res4);781res1_0 = vget_low_s16(res5);782tres = vtrn_s16(res0_0, res1_0);783l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));784785res0_0 = vget_low_s16(res6);786res1_0 = vget_low_s16(res7);787788tres = vtrn_s16(res0_0, res1_0);789l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));790791int32x4x2_t tres2 = vtrnq_s32(l0, l1);792793//combine into 0-3794int16x8_t transp_res0 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));795int16x8_t transp_res1 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));796int16x8_t transp_res2 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));797int16x8_t transp_res3 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));798799// transpose elements 04-37800res0_0 = vget_high_s16(res0);801res1_0 = vget_high_s16(res1);802tres = vtrn_s16(res0_0, res1_0);803l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));804805res0_0 = vget_high_s16(res2);806res1_0 = vget_high_s16(res3);807808tres = vtrn_s16(res0_0, res1_0);809l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));810811tres1 = vtrnq_s32(l0, l1);812813// transpose elements 44-77814res0_0 = vget_high_s16(res4);815res1_0 = vget_high_s16(res5);816tres = vtrn_s16(res0_0, res1_0);817l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));818819res0_0 = vget_high_s16(res6);820res1_0 = vget_high_s16(res7);821822tres = vtrn_s16(res0_0, res1_0);823l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));824825tres2 = vtrnq_s32(l0, l1);826827//combine into 4-7828int16x8_t transp_res4 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));829int16x8_t transp_res5 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));830int16x8_t transp_res6 = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));831int16x8_t transp_res7 = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));832833//special hack for vqdmulhq_s16 command that is producing -1 instead of 0834#define STORE_DESCALED(addr, reg, mul_addr) postscale_line = vld1q_s16((mul_addr)); \835mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \836reg = vabsq_s16(reg); \837reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \838reg = vsubq_s16(veorq_s16(reg, mask), mask); \839vst1q_s16((addr), reg);840841int16x8_t z = vdupq_n_s16(0), postscale_line, mask;842843// pass 2: process columns844x0 = transp_res0; x1 = transp_res7;845x2 = transp_res3; x3 = transp_res4;846847x4 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);848x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);849850t1 = x0; t2 = x2;851852x2 = vaddq_s16(x4, x1); x4 = vsubq_s16(x4, x1);853854x0 = transp_res1;855x3 = transp_res6;856857x1 = vaddq_s16(x0, x3); x0 = vsubq_s16(x0, x3);858859t3 = x0;860861x0 = transp_res2; x3 = transp_res5;862863t4 = vsubq_s16(x0, x3);864865x0 = vaddq_s16(x0, x3);866867x3 = vaddq_s16(x0, x1); x0 = vsubq_s16(x0, x1);868x1 = vaddq_s16(x2, x3); x2 = vsubq_s16(x2, x3);869870STORE_DESCALED(dst, x1, postscale);871STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);872873x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));874875x1 = vaddq_s16(x4, x0); x4 = vsubq_s16(x4, x0);876877STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);878STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);879880x0 = t2; x1 = t4;881x2 = t3; x3 = t1;882883x0 = vaddq_s16(x0, x1); x1 = vaddq_s16(x1, x2); x2 = vaddq_s16(x2, x3);884885x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));886887x4 = vaddq_s16(x1, x3); x3 = vsubq_s16(x3, x1);888889x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));890x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);891x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);892893x1 = vaddq_s16(x0, x3); x3 = vsubq_s16(x3, x0);894x0 = vaddq_s16(x4, x2); x4 = vsubq_s16(x4, x2);895896STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);897STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);898STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);899STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);900}901902#else903// FDCT with postscaling904static void aan_fdct8x8( const short *src, short *dst,905int step, const short *postscale )906{907int workspace[64], *work = workspace;908int i;909910// Pass 1: process rows911for( i = 8; i > 0; i--, src += step, work += 8 )912{913int x0 = src[0], x1 = src[7];914int x2 = src[3], x3 = src[4];915916int x4 = x0 + x1; x0 -= x1;917x1 = x2 + x3; x2 -= x3;918919work[7] = x0; work[1] = x2;920x2 = x4 + x1; x4 -= x1;921922x0 = src[1]; x3 = src[6];923x1 = x0 + x3; x0 -= x3;924work[5] = x0;925926x0 = src[2]; x3 = src[5];927work[3] = x0 - x3; x0 += x3;928929x3 = x0 + x1; x0 -= x1;930x1 = x2 + x3; x2 -= x3;931932work[0] = x1; work[4] = x2;933934x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);935x1 = x4 + x0; x4 -= x0;936work[2] = x4; work[6] = x1;937938x0 = work[1]; x1 = work[3];939x2 = work[5]; x3 = work[7];940941x0 += x1; x1 += x2; x2 += x3;942x1 = DCT_DESCALE(x1*C0_707, fixb);943944x4 = x1 + x3; x3 -= x1;945x1 = (x0 - x2)*C0_382;946x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);947x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);948949x1 = x0 + x3; x3 -= x0;950x0 = x4 + x2; x4 -= x2;951952work[5] = x1; work[1] = x0;953work[7] = x4; work[3] = x3;954}955956work = workspace;957// pass 2: process columns958for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )959{960int x0 = work[8*0], x1 = work[8*7];961int x2 = work[8*3], x3 = work[8*4];962963int x4 = x0 + x1; x0 -= x1;964x1 = x2 + x3; x2 -= x3;965966work[8*7] = x0; work[8*0] = x2;967x2 = x4 + x1; x4 -= x1;968969x0 = work[8*1]; x3 = work[8*6];970x1 = x0 + x3; x0 -= x3;971work[8*4] = x0;972973x0 = work[8*2]; x3 = work[8*5];974work[8*3] = x0 - x3; x0 += x3;975976x3 = x0 + x1; x0 -= x1;977x1 = x2 + x3; x2 -= x3;978979dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);980dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);981982x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);983x1 = x4 + x0; x4 -= x0;984985dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);986dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);987988x0 = work[8*0]; x1 = work[8*3];989x2 = work[8*4]; x3 = work[8*7];990991x0 += x1; x1 += x2; x2 += x3;992x1 = DCT_DESCALE(x1*C0_707, fixb);993994x4 = x1 + x3; x3 -= x1;995x1 = (x0 - x2)*C0_382;996x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);997x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);998999x1 = x0 + x3; x3 -= x0;1000x0 = x4 + x2; x4 -= x2;10011002dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);1003dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);1004dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);1005dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);1006}1007}1008#endif100910101011inline void convertToYUV(int colorspace, int channels, int input_channels, short* UV_data, short* Y_data, const uchar* pix_data, int y_limit, int x_limit, int step, int u_plane_ofs, int v_plane_ofs)1012{1013int i, j;1014const int UV_step = 16;1015int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;1016int Y_step = x_scale*8;10171018if( channels > 1 )1019{1020if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )1021{1022for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )1023{1024#ifdef WITH_NEON1025{1026uint16x8_t masklo = vdupq_n_u16(255);1027uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));1028uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));1029lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));1030uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));1031t1 = vaddq_u16(t1, t2);1032vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));10331034lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));1035t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));1036lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));1037t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));1038t1 = vaddq_u16(t1, t2);1039vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));1040}10411042{1043int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));1044int16x8_t delta = vdupq_n_s16(128);1045lane = vsubq_s16(lane, delta);1046vst1q_s16(Y_data, lane);10471048lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));1049lane = vsubq_s16(lane, delta);1050vst1q_s16(Y_data + 8, lane);10511052lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));1053lane = vsubq_s16(lane, delta);1054vst1q_s16(Y_data+Y_step, lane);10551056lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));1057lane = vsubq_s16(lane, delta);1058vst1q_s16(Y_data+Y_step + 8, lane);1059}1060#else1061for( j = 0; j < x_limit; j += 2, pix_data += 2 )1062{1063Y_data[j] = pix_data[0] - 128;1064Y_data[j+1] = pix_data[1] - 128;1065Y_data[j+Y_step] = pix_data[step] - 128;1066Y_data[j+Y_step+1] = pix_data[step+1] - 128;10671068UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +1069pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;1070UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +1071pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;10721073}10741075pix_data -= x_limit*input_channels;1076#endif1077}1078}1079else1080{1081for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )1082{1083for( j = 0; j < x_limit; j++, pix_data += input_channels )1084{1085int Y, U, V;10861087if( colorspace == COLORSPACE_BGR )1088{1089int r = pix_data[2];1090int g = pix_data[1];1091int b = pix_data[0];10921093Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;1094U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );1095V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );1096}1097else if( colorspace == COLORSPACE_RGBA )1098{1099int r = pix_data[0];1100int g = pix_data[1];1101int b = pix_data[2];11021103Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;1104U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );1105V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );1106}1107else1108{1109Y = pix_data[0] - 128;1110U = pix_data[v_plane_ofs] - 128;1111V = pix_data[u_plane_ofs] - 128;1112}11131114int j2 = j >> (x_scale - 1);1115Y_data[j] = (short)Y;1116UV_data[j2] = (short)(UV_data[j2] + U);1117UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);1118}11191120pix_data -= x_limit*input_channels;1121if( ((i+1) & (y_scale - 1)) == 0 )1122{1123UV_data += UV_step;1124}1125}1126}11271128}1129else1130{1131for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )1132{1133for( j = 0; j < x_limit; j++ )1134Y_data[j] = (short)(pix_data[j]*4 - 128*4);1135}1136}1137}11381139class MjpegEncoder : public ParallelLoopBody1140{1141public:1142MjpegEncoder(int _height,1143int _width,1144int _step,1145const uchar* _data,1146int _input_channels,1147int _channels,1148int _colorspace,1149unsigned (&_huff_dc_tab)[2][16],1150unsigned (&_huff_ac_tab)[2][256],1151short (&_fdct_qtab)[2][64],1152uchar* _cat_table,1153mjpeg_buffer_keeper& _buffer_list,1154double nstripes1155) :1156m_buffer_list(_buffer_list),1157height(_height),1158width(_width),1159step(_step),1160in_data(_data),1161input_channels(_input_channels),1162channels(_channels),1163colorspace(_colorspace),1164huff_dc_tab(_huff_dc_tab),1165huff_ac_tab(_huff_ac_tab),1166fdct_qtab(_fdct_qtab),1167cat_table(_cat_table)1168{1169//empirically found value. if number of pixels is less than that value there is no sense to parallelize it.1170const int min_pixels_count = 96*96;11711172stripes_count = 1;11731174if(nstripes < 0)1175{1176if(height*width > min_pixels_count)1177{1178stripes_count = default_stripes_count;1179}1180}1181else1182{1183stripes_count = cvCeil(nstripes);1184}11851186int y_scale = channels > 1 ? 2 : 1;1187int y_step = y_scale * 8;11881189int max_stripes = (height - 1)/y_step + 1;11901191stripes_count = std::min(stripes_count, max_stripes);11921193m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count);1194}11951196void operator()( const cv::Range& range ) const CV_OVERRIDE1197{1198const int CAT_TAB_SIZE = 4096;11991200int x, y;1201int i, j;12021203short buffer[4096];1204int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;1205int dc_pred[] = { 0, 0, 0 };1206int x_step = x_scale * 8;1207int y_step = y_scale * 8;1208short block[6][64];1209int luma_count = x_scale*y_scale;1210int block_count = luma_count + channels - 1;1211int u_plane_ofs = step*height;1212int v_plane_ofs = u_plane_ofs + step*height;1213const uchar* data = in_data;1214const uchar* init_data = data;12151216int num_steps = (height - 1)/y_step + 1;12171218//if this is not first stripe we need to calculate dc_pred from previous step1219if(range.start > 0)1220{1221y = y_step*int(num_steps*range.start/stripes_count - 1);1222data = init_data + y*step;12231224for( x = 0; x < width; x += x_step )1225{1226int x_limit = x_step;1227int y_limit = y_step;1228const uchar* pix_data = data + x*input_channels;1229short* Y_data = block[0];1230short* UV_data = block[luma_count];12311232if( x + x_limit > width ) x_limit = width - x;1233if( y + y_limit > height ) y_limit = height - y;12341235memset( block, 0, block_count*64*sizeof(block[0][0]));12361237convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);12381239for( i = 0; i < block_count; i++ )1240{1241int is_chroma = i >= luma_count;1242int src_step = x_scale * 8;1243const short* src_ptr = block[i & -2] + (i & 1)*8;12441245aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );12461247j = is_chroma + (i > luma_count);1248dc_pred[j] = buffer[0];1249}1250}1251}12521253for(int k = range.start; k < range.end; ++k)1254{1255mjpeg_buffer& output_buffer = m_buffer_list[k];1256output_buffer.clear();12571258int y_min = y_step*int(num_steps*k/stripes_count);1259int y_max = y_step*int(num_steps*(k+1)/stripes_count);12601261if(k == stripes_count - 1)1262{1263y_max = height;1264}126512661267data = init_data + y_min*step;12681269for( y = y_min; y < y_max; y += y_step, data += y_step*step )1270{1271for( x = 0; x < width; x += x_step )1272{1273int x_limit = x_step;1274int y_limit = y_step;1275const uchar* pix_data = data + x*input_channels;1276short* Y_data = block[0];1277short* UV_data = block[luma_count];12781279if( x + x_limit > width ) x_limit = width - x;1280if( y + y_limit > height ) y_limit = height - y;12811282memset( block, 0, block_count*64*sizeof(block[0][0]));12831284convertToYUV(colorspace, channels, input_channels, UV_data, Y_data, pix_data, y_limit, x_limit, step, u_plane_ofs, v_plane_ofs);12851286for( i = 0; i < block_count; i++ )1287{1288int is_chroma = i >= luma_count;1289int src_step = x_scale * 8;1290int run = 0, val;1291const short* src_ptr = block[i & -2] + (i & 1)*8;1292const unsigned* htable = huff_ac_tab[is_chroma];12931294aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );12951296j = is_chroma + (i > luma_count);1297val = buffer[0] - dc_pred[j];1298dc_pred[j] = buffer[0];12991300{1301int cat = cat_table[val + CAT_TAB_SIZE];13021303//CV_Assert( cat <= 11 );1304output_buffer.put_val(cat, huff_dc_tab[is_chroma] );1305output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );1306}13071308for( j = 1; j < 64; j++ )1309{1310val = buffer[zigzag[j]];13111312if( val == 0 )1313{1314run++;1315}1316else1317{1318while( run >= 16 )1319{1320output_buffer.put_val( 0xF0, htable ); // encode 16 zeros1321run -= 16;1322}13231324{1325int cat = cat_table[val + CAT_TAB_SIZE];1326//CV_Assert( cat <= 10 );1327output_buffer.put_val( cat + run*16, htable );1328output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );1329}13301331run = 0;1332}1333}13341335if( run )1336{1337output_buffer.put_val( 0x00, htable ); // encode EOB1338}1339}1340}1341}1342}1343}13441345cv::Range getRange()1346{1347return cv::Range(0, stripes_count);1348}13491350double getNStripes()1351{1352return stripes_count;1353}13541355mjpeg_buffer_keeper& m_buffer_list;1356private:13571358MjpegEncoder& operator=( const MjpegEncoder & ) { return *this; }13591360const int height;1361const int width;1362const int step;1363const uchar* in_data;1364const int input_channels;1365const int channels;1366const int colorspace;1367const unsigned (&huff_dc_tab)[2][16];1368const unsigned (&huff_ac_tab)[2][256];1369const short (&fdct_qtab)[2][64];1370const uchar* cat_table;1371int stripes_count;1372static const int default_stripes_count;1373};13741375const int MjpegEncoder::default_stripes_count = 4;13761377void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )1378{1379//double total_cvt = 0, total_dct = 0;1380static bool init_cat_table = false;1381const int CAT_TAB_SIZE = 4096;1382static uchar cat_table[CAT_TAB_SIZE*2+1];1383if( !init_cat_table )1384{1385for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )1386{1387Cv32suf a;1388a.f = (float)i;1389cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));1390}1391init_cat_table = true;1392}13931394//double total_dct = 0, total_cvt = 0;1395int width = container.getWidth();1396int height = container.getHeight();1397int channels = container.getChannels();13981399CV_Assert( data && width > 0 && height > 0 );14001401// encode the header and tables1402// for each mcu:1403// convert rgb to yuv with downsampling (if color).1404// for every block:1405// calc dct and quantize1406// encode block.1407int i, j;1408const int max_quality = 12;1409short fdct_qtab[2][64];1410unsigned huff_dc_tab[2][16];1411unsigned huff_ac_tab[2][256];14121413int x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;1414short buffer[4096];1415int* hbuffer = (int*)buffer;1416int luma_count = x_scale*y_scale;1417double _quality = quality*0.01*max_quality;14181419if( _quality < 1. ) _quality = 1.;1420if( _quality > max_quality ) _quality = max_quality;14211422double inv_quality = 1./_quality;14231424// Encode header1425container.putStreamBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );14261427// Encode quantization tables1428for( i = 0; i < (channels > 1 ? 2 : 1); i++ )1429{1430const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;1431int chroma_scale = i > 0 ? luma_count : 1;14321433container.jputStreamShort( 0xffdb ); // DQT marker1434container.jputStreamShort( 2 + 65*1 ); // put single qtable1435container.putStreamByte( 0*16 + i ); // 8-bit table14361437// put coefficients1438for( j = 0; j < 64; j++ )1439{1440int idx = zigzag[j];1441int qval = cvRound(qtable[idx]*inv_quality);1442if( qval < 1 )1443qval = 1;1444if( qval > 255 )1445qval = 255;1446fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/1447(qval*chroma_scale*idct_prescale[idx]));1448container.putStreamByte( qval );1449}1450}14511452// Encode huffman tables1453for( i = 0; i < (channels > 1 ? 4 : 2); i++ )1454{1455const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :1456i == 2 ? jpegTableK4 : jpegTableK6;1457int is_ac_tab = i & 1;1458int idx = i >= 2;1459int tableSize = 16 + (is_ac_tab ? 162 : 12);14601461container.jputStreamShort( 0xFFC4 ); // DHT marker1462container.jputStreamShort( 3 + tableSize ); // define one huffman table1463container.putStreamByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index1464container.putStreamBytes( htable, tableSize ); // put table14651466createEncodeHuffmanTable(createSourceHuffmanTable( htable, hbuffer, 16, 9 ),1467is_ac_tab ? huff_ac_tab[idx] : huff_dc_tab[idx],1468is_ac_tab ? 256 : 16 );1469}14701471// put frame header1472container.jputStreamShort( 0xFFC0 ); // SOF0 marker1473container.jputStreamShort( 8 + 3*channels ); // length of frame header1474container.putStreamByte( 8 ); // sample precision1475container.jputStreamShort( height );1476container.jputStreamShort( width );1477container.putStreamByte( channels ); // number of components14781479for( i = 0; i < channels; i++ )1480{1481container.putStreamByte( i + 1 ); // (i+1)-th component id (Y,U or V)1482if( i == 0 )1483container.putStreamByte(x_scale*16 + y_scale); // chroma scale factors1484else1485container.putStreamByte(1*16 + 1);1486container.putStreamByte( i > 0 ); // quantization table idx1487}14881489// put scan header1490container.jputStreamShort( 0xFFDA ); // SOS marker1491container.jputStreamShort( 6 + 2*channels ); // length of scan header1492container.putStreamByte( channels ); // number of components in the scan14931494for( i = 0; i < channels; i++ )1495{1496container.putStreamByte( i+1 ); // component id1497container.putStreamByte( (i>0)*16 + (i>0) );// selection of DC & AC tables1498}14991500container.jputStreamShort(0*256 + 63); // start and end of spectral selection - for1501// sequential DCT start is 0 and end is 6315021503container.putStreamByte( 0 ); // successive approximation bit position1504// high & low - (0,0) for sequential DCT15051506buffers_list.reset();15071508MjpegEncoder parallel_encoder(height, width, step, data, input_channels, channels, colorspace, huff_dc_tab, huff_ac_tab, fdct_qtab, cat_table, buffers_list, nstripes);15091510cv::parallel_for_(parallel_encoder.getRange(), parallel_encoder, parallel_encoder.getNStripes());15111512//std::vector<unsigned>& v = parallel_encoder.m_buffer_list.get_data();1513unsigned* v = buffers_list.get_data();1514unsigned last_data_elem = buffers_list.get_data_size() - 1;15151516for(unsigned k = 0; k < last_data_elem; ++k)1517{1518container.jputStream(v[k]);1519}1520container.jflushStream(v[last_data_elem], 32 - buffers_list.get_last_bit_len());1521container.jputStreamShort( 0xFFD9 ); // EOI marker1522/*printf("total dct = %.1fms, total cvt = %.1fms\n",1523total_dct*1000./cv::getTickFrequency(),1524total_cvt*1000./cv::getTickFrequency());*/15251526size_t pos = container.getStreamPos();1527size_t pos1 = (pos + 3) & ~3;1528for( ; pos < pos1; pos++ )1529container.putStreamByte(0);1530}15311532}15331534Ptr<IVideoWriter> createMotionJpegWriter(const String& filename, int fourcc, double fps, Size frameSize, bool iscolor)1535{1536if (fourcc != CV_FOURCC('M', 'J', 'P', 'G'))1537return Ptr<IVideoWriter>();15381539Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);1540if( !iwriter->isOpened() )1541iwriter.release();1542return iwriter;1543}15441545}154615471548