Path: blob/master/3rdparty/carotene/src/convert_depth.cpp
16337 views
/*1* By downloading, copying, installing or using the software you agree to this license.2* If you do not agree to this license, do not download, install,3* copy or use the software.4*5*6* License Agreement7* For Open Source Computer Vision Library8* (3-clause BSD License)9*10* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.11* Third party copyrights are property of their respective owners.12*13* Redistribution and use in source and binary forms, with or without modification,14* are permitted provided that the following conditions are met:15*16* * Redistributions of source code must retain the above copyright notice,17* this list of conditions and the following disclaimer.18*19* * Redistributions in binary form must reproduce the above copyright notice,20* this list of conditions and the following disclaimer in the documentation21* and/or other materials provided with the distribution.22*23* * Neither the names of the copyright holders nor the names of the contributors24* may be used to endorse or promote products derived from this software25* without specific prior written permission.26*27* This software is provided by the copyright holders and contributors "as is" and28* any express or implied warranties, including, but not limited to, the implied29* warranties of merchantability and fitness for a particular purpose are disclaimed.30* In no event shall copyright holders or contributors be liable for any direct,31* indirect, incidental, special, exemplary, or consequential damages32* (including, but not limited to, procurement of substitute goods or services;33* loss of use, data, or profits; or business interruption) however caused34* and on any theory of liability, whether in contract, strict liability,35* or tort (including negligence or otherwise) arising in any way out of36* the use of this software, even if advised of the possibility of such damage.37*/3839#include "common.hpp"4041#include <cstring>4243namespace CAROTENE_NS {4445#ifdef CAROTENE_NEON4647namespace {4849template <int shift>50void lshiftConst(const Size2D &size,51const u8 * srcBase, ptrdiff_t srcStride,52s16 * dstBase, ptrdiff_t dstStride)53{54size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;55size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;5657for (size_t i = 0; i < size.height; ++i)58{59const u8 * src = internal::getRowPtr(srcBase, srcStride, i);60s16 * dst = internal::getRowPtr(dstBase, dstStride, i);61size_t j = 0;6263for (; j < roiw16; j += 16)64{65internal::prefetch(src + j);66uint8x16_t v_src = vld1q_u8(src + j);67int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));68int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));6970vst1q_s16(dst + j, vshlq_n_s16(v_dst0, shift));71vst1q_s16(dst + j + 8, vshlq_n_s16(v_dst1, shift));72}73for (; j < roiw8; j += 8)74{75int16x8_t v_dst = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));76vst1q_s16(dst + j, vshlq_n_s16(v_dst, shift));77}7879for (; j < size.width; j++)80{81dst[j] = ((s16)src[j] << shift);82}83}84}8586template <>87void lshiftConst<0>(const Size2D &size,88const u8 * srcBase, ptrdiff_t srcStride,89s16 * dstBase, ptrdiff_t dstStride)90{91size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;92size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;9394for (size_t i = 0; i < size.height; ++i)95{96const u8 * src = internal::getRowPtr(srcBase, srcStride, i);97s16 * dst = internal::getRowPtr(dstBase, dstStride, i);98size_t j = 0;99100for (; j < roiw16; j += 16)101{102internal::prefetch(src + j);103uint8x16_t v_src = vld1q_u8(src + j);104int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));105int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));106107vst1q_s16(dst + j, v_dst0);108vst1q_s16(dst + j + 8, v_dst1);109}110for (; j < roiw8; j += 8)111{112int16x8_t v_dst = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));113vst1q_s16(dst + j, v_dst);114}115116for (; j < size.width; j++)117{118dst[j] = (s16)src[j];119}120}121}122123template <int shift>124void rshiftConst(const Size2D &size,125const s16 * srcBase, ptrdiff_t srcStride,126u8 * dstBase, ptrdiff_t dstStride,127CONVERT_POLICY cpolicy)128{129size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;130size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;131132for (size_t i = 0; i < size.height; ++i)133{134const s16 * src = internal::getRowPtr(srcBase, srcStride, i);135u8 * dst = internal::getRowPtr(dstBase, dstStride, i);136size_t j = 0;137138if (cpolicy == CONVERT_POLICY_SATURATE)139{140for (; j < roiw16; j += 16)141{142internal::prefetch(src + j);143int16x8_t v_src0 = vshrq_n_s16(vld1q_s16(src + j), shift),144v_src1 = vshrq_n_s16(vld1q_s16(src + j + 8), shift);145uint8x16_t v_dst = vcombine_u8(vqmovun_s16(v_src0),146vqmovun_s16(v_src1));147vst1q_u8(dst + j, v_dst);148}149for (; j < roiw8; j += 8)150{151int16x8_t v_src = vshrq_n_s16(vld1q_s16(src + j), shift);152vst1_u8(dst + j, vqmovun_s16(v_src));153}154155for (; j < size.width; j++)156{157dst[j] = internal::saturate_cast<u8>((src[j] >> shift));158}159}160else // CONVERT_POLICY_WRAP161{162for (; j < roiw16; j += 16)163{164internal::prefetch(src + j);165int16x8_t v_src0 = vshrq_n_s16(vld1q_s16(src + j), shift),166v_src1 = vshrq_n_s16(vld1q_s16(src + j + 8), shift);167int8x16_t v_dst = vcombine_s8(vmovn_s16(v_src0),168vmovn_s16(v_src1));169vst1q_u8(dst + j, vreinterpretq_u8_s8(v_dst));170}171for (; j < roiw8; j += 8)172{173int16x8_t v_src = vshrq_n_s16(vld1q_s16(src + j), shift);174vst1_u8(dst + j, vreinterpret_u8_s8(vmovn_s16(v_src)));175}176177for (; j < size.width; j++)178{179dst[j] = (u8)((src[j] >> shift));180}181}182}183}184185template <>186void rshiftConst<0>(const Size2D &size,187const s16 * srcBase, ptrdiff_t srcStride,188u8 * dstBase, ptrdiff_t dstStride,189CONVERT_POLICY cpolicy)190{191size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;192size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;193194for (size_t i = 0; i < size.height; ++i)195{196const s16 * src = internal::getRowPtr(srcBase, srcStride, i);197u8 * dst = internal::getRowPtr(dstBase, dstStride, i);198size_t j = 0;199200if (cpolicy == CONVERT_POLICY_SATURATE)201{202for (; j < roiw16; j += 16)203{204internal::prefetch(src + j);205int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);206uint8x16_t v_dst = vcombine_u8(vqmovun_s16(v_src0), vqmovun_s16(v_src1));207vst1q_u8(dst + j, v_dst);208}209for (; j < roiw8; j += 8)210{211int16x8_t v_src = vld1q_s16(src + j);212vst1_u8(dst + j, vqmovun_s16(v_src));213}214215for (; j < size.width; j++)216{217dst[j] = internal::saturate_cast<u8>(src[j]);218}219}220else // CONVERT_POLICY_WRAP221{222for (; j < roiw16; j += 16)223{224internal::prefetch(src + j);225int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);226int8x16_t v_dst = vcombine_s8(vmovn_s16(v_src0), vmovn_s16(v_src1));227vst1q_u8(dst + j, vreinterpretq_u8_s8(v_dst));228}229for (; j < roiw8; j += 8)230{231int16x8_t v_src = vld1q_s16(src + j);232vst1_u8(dst + j, vreinterpret_u8_s8(vmovn_s16(v_src)));233}234235for (; j < size.width; j++)236{237dst[j] = (u8)src[j];238}239}240}241}242243typedef void (* lshiftConstFunc)(const Size2D &size,244const u8 * srcBase, ptrdiff_t srcStride,245s16 * dstBase, ptrdiff_t dstStride);246247typedef void (* rshiftConstFunc)(const Size2D &size,248const s16 * srcBase, ptrdiff_t srcStride,249u8 * dstBase, ptrdiff_t dstStride,250CONVERT_POLICY cpolicy);251252} // namespace253254#endif255256void lshift(const Size2D &size,257const u8 * srcBase, ptrdiff_t srcStride,258s16 * dstBase, ptrdiff_t dstStride,259u32 shift)260{261internal::assertSupportedConfiguration();262263#ifdef CAROTENE_NEON264if (shift >= 16u)265{266for (size_t i = 0; i < size.height; ++i)267{268s16 * dst = internal::getRowPtr(dstBase, dstStride, i);269std::memset(dst, 0, sizeof(s16) * size.width);270}271return;272}273274// this ugly contruction is needed to avoid:275// /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant276// return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1);277278lshiftConstFunc funcs[16] =279{280lshiftConst<0>,281lshiftConst<1>,282lshiftConst<2>,283lshiftConst<3>,284lshiftConst<4>,285lshiftConst<5>,286lshiftConst<6>,287lshiftConst<7>,288lshiftConst<8>,289lshiftConst<9>,290lshiftConst<10>,291lshiftConst<11>,292lshiftConst<12>,293lshiftConst<13>,294lshiftConst<14>,295lshiftConst<15>296}, func = funcs[shift];297298func(size, srcBase, srcStride, dstBase, dstStride);299#else300(void)size;301(void)srcBase;302(void)srcStride;303(void)dstBase;304(void)dstStride;305(void)shift;306#endif307}308309void rshift(const Size2D &size,310const s16 * srcBase, ptrdiff_t srcStride,311u8 * dstBase, ptrdiff_t dstStride,312u32 shift, CONVERT_POLICY cpolicy)313{314internal::assertSupportedConfiguration();315316#ifdef CAROTENE_NEON317if (shift >= 16)318{319if (cpolicy == CONVERT_POLICY_WRAP)320{321size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;322size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;323int16x8_t v_zero = vdupq_n_s16(0);324325for (size_t i = 0; i < size.height; ++i)326{327const s16 * src = internal::getRowPtr(srcBase, srcStride, i);328u8 * dst = internal::getRowPtr(dstBase, dstStride, i);329size_t j = 0;330331for (; j < roiw16; j += 16)332{333internal::prefetch(src + j);334int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);335uint8x16_t v_dst = vcombine_u8(vmovn_u16(vcltq_s16(v_src0, v_zero)),336vmovn_u16(vcltq_s16(v_src1, v_zero)));337vst1q_u8(dst + j, v_dst);338}339for (; j < roiw8; j += 8)340{341int16x8_t v_src = vld1q_s16(src + j);342vst1_u8(dst + j, vmovn_u16(vcltq_s16(v_src, v_zero)));343}344345for (; j < size.width; j++)346{347dst[j] = src[j] >= 0 ? 0 : 255;348}349}350}351else352{353for (size_t i = 0; i < size.height; ++i)354{355u8 * dst = internal::getRowPtr(dstBase, dstStride, i);356std::memset(dst, 0, sizeof(u8) * size.width);357}358}359return;360}361362// this ugly contruction is needed to avoid:363// /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant364// return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1);365366rshiftConstFunc funcs[16] =367{368rshiftConst<0>,369rshiftConst<1>,370rshiftConst<2>,371rshiftConst<3>,372rshiftConst<4>,373rshiftConst<5>,374rshiftConst<6>,375rshiftConst<7>,376rshiftConst<8>,377rshiftConst<9>,378rshiftConst<10>,379rshiftConst<11>,380rshiftConst<12>,381rshiftConst<13>,382rshiftConst<14>,383rshiftConst<15>384}, func = funcs[shift];385386func(size, srcBase, srcStride, dstBase, dstStride, cpolicy);387#else388(void)size;389(void)srcBase;390(void)srcStride;391(void)dstBase;392(void)dstStride;393(void)shift;394(void)cpolicy;395#endif396}397398} // namespace CAROTENE_NS399400401