// Path: blob/master/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
// 21798 views
// Copyright 2015 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// MIPS version of lossless functions10//11// Author(s): Djordje Pesut ([email protected])12// Jovan Zelincevic ([email protected])1314#include "src/dsp/dsp.h"15#include "src/dsp/lossless.h"16#include "src/dsp/lossless_common.h"1718#if defined(WEBP_USE_MIPS32)1920#include <assert.h>21#include <math.h>22#include <stdlib.h>23#include <string.h>2425static uint64_t FastSLog2Slow_MIPS32(uint32_t v) {26assert(v >= LOG_LOOKUP_IDX_MAX);27if (v < APPROX_LOG_WITH_CORRECTION_MAX) {28uint32_t log_cnt, y;29uint64_t correction;30const int c24 = 24;31uint32_t temp;3233// Xf = 256 = 2^834// log_cnt is index of leading one in upper 24 bits35__asm__ volatile(36"clz %[log_cnt], %[v] \n\t"37"addiu %[y], $zero, 1 \n\t"38"subu %[log_cnt], %[c24], %[log_cnt] \n\t"39"sllv %[y], %[y], %[log_cnt] \n\t"40"srlv %[temp], %[v], %[log_cnt] \n\t"41: [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),42[temp]"=r"(temp)43: [c24]"r"(c24), [v]"r"(v)44);4546// vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 25647// Xf = floor(Xf) * (1 + (v % y) / v)48// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)49// The correction factor: log(1 + d) ~ d; for very small d values, so50// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v5152// (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)53correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));54return (uint64_t)v * (kLog2Table[temp] +55((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) +56correction;57} else {58return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);59}60}6162static uint32_t 
FastLog2Slow_MIPS32(uint32_t v) {63assert(v >= LOG_LOOKUP_IDX_MAX);64if (v < APPROX_LOG_WITH_CORRECTION_MAX) {65uint32_t log_cnt, y;66const int c24 = 24;67uint32_t log_2;68uint32_t temp;6970__asm__ volatile(71"clz %[log_cnt], %[v] \n\t"72"addiu %[y], $zero, 1 \n\t"73"subu %[log_cnt], %[c24], %[log_cnt] \n\t"74"sllv %[y], %[y], %[log_cnt] \n\t"75"srlv %[temp], %[v], %[log_cnt] \n\t"76: [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),77[temp]"=r"(temp)78: [c24]"r"(c24), [v]"r"(v)79);8081log_2 = kLog2Table[temp] + (log_cnt << LOG_2_PRECISION_BITS);82if (v >= APPROX_LOG_MAX) {83// Since the division is still expensive, add this correction factor only84// for large values of 'v'.85const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));86log_2 += (uint32_t)DivRound(correction, v);87}88return log_2;89} else {90return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);91}92}9394// C version of this function:95// int i = 0;96// int64_t cost = 0;97// const uint32_t* pop = &population[4];98// const uint32_t* LoopEnd = &population[length];99// while (pop != LoopEnd) {100// ++i;101// cost += i * *pop;102// cost += i * *(pop + 1);103// pop += 2;104// }105// return cost;106static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {107int i, temp0, temp1;108const uint32_t* pop = &population[4];109const uint32_t* const LoopEnd = &population[length];110111__asm__ volatile(112"mult $zero, $zero \n\t"113"xor %[i], %[i], %[i] \n\t"114"beq %[pop], %[LoopEnd], 2f \n\t"115"1: \n\t"116"lw %[temp0], 0(%[pop]) \n\t"117"lw %[temp1], 4(%[pop]) \n\t"118"addiu %[i], %[i], 1 \n\t"119"addiu %[pop], %[pop], 8 \n\t"120"madd %[i], %[temp0] \n\t"121"madd %[i], %[temp1] \n\t"122"bne %[pop], %[LoopEnd], 1b \n\t"123"2: \n\t"124"mfhi %[temp0] \n\t"125"mflo %[temp1] \n\t"126: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),127[i]"=&r"(i), [pop]"+r"(pop)128: [LoopEnd]"r"(LoopEnd)129: "memory", "hi", "lo"130);131132return ((int64_t)temp0 << 32 | temp1);133}134135#define 
HUFFMAN_COST_PASS \136__asm__ volatile( \137"sll %[temp1], %[temp0], 3 \n\t" \138"addiu %[temp3], %[streak], -3 \n\t" \139"addu %[temp2], %[pstreaks], %[temp1] \n\t" \140"blez %[temp3], 1f \n\t" \141"srl %[temp1], %[temp1], 1 \n\t" \142"addu %[temp3], %[pcnts], %[temp1] \n\t" \143"lw %[temp0], 4(%[temp2]) \n\t" \144"lw %[temp1], 0(%[temp3]) \n\t" \145"addu %[temp0], %[temp0], %[streak] \n\t" \146"addiu %[temp1], %[temp1], 1 \n\t" \147"sw %[temp0], 4(%[temp2]) \n\t" \148"sw %[temp1], 0(%[temp3]) \n\t" \149"b 2f \n\t" \150"1: \n\t" \151"lw %[temp0], 0(%[temp2]) \n\t" \152"addu %[temp0], %[temp0], %[streak] \n\t" \153"sw %[temp0], 0(%[temp2]) \n\t" \154"2: \n\t" \155: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \156[temp3]"=&r"(temp3), [temp0]"+r"(temp0) \157: [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts), \158[streak]"r"(streak) \159: "memory" \160);161162// Returns the various RLE counts163static WEBP_INLINE void GetEntropyUnrefinedHelper(164uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,165int* WEBP_RESTRICT const i_prev,166VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,167VP8LStreaks* WEBP_RESTRICT const stats) {168int* const pstreaks = &stats->streaks[0][0];169int* const pcnts = &stats->counts[0];170int temp0, temp1, temp2, temp3;171const int streak = i - *i_prev;172173// Gather info for the bit entropy.174if (*val_prev != 0) {175bit_entropy->sum += (*val_prev) * streak;176bit_entropy->nonzeros += streak;177bit_entropy->nonzero_code = *i_prev;178bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;179if (bit_entropy->max_val < *val_prev) {180bit_entropy->max_val = *val_prev;181}182}183184// Gather info for the Huffman cost.185temp0 = (*val_prev != 0);186HUFFMAN_COST_PASS187188*val_prev = val;189*i_prev = i;190}191192static void GetEntropyUnrefined_MIPS32(193const uint32_t X[], int length,194VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,195VP8LStreaks* WEBP_RESTRICT const stats) {196int i;197int i_prev = 0;198uint32_t x_prev = 
X[0];199200memset(stats, 0, sizeof(*stats));201VP8LBitEntropyInit(bit_entropy);202203for (i = 1; i < length; ++i) {204const uint32_t x = X[i];205if (x != x_prev) {206GetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);207}208}209GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);210211bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;212}213214static void GetCombinedEntropyUnrefined_MIPS32(215const uint32_t X[], const uint32_t Y[], int length,216VP8LBitEntropy* WEBP_RESTRICT const entropy,217VP8LStreaks* WEBP_RESTRICT const stats) {218int i = 1;219int i_prev = 0;220uint32_t xy_prev = X[0] + Y[0];221222memset(stats, 0, sizeof(*stats));223VP8LBitEntropyInit(entropy);224225for (i = 1; i < length; ++i) {226const uint32_t xy = X[i] + Y[i];227if (xy != xy_prev) {228GetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, entropy, stats);229}230}231GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);232233entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;234}235236#define ASM_START \237__asm__ volatile( \238".set push \n\t" \239".set at \n\t" \240".set macro \n\t" \241"1: \n\t"242243// P2 = P0 + P1244// A..D - offsets245// E - temp variable to tell macro246// if pointer should be incremented247// 'literal' and successive histograms could be unaligned248// so we must use ulw and usw249#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \250"ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \251"ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \252"ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \253"ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \254"ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \255"ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \256"ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \257"ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \258"addu %[temp4], %[temp4], %[temp0] \n\t" \259"addu %[temp5], %[temp5], %[temp1] \n\t" \260"addu %[temp6], %[temp6], %[temp2] \n\t" \261"addu %[temp7], %[temp7], %[temp3] \n\t" \262"addiu %[" #P0 "], %[" #P0 
"], 16 \n\t" \263".if " #E " == 1 \n\t" \264"addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \265".endif \n\t" \266"usw %[temp4], " #A "(%[" #P2 "]) \n\t" \267"usw %[temp5], " #B "(%[" #P2 "]) \n\t" \268"usw %[temp6], " #C "(%[" #P2 "]) \n\t" \269"usw %[temp7], " #D "(%[" #P2 "]) \n\t" \270"addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \271"bne %[" #P0 "], %[LoopEnd], 1b \n\t" \272".set pop \n\t" \273274#define ASM_END_COMMON_0 \275: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), \276[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \277[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), \278[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), \279[pa]"+r"(pa), [pout]"+r"(pout)280281#define ASM_END_COMMON_1 \282: [LoopEnd]"r"(LoopEnd) \283: "memory", "at" \284);285286#define ASM_END_0 \287ASM_END_COMMON_0 \288, [pb]"+r"(pb) \289ASM_END_COMMON_1290291#define ASM_END_1 \292ASM_END_COMMON_0 \293ASM_END_COMMON_1294295static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,296const uint32_t* WEBP_RESTRICT pb,297uint32_t* WEBP_RESTRICT pout, int size) {298uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;299const int end = ((size) / 4) * 4;300const uint32_t* const LoopEnd = pa + end;301int i;302ASM_START303ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)304ASM_END_0305for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];306}307308static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,309uint32_t* WEBP_RESTRICT pout, int size) {310uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;311const int end = ((size) / 4) * 4;312const uint32_t* const LoopEnd = pa + end;313int i;314ASM_START315ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)316ASM_END_1317for (i = 0; i < size - end; ++i) pout[i] += pa[i];318}319320#undef ASM_END_1321#undef ASM_END_0322#undef ASM_END_COMMON_1323#undef ASM_END_COMMON_0324#undef ADD_TO_OUT325#undef ASM_START326327//------------------------------------------------------------------------------328// Entry point329330extern void 
VP8LEncDspInitMIPS32(void);331332WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {333VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;334VP8LFastLog2Slow = FastLog2Slow_MIPS32;335VP8LExtraCost = ExtraCost_MIPS32;336VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;337VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;338VP8LAddVector = AddVector_MIPS32;339VP8LAddVectorEq = AddVectorEq_MIPS32;340}341342#else // !WEBP_USE_MIPS32343344WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPS32)345346#endif // WEBP_USE_MIPS32347348349