CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/HW/StereoResampler.cpp
Views: 1401
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617// Adapted from Dolphin.1819// 16 bit Stereo2021// These must be powers of 2.22#define MAX_BUFSIZE_DEFAULT (4096) // 2*64ms - had to double it for nVidia Shield which has huge buffers23#define MAX_BUFSIZE_EXTRA (8192)2425#define TARGET_BUFSIZE_MARGIN 5122627#define TARGET_BUFSIZE_DEFAULT 1680 // 40 ms28#define TARGET_BUFSIZE_EXTRA 3360 // 80 ms2930#define MAX_FREQ_SHIFT 600.0f // how far off can we be from 44100 Hz31#define CONTROL_FACTOR 0.2f // in freq_shift per fifo size offset32#define CONTROL_AVG 32.0f3334#include "ppsspp_config.h"35#include <cstring>36#include <atomic>3738#include "Common/Common.h"39#include "Common/System/System.h"40#include "Common/Math/math_util.h"41#include "Common/Serialize/Serializer.h"42#include "Common/Log.h"43#include "Common/TimeUtil.h"44#include "Core/Config.h"45#include "Core/ConfigValues.h"46#include "Core/HW/StereoResampler.h"47#include "Core/HLE/__sceAudio.h"48#include "Core/Util/AudioFormat.h" // for clamp_u849#include "Core/System.h"5051#ifdef _M_SSE52#include <emmintrin.h>53#endif54#if PPSSPP_ARCH(ARM_NEON)55#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)56#include <arm64_neon.h>57#else58#include <arm_neon.h>59#endif60#endif6162StereoResampler::StereoResampler()63: m_maxBufsize(MAX_BUFSIZE_DEFAULT)64, m_targetBufsize(TARGET_BUFSIZE_DEFAULT) {65// Need to have space for the worst case in case it changes.66m_buffer = new int16_t[MAX_BUFSIZE_EXTRA * 2]();6768// Some Android devices are v-synced to non-60Hz framerates. We simply timestretch audio to fit.69// TODO: should only do this if auto frameskip is off?70float refresh = System_GetPropertyFloat(SYSPROP_DISPLAY_REFRESH_RATE);7172// If framerate is "close"...73if (refresh != 60.0f && refresh > 50.0f && refresh < 70.0f) {74int input_sample_rate = (int)(44100 * (refresh / 60.0f));75INFO_LOG(Log::Audio, "StereoResampler: Adjusting target sample rate to %dHz", input_sample_rate);76m_input_sample_rate = input_sample_rate;77}7879UpdateBufferSize();80}8182StereoResampler::~StereoResampler() {83delete[] m_buffer;84m_buffer = nullptr;85}8687void StereoResampler::UpdateBufferSize() {88if (g_Config.bExtraAudioBuffering) {89m_maxBufsize = MAX_BUFSIZE_EXTRA;90m_targetBufsize = TARGET_BUFSIZE_EXTRA;91} else {92m_maxBufsize = MAX_BUFSIZE_DEFAULT;93m_targetBufsize = TARGET_BUFSIZE_DEFAULT;9495int systemBufsize = System_GetPropertyInt(SYSPROP_AUDIO_FRAMES_PER_BUFFER);96if (systemBufsize > 0 && m_targetBufsize < systemBufsize + TARGET_BUFSIZE_MARGIN) {97m_targetBufsize = std::min(4096, systemBufsize + TARGET_BUFSIZE_MARGIN);98if (m_targetBufsize * 2 > MAX_BUFSIZE_DEFAULT)99m_maxBufsize = MAX_BUFSIZE_EXTRA;100}101}102}103104template<bool useShift>105inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size, s8 volShift) {106#ifdef _M_SSE107// Size will always be 16-byte aligned as the hwBlockSize is.108while (size >= 8) {109__m128i in1 = _mm_loadu_si128((__m128i *)in);110__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));111__m128i packed = _mm_packs_epi32(in1, in2);112if (useShift) {113packed = _mm_srai_epi16(packed, volShift);114}115_mm_storeu_si128((__m128i *)out, packed);116out += 8;117in += 8;118size -= 8;119}120#elif PPSSPP_ARCH(ARM_NEON)121// Dynamic shifts can only be left, but it's signed - negate to shift right.122int16x4_t signedVolShift = vdup_n_s16(-volShift);123while (size >= 8) {124int32x4_t in1 = vld1q_s32(in);125int32x4_t in2 = vld1q_s32(in + 4);126int16x4_t packed1 = vqmovn_s32(in1);127int16x4_t packed2 = vqmovn_s32(in2);128if (useShift) {129packed1 = vshl_s16(packed1, signedVolShift);130packed2 = vshl_s16(packed2, signedVolShift);131}132vst1_s16(out, packed1);133vst1_s16(out + 4, packed2);134out += 8;135in += 8;136size -= 8;137}138#endif139// This does the remainder if SIMD was used, otherwise it does it all.140for (size_t i = 0; i < size; i++) {141out[i] = clamp_s16(useShift ? (in[i] >> volShift) : in[i]);142}143}144145inline void ClampBufferToS16WithVolume(s16 *out, const s32 *in, size_t size) {146int volume = g_Config.iGlobalVolume;147if (PSP_CoreParameter().fpsLimit != FPSLimit::NORMAL || PSP_CoreParameter().fastForward) {148if (g_Config.iAltSpeedVolume != -1) {149volume = g_Config.iAltSpeedVolume;150}151}152153if (volume >= VOLUME_FULL) {154ClampBufferToS16<false>(out, in, size, 0);155} else if (volume <= VOLUME_OFF) {156memset(out, 0, size * sizeof(s16));157} else {158ClampBufferToS16<true>(out, in, size, VOLUME_FULL - (s8)volume);159}160}161162void StereoResampler::Clear() {163memset(m_buffer, 0, m_maxBufsize * 2 * sizeof(int16_t));164}165166inline int16_t MixSingleSample(int16_t s1, int16_t s2, uint16_t frac) {167int32_t value = s1 + (((s2 - s1) * frac) >> 16);168if (value < -32767)169return -32767;170else if (value > 32767)171return 32767;172else173return (int16_t)value;174}175176// Executed from sound stream thread, pulling sound out of the buffer.177unsigned int StereoResampler::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {178if (!samples)179return 0;180181unsigned int currentSample;182183// Cache access in non-volatile variable184// This is the only function changing the read value, so it's safe to185// cache it locally although it's written here.186// The writing pointer will be modified outside, but it will only increase,187// so we will just ignore new written data while interpolating (until it wraps...).188// Without this cache, the compiler wouldn't be allowed to optimize the189// interpolation loop.190u32 indexR = m_indexR.load();191u32 indexW = m_indexW.load();192193const int INDEX_MASK = (m_maxBufsize * 2 - 1);194195// This is only for debug visualization, not used for anything.196lastBufSize_ = ((indexW - indexR) & INDEX_MASK) / 2;197198// Drift prevention mechanism.199float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);200// If we had to discard samples the last frame due to underrun,201// apply an adjustment here. Otherwise we'll overestimate how many202// samples we need.203numLeft -= droppedSamples_;204droppedSamples_ = 0;205206// m_numLeftI here becomes a lowpass filtered version of numLeft.207m_numLeftI = (numLeft + m_numLeftI * (CONTROL_AVG - 1.0f)) / CONTROL_AVG;208209// Here we try to keep the buffer size around m_lowwatermark (which is210// really now more like desired_buffer_size) by adjusting the speed.211// Note that the speed of adjustment here does not take the buffer size into212// account. Since this is called once per "output frame", the frame size213// will affect how fast this algorithm reacts, which can't be a good thing.214float offset = (m_numLeftI - (float)m_targetBufsize) * CONTROL_FACTOR;215if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;216if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;217218output_sample_rate_ = (float)(m_input_sample_rate + offset);219const u32 ratio = (u32)(65536.0 * output_sample_rate_ / (double)sample_rate);220ratio_ = ratio;221// TODO: consider a higher-quality resampling algorithm.222// TODO: Add a fast path for 1:1.223u32 frac = m_frac;224for (currentSample = 0; currentSample < numSamples * 2; currentSample += 2) {225if (((indexW - indexR) & INDEX_MASK) <= 2) {226// Ran out!227// int missing = numSamples * 2 - currentSample;228// ILOG("Resampler underrun: %d (numSamples: %d, currentSample: %d)", missing, numSamples, currentSample / 2);229underrunCount_++;230break;231}232u32 indexR2 = indexR + 2; //next sample233s16 l1 = m_buffer[indexR & INDEX_MASK]; //current234s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current235s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next236s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next237samples[currentSample] = MixSingleSample(l1, l2, (u16)frac);238samples[currentSample + 1] = MixSingleSample(r1, r2, (u16)frac);239frac += ratio;240indexR += 2 * (frac >> 16);241frac &= 0xffff;242}243m_frac = frac;244245// Let's not count the underrun padding here.246outputSampleCount_ += currentSample / 2;247248// Padding with the last value to reduce clicking249short s[2];250s[0] = clamp_s16(m_buffer[(indexR - 1) & INDEX_MASK]);251s[1] = clamp_s16(m_buffer[(indexR - 2) & INDEX_MASK]);252for (; currentSample < numSamples * 2; currentSample += 2) {253samples[currentSample] = s[0];254samples[currentSample + 1] = s[1];255}256257// Flush cached variable258m_indexR.store(indexR);259260// TODO: What should we actually return here?261return currentSample / 2;262}263264// Executes on the emulator thread, pushing sound into the buffer.265void StereoResampler::PushSamples(const s32 *samples, unsigned int numSamples) {266inputSampleCount_ += numSamples;267268UpdateBufferSize();269const int INDEX_MASK = (m_maxBufsize * 2 - 1);270// Cache access in non-volatile variable271// indexR isn't allowed to cache in the audio throttling loop as it272// needs to get updates to not deadlock.273u32 indexW = m_indexW.load();274275u32 cap = m_maxBufsize * 2;276// If fast-forwarding, no need to fill up the entire buffer, just screws up timing after releasing the fast-forward button.277if (PSP_CoreParameter().fastForward) {278cap = m_targetBufsize * 2;279}280281// Check if we have enough free space282// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW283if (numSamples * 2 + ((indexW - m_indexR.load()) & INDEX_MASK) >= cap) {284if (!PSP_CoreParameter().fastForward) {285overrunCount_++;286}287// TODO: "Timestretch" by doing a windowed overlap with existing buffer content?288return;289}290291// Check if we need to roll over to the start of the buffer during the copy.292unsigned int indexW_left_samples = m_maxBufsize * 2 - (indexW & INDEX_MASK);293if (numSamples * 2 > indexW_left_samples) {294ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, indexW_left_samples);295ClampBufferToS16WithVolume(&m_buffer[0], samples + indexW_left_samples, numSamples * 2 - indexW_left_samples);296} else {297ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, numSamples * 2);298}299300m_indexW += numSamples * 2;301lastPushSize_ = numSamples;302}303304void StereoResampler::GetAudioDebugStats(char *buf, size_t bufSize) {305double elapsed = time_now_d() - startTime_;306307double effective_input_sample_rate = (double)inputSampleCount_ / elapsed;308double effective_output_sample_rate = (double)outputSampleCount_ / elapsed;309snprintf(buf, bufSize,310"Audio buffer: %d/%d (target: %d)\n"311"Filtered: %0.2f\n"312"Underruns: %d\n"313"Overruns: %d\n"314"Sample rate: %d (input: %d)\n"315"Effective input sample rate: %0.2f\n"316"Effective output sample rate: %0.2f\n"317"Push size: %d\n"318"Ratio: %0.6f\n",319lastBufSize_,320m_maxBufsize,321m_targetBufsize,322m_numLeftI,323underrunCountTotal_,324overrunCountTotal_,325(int)output_sample_rate_,326m_input_sample_rate,327effective_input_sample_rate,328effective_output_sample_rate,329lastPushSize_,330(float)ratio_ / 65536.0f);331underrunCountTotal_ += underrunCount_;332overrunCountTotal_ += overrunCount_;333underrunCount_ = 0;334overrunCount_ = 0;335336// Use this to remove the bias from the startup.337// if (elapsed > 3.0) {338//ResetStatCounters();339// }340}341342void StereoResampler::ResetStatCounters() {343underrunCount_ = 0;344overrunCount_ = 0;345underrunCountTotal_ = 0;346overrunCountTotal_ = 0;347inputSampleCount_ = 0;348outputSampleCount_ = 0;349startTime_ = time_now_d();350}351352353