// CrossSIMD
//
// This file contains cross-instruction-set SIMD instruction wrappers.
//
// This specific file (and a future CrossSIMD.cpp) is under public domain or MIT, unlike most of the rest of the emulator.

#pragma once

#include <cstring>
#include "Common/Math/SIMDHeaders.h"

#define TEST_FALLBACK 0

#if PPSSPP_ARCH(SSE2) && !TEST_FALLBACK

// The point of this, as opposed to a float4 array, is to almost force the compiler
// to keep the matrix in registers, rather than loading on every access.
struct Mat4F32 {
	Mat4F32() {}
	Mat4F32(const float *matrix) {
		col0 = _mm_loadu_ps(matrix);
		col1 = _mm_loadu_ps(matrix + 4);
		col2 = _mm_loadu_ps(matrix + 8);
		col3 = _mm_loadu_ps(matrix + 12);
	}
	void Store(float *m) {
		_mm_storeu_ps(m, col0);
		_mm_storeu_ps(m + 4, col1);
		_mm_storeu_ps(m + 8, col2);
		_mm_storeu_ps(m + 12, col3);
	}

	// Unlike the old one, this one is careful about not loading out-of-range data.
	// The last two loads overlap.
	static Mat4F32 Load4x3(const float *m) {
		Mat4F32 result;
		alignas(16) static const uint32_t mask[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0 };
		alignas(16) static const float onelane3[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
		__m128 mask1110 = _mm_loadu_ps((const float *)mask);
		result.col0 = _mm_and_ps(_mm_loadu_ps(m), mask1110);
		result.col1 = _mm_and_ps(_mm_loadu_ps(m + 3), mask1110);
		result.col2 = _mm_and_ps(_mm_loadu_ps(m + 6), mask1110);
		__m128 lastCol = _mm_loadu_ps(m + 8);
		result.col3 = _mm_or_ps(_mm_and_ps(_mm_shuffle_ps(lastCol, lastCol, _MM_SHUFFLE(3, 3, 2, 1)), mask1110), _mm_load_ps(onelane3));
		return result;
	}

	__m128 col0;
	__m128 col1;
	__m128 col2;
	__m128 col3;
};

// The columns are spread out between the data*. This is just intermediate storage for multiplication.
struct Mat4x3F32 {
	Mat4x3F32(const float *matrix) {
		data0 = _mm_loadu_ps(matrix);
		data1 = _mm_loadu_ps(matrix + 4);
		data2 = _mm_loadu_ps(matrix + 8);
	}

	__m128 data0;
	__m128 data1;
	__m128 data2;
};

inline Mat4F32 Mul4x4By4x4(Mat4F32 a, Mat4F32 b) {
	Mat4F32 result;

	__m128 r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.col0, 0));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.col0, 1)));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.col0, 2)));
	result.col0 = _mm_add_ps(r_col, _mm_mul_ps(b.col3, _mm_splat_lane_ps(a.col0, 3)));

	r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.col1, 0));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.col1, 1)));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.col1, 2)));
	result.col1 = _mm_add_ps(r_col, _mm_mul_ps(b.col3, _mm_splat_lane_ps(a.col1, 3)));

	r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.col2, 0));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.col2, 1)));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.col2, 2)));
	result.col2 = _mm_add_ps(r_col, _mm_mul_ps(b.col3, _mm_splat_lane_ps(a.col2, 3)));

	r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.col3, 0));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.col3, 1)));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.col3, 2)));
	result.col3 = _mm_add_ps(r_col, _mm_mul_ps(b.col3, _mm_splat_lane_ps(a.col3, 3)));

	return result;
}

inline Mat4F32 Mul4x3By4x4(Mat4x3F32 a, Mat4F32 b) {
	Mat4F32 result;

	__m128 r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.data0, 0));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.data0, 1)));
	result.col0 = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.data0, 2)));

	r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.data0, 3));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.data1, 0)));
	result.col1 = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.data1, 1)));

	r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.data1, 2));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.data1, 3)));
	result.col2 = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.data2, 0)));

	r_col = _mm_mul_ps(b.col0, _mm_splat_lane_ps(a.data2, 1));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col1, _mm_splat_lane_ps(a.data2, 2)));
	r_col = _mm_add_ps(r_col, _mm_mul_ps(b.col2, _mm_splat_lane_ps(a.data2, 3)));

	// The last entry has an implied 1.0f.
	result.col3 = _mm_add_ps(r_col, b.col3);
	return result;
}
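
// Example usage (illustrative only, not part of this header; the variable names are made up):
//
//   Mat4x3F32 world(worldMatrixData);        // 12 consecutive floats, the columns of a 4x3 matrix
//   Mat4F32 viewProj(viewProjMatrixData);    // 16 consecutive floats, a full 4x4 matrix
//   Mat4F32 worldViewProj = Mul4x3By4x4(world, viewProj);
//   float out[16];
//   worldViewProj.Store(out);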

struct Vec4S32 {
	__m128i v;

	static Vec4S32 Zero() { return Vec4S32{ _mm_setzero_si128() }; }
	static Vec4S32 Splat(int lane) { return Vec4S32{ _mm_set1_epi32(lane) }; }

	static Vec4S32 Load(const int *src) { return Vec4S32{ _mm_loadu_si128((const __m128i *)src) }; }
	static Vec4S32 LoadAligned(const int *src) { return Vec4S32{ _mm_load_si128((const __m128i *)src) }; }
	void Store(int *dst) { _mm_storeu_si128((__m128i *)dst, v); }
	void Store2(int *dst) { _mm_storel_epi64((__m128i *)dst, v); }
	void StoreAligned(int *dst) { _mm_store_si128((__m128i *)dst, v); }

	Vec4S32 SignBits32ToMask() {
		return Vec4S32{
			_mm_srai_epi32(v, 31)
		};
	}

	// Reads 16 bits from both operands, produces a 32-bit result per lane.
	// On SSE2, much faster than _mm_mullo_epi32_SSE2.
	// On NEON though, it'll read the full 32 bits, so beware.
	// See https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/.
	Vec4S32 Mul16(Vec4S32 other) const {
		// Note that we only need to mask one of the inputs: multiplying by zero is zero,
		// so it doesn't matter what the upper halfword of each 32-bit word is in the
		// other register.
		return Vec4S32{ _mm_madd_epi16(v, _mm_and_si128(other.v, _mm_set1_epi32(0x0000FFFF))) };
	}

	Vec4S32 SignExtend16() const { return Vec4S32{ _mm_srai_epi32(_mm_slli_epi32(v, 16), 16) }; }
	// NOTE: These can be done in sequence, but when done, you must FixupAfterMinMax to get valid output.
	Vec4S32 Min16(Vec4S32 other) const { return Vec4S32{ _mm_min_epi16(v, other.v) }; }
	Vec4S32 Max16(Vec4S32 other) const { return Vec4S32{ _mm_max_epi16(v, other.v) }; }
	Vec4S32 FixupAfterMinMax() const { return SignExtend16(); }
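
	// Example (illustrative): clamping values that are already known to fit in signed
	// 16 bits, then restoring proper 32-bit lanes afterwards:
	//
	//   Vec4S32 clamped = x.Max16(lo).Min16(hi).FixupAfterMinMax();
	//
	// Here x, lo and hi are all assumed to be within the signed 16-bit range.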

	Vec4S32 operator +(Vec4S32 other) const { return Vec4S32{ _mm_add_epi32(v, other.v) }; }
	Vec4S32 operator -(Vec4S32 other) const { return Vec4S32{ _mm_sub_epi32(v, other.v) }; }
	Vec4S32 operator |(Vec4S32 other) const { return Vec4S32{ _mm_or_si128(v, other.v) }; }
	Vec4S32 operator &(Vec4S32 other) const { return Vec4S32{ _mm_and_si128(v, other.v) }; }
	Vec4S32 operator ^(Vec4S32 other) const { return Vec4S32{ _mm_xor_si128(v, other.v) }; }
	// TODO: andnot
	void operator +=(Vec4S32 other) { v = _mm_add_epi32(v, other.v); }
	void operator -=(Vec4S32 other) { v = _mm_sub_epi32(v, other.v); }
	void operator &=(Vec4S32 other) { v = _mm_and_si128(v, other.v); }
	void operator |=(Vec4S32 other) { v = _mm_or_si128(v, other.v); }
	void operator ^=(Vec4S32 other) { v = _mm_xor_si128(v, other.v); }

	// NOTE: _mm_andnot_si128 inverts its first parameter, then performs the AND.
	Vec4S32 AndNot(Vec4S32 inverted) const { return Vec4S32{ _mm_andnot_si128(inverted.v, v) }; }
	Vec4S32 Mul(Vec4S32 other) const { return *this * other; }

	template<int imm>
	Vec4S32 Shl() const { return Vec4S32{ imm == 0 ? v : _mm_slli_epi32(v, imm) }; }

	// NOTE: May be slow.
	int operator[](size_t index) const { return ((int *)&v)[index]; }

	// NOTE: This uses a CrossSIMD wrapper if we don't compile with SSE4 support, and is thus slow.
	Vec4S32 operator *(Vec4S32 other) const { return Vec4S32{ _mm_mullo_epi32_SSE2(v, other.v) }; }  // (ab3,ab2,ab1,ab0)

	Vec4S32 CompareEq(Vec4S32 other) const { return Vec4S32{ _mm_cmpeq_epi32(v, other.v) }; }
	Vec4S32 CompareLt(Vec4S32 other) const { return Vec4S32{ _mm_cmplt_epi32(v, other.v) }; }
	Vec4S32 CompareGt(Vec4S32 other) const { return Vec4S32{ _mm_cmpgt_epi32(v, other.v) }; }
};

inline bool AnyZeroSignBit(Vec4S32 value) {
	return _mm_movemask_ps(_mm_castsi128_ps(value.v)) != 0xF;
}
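
// Explanation (added): _mm_movemask_ps gathers the four sign bits into a 4-bit mask.
// A mask of 0xF means every lane is negative; anything else means at least one lane
// has a clear sign bit (i.e. the lane is non-negative), which is what this reports.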

struct Vec4F32 {
	__m128 v;

	static Vec4F32 Zero() { return Vec4F32{ _mm_setzero_ps() }; }
	static Vec4F32 Splat(float lane) { return Vec4F32{ _mm_set1_ps(lane) }; }

	static Vec4F32 Load(const float *src) { return Vec4F32{ _mm_loadu_ps(src) }; }
	static Vec4F32 LoadAligned(const float *src) { return Vec4F32{ _mm_load_ps(src) }; }
	static Vec4F32 LoadS8Norm(const int8_t *src) {
		__m128i value = _mm_set1_epi32(*((uint32_t *)src));
		__m128i value32 = _mm_unpacklo_epi16(_mm_unpacklo_epi8(value, value), value);
		// Sign extension. A bit ugly without SSE4.
		value32 = _mm_srai_epi32(value32, 24);
		return Vec4F32 { _mm_mul_ps(_mm_cvtepi32_ps(value32), _mm_set1_ps(1.0f / 128.0f)) };
	}
	static Vec4F32 LoadS16Norm(const int16_t *src) { // Divides by 32768.0f
		__m128i bits = _mm_loadl_epi64((const __m128i *)src);
		// Sign extension. A bit ugly without SSE4.
		bits = _mm_srai_epi32(_mm_unpacklo_epi16(bits, bits), 16);
		return Vec4F32 { _mm_mul_ps(_mm_cvtepi32_ps(bits), _mm_set1_ps(1.0f / 32768.0f)) };
	}

	static Vec4F32 LoadConvertS16(const int16_t *src) { // Note: will load 8 bytes
		__m128i value = _mm_loadl_epi64((const __m128i *)src);
		// 16-bit to 32-bit: unpack against itself, then arithmetic shift right to sign extend.
		return Vec4F32{ _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(value, value), 16)) };
	}

	static Vec4F32 LoadConvertS8(const int8_t *src) { // Note: will load 8 bytes
		__m128i value = _mm_loadl_epi64((const __m128i *)src);
		__m128i value16 = _mm_unpacklo_epi8(value, value);
		// 16-bit to 32-bit: unpack against itself, then arithmetic shift right to sign extend.
		return Vec4F32{ _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(value16, value16), 24)) };
	}

	// NOTE: Does not normalize - the values stay in the 0..255 range.
	static Vec4F32 LoadConvertU8(const uint8_t *src) { // Note: will load 8 bytes
		__m128i value = _mm_loadl_epi64((const __m128i *)src);
		__m128i zero = _mm_setzero_si128();
		__m128i value16 = _mm_unpacklo_epi8(value, zero);
		// 8-bit to 32-bit: zero extend by unpacking against zero twice.
		return Vec4F32{ _mm_cvtepi32_ps(_mm_unpacklo_epi16(value16, zero)) };
	}

	static Vec4F32 LoadF24x3_One(const uint32_t *src) {
		alignas(16) static const uint32_t mask[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0 };
		alignas(16) static const float onelane3[4] = { 0.0f, 0.0f, 0.0f, 1.0f };

		__m128 value = _mm_castsi128_ps(_mm_slli_epi32(_mm_loadu_si128((const __m128i *)src), 8));
		return Vec4F32{ _mm_or_ps(_mm_and_ps(value, _mm_load_ps((const float *)mask)), _mm_load_ps(onelane3)) };
	}

	void Store(float *dst) { _mm_storeu_ps(dst, v); }
	void Store2(float *dst) { _mm_storel_epi64((__m128i *)dst, _mm_castps_si128(v)); }
	void StoreAligned(float *dst) { _mm_store_ps(dst, v); }
	void Store3(float *dst) {
		// This seems to be the best way with SSE2.
		_mm_storel_pd((double *)dst, _mm_castps_pd(v));
		_mm_store_ss(dst + 2, _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)));
	}
	void StoreConvertToU8(uint8_t *dst) {
		__m128i zero = _mm_setzero_si128();
		__m128i ivalue = _mm_packus_epi16(_mm_packs_epi32(_mm_cvttps_epi32(v), zero), zero);
		int32_t lo = _mm_cvtsi128_si32(ivalue);
		memcpy(dst, &lo, 4);
	}
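
	// Added note on the sequence above: _mm_cvttps_epi32 truncates float -> int32,
	// _mm_packs_epi32 saturates int32 -> int16, and _mm_packus_epi16 saturates
	// int16 -> uint8, so the values are clamped to 0..255 before the 4 bytes are written.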

	static Vec4F32 FromVec4S32(Vec4S32 other) { return Vec4F32{ _mm_cvtepi32_ps(other.v) }; }

	Vec4F32 operator +(Vec4F32 other) const { return Vec4F32{ _mm_add_ps(v, other.v) }; }
	Vec4F32 operator -(Vec4F32 other) const { return Vec4F32{ _mm_sub_ps(v, other.v) }; }
	Vec4F32 operator *(Vec4F32 other) const { return Vec4F32{ _mm_mul_ps(v, other.v) }; }
	Vec4F32 Min(Vec4F32 other) const { return Vec4F32{ _mm_min_ps(v, other.v) }; }
	Vec4F32 Max(Vec4F32 other) const { return Vec4F32{ _mm_max_ps(v, other.v) }; }
	void operator +=(Vec4F32 other) { v = _mm_add_ps(v, other.v); }
	void operator -=(Vec4F32 other) { v = _mm_sub_ps(v, other.v); }
	void operator *=(Vec4F32 other) { v = _mm_mul_ps(v, other.v); }
	void operator /=(Vec4F32 other) { v = _mm_div_ps(v, other.v); }
	void operator &=(Vec4S32 other) { v = _mm_and_ps(v, _mm_castsi128_ps(other.v)); }
	Vec4F32 operator *(float f) const { return Vec4F32{ _mm_mul_ps(v, _mm_set1_ps(f)) }; }
	void operator *=(float f) { v = _mm_mul_ps(v, _mm_set1_ps(f)); }
	// NOTE: May be slow.
	float operator[](size_t index) const { return ((float *)&v)[index]; }

	Vec4F32 Mul(float f) const { return Vec4F32{ _mm_mul_ps(v, _mm_set1_ps(f)) }; }
	Vec4F32 RecipApprox() const { return Vec4F32{ _mm_rcp_ps(v) }; }
	Vec4F32 Recip() const { return Vec4F32{ _mm_div_ps(_mm_set1_ps(1.0f), v) }; }

	Vec4F32 Clamp(float lower, float higher) const {
		return Vec4F32{
			_mm_min_ps(_mm_max_ps(v, _mm_set1_ps(lower)), _mm_set1_ps(higher))
		};
	}

	Vec4F32 WithLane3Zero() const {
		alignas(16) static const uint32_t mask[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0 };
		return Vec4F32{ _mm_and_ps(v, _mm_load_ps((const float *)mask)) };
	}

	Vec4F32 WithLane3One() const {
		alignas(16) static const uint32_t mask[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0 };
		alignas(16) static const float onelane3[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
		return Vec4F32{ _mm_or_ps(_mm_and_ps(v, _mm_load_ps((const float *)mask)), _mm_load_ps((const float *)onelane3)) };
	}

	inline Vec4F32 AsVec3ByMatrix44(const Mat4F32 &m) {
		return Vec4F32{ _mm_add_ps(
			_mm_add_ps(
				_mm_mul_ps(m.col0, _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0))),
				_mm_mul_ps(m.col1, _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)))
			),
			_mm_add_ps(
				_mm_mul_ps(m.col2, _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2))),
				m.col3)
			)
		};
	}
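
	// Added note: treating this vector as (x, y, z, _), the result above is
	//   x * col0 + y * col1 + z * col2 + col3
	// i.e. a position transform with an implied w = 1.0f.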

	static void Transpose(Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) {
		_MM_TRANSPOSE4_PS(col0.v, col1.v, col2.v, col3.v);
	}

	// This is here because ARM64 can do this very efficiently.
	static void LoadTranspose(const float *src, Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) {
		col0.v = _mm_loadu_ps(src);
		col1.v = _mm_loadu_ps(src + 4);
		col2.v = _mm_loadu_ps(src + 8);
		col3.v = _mm_loadu_ps(src + 12);
		_MM_TRANSPOSE4_PS(col0.v, col1.v, col2.v, col3.v);
	}

	Vec4S32 CompareEq(Vec4F32 other) const { return Vec4S32{ _mm_castps_si128(_mm_cmpeq_ps(v, other.v)) }; }
	Vec4S32 CompareLt(Vec4F32 other) const { return Vec4S32{ _mm_castps_si128(_mm_cmplt_ps(v, other.v)) }; }
	Vec4S32 CompareGt(Vec4F32 other) const { return Vec4S32{ _mm_castps_si128(_mm_cmpgt_ps(v, other.v)) }; }

	template<int i> float GetLane() const {
		return _mm_cvtss_f32(_mm_shuffle_ps(v, v, _MM_SHUFFLE(i, i, i, i)));
	}
};

inline Vec4S32 Vec4S32FromF32(Vec4F32 f) { return Vec4S32{ _mm_cvttps_epi32(f.v) }; }
inline Vec4F32 Vec4F32FromS32(Vec4S32 f) { return Vec4F32{ _mm_cvtepi32_ps(f.v) }; }

inline bool AnyZeroSignBit(Vec4F32 value) {
	return _mm_movemask_ps(value.v) != 0xF;
}

// Make sure the W component of scale is 1.0f.
inline void ScaleInplace(Mat4F32 &m, Vec4F32 scale) {
	m.col0 = _mm_mul_ps(m.col0, scale.v);
	m.col1 = _mm_mul_ps(m.col1, scale.v);
	m.col2 = _mm_mul_ps(m.col2, scale.v);
	m.col3 = _mm_mul_ps(m.col3, scale.v);
}

inline void TranslateAndScaleInplace(Mat4F32 &m, Vec4F32 scale, Vec4F32 translate) {
	m.col0 = _mm_add_ps(_mm_mul_ps(m.col0, scale.v), _mm_mul_ps(translate.v, _mm_shuffle_ps(m.col0, m.col0, _MM_SHUFFLE(3, 3, 3, 3))));
	m.col1 = _mm_add_ps(_mm_mul_ps(m.col1, scale.v), _mm_mul_ps(translate.v, _mm_shuffle_ps(m.col1, m.col1, _MM_SHUFFLE(3, 3, 3, 3))));
	m.col2 = _mm_add_ps(_mm_mul_ps(m.col2, scale.v), _mm_mul_ps(translate.v, _mm_shuffle_ps(m.col2, m.col2, _MM_SHUFFLE(3, 3, 3, 3))));
	m.col3 = _mm_add_ps(_mm_mul_ps(m.col3, scale.v), _mm_mul_ps(translate.v, _mm_shuffle_ps(m.col3, m.col3, _MM_SHUFFLE(3, 3, 3, 3))));
}
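
// Added note: each column above is transformed as col' = col * scale + translate * col.w.
// As the comment on the NEON version below spells out, scale.w should be 1.0f and
// translate.w should be 0.0f so that the w row of the matrix is left intact.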

struct Vec4U16 {
	__m128i v;  // we only use the lower 64 bits.

	static Vec4U16 Zero() { return Vec4U16{ _mm_setzero_si128() }; }
	// static Vec4U16 AllOnes() { return Vec4U16{ _mm_cmpeq_epi16(_mm_setzero_si128(), _mm_setzero_si128()) }; }

	static Vec4U16 Load(const uint16_t *mem) { return Vec4U16{ _mm_loadl_epi64((__m128i *)mem) }; }
	void Store(uint16_t *mem) { _mm_storel_epi64((__m128i *)mem, v); }

	// NOTE: 16-bit signed saturation! Will work for a lot of things, but not all.
	static Vec4U16 FromVec4S32(Vec4S32 v) {
		return Vec4U16{ _mm_packu_epi32_SSE2(v.v) };
	}
	static Vec4U16 FromVec4F32(Vec4F32 v) {
		return Vec4U16{ _mm_packu_epi32_SSE2(_mm_cvtps_epi32(v.v)) };
	}

	Vec4U16 operator |(Vec4U16 other) const { return Vec4U16{ _mm_or_si128(v, other.v) }; }
	Vec4U16 operator &(Vec4U16 other) const { return Vec4U16{ _mm_and_si128(v, other.v) }; }
	Vec4U16 operator ^(Vec4U16 other) const { return Vec4U16{ _mm_xor_si128(v, other.v) }; }

	Vec4U16 Max(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; }
	Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_min_epu16_SSE2(v, other.v) }; }
	Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; }

	inline Vec4U16 AndNot(Vec4U16 inverted) {
		return Vec4U16{
			_mm_andnot_si128(inverted.v, v)  // NOTE: _mm_andnot_si128 inverts its first parameter, then performs the AND.
		};
	}
};

struct Vec8U16 {
	__m128i v;

	static Vec8U16 Zero() { return Vec8U16{ _mm_setzero_si128() }; }
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ _mm_set1_epi16((int16_t)value) }; }

	static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ _mm_loadu_si128((__m128i *)mem) }; }
	void Store(uint16_t *mem) { _mm_storeu_si128((__m128i *)mem, v); }
};

inline Vec4U16 SignBits32ToMaskU16(Vec4S32 v) {
	__m128i temp = _mm_srai_epi32(v.v, 31);
	return Vec4U16 {
		_mm_packs_epi32(temp, temp)
	};
}

#elif PPSSPP_ARCH(ARM_NEON) && !TEST_FALLBACK

struct Mat4F32 {
	Mat4F32() {}
	Mat4F32(const float *matrix) {
		col0 = vld1q_f32(matrix);
		col1 = vld1q_f32(matrix + 4);
		col2 = vld1q_f32(matrix + 8);
		col3 = vld1q_f32(matrix + 12);
	}
	void Store(float *m) {
		vst1q_f32(m, col0);
		vst1q_f32(m + 4, col1);
		vst1q_f32(m + 8, col2);
		vst1q_f32(m + 12, col3);
	}

	// NOTE: Unlike the SSE2 version, this still reads 4 floats starting at m + 9,
	// i.e. one float past the end of a 4x3 matrix (see the TODO below).
	static Mat4F32 Load4x3(const float *m) {
		Mat4F32 result;
		result.col0 = vsetq_lane_f32(0.0f, vld1q_f32(m), 3);
		result.col1 = vsetq_lane_f32(0.0f, vld1q_f32(m + 3), 3);
		result.col2 = vsetq_lane_f32(0.0f, vld1q_f32(m + 6), 3);
		result.col3 = vsetq_lane_f32(1.0f, vld1q_f32(m + 9), 3);  // TODO: Fix this out of bounds read
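		// A bounds-safe alternative (untested sketch, not the shipped code): load m[9..10]
		// as a pair, then m[11] into the low lane of a vector whose high lane is 1.0f:
		//   float32x2_t lo = vld1_f32(m + 9);
		//   float32x2_t hi = vld1_lane_f32(m + 11, vdup_n_f32(1.0f), 0);
		//   result.col3 = vcombine_f32(lo, hi);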
		return result;
	}

	float32x4_t col0;
	float32x4_t col1;
	float32x4_t col2;
	float32x4_t col3;
};

// The columns are spread out between the data*. This is just intermediate storage for multiplication.
struct Mat4x3F32 {
	Mat4x3F32(const float *matrix) {
		data0 = vld1q_f32(matrix);
		data1 = vld1q_f32(matrix + 4);
		data2 = vld1q_f32(matrix + 8);
	}

	float32x4_t data0;
	float32x4_t data1;
	float32x4_t data2;
};

inline Mat4F32 Mul4x4By4x4(Mat4F32 a, Mat4F32 b) {
	Mat4F32 result;

	float32x4_t r_col = vmulq_laneq_f32(b.col0, a.col0, 0);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.col0, 1);
	r_col = vfmaq_laneq_f32(r_col, b.col2, a.col0, 2);
	result.col0 = vfmaq_laneq_f32(r_col, b.col3, a.col0, 3);

	r_col = vmulq_laneq_f32(b.col0, a.col1, 0);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.col1, 1);
	r_col = vfmaq_laneq_f32(r_col, b.col2, a.col1, 2);
	result.col1 = vfmaq_laneq_f32(r_col, b.col3, a.col1, 3);

	r_col = vmulq_laneq_f32(b.col0, a.col2, 0);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.col2, 1);
	r_col = vfmaq_laneq_f32(r_col, b.col2, a.col2, 2);
	result.col2 = vfmaq_laneq_f32(r_col, b.col3, a.col2, 3);

	r_col = vmulq_laneq_f32(b.col0, a.col3, 0);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.col3, 1);
	r_col = vfmaq_laneq_f32(r_col, b.col2, a.col3, 2);
	result.col3 = vfmaq_laneq_f32(r_col, b.col3, a.col3, 3);

	return result;
}

inline Mat4F32 Mul4x3By4x4(Mat4x3F32 a, Mat4F32 b) {
	Mat4F32 result;

	float32x4_t r_col = vmulq_laneq_f32(b.col0, a.data0, 0);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.data0, 1);
	result.col0 = vfmaq_laneq_f32(r_col, b.col2, a.data0, 2);

	r_col = vmulq_laneq_f32(b.col0, a.data0, 3);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.data1, 0);
	result.col1 = vfmaq_laneq_f32(r_col, b.col2, a.data1, 1);

	r_col = vmulq_laneq_f32(b.col0, a.data1, 2);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.data1, 3);
	result.col2 = vfmaq_laneq_f32(r_col, b.col2, a.data2, 0);

	r_col = vmulq_laneq_f32(b.col0, a.data2, 1);
	r_col = vfmaq_laneq_f32(r_col, b.col1, a.data2, 2);
	r_col = vfmaq_laneq_f32(r_col, b.col2, a.data2, 3);

	// The last entry has an implied 1.0f.
	result.col3 = vaddq_f32(r_col, b.col3);
	return result;
}

struct Vec4S32 {
	int32x4_t v;

	static Vec4S32 Zero() { return Vec4S32{ vdupq_n_s32(0) }; }
	static Vec4S32 Splat(int lane) { return Vec4S32{ vdupq_n_s32(lane) }; }

	static Vec4S32 Load(const int *src) { return Vec4S32{ vld1q_s32(src) }; }
	static Vec4S32 LoadAligned(const int *src) { return Vec4S32{ vld1q_s32(src) }; }
	void Store(int *dst) { vst1q_s32(dst, v); }
	void Store2(int *dst) { vst1_s32(dst, vget_low_s32(v)); }
	void StoreAligned(int *dst) { vst1q_s32(dst, v); }

	// Warning: Unlike on x86, this is a full 32-bit multiplication.
	Vec4S32 Mul16(Vec4S32 other) const { return Vec4S32{ vmulq_s32(v, other.v) }; }

	Vec4S32 SignExtend16() const { return Vec4S32{ vshrq_n_s32(vshlq_n_s32(v, 16), 16) }; }
	// NOTE: These can be done in sequence, but when done, you must FixupAfterMinMax to get valid output (on SSE2 at least).
	Vec4S32 Min16(Vec4S32 other) const { return Vec4S32{ vminq_s32(v, other.v) }; }
	Vec4S32 Max16(Vec4S32 other) const { return Vec4S32{ vmaxq_s32(v, other.v) }; }
	Vec4S32 FixupAfterMinMax() const { return Vec4S32{ v }; }

	// NOTE: May be slow.
	int operator[](size_t index) const { return ((int *)&v)[index]; }

	Vec4S32 operator +(Vec4S32 other) const { return Vec4S32{ vaddq_s32(v, other.v) }; }
	Vec4S32 operator -(Vec4S32 other) const { return Vec4S32{ vsubq_s32(v, other.v) }; }
	Vec4S32 operator *(Vec4S32 other) const { return Vec4S32{ vmulq_s32(v, other.v) }; }
	Vec4S32 operator |(Vec4S32 other) const { return Vec4S32{ vorrq_s32(v, other.v) }; }
	Vec4S32 operator &(Vec4S32 other) const { return Vec4S32{ vandq_s32(v, other.v) }; }
	Vec4S32 operator ^(Vec4S32 other) const { return Vec4S32{ veorq_s32(v, other.v) }; }
	Vec4S32 AndNot(Vec4S32 inverted) const { return Vec4S32{ vandq_s32(v, vmvnq_s32(inverted.v)) }; }
	Vec4S32 Mul(Vec4S32 other) const { return Vec4S32{ vmulq_s32(v, other.v) }; }
	void operator &=(Vec4S32 other) { v = vandq_s32(v, other.v); }

	template<int imm>
	Vec4S32 Shl() const { return Vec4S32{ vshlq_n_s32(v, imm) }; }

	void operator +=(Vec4S32 other) { v = vaddq_s32(v, other.v); }
	void operator -=(Vec4S32 other) { v = vsubq_s32(v, other.v); }

	Vec4S32 CompareEq(Vec4S32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vceqq_s32(v, other.v)) }; }
	Vec4S32 CompareLt(Vec4S32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vcltq_s32(v, other.v)) }; }
	Vec4S32 CompareGt(Vec4S32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vcgtq_s32(v, other.v)) }; }
	Vec4S32 CompareGtZero() const { return Vec4S32{ vreinterpretq_s32_u32(vcgtq_s32(v, vdupq_n_s32(0))) }; }
};

struct Vec4F32 {
	float32x4_t v;

	static Vec4F32 Zero() { return Vec4F32{ vdupq_n_f32(0.0f) }; }
	static Vec4F32 Splat(float lane) { return Vec4F32{ vdupq_n_f32(lane) }; }

	static Vec4F32 Load(const float *src) { return Vec4F32{ vld1q_f32(src) }; }
	static Vec4F32 LoadS8Norm(const int8_t *src) {
		const int8x8_t value = (int8x8_t)vdup_n_u32(*((uint32_t *)src));
		const int16x8_t value16 = vmovl_s8(value);
		return Vec4F32 { vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(value16)), 7) };
	}
	static Vec4F32 LoadS16Norm(const int16_t *src) { // Divides by 32768.0f
		return Vec4F32 { vcvtq_n_f32_s32(vmovl_s16(vld1_s16(src)), 15) };
	}
	static Vec4F32 LoadAligned(const float *src) { return Vec4F32{ vld1q_f32(src) }; }

	static Vec4F32 LoadConvertS16(const int16_t *src) {
		int16x4_t value = vld1_s16(src);
		return Vec4F32{ vcvtq_f32_s32(vmovl_s16(value)) };
	}

	static Vec4F32 LoadConvertS8(const int8_t *src) { // Note: will load 8 bytes, not 4. Only the first 4 bytes will be used.
		int8x8_t value = vld1_s8(src);
		int16x4_t value16 = vget_low_s16(vmovl_s8(value));
		return Vec4F32{ vcvtq_f32_s32(vmovl_s16(value16)) };
	}

	static Vec4F32 LoadConvertU8(const uint8_t *src) { // Note: will load 8 bytes, not 4. Only the first 4 bytes will be used.
		uint8x8_t value = vld1_u8(src);
		uint16x4_t value16 = vget_low_u16(vmovl_u8(value));
		return Vec4F32{ vcvtq_f32_u32(vmovl_u16(value16)) };
	}

	static Vec4F32 LoadF24x3_One(const uint32_t *src) {
		return Vec4F32{ vsetq_lane_f32(1.0f, vreinterpretq_f32_u32(vshlq_n_u32(vld1q_u32(src), 8)), 3) };
	}

	static Vec4F32 FromVec4S32(Vec4S32 other) {
		return Vec4F32{ vcvtq_f32_s32(other.v) };
	}

	void Store(float *dst) { vst1q_f32(dst, v); }
	void Store2(float *dst) { vst1_f32(dst, vget_low_f32(v)); }
	void StoreAligned(float *dst) { vst1q_f32(dst, v); }
	void Store3(float *dst) {
		// TODO: There might be better ways. Try to avoid this when possible.
		vst1_f32(dst, vget_low_f32(v));
#if PPSSPP_ARCH(ARM64_NEON)
		vst1q_lane_f32(dst + 2, v, 2);
#else
		dst[2] = vgetq_lane_f32(v, 2);
#endif
	}
	void StoreConvertToU8(uint8_t *dest) {
		uint32x4_t ivalue32 = vcvtq_u32_f32(v);
		uint16x4_t ivalue16 = vqmovn_u32(ivalue32);
		uint8x8_t ivalue8 = vqmovn_u16(vcombine_u16(ivalue16, ivalue16));  // Is there no way to avoid the combine here?
		uint32_t value = vget_lane_u32(vreinterpret_u32_u8(ivalue8), 0);
		memcpy(dest, &value, sizeof(uint32_t));
	}

	// NOTE: May be slow.
	float operator[](size_t index) const { return ((float *)&v)[index]; }

	Vec4F32 operator +(Vec4F32 other) const { return Vec4F32{ vaddq_f32(v, other.v) }; }
	Vec4F32 operator -(Vec4F32 other) const { return Vec4F32{ vsubq_f32(v, other.v) }; }
	Vec4F32 operator *(Vec4F32 other) const { return Vec4F32{ vmulq_f32(v, other.v) }; }
	Vec4F32 Min(Vec4F32 other) const { return Vec4F32{ vminq_f32(v, other.v) }; }
	Vec4F32 Max(Vec4F32 other) const { return Vec4F32{ vmaxq_f32(v, other.v) }; }
	void operator +=(Vec4F32 other) { v = vaddq_f32(v, other.v); }
	void operator -=(Vec4F32 other) { v = vsubq_f32(v, other.v); }
	void operator *=(Vec4F32 other) { v = vmulq_f32(v, other.v); }
#if PPSSPP_ARCH(ARM64_NEON)
	void operator /=(Vec4F32 other) { v = vdivq_f32(v, other.v); }
#else
	// ARM32 doesn't have vdivq.
	void operator /=(Vec4F32 other) { v = vmulq_f32(v, other.Recip().v); }
#endif
	void operator &=(Vec4S32 other) { v = vreinterpretq_f32_s32(vandq_s32(vreinterpretq_s32_f32(v), other.v)); }
	Vec4F32 operator *(float f) const { return Vec4F32{ vmulq_f32(v, vdupq_n_f32(f)) }; }
	void operator *=(float f) { v = vmulq_f32(v, vdupq_n_f32(f)); }

	Vec4F32 Mul(float f) const { return Vec4F32{ vmulq_f32(v, vdupq_n_f32(f)) }; }

	Vec4F32 Recip() const {
		float32x4_t recip = vrecpeq_f32(v);
		// Use a couple Newton-Raphson steps to refine the estimate.
		// To save one iteration at the expense of accuracy, use RecipApprox().
		recip = vmulq_f32(vrecpsq_f32(v, recip), recip);
		recip = vmulq_f32(vrecpsq_f32(v, recip), recip);
		return Vec4F32{ recip };
	}
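
	// Added note: vrecpsq_f32(v, x) computes (2.0f - v * x), so each line above is one
	// Newton-Raphson step x' = x * (2 - v * x) for approximating 1/v.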

	Vec4F32 RecipApprox() const {
		float32x4_t recip = vrecpeq_f32(v);
		// To approximately match the precision of x86-64's rcpps, do a single iteration.
		recip = vmulq_f32(vrecpsq_f32(v, recip), recip);
		return Vec4F32{ recip };
	}

	Vec4F32 Clamp(float lower, float higher) const {
		return Vec4F32{
			vminq_f32(vmaxq_f32(v, vdupq_n_f32(lower)), vdupq_n_f32(higher))
		};
	}

	Vec4F32 WithLane3Zero() const {
		return Vec4F32{ vsetq_lane_f32(0.0f, v, 3) };
	}

	Vec4F32 WithLane3One() const {
		return Vec4F32{ vsetq_lane_f32(1.0f, v, 3) };
	}

	Vec4S32 CompareEq(Vec4F32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vceqq_f32(v, other.v)) }; }
	Vec4S32 CompareLt(Vec4F32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vcltq_f32(v, other.v)) }; }
	Vec4S32 CompareGt(Vec4F32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vcgtq_f32(v, other.v)) }; }
	Vec4S32 CompareLe(Vec4F32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vcleq_f32(v, other.v)) }; }
	Vec4S32 CompareGe(Vec4F32 other) const { return Vec4S32{ vreinterpretq_s32_u32(vcgeq_f32(v, other.v)) }; }

	// One of many possible solutions. Sometimes we could also use vld4q_f32 probably..
	static void Transpose(Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) {
#if PPSSPP_ARCH(ARM64_NEON)
		// Only works on ARM64
		float32x4_t temp0 = vzip1q_f32(col0.v, col2.v);
		float32x4_t temp1 = vzip2q_f32(col0.v, col2.v);
		float32x4_t temp2 = vzip1q_f32(col1.v, col3.v);
		float32x4_t temp3 = vzip2q_f32(col1.v, col3.v);
		col0.v = vzip1q_f32(temp0, temp2);
		col1.v = vzip2q_f32(temp0, temp2);
		col2.v = vzip1q_f32(temp1, temp3);
		col3.v = vzip2q_f32(temp1, temp3);
#else
		float32x4x2_t col01 = vtrnq_f32(col0.v, col1.v);
		float32x4x2_t col23 = vtrnq_f32(col2.v, col3.v);
		col0.v = vcombine_f32(vget_low_f32(col01.val[0]), vget_low_f32(col23.val[0]));
		col1.v = vcombine_f32(vget_low_f32(col01.val[1]), vget_low_f32(col23.val[1]));
		col2.v = vcombine_f32(vget_high_f32(col01.val[0]), vget_high_f32(col23.val[0]));
		col3.v = vcombine_f32(vget_high_f32(col01.val[1]), vget_high_f32(col23.val[1]));
#endif
	}

	static void LoadTranspose(const float *src, Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) {
		// The optimizer hopefully gets rid of the copies below.
		float32x4x4_t r = vld4q_f32(src);
		col0.v = r.val[0];
		col1.v = r.val[1];
		col2.v = r.val[2];
		col3.v = r.val[3];
	}

	inline Vec4F32 AsVec3ByMatrix44(const Mat4F32 &m) {
#if PPSSPP_ARCH(ARM64_NEON)
		float32x4_t sum = vaddq_f32(
			vaddq_f32(vmulq_laneq_f32(m.col0, v, 0), vmulq_laneq_f32(m.col1, v, 1)),
			vaddq_f32(vmulq_laneq_f32(m.col2, v, 2), m.col3));
#else
		float32x4_t sum = vaddq_f32(
			vaddq_f32(vmulq_lane_f32(m.col0, vget_low_f32(v), 0), vmulq_lane_f32(m.col1, vget_low_f32(v), 1)),
			vaddq_f32(vmulq_lane_f32(m.col2, vget_high_f32(v), 0), m.col3));
#endif
		return Vec4F32{ sum };
	}

	template<int i> float GetLane() const {
		return vgetq_lane_f32(v, i);
	}
};

inline Vec4S32 Vec4S32FromF32(Vec4F32 f) { return Vec4S32{ vcvtq_s32_f32(f.v) }; }
inline Vec4F32 Vec4F32FromS32(Vec4S32 s) { return Vec4F32{ vcvtq_f32_s32(s.v) }; }

// Make sure the W component of scale is 1.0f.
inline void ScaleInplace(Mat4F32 &m, Vec4F32 scale) {
	m.col0 = vmulq_f32(m.col0, scale.v);
	m.col1 = vmulq_f32(m.col1, scale.v);
	m.col2 = vmulq_f32(m.col2, scale.v);
	m.col3 = vmulq_f32(m.col3, scale.v);
}

// Make sure the W component of scale is 1.0f, and the W component of translate should be 0.
inline void TranslateAndScaleInplace(Mat4F32 &m, Vec4F32 scale, Vec4F32 translate) {
	m.col0 = vaddq_f32(vmulq_f32(m.col0, scale.v), vmulq_laneq_f32(translate.v, m.col0, 3));
	m.col1 = vaddq_f32(vmulq_f32(m.col1, scale.v), vmulq_laneq_f32(translate.v, m.col1, 3));
	m.col2 = vaddq_f32(vmulq_f32(m.col2, scale.v), vmulq_laneq_f32(translate.v, m.col2, 3));
	m.col3 = vaddq_f32(vmulq_f32(m.col3, scale.v), vmulq_laneq_f32(translate.v, m.col3, 3));
}

inline bool AnyZeroSignBit(Vec4S32 value) {
#if PPSSPP_ARCH(ARM64_NEON)
	// Shortcut on arm64
	return vmaxvq_s32(value.v) >= 0;
#else
	// Very suboptimal, let's optimize later.
	int32x2_t prod = vand_s32(vget_low_s32(value.v), vget_high_s32(value.v));
	int mask = vget_lane_s32(prod, 0) & vget_lane_s32(prod, 1);
	return (mask & 0x80000000) == 0;
#endif
}
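
// Added note: a lane's sign bit is clear exactly when the lane is >= 0, so "any zero
// sign bit" is the same as "the maximum lane is >= 0", which is what the vmaxvq_s32
// shortcut checks. The ARM32 path ANDs all four lanes and tests the resulting sign bit.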

inline bool AnyZeroSignBit(Vec4F32 value) {
	int32x4_t ival = vreinterpretq_s32_f32(value.v);
#if PPSSPP_ARCH(ARM64_NEON)
	// Shortcut on arm64
	return vmaxvq_s32(ival) >= 0;
#else
	int32x2_t prod = vand_s32(vget_low_s32(ival), vget_high_s32(ival));
	int mask = vget_lane_s32(prod, 0) & vget_lane_s32(prod, 1);
	return (mask & 0x80000000) == 0;
#endif
}

struct Vec4U16 {
	uint16x4_t v;  // 64 bits.

	static Vec4U16 Zero() { return Vec4U16{ vdup_n_u16(0) }; }
	static Vec4U16 Splat(uint16_t value) { return Vec4U16{ vdup_n_u16(value) }; }

	static Vec4U16 Load(const uint16_t *mem) { return Vec4U16{ vld1_u16(mem) }; }
	void Store(uint16_t *mem) { vst1_u16(mem, v); }

	static Vec4U16 FromVec4S32(Vec4S32 v) {
		return Vec4U16{ vmovn_u32(vreinterpretq_u32_s32(v.v)) };
	}
	static Vec4U16 FromVec4F32(Vec4F32 v) {
		return Vec4U16{ vmovn_u32(vreinterpretq_u32_s32(vcvtq_s32_f32(v.v))) };
	}

	Vec4U16 operator |(Vec4U16 other) const { return Vec4U16{ vorr_u16(v, other.v) }; }
	Vec4U16 operator &(Vec4U16 other) const { return Vec4U16{ vand_u16(v, other.v) }; }
	Vec4U16 operator ^(Vec4U16 other) const { return Vec4U16{ veor_u16(v, other.v) }; }

	Vec4U16 Max(Vec4U16 other) const { return Vec4U16{ vmax_u16(v, other.v) }; }
	Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ vmin_u16(v, other.v) }; }
	Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ vclt_u16(v, other.v) }; }

	Vec4U16 AndNot(Vec4U16 inverted) { return Vec4U16{ vand_u16(v, vmvn_u16(inverted.v)) }; }
};

inline Vec4U16 SignBits32ToMaskU16(Vec4S32 v) {
	int32x4_t sign_mask = vshrq_n_s32(v.v, 31);
	uint16x4_t result = vreinterpret_u16_s16(vmovn_s32(sign_mask));
	return Vec4U16{ result };
}

struct Vec8U16 {
	uint16x8_t v;

	static Vec8U16 Zero() { return Vec8U16{ vdupq_n_u16(0) }; }
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ vdupq_n_u16(value) }; }

	static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ vld1q_u16(mem) }; }
	void Store(uint16_t *mem) { vst1q_u16(mem, v); }
};

#else

#define CROSSSIMD_SLOW 1

// Fake SIMD by using scalar.

struct Mat4F32 {
	Mat4F32() {}
	Mat4F32(const float *src) {
		memcpy(m, src, sizeof(m));
	}
	void Store(float *dest) {
		memcpy(dest, m, sizeof(m));
	}
	static Mat4F32 Load4x3(const float *src) {
		Mat4F32 mat;
		mat.m[0] = src[0];
		mat.m[1] = src[1];
		mat.m[2] = src[2];
		mat.m[3] = 0.0f;
		mat.m[4] = src[3];
		mat.m[5] = src[4];
		mat.m[6] = src[5];
		mat.m[7] = 0.0f;
		mat.m[8] = src[6];
		mat.m[9] = src[7];
		mat.m[10] = src[8];
		mat.m[11] = 0.0f;
		mat.m[12] = src[9];
		mat.m[13] = src[10];
		mat.m[14] = src[11];
		mat.m[15] = 1.0f;
		return mat;
	}

	// cols are consecutive
	float m[16];
};

// The columns are consecutive but missing the last row (implied 0,0,0,1).
// This is just intermediate storage for multiplication.
struct Mat4x3F32 {
	Mat4x3F32(const float *matrix) {
		memcpy(m, matrix, 12 * sizeof(float));
	}
	float m[12];
};

struct Vec4S32 {
	int32_t v[4];

	static Vec4S32 Zero() { return Vec4S32{}; }
	static Vec4S32 Splat(int lane) { return Vec4S32{ { lane, lane, lane, lane } }; }

	static Vec4S32 Load(const int *src) { return Vec4S32{ { src[0], src[1], src[2], src[3] } }; }
	static Vec4S32 LoadAligned(const int *src) { return Load(src); }
	void Store(int *dst) { memcpy(dst, v, sizeof(v)); }
	void Store2(int *dst) { memcpy(dst, v, sizeof(v[0]) * 2); }
	void StoreAligned(int *dst) { memcpy(dst, v, sizeof(v)); }

	// Warning: Unlike on x86 SSE2, this is a full 32-bit multiplication.
	Vec4S32 Mul16(Vec4S32 other) const { return Vec4S32{ { v[0] * other.v[0], v[1] * other.v[1], v[2] * other.v[2], v[3] * other.v[3] } }; }

	Vec4S32 SignExtend16() const {
		Vec4S32 tmp;
		for (int i = 0; i < 4; i++) {
			tmp.v[i] = (int32_t)(int16_t)v[i];
		}
		return tmp;
	}
	// NOTE: These can be done in sequence, but when done, you must FixupAfterMinMax to get valid output (on SSE2 at least).
	Vec4S32 Min16(Vec4S32 other) const {
		Vec4S32 tmp;
		for (int i = 0; i < 4; i++) {
			tmp.v[i] = other.v[i] < v[i] ? other.v[i] : v[i];
		}
		return tmp;
	}
	Vec4S32 Max16(Vec4S32 other) const {
		Vec4S32 tmp;
		for (int i = 0; i < 4; i++) {
			tmp.v[i] = other.v[i] > v[i] ? other.v[i] : v[i];
		}
		return tmp;
	}
	Vec4S32 FixupAfterMinMax() const { return *this; }

	int operator[](size_t index) const { return v[index]; }

	Vec4S32 operator +(Vec4S32 other) const {
		return Vec4S32{ { v[0] + other.v[0], v[1] + other.v[1], v[2] + other.v[2], v[3] + other.v[3], } };
	}
	Vec4S32 operator -(Vec4S32 other) const {
		return Vec4S32{ { v[0] - other.v[0], v[1] - other.v[1], v[2] - other.v[2], v[3] - other.v[3], } };
	}
	Vec4S32 operator *(Vec4S32 other) const {
		return Vec4S32{ { v[0] * other.v[0], v[1] * other.v[1], v[2] * other.v[2], v[3] * other.v[3], } };
	}
	// TODO: Can optimize the bitwise ones with 64-bit operations.
	Vec4S32 operator |(Vec4S32 other) const {
		return Vec4S32{ { v[0] | other.v[0], v[1] | other.v[1], v[2] | other.v[2], v[3] | other.v[3], } };
	}
	Vec4S32 operator &(Vec4S32 other) const {
		return Vec4S32{ { v[0] & other.v[0], v[1] & other.v[1], v[2] & other.v[2], v[3] & other.v[3], } };
	}
	Vec4S32 operator ^(Vec4S32 other) const {
		return Vec4S32{ { v[0] ^ other.v[0], v[1] ^ other.v[1], v[2] ^ other.v[2], v[3] ^ other.v[3], } };
	}
	Vec4S32 AndNot(Vec4S32 other) const {
		return Vec4S32{ { v[0] & ~other.v[0], v[1] & ~other.v[1], v[2] & ~other.v[2], v[3] & ~other.v[3], } };
	}
	Vec4S32 Mul(Vec4S32 other) const { return *this * other; }

	void operator &=(Vec4S32 other) { for (int i = 0; i < 4; i++) v[i] &= other.v[i]; }
	void operator +=(Vec4S32 other) { for (int i = 0; i < 4; i++) v[i] += other.v[i]; }
	void operator -=(Vec4S32 other) { for (int i = 0; i < 4; i++) v[i] -= other.v[i]; }

	template<int imm>
	Vec4S32 Shl() const { return Vec4S32{ { v[0] << imm, v[1] << imm, v[2] << imm, v[3] << imm } }; }

	Vec4S32 CompareEq(Vec4S32 other) const {
		Vec4S32 out;
		for (int i = 0; i < 4; i++) {
			out.v[i] = v[i] == other.v[i] ? 0xFFFFFFFF : 0;
		}
		return out;
	}
	Vec4S32 CompareLt(Vec4S32 other) const {
		Vec4S32 out;
		for (int i = 0; i < 4; i++) {
			out.v[i] = v[i] < other.v[i] ? 0xFFFFFFFF : 0;
		}
		return out;
	}
	Vec4S32 CompareGt(Vec4S32 other) const {
		Vec4S32 out;
		for (int i = 0; i < 4; i++) {
			out.v[i] = v[i] > other.v[i] ? 0xFFFFFFFF : 0;
		}
		return out;
	}
	Vec4S32 CompareGtZero() const {
		Vec4S32 out;
		for (int i = 0; i < 4; i++) {
			out.v[i] = v[i] > 0 ? 0xFFFFFFFF : 0;
		}
		return out;
	}
};

struct Vec4F32 {
	float v[4];

	static Vec4F32 Zero() { return Vec4F32{}; }
	static Vec4F32 Splat(float lane) { return Vec4F32{ { lane, lane, lane, lane } }; }

	static Vec4F32 Load(const float *src) { return Vec4F32{ { src[0], src[1], src[2], src[3] } }; }
	static Vec4F32 LoadAligned(const float *src) { return Vec4F32{ { src[0], src[1], src[2], src[3] } }; }
	static Vec4F32 LoadS8Norm(const int8_t *src) {
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = (float)src[i] * (1.0f / 128.0f);
		}
		return temp;
	}
	static Vec4F32 LoadS16Norm(const int16_t *src) { // Divides by 32768.0f
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = (float)src[i] * (1.0f / 32768.0f);
		}
		return temp;
	}
	void Store(float *dst) { memcpy(dst, v, sizeof(v)); }
	void Store2(float *dst) { memcpy(dst, v, sizeof(v[0]) * 2); }
	void StoreAligned(float *dst) { memcpy(dst, v, sizeof(v)); }
	void Store3(float *dst) {
		memcpy(dst, v, sizeof(v[0]) * 3);
	}

	static Vec4F32 LoadConvertS16(const int16_t *src) {
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = (float)src[i];
		}
		return temp;
	}

	static Vec4F32 LoadConvertS8(const int8_t *src) { // Note: will load 8 bytes, not 4. Only the first 4 bytes will be used.
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = (float)src[i];
		}
		return temp;
	}

	static Vec4F32 LoadF24x3_One(const uint32_t *src) {
		uint32_t shifted[3] = { src[0] << 8, src[1] << 8, src[2] << 8 };
		Vec4F32 temp;
		memcpy(temp.v, shifted, sizeof(shifted));
		temp.v[3] = 1.0f;  // Lane 3 is 1.0f, matching the SSE2/NEON versions.
		return temp;
	}

	static Vec4F32 FromVec4S32(Vec4S32 src) {
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = (float)src[i];
		}
		return temp;
	}

	float operator[](size_t index) const { return v[index]; }

	Vec4F32 operator +(Vec4F32 other) const {
		return Vec4F32{ { v[0] + other.v[0], v[1] + other.v[1], v[2] + other.v[2], v[3] + other.v[3], } };
	}
	Vec4F32 operator -(Vec4F32 other) const {
		return Vec4F32{ { v[0] - other.v[0], v[1] - other.v[1], v[2] - other.v[2], v[3] - other.v[3], } };
	}
	Vec4F32 operator *(Vec4F32 other) const {
		return Vec4F32{ { v[0] * other.v[0], v[1] * other.v[1], v[2] * other.v[2], v[3] * other.v[3], } };
	}
	Vec4F32 Min(Vec4F32 other) const {
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] < other.v[i] ? v[i] : other.v[i];
		}
		return temp;
	}
	Vec4F32 Max(Vec4F32 other) const {
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] > other.v[i] ? v[i] : other.v[i];
		}
		return temp;
	}
	void operator +=(Vec4F32 other) {
		for (int i = 0; i < 4; i++) {
			v[i] += other.v[i];
		}
	}
	void operator -=(Vec4F32 other) {
		for (int i = 0; i < 4; i++) {
			v[i] -= other.v[i];
		}
	}
	void operator *=(Vec4F32 other) {
		for (int i = 0; i < 4; i++) {
			v[i] *= other.v[i];
		}
	}
	void operator /=(Vec4F32 other) {
		for (int i = 0; i < 4; i++) {
			v[i] /= other.v[i];
		}
	}
	void operator &=(Vec4S32 other) {
		// TODO: This can be done simpler, although with some ugly casts.
		for (int i = 0; i < 4; i++) {
			uint32_t val;
			memcpy(&val, &v[i], 4);
			val &= other.v[i];
			memcpy(&v[i], &val, 4);
		}
	}
	Vec4F32 operator *(float f) const {
		return Vec4F32{ { v[0] * f, v[1] * f, v[2] * f, v[3] * f } };
	}

	Vec4F32 Mul(float f) const {
		return Vec4F32{ { v[0] * f, v[1] * f, v[2] * f, v[3] * f } };
	}

	Vec4F32 Recip() const {
		return Vec4F32{ { 1.0f / v[0], 1.0f / v[1], 1.0f / v[2], 1.0f / v[3] } };
	}

	Vec4F32 RecipApprox() const {
		return Vec4F32{ { 1.0f / v[0], 1.0f / v[1], 1.0f / v[2], 1.0f / v[3] } };
	}

	Vec4F32 Clamp(float lower, float higher) const {
		Vec4F32 temp;
		for (int i = 0; i < 4; i++) {
			if (v[i] > higher) {
				temp.v[i] = higher;
			} else if (v[i] < lower) {
				temp.v[i] = lower;
			} else {
				temp.v[i] = v[i];
			}
		}
		return temp;
	}

	Vec4F32 WithLane3Zero() const {
		return Vec4F32{ { v[0], v[1], v[2], 0.0f } };
	}

	Vec4F32 WithLane3One() const {
		return Vec4F32{ { v[0], v[1], v[2], 1.0f } };
	}

	Vec4S32 CompareEq(Vec4F32 other) const {
		Vec4S32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] == other.v[i] ? 0xFFFFFFFF : 0;
		}
		return temp;
	}
	Vec4S32 CompareLt(Vec4F32 other) const {
		Vec4S32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] < other.v[i] ? 0xFFFFFFFF : 0;
		}
		return temp;
	}
	Vec4S32 CompareGt(Vec4F32 other) const {
		Vec4S32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] > other.v[i] ? 0xFFFFFFFF : 0;
		}
		return temp;
	}
	Vec4S32 CompareLe(Vec4F32 other) const {
		Vec4S32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] <= other.v[i] ? 0xFFFFFFFF : 0;
		}
		return temp;
	}
	Vec4S32 CompareGe(Vec4F32 other) const {
		Vec4S32 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] >= other.v[i] ? 0xFFFFFFFF : 0;
		}
		return temp;
	}

	// In-place transpose.
	static void Transpose(Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) {
		float m[16];
		for (int i = 0; i < 4; i++) {
			m[0 + i] = col0.v[i];
			m[4 + i] = col1.v[i];
			m[8 + i] = col2.v[i];
			m[12 + i] = col3.v[i];
		}
		for (int i = 0; i < 4; i++) {
			col0.v[i] = m[i * 4 + 0];
			col1.v[i] = m[i * 4 + 1];
			col2.v[i] = m[i * 4 + 2];
			col3.v[i] = m[i * 4 + 3];
		}
	}

	inline Vec4F32 AsVec3ByMatrix44(const Mat4F32 &m) {
		float x = m.m[0] * v[0] + m.m[4] * v[1] + m.m[8] * v[2] + m.m[12];
		float y = m.m[1] * v[0] + m.m[5] * v[1] + m.m[9] * v[2] + m.m[13];
		float z = m.m[2] * v[0] + m.m[6] * v[1] + m.m[10] * v[2] + m.m[14];

		return Vec4F32{ { x, y, z, 1.0f } };
	}

	template<int i> float GetLane() const {
		return v[i];
	}
};

inline bool AnyZeroSignBit(Vec4S32 value) {
	for (int i = 0; i < 4; i++) {
		if (value.v[i] >= 0) {
			return true;
		}
	}
	return false;
}

inline bool AnyZeroSignBit(Vec4F32 value) {
	for (int i = 0; i < 4; i++) {
		if (value.v[i] >= 0.0f) {
			return true;
		}
	}
	return false;
}
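
// Added note: unlike the SSE2/NEON versions, which test the raw sign bit, the float
// overload above compares with ">= 0.0f", so -0.0f (sign bit set) still counts here.
// The difference rarely matters, but the result is not bit-exact with the SIMD paths.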

struct Vec4U16 {
	uint16_t v[4];  // 64 bits.

	static Vec4U16 Zero() { return Vec4U16{}; }
	static Vec4U16 Splat(uint16_t lane) { return Vec4U16{ { lane, lane, lane, lane } }; }

	static Vec4U16 Load(const uint16_t *mem) { return Vec4U16{ { mem[0], mem[1], mem[2], mem[3] } }; }
	void Store(uint16_t *mem) { memcpy(mem, v, sizeof(v)); }

	static Vec4U16 FromVec4S32(Vec4S32 v) {
		return Vec4U16{ { (uint16_t)v.v[0], (uint16_t)v.v[1], (uint16_t)v.v[2], (uint16_t)v.v[3] } };
	}
	static Vec4U16 FromVec4F32(Vec4F32 v) {
		return Vec4U16{ { (uint16_t)v.v[0], (uint16_t)v.v[1], (uint16_t)v.v[2], (uint16_t)v.v[3] } };
	}

	Vec4U16 operator |(Vec4U16 other) const { return Vec4U16{ { (uint16_t)(v[0] | other.v[0]), (uint16_t)(v[1] | other.v[1]), (uint16_t)(v[2] | other.v[2]), (uint16_t)(v[3] | other.v[3]), } }; }
	Vec4U16 operator &(Vec4U16 other) const { return Vec4U16{ { (uint16_t)(v[0] & other.v[0]), (uint16_t)(v[1] & other.v[1]), (uint16_t)(v[2] & other.v[2]), (uint16_t)(v[3] & other.v[3]), } }; }
	Vec4U16 operator ^(Vec4U16 other) const { return Vec4U16{ { (uint16_t)(v[0] ^ other.v[0]), (uint16_t)(v[1] ^ other.v[1]), (uint16_t)(v[2] ^ other.v[2]), (uint16_t)(v[3] ^ other.v[3]), } }; }

	Vec4U16 Max(Vec4U16 other) const {
		Vec4U16 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] > other.v[i] ? v[i] : other.v[i];
		}
		return temp;
	}
	Vec4U16 Min(Vec4U16 other) const {
		Vec4U16 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] < other.v[i] ? v[i] : other.v[i];
		}
		return temp;
	}
	Vec4U16 CompareLT(Vec4U16 other) const {
		Vec4U16 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] < other.v[i] ? 0xFFFF : 0;
		}
		return temp;
	}
	Vec4U16 AndNot(Vec4U16 other) const {
		Vec4U16 temp;
		for (int i = 0; i < 4; i++) {
			temp.v[i] = v[i] & ~other.v[i];
		}
		return temp;
	}
};

struct Vec8U16 {
	uint16_t v[8];

	static Vec8U16 Zero() { return Vec8U16{}; }
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ {
		value, value, value, value, value, value, value, value,
	} }; }

	static Vec8U16 Load(const uint16_t *mem) { Vec8U16 tmp; memcpy(tmp.v, mem, sizeof(v)); return tmp; }
	void Store(uint16_t *mem) { memcpy(mem, v, sizeof(v)); }
};

inline Vec4U16 SignBits32ToMaskU16(Vec4S32 v) {
	return Vec4U16{ { (uint16_t)(v.v[0] >> 31), (uint16_t)(v.v[1] >> 31), (uint16_t)(v.v[2] >> 31), (uint16_t)(v.v[3] >> 31), } };
}

inline Vec4S32 Vec4S32FromF32(Vec4F32 f) {
	return Vec4S32{ { (int32_t)f.v[0], (int32_t)f.v[1], (int32_t)f.v[2], (int32_t)f.v[3] } };
}

inline Vec4F32 Vec4F32FromS32(Vec4S32 f) {
	return Vec4F32{ { (float)f.v[0], (float)f.v[1], (float)f.v[2], (float)f.v[3] } };
}

// Make sure the W component of scale is 1.0f.
inline void ScaleInplace(Mat4F32 &m, Vec4F32 scale) {
	for (int i = 0; i < 4; i++) {
		m.m[i * 4 + 0] *= scale.v[0];
		m.m[i * 4 + 1] *= scale.v[1];
		m.m[i * 4 + 2] *= scale.v[2];
		m.m[i * 4 + 3] *= scale.v[3];
	}
}

inline void TranslateAndScaleInplace(Mat4F32 &m, Vec4F32 scale, Vec4F32 translate) {
	for (int i = 0; i < 4; i++) {
		m.m[i * 4 + 0] = m.m[i * 4 + 0] * scale.v[0] + translate.v[0] * m.m[i * 4 + 3];
		m.m[i * 4 + 1] = m.m[i * 4 + 1] * scale.v[1] + translate.v[1] * m.m[i * 4 + 3];
		m.m[i * 4 + 2] = m.m[i * 4 + 2] * scale.v[2] + translate.v[2] * m.m[i * 4 + 3];
		m.m[i * 4 + 3] = m.m[i * 4 + 3] * scale.v[3] + translate.v[3] * m.m[i * 4 + 3];
	}
}

inline Mat4F32 Mul4x4By4x4(Mat4F32 a, Mat4F32 b) {
	Mat4F32 result;
	for (int j = 0; j < 4; j++) {
		for (int i = 0; i < 4; i++) {
			float sum = 0.0f;
			for (int k = 0; k < 4; k++) {
				sum += b.m[k * 4 + i] * a.m[j * 4 + k];
			}
			result.m[j * 4 + i] = sum;
		}
	}
	return result;
}

inline Mat4F32 Mul4x3By4x4(Mat4x3F32 a, Mat4F32 b) {
	Mat4F32 result;

	for (int j = 0; j < 4; j++) {
		for (int i = 0; i < 4; i++) {
			float sum = 0.0f;
			for (int k = 0; k < 3; k++) {
				sum += b.m[k * 4 + i] * a.m[j * 3 + k];
			}
			if (j == 3) {
				sum += b.m[12 + i];
			}
			result.m[j * 4 + i] = sum;
		}
	}
	return result;
}
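
// Added note: the "if (j == 3)" term accounts for the implied fourth row (0, 0, 0, 1)
// of the 4x3 matrix - the final column of the product picks up b's last column
// unscaled, matching the SIMD implementations above.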

#endif