Cogs.Core
SampleVolumeTask.cpp
1#if 0
2#include <emmintrin.h> // SSE2
3#include "SampleVolumeTask.h"
4#include "RemapTileTask.h"
5#include "../Utils.h"
6#include "Platform/Timer.h"
7
8#include "Context.h"
9#include "Services/Features.h"
10
11using namespace Cogs::Core;
12
13using glm::uvec3;
14using glm::ivec3;
15using glm::vec3;
16using glm::clamp;
17using glm::max;
18using glm::min;
19using glm::floor;
20using glm::ceil;
21using glm::inverse;
22
23namespace {
// Angle constants. Note that piTwo holds pi / 2 (a quarter turn), not 2 * pi.
constexpr float piTwo = 1.5707963267948966f;
constexpr float pi = 3.1415926535897931f;

// -0.f has only the IEEE-754 sign bit set; broadcast, it serves as a sign mask.
constexpr float signBit = -0.f;
constexpr float one = 1.f;

// Coefficients of the odd polynomial approximations of atan(t) used by atan2_ps.
// The suffix Ck names the coefficient of t^k.

// Degree 5.
constexpr float atan_deg5_C5 = 8.7292946518897740e-02f;
constexpr float atan_deg5_C3 = -3.0189478312144946e-01f;
constexpr float atan_deg5_C1 = 1.0f;

// Degree 7.
constexpr float atan_deg7_C7 = -4.2330209451053591e-02f;
constexpr float atan_deg7_C5 = +1.5342994884206673e-01f;
constexpr float atan_deg7_C3 = -3.2570157599356531e-01f;
constexpr float atan_deg7_C1 = 1.0f;

// Degree 7 coefficients packed into one register: lane 0 = C1, lane 1 = C3,
// lane 2 = C5, lane 3 = C7.
const __m128 atan_deg7_C = _mm_setr_ps(atan_deg7_C1, atan_deg7_C3, atan_deg7_C5, atan_deg7_C7);

// Degree 9.
constexpr float atan_deg9_C9 = 0.0208351f;
constexpr float atan_deg9_C7 = -0.0851330f;
constexpr float atan_deg9_C5 = 0.1801410f;
constexpr float atan_deg9_C3 = -0.3302995f;
constexpr float atan_deg9_C1 = 0.9998660f;
43
44
45
46 inline bool insideRange(const vec3& x, const vec3& a, const vec3& b)
47 {
48 return
49 (a.x <= x.x) && (x.x <= b.x) &&
50 (a.y <= x.y) && (x.y <= b.y) &&
51 (a.z <= x.z) && (x.z <= b.z);
52 }
53
// Computes four cross products c = a x b at once, with the vectors stored in
// structure-of-arrays form (one register per component, one vector per lane).
//
// c_x, c_y, c_z  receive the components of the result.
// a_*, b_*       components of the left and right operands.
//
// All three result components are computed before any output is written, so an
// output register may safely alias one of the inputs (in-place cross product).
inline void vec3_cross_vec3_ps(__m128& c_x, __m128& c_y, __m128& c_z,
                               const __m128& a_x, const __m128& a_y, const __m128& a_z,
                               const __m128& b_x, const __m128& b_y, const __m128& b_z)
{
  const __m128 r_x = _mm_sub_ps(_mm_mul_ps(a_y, b_z), _mm_mul_ps(b_y, a_z));
  const __m128 r_y = _mm_sub_ps(_mm_mul_ps(a_z, b_x), _mm_mul_ps(b_z, a_x));
  const __m128 r_z = _mm_sub_ps(_mm_mul_ps(a_x, b_y), _mm_mul_ps(b_x, a_y));

  c_x = r_x;
  c_y = r_y;
  c_z = r_z;
}
62
63
64
65
66
67 __m128 atan2_ps(__m128 y, __m128 x)
68 {
69#if 0
70 // Reference
71 __m128 rv;
72 for (int i = 0; i < 4; i++) {
73 rv.m128_f32[i] = std::atan2(y.m128_f32[i], x.m128_f32[i]);
74 }
75 return rv;
76#endif
77
78 __m128 sign = _mm_load_ps1(&signBit);
79 __m128 abs_x = _mm_andnot_ps(sign, x);
80 __m128 abs_y = _mm_andnot_ps(sign, y);
81 __m128 pq = _mm_cmplt_ps(abs_y, abs_x);
82
83 __m128 num = _mm_or_ps(_mm_and_ps(pq, y), _mm_andnot_ps(pq, x));
84 __m128 den = _mm_or_ps(_mm_and_ps(pq, x), _mm_andnot_ps(pq, y));
85 __m128 t = _mm_div_ps(num, den);
86
87 __m128 t_t = _mm_mul_ps(t, t);
88
89#if 0
90 // Degree 5 polynomial approximation, max error < 1.4e-3
91 __m128 r = _mm_mul_ps(_mm_load_ps1(&atan_deg5_C5), t_t);
92 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg5_C3)), t_t);
93 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg5_C1)), t);
94#elif 1
95 // Degree 7 polynomial approximation, max error < 1.5e-4
96 __m128 C = _mm_load_ps((float*)(&atan_deg7_C));
97 __m128 r = _mm_mul_ps(_mm_shuffle_ps(C, C, _MM_SHUFFLE(3, 3, 3, 3)), t_t);
98 r = _mm_mul_ps(_mm_add_ps(r, _mm_shuffle_ps(C, C, _MM_SHUFFLE(2, 2, 2, 2))), t_t);
99 r = _mm_mul_ps(_mm_add_ps(r, _mm_shuffle_ps(C, C, _MM_SHUFFLE(1, 1, 1, 1))), t_t);
100 r = _mm_add_ps(_mm_mul_ps(r, t), t);
101#else
102 // Degree 9 polynomial approximation, max error < 1e-5
103 __m128 r = _mm_mul_ps(_mm_load_ps1(&atan_deg9_C9), t_t);
104 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C7)), t_t);
105 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C5)), t_t);
106 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C3)), t_t);
107 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C1)), t);
108#endif
109
110 r = _mm_xor_ps(r, _mm_andnot_ps(pq, sign));
111
112 __m128 t2 = _mm_andnot_ps(_mm_cmple_ps(_mm_setzero_ps(), x), _mm_set1_ps(pi)); // t2 = 0 <= x ? 0 : pi
113 __m128 t3 = _mm_or_ps(_mm_and_ps(pq, t2), _mm_andnot_ps(pq, _mm_set1_ps(piTwo))); // t3 = pq ? t2 : pi/2
114 __m128 shift = _mm_xor_ps(t3, _mm_and_ps(y, sign)); // shift = 0 <= y ? t3 : - t3
115
116 return _mm_add_ps(r, shift);
117 }
118
119}
120
121
122
123
124
125void EchoSounder::SampleVolumeTask2::operator()()
126{
127 auto timer = Timer::startNew();
128
129#ifdef COGS_EXTENSIONS_AVX
130 if (data.context->features->supported(CPUFeature::AVX2)) {
131 runAVX2(field,
132 times,
133 pinf,
134 data.upperFansToRemove,
135 numPings,
136 data.numSamples,
137 data.samplesInTile, data.samplesInTile,
138 data.depthOffset, data.depthStep, data.sampleSpacing, data.decay,
139 data.beamAngleAlongship, data.beamAngleAthwartship,
140 minIndex, maxIndex);
141 } else
142#endif
143 if (data.context->features->supported(CPUFeature::SSE41)) {
144 runSSE4_1(field,
145 times,
146 pinf,
147 data.upperFansToRemove,
148 numPings,
149 data.numSamples,
150 data.samplesInTile, data.samplesInTile,
151 data.depthOffset, data.depthStep, data.sampleSpacing, data.decay,
152 data.beamAngleAlongship, data.beamAngleAthwartship,
153 minIndex, maxIndex);
154 }
155 else {
156 runVanilla(field,
157 times,
158 pinf,
159 data.upperFansToRemove,
160 numPings,
161 data.numSamples,
162 data.samplesInTile, data.samplesInTile,
163 data.depthOffset, data.depthStep, data.sampleSpacing, data.decay,
164 data.beamAngleAlongship, data.beamAngleAthwartship,
165 minIndex, maxIndex);
166 }
167
168 if (elapsed_us != nullptr) {
169 elapsed_us->fetch_add(timer.elapsedMicroseconds());
170 }
171}
172#endif
Contains the Engine, Renderer, resource managers and other systems needed to run Cogs....