3#include "SampleVolumeTask.h"
4#include "RemapTileTask.h"
6#include "Platform/Timer.h"
9#include "Services/Features.h"
// Scalar constants broadcast into SSE registers by atan2_ps().
// NOTE: despite its name, piTwo holds pi/2 (half of the `pi` value below), not 2*pi.
24 static const float piTwo = 1.5707963267948966f;
25 static const float pi = 3.1415926535897931f;
// Negative zero: a float whose only set bit is the sign bit; used as a sign mask.
26 static const float signBit = -0.f;
// Not referenced by any code visible in this part of the file.
27 static const float one = 1.f;
// Coefficients of an odd polynomial in t of degree 5 (t, t^3, t^5 terms),
// evaluated in Horner form in atan2_ps().
28 static const float atan_deg5_C5 = 8.7292946518897740e-02f;
29 static const float atan_deg5_C3 = -3.0189478312144946e-01f;
30 static const float atan_deg5_C1 = 1.0f;
// Coefficients of the degree-7 odd polynomial variant.
32 static const float atan_deg7_C7 = -4.2330209451053591e-02f;
33 static const float atan_deg7_C5 = +1.5342994884206673e-01f;
34 static const float atan_deg7_C3 = -3.2570157599356531e-01f;
35 static const float atan_deg7_C1 = 1.0f;
// The same four coefficients packed into one register. _mm_set_ps takes its
// arguments from the highest lane to the lowest, so lane 0 = C1, lane 1 = C3,
// lane 2 = C5 and lane 3 = C7.
36 static const __m128 atan_deg7_C = _mm_set_ps(atan_deg7_C7, atan_deg7_C5, atan_deg7_C3, atan_deg7_C1);
// Coefficients of the degree-9 odd polynomial variant. Unlike the other two
// sets, the linear coefficient C1 is not exactly 1 here.
38 static const float atan_deg9_C9 = 0.0208351f;
39 static const float atan_deg9_C7 = -0.0851330f;
40 static const float atan_deg9_C5 = 0.1801410f;
41 static const float atan_deg9_C3 = -0.3302995f;
42 static const float atan_deg9_C1 = 0.9998660f;
46 inline bool insideRange(
const vec3& x,
const vec3& a,
const vec3& b)
49 (a.x <= x.x) && (x.x <= b.x) &&
50 (a.y <= x.y) && (x.y <= b.y) &&
51 (a.z <= x.z) && (x.z <= b.z);
/// Computes four 3D cross products c = a x b at once, one per SSE lane.
///
/// The vectors are stored component-wise: each __m128 holds the same
/// component (x, y or z) of four independent vectors.
///
/// @param c_x,c_y,c_z  Receive the components of the result.
/// @param a_x,a_y,a_z  Components of the left-hand operand.
/// @param b_x,b_y,b_z  Components of the right-hand operand.
///
/// All three result components are computed before any output is written, so
/// an output may safely refer to the same object as an input (for example an
/// in-place a = a x b).
inline void vec3_cross_vec3_ps(__m128& c_x, __m128& c_y, __m128& c_z,
                               const __m128& a_x, const __m128& a_y, const __m128& a_z,
                               const __m128& b_x, const __m128& b_y, const __m128& b_z)
{
    const __m128 cross_x = _mm_sub_ps(_mm_mul_ps(a_y, b_z), _mm_mul_ps(b_y, a_z));
    const __m128 cross_y = _mm_sub_ps(_mm_mul_ps(a_z, b_x), _mm_mul_ps(b_z, a_x));
    const __m128 cross_z = _mm_sub_ps(_mm_mul_ps(a_x, b_y), _mm_mul_ps(b_x, a_y));

    // Store only after every input has been read (outputs may alias inputs).
    c_x = cross_x;
    c_y = cross_y;
    c_z = cross_z;
}
// Four-lane SSE approximation of atan2(y, x), evaluated lane by lane.
//
// y, x: numerators and denominators, one pair per lane.
// Returns r + shift, where r is a signed polynomial approximation of the
// arctangent of a ratio with magnitude <= 1 and shift is a quadrant offset
// (0, pi or pi/2) carrying the sign of y.
//
// NOTE(review): the visible lines contain a scalar loop that writes an
// undeclared `rv`, followed by three separate declarations of `r`. Presumably
// these are alternatives selected by preprocessor conditionals that are not
// visible here - confirm against the full file before editing.
67 __m128 atan2_ps(__m128 y, __m128 x)
// Scalar path: std::atan2 applied to each of the four lanes through the
// MSVC-specific m128_f32 member.
72 for (
int i = 0; i < 4; i++) {
73 rv.m128_f32[i] = std::atan2(y.m128_f32[i], x.m128_f32[i]);
// Vector path. `sign` has only the sign bit set in every lane, so
// andnot(sign, v) clears the sign bit and yields |v|.
78 __m128 sign = _mm_load_ps1(&signBit);
79 __m128 abs_x = _mm_andnot_ps(sign, x);
80 __m128 abs_y = _mm_andnot_ps(sign, y);
// pq is an all-ones lane mask where |y| < |x|.
81 __m128 pq = _mm_cmplt_ps(abs_y, abs_x);
// Per-lane select: t = y/x where |y| < |x|, otherwise t = x/y, so the
// larger magnitude is always the divisor.
83 __m128 num = _mm_or_ps(_mm_and_ps(pq, y), _mm_andnot_ps(pq, x));
84 __m128 den = _mm_or_ps(_mm_and_ps(pq, x), _mm_andnot_ps(pq, y));
85 __m128 t = _mm_div_ps(num, den);
87 __m128 t_t = _mm_mul_ps(t, t);
// Degree-5 variant, Horner form: r = ((C5*t^2 + C3)*t^2 + C1)*t.
91 __m128 r = _mm_mul_ps(_mm_load_ps1(&atan_deg5_C5), t_t);
92 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg5_C3)), t_t);
93 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg5_C1)), t);
// Degree-7 variant: all coefficients come from one packed register and are
// broadcast with shuffles (lane 3 = C7, lane 2 = C5, lane 1 = C3). The linear
// term is applied as r*t + t, i.e. with an implicit coefficient of 1; lane 0
// of C is not read.
96 __m128 C = _mm_load_ps((
float*)(&atan_deg7_C));
97 __m128 r = _mm_mul_ps(_mm_shuffle_ps(C, C, _MM_SHUFFLE(3, 3, 3, 3)), t_t);
98 r = _mm_mul_ps(_mm_add_ps(r, _mm_shuffle_ps(C, C, _MM_SHUFFLE(2, 2, 2, 2))), t_t);
99 r = _mm_mul_ps(_mm_add_ps(r, _mm_shuffle_ps(C, C, _MM_SHUFFLE(1, 1, 1, 1))), t_t);
100 r = _mm_add_ps(_mm_mul_ps(r, t), t);
// Degree-9 variant, Horner form:
// r = ((((C9*t^2 + C7)*t^2 + C5)*t^2 + C3)*t^2 + C1)*t.
103 __m128 r = _mm_mul_ps(_mm_load_ps1(&atan_deg9_C9), t_t);
104 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C7)), t_t);
105 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C5)), t_t);
106 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C3)), t_t);
107 r = _mm_mul_ps(_mm_add_ps(r, _mm_load_ps1(&atan_deg9_C1)), t);
// Negate r in the lanes where the ratio was inverted (pq false), because
// there the result is +-pi/2 minus the arctangent of x/y.
110 r = _mm_xor_ps(r, _mm_andnot_ps(pq, sign));
// t2 is pi in lanes where (0 <= x) is false, and 0 elsewhere.
112 __m128 t2 = _mm_andnot_ps(_mm_cmple_ps(_mm_setzero_ps(), x), _mm_set1_ps(pi));
// Offset magnitude: t2 where |y| < |x|, otherwise pi/2.
113 __m128 t3 = _mm_or_ps(_mm_and_ps(pq, t2), _mm_andnot_ps(pq, _mm_set1_ps(piTwo)));
// t3 is non-negative, so XOR-ing in y's sign bit gives the offset the sign of y.
114 __m128 shift = _mm_xor_ps(t3, _mm_and_ps(y, sign));
116 return _mm_add_ps(r, shift);
// Task entry point: starts a timer, runs the sampling work for `data`, then
// adds the elapsed time to an optional shared counter.
//
// NOTE(review): the names of the functions being called and part of their
// argument lists are not visible in these lines. The three similar argument
// lists below look like AVX2, SSE4.1 and fallback variants of the same call -
// confirm against the full file.
125void EchoSounder::SampleVolumeTask2::operator()()
127 auto timer = Timer::startNew();
// The AVX2 check is only compiled when COGS_EXTENSIONS_AVX is defined.
129#ifdef COGS_EXTENSIONS_AVX
130 if (data.context->features->supported(CPUFeature::AVX2)) {
134 data.upperFansToRemove,
// data.samplesInTile is passed twice in a row here, and again in the two
// argument lists further down.
137 data.samplesInTile, data.samplesInTile,
138 data.depthOffset, data.depthStep, data.sampleSpacing, data.decay,
139 data.beamAngleAlongship, data.beamAngleAthwartship,
// Same arguments, guarded by a runtime check for SSE4.1 support.
143 if (data.context->features->supported(CPUFeature::SSE41)) {
147 data.upperFansToRemove,
150 data.samplesInTile, data.samplesInTile,
151 data.depthOffset, data.depthStep, data.sampleSpacing, data.decay,
152 data.beamAngleAlongship, data.beamAngleAthwartship,
// Third argument list; no CPU feature check is visible for it.
159 data.upperFansToRemove,
162 data.samplesInTile, data.samplesInTile,
163 data.depthOffset, data.depthStep, data.sampleSpacing, data.decay,
164 data.beamAngleAlongship, data.beamAngleAthwartship,
// Timing is optional: the elapsed microseconds are only accumulated when a
// counter was supplied.
168 if (elapsed_us !=
nullptr) {
169 elapsed_us->fetch_add(timer.elapsedMicroseconds());
// Contains the Engine, Renderer, resource managers and other systems needed to run Cogs...