1#if !defined(EMSCRIPTEN)
2#include "IsoSurfaces_internal.h"
4#include "Platform/Instrumentation.h"
5#include "Services/Features.h"
6#include "Services/TaskManager.h"
9#include "Foundation/Platform/Timer.h"
// Functor entry point for the prefix-sum step of the iso-surface analysis.
// In the lines visible here, every entry c in [0, Nc] of actCellVtxOff and
// actCellIdxOff is saved to a temporary and then overwritten with the running
// values vOff / iOff.
// NOTE(review): the lines that declare and advance vOff / iOff are not part of
// this excerpt; presumably they are incremented by tv / ti, which would turn
// per-cell counts into exclusive offsets -- confirm against the full file.
13void IsoSurfaces::AnalyzePrefixSum::operator()()
// Instrumentation scope tagged with SCOPE_ISOSURFACES and the label below.
17 CpuInstrumentationScope(SCOPE_ISOSURFACES,
"AnalyzePrefixSum");
// Timer created here; it is read via elapsedMicroseconds() at the end.
18 auto timer = Timer::startNew();
// Zero the slot at index Nc in both arrays before the scan; the loop below
// runs through c == Nc inclusive, so this slot is read and overwritten too.
20 actCellVtxOff[Nc] = 0;
21 actCellIdxOff[Nc] = 0;
// Inclusive upper bound: Nc + 1 entries are visited.
24 for (
int c = 0; c <= Nc; c++) {
// Save the previous vertex entry, then replace it with the running value.
25 auto tv = actCellVtxOff[c];
26 actCellVtxOff[c] = vOff;
// Same pattern for the index entry.
29 auto ti = actCellIdxOff[c];
30 actCellIdxOff[c] = iOff;
// Timing is optional: only reported when an elapsed_us counter was supplied.
// NOTE(review): fetch_add suggests elapsed_us points to an atomic -- confirm.
34 if (elapsed_us !=
nullptr) {
35 elapsed_us->fetch_add(timer.elapsedMicroseconds());
// Functor entry point for the populate-counts step of the iso-surface analysis.
// For each cell position c up to cb it:
//   - copies the case code and the uncompacted cell index from the *In arrays
//     to the *Out arrays,
//   - splits the uncompacted index into x / y / z using Mx and Mx*My and stores
//     the result (narrowed to 16 bits) together with the axes bits in
//     actCellIJKOut,
//   - writes the vertex count (number of set bits among the low three axes
//     bits) to actCellVtxCntOut,
//   - writes the index count (indexCountTable entry, or 0 when the cell is not
//     strictly below Ux/Uy/Uz) to actCellIdxCntOut.
// A four-wide SSE path handles most cells; a scalar loop handles the rest.
// NOTE(review): the declarations of c, cb, tmp, i_x/i_y/i_z, axes, indexCount
// and all closing braces are not part of this excerpt.
39void IsoSurfaces::AnalyzePopulateCounts::operator()()
// Instrumentation scope tagged with SCOPE_ISOSURFACES and the label below.
41 CpuInstrumentationScope(SCOPE_ISOSURFACES,
"AnalyzePopCnt");
// Timer created here; it is read via elapsedMicroseconds() at the end.
42 auto timer = Timer::startNew();
// Mx is M.x as uint32_t; Mx_My is M.x * M.y. They are the divisors used below
// to decompose a linear cell index into z (index / Mx_My), y and x.
45 const auto Mx =
static_cast<uint32_t
>(g->M.x);
46 const auto Mx_My = Mx*
static_cast<uint32_t
>(g->M.y);
// Per-axis limits (gridB - gridA - 1). A cell only gets a non-zero index count
// when its x, y and z are all strictly below these limits.
47 const auto Ux = g->gridB.x - g->gridA.x - 1;
48 const auto Uy = g->gridB.y - g->gridA.y - 1;
49 const auto Uz = g->gridB.z - g->gridA.z - 1;
// Vector path: compiled out on Apple targets, and taken at run time only when
// the feature service reports SSE42.
// NOTE(review): the intrinsics visible in this excerpt are SSE2-level; confirm
// whether the SSE42 requirement is intentional.
// NOTE(review): the .m128i_u32 / .m128i_u8 member access on __m128i looks
// MSVC-specific -- confirm which compilers build this path.
51#if !defined(__APPLE__)
52 if (g->context->features->supported(CPUFeature::SSE42)) {
// All-lanes-one constant used to mask single bits out of axes.
53 __m128i ones4 = _mm_set1_epi32(1);
// Four cells per iteration. Because the bound is the strict "c + 4 < cb", a
// final group of exactly four cells is not processed here and falls through
// to the scalar loop further down.
54 for (; c + 4 < cb; c += 4) {
// Copy 128 bits of cell indices from In to Out unchanged (four entries,
// assuming actCellIndicesIn holds 32-bit values -- consistent with the
// r.m128i_u32[i] reads below).
56 __m128i r = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(g->actCellIndicesIn + c));
57 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(g->actCellIndicesOut + c), r);
// Copy four case codes with a single 32-bit load/store; the codes end up in
// the low four bytes of q (assumes one byte per case code -- consistent with
// the q.m128i_u8[i] lookups below).
59 __m128i q = _mm_setzero_si128();
60 q.m128i_u32[0] = *
reinterpret_cast<const uint32_t*
>(g->actCellCasesIn + c);
61 *
reinterpret_cast<uint32_t*
>(g->actCellCasesOut + c) = q.m128i_u32[0];
// Lane-by-lane work: the integer divide/modulo and the table lookups are done
// per lane with scalar code.
68 for (
int i = 0; i < 4; i++) {
// Decompose the linear cell index of lane i into z, y, x.
71 auto rr = r.m128i_u32[i];
72 i_z.m128i_u32[i] = rr / Mx_My;
73 i_y.m128i_u32[i] = (rr % Mx_My) / Mx;
74 i_x.m128i_u32[i] = ((rr % Mx_My) % Mx);
// Store the coordinates narrowed to 16 bits.
77 tmp.i = (uint16_t)i_x.m128i_u32[i];
78 tmp.j = (uint16_t)i_y.m128i_u32[i];
79 tmp.k = (uint16_t)i_z.m128i_u32[i];
// Record the case code and its axes-table entry, then write the record out.
85 tmp.code = q.m128i_u8[i];
86 tmp.axes = g->axesTable[q.m128i_u8[i]];
87 g->actCellIJKOut[c + i] = tmp;
// Gather the per-lane axes bits and index count for the vector math below.
89 axes.m128i_u32[i] = g->axesTable[q.m128i_u8[i]];
90 indexCount.m128i_u32[i] = g->indexCountTable[q.m128i_u8[i]];
// Vertex count per lane = bit0 + bit1 + bit2 of axes.
93 __m128i axes_0 = _mm_and_si128(ones4, axes);
94 __m128i axes_1 = _mm_and_si128(ones4, _mm_srli_epi32(axes, 1));
95 __m128i axes_2 = _mm_and_si128(ones4, _mm_srli_epi32(axes, 2));
96 __m128i Nv = _mm_add_epi32(axes_2, _mm_add_epi32(axes_1, axes_0));
97 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(g->actCellVtxCntOut + c), Nv);
// Each compare yields an all-ones lane where the coordinate is below its limit
// (signed 32-bit compare). AND-ing the three masks with indexCount keeps the
// table value for lanes that pass all three tests and yields 0 otherwise.
99 __m128i indexOut_x = _mm_cmplt_epi32(i_x, _mm_set1_epi32(Ux));
100 __m128i indexOut_y = _mm_cmplt_epi32(i_y, _mm_set1_epi32(Uy));
101 __m128i indexOut_z = _mm_cmplt_epi32(i_z, _mm_set1_epi32(Uz));
102 __m128i indexOut = _mm_and_si128(_mm_and_si128(indexOut_x, indexOut_y), indexOut_z);
103 __m128i Ni = _mm_and_si128(indexOut, indexCount);
104 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(g->actCellIdxCntOut + c), Ni);
// Scalar loop: continues from the current c and handles every cell the vector
// loop above did not consume.
109 for (; c < cb; c++) {
110 const auto code = g->actCellCasesIn[c];
111 const auto uncompactedCellIndex = g->actCellIndicesIn[c];
// Same index decomposition as in the vector path.
114 i.z = uncompactedCellIndex / Mx_My;
115 i.y = (uncompactedCellIndex % Mx_My) / Mx;
116 i.x = (uncompactedCellIndex % Mx_My) % Mx;
// Coordinates narrowed to 16 bits, as in the vector path.
119 tmp.i = (uint16_t)i.x;
120 tmp.j = (uint16_t)i.y;
121 tmp.k = (uint16_t)i.z;
124 tmp.axes = g->axesTable[code];
125 g->actCellIJKOut[c] = tmp;
// Vertex count = number of set bits among the low three bits of axes.
129 const auto axes = g->axesTable[code];
130 const auto Nv = ((axes >> 2) & 1) + ((axes >> 1) & 1) + ((axes >> 0) & 1);
// Index count is taken from the table only when the cell is strictly below the
// limit on every axis; otherwise it is 0.
132 const auto indexOut = (i.x < Ux) && (i.y < Uy) && (i.z < Uz);
133 const auto Ni = indexOut ? g->indexCountTable[code] : 0;
// Write the four per-cell outputs.
135 g->actCellCasesOut[c] = code;
136 g->actCellIndicesOut[c] = uncompactedCellIndex;
137 g->actCellVtxCntOut[c] = Nv;
138 g->actCellIdxCntOut[c] = Ni;
// Timing is optional: only reported when an elapsed_us counter was supplied.
// NOTE(review): fetch_add suggests elapsed_us points to an atomic -- confirm.
141 if (g->elapsed_us !=
nullptr) {
142 g->elapsed_us->fetch_add(timer.elapsedMicroseconds());
std::unique_ptr< class TaskManager > taskManager
TaskManager service instance.
Contains the Engine, Renderer, resource managers and other systems needed to run Cogs....