Cogs.Core
Counting.cpp
1#if !defined(EMSCRIPTEN)
2#include "IsoSurfaces_internal.h"
3
4#include "Platform/Instrumentation.h"
5#include "Services/Features.h"
6#include "Services/TaskManager.h"
7#include "Context.h"
8
9#include "Foundation/Platform/Timer.h"
10
11using namespace Cogs::Core;
12
13void IsoSurfaces::AnalyzePrefixSum::operator()()
14{
15 context->taskManager->wait(antecedent);
16 context->taskManager->destroy(antecedent);
17 CpuInstrumentationScope(SCOPE_ISOSURFACES, "AnalyzePrefixSum");
18 auto timer = Timer::startNew();
19
20 actCellVtxOff[Nc] = 0;
21 actCellIdxOff[Nc] = 0;
22 int32_t vOff = 0;
23 int32_t iOff = 0;
24 for (int c = 0; c <= Nc; c++) {
25 auto tv = actCellVtxOff[c];
26 actCellVtxOff[c] = vOff;
27 vOff += tv;
28
29 auto ti = actCellIdxOff[c];
30 actCellIdxOff[c] = iOff;
31 iOff += ti;
32 }
33
34 if (elapsed_us != nullptr) {
35 elapsed_us->fetch_add(timer.elapsedMicroseconds());
36 }
37}
38
39void IsoSurfaces::AnalyzePopulateCounts::operator()()
40{
41 CpuInstrumentationScope(SCOPE_ISOSURFACES, "AnalyzePopCnt");
42 auto timer = Timer::startNew();
43
44 size_t c = ca;
45 const auto Mx = static_cast<uint32_t>(g->M.x);
46 const auto Mx_My = Mx*static_cast<uint32_t>(g->M.y);
47 const auto Ux = g->gridB.x - g->gridA.x - 1;
48 const auto Uy = g->gridB.y - g->gridA.y - 1;
49 const auto Uz = g->gridB.z - g->gridA.z - 1;
50
51#if !defined(__APPLE__)
52 if (g->context->features->supported(CPUFeature::SSE42)) {
53 __m128i ones4 = _mm_set1_epi32(1);
54 for (; c + 4 < cb; c += 4) {
55
56 __m128i r = _mm_loadu_si128(reinterpret_cast<const __m128i*>(g->actCellIndicesIn + c));
57 _mm_storeu_si128(reinterpret_cast<__m128i*>(g->actCellIndicesOut + c), r);
58
59 __m128i q = _mm_setzero_si128();
60 q.m128i_u32[0] = *reinterpret_cast<const uint32_t*>(g->actCellCasesIn + c);
61 *reinterpret_cast<uint32_t*>(g->actCellCasesOut + c) = q.m128i_u32[0];
62
63 __m128i i_z;
64 __m128i i_y;
65 __m128i i_x;
66 __m128i axes;
67 __m128i indexCount;
68 for (int i = 0; i < 4; i++) {
69
70
71 auto rr = r.m128i_u32[i];
72 i_z.m128i_u32[i] = rr / Mx_My;
73 i_y.m128i_u32[i] = (rr % Mx_My) / Mx;
74 i_x.m128i_u32[i] = ((rr % Mx_My) % Mx);
75
76 Scratch::ijk_t tmp;
77 tmp.i = (uint16_t)i_x.m128i_u32[i];
78 tmp.j = (uint16_t)i_y.m128i_u32[i];
79 tmp.k = (uint16_t)i_z.m128i_u32[i];
80
81 //assert(tmp.i == i_x.m128i_u32[i]);
82 //assert(tmp.j == i_y.m128i_u32[i]);
83 //assert(tmp.k == i_z.m128i_u32[i]);
84
85 tmp.code = q.m128i_u8[i];
86 tmp.axes = g->axesTable[q.m128i_u8[i]];
87 g->actCellIJKOut[c + i] = tmp;
88
89 axes.m128i_u32[i] = g->axesTable[q.m128i_u8[i]];
90 indexCount.m128i_u32[i] = g->indexCountTable[q.m128i_u8[i]];
91 }
92
93 __m128i axes_0 = _mm_and_si128(ones4, axes);
94 __m128i axes_1 = _mm_and_si128(ones4, _mm_srli_epi32(axes, 1));
95 __m128i axes_2 = _mm_and_si128(ones4, _mm_srli_epi32(axes, 2));
96 __m128i Nv = _mm_add_epi32(axes_2, _mm_add_epi32(axes_1, axes_0));
97 _mm_storeu_si128(reinterpret_cast<__m128i*>(g->actCellVtxCntOut + c), Nv);
98
99 __m128i indexOut_x = _mm_cmplt_epi32(i_x, _mm_set1_epi32(Ux));
100 __m128i indexOut_y = _mm_cmplt_epi32(i_y, _mm_set1_epi32(Uy));
101 __m128i indexOut_z = _mm_cmplt_epi32(i_z, _mm_set1_epi32(Uz));
102 __m128i indexOut = _mm_and_si128(_mm_and_si128(indexOut_x, indexOut_y), indexOut_z);
103 __m128i Ni = _mm_and_si128(indexOut, indexCount);
104 _mm_storeu_si128(reinterpret_cast<__m128i*>(g->actCellIdxCntOut + c), Ni);
105 }
106 }
107#endif
108
109 for (; c < cb; c++) {
110 const auto code = g->actCellCasesIn[c];
111 const auto uncompactedCellIndex = g->actCellIndicesIn[c];
112
113 glm::ivec3 i;
114 i.z = uncompactedCellIndex / Mx_My;
115 i.y = (uncompactedCellIndex % Mx_My) / Mx;
116 i.x = (uncompactedCellIndex % Mx_My) % Mx;
117
118 Scratch::ijk_t tmp;
119 tmp.i = (uint16_t)i.x;
120 tmp.j = (uint16_t)i.y;
121 tmp.k = (uint16_t)i.z;
122
123 tmp.code = code;
124 tmp.axes = g->axesTable[code];
125 g->actCellIJKOut[c] = tmp;
126
127 //const auto i = delinarize(uncompactedCellIndex, g->M, g->gridA);
128
129 const auto axes = g->axesTable[code];
130 const auto Nv = ((axes >> 2) & 1) + ((axes >> 1) & 1) + ((axes >> 0) & 1);
131
132 const auto indexOut = (i.x < Ux) && (i.y < Uy) && (i.z < Uz);
133 const auto Ni = indexOut ? g->indexCountTable[code] : 0;
134
135 g->actCellCasesOut[c] = code;
136 g->actCellIndicesOut[c] = uncompactedCellIndex;
137 g->actCellVtxCntOut[c] = Nv;
138 g->actCellIdxCntOut[c] = Ni;
139 }
140
141 if (g->elapsed_us != nullptr) {
142 g->elapsed_us->fetch_add(timer.elapsedMicroseconds());
143 }
144}
145
146#endif
std::unique_ptr< class TaskManager > taskManager
TaskManager service instance.
Definition: Context.h:186
Contains the Engine, Renderer, resource managers and other systems needed to run Cogs....