#include "Resources/Resources.h"
#include "Resources/MeshManager.h"
#include "Resources/VertexFormats.h"
#include "../../../IsoSurfaces/MarchingCubesTables.h"
#include "Foundation/Logging/Logger.h"
#include "Foundation/Memory/MemoryBuffer.h"
#include "Foundation/Platform/Timer.h"

#include <glm/glm.hpp>
#include <immintrin.h>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <vector>
struct size3_t { size_t x, y, z; };
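
// analyzeAVX2: first pass over the scalar field. For each threshold it classifies
// every grid point (value below threshold), assembles an 8-bit marching-cubes code
// per cell, and queues compact work items for the vertex and index extraction
// passes. The scratch buffer 'tmp' holds, per threshold and grid point: a uint32
// vertex offset, a code byte, and behind those the vertex work queue (up to three
// entries per cell) and the index work queue (up to fifteen entries per cell).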
void analyzeAVX2(uint8_t* tmp,
                 unsigned& vertexCount_,
                 unsigned& occupiedCells_,
                 unsigned& indexCount_,
                 const float* values,
                 const glm::uvec3 gridLayout_,
                 const unsigned T_n,
                 const float* thresholds)
{
    size3_t gridLayout = { gridLayout_.x, gridLayout_.y, gridLayout_.z };

    assert((gridLayout.x & 7) == 0 && (gridLayout.y & 7) == 0 && (gridLayout.z & 7) == 0);
    assert((gridLayout.y*gridLayout.x & 31) == 0);
    assert(gridLayout.x <= 256 && "x must fit inside a byte");

    size3_t cellLayout = { gridLayout.x - 1, gridLayout.y - 1, gridLayout.z - 1 };
    size3_t gridLayoutL2 = {
        static_cast<size_t>(log2(gridLayout_.x)),
        static_cast<size_t>(log2(gridLayout_.y)),
        static_cast<size_t>(log2(gridLayout_.z))
    };
    assert((size_t(1) << gridLayoutL2.x) == gridLayout.x &&
           (size_t(1) << gridLayoutL2.y) == gridLayout.y &&
           (size_t(1) << gridLayoutL2.z) == gridLayout.z);

    const auto * axesTable = MarchingCubes::axesTable().data();
    const auto * indexTable = MarchingCubes::indexTable().data();

    auto * vtxwork = (uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z*((sizeof(uint32_t) + sizeof(uint8_t))*T_n));
    auto * idxwork = (uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z*((sizeof(uint32_t) + sizeof(uint8_t) + 3 * sizeof(uint32_t))*T_n));

    unsigned vertexCount = 0;
    unsigned occupiedCells = 0;
    unsigned indexCount = 0;
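
    // One sweep per threshold. Pass 1: classify every grid point against the
    // threshold (1 = value below threshold) and store one byte per point in this
    // threshold's code plane.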
    for (unsigned l = 0; l < T_n; l++) {
        auto * offsets = (uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z * sizeof(uint32_t)*l);
        auto * codes = (tmp + gridLayout.x*gridLayout.y*gridLayout.z*(sizeof(uint32_t)*T_n + l));

        {
            auto T = _mm256_set1_ps(thresholds[l]);
            auto M = _mm256_set1_epi32(1);
            for (unsigned k = 0; k < gridLayout.z*gridLayout.y*gridLayout.x; k += 8) {
                auto t0 = _mm256_load_ps(values + k);
                auto t1 = _mm256_and_si256(M, _mm256_castps_si256(_mm256_cmp_ps(t0, T, _CMP_LT_OQ)));
                auto t2 = _mm256_extracti128_si256(t1, 1);
                auto t3 = _mm256_extracti128_si256(t1, 0);
                auto t4 = _mm_packus_epi32(t3, t2);
                auto t5 = _mm_packus_epi16(t4, _mm_setzero_si128());
                _mm_storel_epi64((__m128i*)(codes + k), t5);
            }
        }
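
        // Pass 2: fold in the classification of the point one slab up (+z).
        // Each byte now holds its own bit in bit 0 and the +z neighbour's bit in
        // bit 4; the last slab simply duplicates its own bits.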
        auto * c = (__m256i*)codes;
        const auto M = (gridLayout.y*gridLayout.x) / sizeof(__m256i);
        for (unsigned k = 0; k < cellLayout.z; k++) {
            for (unsigned j = 0; j < M; j++) {
                auto t0 = _mm256_load_si256((__m256i*)c);
                auto t1 = _mm256_load_si256((__m256i*)(c + M));
                auto t2 = _mm256_slli_epi32(t1, 4);
                auto t3 = _mm256_or_si256(t0, t2);
                _mm256_store_si256(c++, t3);
            }
        }
        for (unsigned j = 0; j < M; j++) {
            auto t0 = _mm256_load_si256((__m256i*)c);
            auto t2 = _mm256_slli_epi32(t0, 4);
            auto t3 = _mm256_or_si256(t0, t2);
            _mm256_store_si256(c++, t3);
        }
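
        // Lane bookkeeping for pass 3: ki and kj track the x and y coordinate of
        // each of the 32 byte lanes so cells on the +x/+y border ("skirt") can be
        // detected and made to reuse their own bits instead of reading across the
        // row boundary.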
        auto mask_i_epi8 = _mm256_set1_epi8(static_cast<char>(gridLayout.x - 1));
        auto mask_j_epi8 = _mm256_set1_epi8(static_cast<char>(gridLayout.y - 1));
        auto stride = _mm256_set1_epi8(32);
        auto ki = _mm256_set_epi8(31, 30, 29, 28, 27, 26, 25, 24,
                                  23, 22, 21, 20, 19, 18, 17, 16,
                                  15, 14, 13, 12, 11, 10, 9, 8,
                                  7, 6, 5, 4, 3, 2, 1, 0);
        auto kj = _mm256_set_epi8(31 >> gridLayoutL2.x, 30 >> gridLayoutL2.x, 29 >> gridLayoutL2.x, 28 >> gridLayoutL2.x,
                                  27 >> gridLayoutL2.x, 26 >> gridLayoutL2.x, 25 >> gridLayoutL2.x, 24 >> gridLayoutL2.x,
                                  23 >> gridLayoutL2.x, 22 >> gridLayoutL2.x, 21 >> gridLayoutL2.x, 20 >> gridLayoutL2.x,
                                  19 >> gridLayoutL2.x, 18 >> gridLayoutL2.x, 17 >> gridLayoutL2.x, 16 >> gridLayoutL2.x,
                                  15 >> gridLayoutL2.x, 14 >> gridLayoutL2.x, 13 >> gridLayoutL2.x, 12 >> gridLayoutL2.x,
                                  11 >> gridLayoutL2.x, 10 >> gridLayoutL2.x, 9 >> gridLayoutL2.x, 8 >> gridLayoutL2.x,
                                  7 >> gridLayoutL2.x, 6 >> gridLayoutL2.x, 5 >> gridLayoutL2.x, 4 >> gridLayoutL2.x,
                                  3 >> gridLayoutL2.x, 2 >> gridLayoutL2.x, 1 >> gridLayoutL2.x, 0 >> gridLayoutL2.x);
        auto ones = _mm256_set1_epi32(~0);
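
        // Pass 3: merge the +x and +y neighbour bits (and, via the nibbles above,
        // +z) into a full 8-bit cell code for 32 cells at a time, then emit work
        // items: up to three vertices per cell (one per crossed edge the cell owns)
        // and, for non-skirt cells, the triangle corner references from the index table.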
        for (unsigned i = 0; i < gridLayout.z*gridLayout.y*gridLayout.x; i += 32) {
            auto skirt_i = _mm256_cmpeq_epi8(_mm256_and_si256(ki, mask_i_epi8), mask_i_epi8);
            ki = _mm256_add_epi8(ki, stride);

            auto t0 = _mm256_load_si256((__m256i*)(codes + i));
            auto t1 = _mm256_loadu_si256((__m256i*)(codes + i + 1));
            auto t2 = _mm256_blendv_epi8(t1, t0, skirt_i);
            auto t3 = _mm256_or_si256(t0, _mm256_slli_epi32(t2, 1));

            auto row = _mm256_add_epi8(_mm256_set1_epi8(static_cast<char>(i >> gridLayoutL2.x)), kj);
            auto skirt_j = _mm256_cmpeq_epi8(_mm256_and_si256(row, mask_j_epi8), mask_j_epi8);

            auto r0 = _mm256_load_si256((__m256i*)(codes + gridLayout.x + i));
            auto r1 = _mm256_loadu_si256((__m256i*)(codes + gridLayout.x + i + 1));
            auto r2 = _mm256_blendv_epi8(r1, r0, skirt_i);
            auto r3 = _mm256_or_si256(r0, _mm256_slli_epi32(r2, 1));
            auto r4 = _mm256_blendv_epi8(r3, t3, skirt_j);
            auto t5 = _mm256_or_si256(t3, _mm256_slli_epi32(r4, 2));

            // Cells with all eight corners inside produce no geometry: clear their code.
            auto ones_ = _mm256_cmpeq_epi8(_mm256_setzero_si256(), _mm256_setzero_si256());
            auto m2 = _mm256_cmpeq_epi8(t5, ones_);
            auto code_epi8 = _mm256_andnot_si256(m2, t5);
            _mm256_store_si256((__m256i*)(codes + i), code_epi8);

            auto m3 = _mm256_cmpeq_epi8(code_epi8, _mm256_setzero_si256());
            uint32_t zerolanes = _mm256_movemask_epi8(m3);
            if (zerolanes == 0xffffffff)
                continue;

            auto skirt_k = _mm256_set1_epi8((i >> (gridLayoutL2.x + gridLayoutL2.y)) == ((1u << gridLayoutL2.z) - 1u) ? 255 : 0);
            auto skirt = _mm256_or_si256(_mm256_or_si256(skirt_i, skirt_j), skirt_k);

            for (unsigned r = 0; r < 32; r++) {
                if (m3.m256i_u8[r] == 0) {
                    auto code = code_epi8.m256i_u8[r];
                    offsets[i + r] = vertexCount;
                    auto k = ((i + r) << 8) | l;

                    // One vertex per crossed edge owned by this cell, emitted in the
                    // fixed order z (axis bit 4), y (bit 2), x (bit 1).
                    auto axes = axesTable[code];
                    if (axes & 4)
                        vtxwork[vertexCount++] = k | (4 << 5);
                    if (axes & 2)
                        vtxwork[vertexCount++] = k | (2 << 5);
                    if (axes & 1)
                        vtxwork[vertexCount++] = k | (1 << 5);

                    if (skirt.m256i_u8[r] == 0) {
                        occupiedCells++;
                        // Number of triangle indices emitted for this cell configuration.
                        auto Ni = MarchingCubes::indexCountTable()[code];
                        auto * as = indexTable + 16 * size_t(code);
                        for (unsigned ll = 0; ll < Ni; ll += 3) {
                            idxwork[indexCount++] = k | (as[ll + 0] << 2);
                            idxwork[indexCount++] = k | (as[ll + 1] << 2);
                            idxwork[indexCount++] = k | (as[ll + 2] << 2);
                        }
                    }
                }
            }
        }
    }
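
    // Pad both work queues up to a multiple of eight entries so the AVX2
    // extraction passes below can always read full vectors.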
    if (auto t = vertexCount; t) {
        while ((t & 7)) vtxwork[t++] = vtxwork[t - 1];
    }
    if (auto t = indexCount; t) {
        while (t & 7) idxwork[t++] = idxwork[indexCount - 1];
    }
    vertexCount_ = vertexCount;
    occupiedCells_ = occupiedCells;
    indexCount_ = indexCount;
}
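
// vertexExtractAVX2: second pass. Consumes the vtxwork queue built by analyzeAVX2
// and writes the interleaved Pos3fNorm3fTex2f vertex stream, eight vertices per
// iteration.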
                             const glm::uvec3 gridLayout_,
                             const unsigned T_n,
                             const float* thresholds,
                             const unsigned vn)
{
    size3_t gridLayout = { gridLayout_.x, gridLayout_.y, gridLayout_.z };
    size3_t cellLayout = { gridLayout.x - 1, gridLayout.y - 1, gridLayout.z - 1 };
    size3_t gridLayoutL2 = {
        static_cast<size_t>(log2(gridLayout_.x)),
        static_cast<size_t>(log2(gridLayout_.y)),
        static_cast<size_t>(log2(gridLayout_.z))
    };
    assert((size_t(1) << gridLayoutL2.x) == gridLayout.x &&
           (size_t(1) << gridLayoutL2.y) == gridLayout.y &&
           (size_t(1) << gridLayoutL2.z) == gridLayout.z);

    auto * work = (uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z*((sizeof(uint32_t) + sizeof(uint8_t))*T_n));

    const auto mask_j = _mm256_set1_epi32(static_cast<int>(gridLayout.y - 1));
    const auto mask_k = _mm256_set1_epi32(static_cast<int>(gridLayout.z - 1));
    const auto X_L2_epi64 = _mm_insert_epi64(_mm_undefined_si128(), gridLayoutL2.x, 0);
    const auto XY_L2_epi64 = _mm_insert_epi64(_mm_undefined_si128(), gridLayoutL2.x + gridLayoutL2.y, 0);
    const auto oneOverTn = _mm256_set1_ps(1.f / T_n);
    const auto one = _mm256_set1_epi32(1);
    const auto X = _mm256_set1_epi32(static_cast<int>(gridLayout.x));
    const auto XY = _mm256_set1_epi32(static_cast<int>(gridLayout.x*gridLayout.y));
    const auto C3 = _mm256_setr_ps(thresholds[0], thresholds[1], thresholds[2], thresholds[3],
                                   thresholds[0], thresholds[1], thresholds[2], thresholds[3]);
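
    // Each 32-bit work item encodes (cell << 8) | (edge-axis bit << 5) | threshold.
    // For every item: decode the edge, gather the scalar field at both endpoints,
    // interpolate the crossing point and a forward-difference gradient, and produce
    // one output vertex.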
    for (size_t v = 0; v < vn; v += 8) {
        auto w = _mm256_loadu_si256((__m256i*)(work + v));
        auto o0 = _mm256_srli_epi32(w, 8);
        // Field value at the edge's first endpoint.
        auto a = _mm256_i32gather_ps(values, o0, 4);

        // Decode the grid coordinates of that endpoint.
        auto mask_i = _mm256_sub_epi32(X, one);
        auto i = _mm256_and_si256(o0, mask_i);
        auto j = _mm256_and_si256(_mm256_srl_epi32(o0, X_L2_epi64), mask_j);
        auto k = _mm256_and_si256(_mm256_srl_epi32(o0, XY_L2_epi64), mask_k);

        // Forward differences at the first endpoint, clamped at the grid border.
        auto i0_less = _mm256_cmpgt_epi32(mask_i, i);
        auto j0_less = _mm256_cmpgt_epi32(mask_j, j);
        auto k0_less = _mm256_cmpgt_epi32(mask_k, k);
        auto ax = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o0, _mm256_and_si256(i0_less, one)), 4), a);
        auto ay = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o0, _mm256_and_si256(j0_less, X)), 4), a);
        auto az = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o0, _mm256_and_si256(k0_less, XY)), 4), a);

        // Which axis the edge runs along (bits 5..7 of the work item).
        auto di = _mm256_and_si256(_mm256_srli_epi32(w, 5), one);
        auto dj = _mm256_and_si256(_mm256_srli_epi32(w, 6), one);
        auto dk = _mm256_and_si256(_mm256_srli_epi32(w, 7), one);
        auto mx = _mm256_cmpeq_epi32(di, one);
        auto my = _mm256_cmpeq_epi32(dj, one);
        auto mz = _mm256_cmpeq_epi32(dk, one);

        // Field value and forward differences at the second endpoint.
        auto o1 = _mm256_add_epi32(_mm256_add_epi32(o0, di),
                                   _mm256_add_epi32(_mm256_and_si256(my, X),
                                                    _mm256_and_si256(mz, XY)));
        auto b = _mm256_i32gather_ps(values, o1, 4);
        auto i1_less = _mm256_cmpgt_epi32(mask_i, _mm256_add_epi32(i, di));
        auto j1_less = _mm256_cmpgt_epi32(mask_j, _mm256_add_epi32(j, dj));
        auto k1_less = _mm256_cmpgt_epi32(mask_k, _mm256_add_epi32(k, dk));
        auto bx = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o1, _mm256_and_si256(i1_less, one)), 4), b);
        auto by = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o1, _mm256_and_si256(j1_less, X)), 4), b);
        auto bz = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o1, _mm256_and_si256(k1_less, XY)), 4), b);

        // Parametric position of the threshold crossing along the edge.
        auto l = _mm256_and_si256(w, _mm256_set1_epi32((1 << 5) - 1));
        auto T = _mm256_permutevar_ps(C3, l);
        auto t = _mm256_mul_ps(_mm256_sub_ps(T, a), _mm256_rcp_ps(_mm256_sub_ps(b, a)));

        // Vertex position, and the negated interpolated gradient as the normal.
        auto px = _mm256_add_ps(_mm256_cvtepi32_ps(i), _mm256_and_ps(_mm256_castsi256_ps(mx), t));
        auto py = _mm256_add_ps(_mm256_cvtepi32_ps(j), _mm256_and_ps(_mm256_castsi256_ps(my), t));
        auto pz = _mm256_add_ps(_mm256_cvtepi32_ps(k), _mm256_and_ps(_mm256_castsi256_ps(mz), t));
        auto gx = _mm256_sub_ps(_mm256_mul_ps(t, _mm256_sub_ps(ax, bx)), ax);
        auto gy = _mm256_sub_ps(_mm256_mul_ps(t, _mm256_sub_ps(ay, by)), ay);
        auto gz = _mm256_sub_ps(_mm256_mul_ps(t, _mm256_sub_ps(az, bz)), az);

        // Texture u coordinate derived from the threshold index.
        auto half = _mm256_set1_ps(0.5f);
        auto tp = _mm256_mul_ps(oneOverTn, _mm256_add_ps(_mm256_cvtepi32_ps(l), half));
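
        // Full groups of eight vertices are transposed from the SoA registers into
        // the interleaved vertex layout; a partial final group takes the scalar path.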
        if (v + 8 <= vn) {
            auto a0 = _mm256_unpacklo_ps(px, py);
            auto a2 = _mm256_unpacklo_ps(pz, gx);
            auto a4 = _mm256_unpacklo_ps(gy, gz);
            auto a6 = _mm256_unpacklo_ps(tp, half);

            auto b0_ = _mm256_shuffle_ps(a0, a2, _MM_SHUFFLE(1, 0, 3, 2));
            auto b0 = _mm256_blend_ps(a0, b0_, 0xCC);
            auto b1 = _mm256_blend_ps(a2, b0_, 0x33);

            auto b4_ = _mm256_shuffle_ps(a4, a6, _MM_SHUFFLE(1, 0, 3, 2));
            auto b4 = _mm256_blend_ps(a4, b4_, 0xCC);
            auto b5 = _mm256_blend_ps(a6, b4_, 0x33);

            _mm256_store_ps((float*)vertices.data + 8 * (v + 0), _mm256_permute2f128_ps(b0, b4, 0x20));
            _mm256_store_ps((float*)vertices.data + 8 * (v + 4), _mm256_permute2f128_ps(b0, b4, 0x31));

            auto a1 = _mm256_unpackhi_ps(px, py);
            auto a3 = _mm256_unpackhi_ps(pz, gx);

            auto b2_ = _mm256_shuffle_ps(a1, a3, _MM_SHUFFLE(1, 0, 3, 2));
            auto b2 = _mm256_blend_ps(a1, b2_, 0xCC);
            auto b3 = _mm256_blend_ps(a3, b2_, 0x33);

            auto a5 = _mm256_unpackhi_ps(gy, gz);
            auto a7 = _mm256_unpackhi_ps(tp, half);

            auto b6_ = _mm256_shuffle_ps(a5, a7, _MM_SHUFFLE(1, 0, 3, 2));
            auto b6 = _mm256_blend_ps(a5, b6_, 0xCC);
            auto b7 = _mm256_blend_ps(a7, b6_, 0x33);

            _mm256_store_ps((float*)vertices.data + 8 * (v + 1), _mm256_permute2f128_ps(b1, b5, 0x20));
            _mm256_store_ps((float*)vertices.data + 8 * (v + 2), _mm256_permute2f128_ps(b2, b6, 0x20));
            _mm256_store_ps((float*)vertices.data + 8 * (v + 3), _mm256_permute2f128_ps(b3, b7, 0x20));
            _mm256_store_ps((float*)vertices.data + 8 * (v + 5), _mm256_permute2f128_ps(b1, b5, 0x31));
            _mm256_store_ps((float*)vertices.data + 8 * (v + 6), _mm256_permute2f128_ps(b2, b6, 0x31));
            _mm256_store_ps((float*)vertices.data + 8 * (v + 7), _mm256_permute2f128_ps(b3, b7, 0x31));
        } else {
            // Partial final group: write the remaining vertices one at a time.
            for (size_t ll = 0; ll < 8 && v + ll < vn; ll++) {
                vertices[v + ll].pos.x = px.m256_f32[ll];
                vertices[v + ll].pos.y = py.m256_f32[ll];
                vertices[v + ll].pos.z = pz.m256_f32[ll];
                vertices[v + ll].nrm.x = gx.m256_f32[ll];
                vertices[v + ll].nrm.y = gy.m256_f32[ll];
                vertices[v + ll].nrm.z = gz.m256_f32[ll];
                vertices[v + ll].tex.x = tp.m256_f32[ll];
                vertices[v + ll].tex.y = 0.5f;
            }
        }
    }
    // All queued vertices written.
    return static_cast<unsigned>(vn);
}
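
// indexExtract: scalar index generation. Walks every interior cell per threshold,
// follows the marching-cubes index table, and resolves each triangle corner to the
// vertex emitted by the cell that owns the crossed edge. Also records the running
// index count per threshold into sub_mesh.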
template<typename Idx>
unsigned indexExtract(uint32_t* sub_mesh,
                      Idx* indices,
                      uint8_t* tmp,
                      const glm::uvec3 gridLayout,
                      const unsigned T_n,
                      const unsigned indexCount,
                      const unsigned vertexCount)
{
    const auto cellLayout = gridLayout - glm::uvec3(1, 1, 1);
    const auto * axesTable = MarchingCubes::axesTable().data();
    const auto * indexTable = MarchingCubes::indexTable().data();

    unsigned iix = 0;
    for (unsigned l = 0; l < T_n; l++) {
        auto * offsets = (uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z * sizeof(uint32_t)*l);
        auto * codes = tmp + gridLayout.x*gridLayout.y*gridLayout.z*(sizeof(uint32_t)*T_n + l);

        for (unsigned k = 0; k < cellLayout.z; k++) {
            for (unsigned j = 0; j < cellLayout.y; j++) {
                for (unsigned i = 0; i < cellLayout.x; i++) {
                    const auto code = codes[(k*gridLayout.y + j)*gridLayout.x + i];
                    if (code == 0)
                        continue;

                    auto * axisShifts = indexTable + 16 * code;
                    for (;;) {
                        auto axisShift = *axisShifts++;
                        if (axisShift == 255)
                            break;

                        // Bits 3..5 of the entry say which neighbouring cell (+i/+j/+k)
                        // owns the vertex; the low bits, masked with that cell's emitted
                        // axes, give the vertex's slot among the ones it emitted.
                        auto ii = i + (axisShift & 8 ? 1 : 0);
                        auto jj = j + (axisShift & 16 ? 1 : 0);
                        auto kk = k + (axisShift & 32 ? 1 : 0);
                        auto shiftedCell = (kk*gridLayout.y + jj)*gridLayout.x + ii;

                        const auto shiftedCode = codes[shiftedCell];
                        const auto axes = axesTable[shiftedCode] & axisShift;

                        const auto ix = offsets[shiftedCell] + ((axes & 4) ? 1 : 0) + ((axes & 2) ? 1 : 0);
                        assert(ix < vertexCount);
                        assert(sizeof(Idx) != 2 || ix < 0xffff);
                        indices[iix++] = (Idx)ix;
                    }
                }
            }
        }
        // Running end of this threshold's index range.
        sub_mesh[l + 1] = iix;
    }
    return iix;
}
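
// indexExtractAVX2: vectorised variant of the index pass. Consumes the idxwork
// queue from analyzeAVX2 and resolves eight triangle corners per iteration with
// gathered code and offset lookups.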
unsigned indexExtractAVX2(uint32_t* indices,
                          uint8_t* tmp,
                          const glm::uvec3 gridLayout_,
                          const unsigned T_n,
                          const unsigned indexCount,
                          const unsigned vertexCount)
{
    size3_t gridLayout = { gridLayout_.x, gridLayout_.y, gridLayout_.z };
    size3_t cellLayout = { gridLayout.x - 1, gridLayout.y - 1, gridLayout.z - 1 };
    const auto * axesTable = MarchingCubes::axesTable().data();
    auto * idxwork = (uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z*((sizeof(uint32_t) + sizeof(uint8_t) + 3 * sizeof(uint32_t))*T_n));

    size3_t gridLayoutL2 = {
        static_cast<size_t>(log2(gridLayout_.x)),
        static_cast<size_t>(log2(gridLayout_.y)),
        static_cast<size_t>(log2(gridLayout_.z))
    };
    assert((size_t(1) << gridLayoutL2.x) == gridLayout.x &&
           (size_t(1) << gridLayoutL2.y) == gridLayout.y &&
           (size_t(1) << gridLayoutL2.z) == gridLayout.z);

    auto * offsets = (uint32_t*)tmp;
    auto * codes = tmp + gridLayout.x*gridLayout.y*gridLayout.z*(sizeof(uint32_t)*T_n);
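
    // Each idxwork item encodes the owning cell, the threshold, and the table entry
    // (neighbour-cell shift plus the axes preceding the requested edge). The gathers
    // below turn that into a final vertex index.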
    const auto one_epi32 = _mm256_set1_epi32(1);
    const auto three_epi32 = _mm256_set1_epi32(3);
    const auto gridL2_3_epi64 = _mm_set1_epi64x(gridLayoutL2.x + gridLayoutL2.y + gridLayoutL2.z);
    const auto X_epi32 = _mm256_set1_epi32(static_cast<int>(gridLayout.x));
    const auto XY_epi32 = _mm256_set1_epi32(static_cast<int>(gridLayout.x*gridLayout.y));
    const auto mask_255_epi32 = _mm256_set1_epi32(255);

    for (size_t iix = 0; iix < indexCount; iix += 8) {
        const auto w = _mm256_load_si256((const __m256i*)(idxwork + iix));
        const auto o0 = _mm256_srli_epi32(w, 8);

        // Which neighbouring cell owns the vertex (+i/+j/+k from bits 5..7).
        const auto shiftI = _mm256_and_si256(_mm256_srli_epi32(w, 5), one_epi32);
        const auto shiftJ = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(_mm256_srli_epi32(w, 6), one_epi32), one_epi32), X_epi32);
        const auto shiftK = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(_mm256_srli_epi32(w, 7), one_epi32), one_epi32), XY_epi32);
        const auto shiftedCell = _mm256_add_epi32(_mm256_add_epi32(o0, shiftI),
                                                  _mm256_add_epi32(shiftJ, shiftK));

        // Index into that threshold's codes/offsets plane.
        const auto L = _mm256_sll_epi32(_mm256_and_si256(w, three_epi32), gridL2_3_epi64);
        const auto I = _mm256_add_epi32(shiftedCell, L);

        const auto shiftedCode = _mm256_and_si256(_mm256_i32gather_epi32((const int*)codes, I, 1), mask_255_epi32);
        const auto axisShift = _mm256_srli_epi32(w, 2);
        const auto axes = _mm256_and_si256(_mm256_i32gather_epi32((const int*)axesTable, shiftedCode, 1), axisShift);

        // Slot of the vertex within the owning cell's emitted vertices.
        const auto addJ = _mm256_and_si256(_mm256_srli_epi32(axes, 1), one_epi32);
        const auto addK = _mm256_and_si256(_mm256_srli_epi32(axes, 2), one_epi32);
        const auto offset = _mm256_add_epi32(_mm256_i32gather_epi32((const int*)offsets, I, 4),
                                             _mm256_add_epi32(addJ, addK));

        _mm256_store_si256((__m256i*)(indices + iix), offset);
    }
    return indexCount;
}
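
// Mesh build entry point: allocates the per-threshold scratch buffer, runs the
// analysis pass, maps the mesh's vertex and index streams, runs the extraction
// passes, then fills in the sub-meshes (one per threshold) and the bounding box.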
namespace Cogs::Core::EchoSounder {

                      const glm::uvec3 gridLayout,
                      const float *thresholds, size_t count)
{
    assert(gridLayout.x != 0 && gridLayout.y != 0 && gridLayout.z != 0);

    const uint32_t T_n = (uint32_t)count;
    const auto cellLayout = gridLayout - glm::uvec3(1, 1, 1);

    // Scratch per threshold and grid point: uint32 offset, code byte,
    // up to 3 vertex work items and up to 15 index work items.
    scratch.resize(T_n*(sizeof(uint32_t) + sizeof(uint8_t) + 3 * sizeof(uint32_t) + 15 * sizeof(uint32_t))*gridLayout.x*gridLayout.y*gridLayout.z, false);
    auto * tmp = (uint8_t*)scratch.data();

    unsigned occupiedCells = 0;
    unsigned indexCount = 0;
    unsigned vertexCount = 0;

    auto timer = Timer::startNew();
    analyzeAVX2(tmp, vertexCount, occupiedCells, indexCount, values, gridLayout, T_n, thresholds);
    analyze = timer.elapsedMicroseconds();

    if (occupiedCells == 0) {
        // Nothing crossed any threshold; no mesh to build (assumed to return the
        // manager's NoHandle sentinel here).
        return MeshManager::NoHandle;
    }

    auto mesh = context->meshManager->createLocked();
    auto vertices = mesh->map<Vertex>(VertexDataType::Interleaved0, VertexFormats::Pos3fNorm3fTex2f, vertexCount);

    timer = Timer::startNew();
    auto vix = vertexExtractAVX2(vertices, tmp, values, gridLayout, T_n, thresholds, vertexCount);
    assert(vix == vertexCount);
    vtx = timer.elapsedMicroseconds();

    std::vector<uint32_t> sub_mesh(T_n + 1, 0);
    mesh->clearIndexes();
    Idx *indices = (Idx*)mesh->mapStream(VertexDataType::Indexes, 0, indexCount, sizeof(Idx), true);
    timer = Timer::startNew();
    auto iix = indexExtract(sub_mesh.data(), indices, tmp, gridLayout, T_n, indexCount, vertexCount);
    assert(iix == indexCount);
    idx = timer.elapsedMicroseconds();

    auto subMeshes = mesh->mapSubMeshes(T_n);
    for (unsigned l = 0; l < T_n; l++) {
        uint32_t start = sub_mesh[l];
        uint32_t size = sub_mesh[l + 1] - sub_mesh[l];
        // Sub-mesh l covers indices [start, start + size), drawn as an indexed triangle list.
    }
    mesh->setCount(indexCount);

    auto box = Geometry::makeEmptyBoundingBox<Geometry::BoundingBox>();
    for (unsigned i = 0; i < vertexCount; i++) {
        glm::vec3 pos = vertices[i].pos;
        box.min = glm::min(box.min, pos);
        box.max = glm::max(box.max, pos);
    }
    mesh->setBounds(box);

    return mesh.getHandle();
}

}