2#include "Resources/Resources.h"
3#include "Resources/MeshManager.h"
4#include "Resources/VertexFormats.h"
8#include "../../../IsoSurfaces/MarchingCubesTables.h"
10#include "Foundation/Logging/Logger.h"
11#include "Foundation/Memory/MemoryBuffer.h"
12#include "Foundation/Platform/Timer.h"
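
// AVX2 marching-cubes surface extraction. The mesh is built in three passes
// over a power-of-two grid, handling T_n iso-levels (threshold layers) in one
// sweep:
//
//   1. analyzeAVX2       - classifies grid points against each threshold,
//                          builds per-cell 8-bit cube codes, and emits compact
//                          vertex/index work items into scratch memory.
//   2. vertexExtractAVX2 - interpolates positions and normals for eight work
//                          items at a time, writing interleaved vertices.
//   3. indexExtract /    - resolves index work items to final vertex indices
//      indexExtractAVX2    through the per-cell offset table from pass 1.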
namespace {

    struct size3_t
    {
        size_t x, y, z;
    };

    // Assumed local definition of the interleaved vertex layout used below; it
    // matches VertexFormats::Pos3fNorm3fTex2f (position, normal, texcoord =
    // 8 floats). The original may have used an equivalent engine type.
    struct Vertex
    {
        glm::vec3 pos;
        glm::vec3 nrm;
        glm::vec2 tex;
    };
    // Pass 1: classify all grid points, build cube codes, and emit vertex and
    // index work items into the scratch buffer. Counters accumulate into the
    // caller's running totals. (The values, gridLayout_z and T_n parameters
    // were missing here and are reconstructed from the call site below.)
    void analyzeAVX2(uint8_t* tmp,
                     unsigned& vertexCount_,
                     unsigned& occupiedCells_,
                     unsigned& indexCount_,
                     const float* values,
                     size_t gridLayoutL2_x,
                     size_t gridLayoutL2_y,
                     size_t gridLayout_z,
                     size_t T_n,
                     const float* thresholds)
    {
        const size_t gridLayout_x = size_t(1) << gridLayoutL2_x;
        const size_t gridLayout_y = size_t(1) << gridLayoutL2_y;
        const size_t cellLayout_z = gridLayout_z - 1;

        assert((gridLayout_x & 7) == 0 && (gridLayout_y & 7) == 0);
        assert((gridLayout_y*gridLayout_x & 31) == 0);
        assert(gridLayout_x <= 256 && "x must fit inside a byte");
        const auto * axesTable = MarchingCubes::axesTable().data();
        const auto * indexTable = MarchingCubes::indexTable().data();

        // Scratch layout (grid3 = x*y*z grid points):
        //   offsets : uint32_t[grid3] per layer, starting at tmp
        //   codes   : uint8_t[grid3] per layer, after all offset tables
        //   vtxwork : up to 3 uint32_t work items per cell per layer
        //   idxwork : up to 15 uint32_t work items per cell per layer
        auto * vtxwork = (uint32_t*)(tmp + gridLayout_x * gridLayout_y*gridLayout_z*((sizeof(uint32_t) + sizeof(uint8_t))*T_n));
        auto * idxwork = (uint32_t*)(tmp + gridLayout_x * gridLayout_y*gridLayout_z*((sizeof(uint32_t) + sizeof(uint8_t) + 3 * sizeof(uint32_t))*T_n));
        unsigned vertexCount = 0;
        unsigned occupiedCells = 0;
        unsigned indexCount = 0;
        for (unsigned l = 0; l < T_n; l++) {
            auto * offsets = (uint32_t*)(tmp + gridLayout_x * gridLayout_y*gridLayout_z * sizeof(uint32_t)*l);
            auto * codes = (tmp + gridLayout_x * gridLayout_y*gridLayout_z*(sizeof(uint32_t)*T_n + l));

            auto T = _mm256_set1_ps(thresholds[l]);
            auto M = _mm256_set1_epi32(1);
            // Classify 8 grid points per iteration: byte k of codes becomes 1
            // if values[k] is below the threshold, 0 otherwise.
            for (unsigned k = 0; k < gridLayout_z*gridLayout_y*gridLayout_x; k += 8) {
                auto t0 = _mm256_load_ps(values + k);
                auto t1 = _mm256_and_si256(M, _mm256_castps_si256(_mm256_cmp_ps(t0, T, _CMP_LT_OQ)));
                auto t2 = _mm256_extracti128_si256(t1, 1);
                auto t3 = _mm256_extracti128_si256(t1, 0);
                auto t4 = _mm_packus_epi32(t3, t2);
                auto t5 = _mm_packus_epi16(t4, _mm_setzero_si128());
                _mm_storel_epi64((__m128i*)(codes + k), t5);    // store the 8 result bytes
            }
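
            // Scalar sketch of the classification above (grid3 = x*y*z):
            //
            //     for (size_t p = 0; p < grid3; p++)
            //         codes[p] = values[p] < thresholds[l] ? 1 : 0;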
            // Merge each z-slice with the slice above it: bit 0 of each byte
            // is the sample at (i,j,k), bit 4 the sample at (i,j,k+1).
            {
                auto * c = (__m256i*)codes;
                const auto M = (gridLayout_y*gridLayout_x) / sizeof(__m256i);
                for (unsigned k = 0; k < cellLayout_z; k++) {
                    for (unsigned j = 0; j < M; j++) {
                        auto t0 = _mm256_load_si256((__m256i*)c);
                        auto t1 = _mm256_load_si256((__m256i*)(c + M));  // same position, next slice
                        auto t2 = _mm256_slli_epi32(t1, 4);
                        auto t3 = _mm256_or_si256(t0, t2);
                        _mm256_store_si256(c++, t3);
                    }
                }
                // The topmost slice has no slice above it; duplicate its own
                // bits into the upper nibble so its cells read as closed.
                for (unsigned j = 0; j < M; j++) {
                    auto t0 = _mm256_load_si256((__m256i*)c);
                    auto t2 = _mm256_slli_epi32(t0, 4);
                    auto t3 = _mm256_or_si256(t0, t2);
                    _mm256_store_si256(c++, t3);
                }
            }
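
            // Scalar sketch of the merge above (grid2 = gridLayout_x*gridLayout_y):
            //
            //     for (size_t k = 0; k + 1 < gridLayout_z; k++)
            //         for (size_t p = 0; p < grid2; p++)
            //             codes[k*grid2 + p] |= codes[(k + 1)*grid2 + p] << 4;
            //     for (size_t p = 0; p < grid2; p++)   // clamp the last slice
            //         codes[(gridLayout_z - 1)*grid2 + p] |= codes[(gridLayout_z - 1)*grid2 + p] << 4;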
            // Lane constants for the skirt tests below: ki holds each lane's
            // x coordinate modulo 32, kj each lane's contribution to the row
            // (y) coordinate.
            auto mask_i_epi8 = _mm256_set1_epi8(static_cast<char>(gridLayout_x - 1));
            auto mask_j_epi8 = _mm256_set1_epi8(static_cast<char>(gridLayout_y - 1));
            auto stride = _mm256_set1_epi8(32);
            auto ki = _mm256_set_epi8(31, 30, 29, 28, 27, 26, 25, 24,
                                      23, 22, 21, 20, 19, 18, 17, 16,
                                      15, 14, 13, 12, 11, 10, 9, 8,
                                      7, 6, 5, 4, 3, 2, 1, 0);
            auto kj = _mm256_set_epi8(31 >> gridLayoutL2_x, 30 >> gridLayoutL2_x, 29 >> gridLayoutL2_x, 28 >> gridLayoutL2_x,
                                      27 >> gridLayoutL2_x, 26 >> gridLayoutL2_x, 25 >> gridLayoutL2_x, 24 >> gridLayoutL2_x,
                                      23 >> gridLayoutL2_x, 22 >> gridLayoutL2_x, 21 >> gridLayoutL2_x, 20 >> gridLayoutL2_x,
                                      19 >> gridLayoutL2_x, 18 >> gridLayoutL2_x, 17 >> gridLayoutL2_x, 16 >> gridLayoutL2_x,
                                      15 >> gridLayoutL2_x, 14 >> gridLayoutL2_x, 13 >> gridLayoutL2_x, 12 >> gridLayoutL2_x,
                                      11 >> gridLayoutL2_x, 10 >> gridLayoutL2_x, 9 >> gridLayoutL2_x, 8 >> gridLayoutL2_x,
                                      7 >> gridLayoutL2_x, 6 >> gridLayoutL2_x, 5 >> gridLayoutL2_x, 4 >> gridLayoutL2_x,
                                      3 >> gridLayoutL2_x, 2 >> gridLayoutL2_x, 1 >> gridLayoutL2_x, 0 >> gridLayoutL2_x);
            auto ones = _mm256_set1_epi32(~0);
            // Assemble the full 8-bit cube code for 32 cells per iteration.
            // skirt_i/skirt_j/skirt_k flag cells on the +x/+y/+z borders; for
            // those, the neighbour sample is clamped to the cell itself.
            for (unsigned i = 0; i < gridLayout_z*gridLayout_y*gridLayout_x; i += 32) {
                auto skirt_i = _mm256_cmpeq_epi8(_mm256_and_si256(ki, mask_i_epi8), mask_i_epi8);
                ki = _mm256_add_epi8(ki, stride);

                // Merge in the +x neighbour, shifted up by one bit.
                auto t0 = _mm256_load_si256((__m256i*)(codes + i));
                auto t1 = _mm256_loadu_si256((__m256i*)(codes + i + 1));
                auto t2 = _mm256_blendv_epi8(t1, t0, skirt_i);
                auto t3 = _mm256_or_si256(t0, _mm256_slli_epi32(t2, 1));

                auto row = _mm256_add_epi8(_mm256_set1_epi8(static_cast<char>(i >> gridLayoutL2_x)), kj);
                auto skirt_j = _mm256_cmpeq_epi8(_mm256_and_si256(row, mask_j_epi8), mask_j_epi8);

                // Merge in the +y row (already x-merged), shifted up by two
                // bits. On the last row the loads read past the row but the
                // result is masked away by skirt_j and the reads stay inside
                // the scratch allocation.
                auto r0 = _mm256_load_si256((__m256i*)(codes + gridLayout_x + i));
                auto r1 = _mm256_loadu_si256((__m256i*)(codes + gridLayout_x + i + 1));
                auto r2 = _mm256_blendv_epi8(r1, r0, skirt_i);
                auto r3 = _mm256_or_si256(r0, _mm256_slli_epi32(r2, 1));
                auto r4 = _mm256_blendv_epi8(r3, t3, skirt_j);

                auto t5 = _mm256_or_si256(t3, _mm256_slli_epi32(r4, 2));

                // Cells with all eight corners inside produce no geometry;
                // zero their codes.
                auto ones_ = _mm256_cmpeq_epi8(_mm256_setzero_si256(), _mm256_setzero_si256());
                auto m2 = _mm256_cmpeq_epi8(t5, ones_);
                auto code_epi8 = _mm256_andnot_si256(m2, t5);
                _mm256_store_si256((__m256i*)(codes + i), code_epi8);

                auto m3 = _mm256_cmpeq_epi8(code_epi8, _mm256_setzero_si256());
                uint32_t zerolanes = _mm256_movemask_epi8(m3);
                if (zerolanes == 0xffffffff)
                    continue;

                auto skirt_k = _mm256_set1_epi8((i >> (gridLayoutL2_x + gridLayoutL2_y)) == (gridLayout_z - 1) ? 255 : 0);
                auto skirt = _mm256_or_si256(_mm256_or_si256(skirt_i, skirt_j), skirt_k);
                uint32_t skirtbits = _mm256_movemask_epi8(skirt);
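
                // Resulting bit layout of each cube code byte (sketch):
                //   bit 0: (i,   j,   k)     bit 4: (i,   j,   k+1)
                //   bit 1: (i+1, j,   k)     bit 5: (i+1, j,   k+1)
                //   bit 2: (i,   j+1, k)     bit 6: (i,   j+1, k+1)
                //   bit 3: (i+1, j+1, k)     bit 7: (i+1, j+1, k+1)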
                for (unsigned r = 0; r < 32; r++) {
                    if (((zerolanes >> r) & 1) == 0) {
                        auto code = code_epi8.m256i_u8[r];
                        offsets[i + r] = vertexCount_ + vertexCount;
                        occupiedCells++;  // assumed increment; the original statement was elided here

                        // Emit one vertex work item per edge axis this cell
                        // owns. Encoding: (cell << 8) | (axis << 5) | layer.
                        // The axes guards are reconstructed; without them the
                        // emitted count would not match the offsets table.
                        auto axes = axesTable[code];
                        if (axes & 4)
                            vtxwork[vertexCount++] = ((i + r) << 8) | (4 << 5) | l;
                        if (axes & 2)
                            vtxwork[vertexCount++] = ((i + r) << 8) | (2 << 5) | l;
                        if (axes & 1)
                            vtxwork[vertexCount++] = ((i + r) << 8) | (1 << 5) | l;

                        // Border (skirt) cells contribute vertices but no
                        // triangles.
                        if (((skirtbits >> r) & 1) == 0) {
                            for (auto * axisShifts = indexTable + 16 * code; *axisShifts != 255; axisShifts++) {
                                idxwork[indexCount++] = ((i + r) << 8) | (*axisShifts << 2) | l;
                            }
                        }
                    }
                }
            }
            // Pad both work arrays to a multiple of 8 entries by repeating the
            // last valid item, so the SIMD extraction passes can always run
            // full 8-wide.
            if (auto t = vertexCount; t) {
                while (t & 7) vtxwork[t++] = vtxwork[vertexCount - 1];
            }
            if (auto t = indexCount; t) {
                while (t & 7) idxwork[t++] = idxwork[indexCount - 1];
            }
        }
        vertexCount_ += vertexCount;
        occupiedCells_ += occupiedCells;
        indexCount_ += indexCount;
    }
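
    // After analyzeAVX2, offsets[] maps every non-empty cell to the index of
    // its first owned vertex; the index-extraction passes below use it to
    // resolve vertices shared between neighbouring cells.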
    // Pass 2: turn vertex work items into interpolated, interleaved vertices,
    // eight at a time. (The tmp, gridLayout_z, T_n and vn parameters were
    // missing here and are reconstructed from the call site below.)
    size_t vertexExtractAVX2(Vertex* vertices,
                             const uint8_t* tmp,
                             const float* values,
                             size_t gridLayoutL2_x,
                             size_t gridLayoutL2_y,
                             size_t gridLayout_z,
                             size_t T_n,
                             const float* thresholds,
                             size_t vn)
    {
        const size_t gridLayout_x = size_t(1) << gridLayoutL2_x;
        const size_t gridLayout_y = size_t(1) << gridLayoutL2_y;
        const size_t cellLayout_z = gridLayout_z - 1;
        const auto * work = (const uint32_t*)(tmp + gridLayout_x * gridLayout_y*gridLayout_z*((sizeof(uint32_t) + sizeof(uint8_t))*T_n));
        const auto mask_j = _mm256_set1_epi32(static_cast<int>(gridLayout_y - 1));
        const auto cellLayout_z_epi32 = _mm256_set1_epi32(static_cast<int>(cellLayout_z));
        const auto X_L2_epi64 = _mm_insert_epi64(_mm_undefined_si128(), gridLayoutL2_x, 0);
        const auto XY_L2_epi64 = _mm_insert_epi64(_mm_undefined_si128(), gridLayoutL2_x + gridLayoutL2_y, 0);
        const auto oneOverTn = _mm256_set1_ps(1.f / T_n);
        const auto one = _mm256_set1_epi32(1);
        const auto X = _mm256_set1_epi32(static_cast<int>(gridLayout_x));
        const auto XY = _mm256_set1_epi32(static_cast<int>(gridLayout_x*gridLayout_y));
        // Thresholds for up to four layers; _mm256_permutevar_ps below selects
        // the right one per lane. Note that thresholds[0..3] is always read,
        // so this assumes T_n <= 4 and a thresholds array of at least four
        // entries.
        const auto C3 = _mm256_setr_ps(thresholds[0], thresholds[1], thresholds[2], thresholds[3],
                                       thresholds[0], thresholds[1], thresholds[2], thresholds[3]);
        for (size_t v = 0; v < vn; v += 8) {
            auto w = _mm256_loadu_si256((const __m256i*)(work + v));
            auto o0 = _mm256_srli_epi32(w, 8);               // cell index
            auto a = _mm256_i32gather_ps(values, o0, 4);     // sample at the first corner

            auto mask_i = _mm256_sub_epi32(X, one);
            auto i = _mm256_and_si256(o0, mask_i);
            auto j = _mm256_and_si256(_mm256_srl_epi32(o0, X_L2_epi64), mask_j);
            auto k = _mm256_srl_epi32(o0, XY_L2_epi64);

            // Forward-difference gradient at the first corner, clamped at the
            // +x/+y/+z borders.
            auto i0_less = _mm256_cmpgt_epi32(mask_i, i);
            auto j0_less = _mm256_cmpgt_epi32(mask_j, j);
            auto k0_less = _mm256_cmpgt_epi32(cellLayout_z_epi32, k);
            auto ax = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o0, _mm256_and_si256(i0_less, one)), 4), a);
            auto ay = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o0, _mm256_and_si256(j0_less, X)), 4), a);
            auto az = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o0, _mm256_and_si256(k0_less, XY)), 4), a);
            // Decode the edge axis from the work item and fetch the second
            // corner along it.
            auto di = _mm256_and_si256(_mm256_srli_epi32(w, 5), one);
            auto dj = _mm256_and_si256(_mm256_srli_epi32(w, 6), one);
            auto dk = _mm256_and_si256(_mm256_srli_epi32(w, 7), one);
            auto mx = _mm256_cmpeq_epi32(di, one);
            auto my = _mm256_cmpeq_epi32(dj, one);
            auto mz = _mm256_cmpeq_epi32(dk, one);
            auto o1 = _mm256_add_epi32(_mm256_add_epi32(o0, di),
                                       _mm256_add_epi32(_mm256_and_si256(my, X),
                                                        _mm256_and_si256(mz, XY)));
            auto b = _mm256_i32gather_ps(values, o1, 4);

            // Gradient at the second corner, again clamped at the borders.
            auto i1_less = _mm256_cmpgt_epi32(mask_i, _mm256_add_epi32(i, di));
            auto j1_less = _mm256_cmpgt_epi32(mask_j, _mm256_add_epi32(j, dj));
            auto k1_less = _mm256_cmpgt_epi32(cellLayout_z_epi32, _mm256_add_epi32(k, dk));
            auto bx = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o1, _mm256_and_si256(i1_less, one)), 4), b);
            auto by = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o1, _mm256_and_si256(j1_less, X)), 4), b);
            auto bz = _mm256_sub_ps(_mm256_i32gather_ps(values, _mm256_add_epi32(o1, _mm256_and_si256(k1_less, XY)), 4), b);
            // Interpolate along the edge: t = (threshold - a) / (b - a),
            // using the fast reciprocal (~12-bit precision) instead of a
            // full divide.
            auto l = _mm256_and_si256(w, _mm256_set1_epi32((1 << 5) - 1));
            auto T = _mm256_permutevar_ps(C3, l);
            auto t = _mm256_mul_ps(_mm256_sub_ps(T, a), _mm256_rcp_ps(_mm256_sub_ps(b, a)));

            auto px = _mm256_add_ps(_mm256_cvtepi32_ps(i), _mm256_and_ps(_mm256_castsi256_ps(mx), t));
            auto py = _mm256_add_ps(_mm256_cvtepi32_ps(j), _mm256_and_ps(_mm256_castsi256_ps(my), t));
            auto pz = _mm256_add_ps(_mm256_cvtepi32_ps(k), _mm256_and_ps(_mm256_castsi256_ps(mz), t));

            // Normal = negated lerp of the two forward-difference gradients.
            auto gx = _mm256_sub_ps(_mm256_mul_ps(t, _mm256_sub_ps(ax, bx)), ax);
            auto gy = _mm256_sub_ps(_mm256_mul_ps(t, _mm256_sub_ps(ay, by)), ay);
            auto gz = _mm256_sub_ps(_mm256_mul_ps(t, _mm256_sub_ps(az, bz)), az);
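
            // Scalar sketch of the per-lane math, for one work item w:
            //
            //     unsigned cell = w >> 8, layer = w & 31;
            //     float a = values[cell], b = values[cell + axisStep];
            //     float t = (thresholds[layer] - a) / (b - a);
            //     pos = vec3(i, j, k) + t * axisDir;
            //     nrm = -lerp(gradient(cell), gradient(cell + axisStep), t);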
            // Texture u places layer l at the centre of its slot in a 1-D
            // lookup of T_n entries; v is fixed at 0.5.
            auto half = _mm256_set1_ps(0.5f);
            auto tp = _mm256_mul_ps(oneOverTn, _mm256_add_ps(_mm256_cvtepi32_ps(l), half));
            // The guard below is a reconstruction: run the full 8-wide
            // transpose-and-store only when all eight lanes are real vertices,
            // otherwise fall through to the scalar tail.
            if (v + 8 <= vn) {
                // 8x8 float transpose from SoA (px,py,pz,gx,gy,gz,tp,0.5) to
                // AoS Vertex records: unpack pairs rows, shuffle+blend builds
                // 128-bit quarters, permute2f128 assembles low/high halves.
                auto a0 = _mm256_unpacklo_ps(px, py);
                auto a2 = _mm256_unpacklo_ps(pz, gx);
                auto a4 = _mm256_unpacklo_ps(gy, gz);
                auto a6 = _mm256_unpacklo_ps(tp, half);

                auto b0_ = _mm256_shuffle_ps(a0, a2, _MM_SHUFFLE(1, 0, 3, 2));
                auto b0 = _mm256_blend_ps(a0, b0_, 0xCC);
                auto b1 = _mm256_blend_ps(a2, b0_, 0x33);

                auto b4_ = _mm256_shuffle_ps(a4, a6, _MM_SHUFFLE(1, 0, 3, 2));
                auto b4 = _mm256_blend_ps(a4, b4_, 0xCC);
                auto b5 = _mm256_blend_ps(a6, b4_, 0x33);

                _mm256_store_ps((float*)vertices + 8 * (v + 0), _mm256_permute2f128_ps(b0, b4, 0x20));
                _mm256_store_ps((float*)vertices + 8 * (v + 4), _mm256_permute2f128_ps(b0, b4, 0x31));

                auto a1 = _mm256_unpackhi_ps(px, py);
                auto a3 = _mm256_unpackhi_ps(pz, gx);

                auto b2_ = _mm256_shuffle_ps(a1, a3, _MM_SHUFFLE(1, 0, 3, 2));
                auto b2 = _mm256_blend_ps(a1, b2_, 0xCC);
                auto b3 = _mm256_blend_ps(a3, b2_, 0x33);

                auto a5 = _mm256_unpackhi_ps(gy, gz);
                auto a7 = _mm256_unpackhi_ps(tp, half);

                auto b6_ = _mm256_shuffle_ps(a5, a7, _MM_SHUFFLE(1, 0, 3, 2));
                auto b6 = _mm256_blend_ps(a5, b6_, 0xCC);
                auto b7 = _mm256_blend_ps(a7, b6_, 0x33);

                _mm256_store_ps((float*)vertices + 8 * (v + 1), _mm256_permute2f128_ps(b1, b5, 0x20));
                _mm256_store_ps((float*)vertices + 8 * (v + 2), _mm256_permute2f128_ps(b2, b6, 0x20));
                _mm256_store_ps((float*)vertices + 8 * (v + 3), _mm256_permute2f128_ps(b3, b7, 0x20));
                _mm256_store_ps((float*)vertices + 8 * (v + 5), _mm256_permute2f128_ps(b1, b5, 0x31));
                _mm256_store_ps((float*)vertices + 8 * (v + 6), _mm256_permute2f128_ps(b2, b6, 0x31));
                _mm256_store_ps((float*)vertices + 8 * (v + 7), _mm256_permute2f128_ps(b3, b7, 0x31));
            }
            else {
                // Scalar tail for the last partial group of work items.
                for (size_t ll = 0; ll < 8 && v + ll < vn; ll++) {
                    vertices[v + ll].pos.x = px.m256_f32[ll];
                    vertices[v + ll].pos.y = py.m256_f32[ll];
                    vertices[v + ll].pos.z = pz.m256_f32[ll];
                    vertices[v + ll].nrm.x = gx.m256_f32[ll];
                    vertices[v + ll].nrm.y = gy.m256_f32[ll];
                    vertices[v + ll].nrm.z = gz.m256_f32[ll];
                    vertices[v + ll].tex.x = tp.m256_f32[ll];
                    vertices[v + ll].tex.y = 0.5f;
                }
            }
        }
        return vn;  // assumed return value: number of vertices written
    }
    // Pass 3, scalar variant: walk every cell, resolve each triangle corner to
    // the owning cell's vertex range, and record per-layer index ranges in
    // sub_mesh. (The indices, tmp, T_n and indexCount parameters and the Idx
    // template are reconstructed from the call site and body.)
    template<typename Idx>
    unsigned indexExtract(uint32_t* sub_mesh,
                          Idx* indices,
                          const uint8_t* tmp,
                          const size3_t gridLayout,
                          const size_t T_n,
                          const unsigned indexCount,
                          const size_t vertexCount)
    {
        const size3_t cellLayout = { gridLayout.x - 1, gridLayout.y - 1, gridLayout.z - 1 };
        const auto * axesTable = MarchingCubes::axesTable().data();
        const auto * indexTable = MarchingCubes::indexTable().data();
        unsigned iix = 0;
        for (unsigned l = 0; l < T_n; l++) {
            auto * offsets = (const uint32_t*)(tmp + gridLayout.x*gridLayout.y*gridLayout.z * sizeof(uint32_t)*l);
            auto * codes = tmp + gridLayout.x*gridLayout.y*gridLayout.z*(sizeof(uint32_t)*T_n + l);
            for (unsigned k = 0; k < cellLayout.z; k++) {
                for (unsigned j = 0; j < cellLayout.y; j++) {
                    for (unsigned i = 0; i < cellLayout.x; i++) {
                        const auto code = codes[(k*gridLayout.y + j)*gridLayout.x + i];
                        if (code == 0)
                            continue;

                        auto * axisShifts = indexTable + 16 * code;
                        for (;;) {
                            auto axisShift = *axisShifts++;
                            if (axisShift == 255)
                                break;

                            // Bits 3..5 select the neighbouring cell that owns
                            // the vertex; the low bits select which of that
                            // cell's vertices to use.
                            auto ii = i + (axisShift & 8 ? 1 : 0);
                            auto jj = j + (axisShift & 16 ? 1 : 0);
                            auto kk = k + (axisShift & 32 ? 1 : 0);
                            auto shiftedCell = (kk*gridLayout.y + jj)*gridLayout.x + ii;

                            const auto shiftedCode = codes[shiftedCell];
                            const auto axes = axesTable[shiftedCode] & axisShift;

                            // Skip past the owning cell's earlier-emitted
                            // vertices to reach the one on the requested edge.
                            const auto ix = offsets[shiftedCell] + ((axes & 4) ? 1 : 0) + ((axes & 2) ? 1 : 0);
                            assert(ix < vertexCount);
                            assert(sizeof(Idx) != 2 || ix < 0xffff);
                            indices[iix++] = (Idx)ix;
                        }
                    }
                }
            }
            sub_mesh[l + 1] = iix;  // assumed bookkeeping: end of layer l's index range
        }
        return iix;
    }
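
    // Because the l-loop above emits indices layer by layer, sub_mesh[l] and
    // sub_mesh[l+1] bracket the index range of threshold layer l; the caller
    // turns each range into its own sub-mesh so the layers can be drawn
    // independently.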
    // Pass 3, AVX2 variant: resolves eight index work items per iteration with
    // gathers instead of scalar table walks. (The tmp and T_n parameters were
    // missing here and are reconstructed from the body.)
    unsigned indexExtractAVX2(uint32_t* indices,
                              const uint8_t* tmp,
                              unsigned gridLayoutL2_x,
                              unsigned gridLayoutL2_y,
                              unsigned gridLayout_z,
                              size_t T_n,
                              const unsigned indexCount,
                              const unsigned maxVertex)
    {
        const unsigned gridLayout_x = 1 << gridLayoutL2_x;
        const unsigned gridLayout_y = 1 << gridLayoutL2_y;
        const auto * axesTable = MarchingCubes::axesTable().data();
        const auto * idxwork = (const uint32_t*)(tmp + gridLayout_x*gridLayout_y*gridLayout_z*((sizeof(uint32_t) + sizeof(uint8_t) + 3 * sizeof(uint32_t))*T_n));
        const auto * codes = tmp + gridLayout_x*gridLayout_y*gridLayout_z*(sizeof(uint32_t)*T_n);
        const auto * offsets = (const uint32_t*)tmp;

        const auto one_epi32 = _mm256_set1_epi32(1);
        const auto three_epi32 = _mm256_set1_epi32(3);
        const auto grid_3_epi32 = _mm256_set1_epi32(gridLayout_x*gridLayout_y*gridLayout_z);
        const auto X_epi32 = _mm256_set1_epi32(gridLayout_x);
        const auto XY_epi32 = _mm256_set1_epi32(gridLayout_x*gridLayout_y);
        const auto mask_255_epi32 = _mm256_set1_epi32(255);
        for (unsigned iix = 0; iix < indexCount; iix += 8) {
            const auto w = _mm256_load_si256((const __m256i*)(idxwork + iix));
            const auto o0 = _mm256_srli_epi32(w, 8);

            // Shift to the neighbouring cell that owns the vertex (bits 5..7
            // of the work item select +x/+y/+z).
            const auto shiftI = _mm256_and_si256(_mm256_srli_epi32(w, 5), one_epi32);
            const auto shiftJ = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(_mm256_srli_epi32(w, 6), one_epi32), one_epi32), X_epi32);
            const auto shiftK = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(_mm256_srli_epi32(w, 7), one_epi32), one_epi32), XY_epi32);
            const auto shiftedCell = _mm256_add_epi32(_mm256_add_epi32(o0, shiftI),
                                                      _mm256_add_epi32(shiftJ, shiftK));

            // Index into layer l's tables: the per-layer codes/offsets arrays
            // are laid out back to back, grid_3 entries apart.
            const auto L = _mm256_mullo_epi32(grid_3_epi32, _mm256_and_si256(w, three_epi32));
            const auto I = _mm256_add_epi32(shiftedCell, L);

            const auto shiftedCode = _mm256_and_si256(_mm256_i32gather_epi32((const int*)codes, I, 1), mask_255_epi32);
            const auto axisShift = _mm256_srli_epi32(w, 2);
            const auto axes = _mm256_and_si256(_mm256_i32gather_epi32((const int*)axesTable, shiftedCode, 1), axisShift);

            // Count the owning cell's vertices emitted before the target one.
            const auto addJ = _mm256_and_si256(_mm256_srli_epi32(axes, 1), one_epi32);
            const auto addK = _mm256_and_si256(_mm256_srli_epi32(axes, 2), one_epi32);
            const auto offset = _mm256_add_epi32(_mm256_i32gather_epi32((const int*)offsets, I, 4),
                                                 _mm256_add_epi32(addJ, addK));
            for (unsigned i = 0; i < 8; i++) {
                assert(offset.m256i_u32[i] < maxVertex);
            }
            _mm256_storeu_si256((__m256i*)(indices + iix), offset);
        }
        return indexCount;  // assumed return value: number of indices written
    }

}  // anonymous namespace
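
// Both extraction passes above run in full 8-wide steps; analyzeAVX2 pads the
// work arrays to a multiple of 8 entries so the final iteration never reads
// uninitialized work items.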
namespace Cogs::Core::EchoSounder {

    // The enclosing function's name, return type and leading parameters were
    // elided in the source at hand; this signature is a reconstruction from
    // the body (the function name and the context/scratch/values parameter
    // names are assumed).
    MeshHandle createIsoSurfaceMesh(Context* context,
                                    MemoryBuffer& scratch,
                                    const glm::uvec3 gridLayout_,
                                    const float* values,
                                    const float* thresholds,
                                    size_t count)
    {
        size3_t gridLayout = { gridLayout_.x, gridLayout_.y, gridLayout_.z };
        size3_t gridLayoutL2 = {
            static_cast<size_t>(log2(gridLayout_.x)),
            static_cast<size_t>(log2(gridLayout_.y)),
            static_cast<size_t>(log2(gridLayout_.z))
        };
        assert(gridLayout.x != 0 && gridLayout.y != 0 && gridLayout.z != 0);
        assert((size_t(1) << gridLayoutL2.x) == gridLayout.x &&
               (size_t(1) << gridLayoutL2.y) == gridLayout.y &&
               (size_t(1) << gridLayoutL2.z) == gridLayout.z);
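
        // The power-of-two extents let the SIMD passes decompose a flat cell
        // index into (i, j, k) with shifts and masks instead of divides.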
        const size_t T_n = count;

        // Split the volume along z into parts that overlap by one slice, so
        // the analyze and extract passes can work on independent scratch
        // regions.
        const size_t parts = 2;
        assert((parts < gridLayout.z - 1) && "Parts must be at least a slice thick");
        const size_t partSpacing = (gridLayout.z - 2) / parts;
        const size_t partSize = partSpacing + 1;
        assert(1 <= partSpacing);
        assert(parts*partSpacing + 1 < gridLayout.z);
        assert(gridLayout.z <= (parts + 1)*partSpacing);
        size_t partScratchSize = (sizeof(uint32_t) +        // offsets
                                  sizeof(uint8_t) +         // codes
                                  3 * sizeof(uint32_t) +    // vtxwork
                                  15 * sizeof(uint32_t))    // idxwork
                                 * T_n * gridLayout.x * gridLayout.y * partSize;
        scratch.resize(parts * partScratchSize, false);
        auto * tmp = (uint8_t*)scratch.data();
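
        // Worst case per cell and threshold layer: a marching-cubes cell owns
        // at most 3 vertices (one per owned edge axis) and emits at most 5
        // triangles (15 indices), which is what the 3x and 15x uint32 terms
        // above budget for.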
        unsigned occupiedCells = 0;
        unsigned indexCount = 0;
        unsigned vertexCount = 0;
        std::vector<unsigned> partVertexOffset(parts + 1);
        std::vector<unsigned> partIndexOffset(parts + 1);
        auto timer = Timer::startNew();
        for (size_t p = 0; p < parts; p++) {
            size_t slice_a = partSpacing * p;
            size_t slice_b = std::min(slice_a + partSize, gridLayout.z);
            partVertexOffset[p] = vertexCount;
            partIndexOffset[p] = indexCount;
            analyzeAVX2(tmp + partScratchSize * p, vertexCount, occupiedCells, indexCount,
                        values + slice_a * gridLayout.x*gridLayout.y,
                        gridLayoutL2.x, gridLayoutL2.y, slice_b - slice_a,
                        T_n, thresholds);
        }
        partVertexOffset[parts] = vertexCount;
        partIndexOffset[parts] = indexCount;
        const auto analyze = timer.elapsedMicroseconds();  // pass timings; their consumers were elided in this source
        if (occupiedCells == 0) {
            // Nothing crossed any threshold. The original early-out was
            // elided; returning the no-handle value is an assumed stand-in.
            return MeshHandle::NoHandle;
        }

        auto mesh = context->meshManager->createLocked();
        auto vertices = mesh->map<Vertex>(VertexDataType::Interleaved0, VertexFormats::Pos3fNorm3fTex2f, vertexCount);
        timer = Timer::startNew();
        for (size_t p = 0; p < parts; p++) {
            size_t vn = partVertexOffset[p + 1] - partVertexOffset[p];
            if (vn == 0)
                continue;
            size_t slice_a = partSpacing * p;
            size_t slice_b = std::min(slice_a + partSize, gridLayout.z);
            vertexExtractAVX2(vertices.data + partVertexOffset[p],
                              tmp + partScratchSize * p,
                              values + slice_a * gridLayout.x*gridLayout.y,
                              gridLayoutL2.x, gridLayoutL2.y, slice_b - slice_a,
                              T_n, thresholds, vn);
        }
        const auto vtx = timer.elapsedMicroseconds();
        std::vector<uint32_t> sub_mesh(T_n + 1, 0);
        mesh->clearIndexes();

        // Assumed index type; the sizeof(Idx) assert in indexExtract suggests
        // the original could also select 16-bit indices for small meshes.
        using Idx = uint32_t;
        Idx *indices = (Idx*)mesh->mapStream(VertexDataType::Indexes, 0, indexCount, sizeof(Idx), true);
        timer = Timer::startNew();
        auto iix = indexExtract(sub_mesh.data(), indices, tmp, gridLayout, T_n, indexCount, vertexCount);
        assert(iix == indexCount);
        (void)iix;
        const auto idx = timer.elapsedMicroseconds();
        auto subMeshes = mesh->mapSubMeshes(static_cast<uint32_t>(T_n));
        for (size_t l = 0; l < T_n; l++) {
            uint32_t start = sub_mesh[l];
            uint32_t size = sub_mesh[l + 1] - sub_mesh[l];
            // Assumed sub-mesh assignment; the original statement was elided.
            subMeshes[l] = { start, size, PrimitiveType::TriangleList };
        }
        mesh->setCount(indexCount);
        // Compute the mesh bounds from the generated vertices.
        auto box = Geometry::makeEmptyBoundingBox<Geometry::BoundingBox>();
        for (unsigned i = 0; i < vertexCount; i++) {
            glm::vec3 pos = vertices[i].pos;
            box.min = glm::min(box.min, pos);
            box.max = glm::max(box.max, pos);
        }
        mesh->setBounds(box);

        return mesh.getHandle();
    }

}