Cogs.Core / NormalVectors.cpp
#include "GeometryProcessing.h"

#include "Context.h"
#include "Services/TaskManager.h"
#include "Services/Features.h"
#include "Platform/Instrumentation.h"

#include "Foundation/Memory/MemoryBuffer.h"
#include "Foundation/Platform/Timer.h"

#include <algorithm>
#include <atomic>
#include <cstring>
#include <vector>

#include <glm/glm.hpp>
#include <glm/gtc/type_ptr.hpp>

#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
#include <emmintrin.h>  // SSE2 intrinsics; xmmintrin.h alone only covers SSE1.
#endif

#if defined(_WIN32)
#include <intrin.h>     // _InterlockedExchange
#endif

using namespace Cogs::Core;

namespace {

    struct Corner
    {
        uint32_t Vp, Vn;    // Previous and next vertex around the triangle's winding.
        uint32_t triangle;  // Triangle index in the upper bits, corner number (0..2) in the low two bits.
        uint32_t next;      // Next corner attached to the same vertex; ~0u terminates the list.
    };

    template<typename T>
    inline T* alignUpwards(void* ptr, size_t alignment = 64)
    {
        auto a = alignment - 1;
        return reinterpret_cast<T*>(((size_t)ptr + a) & (~a));
    }
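
    // alignUpwards rounds a pointer up to the next multiple of a power-of-two
    // alignment by adding (alignment - 1) and masking off the low bits. Worked
    // example with the default 64-byte alignment: ptr = 0x1007 gives
    // (0x1007 + 0x3F) & ~0x3F = 0x1040, while an already aligned 0x1040 maps to
    // itself. It is used below to cache-line align the scratch buffers, which
    // are over-allocated by 64 bytes to leave room for the shift.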

#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
    inline __m128 loadVec3(const float* p)
    {
#if 1
        // Load 4 floats directly, at the risk of reading one float beyond the range. SSE2.
        return _mm_loadu_ps(p);
#else
        // Load the first two elements as a double, then insert the third element with a shuffle. SSE2.
        __m128 t0 = _mm_castpd_ps(_mm_load_sd((const double*)p));
        __m128 t1 = _mm_load_ss(p + 2);
        return _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(1, 0, 1, 0));
#endif
    }

    inline void storeVec3(float* p, __m128 v)
    {
        _mm_store_sd((double*)p, _mm_castps_pd(v));
        _mm_store_ss(p + 2, _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)));
    }
#endif
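
    // The _mm_loadu_ps path above may read up to four bytes past the last
    // vertex, which is assumed tolerable for the buffers passed in here. A
    // strictly in-bounds alternative would be a scalar gather (a sketch, not
    // used in this file):
    //
    //   inline __m128 loadVec3Safe(const float* p)
    //   {
    //       return _mm_set_ps(0.0f, p[2], p[1], p[0]);  // {p[0], p[1], p[2], 0}
    //   }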

    struct ClearStridedVec3Task
    {
        float* normals;
        uint32_t normalStride;
        uint32_t ia, ib;
        std::atomic<uint64_t>* elapsed_us;

        ClearStridedVec3Task(float* normals,
                             uint32_t normalStride,
                             uint32_t ia, uint32_t ib,
                             std::atomic<uint64_t>* elapsed_us) :
            normals(normals),
            normalStride(normalStride),
            ia(ia), ib(ib),
            elapsed_us(elapsed_us)
        {}

        void operator()()
        {
            CpuInstrumentationScope(SCOPE_GEOMETRY, "StrClr");
            auto timer = Cogs::Timer::startNew();

            for (uint32_t i = ia; i < ib; i++) {
                normals[normalStride * i + 0] = 0.0f;
                normals[normalStride * i + 1] = 0.0f;
                normals[normalStride * i + 2] = 0.0f;
            }

            if (elapsed_us) {
                elapsed_us->fetch_add(timer.elapsedMicroseconds());
            }
        }
    };
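
    // ClearStridedVec3Task is not referenced below; it is a building block for
    // callers that need to zero a strided normal buffer before accumulating
    // into it. A hypothetical dispatch, mirroring the task groups used later
    // in this file (the surrounding names are assumed):
    //
    //   TaskId group = tm->createGroup();
    //   for (uint32_t ia = 0; ia < numVertices; ia += taskSize) {
    //       tm->enqueueChild(group, ClearStridedVec3Task(normals, normalStride,
    //                                                    ia, std::min(numVertices, ia + taskSize),
    //                                                    elapsed_us));
    //   }
    //   tm->wait(group);
    //   tm->destroy(group);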

    struct TriangleNormalsTask
    {
        const uint32_t* indices;
        float* normals;
        const float* vertices;
        uint32_t vertexStride;
        uint32_t ia, ib;
        std::atomic<uint64_t>* elapsed_us;

        TriangleNormalsTask(const uint32_t* indices,
                            float* normals,
                            const float* vertices,
                            uint32_t vertexStride,
                            uint32_t ia, uint32_t ib,
                            std::atomic<uint64_t>* elapsed_us = nullptr) :
            indices(indices),
            normals(normals),
            vertices(vertices),
            vertexStride(vertexStride),
            ia(ia), ib(ib),
            elapsed_us(elapsed_us)
        {}

        void operator()()
        {
            CpuInstrumentationScope(SCOPE_GEOMETRY, "TriNrm");
            auto timer = Cogs::Timer::startNew();
            for (uint32_t i = ia; i < ib; i++) {
                const auto i0 = indices[3 * i + 0];
                const auto i1 = indices[3 * i + 1];
                const auto i2 = indices[3 * i + 2];
                const auto & a = glm::make_vec3(vertices + vertexStride * i0);
                const auto & b = glm::make_vec3(vertices + vertexStride * i1);
                const auto & c = glm::make_vec3(vertices + vertexStride * i2);
                const auto n = glm::cross(c - a, b - a);

                // Triangle normals are stored with a stride of 4 floats; this
                // scalar path leaves the fourth float untouched.
                *reinterpret_cast<glm::vec3*>(normals + 4 * i) = n;
            }
            if (elapsed_us) {
                elapsed_us->fetch_add(timer.elapsedMicroseconds());
            }
        }
    };
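
    // The cross(c - a, b - a) order means the normal faces the side from which
    // the triangle winds clockwise. Worked example: a = (0,0,0), b = (1,0,0),
    // c = (0,1,0) gives cross((0,1,0), (1,0,0)) = (0,0,-1), i.e. the normal of
    // a counter-clockwise xy-triangle points down the negative z axis. The
    // 'flip' parameter of normalsFromIndexedTriangles below swaps b and c to
    // reverse this.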

#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
    struct TriangleNormalsTaskSSE2a
    {
        const uint32_t* indices;
        float* triangleNormals;
        const float* vertices;
        uint32_t vertexStride;
        uint32_t ia, ib;
        std::atomic<uint64_t>* elapsed_us;

        TriangleNormalsTaskSSE2a(const uint32_t* indices,
                                 float* triangleNormals,
                                 const float* vertices,
                                 uint32_t vertexStride,
                                 uint32_t ia, uint32_t ib,
                                 std::atomic<uint64_t>* elapsed_us) :
            indices(indices),
            triangleNormals(triangleNormals),
            vertices(vertices),
            vertexStride(vertexStride),
            ia(ia), ib(ib),
            elapsed_us(elapsed_us)
        {}

        void operator()()
        {
            CpuInstrumentationScope(SCOPE_GEOMETRY, "TriNrmSSE2");
            auto timer = Cogs::Timer::startNew();
            uint32_t i = ia;
#if 1
            // Main loop: two triangles per iteration.
            auto indexPtr = indices + 3 * ia;
            for (; i + 1 < ib; i += 2) {
                const auto ix0_a = *indexPtr++;
                const auto ix1_a = *indexPtr++;
                const auto ix2_a = *indexPtr++;

                const auto ix0_b = *indexPtr++;
                const auto ix1_b = *indexPtr++;
                const auto ix2_b = *indexPtr++;

                __m128 a_a = loadVec3(vertices + vertexStride * ix0_a);
                __m128 a_b = loadVec3(vertices + vertexStride * ix0_b);
                __m128 b_a = loadVec3(vertices + vertexStride * ix1_a);
                __m128 b_b = loadVec3(vertices + vertexStride * ix1_b);
                __m128 c_a = loadVec3(vertices + vertexStride * ix2_a);
                __m128 c_b = loadVec3(vertices + vertexStride * ix2_b);

                __m128 ca_xyz_a = _mm_sub_ps(c_a, a_a);
                __m128 ca_xyz_b = _mm_sub_ps(c_b, a_b);
                __m128 ca_yzx_a = _mm_shuffle_ps(ca_xyz_a, ca_xyz_a, _MM_SHUFFLE(3, 0, 2, 1));
                __m128 ca_yzx_b = _mm_shuffle_ps(ca_xyz_b, ca_xyz_b, _MM_SHUFFLE(3, 0, 2, 1));

                __m128 ba_xyz_a = _mm_sub_ps(b_a, a_a);
                __m128 ba_xyz_b = _mm_sub_ps(b_b, a_b);
                __m128 ba_yzx_a = _mm_shuffle_ps(ba_xyz_a, ba_xyz_a, _MM_SHUFFLE(3, 0, 2, 1));
                __m128 ba_yzx_b = _mm_shuffle_ps(ba_xyz_b, ba_xyz_b, _MM_SHUFFLE(3, 0, 2, 1));

                __m128 f_zxy_a = _mm_mul_ps(ca_xyz_a, ba_yzx_a);
                __m128 f_zxy_b = _mm_mul_ps(ca_xyz_b, ba_yzx_b);
                __m128 g_zxy_a = _mm_mul_ps(ba_xyz_a, ca_yzx_a);
                __m128 g_zxy_b = _mm_mul_ps(ba_xyz_b, ca_yzx_b);

                __m128 n_zxy_a = _mm_sub_ps(f_zxy_a, g_zxy_a);
                __m128 n_zxy_b = _mm_sub_ps(f_zxy_b, g_zxy_b);
                __m128 n_xyz_a = _mm_shuffle_ps(n_zxy_a, n_zxy_a, _MM_SHUFFLE(3, 0, 2, 1));
                __m128 n_xyz_b = _mm_shuffle_ps(n_zxy_b, n_zxy_b, _MM_SHUFFLE(3, 0, 2, 1));

                _mm_store_ps(triangleNormals + 4 * (i + 0), n_xyz_a);
                _mm_store_ps(triangleNormals + 4 * (i + 1), n_xyz_b);
            }
#endif
            // Tail: handle the last triangle of an odd-sized range.
            for (; i < ib; i++) {
                const auto ix0 = indices[3 * i + 0];
                const auto ix1 = indices[3 * i + 1];
                const auto ix2 = indices[3 * i + 2];

                __m128 a = loadVec3(vertices + vertexStride * ix0);
                __m128 b = loadVec3(vertices + vertexStride * ix1);
                __m128 c = loadVec3(vertices + vertexStride * ix2);
                __m128 ca_xyz = _mm_sub_ps(c, a);
                __m128 ca_yzx = _mm_shuffle_ps(ca_xyz, ca_xyz, _MM_SHUFFLE(3, 0, 2, 1));

                __m128 ba_xyz = _mm_sub_ps(b, a);
                __m128 ba_yzx = _mm_shuffle_ps(ba_xyz, ba_xyz, _MM_SHUFFLE(3, 0, 2, 1));

                __m128 f_zxy = _mm_mul_ps(ca_xyz, ba_yzx);
                __m128 g_zxy = _mm_mul_ps(ba_xyz, ca_yzx);

                __m128 n_zxy = _mm_sub_ps(f_zxy, g_zxy);
                __m128 n_xyz = _mm_shuffle_ps(n_zxy, n_zxy, _MM_SHUFFLE(3, 0, 2, 1));

                _mm_store_ps(triangleNormals + 4 * i, n_xyz);
            }
            if (elapsed_us) {
                elapsed_us->fetch_add(timer.elapsedMicroseconds());
            }
        }
    };
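
    // The cross product above uses a "shuffle late" rearrangement. Writing
    // u = c - a and v = b - a, the standard form
    //
    //     u x v = u.yzx * v.zxy - u.zxy * v.yzx          (4 shuffles)
    //
    // is rewritten so both products share the same lane order:
    //
    //     (u x v).zxy = u.xyz * v.yzx - v.xyz * u.yzx    (2 shuffles)
    //
    // leaving a single final shuffle to rotate the result from zxy back to
    // xyz order: three shuffles per cross product instead of four.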

    struct TriangleNormalsTaskSSE2b
    {
        const uint32_t* indices;
        float* triangleNormals;
        const float* vertices;
        uint32_t vertexStride;
        uint32_t ia, ib;
        std::atomic<uint64_t>* elapsed_us;

        TriangleNormalsTaskSSE2b(const uint32_t* indices,
                                 float* triangleNormals,
                                 const float* vertices,
                                 uint32_t vertexStride,
                                 uint32_t ia, uint32_t ib,
                                 std::atomic<uint64_t>* elapsed_us = nullptr) :
            indices(indices),
            triangleNormals(triangleNormals),
            vertices(vertices),
            vertexStride(vertexStride),
            ia(ia), ib(ib),
            elapsed_us(elapsed_us)
        {}

        void operator()()
        {
            CpuInstrumentationScope(SCOPE_GEOMETRY, "NrmAddSSE2");
            auto timer = Cogs::Timer::startNew();

            for (uint32_t i = ia; i < ib; i += 4) {
                uint32_t ii[4];
                uint32_t ix[4][3];

                // Gather the indices of four triangles; a partial tail is
                // clamped to the last triangle, which then gets harmlessly
                // recomputed and rewritten.
                for (uint32_t j = 0; j < 4; j++) {
                    ii[j] = std::min(ib - 1, i + j);
                    for (uint32_t k = 0; k < 3; k++) {
                        ix[j][k] = indices[3 * ii[j] + k];
                    }
                }

                // Load a, b and c of all four triangles and transpose from AoS
                // to SoA, so each register holds one component of four vertices.
                __m128 a_x = _mm_loadu_ps(vertices + vertexStride * ix[0][0]);
                __m128 a_y = _mm_loadu_ps(vertices + vertexStride * ix[1][0]);
                __m128 a_z = _mm_loadu_ps(vertices + vertexStride * ix[2][0]);
                __m128 a_w = _mm_loadu_ps(vertices + vertexStride * ix[3][0]);
                _MM_TRANSPOSE4_PS(a_x, a_y, a_z, a_w);

                __m128 b_x = _mm_loadu_ps(vertices + vertexStride * ix[0][1]);
                __m128 b_y = _mm_loadu_ps(vertices + vertexStride * ix[1][1]);
                __m128 b_z = _mm_loadu_ps(vertices + vertexStride * ix[2][1]);
                __m128 b_w = _mm_loadu_ps(vertices + vertexStride * ix[3][1]);
                _MM_TRANSPOSE4_PS(b_x, b_y, b_z, b_w);

                __m128 c_x = _mm_loadu_ps(vertices + vertexStride * ix[0][2]);
                __m128 c_y = _mm_loadu_ps(vertices + vertexStride * ix[1][2]);
                __m128 c_z = _mm_loadu_ps(vertices + vertexStride * ix[2][2]);
                __m128 c_w = _mm_loadu_ps(vertices + vertexStride * ix[3][2]);
                _MM_TRANSPOSE4_PS(c_x, c_y, c_z, c_w);

                __m128 ba_x = _mm_sub_ps(b_x, a_x);
                __m128 ba_y = _mm_sub_ps(b_y, a_y);
                __m128 ba_z = _mm_sub_ps(b_z, a_z);

                __m128 ca_x = _mm_sub_ps(c_x, a_x);
                __m128 ca_y = _mm_sub_ps(c_y, a_y);
                __m128 ca_z = _mm_sub_ps(c_z, a_z);

                // cross(c - a, b - a) with one component per register: no shuffles needed.
                __m128 n_x = _mm_sub_ps(_mm_mul_ps(ca_y, ba_z), _mm_mul_ps(ba_y, ca_z));
                __m128 n_y = _mm_sub_ps(_mm_mul_ps(ca_z, ba_x), _mm_mul_ps(ba_z, ca_x));
                __m128 n_z = _mm_sub_ps(_mm_mul_ps(ca_x, ba_y), _mm_mul_ps(ba_x, ca_y));
                __m128 n_w = _mm_setzero_ps();
                _MM_TRANSPOSE4_PS(n_x, n_y, n_z, n_w);

                _mm_store_ps(triangleNormals + 4 * ii[0], n_x);
                _mm_store_ps(triangleNormals + 4 * ii[1], n_y);
                _mm_store_ps(triangleNormals + 4 * ii[2], n_z);
                _mm_store_ps(triangleNormals + 4 * ii[3], n_w);
            }
            if (elapsed_us) {
                elapsed_us->fetch_add(timer.elapsedMicroseconds());
            }
        }
    };
#endif
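
    // Two SSE2 variants are kept: TriangleNormalsTaskSSE2a keeps each vertex in
    // one register (vertical cross product with shuffles per triangle), while
    // TriangleNormalsTaskSSE2b transposes four triangles into SoA form so the
    // cross product becomes plain component arithmetic. _MM_TRANSPOSE4_PS turns
    // rows {a0 a1 a2 a3} .. {d0 d1 d2 d3} into columns {a0 b0 c0 d0} ..
    // {a3 b3 c3 d3}. Only the SoA variant (b) is dispatched below; variant (a)
    // is retained for comparison.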

    struct VertexNormalsTask
    {
        float* vertexNormals;
        uint32_t vertexNormalStride;
        const float* triangleNormals;
        const uint32_t* inverseIndexHead;
        const uint32_t* inverseIndexNext;
        uint32_t ia, ib;
        std::atomic<uint64_t>* elapsed_us;

        VertexNormalsTask(float* vertexNormals,
                          uint32_t vertexNormalStride,
                          const float* triangleNormals,
                          const uint32_t* inverseIndexHead,
                          const uint32_t* inverseIndexNext,
                          uint32_t ia, uint32_t ib,
                          std::atomic<uint64_t>* elapsed_us) :
            vertexNormals(vertexNormals),
            vertexNormalStride(vertexNormalStride),
            triangleNormals(triangleNormals),
            inverseIndexHead(inverseIndexHead),
            inverseIndexNext(inverseIndexNext),
            ia(ia), ib(ib),
            elapsed_us(elapsed_us)
        {}

        void operator()()
        {
            CpuInstrumentationScope(SCOPE_GEOMETRY, "VtxNrm");
            auto timer = Cogs::Timer::startNew();

            for (uint32_t i = ia; i < ib; i++) {
                glm::vec3 n(0.0f);  // glm does not zero-initialize by default.

                // Walk the list of index-buffer positions referencing vertex i;
                // ix/3 recovers the triangle each position belongs to.
                auto ix = inverseIndexHead[i];
                while (ix != ~0u) {
                    n += *reinterpret_cast<const glm::vec3*>(triangleNormals + 4 * (ix / 3));
                    ix = inverseIndexNext[ix];
                }
                n = glm::normalize(n);

                *reinterpret_cast<glm::vec3*>(vertexNormals + vertexNormalStride * i) = n;
            }
            if (elapsed_us) {
                elapsed_us->fetch_add(timer.elapsedMicroseconds());
            }
        }
    };
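
    // The inverse index is a set of singly linked lists stored in two flat
    // arrays: head[v] is the most recent index-buffer position referencing
    // vertex v, and next[i] chains to the previous one, terminated by ~0u.
    // Worked example for indices = {0, 1, 2,  0, 2, 3} (two triangles sharing
    // the edge 0-2), built in sequential order:
    //
    //   head = {3, 1, 4, 5}           // last position mentioning vertex 0..3
    //   next = {~0, ~0, ~0, 0, 2, ~0}
    //
    // so vertex 0 is found at positions 3 then 0, i.e. in triangles 1 and 0.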
367
368#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
369 struct VertexNormalsTaskSSE2
370 {
371 float* vertexNormals;
372 uint32_t vertexNormalStride;
373 const float * triangleNormals;
374 const uint32_t* inverseIndexHead;
375 const uint32_t* inverseIndexNext;
376 uint32_t ia, ib;
377 std::atomic<uint64_t>* elapsed_us;
378
379 VertexNormalsTaskSSE2(float* vertexNormals,
380 uint32_t vertexNormalStride,
381 const float * triangleNormals,
382 const uint32_t* inverseIndexHead,
383 const uint32_t* inverseIndexNext,
384 uint32_t ia, uint32_t ib,
385 std::atomic<uint64_t>* elapsed_us):
386 vertexNormals(vertexNormals),
387 vertexNormalStride(vertexNormalStride),
388 triangleNormals(triangleNormals),
389 inverseIndexHead(inverseIndexHead),
390 inverseIndexNext(inverseIndexNext),
391 ia(ia), ib(ib),
392 elapsed_us(elapsed_us)
393 {}
394
395 void operator()()
396 {
397 CpuInstrumentationScope(SCOPE_GEOMETRY, "VtxNrmSSE2");
398 auto timer = Cogs::Timer::startNew();
399
400 for (uint32_t i = ia; i < ib; i += 4) {
401
402 uint32_t ii[4];
403 __m128 n[4];
404 for (uint32_t l = 0; l < 4; l++) {
405 auto j = std::min(ib - 1, i + l);
406 ii[l] = j;
407
408 __m128 m = _mm_setzero_ps();
409 auto ix = inverseIndexHead[j];
410 while (ix != ~0) {
411 m = _mm_add_ps(m, _mm_load_ps(triangleNormals + 4 * (ix / 3)));
412 ix = inverseIndexNext[ix];
413 }
414 n[l] = m;
415 }
416 __m128 x = n[0];
417 __m128 y = n[1];
418 __m128 z = n[2];
419 __m128 w = n[3];
420
421 _MM_TRANSPOSE4_PS(x, y, z, w);
422 __m128 xx = _mm_mul_ps(x, x);
423 __m128 yy = _mm_mul_ps(y, y);
424 __m128 zz = _mm_mul_ps(z, z);
425 __m128 s0 = _mm_add_ps(xx, yy);
426 __m128 s1 = _mm_add_ps(s0, zz);
427
428 __m128 r = _mm_rsqrt_ps(s1);
429
430 storeVec3(vertexNormals + vertexNormalStride*ii[0], _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(0, 0, 0, 0)), n[0]));
431 storeVec3(vertexNormals + vertexNormalStride*ii[1], _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(1, 1, 1, 1)), n[1]));
432 storeVec3(vertexNormals + vertexNormalStride*ii[2], _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(2, 2, 2, 2)), n[2]));
433 storeVec3(vertexNormals + vertexNormalStride*ii[3], _mm_mul_ps(_mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3)), n[3]));
434 }
435 if (elapsed_us) {
436 elapsed_us->fetch_add(timer.elapsedMicroseconds());
437 }
438 }
439 };
440#endif
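
    // _mm_rsqrt_ps is an approximation (roughly 12 bits of precision), so this
    // path can differ slightly from the glm::normalize used by the scalar
    // VertexNormalsTask. If more precision were needed, one Newton-Raphson
    // step on the estimate r of 1/sqrt(s1) would be (a sketch, not used here):
    //
    //   __m128 half  = _mm_set1_ps(0.5f);
    //   __m128 three = _mm_set1_ps(3.0f);
    //   r = _mm_mul_ps(_mm_mul_ps(half, r),
    //                  _mm_sub_ps(three, _mm_mul_ps(s1, _mm_mul_ps(r, r))));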
}

void Cogs::Core::GeometryProcessing::normalsFromIndexedTriangles(Context* /*context*/,
                                                                 std::vector<glm::vec3>& N,
                                                                 std::vector<uint32_t>& remap,
                                                                 std::vector<uint32_t>& newIndices,
                                                                 const float* P,
                                                                 size_t P_stride, // byte-stride
                                                                 const uint32_t Nv,
                                                                 const uint32_t* indices,
                                                                 const uint32_t Ni,
                                                                 const float featureAngle,
                                                                 const float protrusionAngle,
                                                                 const bool flip)
{
    auto Nt = Ni / 3;
    const auto P_element_stride = uint32_t(P_stride / sizeof(float));
    Cogs::Memory::MemoryBuffer triangleNormals(4 * sizeof(float) * Nt + 64);
    auto * triangleNormalsPtr = alignUpwards<float>(triangleNormals.data());

#if 0
    // Disabled: the parallel dispatch below produces unnormalized normals, but
    // the angle tests in this function require normalized ones.
    const uint32_t taskSize = 10000;

    // Dispatch calculation of per-triangle normals.
    auto & tm = context->taskManager;
    TaskId group = tm->createGroup();
    const bool useSSE = context->features->supported(CPUFeature::SSE2);
    for (uint32_t i = 0; i < Nt; i += taskSize) {
        if (useSSE) {
            tm->enqueueChild(group, TriangleNormalsTaskSSE2b(indices,
                                                             triangleNormalsPtr,
                                                             P,
                                                             P_element_stride,
                                                             i, glm::min(Nt, i + taskSize)));
        }
        else {
            tm->enqueueChild(group, TriangleNormalsTask(indices,
                                                        triangleNormalsPtr,
                                                        P,
                                                        P_element_stride,
                                                        i, glm::min(Nt, i + taskSize)));
        }
    }
#endif
    // Calculate normalized per-triangle normals, optionally reversing the
    // winding by swapping b and c.
    if (flip == false) {
        for (size_t i = 0; i < Nt; i++) {
            const auto & a = glm::make_vec3(P + P_element_stride * indices[3 * i + 0]);
            const auto & b = glm::make_vec3(P + P_element_stride * indices[3 * i + 1]);
            const auto & c = glm::make_vec3(P + P_element_stride * indices[3 * i + 2]);
            const auto n = glm::cross(c - a, b - a);
            *reinterpret_cast<glm::vec3*>(triangleNormalsPtr + 4 * i) = glm::normalize(n);
        }
    }
    else {
        for (size_t i = 0; i < Nt; i++) {
            const auto & a = glm::make_vec3(P + P_element_stride * indices[3 * i + 0]);
            const auto & b = glm::make_vec3(P + P_element_stride * indices[3 * i + 2]);
            const auto & c = glm::make_vec3(P + P_element_stride * indices[3 * i + 1]);
            const auto n = glm::cross(c - a, b - a);
            *reinterpret_cast<glm::vec3*>(triangleNormalsPtr + 4 * i) = glm::normalize(n);
        }
    }

    // Attach corners to vertices.
    std::vector<uint32_t> head(Nv, ~0u);
    std::vector<Corner> corner(Ni);
    for (uint32_t t = 0; t < Nt; t++) {
        const auto a = indices[3 * t + 0];
        const auto b = indices[3 * t + 1];
        const auto c = indices[3 * t + 2];

        corner[3 * t + 0].Vp = c;
        corner[3 * t + 0].Vn = b;
        corner[3 * t + 0].triangle = (t << 2) + 0;
        corner[3 * t + 0].next = head[a];
        head[a] = 3 * t + 0;

        corner[3 * t + 1].Vp = a;
        corner[3 * t + 1].Vn = c;
        corner[3 * t + 1].triangle = (t << 2) + 1;
        corner[3 * t + 1].next = head[b];
        head[b] = 3 * t + 1;

        corner[3 * t + 2].Vp = b;
        corner[3 * t + 2].Vn = a;
        corner[3 * t + 2].triangle = (t << 2) + 2;
        corner[3 * t + 2].next = head[c];
        head[c] = 3 * t + 2;
    }
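
    // Each corner packs its triangle index and corner number into one uint32:
    // triangle t, corner k is stored as (t << 2) + k and decoded below with
    // (triangle >> 2) and (triangle & 3). Two bits suffice for k in 0..2, at
    // the cost of limiting the triangle count to 2^30.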

    const float protrusionCos = glm::cos(protrusionAngle);
    const float featureCos = glm::cos(featureAngle);

    std::vector<Corner> vertexCorners;
    std::vector<Corner> pie;

    N.clear();
    remap.clear();

    newIndices.clear();
    newIndices.resize(Ni);

    // Note: can be done in parallel tasks.
    for (uint32_t v = 0; v < Nv; v++) {
        glm::vec3 n_avg(0.0f);  // glm does not zero-initialize by default.

        vertexCorners.clear();
        for (auto n = head[v]; n != ~0u; n = corner[n].next) {
            vertexCorners.push_back(corner[n]);
            n_avg += glm::make_vec3(triangleNormalsPtr + 4 * (vertexCorners.back().triangle >> 2));
        }
        n_avg = glm::normalize(n_avg);

        // Partition the corners around v into "pies": fans of triangles whose
        // adjacent facet normals stay within the feature angle of each other.
        bool first = true;
        float minProtrusion = 1.f;
        while (!vertexCorners.empty()) {

            pie.clear();
            pie.push_back(vertexCorners.back());
            vertexCorners.pop_back();

        redo1:
            // Grow forwards
            for (size_t l = 0; l < vertexCorners.size(); l++) {
                if (pie.back().Vn == vertexCorners[l].Vp) {

                    const auto q = glm::make_vec3(triangleNormalsPtr + 4 * (vertexCorners[l].triangle >> 2));
                    if (featureCos <= glm::dot(glm::make_vec3(triangleNormalsPtr + 4 * (pie.back().triangle >> 2)), q))
                    {
                        minProtrusion = glm::min(minProtrusion, glm::dot(n_avg, q));
                        pie.push_back(vertexCorners[l]);
                        vertexCorners[l] = vertexCorners.back();
                        vertexCorners.pop_back();
                        goto redo1;
                    }
                }
            }

            if (!vertexCorners.empty()) {
                // Note: From here on the pie isn't fully ordered; instead of reversing everything
                // we just swap the front and back and start growing backwards from the back.
                std::swap(pie.front(), pie.back());
            redo2:
                // Grow backwards
                for (size_t l = 0; l < vertexCorners.size(); l++) {
                    if (pie.back().Vp == vertexCorners[l].Vn) {

                        const auto q = glm::make_vec3(triangleNormalsPtr + 4 * (vertexCorners[l].triangle >> 2));
                        if (featureCos <= glm::dot(glm::make_vec3(triangleNormalsPtr + 4 * (pie.back().triangle >> 2)), q))
                        {
                            minProtrusion = glm::min(minProtrusion, glm::dot(n_avg, q));
                            pie.push_back(vertexCorners[l]);
                            vertexCorners[l] = vertexCorners.back();
                            vertexCorners.pop_back();
                            goto redo2;
                        }
                    }
                }
            }

            if (first && vertexCorners.empty() && minProtrusion < protrusionCos) {
                // Don't smooth at protrusion points: emit one facet normal per corner.
                for (auto & slice : pie) {
                    auto ix = uint32_t(N.size());
                    newIndices[3 * (slice.triangle >> 2) + (slice.triangle & 3)] = ix;
                    remap.push_back(v);
                    N.push_back(glm::make_vec3(triangleNormalsPtr + 4 * (slice.triangle >> 2)));
                }
            }
            else {
                // Emit a single averaged normal shared by all corners in the pie.
                auto ix = uint32_t(N.size());
                glm::vec3 n(0.0f);
                for (auto & slice : pie) {
                    n += glm::make_vec3(triangleNormalsPtr + 4 * (slice.triangle >> 2));
                    newIndices[3 * (slice.triangle >> 2) + (slice.triangle & 3)] = ix;
                }
                remap.push_back(v);
                N.push_back(glm::normalize(n));
            }
            first = false;
        }
    }
}
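
// A hypothetical call of the overload above (names and angles illustrative,
// not taken from real call sites): positions are xyz floats packed at a
// 12-byte stride, and the outputs are a fresh normal set plus remapped
// indices. The context parameter is unused by this overload.
//
//   std::vector<glm::vec3> N;
//   std::vector<uint32_t> remap, newIndices;
//   GeometryProcessing::normalsFromIndexedTriangles(nullptr, N, remap, newIndices,
//                                                   positions, 3 * sizeof(float),
//                                                   vertexCount, indices, indexCount,
//                                                   glm::radians(40.f),  // featureAngle
//                                                   glm::radians(80.f),  // protrusionAngle
//                                                   false);              // flip
//   // remap[j] gives the source vertex of new vertex j; newIndices indexes N.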

void Cogs::Core::GeometryProcessing::normalsFromIndexedTriangles(Context* context,
                                                                 float* normals,
                                                                 uint32_t normalStride,
                                                                 const float* vertices,
                                                                 uint32_t vertexStride,
                                                                 const uint32_t numVertices,
                                                                 const uint32_t* indices,
                                                                 const uint32_t numIndices,
                                                                 const uint32_t taskSize_,
                                                                 std::atomic<uint64_t>* elapsed_us)
{
    auto & tm = context->taskManager;
    auto taskSize = 4 * taskSize_;
    auto numTriangles = numIndices / 3;

    // fixme: add fast-track for small meshes

    // Buffers, over-allocated by 64 bytes so their pointers can be cache-line aligned.
    Cogs::Memory::MemoryBuffer inverseHead(sizeof(uint32_t) * numVertices + 64);
    Cogs::Memory::MemoryBuffer inverseNext(sizeof(uint32_t) * numIndices + 64);
    Cogs::Memory::MemoryBuffer triangleNormals(4 * sizeof(float) * numTriangles + 64);
    std::memset(inverseHead.data(), ~0, inverseHead.size());

    auto * inverseHeadPtr = alignUpwards<uint32_t>(inverseHead.data());
    auto * inverseNextPtr = alignUpwards<uint32_t>(inverseNext.data());

    // Build per-vertex lists of the index-buffer positions referencing each vertex.
    {
        TaskId group = tm->createGroup();
        for (uint32_t ia = 0; ia < numIndices; ia += taskSize) {
            auto ib = std::min(numIndices, ia + taskSize);
            tm->enqueueChild(group, [ia, ib, inverseHeadPtr, inverseNextPtr, indices]
            {
                CpuInstrumentationScope(SCOPE_GEOMETRY, "NrmInvIx");
                for (uint32_t i = ia; i < ib; i++) {
#if defined( _WIN32 )
                    inverseNextPtr[i] = _InterlockedExchange((volatile long*)(inverseHeadPtr + indices[i]), i);
#else
                    inverseNextPtr[i] = __atomic_exchange_n(inverseHeadPtr + indices[i], i, __ATOMIC_ACQ_REL);
#endif
                }
            });
        }
        tm->wait(group);
        tm->destroy(group);
    }
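
    // Each task above pushes its index-buffer positions onto the per-vertex
    // lists with one atomic exchange per entry, so list order depends on task
    // interleaving (this only perturbs the floating-point accumulation order).
    // The single-threaded equivalent of the exchange is:
    //
    //   inverseNextPtr[i] = inverseHeadPtr[indices[i]];
    //   inverseHeadPtr[indices[i]] = i;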

    auto * triangleNormalsPtr = alignUpwards<float>(triangleNormals.data());

    // Calculate all triangle normals
    {
        TaskId group = tm->createGroup();
#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
        if (context->features->supported(CPUFeature::SSE2)) {
            for (uint32_t i = 0; i < numTriangles; i += taskSize) {
                tm->enqueueChild(group, TriangleNormalsTaskSSE2b(indices,
                                                                 triangleNormalsPtr,
                                                                 vertices,
                                                                 vertexStride,
                                                                 i, glm::min(numTriangles, i + taskSize),
                                                                 elapsed_us));
            }
        }
        else
#endif
        {
            for (uint32_t i = 0; i < numTriangles; i += taskSize) {
                tm->enqueueChild(group, TriangleNormalsTask(indices,
                                                            triangleNormalsPtr,
                                                            vertices,
                                                            vertexStride,
                                                            i, glm::min(numTriangles, i + taskSize),
                                                            elapsed_us));
            }
        }
        tm->wait(group);
        tm->destroy(group);
    }

    // Aggregate triangle normals for each vertex
    {
        TaskId group = tm->createGroup();

#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
        if (context->features->supported(CPUFeature::SSE2)) {
            for (uint32_t i = 0; i < numVertices; i += taskSize) {
                tm->enqueueChild(group, VertexNormalsTaskSSE2(normals,
                                                              normalStride,
                                                              triangleNormalsPtr,
                                                              inverseHeadPtr,
                                                              inverseNextPtr,
                                                              i, std::min(numVertices, i + taskSize),
                                                              elapsed_us));
            }
        }
        else
#endif
        {
            for (uint32_t i = 0; i < numVertices; i += taskSize) {
                tm->enqueueChild(group,
                                 VertexNormalsTask(normals,
                                                   normalStride,
                                                   triangleNormalsPtr,
                                                   inverseHeadPtr,
                                                   inverseNextPtr,
                                                   i, std::min(numVertices, i + taskSize),
                                                   elapsed_us));
            }
        }
        tm->wait(group);
        tm->destroy(group);
    }
}
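
// A hypothetical call of the packed overload above (buffer names illustrative).
// Note that normalStride and vertexStride are element strides in floats, not
// bytes, unlike P_stride in the first overload; a 4-float stride (xyz + pad)
// also keeps the SSE loads inside the buffer.
//
//   std::atomic<uint64_t> elapsed{ 0 };
//   GeometryProcessing::normalsFromIndexedTriangles(context,
//                                                   normals, 4,    // xyz + pad, stride in floats
//                                                   positions, 4,  // xyz + pad, stride in floats
//                                                   vertexCount,
//                                                   indices, indexCount,
//                                                   10000,         // base task granularity
//                                                   &elapsed);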