Cogs.Core
Analyze.cpp
1
2#include "Services/Features.h"
3#include "Platform/Instrumentation.h"
4#include "Services/TaskManager.h"
5#include "Services/Variables.h"
6#include "Context.h"
7
8#include "MarchingCubesTables.h"
9#include "IsoSurfaces_internal.h"
10
11#include "Foundation/BitTwiddling/PowerOfTwo.h"
12#include "Foundation/Platform/Threads.h"
13
14
15namespace {
16 using namespace Cogs::Core;
17
/// Turns a table of per-entry counts into exclusive running offsets, in place.
///
/// After the call, table[i] holds the sum of the original entries [0, i).
/// The original value of the last entry is read but does not appear in any
/// output slot, so a table with one trailing slot ends up with the sum of all
/// preceding counts in that slot. An empty table is left untouched.
///
/// @param table Counts on entry, offsets on return.
inline void buildOffsetTable(std::vector<int32_t>& table)
{
  int32_t runningTotal = 0;
  for (int32_t& entry : table) {
    const int32_t count = entry;  // Remember the count before overwriting it.
    entry = runningTotal;
    runningTotal += count;
  }
}
27
28
// Task functor that analyzes one tile of the grid. `id` is the tile's (x, y, z)
// index; operator() is only declared here and is defined through explicit
// specializations per element type.
29 template<typename T>
30 struct AnalyzeTileTask
31 {
// NOTE(review): the operator() specializations dereference a member `g`
// (g->context, g->tryToUseAVX2) and the enqueue site assigns `task.g`, but no
// declaration of `g` is visible in this listing (original line 32 is absent).
// Presumably a pointer to the shared analyze state - confirm against the real file.
33 glm::ivec3 id;
34 void operator()();
35 };
36
37 template<> void AnalyzeTileTask<float>::operator()()
38 {
39#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
40 if (g->tryToUseAVX2 && g->context->features->supported(Cogs::Core::CPUFeature::AVX2)) {
41 analyzeTile_f32_AVX2(g, id);
42 return;
43 }
44 if (g->context->features->supported(Cogs::Core::CPUFeature::SSE2)) {
45 analyzeTile_f32_SSE(g, id);
46 return;
47 }
48#endif
49 analyzeTile_f32(g, id);
50 }
51
52 template<> void AnalyzeTileTask<uint16_t>::operator()()
53 {
54#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
55 if (g->tryToUseAVX2 && g->context->features->supported(Cogs::Core::CPUFeature::AVX2)) {
56 analyzeTile_u16_AVX2(g, id);
57 return;
58 }
59 if (g->context->features->supported(Cogs::Core::CPUFeature::SSE2)) {
60 analyzeTile_u16_SSE(g, id);
61 return;
62 }
63#endif
64 analyzeTile_u16(g, id);
65 }
66
// Shared implementation behind IsoSurfaces::analyze for a field of element type `Type`.
//
// Works on a grid of extent M = gridB - gridA and Nt = thresholds.size() thresholds:
//   1. Enqueues one AnalyzeTileTask per tile and waits for all of them.
//   2. Turns the per-threshold counts the tile tasks left in cellOffsetsAtomic into
//      offsets (cellOffsets) and sizes the active-cell output buffers from them.
//   3. Per threshold, enqueues AnalyzePopulateCounts tasks over chunks of the active
//      cells plus one AnalyzePrefixSum task that has the chunk group as antecedent.
//   4. Reads one value per threshold from the end of each threshold's slice of the
//      vertex/index offset buffers and turns those into vertexOffsets/indexOffsets.
//
// vertexOffsets, indexOffsets and cellOffsets all end up with Nt + 1 entries.
67 template<typename Type>
68 void analyzeGeneric(Context* context,
69 std::vector<int32_t>& vertexOffsets, // [L]
70 std::vector<int32_t>& indexOffsets, // [L]
71 std::vector<int32_t>& cellOffsets, // [L]
// NOTE(review): `cellMap` and `activeCellCases` are used in the body, but their
// parameter lines (original lines 72-73) are absent from this listing - confirm
// their declarations against the real file.
74 Cogs::Memory::TypedBuffer<int32_t>& activeCellVertexOffsets, // One element per active cell.
75 Cogs::Memory::TypedBuffer<int32_t>& activeCellIndexOffsets, // One element per active cell.
76 Cogs::Memory::TypedBuffer<int32_t>& activeCellIndices, // One element per active cell.
// NOTE(review): `activeCellIJK` is used in the body, but its parameter line
// (original line 77) is absent from this listing - confirm against the real file.
78 const std::vector<Type>& thresholds,
79 const bool exteriorIsLess,
80 const Type* field,
81 const glm::ivec3 fieldDim,
82 const glm::ivec3 gridA,
83 const glm::ivec3 gridB,
84 std::atomic<uint64_t>* elapsed_us)
85 {
86 CpuInstrumentationScope(SCOPE_ISOSURFACES, "IsoSurface::analyze");
87
88 const auto * axesTable = MarchingCubes::axesTable().data();
89 const auto * indexCountTable = MarchingCubes::indexCountTable().data();
90 const size_t Nt = thresholds.size();
91 const auto M = gridB - gridA;
92
// Number of elements in one threshold's layer of the per-cell buffers below.
93 const size_t layerStride = (size_t)M.x * (size_t)M.y * (size_t)M.z;
94
95 cellOffsets.clear();
96 cellOffsets.resize((size_t)Nt + 1);
97
// Temporary buffers handed to the tile tasks; one layer of layerStride elements per threshold.
98 Cogs::Memory::TypedBuffer<uint8_t> activeCellCasesTmp(Nt*layerStride);
99 Cogs::Memory::TypedBuffer<int32_t> activeCellIndicesTmp(Nt*layerStride);
100
101 cellMap.resize(Nt*layerStride);
102 //auto * cellMapPtr = cellMap.data();
103
104 auto * actCellCasesTmp = activeCellCasesTmp.data();
105 auto * actCellIndicesTmp = activeCellIndicesTmp.data();
106
// Q is the target number of tiles per axis: at least 4, and large enough that
// Q^3 is at least 8 times the hardware concurrency.
107 auto Q = std::max(4u, static_cast<unsigned>(std::ceil(std::cbrt(8 * Cogs::Threads::hardwareConcurrency()))));
108
// Tile extent per axis: ceil(M / Q), rounded up to a power of two.
109 glm::ivec3 taskSizeClassify;
110 taskSizeClassify.x = Cogs::roundUpToPowerOfTwo((static_cast<unsigned>(M.x) + Q - 1) / Q);
111 taskSizeClassify.y = Cogs::roundUpToPowerOfTwo((static_cast<unsigned>(M.y) + Q - 1) / Q);
112 taskSizeClassify.z = Cogs::roundUpToPowerOfTwo((static_cast<unsigned>(M.z) + Q - 1) / Q);
113
114 assert((0 < taskSizeClassify.x) && (0 < taskSizeClassify.y) && (0 < taskSizeClassify.z));
115
// Number of tiles per axis needed to cover M (ceiling division).
116 const glm::ivec3 tiles((M.x + taskSizeClassify.x - 1) / taskSizeClassify.x,
117 (M.y + taskSizeClassify.y - 1) / taskSizeClassify.y,
118 (M.z + taskSizeClassify.z - 1) / taskSizeClassify.z);
119
// One atomic counter per threshold, shared with all tile tasks through aTileG.cellOffsets.
120 std::vector<std::atomic<int>> cellOffsetsAtomic(Nt);
121 glm::ivec3 tile;
122
// NOTE(review): `aTileG` is used below without a visible declaration (original
// line 123 is absent from this listing). Presumably an IsoSurfaces::AnalyzeGlobalState,
// since it has tileSize and scratchBuffers members - confirm against the real file.
124 aTileG.context = context;
125 aTileG.tileSize = taskSizeClassify;
126 aTileG.tiles = tiles;
127 aTileG.gridA = gridA;
128 aTileG.fieldDim = fieldDim;
129 aTileG.M = M;
130 aTileG.field = field;
131 aTileG.thresholds = thresholds.data();
132 aTileG.Nt = (unsigned)Nt;
133 aTileG.exteriorIsLess = exteriorIsLess;
134 aTileG.cellOffsets = cellOffsetsAtomic.data();
135 aTileG.cellMap = cellMap.data();
136 aTileG.activeCellCases = activeCellCasesTmp.data();
137 aTileG.activeCellIndices = activeCellIndicesTmp.data();
138 aTileG.elapsed_us = elapsed_us;
139 aTileG.tryToUseAVX2 = context->variables->get("Volumetric.IsoSurfaces.AVX2", false);
140
141
// Stage 1: one task per tile, all pointing at the same aTileG; block until every tile is done.
142 auto analyzeGroup = context->taskManager->createGroup();
143 for (tile.z = 0; tile.z < tiles.z; tile.z++) {
144 for (tile.y = 0; tile.y < tiles.y; tile.y++) {
145 for (tile.x = 0; tile.x < tiles.x; tile.x++) {
146 AnalyzeTileTask<Type> task;
147 task.g = &aTileG;
148 task.id = tile;
149 context->taskManager->enqueueChild(analyzeGroup, task);
150 }
151 }
152 }
153 context->taskManager->wait(analyzeGroup);
154 context->taskManager->destroy(analyzeGroup);
// Free whatever scratch buffers are left in the pool now that the tile tasks have finished.
155 for (auto i : aTileG.scratchBuffers) {
156 delete i;
157 }
158 aTileG.scratchBuffers.clear();
159
// Copy the per-threshold counters out of the atomics; slot Nt keeps the zero from resize().
160 for (size_t i = 0; i < Nt; i++) {
161 cellOffsets[i] = cellOffsetsAtomic[i];
162 }
163
// Counts -> exclusive offsets; cellOffsets.back() becomes the total over all thresholds.
164 buildOffsetTable(cellOffsets);
165
// The vertex/index offset buffers get one extra slot per threshold (hence "+ Nt").
166 activeCellCases.resize(cellOffsets.back());
167 activeCellVertexOffsets.resize((size_t)cellOffsets.back() + Nt);
168 activeCellIndexOffsets.resize((size_t)cellOffsets.back() + Nt);
169 activeCellIndices.resize(cellOffsets.back());
170 activeCellIJK.resize(cellOffsets.back());
171
// Target number of chunks per threshold: ceil(8 * hardwareConcurrency / Nt), at least 1.
// NOTE(review): this divides by Nt, so an empty `thresholds` vector would divide by
// zero here - confirm callers never pass one.
172 auto eightConcurrency = std::max((size_t)1, (8 * static_cast<int>(Cogs::Threads::hardwareConcurrency()) + Nt - 1) / Nt);
173 auto prefixSumGroup = context->taskManager->createGroup();
// One Global per threshold; tasks keep pointers into this vector, which is sized up front and not resized afterwards.
174 std::vector<IsoSurfaces::AnalyzePopulateCounts::Global> analyzePopCntsG(Nt);
175 for (size_t t = 0; t < Nt; t++) {
176
177
// Number of cells counted for threshold t.
178 const auto Nc = (size_t)(cellOffsets[t + 1] - cellOffsets[t]);
179
180 analyzePopCntsG[t].context = context;
181 analyzePopCntsG[t].axesTable = axesTable;
182 analyzePopCntsG[t].indexCountTable = indexCountTable;
// Inputs: threshold t's layer of the temporary buffers used in stage 1.
183 analyzePopCntsG[t].actCellCasesIn = actCellCasesTmp + layerStride * t;
184 analyzePopCntsG[t].actCellIndicesIn = actCellIndicesTmp + layerStride * t;
185 analyzePopCntsG[t].M = M;
186 analyzePopCntsG[t].gridA = gridA;
187 analyzePopCntsG[t].gridB = gridB;
// Outputs: threshold t's slice starts at cellOffsets[t]; the vertex/index count
// slices are shifted by a further t because each earlier threshold owns one extra slot.
188 analyzePopCntsG[t].actCellCasesOut = activeCellCases.data() + cellOffsets[t];
189 analyzePopCntsG[t].actCellIndicesOut = activeCellIndices.data() + cellOffsets[t];
190 analyzePopCntsG[t].actCellIJKOut = activeCellIJK.data() + cellOffsets[t];
191 analyzePopCntsG[t].actCellVtxCntOut = activeCellVertexOffsets.data() + cellOffsets[t] + t;
192 analyzePopCntsG[t].actCellIdxCntOut = activeCellIndexOffsets.data() + cellOffsets[t] + t;
193 analyzePopCntsG[t].elapsed_us = elapsed_us;
194
// NOTE(review): popCntGroup is neither waited on nor destroyed in this function;
// presumably AnalyzePrefixSum takes care of its antecedent - confirm.
195 auto popCntGroup = context->taskManager->createGroup();
196
// Chunk size: at least 1024 cells, otherwise ceil(Nc / eightConcurrency).
197 auto taskSizePopCnt = std::max((size_t)1024, (Nc + eightConcurrency - 1) / eightConcurrency);
198 for (size_t c = 0; c < Nc; c += taskSizePopCnt) {
// NOTE(review): `popCntTask` is used without a visible declaration (original line 199
// is absent from this listing) - confirm its type against the real file.
200 popCntTask.g = &analyzePopCntsG[t];
201 popCntTask.ca = c;
202 popCntTask.cb = std::min(c + taskSizePopCnt, Nc);
203 context->taskManager->enqueueChild(popCntGroup, popCntTask);
204 }
205
// The prefix-sum task works on the same slices the count tasks wrote to and gets
// the count group as its antecedent.
206 IsoSurfaces::AnalyzePrefixSum prefixSumTask;
207 prefixSumTask.antecedent = popCntGroup;
208 prefixSumTask.context = context;
209 prefixSumTask.actCellVtxOff = activeCellVertexOffsets.data() + cellOffsets[t] + t;
210 prefixSumTask.actCellIdxOff = activeCellIndexOffsets.data() + cellOffsets[t] + t;
211 prefixSumTask.Nc = Nc;
212 prefixSumTask.elapsed_us = elapsed_us;
213 context->taskManager->enqueueChild(prefixSumGroup, prefixSumTask);
214 }
215 context->taskManager->wait(prefixSumGroup);
216 context->taskManager->destroy(prefixSumGroup);
217
218 vertexOffsets.resize(Nt + 1);
219 indexOffsets.resize(Nt + 1);
220 for (size_t t = 0; t < Nt; t++) {
// Index cellOffsets[t + 1] + t is the last (extra) slot of threshold t's slice;
// presumably it holds that threshold's total after the prefix sum - confirm.
221 vertexOffsets[t] = activeCellVertexOffsets.data()[cellOffsets[t + 1] + t];
222 indexOffsets[t] = activeCellIndexOffsets.data()[cellOffsets[t + 1] + t];
223 }
// Per-threshold totals -> exclusive offsets.
224 buildOffsetTable(vertexOffsets);
225 buildOffsetTable(indexOffsets);
226 }
227}
228
229Cogs::Memory::MemoryBuffer* IsoSurfaces::AnalyzeGlobalState::scratchAcquire(uint32_t byteCount)
230{
231 Cogs::Memory::MemoryBuffer* scratch_ = nullptr;
232 {
233 std::lock_guard<std::mutex> guard(scratchLock);
234 if (!scratchBuffers.empty()) {
235 scratch_ = scratchBuffers.back();
236 scratchBuffers.pop_back();
237 }
238 }
239 if (!scratch_) {
240 const glm::ivec3 scratchSize = tileSize + glm::ivec3(1);
241 scratch_ = new Cogs::Memory::MemoryBuffer(byteCount);
242 }
243 else {
244 assert(scratch_->size() == byteCount);
245 }
246 return scratch_;
247}
248
249void IsoSurfaces::AnalyzeGlobalState::scratchRelease(Cogs::Memory::MemoryBuffer* scratch_)
250{
251 std::lock_guard<std::mutex> guard(scratchLock);
252 scratchBuffers.push_back(scratch_);
253}
254
255
256
// Entry point for float fields: forwards all arguments, in order, to analyzeGeneric.
// The element type is not spelled out at the call; it follows from the float
// `thresholds` and `field` arguments.
257void Cogs::Core::IsoSurfaces::analyze(Context* context,
258 std::vector<int32_t>& vertexOffsets, // [L]
259 std::vector<int32_t>& indexOffsets, // [L]
260 std::vector<int32_t>& cellOffsets, // [L]
// NOTE(review): `cellMap` is forwarded below, but its parameter line (original
// line 261) is absent from this listing - confirm against the real file.
262 Cogs::Memory::TypedBuffer<uint8_t>& activeCellCases,
263 Cogs::Memory::TypedBuffer<int32_t>& activeCellVertexOffsets, // One element per active cell.
264 Cogs::Memory::TypedBuffer<int32_t>& activeCellIndexOffsets, // One element per active cell.
265 Cogs::Memory::TypedBuffer<int32_t>& activeCellIndices, // One element per active cell.
// NOTE(review): `activeCellIJK` is forwarded below, but its parameter line (original
// line 266) is absent from this listing - confirm against the real file.
267 const std::vector<float>& thresholds,
268 const bool exteriorIsLess,
269 const float* field,
270 const glm::ivec3 fieldDim,
271 const glm::ivec3 gridA,
272 const glm::ivec3 gridB,
273 std::atomic<uint64_t>* elapsed_us)
274{
275 analyzeGeneric(context,
276 vertexOffsets, indexOffsets, cellOffsets, cellMap,
277 activeCellCases, activeCellVertexOffsets, activeCellIndexOffsets, activeCellIndices, activeCellIJK,
278 thresholds, exteriorIsLess,
279 field, fieldDim, gridA, gridB,
280 elapsed_us);
281}
A Context instance contains all the services, systems and runtime components needed to use Cogs.
Definition: Context.h:83
std::unique_ptr< class TaskManager > taskManager
TaskManager service instance.
Definition: Context.h:186
std::unique_ptr< class Variables > variables
Variables service instance.
Definition: Context.h:180
COGSCORE_DLL_API const std::vector< unsigned char > & indexCountTable()
Contains the Engine, Renderer, resource managers and other systems needed to run Cogs....
uint8_t roundUpToPowerOfTwo(uint8_t x)
Definition: PowerOfTwo.h:28