2#include "Services/Features.h"
3#include "Platform/Instrumentation.h"
4#include "Services/TaskManager.h"
5#include "Services/Variables.h"
8#include "MarchingCubesTables.h"
9#include "IsoSurfaces_internal.h"
11#include "Foundation/BitTwiddling/PowerOfTwo.h"
12#include "Foundation/Platform/Threads.h"
// Helper that takes `table` by mutable reference and walks every index of it.
// NOTE(review): the loop body is not part of this listing. Callers in this file fill a
// table with per-threshold counts and then call this helper, so it presumably turns
// counts into running offsets in place - confirm against the full source.
18 inline void buildOffsetTable(std::vector<int32_t>& table)
21 for (
size_t i = 0; i < table.size(); i++) {
// Task functor whose operator() is specialised below for float and uint16_t; those
// specialisations read the members `g` and `id`.
// NOTE(review): the template header and the member declarations are elided in this listing.
30 struct AnalyzeTileTask
// Tile analysis for float fields: dispatches to one of three kernels, each called with (g, id).
// Order as written:
//   1. analyzeTile_f32_AVX2 - only when g->tryToUseAVX2 is set and the feature service reports
//      AVX2; this check sits behind the preprocessor guard that excludes Emscripten and Apple builds.
//   2. analyzeTile_f32_SSE  - when the feature service reports SSE2.
//   3. analyzeTile_f32      - plain fallback.
// NOTE(review): the lines between the calls (returns, closing braces, the #endif) are elided here,
// so whether each branch returns early and whether the SSE2 check is also inside the #if cannot be
// seen in this listing - confirm against the full source.
37 template<>
void AnalyzeTileTask<float>::operator()()
39#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
40 if (g->tryToUseAVX2 && g->context->features->supported(Cogs::Core::CPUFeature::AVX2)) {
41 analyzeTile_f32_AVX2(g,
id);
44 if (g->context->features->supported(Cogs::Core::CPUFeature::SSE2)) {
45 analyzeTile_f32_SSE(g,
id);
49 analyzeTile_f32(g,
id);
// Tile analysis for uint16_t fields: same dispatch shape as the float specialisation, using the
// u16 kernels, each called with (g, id).
// Order as written:
//   1. analyzeTile_u16_AVX2 - only when g->tryToUseAVX2 is set and the feature service reports
//      AVX2; this check sits behind the preprocessor guard that excludes Emscripten and Apple builds.
//   2. analyzeTile_u16_SSE  - when the feature service reports SSE2.
//   3. analyzeTile_u16      - plain fallback.
// NOTE(review): returns, closing braces and the #endif are elided in this listing; confirm the
// early-return behaviour and the extent of the #if against the full source.
52 template<>
void AnalyzeTileTask<uint16_t>::operator()()
54#if !defined(EMSCRIPTEN) && !defined(__APPLE__)
55 if (g->tryToUseAVX2 && g->context->features->supported(Cogs::Core::CPUFeature::AVX2)) {
56 analyzeTile_u16_AVX2(g,
id);
59 if (g->context->features->supported(Cogs::Core::CPUFeature::SSE2)) {
60 analyzeTile_u16_SSE(g,
id);
64 analyzeTile_u16(g,
id);
// Analysis pass shared by all field element types (`Type`).
//
// Visible stages, in order:
//   1. Split the M = gridB - gridA region into tiles and enqueue one AnalyzeTileTask per tile.
//   2. Copy the per-threshold atomic counters into cellOffsets and run buildOffsetTable on them.
//   3. Per threshold, enqueue population-count tasks over the active cells, followed by a
//      prefix-sum task that names the population-count group as its antecedent.
//   4. Gather one value per threshold into vertexOffsets / indexOffsets and run buildOffsetTable
//      on each.
//
// Parameters visible in this listing:
//   context        - supplies taskManager and variables, and is handed on to the tasks.
//   vertexOffsets  - output, resized to Nt + 1.
//   indexOffsets   - output, resized to Nt + 1.
//   cellOffsets    - output, resized to Nt + 1.
//   thresholds     - one entry per threshold; Nt = thresholds.size().
//   exteriorIsLess - forwarded unchanged to the tile tasks.
//   fieldDim       - forwarded unchanged to the tile tasks.
//   gridA, gridB   - bounds of the processed region; only their difference M is used here
//                    besides being forwarded to the tasks.
//   elapsed_us     - forwarded to every task type (presumably a shared timing accumulator - confirm).
// NOTE(review): several parameters used in the body (cellMap, the activeCell* vectors, field) are
// declared on lines elided from this listing, as are the waits on the task groups.
67 template<
typename Type>
68 void analyzeGeneric(
Context* context,
69 std::vector<int32_t>& vertexOffsets,
70 std::vector<int32_t>& indexOffsets,
71 std::vector<int32_t>& cellOffsets,
78 const std::vector<Type>& thresholds,
79 const bool exteriorIsLess,
81 const glm::ivec3 fieldDim,
82 const glm::ivec3 gridA,
83 const glm::ivec3 gridB,
84 std::atomic<uint64_t>* elapsed_us)
86 CpuInstrumentationScope(SCOPE_ISOSURFACES,
"IsoSurface::analyze");
88 const auto * axesTable = MarchingCubes::axesTable().data();
90 const size_t Nt = thresholds.size();
91 const auto M = gridB - gridA;
// Number of cells in the region; the temporary per-threshold arrays below are laid out with
// this stride (threshold t starts at layerStride * t).
93 const size_t layerStride = (size_t)M.x * (
size_t)M.y * (size_t)M.z;
96 cellOffsets.resize((
size_t)Nt + 1);
// One cellMap entry per cell per threshold.
101 cellMap.resize(Nt*layerStride);
104 auto * actCellCasesTmp = activeCellCasesTmp.data();
105 auto * actCellIndicesTmp = activeCellIndicesTmp.data();
// Q = ceil(cbrt(8 * hardware concurrency)), but never below 4.
// NOTE(review): how Q feeds taskSizeClassify is on elided lines; presumably it is the target
// tile count per axis, giving roughly eight tiles per hardware thread - confirm.
107 auto Q = std::max(4u,
static_cast<unsigned>(std::ceil(std::cbrt(8 * Cogs::Threads::hardwareConcurrency()))));
109 glm::ivec3 taskSizeClassify;
// A zero tile extent would make the ceiling divisions below divide by zero.
114 assert((0 < taskSizeClassify.x) && (0 < taskSizeClassify.y) && (0 < taskSizeClassify.z));
// Tile count per axis, rounded up so partial tiles at the upper edges are included.
116 const glm::ivec3 tiles((M.x + taskSizeClassify.x - 1) / taskSizeClassify.x,
117 (M.y + taskSizeClassify.y - 1) / taskSizeClassify.y,
118 (M.z + taskSizeClassify.z - 1) / taskSizeClassify.z);
// One atomic counter per threshold, shared with the tile tasks through aTileG.cellOffsets and
// copied into cellOffsets once the tile stage is done.
120 std::vector<std::atomic<int>> cellOffsetsAtomic(Nt);
// Shared state for all tile tasks (aTileG is declared on an elided line).
124 aTileG.context = context;
125 aTileG.tileSize = taskSizeClassify;
126 aTileG.tiles = tiles;
127 aTileG.gridA = gridA;
128 aTileG.fieldDim = fieldDim;
130 aTileG.field = field;
131 aTileG.thresholds = thresholds.data();
132 aTileG.Nt = (unsigned)Nt;
133 aTileG.exteriorIsLess = exteriorIsLess;
134 aTileG.cellOffsets = cellOffsetsAtomic.data();
135 aTileG.cellMap = cellMap.data();
136 aTileG.activeCellCases = activeCellCasesTmp.data();
137 aTileG.activeCellIndices = activeCellIndicesTmp.data();
138 aTileG.elapsed_us = elapsed_us;
// AVX2 kernels are opt-in: the variable lookup falls back to false.
139 aTileG.tryToUseAVX2 = context->
variables->get(
"Volumetric.IsoSurfaces.AVX2",
false);
// One task per tile, all enqueued as children of the same group.
142 auto analyzeGroup = context->
taskManager->createGroup();
143 for (tile.z = 0; tile.z < tiles.z; tile.z++) {
144 for (tile.y = 0; tile.y < tiles.y; tile.y++) {
145 for (tile.x = 0; tile.x < tiles.x; tile.x++) {
146 AnalyzeTileTask<Type> task;
149 context->
taskManager->enqueueChild(analyzeGroup, task);
// NOTE(review): the body of this loop over the scratch buffers is elided; presumably it releases
// each buffer before the container is cleared - confirm.
155 for (
auto i : aTileG.scratchBuffers) {
158 aTileG.scratchBuffers.clear();
// Pull the per-threshold counters out of the atomics, then post-process them with
// buildOffsetTable; cellOffsets.back() is used below as the total active-cell count.
160 for (
size_t i = 0; i < Nt; i++) {
161 cellOffsets[i] = cellOffsetsAtomic[i];
164 buildOffsetTable(cellOffsets);
// The vertex/index offset arrays get one extra slot per threshold on top of the cell count;
// that is why threshold t's segment starts at cellOffsets[t] + t below.
166 activeCellCases.resize(cellOffsets.back());
167 activeCellVertexOffsets.resize((
size_t)cellOffsets.back() + Nt);
168 activeCellIndexOffsets.resize((
size_t)cellOffsets.back() + Nt);
169 activeCellIndices.resize(cellOffsets.back());
170 activeCellIJK.resize(cellOffsets.back());
// ceil(8 * hardware concurrency / Nt), at least 1; used as the divisor when sizing the
// population-count tasks of each threshold.
// NOTE(review): this divides by Nt - an empty `thresholds` vector would divide by zero unless
// an elided line guards against it; confirm.
172 auto eightConcurrency = std::max((
size_t)1, (8 *
static_cast<int>(Cogs::Threads::hardwareConcurrency()) + Nt - 1) / Nt);
173 auto prefixSumGroup = context->
taskManager->createGroup();
174 std::vector<IsoSurfaces::AnalyzePopulateCounts::Global> analyzePopCntsG(Nt);
175 for (
size_t t = 0; t < Nt; t++) {
// Number of active cells recorded for threshold t.
178 const auto Nc = (size_t)(cellOffsets[t + 1] - cellOffsets[t]);
// Inputs are read from the temporary arrays at stride layerStride per threshold; outputs are
// written to the final arrays starting at cellOffsets[t] (plus t for the offset arrays).
180 analyzePopCntsG[t].context = context;
181 analyzePopCntsG[t].axesTable = axesTable;
183 analyzePopCntsG[t].actCellCasesIn = actCellCasesTmp + layerStride * t;
184 analyzePopCntsG[t].actCellIndicesIn = actCellIndicesTmp + layerStride * t;
185 analyzePopCntsG[t].M = M;
186 analyzePopCntsG[t].gridA = gridA;
187 analyzePopCntsG[t].gridB = gridB;
188 analyzePopCntsG[t].actCellCasesOut = activeCellCases.data() + cellOffsets[t];
189 analyzePopCntsG[t].actCellIndicesOut = activeCellIndices.data() + cellOffsets[t];
190 analyzePopCntsG[t].actCellIJKOut = activeCellIJK.data() + cellOffsets[t];
191 analyzePopCntsG[t].actCellVtxCntOut = activeCellVertexOffsets.data() + cellOffsets[t] + t;
192 analyzePopCntsG[t].actCellIdxCntOut = activeCellIndexOffsets.data() + cellOffsets[t] + t;
193 analyzePopCntsG[t].elapsed_us = elapsed_us;
195 auto popCntGroup = context->
taskManager->createGroup();
// Each population-count task covers at least 1024 cells; the upper end of the last range is
// clamped to Nc.
197 auto taskSizePopCnt = std::max((
size_t)1024, (Nc + eightConcurrency - 1) / eightConcurrency);
198 for (
size_t c = 0; c < Nc; c += taskSizePopCnt) {
200 popCntTask.g = &analyzePopCntsG[t];
202 popCntTask.cb = std::min(c + taskSizePopCnt, Nc);
203 context->
taskManager->enqueueChild(popCntGroup, popCntTask);
// The prefix-sum task works on the same segments the population-count tasks wrote to and
// records popCntGroup as its antecedent.
// NOTE(review): presumably it waits on that group before summing - confirm in the task's code.
207 prefixSumTask.antecedent = popCntGroup;
208 prefixSumTask.context = context;
209 prefixSumTask.actCellVtxOff = activeCellVertexOffsets.data() + cellOffsets[t] + t;
210 prefixSumTask.actCellIdxOff = activeCellIndexOffsets.data() + cellOffsets[t] + t;
211 prefixSumTask.Nc = Nc;
212 prefixSumTask.elapsed_us = elapsed_us;
213 context->
taskManager->enqueueChild(prefixSumGroup, prefixSumTask);
// Slot cellOffsets[t + 1] + t is Nc entries past the start of threshold t's segment
// (cellOffsets[t] + t), i.e. the extra slot reserved for that threshold.
// NOTE(review): presumably that slot holds the threshold's total after the prefix sum - confirm.
218 vertexOffsets.resize(Nt + 1);
219 indexOffsets.resize(Nt + 1);
220 for (
size_t t = 0; t < Nt; t++) {
221 vertexOffsets[t] = activeCellVertexOffsets.data()[cellOffsets[t + 1] + t];
222 indexOffsets[t] = activeCellIndexOffsets.data()[cellOffsets[t + 1] + t];
224 buildOffsetTable(vertexOffsets);
225 buildOffsetTable(indexOffsets);
233 std::lock_guard<std::mutex> guard(scratchLock);
234 if (!scratchBuffers.empty()) {
235 scratch_ = scratchBuffers.back();
236 scratchBuffers.pop_back();
240 const glm::ivec3 scratchSize = tileSize + glm::ivec3(1);
244 assert(scratch_->size() == byteCount);
251 std::lock_guard<std::mutex> guard(scratchLock);
252 scratchBuffers.push_back(scratch_);
// Entry point taking float thresholds: hands its arguments straight to analyzeGeneric.
// NOTE(review): part of the parameter list (between cellOffsets and thresholds, and the field
// argument) and the tail of the call are elided in this listing; the visible call passes the
// arguments through in declaration order without modification.
257void Cogs::Core::IsoSurfaces::analyze(
Context* context,
258 std::vector<int32_t>& vertexOffsets,
259 std::vector<int32_t>& indexOffsets,
260 std::vector<int32_t>& cellOffsets,
267 const std::vector<float>& thresholds,
268 const bool exteriorIsLess,
270 const glm::ivec3 fieldDim,
271 const glm::ivec3 gridA,
272 const glm::ivec3 gridB,
273 std::atomic<uint64_t>* elapsed_us)
275 analyzeGeneric(context,
276 vertexOffsets, indexOffsets, cellOffsets, cellMap,
277 activeCellCases, activeCellVertexOffsets, activeCellIndexOffsets, activeCellIndices, activeCellIJK,
278 thresholds, exteriorIsLess,
279 field, fieldDim, gridA, gridB,
A Context instance contains all the services, systems and runtime components needed to use Cogs.
std::unique_ptr< class TaskManager > taskManager
TaskManager service instance.
std::unique_ptr< class Variables > variables
Variables service instance.
COGSCORE_DLL_API const std::vector< unsigned char > & indexCountTable()
Contains the Engine, Renderer, resource managers and other systems needed to run Cogs....
uint8_t roundUpToPowerOfTwo(uint8_t x)