Cogs.Core
Blur.cpp
1#include "Platform/Instrumentation.h"
2#include "Blur.h"
3
4#include "Context.h"
5
6#include "Services/Features.h"
7#include "Services/TaskManager.h"
8
9#include "Foundation/Memory/MemoryBuffer.h"
10#include "Foundation/Platform/Threads.h"
11#include "Foundation/Platform/Timer.h"
12
13#include <vector>
14#include <algorithm>
15#include <iostream>
16
17using namespace Cogs::Core;
18
namespace {
    // Running total of the seconds measured by Blur::boxFilterIterations3D,
    // added to at the end of each call that does not return early.
    double elapsed{ 0.0 };

    // Number of Blur::boxFilterIterations3D calls that ran to the end
    // (calls that return early for an empty volume are not counted).
    uint32_t invocations{ 0 };
}
23
24float Blur::boxFilterWidthFromStdDev(const float stddev,
25 const uint32_t quality)
26{
27#if 1
28 const double C3 = -0.00216225;
29 const double C2 = 0.049443778;
30 const double C1 = -0.405850586;
31 const double C0 = 1.806223754;
32
33 const double slope = ((C3*quality + C2)*quality + C1)*quality + C0;
34
35 return static_cast<float>(slope * stddev);
36#else
37 auto A = (stddev*stddev) / (0.45f*quality);
38 auto B = std::pow(A, 1.f / 1.96f);
39 return B;
40#endif
41}
42
43void Blur::boxFilterIterations3D(Context* context,
44 float* output,
45 const float* input,
46 const glm::uvec3 size,
47 const float radius,
48 const uint32_t iterations)
49{
50
51 auto minSize = std::min(std::min(size.x, size.y), size.z);
52 if (minSize == 0) return;
53
54 auto rolex = Timer::startNew();
55
56 float r = std::max(0.f, std::min(radius, minSize / 2.f));
57
58#ifndef EMSCRIPTEN
59 bool useSSE = context->features->supported(CPUFeature::SSE)
60 && ((size.x & 3) == 0)
61 && ((size.y & 3) == 0)
62 && ((size.z & 3) == 0);
63 //&& (invocations & 1);
64#else
65 bool useSSE = false;
66#endif
67
68 DynamicCpuInstrumentationScope(SCOPE_VOLUMETRIC, "blur", useSSE ? "sse" : "plain");
69
70 const auto concurrency = std::max(1u, (uint32_t)Cogs::Threads::hardwareConcurrency());
71 std::vector<Memory::TypedBuffer<float>> lineA = std::vector<Memory::TypedBuffer<float>>(concurrency);
72 std::vector<Memory::TypedBuffer<float>> lineB = std::vector<Memory::TypedBuffer<float>>(concurrency);;
73 for (uint32_t t = 0; t < concurrency; t++) {
74 lineA[t].resize(16 * (std::max(size.x, std::max(size.y, size.z)) + 4));
75 lineB[t].resize(16 * (std::max(size.x, std::max(size.y, size.z)) + 4));
76 }
77
78#if 1
79 const auto ySplit = std::max(4u, (size.y + concurrency - 1) / concurrency);
80 const auto zSplit = std::max(4u, (size.z + concurrency - 1) / concurrency);
81
82 auto xGroup = context->taskManager->createGroup();
83 for (uint32_t t = 0; t < concurrency; t++) {
84 uint32_t a = zSplit*t;
85 uint32_t b = std::min(a + zSplit, size.z);
86 if (a < b) {
87 if (useSSE) context->taskManager->enqueueChild(xGroup, BoxFilterPassXTaskSSE{ input, output, lineA[t].data(), lineB[t].data(), size, r, iterations, a, b });
88 else context->taskManager->enqueueChild(xGroup, BoxFilterPassXTask{ input, output, lineA[t].data(), lineB[t].data(), size, r, iterations, a, b });
89 }
90 }
91 context->taskManager->wait(xGroup);
92 context->taskManager->destroy(xGroup);
93
94 auto yGroup = context->taskManager->createGroup();
95 for (uint32_t t = 0; t < concurrency; t++) {
96 uint32_t a = zSplit*t;
97 uint32_t b = std::min(a + zSplit, size.z);
98 if (a < b) {
99 if (useSSE) context->taskManager->enqueueChild(yGroup, BoxFilterPassYTaskSSE{ output, output, lineA[t].data(), lineB[t].data(), size, r, iterations, a, b });
100 else context->taskManager->enqueueChild(yGroup, BoxFilterPassYTask{ output, output, lineA[t].data(), lineB[t].data(), size, r, iterations, a, b });
101 }
102 }
103 context->taskManager->wait(yGroup);
104 context->taskManager->destroy(yGroup);
105
106 auto zGroup = context->taskManager->createGroup();
107 for (uint32_t t = 0; t < concurrency; t++) {
108 uint32_t a = ySplit*t;
109 uint32_t b = std::min(a + ySplit, size.y);
110 if (a < b) {
111 if (useSSE) context->taskManager->enqueueChild(zGroup, BoxFilterPassZTaskSSE{ output, output, lineA[t].data(), lineB[t].data(), size, r, iterations, a, b });
112 else context->taskManager->enqueueChild(zGroup, BoxFilterPassZTask{ output, output, lineA[t].data(), lineB[t].data(), size, r, iterations, a, b });
113 }
114 }
115 context->taskManager->wait(zGroup);
116 context->taskManager->destroy(zGroup);
117#elif 1
118 BoxFilterPassXTaskSSE{ input, output, lineA[0].data(), lineB[0].data(), size, r, iterations, 0, size.z }();
119 BoxFilterPassYTaskSSE{ output, output, lineA[0].data(), lineB[0].data(), size, r, iterations, 0, size.z }();
120 BoxFilterPassZTaskSSE{ output, output, lineA[0].data(), lineB[0].data(), size, r, iterations, 0, size.y }();
121#else
122 BoxFilterPassXTask{ input, output, lineA[0].data(), lineB[0].data(), size, r, iterations, 0, size.z }();
123 BoxFilterPassYTask{ output, output, lineA[0].data(), lineB[0].data(), size, r, iterations, 0, size.z }();
124 BoxFilterPassZTask{ output, output, lineA[0].data(), lineB[0].data(), size, r, iterations, 0, size.y }();
125#endif
126
127 elapsed += rolex.elapsedSeconds();
128 invocations++;
129}
// Referenced declarations (summaries carried over from the generated documentation):
//
// Context - A Context instance contains all the services, systems and runtime
//           components needed to use Cogs.
//           Definition: Context.h:83
//
// std::unique_ptr<class Features> features - Features service instance.
//           Definition: Context.h:177
//
// std::unique_ptr<class TaskManager> taskManager - TaskManager service instance.
//           Definition: Context.h:186
//
// Contains the Engine, Renderer, resource managers and other systems needed to
// run Cogs... (summary truncated in the generated documentation)