Cogs.Core: Extensions/IsoSurfaces/Analyze

#include "IsoSurfaces_internal.h"
#include "Platform/Instrumentation.h"
 
#include "Foundation/Platform/Timer.h"
 
#include <algorithm>
 
using std::min;
using glm::ivec3;
using glm::ivec4;
using glm::vec4;
 
using namespace Cogs::Core;
using Cogs::Memory::TypedBuffer;
 
 
// Analyzes one tile of a 32-bit float scalar field against all thresholds.
//
// For the tile at tile-coordinate `id`, every sample in a (tileSize + 1)^3
// window is compared with the thresholds, four thresholds ("lanes") at a time.
// The per-sample "less than threshold" bits of the eight corners of each cell
// are packed into an 8-bit case code. Cells whose code is neither 0 nor 255
// are appended to the per-threshold output arrays of `g`:
//   - g->cellMap[layerStride*t + cell]        = compacted index of the cell,
//   - g->activeCellCases[layerStride*t + c]   = 8-bit case code,
//   - g->activeCellIndices[layerStride*t + c] = uncompacted cell index,
// where the output range for this tile is reserved with an atomic fetch_add
// on g->cellOffsets[t].
//
// g:  shared analysis state (field, thresholds, grid layout, outputs).
// id: integer tile coordinate; the tile origin is g->tileSize * id.
void Cogs::Core::IsoSurfaces::analyzeTile_f32(AnalyzeGlobalState* g, const glm::ivec3 id)
{
  CpuInstrumentationScope(SCOPE_ISOSURFACES, "analyzeTile_f32");
  auto timer = Timer::startNew();

  const float* field = (const float*)g->field;
  const float* thresholds = (const float*)g->thresholds;
  const ivec3 fieldDim = g->fieldDim;
  const ivec3 tileSize = g->tileSize;
  const ivec3 scratchSize = tileSize + ivec3(1);   // One extra sample per axis to close the last cell.
  const ivec3 M = g->M;
  const auto exteriorIsLess = g->exteriorIsLess;

  // One ivec4 (4 ints) per scratch sample plus one extra element. The element
  // count is evaluated in size_t so it cannot overflow int arithmetic.
  const size_t scratchCount = static_cast<size_t>(scratchSize.x)
                            * static_cast<size_t>(scratchSize.y)
                            * static_cast<size_t>(scratchSize.z);
  auto * scratch = g->scratchAcquire(4 * sizeof(int) * (scratchCount + 1));

  // Cell range [rA, rB) covered by this tile, clamped against the grid extent M.
  ivec3 rA = g->tileSize * id;
  ivec3 rB = glm::min(g->M, rA + g->tileSize);
  const auto tileSizeClamped = glm::min(tileSize, rB - rA);

  // Number of cells per threshold layer; widened before multiplying so that
  // large grids do not overflow int.
  const size_t layerStride = static_cast<size_t>(g->M.x)
                           * static_cast<size_t>(g->M.y)
                           * static_cast<size_t>(g->M.z);

  // Strides inside the scratch volume.
  const int rowStride = scratchSize.x;
  const int sliceStride = scratchSize.x*scratchSize.y;

  for (unsigned tOff = 0; tOff < g->Nt; tOff += 4) {
    auto * s = reinterpret_cast<ivec4*>(scratch->data());

    auto lanes = std::min(4u, g->Nt - tOff);
    assert(0 < lanes);

    // Load up to four thresholds; unused lanes replicate the last valid one so
    // that no read goes past the end of the threshold array.
    vec4 T(thresholds[tOff + std::min(0u, lanes - 1)],
           thresholds[tOff + std::min(1u, lanes - 1)],
           thresholds[tOff + std::min(2u, lanes - 1)],
           thresholds[tOff + std::min(3u, lanes - 1)]);

    // Pass 0: classify every sample of the scratch window against each lane.
    for (int k = 0; k < scratchSize.z; k++) {
      const int kk = k + rA.z + g->gridA.z;
      for (int j = 0; j < scratchSize.y; j++) {
        const int jj = j + rA.y + g->gridA.y;
        for (int i = 0; i < scratchSize.x; i++) {
          const int ii = i + rA.x + g->gridA.x;

          const bool m0 = (0 <= ii) && (0 <= jj) && (0 <= kk);
          const bool m1 = (ii < fieldDim.x) && (jj < fieldDim.y) && (kk < fieldDim.z);

          ivec4 b;
          if (m0 && m1) {
            // Indices are non-negative here, so the widening casts are safe;
            // size_t keeps the linear index from overflowing on large fields.
            const size_t fieldIx = (static_cast<size_t>(kk) * static_cast<size_t>(fieldDim.y)
                                    + static_cast<size_t>(jj)) * static_cast<size_t>(fieldDim.x)
                                 + static_cast<size_t>(ii);
            auto v = field[fieldIx];
            b = ivec4(v < T.x ? 1 : 0, v < T.y ? 1 : 0, v < T.z ? 1 : 0, v < T.w ? 1 : 0);
          }
          else {
            // Samples outside the field take the configured exterior classification.
            b = ivec4(exteriorIsLess);
          }
          s[(k*scratchSize.y + j)*scratchSize.x + i] = b;
        }
      }
    }

    // Pass 1: merge the +x neighbour into bit 1.
    for (int i = 0; i < scratchSize.x*scratchSize.y*scratchSize.z - 1; i++) {
      s[i] = s[i] | (s[i + 1] << 1);
    }

    // Pass 2: merge the +y neighbour pair into bits 2..3.
    for (int i = 0; i < scratchSize.x*(scratchSize.y*scratchSize.z - 1); i++) {
      s[i] = s[i] | (s[i + rowStride] << 2);
    }

    // Pass 3: merge the +z neighbour quad into bits 4..7, and map the
    // "all eight corners set" code 255 to 0 so both uniform cases read as 0.
    for (int i = 0; i < scratchSize.x*scratchSize.y*(scratchSize.z - 1); i++) {
      auto t = s[i] | (s[i + sliceStride] << 4);
      t.x = t.x == 255 ? 0 : t.x;
      t.y = t.y == 255 ? 0 : t.y;
      t.z = t.z == 255 ? 0 : t.z;
      t.w = t.w == 255 ? 0 : t.w;
      s[i] = t;
    }

    // Count cells with a non-zero code per lane inside the clamped tile.
    ivec4 Nc(0);
    for (int k = 0; k < tileSizeClamped.z; k++) {
      for (int j = 0; j < tileSizeClamped.y; j++) {
        for (int i = 0; i < tileSizeClamped.x; i++) {
          ivec4 codes = s[(k*scratchSize.y + j)*scratchSize.x + i];
          Nc.x += codes.x ? 1 : 0;
          Nc.y += codes.y ? 1 : 0;
          Nc.z += codes.z ? 1 : 0;
          Nc.w += codes.w ? 1 : 0;
        }
      }
    }

    // Count number of active cells and allocate space.
    // fetch_add returns the previous offset, i.e. the start of this tile's range.
    ivec4 Oc(0);
    for (unsigned l = 0; l < lanes; l++) {
      const auto t = tOff + l;
      Oc[l] = g->cellOffsets[t].fetch_add(Nc[l]);
    }

    // Emit the active cells into the reserved ranges.
    ivec4 Ni(0);
    for (int k = 0; k < tileSizeClamped.z; k++) {
      for (int j = 0; j < tileSizeClamped.y; j++) {
        for (int i = 0; i < tileSizeClamped.x; i++) {
          const auto I = ivec3(i, j, k) + rA;

          ivec4 codes = s[(k*scratchSize.y + j)*scratchSize.x + i];

          ivec4 none(codes.x == 0 ? 1 : 0,
                     codes.y == 0 ? 1 : 0,
                     codes.z == 0 ? 1 : 0,
                     codes.w == 0 ? 1 : 0);

          // Quick reject when the cell is inactive in every lane.
          if (none.x && none.y && none.z && none.w) continue;

          for (unsigned l = 0; l < lanes; l++) {
            if (none[l]) continue;

            const auto t = tOff + l;
            const auto uncompactedCellIndex = (I.z*M.y + I.y)*M.x + I.x;
            const auto code = codes[l];
            const auto c = Oc[l] + (Ni[l]++);
            g->cellMap[layerStride*t + uncompactedCellIndex] = c;
            g->activeCellCases[layerStride*t + c] = (uint8_t)code;
            g->activeCellIndices[layerStride*t + c] = uncompactedCellIndex;
          }
        }
      }
    }
  }

  g->scratchRelease(scratch);

  // Accumulate the time spent in this tile if the caller asked for timing.
  if (g->elapsed_us != nullptr) {
    g->elapsed_us->fetch_add(timer.elapsedMicroseconds());
  }
}
 
 
// Analyzes one tile of a 16-bit unsigned scalar field against all thresholds.
//
// For the tile at tile-coordinate `id`, every sample in a (tileSize + 1)^3
// window is compared with the thresholds, four thresholds ("lanes") at a time.
// The per-sample "less than threshold" bits of the eight corners of each cell
// are packed into an 8-bit case code. Cells whose code is neither 0 nor 255
// are appended to the per-threshold output arrays of `g`:
//   - g->cellMap[layerStride*t + cell]        = compacted index of the cell,
//   - g->activeCellCases[layerStride*t + c]   = 8-bit case code,
//   - g->activeCellIndices[layerStride*t + c] = uncompacted cell index,
// where the output range for this tile is reserved with an atomic fetch_add
// on g->cellOffsets[t].
//
// g:  shared analysis state (field, thresholds, grid layout, outputs).
// id: integer tile coordinate; the tile origin is g->tileSize * id.
void Cogs::Core::IsoSurfaces::analyzeTile_u16(AnalyzeGlobalState* g, const glm::ivec3 id)
{
  CpuInstrumentationScope(SCOPE_ISOSURFACES, "AnalyzeTile");
  auto timer = Timer::startNew();

  const uint16_t* field = (const uint16_t*)g->field;
  const uint16_t* thresholds = (const uint16_t*)g->thresholds;

  const ivec3 fieldDim = g->fieldDim;
  const ivec3 tileSize = g->tileSize;
  const ivec3 scratchSize = tileSize + ivec3(1);   // One extra sample per axis to close the last cell.
  const ivec3 M = g->M;
  const auto exteriorIsLess = g->exteriorIsLess;

  // One ivec4 (4 ints) per scratch sample plus one extra element, sized in size_t.
  auto * scratch = g->scratchAcquire(4 * sizeof(int) * ((size_t)scratchSize.x * (size_t)scratchSize.y * (size_t)scratchSize.z + 1));

  // Cell range [rA, rB) covered by this tile, clamped against the grid extent M.
  ivec3 rA = g->tileSize * id;
  ivec3 rB = glm::min(g->M, rA + g->tileSize);
  const auto tileSizeClamped = glm::min(tileSize, rB - rA);

  // Number of cells per threshold layer.
  const size_t layerStride = (size_t)g->M.x * (size_t)g->M.y * (size_t)g->M.z;

  // Strides inside the scratch volume.
  const int rowStride = scratchSize.x;
  const int sliceStride = scratchSize.x*scratchSize.y;

  for (unsigned tOff = 0; tOff < g->Nt; tOff += 4) {
    auto * s = reinterpret_cast<ivec4*>(scratch->data());

    auto lanes = std::min(4u, g->Nt - tOff);
    assert(0 < lanes);

    // Load up to four thresholds into a vec4; unused lanes replicate the last
    // valid one so that no read goes past the end of the threshold array.
    vec4 T(thresholds[tOff + std::min(0u, lanes - 1)],
           thresholds[tOff + std::min(1u, lanes - 1)],
           thresholds[tOff + std::min(2u, lanes - 1)],
           thresholds[tOff + std::min(3u, lanes - 1)]);

    // Pass 0: classify every sample of the scratch window against each lane.
    for (int k = 0; k < scratchSize.z; k++) {
      const int kk = k + rA.z + g->gridA.z;
      for (int j = 0; j < scratchSize.y; j++) {
        const int jj = j + rA.y + g->gridA.y;
        for (int i = 0; i < scratchSize.x; i++) {
          const int ii = i + rA.x + g->gridA.x;

          const bool m0 = (0 <= ii) && (0 <= jj) && (0 <= kk);
          const bool m1 = (ii < fieldDim.x) && (jj < fieldDim.y) && (kk < fieldDim.z);

          ivec4 b;
          if (m0 && m1) {
            // Indices are non-negative here, so the widening casts are safe;
            // size_t keeps the linear index from overflowing on large fields.
            const size_t fieldIx = ((size_t)kk * (size_t)fieldDim.y + (size_t)jj) * (size_t)fieldDim.x + (size_t)ii;
            auto v = field[fieldIx];
            b = ivec4(v < T.x ? 1 : 0, v < T.y ? 1 : 0, v < T.z ? 1 : 0, v < T.w ? 1 : 0);
          }
          else {
            // Samples outside the field take the configured exterior classification.
            b = ivec4(exteriorIsLess);
          }
          s[(k*scratchSize.y + j)*scratchSize.x + i] = b;
        }
      }
    }

    // Pass 1: merge the +x neighbour into bit 1.
    for (int i = 0; i < scratchSize.x*scratchSize.y*scratchSize.z - 1; i++) {
      s[i] = s[i] | (s[i + 1] << 1);
    }

    // Pass 2: merge the +y neighbour pair into bits 2..3.
    for (int i = 0; i < scratchSize.x*(scratchSize.y*scratchSize.z - 1); i++) {
      s[i] = s[i] | (s[i + rowStride] << 2);
    }

    // Pass 3: merge the +z neighbour quad into bits 4..7, and map the
    // "all eight corners set" code 255 to 0 so both uniform cases read as 0.
    for (int i = 0; i < scratchSize.x*scratchSize.y*(scratchSize.z - 1); i++) {
      auto t = s[i] | (s[i + sliceStride] << 4);
      t.x = t.x == 255 ? 0 : t.x;
      t.y = t.y == 255 ? 0 : t.y;
      t.z = t.z == 255 ? 0 : t.z;
      t.w = t.w == 255 ? 0 : t.w;
      s[i] = t;
    }

    // Count cells with a non-zero code per lane inside the clamped tile.
    ivec4 Nc(0);
    for (int k = 0; k < tileSizeClamped.z; k++) {
      for (int j = 0; j < tileSizeClamped.y; j++) {
        for (int i = 0; i < tileSizeClamped.x; i++) {
          ivec4 codes = s[(k*scratchSize.y + j)*scratchSize.x + i];
          Nc.x += codes.x ? 1 : 0;
          Nc.y += codes.y ? 1 : 0;
          Nc.z += codes.z ? 1 : 0;
          Nc.w += codes.w ? 1 : 0;
        }
      }
    }

    // Count number of active cells and allocate space.
    // fetch_add returns the previous offset, i.e. the start of this tile's range.
    ivec4 Oc(0);
    for (unsigned l = 0; l < lanes; l++) {
      const auto t = tOff + l;
      Oc[l] = g->cellOffsets[t].fetch_add(Nc[l]);
    }

    // Emit the active cells into the reserved ranges.
    ivec4 Ni(0);
    for (int k = 0; k < tileSizeClamped.z; k++) {
      for (int j = 0; j < tileSizeClamped.y; j++) {
        for (int i = 0; i < tileSizeClamped.x; i++) {
          const auto I = ivec3(i, j, k) + rA;

          ivec4 codes = s[(k*scratchSize.y + j)*scratchSize.x + i];

          ivec4 none(codes.x == 0 ? 1 : 0,
                     codes.y == 0 ? 1 : 0,
                     codes.z == 0 ? 1 : 0,
                     codes.w == 0 ? 1 : 0);

          // Quick reject when the cell is inactive in every lane.
          if (none.x && none.y && none.z && none.w) continue;

          for (unsigned l = 0; l < lanes; l++) {
            if (none[l]) continue;

            const auto t = tOff + l;
            const auto uncompactedCellIndex = (I.z*M.y + I.y)*M.x + I.x;
            const auto code = codes[l];
            const auto c = Oc[l] + (Ni[l]++);
            g->cellMap[layerStride*t + uncompactedCellIndex] = c;
            g->activeCellCases[layerStride*t + c] = (uint8_t)code;
            g->activeCellIndices[layerStride*t + c] = uncompactedCellIndex;
          }
        }
      }
    }
  }

  g->scratchRelease(scratch);

  // Accumulate the time spent in this tile if the caller asked for timing.
  if (g->elapsed_us != nullptr) {
    g->elapsed_us->fetch_add(timer.elapsedMicroseconds());
  }
}