Skip to content
Snippets Groups Projects
Commit 3edc0441 authored by Fabian Reister's avatar Fabian Reister
Browse files

Merge remote-tracking branch 'origin/master' into armem/dev

parents 84b15bd8 ac4dbdce
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,8 @@
#include <Inventor/nodes/SoMaterialBinding.h>
#include <Inventor/nodes/SoPointSet.h>
#include <cfloat>
#include <x86intrin.h>
namespace armarx::viz::coin
{
struct VisualizationPointCloud : TypedElementVisualization<SoSeparator>
......@@ -32,31 +34,168 @@ namespace armarx::viz::coin
node->addChild(new SoPointSet);
}
/**
 * Portable fallback of update(), chosen by function multi-versioning
 * (target "default") when the SSE4.1 variant cannot be used.
 *
 * Unpacks element.points into the member `buffer`: the first half holds
 * three position floats (x, y, z) per point, the second half three color
 * floats (r, g, b, scaled from [0, 255] to [0, 1]) per point. Pointers into
 * that buffer are then passed to the material and coordinate nodes.
 *
 * Position values that are not less than FLT_MAX (NaN, +infinity) are written
 * as 0, which is the same filter the SSE4.1 variant applies, so both variants
 * produce the same output for the same input.
 *
 * @param element Point cloud element supplying points, transparency and point size.
 * @return Always true.
 */
__attribute__((target("default")))
bool update(ElementType const& element)
{
    data::ColoredPointList const& pcl = element.points;
    const int pclSize = static_cast<int>(pcl.size());

    // One flat buffer: all positions first, all colors second.
    const int singleBufferSize = pclSize * 3;
    buffer.resize(singleBufferSize * 2);

    float* coordsData = buffer.data();
    float* colorsData = buffer.data() + singleBufferSize;

    // Scale factor from 8-bit color channels to [0, 1].
    const float conv = 1.0f / 255.0f;

    // A comparison with NaN is false, so NaN (and anything >= FLT_MAX) becomes 0.
    const auto finiteOrZero = [](float value) { return value < FLT_MAX ? value : 0.0f; };

    data::ColoredPoint const* pclData = pcl.data();
    for (int i = 0; i < pclSize; ++i)
    {
        data::ColoredPoint const& point = pclData[i];
        const int base = i * 3;

        colorsData[base + 0] = point.color.r * conv;
        colorsData[base + 1] = point.color.g * conv;
        colorsData[base + 2] = point.color.b * conv;

        coordsData[base + 0] = finiteOrZero(point.x);
        coordsData[base + 1] = finiteOrZero(point.y);
        coordsData[base + 2] = finiteOrZero(point.z);
    }

    // The nodes receive pointers into `buffer`, which is a member of this
    // visualization and is only resized by update().
    pclMat->diffuseColor.setValuesPointer(pclSize, colorsData);
    pclMat->ambientColor.setValuesPointer(pclSize, colorsData);
    pclMat->transparency = element.transparency;

    pclCoords->point.setValuesPointer(pclSize, coordsData);

    pclStyle->pointSize = element.pointSizeInPixels;

    return true;
}
// SSE4.1 variant of update(): converts four colored points per loop iteration
// into the packed float layout stored in `buffer` (3 position floats per point,
// followed at offset singleBufferSize by 3 color floats per point).
// NOTE(review): the loads below treat each data::ColoredPoint as four 32-bit
// words (x, y, z, packed color) - confirm against the type definition.
__attribute__((target("sse4.1")))
bool update(ElementType const& element)
{
data::ColoredPointList const& pcl = element.points;
int pclSize = (int)pcl.size();
// Size each half of the buffer for pclSize + 3 points, because the last loop
// iteration always writes four points. The float count is then rounded up to a
// multiple of 16. Only the size is rounded; every load/store below is the
// unaligned variant, so no address alignment is required.
int singleBufferSize = (pclSize + 3) * 3;
if (singleBufferSize % 16 != 0)
{
singleBufferSize += 16 - singleBufferSize % 16;
}
buffer.resize(singleBufferSize * 2);
float* positionsData = buffer.data();
float* colorsData = buffer.data() + singleBufferSize;
// View the input as raw floats; all offsets below are counted in floats (4 per point)
float* pclIn = (float*)pcl.data();
float* colorsOut = colorsData;
float* positionsOut = positionsData;
__m128 convColor = _mm_set1_ps(1.0f / 255.0f);
// _mm_set_epi32 takes its arguments from the highest lane to the lowest,
// so lane k holds the float offset of point k: 0, 4, 8, 12
__m128i offsetBase = _mm_set_epi32(12, 8, 4, 0);
// Four points per iteration = 16 floats
__m128i offsetIncrement = _mm_set1_epi32(16);
// Float offset of the last valid point, used to clamp the final iteration
__m128i maxOffset = _mm_set1_epi32(4 * (pclSize - 1));
// Threshold for the position filter below
__m128 floatMax = _mm_set1_ps(FLT_MAX);
// std::uint64_t timerStart = __rdtsc();
// Work on four points at a time
for (int i = 0; i < pclSize; i += 4)
{
// Ensure that the offsets do not exceed the input size:
// lanes past the end re-read the last point instead of reading out of bounds
__m128i offsets = _mm_min_epi32(offsetBase, maxOffset);
// Load four colored points, one per register
// Memory layout of a colored point (highest lane to lowest lane): c z y x
__m128 cp0 = _mm_loadu_ps(pclIn + _mm_extract_epi32(offsets, 0));
__m128 cp1 = _mm_loadu_ps(pclIn + _mm_extract_epi32(offsets, 1));
__m128 cp2 = _mm_loadu_ps(pclIn + _mm_extract_epi32(offsets, 2));
__m128 cp3 = _mm_loadu_ps(pclIn + _mm_extract_epi32(offsets, 3));
// Rotate the lanes of each colored point, so that we can blend them together
// Lane indices of the source: 0 -> x, 1 -> y, 2 -> z, 3 -> c
// shiftedP0: c z y x
//__m128i shiftedP0 = _mm_shuffle_epi32((__m128i)cp0, _MM_SHUFFLE(3, 2, 1, 0));
__m128i shiftedP0 = (__m128i)cp0;
// shiftedP1: x c z y
__m128i shiftedP1 = _mm_shuffle_epi32((__m128i)cp1, _MM_SHUFFLE(0, 3, 2, 1));
// shiftedP2: y x c z
__m128i shiftedP2 = _mm_shuffle_epi32((__m128i)cp2, _MM_SHUFFLE(1, 0, 3, 2));
// shiftedP3: z y x c
__m128i shiftedP3 = _mm_shuffle_epi32((__m128i)cp3, _MM_SHUFFLE(2, 1, 0, 3));
// Blend together entries from two colored points to gather the packed position data
// (a set mask bit selects the lane from the second operand)
// [x] (P1) [z y x] (P0) ==> 1000 = 8
__m128 p0 = _mm_blend_ps((__m128)shiftedP0, (__m128)shiftedP1, 8);
// [y x] (P2) [z y] (P1) ==> 1100 = 12
__m128 p1 = _mm_blend_ps((__m128)shiftedP1, (__m128)shiftedP2, 12);
// [z y x] (P3) [z] (P2) ==> 1110 = 14
__m128 p2 = _mm_blend_ps((__m128)shiftedP2, (__m128)shiftedP3, 14);
// Zero every position value that is not less than FLT_MAX:
// NaNs (the comparison is false for them), +infinity and FLT_MAX itself
p0 = _mm_and_ps(p0, _mm_cmplt_ps(p0, floatMax));
p1 = _mm_and_ps(p1, _mm_cmplt_ps(p1, floatMax));
p2 = _mm_and_ps(p2, _mm_cmplt_ps(p2, floatMax));
// Store the position data (3 registers = 12 floats = 4 xyz positions)
_mm_storeu_ps(positionsOut + 0, p0);
_mm_storeu_ps(positionsOut + 4, p1);
_mm_storeu_ps(positionsOut + 8, p2);
// Blend together the entries from all four colored points to gather packed color data
// [c] (P0) [c z y] (P1) ==> 0111 ==> 7
__m128 c01 = _mm_blend_ps((__m128)shiftedP0, (__m128)shiftedP1, 7);
// [y x c] (P2) [c] (P3) ==> 0001 ==> 1
__m128 c23 = _mm_blend_ps((__m128)shiftedP2, (__m128)shiftedP3, 1);
// [c c] (c01) [c c] (c23) ==> 0011 ==> 3
// Resulting lanes, highest to lowest: c(P0) c(P1) c(P2) c(P3)
__m128i c0123 = _mm_castps_si128(_mm_blend_ps(c01, c23, 3));
// Extract the color channels from the packed color data
// c = b g r a (8-bit each)
// Range [0, 255]
// The byte shift moves the whole 128-bit register; masking every 32-bit lane
// with 0xFF afterwards keeps byte 1 / 2 / 3 of that lane
__m128i redI = _mm_and_si128(_mm_bsrli_si128(c0123, 1), _mm_set1_epi32(0xFF));
__m128i greenI = _mm_and_si128(_mm_bsrli_si128(c0123, 2), _mm_set1_epi32(0xFF));
__m128i blueI = _mm_and_si128(_mm_bsrli_si128(c0123, 3), _mm_set1_epi32(0xFF));
// Convert the integer color channels to float channels
// Range [0.0, 1.0]
__m128 red = _mm_mul_ps(_mm_cvtepi32_ps(redI), convColor);
__m128 green = _mm_mul_ps(_mm_cvtepi32_ps(greenI), convColor);
__m128 blue = _mm_mul_ps(_mm_cvtepi32_ps(blueI), convColor);
// Construct the output memory order (4 color values in 3 registers)
// [r g b] [r
// g b] [r g
// b] [r g b]
// Lane 3 belongs to P0 and lane 0 to P3, hence the descending indices.
// Subscripting an __m128 relies on the GCC/Clang vector extension.
__m128 c0 = _mm_setr_ps(red[3], green[3], blue[3], red[2]);
__m128 c1 = _mm_setr_ps(green[2], blue[2], red[1], green[1]);
__m128 c2 = _mm_setr_ps(blue[1], red[0], green[0], blue[0]);
// Store the color data (3 registers = 12 floats = 4 rgb colors)
_mm_storeu_ps(colorsOut + 0, c0);
_mm_storeu_ps(colorsOut + 4, c1);
_mm_storeu_ps(colorsOut + 8, c2);
// Advance the output pointers by 4 values (3 floats per position/color)
colorsOut += 12;
positionsOut += 12;
// Move the input offsets to the next 4 colored points
offsetBase = _mm_add_epi32(offsetBase, offsetIncrement);
}
// std::uint64_t timerEnd = __rdtsc();
// int timerDiff = (int)(timerEnd - timerStart);
// float ticksPerPoint = (float)timerDiff / pclSize;
// printf("%c[2KUpdate Time %d\tT/Point %g\n", 27, timerDiff, ticksPerPoint);
// Pass pointers into `buffer` to the nodes; only pclSize entries are announced,
// so the padding written by the last iteration is not part of the point set
pclMat->diffuseColor.setValuesPointer(pclSize, colorsData);
pclMat->ambientColor.setValuesPointer(pclSize, colorsData);
pclMat->transparency = element.transparency;
pclCoords->point.setValuesPointer(pclSize, positionsData);
pclStyle->pointSize = element.pointSizeInPixels;
......@@ -69,5 +208,7 @@ namespace armarx::viz::coin
std::vector<SbColor> colors;
std::vector<SbVec3f> coords;
std::vector<float> buffer;
};
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment