diff --git a/SimoxUtility/CMakeLists.txt b/SimoxUtility/CMakeLists.txt
index 808d0f13b58f99dcb96ddd000b983feec7267c21..a88c86cefac5190551030afe8effefc7569647a4 100644
--- a/SimoxUtility/CMakeLists.txt
+++ b/SimoxUtility/CMakeLists.txt
@@ -80,6 +80,11 @@ SET(SOURCES
     math/pose/invert.cpp
     math/pose/orthogonalize.cpp
 
+    math/statistics/BoxPlotStats.cpp
+    math/statistics/Histogram.cpp
+    math/statistics/measures.cpp
+
+
     shapes/AxisAlignedBoundingBox.cpp
     shapes/json_conversions.cpp
 )
@@ -211,6 +216,10 @@ SET(INCLUDES
     math/similarity/cosine_similarity.h
     math/similarity/angular_similarity.h
 
+    math/statistics/BoxPlotStats.h
+    math/statistics/Histogram.h
+    math/statistics/measures.h
+
     meta/undefined_t/undefined_t.h
     meta/undefined_t/is_set.h
 
diff --git a/SimoxUtility/math.h b/SimoxUtility/math.h
index 4adee69cd2cf8b759b4503f0830c018f789ffe1f..831dc4ca47cab6ec3d30ed744ca82479c25e5b6a 100644
--- a/SimoxUtility/math.h
+++ b/SimoxUtility/math.h
@@ -17,5 +17,6 @@
 #include "math/rescale.h"
 #include "math/scale_value.h"
 #include "math/similarity.h"
+#include "math/statistics.h"
 #include "math/sum.h"
 #include "math/zero.h"
diff --git a/SimoxUtility/math/statistics.h b/SimoxUtility/math/statistics.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f69040d4d7f28db306e08ded492d898d4619e40
--- /dev/null
+++ b/SimoxUtility/math/statistics.h
@@ -0,0 +1,7 @@
+#pragma once
+
+// This file is generated!
+
+#include "statistics/BoxPlotStats.h"
+#include "statistics/Histogram.h"
+#include "statistics/measures.h"
diff --git a/SimoxUtility/math/statistics/BoxPlotStats.cpp b/SimoxUtility/math/statistics/BoxPlotStats.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..301e13f4d09f598b9ffe0c30ed5983230384e119
--- /dev/null
+++ b/SimoxUtility/math/statistics/BoxPlotStats.cpp
@@ -0,0 +1,67 @@
+#include "BoxPlotStats.h"
+
+#include "measures.h"
+
+
+namespace simox::math
+{
+
+
+BoxPlotStats::BoxPlotStats() = default;
+
+
+BoxPlotStats::BoxPlotStats(const std::vector<float>& values, bool isSorted, float whisk) :
+    whisk(whisk)
+{
+    set(values, isSorted);
+}
+
+void BoxPlotStats::set(const std::vector<float>& _values, bool isSorted)
+{
+    // Sort a private copy only when needed; otherwise refer to the input directly.
+    std::vector<float> sortedCopy;
+    if (!isSorted)
+    {
+        sortedCopy = sorted(_values);
+    }
+    const std::vector<float>& values = isSorted ? _values : sortedCopy;
+
+    // math::min() throws if values is empty, before any member is modified.
+    this->minimum = math::min(values, true);
+    this->maximum = math::max(values, true);
+
+    this->lowerQuartile = math::lowerQuartile(values, true);
+    this->median = math::median(values, true);
+    this->upperQuartile = math::upperQuartile(values, true);
+
+    const float iqr = interquartileRange(lowerQuartile, upperQuartile);
+
+    this->minWhisker = lowerQuartile - whisk * iqr;
+    this->maxWhisker = upperQuartile + whisk * iqr;
+
+    // Drop outliers of a previous call so that repeated calls do not accumulate.
+    outliers.clear();
+
+    // Compute outliers and correct whiskers if necessary.
+    {
+        auto it = values.begin();
+        for (; it != values.end() && *it < minWhisker; ++it)
+        {
+            outliers.push_back(*it);
+        }
+        // The lower whisker ends at the smallest value that is not an outlier.
+        // Never dereference end() (reachable e.g. with a negative whisk).
+        minWhisker = (it != values.begin() && it != values.end()) ? *it : minimum;
+    }
+    {
+        auto rit = values.rbegin();
+        for (; rit != values.rend() && *rit > maxWhisker; ++rit)
+        {
+            outliers.push_back(*rit);
+        }
+        // The upper whisker ends at the largest value that is not an outlier.
+        maxWhisker = (rit != values.rbegin() && rit != values.rend()) ? *rit : maximum;
+    }
+}
+
+}
diff --git a/SimoxUtility/math/statistics/BoxPlotStats.h b/SimoxUtility/math/statistics/BoxPlotStats.h
new file mode 100644
index 0000000000000000000000000000000000000000..9295fc7ea5ac3f58df5d8574b7d62eb2bf8ff964
--- /dev/null
+++ b/SimoxUtility/math/statistics/BoxPlotStats.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <vector>
+
+
+namespace simox::math
+{
+
+    /**
+     * @brief Computes and stores statistical measures found in a box plot.
+     *
+     * All measures are public members that are filled by set() (or by the
+     * constructor taking values). Until then, they are zero.
+     */
+    class BoxPlotStats
+    {
+    public:
+
+        /// Construct without data: all measures are zero, no outliers.
+        BoxPlotStats();
+        /**
+         * @brief Construct from the given values, see set().
+         * @param values The data points. Must not be empty.
+         * @param isSorted Whether values are already sorted in ascending order.
+         * @param whisk The factor applied to the interquartile range to get the whisker limits.
+         */
+        BoxPlotStats(const std::vector<float>& values, bool isSorted = false, float whisk = 1.5);
+
+        /**
+         * @brief Compute all measures from the given values using the current whisk.
+         *
+         * Outliers of a previous call are discarded.
+         *
+         * @param values The data points. Must not be empty.
+         * @param isSorted Whether values are already sorted in ascending order.
+         * @throws simox::error::SimoxError if values is empty.
+         */
+        void set(const std::vector<float>& values, bool isSorted = false);
+
+
+        /// Factor of the interquartile range that defines the whisker limits beyond the quartiles.
+        float whisk = 1.5;
+
+        /// The smallest value.
+        float minimum = 0;
+        /// The smallest value that is not an outlier.
+        float minWhisker = 0;
+        float lowerQuartile = 0;
+        float median = 0;
+        float upperQuartile = 0;
+        /// The largest value that is not an outlier.
+        float maxWhisker = 0;
+        /// The largest value.
+        float maximum = 0;
+
+        /// Values below the lower or above the upper whisker limit.
+        std::vector<float> outliers;
+
+    };
+
+}
+
diff --git a/SimoxUtility/math/statistics/Histogram.cpp b/SimoxUtility/math/statistics/Histogram.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e2732f5beb8fad9c5194feef1d5fdd4033edfc28
--- /dev/null
+++ b/SimoxUtility/math/statistics/Histogram.cpp
@@ -0,0 +1,202 @@
+#include "Histogram.h"
+
+#include <algorithm>
+#include <iterator>
+
+
+namespace simox::math
+{
+
+
+Histogram::Histogram()
+{}
+
+Histogram::Histogram(const std::vector<float>& data, std::size_t numBins)
+{
+    setMinMax(data);
+    resetBins(numBins);
+    insert(data);
+}
+
+Histogram::Histogram(const std::vector<float>& data, float min, float max, std::size_t numBins)
+{
+    setMinMax(min, max);
+    resetBins(numBins);
+    insert(data);
+}
+
+void Histogram::resetBins(std::size_t numBins)
+{
+    bins.assign(numBins, 0);
+}
+
+std::size_t Histogram::valueToIndex(float value) const
+{
+    const std::size_t numBins = getNumberOfBins();
+    if (numBins == 0)
+    {
+        return 0;
+    }
+
+    // normalized = 0..1 for values inside [min, max]
+    const float normalized = (value - min) / (max - min);
+
+    // Avoid out-of-bounds errors by clamping to the first and last bin
+    // (if histogram is falsely used, this could lead to peaks at the edges).
+    // The clamping is done before the conversion to an integer, as converting
+    // a negative or too large float to std::size_t is undefined behavior.
+    // The negated comparison also maps NaN (e.g. 0/0 if min == max) to bin 0.
+    if (!(normalized > 0))
+    {
+        return 0;
+    }
+    const float scaled = normalized * static_cast<float>(numBins);
+    if (scaled >= static_cast<float>(numBins))
+    {
+        return numBins - 1;
+    }
+    return std::min(numBins - 1, static_cast<std::size_t>(scaled));
+}
+
+float Histogram::indexToValue(std::size_t index) const
+{
+    float normalized = static_cast<float>(index) / getNumberOfBins();
+    float value = normalized * (max - min) + min;
+    return value;
+}
+
+void Histogram::insert(float value)
+{
+    // Without bins, there is nothing to count into.
+    if (bins.empty())
+    {
+        return;
+    }
+    bins[valueToIndex(value)]++;
+}
+
+void Histogram::insert(const std::vector<float>& values)
+{
+    for (float v : values)
+    {
+        insert(v);
+    }
+}
+
+void Histogram::setMinMax(float min, float max)
+{
+    this->min = min;
+    this->max = max;
+}
+
+void Histogram::setMinMax(const std::vector<float>& data)
+{
+    // No data, no limits: keep the current ones.
+    if (data.empty())
+    {
+        return;
+    }
+    const auto minmax = std::minmax_element(data.begin(), data.end());
+    setMinMax(*minmax.first, *minmax.second);
+}
+
+const std::vector<std::size_t>& Histogram::getBins() const
+{
+    return bins;
+}
+
+std::size_t Histogram::getNumberOfBins() const
+{
+    return bins.size();
+}
+
+float Histogram::getMin() const
+{
+    return min;
+}
+
+float Histogram::getMax() const
+{
+    return max;
+}
+
+std::size_t Histogram::getMinBinIndex() const
+{
+    return static_cast<std::size_t>(
+            std::distance(bins.begin(), std::min_element(bins.begin(), bins.end())));
+}
+
+float Histogram::getMinBinValue() const
+{
+    return indexToValue(getMinBinIndex());
+}
+
+std::size_t Histogram::getMaxBinIndex() const
+{
+    return static_cast<std::size_t>(
+            std::distance(bins.begin(), std::max_element(bins.begin(), bins.end())));
+}
+
+float Histogram::getMaxBinValue() const
+{
+    return indexToValue(getMaxBinIndex());
+}
+
+void Histogram::applyMedianFilter(std::size_t size)
+{
+    std::vector<std::size_t> newBins(bins.size());
+    std::vector<std::size_t> neighborhood;
+
+    for (std::size_t index = 0; index < bins.size(); index++)
+    {
+        // Handle cases at borders. The window is clipped without computing
+        // index - size or index + size + 1 directly, as these can wrap around
+        // for unsigned integers.
+        const std::size_t beginIndex = index - std::min(index, size);
+        const std::size_t endIndex = index + 1 + std::min(size, bins.size() - 1 - index);
+        const std::size_t num = endIndex - beginIndex;  // common case: num == 2*size+1
+
+        // example: index = 3, size = 2
+        // => beginIndex = 3-2 = 1, endIndex = 3+2+1 = 6, num = 6-1 = 5
+        // => neighborhood = [1, 6) == [1,5]
+
+        neighborhood.assign(bins.begin() + beginIndex, bins.begin() + endIndex);
+
+        // Only the element at the median position has to be in its sorted place.
+        std::nth_element(neighborhood.begin(), neighborhood.begin() + num / 2, neighborhood.end());
+
+        newBins[index] = neighborhood[num / 2];  // == median
+        // (ignore cases where num is even, this only happens at the borders)
+    }
+
+    bins = newBins;
+}
+
+std::ostream& operator<<(std::ostream& os, const Histogram& histo)
+{
+    os << "Histogram:\n";
+    os << "min:;" << histo.min << ";max:;" << histo.max << ";\n";
+
+    os << "Index:;";
+    for (std::size_t index = 0; index < histo.bins.size(); index++)
+    {
+        os << index << ";";
+    }
+    os << "\n";
+    os << "Values:;";
+    for (std::size_t index = 0; index < histo.bins.size(); index++)
+    {
+        os << histo.indexToValue(index) << ";";
+    }
+    os << "\n";
+    os << "Counts:;";
+    for (std::size_t index = 0; index < histo.bins.size(); index++)
+    {
+        os << histo.bins[index] << ";";
+    }
+    os << "\n";
+
+    return os;
+}
+
+}
diff --git a/SimoxUtility/math/statistics/Histogram.h b/SimoxUtility/math/statistics/Histogram.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb49b3d299c8290a47fab373f903d0be6ebc2846
--- /dev/null
+++ b/SimoxUtility/math/statistics/Histogram.h
@@ -0,0 +1,103 @@
+#pragma once
+
+#include <vector>
+#include <ostream>
+
+
+namespace simox::math
+{
+
+    /**
+     * @brief Histogram for one-dimensional float data.
+     */
+    class Histogram
+    {
+    public:
+
+        /// No initialization constructor: no bins, limits are 0.
+        Histogram();
+
+        /// Construct with the given data.
+        /// Minimum and maximum are derived automatically.
+        Histogram(const std::vector<float>& data, std::size_t numBins = 128);
+
+        /// Construct with the given data and given limits.
+        Histogram(const std::vector<float>& data, float min, float max, std::size_t numBins = 128);
+
+
+        /// Set the number of bins and set them to 0.
+        void resetBins(std::size_t numBins);
+
+        /// Insert the given value into the histogram.
+        /// Values outside the limits are counted in the first or last bin.
+        /// Does nothing if there are no bins.
+        void insert(float value);
+        /// Inserts the given values into the histogram.
+        void insert(const std::vector<float>& value);
+
+        /// Set the limits.
+        void setMinMax(float min, float max);
+        /// Set the limits from the given data.
+        /// If data is empty, the limits are left unchanged.
+        void setMinMax(const std::vector<float>& data);
+
+
+        /// Get the bins.
+        const std::vector<std::size_t>& getBins() const;
+
+        /// Get the number of bins.
+        std::size_t getNumberOfBins() const;
+
+        /// Transfer the given value to its corresponding bin index.
+        /// The result is clamped to the valid indices (0 if there are no bins).
+        std::size_t valueToIndex(float value) const;
+        /// Get the value at the lower edge of the bin with the given index.
+        float indexToValue(std::size_t index) const;
+
+        /// Get the minimum, i.e. the lower limit.
+        float getMin() const;
+        /// Get the maximum, i.e. the upper limit.
+        float getMax() const;
+
+        /// Return the index of the bin containing the fewest data points.
+        std::size_t getMinBinIndex() const;
+        /// Returns the corresponding value of the bin containing the fewest data points.
+        float getMinBinValue() const;
+
+        /// Return the index of the bin containing the most data points.
+        std::size_t getMaxBinIndex() const;
+        /// Returns the corresponding value of the bin containing the most data points.
+        float getMaxBinValue() const;
+
+
+        /**
+         * @brief Applies a median filter to the histogram bins.
+         *
+         * The size specifies the number of neighbours (in each direction)
+         * considered.
+         * That is, if k = size, 2k+1 values are considered for each point
+         * (k neighbours in each direction).
+         * At the borders, only the existing neighbours are considered.
+         *
+         * @param size the size of the neighbourhood (in each direction)
+         */
+        void applyMedianFilter(std::size_t size = 2);
+
+
+        /// Streams a CSV-like description of the histogram containing the bin data.
+        friend std::ostream& operator<<(std::ostream& os, const Histogram& histo);
+
+
+    private:
+
+        /// The minimum mapped value.
+        float min = 0;
+        /// The maximum mapped value.
+        float max = 0;
+
+        /// The bins.
+        std::vector<std::size_t> bins;
+
+    };
+
+}
diff --git a/SimoxUtility/math/statistics/measures.cpp b/SimoxUtility/math/statistics/measures.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..26236c5edc3a268093d15ca2b412391498082c62
--- /dev/null
+++ b/SimoxUtility/math/statistics/measures.cpp
@@ -0,0 +1,147 @@
+#include "measures.h"
+
+#include <algorithm>
+#include <cmath>
+#include <stdexcept>
+
+#include <SimoxUtility/error/SimoxError.h>
+
+
+/// Throws a simox::error::SimoxError if the given values are empty.
+static void checkNotEmpty(const std::vector<float>& values)
+{
+    if (values.empty())
+    {
+        throw simox::error::SimoxError("Passed vector of values is empty.");
+    }
+}
+
+void simox::math::sort(std::vector<float>& values)
+{
+    std::sort(values.begin(), values.end());
+}
+
+std::vector<float> simox::math::sorted(const std::vector<float>& values)
+{
+    std::vector<float> s = values;
+    std::sort(s.begin(), s.end());
+    return s;
+}
+
+float simox::math::min(const std::vector<float>& values, bool isSorted)
+{
+    checkNotEmpty(values);
+    return isSorted ? values.front() : *std::min_element(values.begin(), values.end());
+}
+
+float simox::math::max(const std::vector<float>& values, bool isSorted)
+{
+    checkNotEmpty(values);
+    return isSorted ? values.back() : *std::max_element(values.begin(), values.end());
+}
+
+float simox::math::mean(const std::vector<float>& values)
+{
+    checkNotEmpty(values);
+
+    float sum = 0;
+    for (float v : values)
+    {
+        sum += v;
+    }
+    return sum / values.size();
+}
+
+float simox::math::stddev(const std::vector<float>& values)
+{
+    return stddev(values, mean(values));
+}
+
+float simox::math::stddev(const std::vector<float>& values, float mean)
+{
+    checkNotEmpty(values);
+    float sum = 0;
+    for (float v : values)
+    {
+        float diff = v - mean;
+        sum += diff * diff;
+    }
+    // Sample variance (n - 1); for a single value, this is 0 / 0, i.e. NaN.
+    float variance = sum / (values.size() - 1);
+    return std::sqrt(variance);
+}
+
+float simox::math::quantile(const std::vector<float>& _values, float p, bool isSorted)
+{
+    checkNotEmpty(_values);
+
+    // Sort a private copy only when needed; otherwise refer to the input directly.
+    std::vector<float> sortedCopy;
+    if (!isSorted)
+    {
+        sortedCopy = sorted(_values);
+    }
+    const std::vector<float>& values = isSorted ? _values : sortedCopy;
+
+    // The negated comparison also maps a NaN p to the smallest value.
+    if (!(p > 0))
+    {
+        return values.front();
+    }
+
+    const float last = static_cast<float>(values.size() - 1);
+    const float location = p < 1 ? p * values.size() : last;
+    // p * size can exceed the last valid index (e.g. p = 0.75 and 2 values),
+    // so everything at or beyond it maps to the largest value.
+    if (location >= last)
+    {
+        return values.back();
+    }
+
+    const std::size_t floor = static_cast<std::size_t>(std::floor(location));
+    const std::size_t ceil = static_cast<std::size_t>(std::ceil(location));
+
+    if (floor == ceil)
+    {
+        return values[floor];
+    }
+    else
+    {
+        const float t = location - floor;
+        return (1 - t) * values[floor] + t * values[ceil];
+    }
+}
+
+float simox::math::lowerQuartile(const std::vector<float>& values, bool isSorted)
+{
+    return quantile(values, .25, isSorted);
+}
+
+float simox::math::median(const std::vector<float>& values, bool isSorted)
+{
+    return quantile(values, .5, isSorted);
+}
+
+float simox::math::upperQuartile(const std::vector<float>& values, bool isSorted)
+{
+    return quantile(values, .75, isSorted);
+}
+
+float simox::math::interquartileRange(const std::vector<float>& _values, bool isSorted)
+{
+    checkNotEmpty(_values);
+
+    std::vector<float> sortedCopy;
+    if (!isSorted)
+    {
+        sortedCopy = sorted(_values);
+    }
+    const std::vector<float>& values = isSorted ? _values : sortedCopy;
+    return interquartileRange(lowerQuartile(values, true), upperQuartile(values, true));
+}
+
+float simox::math::interquartileRange(float lowerQuartile, float upperQuartile)
+{
+    return upperQuartile - lowerQuartile;
+}
+
diff --git a/SimoxUtility/math/statistics/measures.h b/SimoxUtility/math/statistics/measures.h
new file mode 100644
index 0000000000000000000000000000000000000000..67c19c271bf36a6a367cf45992365415b36d147b
--- /dev/null
+++ b/SimoxUtility/math/statistics/measures.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <vector>
+
+
+namespace simox::math
+{
+
+    /// Sort the values in place in ascending order.
+    void sort(std::vector<float>& values);
+    /// Get a copy of the values sorted in ascending order.
+    std::vector<float> sorted(const std::vector<float>& values);
+
+
+    // All measures below throw a simox::error::SimoxError if values is empty.
+    // If isSorted is true, values must already be sorted in ascending order.
+
+    /// Get the smallest value.
+    float min(const std::vector<float>& values, bool isSorted=false);
+    /// Get the largest value.
+    float max(const std::vector<float>& values, bool isSorted=false);
+    /// Get the arithmetic mean.
+    float mean(const std::vector<float>& values);
+
+    /// Get the sample standard deviation (normalized by n - 1) around the mean of the values.
+    float stddev(const std::vector<float>& values);
+    /// Get the sample standard deviation (normalized by n - 1) around the given mean.
+    float stddev(const std::vector<float>& values, float mean);
+
+    /**
+     * @brief Get the p-quantile, linearly interpolated between neighbouring values.
+     * @param p The fraction in [0, 1]; values outside are treated as 0 or 1.
+     */
+    float quantile(const std::vector<float>& values, float p, bool isSorted=false);
+
+    /// Get the 0.25-quantile.
+    float lowerQuartile(const std::vector<float>& values, bool isSorted=false);
+    /// Get the 0.5-quantile.
+    float median(const std::vector<float>& values, bool isSorted=false);
+    /// Get the 0.75-quantile.
+    float upperQuartile(const std::vector<float>& values, bool isSorted=false);
+
+    /// Get the difference between upper and lower quartile of the values.
+    float interquartileRange(const std::vector<float>& values, bool isSorted=false);
+    /// Get the difference between the given upper and lower quartile.
+    float interquartileRange(float lowerQuartile, float upperQuartile);
+
+}