knncolle
Collection of KNN methods in C++
Loading...
Searching...
No Matches
distances.hpp
Go to the documentation of this file.
1#ifndef KNNCOLLE_DISTANCES_HPP
2#define KNNCOLLE_DISTANCES_HPP
3
4#include <cmath>
5#include <cstddef>
6#include <string>
7#include <unordered_map>
8#include <functional>
9#include <cstring>
10#include <memory>
11#include <filesystem>
12
13#include "utils.hpp"
14
21namespace knncolle {
22
/**
 * @brief Interface for a distance metric.
 *
 * Concrete metrics (e.g. EuclideanDistance, ManhattanDistance in this file)
 * derive from this class and implement raw(), normalize() and denormalize().
 *
 * @tparam Data_ Type of the input coordinates.
 * @tparam Distance_ Type of the computed distance.
 */
template<typename Data_, typename Distance_>
class DistanceMetric {
public:
    // All special members are defaulted explicitly; the destructor is virtual
    // so that instances can be destroyed through a base-class pointer.
    DistanceMetric() = default;
    DistanceMetric(const DistanceMetric&) = default;
    DistanceMetric(DistanceMetric&&) = default;
    DistanceMetric& operator=(const DistanceMetric&) = default;
    DistanceMetric& operator=(DistanceMetric&&) = default;
    virtual ~DistanceMetric() = default;

public:
    /**
     * Compute the "raw" (un-normalized) distance between two vectors.
     *
     * @param num_dimensions Number of elements readable from `x` and `y`.
     * @param x Pointer to the first vector.
     * @param y Pointer to the second vector.
     * @return Raw distance; pass it to normalize() to obtain the normalized value.
     */
    virtual Distance_ raw(std::size_t num_dimensions, const Data_* x, const Data_* y) const = 0;

    /**
     * @param raw A value as returned by raw().
     * @return The normalized distance corresponding to `raw`.
     */
    virtual Distance_ normalize(Distance_ raw) const = 0;

    /**
     * @param norm A value as returned by normalize().
     * @return The raw distance corresponding to `norm`
     * (presumably the inverse of normalize(); subclasses in this file implement it that way).
     */
    virtual Distance_ denormalize(Distance_ norm) const = 0;

public:
    /**
     * Save this metric to disk.
     * The default implementation does not support saving and always throws.
     *
     * @param dir Directory in which to save; unused by the default implementation.
     * @throws std::runtime_error always, unless overridden by a subclass.
     */
    virtual void save([[maybe_unused]] const std::filesystem::path& dir) const {
        throw std::runtime_error("saving is not supported");
    }
};
100
/**
 * Name under which EuclideanDistance is saved to disk and registered for loading.
 * Declared `inline constexpr` (without `static`) so that every translation unit
 * including this header refers to a single definition.
 */
inline constexpr const char* euclidean_distance_save_name = "knncolle::Euclidean";
105
111template<typename Data_, typename Distance_>
112class EuclideanDistance final : public DistanceMetric<Data_, Distance_> {
113public:
117 Distance_ raw(std::size_t num_dimensions, const Data_* x, const Data_* y) const {
118 Distance_ output = 0;
119 for (std::size_t d = 0; d < num_dimensions; ++d) {
120 auto delta = static_cast<Distance_>(x[d]) - static_cast<Distance_>(y[d]); // casting to ensure consistent precision/signedness regardless of Data_.
121 output += delta * delta;
122 }
123 return output;
124 }
125
126 Distance_ normalize(Distance_ raw) const {
127 return std::sqrt(raw);
128 }
129
130 Distance_ denormalize(Distance_ norm) const {
131 return norm * norm;
132 }
133
134 void save(const std::filesystem::path& dir) const {
135 quick_save(dir / "DISTANCE", euclidean_distance_save_name, std::strlen(euclidean_distance_save_name));
136 }
140};
141
/**
 * Name under which ManhattanDistance is saved to disk and registered for loading.
 * Declared `inline constexpr` (without `static`) so that every translation unit
 * including this header refers to a single definition.
 */
inline constexpr const char* manhattan_distance_save_name = "knncolle::Manhattan";
146
153template<typename Data_, typename Distance_>
154class ManhattanDistance final : public DistanceMetric<Data_, Distance_> {
155public:
159 Distance_ raw(std::size_t num_dimensions, const Data_* x, const Data_* y) const {
160 Distance_ output = 0;
161 for (std::size_t d = 0; d < num_dimensions; ++d) {
162 auto delta = static_cast<Distance_>(x[d]) - static_cast<Distance_>(y[d]); // casting to ensure consistent precision/signedness regardless of Data_.
163 output += std::abs(delta);
164 }
165 return output;
166 }
167
168 Distance_ normalize(Distance_ raw) const {
169 return raw;
170 }
171
172 Distance_ denormalize(Distance_ norm) const {
173 return norm;
174 }
175
176 void save(const std::filesystem::path& dir) const {
177 quick_save(dir / "DISTANCE", manhattan_distance_save_name, std::strlen(manhattan_distance_save_name));
178 }
182};
183
191template<typename Data_, typename Distance_>
192using LoadDistanceMetricFunction = std::function<DistanceMetric<Data_, Distance_>* (const std::filesystem::path&)>;
193
194
204template<typename Data_, typename Distance_>
205inline std::unordered_map<std::string, LoadDistanceMetricFunction<Data_, Distance_> >& load_distance_metric_registry() {
206 static std::unordered_map<std::string, LoadDistanceMetricFunction<Data_, Distance_> > registry;
207 return registry;
208}
209
216template<typename Data_, typename Distance_>
219 reg[euclidean_distance_save_name] = [](const std::filesystem::path&) -> DistanceMetric<Data_, Distance_>* { return new EuclideanDistance<Data_, Distance_>; };
220}
221
228template<typename Data_, typename Distance_>
231 reg[manhattan_distance_save_name] = [](const std::filesystem::path&) -> DistanceMetric<Data_, Distance_>* { return new ManhattanDistance<Data_, Distance_>; };
232}
233
/**
 * @brief Exception for unknown distance metrics in load_distance_metric_raw().
 *
 * Carries the name of the metric that could not be resolved and the path of the
 * file from which that name was read, in addition to a human-readable message.
 */
class LoadDistanceMetricNotFoundError final : public std::runtime_error {
public:
    /**
     * @param distance Name of the distance metric that has no registered loader.
     * @param path Path associated with the failed lookup.
     */
    LoadDistanceMetricNotFoundError(std::string distance, std::filesystem::path path) :
        std::runtime_error(compose_message(distance, path)), // built before the arguments are moved from.
        my_distance(std::move(distance)),
        my_path(std::move(path))
    {}

    /**
     * @return Name of the distance metric that could not be found.
     */
    const std::string& get_distance() const {
        return my_distance;
    }

    /**
     * @return Path that was supplied at construction.
     */
    const std::filesystem::path& get_path() const {
        return my_path;
    }

private:
    // Assemble the text reported by what().
    static std::string compose_message(const std::string& distance, const std::filesystem::path& path) {
        std::string text = "cannot find a load_distance_metric_registry() function for '";
        text += distance;
        text += "' at '";
        text += path.string();
        text += "'";
        return text;
    }

    std::string my_distance;
    std::filesystem::path my_path;
};
272
284template<typename Data_, typename Distance_>
286 const auto metric_path = dir / "DISTANCE";
287 const auto metric_name = quick_load_as_string(metric_path);
288
290 auto it = reg.find(metric_name);
291 if (it == reg.end()) {
292 throw LoadDistanceMetricNotFoundError(metric_name, metric_path);
293 }
294
295 return (it->second)(dir);
296}
297
298}
299
300#endif
Interface for a distance metric.
Definition distances.hpp:30
virtual Distance_ normalize(Distance_ raw) const =0
virtual Distance_ denormalize(Distance_ norm) const =0
virtual Distance_ raw(std::size_t num_dimensions, const Data_ *x, const Data_ *y) const =0
virtual void save(const std::filesystem::path &dir) const
Definition distances.hpp:96
Compute Euclidean distances between two input vectors.
Definition distances.hpp:112
Exception for unknown distance metrics in load_distance_metric_raw().
Definition distances.hpp:239
const std::filesystem::path & get_path() const
Definition distances.hpp:268
const std::string & get_distance() const
Definition distances.hpp:261
Compute Manhattan distances between two input vectors.
Definition distances.hpp:154
Collection of KNN algorithms.
Definition Bruteforce.hpp:29
std::function< DistanceMetric< Data_, Distance_ > *(const std::filesystem::path &)> LoadDistanceMetricFunction
Definition distances.hpp:192
void register_load_euclidean_distance()
Definition distances.hpp:217
std::string quick_load_as_string(const std::filesystem::path &path)
Definition utils.hpp:74
void register_load_manhattan_distance()
Definition distances.hpp:229
DistanceMetric< Data_, Distance_ > * load_distance_metric_raw(const std::filesystem::path &dir)
Definition distances.hpp:285
void quick_save(const std::filesystem::path &path, const Input_ *const contents, const Length_ length)
Definition utils.hpp:33
std::unordered_map< std::string, LoadDistanceMetricFunction< Data_, Distance_ > > & load_distance_metric_registry()
Definition distances.hpp:205
Miscellaneous utilities for knncolle