knncolle
Collection of KNN methods in C++
Loading...
Searching...
No Matches
L2Normalized.hpp
Go to the documentation of this file.
1#ifndef KNNCOLLE_L2_NORMALIZED_HP
2#define KNNCOLLE_L2_NORMALIZED_HP
3
#include <vector>
#include <cmath>
#include <memory>
#include <limits>
#include <cstddef>
#include <type_traits>
#include <cstring>
#include <string>
#include <filesystem>
#include <functional>
#include <utility>
13
14#include "Searcher.hpp"
15#include "Prebuilt.hpp"
16#include "Builder.hpp"
17#include "Matrix.hpp"
18#include "NumericType.hpp"
19#include "utils.hpp"
20
26namespace knncolle {
27
/**
 * Name that `L2NormalizedPrebuilt::save()` writes to the `ALGORITHM` file to identify an L2-normalized index.
 *
 * Declared `inline constexpr` without `static`: at namespace scope, `static` would force internal linkage
 * and give every translation unit its own copy, which defeats the purpose of `inline`.
 */
inline constexpr const char* l2normalized_prebuilt_save_name = "knncolle::L2Normalized";
32
/**
 * Access the hook that is run while saving an `L2NormalizedPrebuilt` with a particular `Normalized_` type.
 *
 * Every `Normalized_` owns a separate function-local static `std::function`, which is empty until a caller assigns to it.
 * `L2NormalizedPrebuilt::save()` invokes a non-empty hook with the directory that the index is being saved into.
 *
 * This function relies on `std::function`, so the header must include `<functional>` directly
 * rather than depending on another header to pull it in.
 *
 * @tparam Normalized_ Type of the normalized values whose hook is requested.
 * @return Mutable reference to the hook, so that callers can inspect, assign or clear it.
 */
template<typename Normalized_>
std::function<void(const std::filesystem::path&)>& custom_save_for_l2normalized_normalized() {
    static std::function<void(const std::filesystem::path&)> hook;
    return hook;
}
53
57namespace internal {
58
/**
 * Copy a vector into `buffer` as `Normalized_` values and rescale it to unit L2 norm.
 *
 * Each input value is converted to `Normalized_` before squaring, so the sum of squares
 * is accumulated in the output type rather than in `Data_` (which may be an integer type).
 * If the sum of squares is not positive (e.g., an all-zero or empty vector), the converted
 * values are left unscaled.
 *
 * @tparam Data_ Type of the input values.
 * @tparam Normalized_ Type of the output values.
 * @param ptr Pointer to `ndim` input values.
 * @param ndim Number of dimensions.
 * @param[out] buffer Pointer to `ndim` writable values that receive the result.
 */
template<typename Data_, typename Normalized_>
void l2norm(const Data_* ptr, std::size_t ndim, Normalized_* buffer) {
    Normalized_ sum_of_squares = 0;
    for (std::size_t d = 0; d < ndim; ++d) {
        const Normalized_ converted = ptr[d]; // convert first, so the product below cannot overflow an integer Data_.
        buffer[d] = converted;
        sum_of_squares += converted * converted;
    }

    // Nothing to rescale when the norm is zero (this test is also false for NaN).
    if (!(sum_of_squares > 0)) {
        return;
    }

    const Normalized_ norm = std::sqrt(sum_of_squares);
    for (Normalized_* out = buffer, * const end = buffer + ndim; out != end; ++out) {
        *out /= norm;
    }
}
75
76}
77
78template<typename Index_, typename Data_, typename Distance_, typename Normalized_, class Searcher_>
79class L2NormalizedSearcher final : public Searcher<Index_, Data_, Distance_> {
80public:
81 L2NormalizedSearcher(std::unique_ptr<Searcher_> searcher, std::size_t num_dimensions) :
82 my_searcher(std::move(searcher)),
83 buffer(sanisizer::cast<I<decltype(buffer.size())> >(num_dimensions))
84 {}
85
86private:
87 // No way around this; the L2-normalized values must be floating-point,
88 // so the internal searcher must accept floats.
89 static_assert(std::is_floating_point<Normalized_>::value);
90
91 std::unique_ptr<Searcher_> my_searcher;
92 std::vector<Normalized_> buffer;
93
94public:
95 void search(Index_ i, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
96 my_searcher->search(i, k, output_indices, output_distances);
97 }
98
99 void search(const Data_* ptr, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
100 auto normalized = buffer.data();
101 internal::l2norm(ptr, buffer.size(), normalized);
102 my_searcher->search(normalized, k, output_indices, output_distances);
103 }
104
105public:
106 bool can_search_all() const {
107 return my_searcher->can_search_all();
108 }
109
110 Index_ search_all(Index_ i, Distance_ threshold, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
111 return my_searcher->search_all(i, threshold, output_indices, output_distances);
112 }
113
114 Index_ search_all(const Data_* ptr, Distance_ threshold, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
115 auto normalized = buffer.data();
116 internal::l2norm(ptr, buffer.size(), normalized);
117 return my_searcher->search_all(normalized, threshold, output_indices, output_distances);
118 }
119};
120
121template<typename Index_, typename Data_, typename Distance_>
122Prebuilt<Index_, Data_, Distance_>* load_prebuilt_raw(const std::filesystem::path&);
123
124template<typename Index_, typename Data_, typename Distance_, typename Normalized_>
125class L2NormalizedPrebuilt final : public Prebuilt<Index_, Data_, Distance_> {
126public:
127 L2NormalizedPrebuilt(std::unique_ptr<Prebuilt<Index_, Normalized_, Distance_> > prebuilt) : my_prebuilt(std::move(prebuilt)) {}
128
129private:
130 std::unique_ptr<Prebuilt<Index_, Normalized_, Distance_> > my_prebuilt;
131
132public:
133 Index_ num_observations() const {
134 return my_prebuilt->num_observations();
135 }
136
137 std::size_t num_dimensions() const {
138 return my_prebuilt->num_dimensions();
139 }
140
141public:
142 std::unique_ptr<Searcher<Index_, Data_, Distance_> > initialize() const {
143 return initialize_known();
144 }
145
146 auto initialize_known() const {
147 typedef I<decltype(*(my_prebuilt->initialize_known()))> KnownSearcher;
148 return std::make_unique<L2NormalizedSearcher<Index_, Data_, Distance_, Normalized_, KnownSearcher> >(my_prebuilt->initialize_known(), my_prebuilt->num_dimensions());
149 }
150
151public:
152 void save(const std::filesystem::path& dir) const {
153 quick_save(dir / "ALGORITHM", l2normalized_prebuilt_save_name, std::strlen(l2normalized_prebuilt_save_name));
154 auto norm_type = get_numeric_type<Normalized_>();
155 quick_save(dir / "NORMALIZED", &norm_type, 1);
156
157 auto& cust = custom_save_for_l2normalized_normalized<Normalized_>();
158 if (cust) {
159 cust(dir);
160 }
161
162 const auto indexdir = dir / "INDEX";
163 std::filesystem::create_directory(indexdir);
164 my_prebuilt->save(indexdir);
165 }
166
167 L2NormalizedPrebuilt(const std::filesystem::path& dir) : my_prebuilt(load_prebuilt_raw<Index_, Normalized_, Distance_>(dir / "INDEX")) {}
168};
169
170template<typename Index_, typename Data_, typename Normalized_, typename Matrix_>
171class L2NormalizedMatrix;
172
173template<typename Index_, typename Data_, typename Normalized_, class Extractor_ = MatrixExtractor<Data_> >
174class L2NormalizedMatrixExtractor final : public MatrixExtractor<Normalized_> {
175public:
176 L2NormalizedMatrixExtractor(std::unique_ptr<Extractor_> extractor, std::size_t dim) :
177 my_extractor(std::move(extractor)),
178 buffer(sanisizer::cast<I<decltype(buffer.size())> >(dim))
179 {}
180
181private:
182 std::unique_ptr<Extractor_> my_extractor;
183 std::vector<Normalized_> buffer;
184
185public:
186 const Normalized_* next() {
187 auto raw = my_extractor->next();
188 auto normalized = buffer.data();
189 internal::l2norm(raw, buffer.size(), normalized);
190 return normalized;
191 }
192};
210template<typename Index_, typename Data_, typename Normalized_, typename Matrix_ = Matrix<Index_, Data_> >
211class L2NormalizedMatrix final : public Matrix<Index_, Normalized_> {
212public:
216 L2NormalizedMatrix(const Matrix_& matrix) : my_matrix(matrix) {}
221private:
222 static_assert(std::is_same<decltype(std::declval<Matrix_>().num_observations()), Index_>::value);
223 static_assert(std::is_same<typename std::remove_pointer<decltype(std::declval<Matrix_>().new_extractor()->next())>::type, const Data_>::value);
224
225 const Matrix_& my_matrix;
226
227public:
228 std::size_t num_dimensions() const {
229 return my_matrix.num_dimensions();
230 }
231
232 Index_ num_observations() const {
233 return my_matrix.num_observations();
234 }
235
239 auto new_known_extractor() const {
240 typedef I<decltype(*(my_matrix.new_known_extractor()))> KnownExtractor;
241 return std::make_unique<L2NormalizedMatrixExtractor<Index_, Data_, Normalized_, KnownExtractor> >(my_matrix.new_known_extractor(), num_dimensions());
242 }
243
244 std::unique_ptr<MatrixExtractor<Normalized_> > new_extractor() const {
245 return new_known_extractor();
246 }
247};
248
264template<typename Index_, typename Data_, typename Distance_, typename Normalized_, class Matrix_ = Matrix<Index_, Data_> >
265class L2NormalizedBuilder final : public Builder<Index_, Data_, Distance_, Matrix_> {
266public:
271
283 typedef typename std::conditional<
284 std::is_base_of<Matrix_, NormalizedMatrix>::value,
285 Matrix_,
288
289public:
293 L2NormalizedBuilder(std::shared_ptr<const Builder<Index_, Normalized_, Distance_, BuilderMatrix> > builder) : my_builder(std::move(builder)) {}
294
295private:
296 std::shared_ptr<const Builder<Index_, Normalized_, Distance_, BuilderMatrix> > my_builder;
297
298public:
302 Prebuilt<Index_, Data_, Distance_>* build_raw(const Matrix_& data) const {
303 return build_known_raw(data);
304 }
309public:
313 auto build_known_raw(const Matrix_& data) const {
314 NormalizedMatrix normalized(data);
315 return new L2NormalizedPrebuilt<Index_, Data_, Distance_, Normalized_>(my_builder->build_unique(normalized));
316 }
317
321 auto build_known_unique(const Matrix_& data) const {
322 return std::unique_ptr<I<decltype(*(build_known_raw(data)))> >(build_known_raw(data));
323 }
324
328 auto build_known_shared(const Matrix_& data) const {
329 return std::shared_ptr<I<decltype(*(build_known_raw(data)))> >(build_known_raw(data));
330 }
331};
332
333}
334
335#endif
Interface to build nearest-neighbor indices.
Interface for the input matrix.
Preserve numeric types when saving prebuilt indices.
Interface for prebuilt nearest-neighbor indices.
Interface for searching nearest-neighbor indices.
Interface to build nearest-neighbor search indices.
Definition Builder.hpp:28
virtual Prebuilt< Index_, Data_, Distance_ > * build_raw(const Matrix_ &data) const =0
Wrapper around a builder with L2 normalization.
Definition L2Normalized.hpp:265
L2NormalizedMatrix< Index_, Data_, Normalized_, Matrix_ > NormalizedMatrix
Definition L2Normalized.hpp:270
L2NormalizedBuilder(std::shared_ptr< const Builder< Index_, Normalized_, Distance_, BuilderMatrix > > builder)
Definition L2Normalized.hpp:293
auto build_known_raw(const Matrix_ &data) const
Definition L2Normalized.hpp:313
std::conditional< std::is_base_of< Matrix_, NormalizedMatrix >::value, Matrix_, NormalizedMatrix >::type BuilderMatrix
Definition L2Normalized.hpp:287
auto build_known_shared(const Matrix_ &data) const
Definition L2Normalized.hpp:328
auto build_known_unique(const Matrix_ &data) const
Definition L2Normalized.hpp:321
Wrapper around a matrix with L2 normalization.
Definition L2Normalized.hpp:211
std::size_t num_dimensions() const
Definition L2Normalized.hpp:228
Index_ num_observations() const
Definition L2Normalized.hpp:232
auto new_known_extractor() const
Definition L2Normalized.hpp:239
std::unique_ptr< MatrixExtractor< Normalized_ > > new_extractor() const
Definition L2Normalized.hpp:244
Interface for matrix data.
Definition Matrix.hpp:59
Interface for prebuilt nearest-neighbor search indices.
Definition Prebuilt.hpp:29
Collection of KNN algorithms.
Definition Bruteforce.hpp:29
Prebuilt< Index_, Data_, Distance_ > * load_prebuilt_raw(const std::filesystem::path &dir)
Definition load_prebuilt.hpp:105
void quick_save(const std::filesystem::path &path, const Input_ *const contents, const Length_ length)
Definition utils.hpp:33
std::function< void(const std::filesystem::path &)> & custom_save_for_l2normalized_normalized()
Definition L2Normalized.hpp:49
Miscellaneous utilities for knncolle