knncolle_annoy
Annoy nearest neighbors in knncolle
Loading...
Searching...
No Matches
knncolle_annoy.hpp
Go to the documentation of this file.
1#ifndef KNNCOLLE_ANNOY_HPP
2#define KNNCOLLE_ANNOY_HPP
3
4#include <vector>
5#include <type_traits>
6#include <algorithm>
7#include <memory>
8#include <cstddef>
9
10#include "knncolle/knncolle.hpp"
11#include "annoy/annoylib.h"
12#include "annoy/kissrandom.h"
13
23namespace knncolle_annoy {
24
33 int num_trees = 50;
34
40 double search_mult = -1;
41};
42
43template<typename Index_, typename Data_, typename Distance_, typename AnnoyDistance_, typename AnnoyIndex_, typename AnnoyData_, class AnnoyRng_, class AnnoyThreadPolicy_>
44class AnnoyPrebuilt;
45
62template<
63 typename Index_,
64 typename Data_,
65 typename Distance_,
66 class AnnoyDistance_,
67 typename AnnoyIndex_ = Index_,
68 typename AnnoyData_ = float,
69 class AnnoyRng_ = Annoy::Kiss64Random,
70 class AnnoyThreadPolicy_ = Annoy::AnnoyIndexSingleThreadedBuildPolicy
71>
72class AnnoySearcher final : public knncolle::Searcher<Index_, Data_, Distance_> {
73private:
75
76 static constexpr bool same_internal_data = std::is_same<Data_, AnnoyData_>::value;
77 typename std::conditional<!same_internal_data, std::vector<AnnoyData_>, bool>::type my_buffer;
78
79 static constexpr bool same_internal_index = std::is_same<Index_, AnnoyIndex_>::value;
80 std::vector<AnnoyIndex_> my_indices;
81
82 static constexpr bool same_internal_distance = std::is_same<Distance_, AnnoyData_>::value;
83 typename std::conditional<!same_internal_distance, std::vector<AnnoyData_>, bool>::type my_distances;
84
85 int get_search_k(int k) const {
86 if (my_parent.my_search_mult < 1) {
87 return -1; // instructs Annoy to use k * num_trees.
88 } else {
89 return my_parent.my_search_mult * static_cast<double>(k) + 0.5; // rounded.
90 }
91 }
92
93public:
98 if constexpr(!same_internal_data) {
99 my_buffer.resize(my_parent.my_dim);
100 }
101 }
106private:
107 std::pair<std::vector<AnnoyIndex_>*, std::vector<AnnoyData_>*> obtain_pointers(std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances, Index_ k) {
108 std::vector<AnnoyIndex_>* icopy_ptr = &my_indices;
109 if (output_indices) {
110 if constexpr(same_internal_index) {
111 icopy_ptr = output_indices;
112 }
113 }
114 icopy_ptr->clear();
115 icopy_ptr->reserve(k);
116
117 std::vector<AnnoyData_>* dcopy_ptr = NULL;
118 if (output_distances) {
119 if constexpr(same_internal_distance) {
120 dcopy_ptr = output_distances;
121 } else {
122 dcopy_ptr = &my_distances;
123 }
124 dcopy_ptr->clear();
125 dcopy_ptr->reserve(k);
126 }
127
128 return std::make_pair(icopy_ptr, dcopy_ptr);
129 }
130
131 template<typename Type_>
132 static void remove_self(std::vector<Type_>& vec, std::size_t at) {
133 if (at < vec.size()) {
134 vec.erase(vec.begin() + at);
135 } else {
136 vec.pop_back();
137 }
138 }
139
140 template<typename Source_, typename Dest_>
141 static void copy_skip_self(const std::vector<Source_>& source, std::vector<Dest_>& dest, std::size_t at) {
142 auto sIt = source.begin();
143 auto end = source.size();
144 dest.clear();
145 dest.reserve(end - 1);
146
147 if (at < end) {
148 dest.insert(dest.end(), sIt, sIt + at);
149 dest.insert(dest.end(), sIt + at + 1, source.end());
150 } else {
151 // Just in case we're full of ties at duplicate points, such that 'c'
152 // is not in the set. Note that, if self_found=false, we must have at
153 // least 'k+2' points for 'c' to not be detected as its own neighbor.
154 // Thus there is no need to worry whether 'end - 1 != k'; we
155 // are guaranteed to return 'k' elements in this case.
156 dest.insert(dest.end(), sIt, sIt + end - 1);
157 }
158 }
159
160public:
161 void search(Index_ i, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
162 Index_ kp1 = k + 1; // +1, as it forgets to discard 'self'.
163 auto ptrs = obtain_pointers(output_indices, output_distances, kp1);
164 auto icopy_ptr = ptrs.first;
165 auto dcopy_ptr = ptrs.second;
166
167 my_parent.my_index.get_nns_by_item(i, kp1, get_search_k(kp1), icopy_ptr, dcopy_ptr);
168
169 std::size_t at;
170 {
171 const auto& cur_i = *icopy_ptr;
172 at = cur_i.size();
173 AnnoyIndex_ icopy = i;
174 for (std::size_t x = 0, end = cur_i.size(); x < end; ++x) {
175 if (cur_i[x] == icopy) {
176 at = x;
177 break;
178 }
179 }
180 }
181
182 if (output_indices) {
183 if constexpr(same_internal_index) {
184 remove_self(*output_indices, at);
185 } else {
186 copy_skip_self(my_indices, *output_indices, at);
187 }
188 }
189
190 if (output_distances) {
191 if constexpr(same_internal_distance) {
192 remove_self(*output_distances, at);
193 } else {
194 copy_skip_self(my_distances, *output_distances, at);
195 }
196 }
197 }
198
199private:
200 void search_raw(const AnnoyData_* query, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
201 auto ptrs = obtain_pointers(output_indices, output_distances, k);
202 auto icopy_ptr = ptrs.first;
203 auto dcopy_ptr = ptrs.second;
204
205 my_parent.my_index.get_nns_by_vector(query, k, get_search_k(k), icopy_ptr, dcopy_ptr);
206
207 if (output_indices) {
208 if constexpr(!same_internal_index) {
209 output_indices->clear();
210 output_indices->insert(output_indices->end(), my_indices.begin(), my_indices.end());
211 }
212 }
213
214 if (output_distances) {
215 if constexpr(!same_internal_distance) {
216 output_distances->clear();
217 output_distances->insert(output_distances->end(), my_distances.begin(), my_distances.end());
218 }
219 }
220 }
221
222public:
223 void search(const Data_* query, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
224 if constexpr(same_internal_data) {
225 search_raw(query, k, output_indices, output_distances);
226 } else {
227 std::copy_n(query, my_parent.my_dim, my_buffer.begin());
228 search_raw(my_buffer.data(), k, output_indices, output_distances);
229 }
230 }
231};
232
249template<
250 typename Index_,
251 typename Data_,
252 typename Distance_,
253 class AnnoyDistance_,
254 typename AnnoyIndex_ = Index_,
255 typename AnnoyData_ = float,
256 class AnnoyRng_ = Annoy::Kiss64Random,
257 class AnnoyThreadPolicy_ = Annoy::AnnoyIndexSingleThreadedBuildPolicy
258>
259class AnnoyPrebuilt final : public knncolle::Prebuilt<Index_, Data_, Distance_> {
260public:
264 template<class Matrix_>
265 AnnoyPrebuilt(const Matrix_& data, const AnnoyOptions& options) :
266 my_dim(data.num_dimensions()),
267 my_obs(data.num_observations()),
268 my_search_mult(options.search_mult),
269 my_index(my_dim)
270 {
271 auto work = data.new_extractor();
272 if constexpr(std::is_same<Data_, AnnoyData_>::value) {
273 for (Index_ i = 0; i < my_obs; ++i) {
274 auto ptr = work->next();
275 my_index.add_item(i, ptr);
276 }
277 } else {
278 std::vector<AnnoyData_> incoming(my_dim);
279 for (Index_ i = 0; i < my_obs; ++i) {
280 auto ptr = work->next();
281 std::copy_n(ptr, my_dim, incoming.begin());
282 my_index.add_item(i, incoming.data());
283 }
284 }
285
286 my_index.build(options.num_trees);
287 return;
288 }
293private:
294 std::size_t my_dim;
295 Index_ my_obs;
296 double my_search_mult;
297 Annoy::AnnoyIndex<AnnoyIndex_, AnnoyData_, AnnoyDistance_, AnnoyRng_, AnnoyThreadPolicy_> my_index;
298
299 friend class AnnoySearcher<Index_, Data_, Distance_, AnnoyDistance_, AnnoyIndex_, AnnoyData_, AnnoyRng_, AnnoyThreadPolicy_>;
300
301public:
302 std::size_t num_dimensions() const {
303 return my_dim;
304 }
305
306 Index_ num_observations() const {
307 return my_obs;
308 }
309
313 std::unique_ptr<knncolle::Searcher<Index_, Data_, Distance_> > initialize() const {
314 return std::make_unique<AnnoySearcher<Index_, Data_, Distance_, AnnoyDistance_, AnnoyIndex_, AnnoyData_, AnnoyRng_, AnnoyThreadPolicy_> >(*this);
315 }
316};
317
345template<
346 typename Index_,
347 typename Data_,
348 typename Distance_,
349 class AnnoyDistance_,
350 typename AnnoyIndex_ = Index_,
351 typename AnnoyData_ = float,
352 class AnnoyRng_ = Annoy::Kiss64Random,
353 class AnnoyThreadPolicy_ = Annoy::AnnoyIndexSingleThreadedBuildPolicy,
354 class Matrix_ = knncolle::Matrix<Index_, Data_>
355>
356class AnnoyBuilder : public knncolle::Builder<Index_, Data_, Distance_, Matrix_> {
357private:
358 AnnoyOptions my_options;
359
360public:
364 AnnoyBuilder(AnnoyOptions options) : my_options(std::move(options)) {}
365
369 AnnoyBuilder() = default;
370
376 return my_options;
377 }
378
379public:
386};
387
388}
389
390#endif
Perform an approximate nearest neighbor search with Annoy.
Definition knncolle_annoy.hpp:356
AnnoyBuilder(AnnoyOptions options)
Definition knncolle_annoy.hpp:364
AnnoyOptions & get_options()
Definition knncolle_annoy.hpp:375
knncolle::Prebuilt< Index_, Data_, Distance_ > * build_raw(const Matrix_ &data) const
Definition knncolle_annoy.hpp:383
Prebuilt index for an Annoy search.
Definition knncolle_annoy.hpp:259
std::unique_ptr< knncolle::Searcher< Index_, Data_, Distance_ > > initialize() const
Definition knncolle_annoy.hpp:313
Searcher on an Annoy index.
Definition knncolle_annoy.hpp:72
Approximate nearest neighbor search with Annoy.
Options for AnnoyBuilder().
Definition knncolle_annoy.hpp:28
int num_trees
Definition knncolle_annoy.hpp:33
double search_mult
Definition knncolle_annoy.hpp:40