knncolle_annoy
Annoy nearest neighbors in knncolle
Loading...
Searching...
No Matches
knncolle_annoy.hpp
Go to the documentation of this file.
1#ifndef KNNCOLLE_ANNOY_HPP
2#define KNNCOLLE_ANNOY_HPP
3
4#include <vector>
5#include <type_traits>
6#include <algorithm>
7#include <memory>
8
10#include "annoy/annoylib.h"
11#include "annoy/kissrandom.h"
12
22namespace knncolle_annoy {
23
32 int num_trees = 50;
33
39 double search_mult = -1;
40};
41
42template<typename Index_, typename Data_, typename Distance_, typename AnnoyDistance_, typename AnnoyIndex_, typename AnnoyData_, class AnnoyRng_, class AnnoyThreadPolicy_>
43class AnnoyPrebuilt;
44
61template<
62 typename Index_,
63 typename Data_,
64 typename Distance_,
65 class AnnoyDistance_,
66 typename AnnoyIndex_ = Index_,
67 typename AnnoyData_ = float,
68 class AnnoyRng_ = Annoy::Kiss64Random,
69 class AnnoyThreadPolicy_ = Annoy::AnnoyIndexSingleThreadedBuildPolicy
70>
71class AnnoySearcher final : public knncolle::Searcher<Index_, Data_, Distance_> {
72private:
74
75 static constexpr bool same_internal_data = std::is_same<Data_, AnnoyData_>::value;
76 typename std::conditional<!same_internal_data, std::vector<AnnoyData_>, bool>::type my_buffer;
77
78 static constexpr bool same_internal_index = std::is_same<Index_, AnnoyIndex_>::value;
79 std::vector<AnnoyIndex_> my_indices;
80
81 static constexpr bool same_internal_distance = std::is_same<Distance_, AnnoyData_>::value;
82 typename std::conditional<!same_internal_distance, std::vector<AnnoyData_>, bool>::type my_distances;
83
84 int get_search_k(int k) const {
85 if (my_parent.my_search_mult < 0) {
86 return -1;
87 } else {
88 return my_parent.my_search_mult * static_cast<double>(k) + 0.5; // rounded.
89 }
90 }
91
92public:
97 if constexpr(!same_internal_data) {
98 my_buffer.resize(my_parent.my_dim);
99 }
100 }
105private:
106 std::pair<std::vector<AnnoyIndex_>*, std::vector<AnnoyData_>*> obtain_pointers(std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances, Index_ k) {
107 std::vector<AnnoyIndex_>* icopy_ptr = &my_indices;
108 if (output_indices) {
109 if constexpr(same_internal_index) {
110 icopy_ptr = output_indices;
111 }
112 }
113 icopy_ptr->clear();
114 icopy_ptr->reserve(k);
115
116 std::vector<AnnoyData_>* dcopy_ptr = NULL;
117 if (output_distances) {
118 if constexpr(same_internal_distance) {
119 dcopy_ptr = output_distances;
120 } else {
121 dcopy_ptr = &my_distances;
122 }
123 dcopy_ptr->clear();
124 dcopy_ptr->reserve(k);
125 }
126
127 return std::make_pair(icopy_ptr, dcopy_ptr);
128 }
129
130 template<typename Type_>
131 static void remove_self(std::vector<Type_>& vec, size_t at) {
132 if (at < vec.size()) {
133 vec.erase(vec.begin() + at);
134 } else {
135 vec.pop_back();
136 }
137 }
138
139 template<typename Source_, typename Dest_>
140 static void copy_skip_self(const std::vector<Source_>& source, std::vector<Dest_>& dest, size_t at) {
141 auto sIt = source.begin();
142 size_t end = source.size();
143 dest.clear();
144 dest.reserve(end - 1);
145
146 if (at < end) {
147 dest.insert(dest.end(), sIt, sIt + at);
148 dest.insert(dest.end(), sIt + at + 1, source.end());
149 } else {
150 // Just in case we're full of ties at duplicate points, such that 'c'
151 // is not in the set. Note that, if self_found=false, we must have at
152 // least 'k+2' points for 'c' to not be detected as its own neighbor.
153 // Thus there is no need to worry whether 'end - 1 != k'; we
154 // are guaranteed to return 'k' elements in this case.
155 dest.insert(dest.end(), sIt, sIt + end - 1);
156 }
157 }
158
159public:
160 void search(Index_ i, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
161 Index_ kp1 = k + 1; // +1, as it forgets to discard 'self'.
162 auto ptrs = obtain_pointers(output_indices, output_distances, kp1);
163 auto icopy_ptr = ptrs.first;
164 auto dcopy_ptr = ptrs.second;
165
166 my_parent.my_index.get_nns_by_item(i, kp1, get_search_k(kp1), icopy_ptr, dcopy_ptr);
167
168 size_t at;
169 {
170 const auto& cur_i = *icopy_ptr;
171 at = cur_i.size();
172 AnnoyIndex_ icopy = i;
173 for (size_t x = 0, end = cur_i.size(); x < end; ++x) {
174 if (cur_i[x] == icopy) {
175 at = x;
176 break;
177 }
178 }
179 }
180
181 if (output_indices) {
182 if constexpr(same_internal_index) {
183 remove_self(*output_indices, at);
184 } else {
185 copy_skip_self(my_indices, *output_indices, at);
186 }
187 }
188
189 if (output_distances) {
190 if constexpr(same_internal_distance) {
191 remove_self(*output_distances, at);
192 } else {
193 copy_skip_self(my_distances, *output_distances, at);
194 }
195 }
196 }
197
198private:
199 void search_raw(const AnnoyData_* query, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
200 auto ptrs = obtain_pointers(output_indices, output_distances, k);
201 auto icopy_ptr = ptrs.first;
202 auto dcopy_ptr = ptrs.second;
203
204 my_parent.my_index.get_nns_by_vector(query, k, get_search_k(k), icopy_ptr, dcopy_ptr);
205
206 if (output_indices) {
207 if constexpr(!same_internal_index) {
208 output_indices->clear();
209 output_indices->insert(output_indices->end(), my_indices.begin(), my_indices.end());
210 }
211 }
212
213 if (output_distances) {
214 if constexpr(!same_internal_distance) {
215 output_distances->clear();
216 output_distances->insert(output_distances->end(), my_distances.begin(), my_distances.end());
217 }
218 }
219 }
220
221public:
222 void search(const Data_* query, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
223 if constexpr(same_internal_data) {
224 search_raw(query, k, output_indices, output_distances);
225 } else {
226 std::copy_n(query, my_parent.my_dim, my_buffer.begin());
227 search_raw(my_buffer.data(), k, output_indices, output_distances);
228 }
229 }
230};
231
248template<
249 typename Index_,
250 typename Data_,
251 typename Distance_,
252 class AnnoyDistance_,
253 typename AnnoyIndex_ = Index_,
254 typename AnnoyData_ = float,
255 class AnnoyRng_ = Annoy::Kiss64Random,
256 class AnnoyThreadPolicy_ = Annoy::AnnoyIndexSingleThreadedBuildPolicy
257>
258class AnnoyPrebuilt final : public knncolle::Prebuilt<Index_, Data_, Distance_> {
259public:
263 template<class Matrix_>
264 AnnoyPrebuilt(const Matrix_& data, const AnnoyOptions& options) :
265 my_dim(data.num_dimensions()),
266 my_obs(data.num_observations()),
267 my_search_mult(options.search_mult),
268 my_index(my_dim)
269 {
270 auto work = data.new_extractor();
271 if constexpr(std::is_same<Data_, AnnoyData_>::value) {
272 for (Index_ i = 0; i < my_obs; ++i) {
273 auto ptr = work->next();
274 my_index.add_item(i, ptr);
275 }
276 } else {
277 std::vector<AnnoyData_> incoming(my_dim);
278 for (Index_ i = 0; i < my_obs; ++i) {
279 auto ptr = work->next();
280 std::copy_n(ptr, my_dim, incoming.begin());
281 my_index.add_item(i, incoming.data());
282 }
283 }
284
285 my_index.build(options.num_trees);
286 return;
287 }
292private:
293 size_t my_dim;
294 Index_ my_obs;
295 double my_search_mult;
296 Annoy::AnnoyIndex<AnnoyIndex_, AnnoyData_, AnnoyDistance_, AnnoyRng_, AnnoyThreadPolicy_> my_index;
297
298 friend class AnnoySearcher<Index_, Data_, Distance_, AnnoyDistance_, AnnoyIndex_, AnnoyData_, AnnoyRng_, AnnoyThreadPolicy_>;
299
300public:
301 size_t num_dimensions() const {
302 return my_dim;
303 }
304
305 Index_ num_observations() const {
306 return my_obs;
307 }
308
312 std::unique_ptr<knncolle::Searcher<Index_, Data_, Distance_> > initialize() const {
313 return std::make_unique<AnnoySearcher<Index_, Data_, Distance_, AnnoyDistance_, AnnoyIndex_, AnnoyData_, AnnoyRng_, AnnoyThreadPolicy_> >(*this);
314 }
315};
316
344template<
345 typename Index_,
346 typename Data_,
347 typename Distance_,
348 class AnnoyDistance_,
349 typename AnnoyIndex_ = Index_,
350 typename AnnoyData_ = float,
351 class AnnoyRng_ = Annoy::Kiss64Random,
352 class AnnoyThreadPolicy_ = Annoy::AnnoyIndexSingleThreadedBuildPolicy,
353 class Matrix_ = knncolle::Matrix<Index_, Data_>
354>
355class AnnoyBuilder : public knncolle::Builder<Index_, Data_, Distance_, Matrix_> {
356private:
357 AnnoyOptions my_options;
358
359public:
363 AnnoyBuilder(AnnoyOptions options) : my_options(std::move(options)) {}
364
368 AnnoyBuilder() = default;
369
375 return my_options;
376 }
377
378public:
385};
386
387}
388
389#endif
Perform an approximate nearest neighbor search with Annoy.
Definition knncolle_annoy.hpp:355
AnnoyBuilder(AnnoyOptions options)
Definition knncolle_annoy.hpp:363
AnnoyOptions & get_options()
Definition knncolle_annoy.hpp:374
knncolle::Prebuilt< Index_, Data_, Distance_ > * build_raw(const Matrix_ &data) const
Definition knncolle_annoy.hpp:382
Prebuilt index for an Annoy search.
Definition knncolle_annoy.hpp:258
std::unique_ptr< knncolle::Searcher< Index_, Data_, Distance_ > > initialize() const
Definition knncolle_annoy.hpp:312
Searcher on an Annoy index.
Definition knncolle_annoy.hpp:71
Approximate nearest neighbor search with Annoy.
Options for AnnoyBuilder().
Definition knncolle_annoy.hpp:27
int num_trees
Definition knncolle_annoy.hpp:32
double search_mult
Definition knncolle_annoy.hpp:39