knncolle_annoy
Annoy nearest neighbors in knncolle
Loading...
Searching...
No Matches
knncolle_annoy.hpp
Go to the documentation of this file.
1#ifndef KNNCOLLE_ANNOY_HPP
2#define KNNCOLLE_ANNOY_HPP
3
4#include <vector>
5#include <type_traits>
6#include <algorithm>
7#include <memory>
8
10#include "annoy/annoylib.h"
11#include "annoy/kissrandom.h"
12
22namespace knncolle_annoy {
23
32 int num_trees = 50;
33
39 double search_mult = -1;
40};
41
42template<class Distance_, typename Dim_, typename Index_, typename Float_, typename InternalIndex_, typename InternalData_>
43class AnnoyPrebuilt;
44
57template<class Distance_, typename Dim_, typename Index_, typename Float_, typename InternalIndex_, typename InternalData_>
58class AnnoySearcher : public knncolle::Searcher<Index_, Float_> {
59private:
61
62 static constexpr bool same_internal_data = std::is_same<Float_, InternalData_>::value;
63 typename std::conditional<!same_internal_data, std::vector<InternalData_>, bool>::type my_buffer, my_distances;
64
65 static constexpr bool same_internal_index = std::is_same<Index_, InternalIndex_>::value;
66 std::vector<InternalIndex_> my_indices;
67
68 int get_search_k(int k) const {
69 if (my_parent->my_search_mult < 0) {
70 return -1;
71 } else {
72 return my_parent->my_search_mult * static_cast<double>(k) + 0.5; // rounded.
73 }
74 }
75
76public:
81 if constexpr(!same_internal_data) {
82 my_buffer.resize(my_parent->my_dim);
83 }
84 }
89private:
90 auto obtain_pointers(std::vector<Index_>* output_indices, std::vector<Float_>* output_distances, Index_ k) {
91 std::vector<InternalIndex_>* icopy_ptr = &my_indices;
92 if (output_indices) {
93 if constexpr(same_internal_index) {
94 icopy_ptr = output_indices;
95 }
96 }
97 icopy_ptr->clear();
98 icopy_ptr->reserve(k);
99
100 std::vector<InternalData_>* dcopy_ptr = NULL;
101 if (output_distances) {
102 if constexpr(same_internal_data) {
103 dcopy_ptr = output_distances;
104 } else {
105 dcopy_ptr = &my_distances;
106 }
107 dcopy_ptr->clear();
108 dcopy_ptr->reserve(k);
109 }
110
111 return std::make_pair(icopy_ptr, dcopy_ptr);
112 }
113
114 template<typename Type_>
115 static void remove_self(std::vector<Type_>& vec, size_t at) {
116 if (at < vec.size()) {
117 vec.erase(vec.begin() + at);
118 } else {
119 vec.pop_back();
120 }
121 }
122
123 template<typename Source_, typename Dest_>
124 static void copy_skip_self(const std::vector<Source_>& source, std::vector<Dest_>& dest, size_t at) {
125 auto sIt = source.begin();
126 size_t end = source.size();
127 dest.clear();
128 dest.reserve(end - 1);
129
130 if (at < end) {
131 dest.insert(dest.end(), sIt, sIt + at);
132 dest.insert(dest.end(), sIt + at + 1, source.end());
133 } else {
134 // Just in case we're full of ties at duplicate points, such that 'c'
135 // is not in the set. Note that, if self_found=false, we must have at
136 // least 'k+2' points for 'c' to not be detected as its own neighbor.
137 // Thus there is no need to worry whether 'end - 1 != k'; we
138 // are guaranteed to return 'k' elements in this case.
139 dest.insert(dest.end(), sIt, sIt + end - 1);
140 }
141 }
142
143public:
147 void search(Index_ i, Index_ k, std::vector<Index_>* output_indices, std::vector<Float_>* output_distances) {
148 Index_ kp1 = k + 1; // +1, as it forgets to discard 'self'.
149 auto ptrs = obtain_pointers(output_indices, output_distances, kp1);
150 auto icopy_ptr = ptrs.first;
151 auto dcopy_ptr = ptrs.second;
152
153 my_parent->my_index.get_nns_by_item(i, kp1, get_search_k(kp1), icopy_ptr, dcopy_ptr);
154
155 size_t at;
156 {
157 const auto& cur_i = *icopy_ptr;
158 at = cur_i.size();
159 InternalIndex_ icopy = i;
160 for (size_t x = 0, end = cur_i.size(); x < end; ++x) {
161 if (cur_i[x] == icopy) {
162 at = x;
163 break;
164 }
165 }
166 }
167
168 if (output_indices) {
169 if constexpr(same_internal_index) {
170 remove_self(*output_indices, at);
171 } else {
172 copy_skip_self(my_indices, *output_indices, at);
173 }
174 }
175
176 if (output_distances) {
177 if constexpr(same_internal_data) {
178 remove_self(*output_distances, at);
179 } else {
180 copy_skip_self(my_distances, *output_distances, at);
181 }
182 }
183 }
184
185private:
186 void search_raw(const InternalData_* query, Index_ k, std::vector<Index_>* output_indices, std::vector<Float_>* output_distances) {
187 auto ptrs = obtain_pointers(output_indices, output_distances, k);
188 auto icopy_ptr = ptrs.first;
189 auto dcopy_ptr = ptrs.second;
190
191 my_parent->my_index.get_nns_by_vector(query, k, get_search_k(k), icopy_ptr, dcopy_ptr);
192
193 if (output_indices) {
194 if constexpr(!same_internal_index) {
195 output_indices->clear();
196 output_indices->insert(output_indices->end(), my_indices.begin(), my_indices.end());
197 }
198 }
199
200 if (output_distances) {
201 if constexpr(!same_internal_data) {
202 output_distances->clear();
203 output_distances->insert(output_distances->end(), my_distances.begin(), my_distances.end());
204 }
205 }
206 }
207
208public:
212 void search(const Float_* query, Index_ k, std::vector<Index_>* output_indices, std::vector<Float_>* output_distances) {
213 if constexpr(same_internal_data) {
214 search_raw(query, k, output_indices, output_distances);
215 } else {
216 std::copy_n(query, my_parent->my_dim, my_buffer.begin());
217 search_raw(my_buffer.data(), k, output_indices, output_distances);
218 }
219 }
220};
221
236template<class Distance_, typename Dim_, typename Index_, typename Float_, typename InternalIndex_, typename InternalData_>
237class AnnoyPrebuilt : public knncolle::Prebuilt<Dim_, Index_, Float_> {
238public:
242 template<class Matrix_>
243 AnnoyPrebuilt(const Matrix_& data, const AnnoyOptions& options) :
244 my_dim(data.num_dimensions()),
245 my_obs(data.num_observations()),
246 my_search_mult(options.search_mult),
247 my_index(my_dim)
248 {
249 typedef typename Matrix_::data_type Data_;
250 auto work = data.create_workspace();
251 if constexpr(std::is_same<Data_, InternalData_>::value) {
252 for (Index_ i = 0; i < my_obs; ++i) {
253 auto ptr = data.get_observation(work);
254 my_index.add_item(i, ptr);
255 }
256 } else {
257 std::vector<InternalData_> incoming(my_dim);
258 for (Index_ i = 0; i < my_obs; ++i) {
259 auto ptr = data.get_observation(work);
260 std::copy_n(ptr, my_dim, incoming.begin());
261 my_index.add_item(i, incoming.data());
262 }
263 }
264
265 my_index.build(options.num_trees);
266 return;
267 }
272private:
273 Dim_ my_dim;
274 Index_ my_obs;
275 double my_search_mult;
276 Annoy::AnnoyIndex<InternalIndex_, InternalData_, Distance_, Annoy::Kiss64Random, Annoy::AnnoyIndexSingleThreadedBuildPolicy> my_index;
277
278 friend class AnnoySearcher<Distance_, Dim_, Index_, Float_, InternalIndex_, InternalData_>;
279
280public:
284 Dim_ num_dimensions() const {
285 return my_dim;
286 }
287
291 Index_ num_observations() const {
292 return my_obs;
293 }
294
298 std::unique_ptr<knncolle::Searcher<Index_, Float_> > initialize() const {
299 return std::make_unique<AnnoySearcher<Distance_, Dim_, Index_, Float_, InternalIndex_, InternalData_> >(this);
300 }
301};
302
325template<
326 class Distance_ = Annoy::Euclidean,
328 typename Float_ = double,
329 typename InternalIndex_ = typename Matrix_::index_type,
330 typename InternalData_ = float>
331class AnnoyBuilder : public knncolle::Builder<Matrix_, Float_> {
332private:
333 AnnoyOptions my_options;
334
335public:
339 AnnoyBuilder(AnnoyOptions options) : my_options(std::move(options)) {}
340
344 AnnoyBuilder() = default;
345
351 return my_options;
352 }
353
354public:
361};
362
363}
364
365#endif
Perform an approximate nearest neighbor search with Annoy.
Definition knncolle_annoy.hpp:331
AnnoyBuilder(AnnoyOptions options)
Definition knncolle_annoy.hpp:339
AnnoyOptions & get_options()
Definition knncolle_annoy.hpp:350
knncolle::Prebuilt< typename Matrix_::dimension_type, typename Matrix_::index_type, Float_ > * build_raw(const Matrix_ &data) const
Definition knncolle_annoy.hpp:358
Prebuilt index for an Annoy search.
Definition knncolle_annoy.hpp:237
Index_ num_observations() const
Definition knncolle_annoy.hpp:291
Dim_ num_dimensions() const
Definition knncolle_annoy.hpp:284
std::unique_ptr< knncolle::Searcher< Index_, Float_ > > initialize() const
Definition knncolle_annoy.hpp:298
Searcher on an Annoy index.
Definition knncolle_annoy.hpp:58
void search(Index_ i, Index_ k, std::vector< Index_ > *output_indices, std::vector< Float_ > *output_distances)
Definition knncolle_annoy.hpp:147
void search(const Float_ *query, Index_ k, std::vector< Index_ > *output_indices, std::vector< Float_ > *output_distances)
Definition knncolle_annoy.hpp:212
Approximate nearest neighbor search with Annoy.
Options for AnnoyBuilder() and AnnoyPrebuilt().
Definition knncolle_annoy.hpp:27
int num_trees
Definition knncolle_annoy.hpp:32
double search_mult
Definition knncolle_annoy.hpp:39