1#ifndef KNNCOLLE_L2_NORMALIZED_HP
2#define KNNCOLLE_L2_NORMALIZED_HP
// Algorithm name that L2NormalizedPrebuilt::save() writes to the "ALGORITHM" file of a saved index.
31inline static constexpr const char* l2normalized_prebuilt_save_name =
"knncolle::L2Normalized";
// NOTE(review): listing line 49 (the enclosing function's signature) is absent here. The
// cross-reference at the end of this listing gives it as
// custom_save_for_l2normalized_normalized(), returning a reference to this std::function type.
48template<
typename Normalized_>
// Static callback taking a filesystem path; declared without an initializer, so it starts out empty.
50 static std::function<void(
const std::filesystem::path&)> fun;
// Processes the ndim values at ptr; buffer is the non-const Normalized_ destination that
// callers in this file hand to the wrapped searcher/extractor consumer right after the call.
// NOTE(review): judging by the name, this presumably rescales the vector to unit L2 norm, but
// the accumulation and scaling statements are not part of this listing - confirm against the
// full source.
59template<
typename Data_,
typename Normalized_>
60void l2norm(
const Data_* ptr, std::size_t ndim, Normalized_* buffer) {
// First pass over all ndim coordinates.
62 for (std::size_t d = 0; d < ndim; ++d) {
// Each input value is converted to Normalized_ before it is used further.
63 Normalized_ val = ptr[d];
// Second pass over the same ndim coordinates.
70 for (std::size_t d = 0; d < ndim; ++d) {
// Searcher adaptor: owns an inner Searcher_ and forwards every query to it. Queries given as
// a raw coordinate pointer are first passed through internal::l2norm() into a reusable buffer;
// queries given as an observation index are forwarded as-is.
// NOTE(review): this listing omits some source lines (e.g. access specifiers, closing braces),
// so only the visible statements are described.
78template<
typename Index_,
typename Data_,
typename Distance_,
typename Normalized_,
class Searcher_>
79class L2NormalizedSearcher final :
public Searcher<Index_, Data_, Distance_> {
// Takes ownership of the inner searcher and sizes the scratch buffer to num_dimensions.
// sanisizer::cast converts the dimension count to the vector's size type
// (presumably with a range check - confirm against sanisizer's documentation).
81 L2NormalizedSearcher(std::unique_ptr<Searcher_> searcher, std::size_t num_dimensions) :
82 my_searcher(std::move(searcher)),
83 buffer(sanisizer::cast<I<decltype(buffer.size())> >(num_dimensions))
// Compile-time requirement: Normalized_ must be a floating-point type.
89 static_assert(std::is_floating_point<Normalized_>::value);
// Inner searcher that every query is delegated to.
91 std::unique_ptr<Searcher_> my_searcher;
// Scratch space with one slot per dimension, overwritten by each pointer-based query.
92 std::vector<Normalized_> buffer;
// Query by observation index i: forwarded unchanged to the inner searcher.
95 void search(Index_ i, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
96 my_searcher->search(i, k, output_indices, output_distances);
// Query by coordinates: run ptr through l2norm() into the scratch buffer, then search
// the inner index with that buffer instead of the caller's data.
99 void search(
const Data_* ptr, Index_ k, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
100 auto normalized = buffer.data();
101 internal::l2norm(ptr, buffer.size(), normalized);
102 my_searcher->search(normalized, k, output_indices, output_distances);
// Reports whatever the inner searcher reports.
106 bool can_search_all()
const {
107 return my_searcher->can_search_all();
// Threshold query by observation index i: forwarded unchanged, including the threshold.
110 Index_ search_all(Index_ i, Distance_ threshold, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
111 return my_searcher->search_all(i, threshold, output_indices, output_distances);
// Threshold query by coordinates: same l2norm() step as the pointer-based search() above,
// then the inner searcher's search_all() result is returned directly.
114 Index_ search_all(
const Data_* ptr, Distance_ threshold, std::vector<Index_>* output_indices, std::vector<Distance_>* output_distances) {
115 auto normalized = buffer.data();
116 internal::l2norm(ptr, buffer.size(), normalized);
117 return my_searcher->search_all(normalized, threshold, output_indices, output_distances);
// Forward declaration so that L2NormalizedPrebuilt's loading constructor below can call it.
// Per the cross-reference at the end of this listing, the definition is in load_prebuilt.hpp.
121template<
typename Index_,
typename Data_,
typename Distance_>
122Prebuilt<Index_, Data_, Distance_>*
load_prebuilt_raw(
const std::filesystem::path&);
// Prebuilt index adaptor: wraps an inner Prebuilt whose data type is Normalized_ and exposes
// it as a Prebuilt over Data_. Searchers created from it are L2NormalizedSearcher instances.
// NOTE(review): this listing omits some source lines (e.g. access specifiers, closing braces),
// so only the visible statements are described.
124template<
typename Index_,
typename Data_,
typename Distance_,
typename Normalized_>
125class L2NormalizedPrebuilt final :
public Prebuilt<Index_, Data_, Distance_> {
// Takes ownership of an already-built inner index.
127 L2NormalizedPrebuilt(std::unique_ptr<Prebuilt<Index_, Normalized_, Distance_> > prebuilt) : my_prebuilt(std::move(prebuilt)) {}
// Inner index that all queries and metadata requests are delegated to.
130 std::unique_ptr<Prebuilt<Index_, Normalized_, Distance_> > my_prebuilt;
// Number of observations, as reported by the inner index.
133 Index_ num_observations()
const {
134 return my_prebuilt->num_observations();
// Number of dimensions, as reported by the inner index.
137 std::size_t num_dimensions()
const {
138 return my_prebuilt->num_dimensions();
// Interface-typed factory: returns the result of initialize_known() as a pointer to the
// Searcher base class.
142 std::unique_ptr<Searcher<Index_, Data_, Distance_> > initialize()
const {
143 return initialize_known();
// Concrete-typed factory: wraps the inner index's own initialize_known() searcher in an
// L2NormalizedSearcher. KnownSearcher is derived from the type the inner call points to.
146 auto initialize_known()
const {
147 typedef I<
decltype(*(my_prebuilt->initialize_known()))> KnownSearcher;
148 return std::make_unique<L2NormalizedSearcher<Index_, Data_, Distance_, Normalized_, KnownSearcher> >(my_prebuilt->initialize_known(), my_prebuilt->num_dimensions());
// Saves this index into directory dir as: an "ALGORITHM" file holding
// l2normalized_prebuilt_save_name, a "NORMALIZED" file holding the numeric type tag of
// Normalized_, and an "INDEX" subdirectory holding the inner index.
152 void save(
const std::filesystem::path& dir)
const {
153 quick_save(dir /
"ALGORITHM", l2normalized_prebuilt_save_name, std::strlen(l2normalized_prebuilt_save_name));
154 auto norm_type = get_numeric_type<Normalized_>();
155 quick_save(dir /
"NORMALIZED", &norm_type, 1);
// Fetch the custom save callback registered for this Normalized_ type.
// NOTE(review): the lines that use `cust` (158-161) are not in this listing - presumably it
// is invoked with dir when set; confirm against the full source.
157 auto& cust = custom_save_for_l2normalized_normalized<Normalized_>();
// The inner index gets its own subdirectory, created here before delegating.
162 const auto indexdir = dir /
"INDEX";
163 std::filesystem::create_directory(indexdir);
164 my_prebuilt->save(indexdir);
// Loading constructor: reads the inner index back from dir / "INDEX" via load_prebuilt_raw()
// and takes ownership of the returned raw pointer. No other file in dir is read here.
167 L2NormalizedPrebuilt(
const std::filesystem::path& dir) : my_prebuilt(
load_prebuilt_raw<Index_, Normalized_, Distance_>(dir /
"INDEX")) {}
// Forward declaration only; the template is declared again further down with a default for Matrix_.
170template<
typename Index_,
typename Data_,
typename Normalized_,
typename Matrix_>
171class L2NormalizedMatrix;
// Extractor adaptor: owns an inner Extractor_ and passes each vector it yields through
// internal::l2norm() into a reusable buffer, exposing the data as Normalized_.
// NOTE(review): this listing omits some source lines (e.g. access specifiers, closing braces,
// the return statement of next()), so only the visible statements are described.
173template<
typename Index_,
typename Data_,
typename Normalized_,
class Extractor_ = MatrixExtractor<Data_> >
174class L2NormalizedMatrixExtractor final :
public MatrixExtractor<Normalized_> {
// Takes ownership of the inner extractor and sizes the scratch buffer to dim elements.
176 L2NormalizedMatrixExtractor(std::unique_ptr<Extractor_> extractor, std::size_t dim) :
177 my_extractor(std::move(extractor)),
178 buffer(sanisizer::cast<I<decltype(buffer.size())> >(dim))
// Inner extractor that supplies the raw vectors.
182 std::unique_ptr<Extractor_> my_extractor;
// Scratch space, overwritten on every call to next().
183 std::vector<Normalized_> buffer;
// Fetches the next raw vector from the inner extractor and runs it through l2norm() into
// the scratch buffer. NOTE(review): the return statement is not in this listing -
// presumably it returns the buffer pointer; confirm against the full source.
186 const Normalized_* next() {
187 auto raw = my_extractor->next();
188 auto normalized = buffer.data();
189 internal::l2norm(raw, buffer.size(), normalized);
// Fragments of the L2NormalizedMatrix class template. NOTE(review): the class line and the
// member function signature lines are absent from this listing; the member names below are
// taken from the cross-reference at the end of the listing.
210template<
typename Index_,
typename Data_,
typename Normalized_,
typename Matrix_ = Matrix<Index_, Data_> >
// Compile-time checks on Matrix_: num_observations() must return exactly Index_, and
// new_extractor()->next() must return a pointer to const Data_.
222 static_assert(std::is_same<decltype(std::declval<Matrix_>().num_observations()), Index_>::value);
223 static_assert(std::is_same<typename std::remove_pointer<decltype(std::declval<Matrix_>().new_extractor()->next())>::type,
const Data_>::value);
// The wrapped matrix is held by const reference, not copied.
// NOTE(review): the referenced matrix presumably has to outlive this wrapper - confirm.
225 const Matrix_& my_matrix;
// Body of num_dimensions(): delegates to the wrapped matrix.
229 return my_matrix.num_dimensions();
// Body of num_observations(): delegates to the wrapped matrix.
233 return my_matrix.num_observations();
// Body of new_known_extractor(): wraps the matrix's own known extractor in an
// L2NormalizedMatrixExtractor sized to num_dimensions().
240 typedef I<
decltype(*(my_matrix.new_known_extractor()))> KnownExtractor;
241 return std::make_unique<L2NormalizedMatrixExtractor<Index_, Data_, Normalized_, KnownExtractor> >(my_matrix.new_known_extractor(),
num_dimensions());
// Fragments of the L2NormalizedBuilder class template. NOTE(review): the class line and most
// member lines are absent from this listing; names below are taken from the cross-reference
// at the end of the listing.
264template<
typename Index_,
typename Data_,
typename Distance_,
typename Normalized_,
class Matrix_ = Matrix<Index_, Data_> >
// Start of the BuilderMatrix typedef: per the cross-reference, it selects Matrix_ when
// Matrix_ is a base of NormalizedMatrix, and NormalizedMatrix otherwise.
283 typedef typename std::conditional<
284 std::is_base_of<Matrix_, NormalizedMatrix>::value,
// Inner builder, shared and immutable; it builds indices over Normalized_ data.
296 std::shared_ptr<const Builder<Index_, Normalized_, Distance_, BuilderMatrix> > my_builder;
// Return statement inside build_known_raw(): the inner builder's index is wrapped in an
// L2NormalizedPrebuilt allocated with new and returned as a raw pointer.
// `normalized` is defined on a line not shown in this listing.
315 return new L2NormalizedPrebuilt<Index_, Data_, Distance_, Normalized_>(my_builder->build_unique(normalized));
Interface to build nearest-neighbor indices.
Interface for the input matrix.
Preserve numeric types when saving prebuilt indices.
Interface for prebuilt nearest-neighbor indices.
Interface for searching nearest-neighbor indices.
Interface to build nearest-neighbor search indices.
Definition Builder.hpp:28
virtual Prebuilt< Index_, Data_, Distance_ > * build_raw(const Matrix_ &data) const =0
Wrapper around a builder with L2 normalization.
Definition L2Normalized.hpp:265
L2NormalizedMatrix< Index_, Data_, Normalized_, Matrix_ > NormalizedMatrix
Definition L2Normalized.hpp:270
L2NormalizedBuilder(std::shared_ptr< const Builder< Index_, Normalized_, Distance_, BuilderMatrix > > builder)
Definition L2Normalized.hpp:293
auto build_known_raw(const Matrix_ &data) const
Definition L2Normalized.hpp:313
std::conditional< std::is_base_of< Matrix_, NormalizedMatrix >::value, Matrix_, NormalizedMatrix >::type BuilderMatrix
Definition L2Normalized.hpp:287
auto build_known_shared(const Matrix_ &data) const
Definition L2Normalized.hpp:328
auto build_known_unique(const Matrix_ &data) const
Definition L2Normalized.hpp:321
Wrapper around a matrix with L2 normalization.
Definition L2Normalized.hpp:211
std::size_t num_dimensions() const
Definition L2Normalized.hpp:228
Index_ num_observations() const
Definition L2Normalized.hpp:232
auto new_known_extractor() const
Definition L2Normalized.hpp:239
std::unique_ptr< MatrixExtractor< Normalized_ > > new_extractor() const
Definition L2Normalized.hpp:244
Interface for matrix data.
Definition Matrix.hpp:59
Interface for prebuilt nearest-neighbor search indices.
Definition Prebuilt.hpp:29
Collection of KNN algorithms.
Definition Bruteforce.hpp:29
Prebuilt< Index_, Data_, Distance_ > * load_prebuilt_raw(const std::filesystem::path &dir)
Definition load_prebuilt.hpp:105
void quick_save(const std::filesystem::path &path, const Input_ *const contents, const Length_ length)
Definition utils.hpp:33
std::function< void(const std::filesystem::path &)> & custom_save_for_l2normalized_normalized()
Definition L2Normalized.hpp:49
Miscellaneous utilities for knncolle.