1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2    	// vim: ts=8 sw=2 smarttab
3    	/*
4    	 * Ceph - scalable distributed file system
5    	 *
6    	 * Copyright (C) 2013 Inktank <info@inktank.com>
7    	 *
8    	 * This is free software; you can redistribute it and/or
9    	 * modify it under the terms of the GNU Lesser General Public
10   	 * License version 2.1, as published by the Free Software
11   	 * Foundation.  See file COPYING.
12   	 *
13   	 */
14   	
15   	#ifndef CEPH_OSD_HITSET_H
16   	#define CEPH_OSD_HITSET_H
17   	
18   	#include <string_view>
19   	
20   	#include <boost/scoped_ptr.hpp>
21   	
22   	#include "include/encoding.h"
23   	#include "include/unordered_set.h"
24   	#include "common/bloom_filter.hpp"
25   	#include "common/hobject.h"
26   	
27   	/**
28   	 * generic container for a HitSet
29   	 *
30   	 * Encapsulate a HitSetImpl of any type.  Expose a generic interface
31   	 * to users and wrap the encoded object with a type so that it can be
32   	 * safely decoded later.
33   	 */
34   	
35   	class HitSet {
36   	public:
37   	  typedef enum {
38   	    TYPE_NONE = 0,
39   	    TYPE_EXPLICIT_HASH = 1,
40   	    TYPE_EXPLICIT_OBJECT = 2,
41   	    TYPE_BLOOM = 3
42   	  } impl_type_t;
43   	
44   	  static std::string_view get_type_name(impl_type_t t) {
45   	    switch (t) {
46   	    case TYPE_NONE: return "none";
47   	    case TYPE_EXPLICIT_HASH: return "explicit_hash";
48   	    case TYPE_EXPLICIT_OBJECT: return "explicit_object";
49   	    case TYPE_BLOOM: return "bloom";
50   	    default: return "???";
51   	    }
52   	  }
53   	  std::string_view get_type_name() const {
54   	    if (impl)
55   	      return get_type_name(impl->get_type());
56   	    return get_type_name(TYPE_NONE);
57   	  }
58   	
59   	  /// abstract interface for a HitSet implementation
60   	  class Impl {
61   	  public:
62   	    virtual impl_type_t get_type() const = 0;
63   	    virtual bool is_full() const = 0;
64   	    virtual void insert(const hobject_t& o) = 0;
65   	    virtual bool contains(const hobject_t& o) const = 0;
66   	    virtual unsigned insert_count() const = 0;
67   	    virtual unsigned approx_unique_insert_count() const = 0;
68   	    virtual void encode(ceph::buffer::list &bl) const = 0;
69   	    virtual void decode(ceph::buffer::list::const_iterator& p) = 0;
70   	    virtual void dump(ceph::Formatter *f) const = 0;
71   	    virtual Impl* clone() const = 0;
72   	    virtual void seal() {}
73   	    virtual ~Impl() {}
74   	  };
75   	
76   	  boost::scoped_ptr<Impl> impl;
77   	  bool sealed;
78   	
(1) Event missing_move_assignment: Class "HitSet::Params" may benefit from adding a move assignment operator. See other events which show the copy assignment operator being applied to rvalue(s), where a move assignment may be faster.
Also see events: [copy_assignment]
79   	  class Params {
80   	    /// create an Impl* of the given type
81   	    bool create_impl(impl_type_t t);
82   	
83   	  public:
84   	    class Impl {
85   	    public:
86   	      virtual impl_type_t get_type() const = 0;
87   	      virtual HitSet::Impl *get_new_impl() const = 0;
88   	      virtual void encode(ceph::buffer::list &bl) const {}
89   	      virtual void decode(ceph::buffer::list::const_iterator& p) {}
90   	      virtual void dump(ceph::Formatter *f) const {}
91   	      virtual void dump_stream(std::ostream& o) const {}
92   	      virtual ~Impl() {}
93   	    };
94   	
95   	    Params()  {}
96   	    explicit Params(Impl *i) : impl(i) {}
97   	    virtual ~Params() {}
98   	
99   	    boost::scoped_ptr<Params::Impl> impl;
100  	
101  	    impl_type_t get_type() const {
102  	      if (impl)
103  		return impl->get_type();
104  	      return TYPE_NONE;
105  	    }
106  	
107  	    Params(const Params& o) noexcept;
108  	    const Params& operator=(const Params& o);
109  	
110  	    void encode(ceph::buffer::list &bl) const;
111  	    void decode(ceph::buffer::list::const_iterator& bl);
112  	    void dump(ceph::Formatter *f) const;
113  	    static void generate_test_instances(std::list<HitSet::Params*>& o);
114  	
115  	    friend std::ostream& operator<<(std::ostream& out, const HitSet::Params& p);
116  	  };
117  	
118  	  HitSet() : impl(NULL), sealed(false) {}
119  	  explicit HitSet(Impl *i) : impl(i), sealed(false) {}
120  	  explicit HitSet(const HitSet::Params& params);
121  	
122  	  HitSet(const HitSet& o) {
123  	    sealed = o.sealed;
124  	    if (o.impl)
125  	      impl.reset(o.impl->clone());
126  	    else
127  	      impl.reset(NULL);
128  	  }
129  	  const HitSet& operator=(const HitSet& o) {
130  	    sealed = o.sealed;
131  	    if (o.impl)
132  	      impl.reset(o.impl->clone());
133  	    else
134  	      impl.reset(NULL);
135  	    return *this;
136  	  }
137  	
138  	
139  	  bool is_full() const {
140  	    return impl->is_full();
141  	  }
142  	  /// insert a hash into the set
143  	  void insert(const hobject_t& o) {
144  	    impl->insert(o);
145  	  }
146  	  /// query whether a hash is in the set
147  	  bool contains(const hobject_t& o) const {
148  	    return impl->contains(o);
149  	  }
150  	
151  	  unsigned insert_count() const {
152  	    return impl->insert_count();
153  	  }
154  	  unsigned approx_unique_insert_count() const {
155  	    return impl->approx_unique_insert_count();
156  	  }
157  	  void seal() {
158  	    ceph_assert(!sealed);
159  	    sealed = true;
160  	    impl->seal();
161  	  }
162  	
163  	  void encode(ceph::buffer::list &bl) const;
164  	  void decode(ceph::buffer::list::const_iterator& bl);
165  	  void dump(ceph::Formatter *f) const;
166  	  static void generate_test_instances(std::list<HitSet*>& o);
167  	
168  	private:
169  	  void reset_to_type(impl_type_t type);
170  	};
171  	WRITE_CLASS_ENCODER(HitSet)
172  	WRITE_CLASS_ENCODER(HitSet::Params)
173  	
174  	typedef boost::shared_ptr<HitSet> HitSetRef;
175  	
176  	std::ostream& operator<<(std::ostream& out, const HitSet::Params& p);
177  	
178  	/**
179  	 * explicitly enumerate hash hits in the set
180  	 */
181  	class ExplicitHashHitSet : public HitSet::Impl {
182  	  uint64_t count;
183  	  ceph::unordered_set<uint32_t> hits;
184  	public:
185  	  class Params : public HitSet::Params::Impl {
186  	  public:
187  	    HitSet::impl_type_t get_type() const override {
188  	      return HitSet::TYPE_EXPLICIT_HASH;
189  	    }
190  	    HitSet::Impl *get_new_impl() const override {
191  	      return new ExplicitHashHitSet;
192  	    }
193  	    static void generate_test_instances(std::list<Params*>& o) {
194  	      o.push_back(new Params);
195  	    }
196  	  };
197  	
198  	  ExplicitHashHitSet() : count(0) {}
199  	  explicit ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {}
200  	  ExplicitHashHitSet(const ExplicitHashHitSet &o) : count(o.count),
201  	      hits(o.hits) {}
202  	
203  	  HitSet::Impl *clone() const override {
204  	    return new ExplicitHashHitSet(*this);
205  	  }
206  	
207  	  HitSet::impl_type_t get_type() const override {
208  	    return HitSet::TYPE_EXPLICIT_HASH;
209  	  }
210  	  bool is_full() const override {
211  	    return false;
212  	  }
213  	  void insert(const hobject_t& o) override {
214  	    hits.insert(o.get_hash());
215  	    ++count;
216  	  }
217  	  bool contains(const hobject_t& o) const override {
218  	    return hits.count(o.get_hash());
219  	  }
220  	  unsigned insert_count() const override {
221  	    return count;
222  	  }
223  	  unsigned approx_unique_insert_count() const override {
224  	    return hits.size();
225  	  }
226  	  void encode(ceph::buffer::list &bl) const override {
227  	    ENCODE_START(1, 1, bl);
228  	    encode(count, bl);
229  	    encode(hits, bl);
230  	    ENCODE_FINISH(bl);
231  	  }
232  	  void decode(ceph::buffer::list::const_iterator &bl) override {
233  	    DECODE_START(1, bl);
234  	    decode(count, bl);
235  	    decode(hits, bl);
236  	    DECODE_FINISH(bl);
237  	  }
238  	  void dump(ceph::Formatter *f) const override;
239  	  static void generate_test_instances(std::list<ExplicitHashHitSet*>& o) {
240  	    o.push_back(new ExplicitHashHitSet);
241  	    o.push_back(new ExplicitHashHitSet);
242  	    o.back()->insert(hobject_t());
243  	    o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
244  	    o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
245  	  }
246  	};
247  	WRITE_CLASS_ENCODER(ExplicitHashHitSet)
248  	
249  	/**
250  	 * explicitly enumerate objects in the set
251  	 */
252  	class ExplicitObjectHitSet : public HitSet::Impl {
253  	  uint64_t count;
254  	  ceph::unordered_set<hobject_t> hits;
255  	public:
256  	  class Params : public HitSet::Params::Impl {
257  	  public:
258  	    HitSet::impl_type_t get_type() const override {
259  	      return HitSet::TYPE_EXPLICIT_OBJECT;
260  	    }
261  	    HitSet::Impl *get_new_impl() const override {
262  	      return new ExplicitObjectHitSet;
263  	    }
264  	    static void generate_test_instances(std::list<Params*>& o) {
265  	      o.push_back(new Params);
266  	    }
267  	  };
268  	
269  	  ExplicitObjectHitSet() : count(0) {}
270  	  explicit ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {}
271  	  ExplicitObjectHitSet(const ExplicitObjectHitSet &o) : count(o.count),
272  	      hits(o.hits) {}
273  	
274  	  HitSet::Impl *clone() const override {
275  	    return new ExplicitObjectHitSet(*this);
276  	  }
277  	
278  	  HitSet::impl_type_t get_type() const override {
279  	    return HitSet::TYPE_EXPLICIT_OBJECT;
280  	  }
281  	  bool is_full() const override {
282  	    return false;
283  	  }
284  	  void insert(const hobject_t& o) override {
285  	    hits.insert(o);
286  	    ++count;
287  	  }
288  	  bool contains(const hobject_t& o) const override {
289  	    return hits.count(o);
290  	  }
291  	  unsigned insert_count() const override {
292  	    return count;
293  	  }
294  	  unsigned approx_unique_insert_count() const override {
295  	    return hits.size();
296  	  }
297  	  void encode(ceph::buffer::list &bl) const override {
298  	    ENCODE_START(1, 1, bl);
299  	    encode(count, bl);
300  	    encode(hits, bl);
301  	    ENCODE_FINISH(bl);
302  	  }
303  	  void decode(ceph::buffer::list::const_iterator& bl) override {
304  	    DECODE_START(1, bl);
305  	    decode(count, bl);
306  	    decode(hits, bl);
307  	    DECODE_FINISH(bl);
308  	  }
309  	  void dump(ceph::Formatter *f) const override;
310  	  static void generate_test_instances(std::list<ExplicitObjectHitSet*>& o) {
311  	    o.push_back(new ExplicitObjectHitSet);
312  	    o.push_back(new ExplicitObjectHitSet);
313  	    o.back()->insert(hobject_t());
314  	    o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
315  	    o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
316  	  }
317  	};
318  	WRITE_CLASS_ENCODER(ExplicitObjectHitSet)
319  	
320  	/**
321  	 * use a bloom_filter to track hits to the set
322  	 */
323  	class BloomHitSet : public HitSet::Impl {
324  	  compressible_bloom_filter bloom;
325  	
326  	public:
327  	  HitSet::impl_type_t get_type() const override {
328  	    return HitSet::TYPE_BLOOM;
329  	  }
330  	
331  	  class Params : public HitSet::Params::Impl {
332  	  public:
333  	    HitSet::impl_type_t get_type() const override {
334  	      return HitSet::TYPE_BLOOM;
335  	    }
336  	    HitSet::Impl *get_new_impl() const override {
337  	      return new BloomHitSet;
338  	    }
339  	
340  	    uint32_t fpp_micro;    ///< false positive probability / 1M
341  	    uint64_t target_size;  ///< number of unique insertions we expect to this HitSet
342  	    uint64_t seed;         ///< seed to use when initializing the bloom filter
343  	
344  	    Params()
345  	      : fpp_micro(0), target_size(0), seed(0) {}
346  	    Params(double fpp, uint64_t t, uint64_t s)
347  	      : fpp_micro(fpp * 1000000.0), target_size(t), seed(s) {}
348  	    Params(const Params &o)
349  	      : fpp_micro(o.fpp_micro),
350  		target_size(o.target_size),
351  		seed(o.seed) {}
352  	    ~Params() override {}
353  	
354  	    double get_fpp() const {
355  	      return (double)fpp_micro / 1000000.0;
356  	    }
357  	    void set_fpp(double f) {
358  	      fpp_micro = (unsigned)(llrintl(f * 1000000.0));
359  	    }
360  	
361  	    void encode(ceph::buffer::list& bl) const override {
362  	      ENCODE_START(1, 1, bl);
363  	      encode(fpp_micro, bl);
364  	      encode(target_size, bl);
365  	      encode(seed, bl);
366  	      ENCODE_FINISH(bl);
367  	    }
368  	    void decode(ceph::buffer::list::const_iterator& bl) override {
369  	      DECODE_START(1, bl);
370  	      decode(fpp_micro, bl);
371  	      decode(target_size, bl);
372  	      decode(seed, bl);
373  	      DECODE_FINISH(bl);
374  	    }
375  	    void dump(ceph::Formatter *f) const override;
376  	    void dump_stream(std::ostream& o) const override {
377  	      o << "false_positive_probability: "
378  		<< get_fpp() << ", target_size: " << target_size
379  		<< ", seed: " << seed;
380  	    }
381  	    static void generate_test_instances(std::list<Params*>& o) {
382  	      o.push_back(new Params);
383  	      o.push_back(new Params);
384  	      (*o.rbegin())->fpp_micro = 123456;
385  	      (*o.rbegin())->target_size = 300;
386  	      (*o.rbegin())->seed = 99;
387  	    }
388  	  };
389  	
390  	  BloomHitSet() {}
391  	  BloomHitSet(unsigned inserts, double fpp, int seed)
392  	    : bloom(inserts, fpp, seed)
393  	  {}
394  	  explicit BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size,
395  	                                                    p->get_fpp(),
396  	                                                    p->seed)
397  	  {}
398  	
399  	  BloomHitSet(const BloomHitSet &o) {
400  	    // oh god
401  	    ceph::buffer::list bl;
402  	    o.encode(bl);
403  	    auto bli = std::cbegin(bl);
404  	    this->decode(bli);
405  	  }
406  	
407  	  HitSet::Impl *clone() const override {
408  	    return new BloomHitSet(*this);
409  	  }
410  	
411  	  bool is_full() const override {
412  	    return bloom.is_full();
413  	  }
414  	
415  	  void insert(const hobject_t& o) override {
416  	    bloom.insert(o.get_hash());
417  	  }
418  	  bool contains(const hobject_t& o) const override {
419  	    return bloom.contains(o.get_hash());
420  	  }
421  	  unsigned insert_count() const override {
422  	    return bloom.element_count();
423  	  }
424  	  unsigned approx_unique_insert_count() const override {
425  	    return bloom.approx_unique_element_count();
426  	  }
427  	  void seal() override {
428  	    // aim for a density of .5 (50% of bit set)
429  	    double pc = bloom.density() * 2.0;
430  	    if (pc < 1.0)
431  	      bloom.compress(pc);
432  	  }
433  	
434  	  void encode(ceph::buffer::list &bl) const override {
435  	    ENCODE_START(1, 1, bl);
436  	    encode(bloom, bl);
437  	    ENCODE_FINISH(bl);
438  	  }
439  	  void decode(ceph::buffer::list::const_iterator& bl) override {
440  	    DECODE_START(1, bl);
441  	    decode(bloom, bl);
442  	    DECODE_FINISH(bl);
443  	  }
444  	  void dump(ceph::Formatter *f) const override;
445  	  static void generate_test_instances(std::list<BloomHitSet*>& o) {
446  	    o.push_back(new BloomHitSet);
447  	    o.push_back(new BloomHitSet(10, .1, 1));
448  	    o.back()->insert(hobject_t());
449  	    o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
450  	    o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
451  	  }
452  	};
453  	WRITE_CLASS_ENCODER(BloomHitSet)
454  	
455  	#endif
456