1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2    	// vim: ts=8 sw=2 smarttab
3    	
4    	#ifndef CEPH_CRUSH_WRAPPER_H
5    	#define CEPH_CRUSH_WRAPPER_H
6    	
7    	#include <stdlib.h>
8    	#include <map>
9    	#include <set>
10   	#include <string>
11   	
12   	#include <iosfwd>
13   	
14   	#include "include/types.h"
15   	
16   	extern "C" {
17   	#include "crush.h"
18   	#include "hash.h"
19   	#include "mapper.h"
20   	#include "builder.h"
21   	}
22   	
23   	#include "include/ceph_assert.h"
24   	#include "include/err.h"
25   	#include "include/encoding.h"
26   	#include "include/mempool.h"
27   	
28   	namespace ceph {
29   	  class Formatter;
30   	}
31   	
32   	namespace CrushTreeDumper {
33   	typedef mempool::osdmap::map<int64_t,std::string> name_map_t;
34   	}
35   	
(1) Event access_dbuff_const: Calling "encode_raw" indexes array "v" at byte position 7. [details]
36   	WRITE_RAW_ENCODER(crush_rule_mask)   // it's all u8's
37   	
38   	inline void encode(const crush_rule_step &s, ceph::buffer::list &bl)
39   	{
40   	  using ceph::encode;
41   	  encode(s.op, bl);
42   	  encode(s.arg1, bl);
43   	  encode(s.arg2, bl);
44   	}
45   	inline void decode(crush_rule_step &s, ceph::buffer::list::const_iterator &p)
46   	{
47   	  using ceph::decode;
48   	  decode(s.op, p);
49   	  decode(s.arg1, p);
50   	  decode(s.arg2, p);
51   	}
52   	
53   	class CrushWrapper {
54   	public:
55   	  // magic value used by OSDMap for a "default" fallback choose_args, used if
56   	  // the choose_arg_map passed to do_rule does not exist.  if this also
57   	  // doesn't exist, fall back to canonical weights.
58   	  enum {
59   	    DEFAULT_CHOOSE_ARGS = -1
60   	  };
61   	
62   	  std::map<int32_t, std::string> type_map; /* bucket/device type names */
63   	  std::map<int32_t, std::string> name_map; /* bucket/device names */
64   	  std::map<int32_t, std::string> rule_name_map;
65   	
66   	  std::map<int32_t, int32_t> class_map; /* item id -> class id */
67   	  std::map<int32_t, std::string> class_name; /* class id -> class name */
68   	  std::map<std::string, int32_t> class_rname; /* class name -> class id */
69   	  std::map<int32_t, std::map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
70   	  std::map<int64_t, crush_choose_arg_map> choose_args;
71   	
72   	private:
73   	  struct crush_map *crush = nullptr;
74   	
75   	  bool have_uniform_rules = false;
76   	
77   	  /* reverse maps */
78   	  mutable bool have_rmaps = false;
79   	  mutable std::map<std::string, int> type_rmap, name_rmap, rule_name_rmap;
80   	  void build_rmaps() const {
81   	    if (have_rmaps) return;
82   	    build_rmap(type_map, type_rmap);
83   	    build_rmap(name_map, name_rmap);
84   	    build_rmap(rule_name_map, rule_name_rmap);
85   	    have_rmaps = true;
86   	  }
87   	  void build_rmap(const std::map<int, std::string> &f, std::map<std::string, int> &r) const {
88   	    r.clear();
89   	    for (auto p = f.begin(); p != f.end(); ++p)
90   	      r[p->second] = p->first;
91   	  }
92   	
93   	public:
94   	  CrushWrapper(const CrushWrapper& other);
95   	  const CrushWrapper& operator=(const CrushWrapper& other);
96   	
97   	  CrushWrapper() {
98   	    create();
99   	  }
100  	  ~CrushWrapper() {
101  	    if (crush)
102  	      crush_destroy(crush);
103  	    choose_args_clear();
104  	  }
105  	
106  	  crush_map *get_crush_map() { return crush; }
107  	
108  	  /* building */
109  	  void create() {
110  	    if (crush)
111  	      crush_destroy(crush);
112  	    crush = crush_create();
113  	    choose_args_clear();
114  	    ceph_assert(crush);
115  	    have_rmaps = false;
116  	
117  	    set_tunables_default();
118  	  }
119  	
120  	  /**
121  	   * true if any rule has a rule id != its position in the array
122  	   *
123  	   * These indicate "ruleset" IDs that were created by older versions
124  	   * of Ceph.  They are cleaned up in renumber_rules so that eventually
125  	   * we can remove the code for handling them.
126  	   */
127  	  bool has_legacy_rule_ids() const;
128  	
129  	  /**
130  	   * fix rules whose ruleid != ruleset
131  	   *
132  	   * These rules were created in older versions of Ceph.  The concept
133  	   * of a ruleset no longer exists.
134  	   *
135  	   * Return a map of old ID -> new ID.  Caller must update OSDMap
136  	   * to use new IDs.
137  	   */
138  	  std::map<int, int> renumber_rules();
139  	
140  	  /// true if any buckets that aren't straw2
141  	  bool has_non_straw2_buckets() const;
142  	
143  	  // tunables
144  	  void set_tunables_argonaut() {
145  	    crush->choose_local_tries = 2;
146  	    crush->choose_local_fallback_tries = 5;
147  	    crush->choose_total_tries = 19;
148  	    crush->chooseleaf_descend_once = 0;
149  	    crush->chooseleaf_vary_r = 0;
150  	    crush->chooseleaf_stable = 0;
151  	    crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
152  	  }
153  	  void set_tunables_bobtail() {
154  	    crush->choose_local_tries = 0;
155  	    crush->choose_local_fallback_tries = 0;
156  	    crush->choose_total_tries = 50;
157  	    crush->chooseleaf_descend_once = 1;
158  	    crush->chooseleaf_vary_r = 0;
159  	    crush->chooseleaf_stable = 0;
160  	    crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
161  	  }
162  	  void set_tunables_firefly() {
163  	    crush->choose_local_tries = 0;
164  	    crush->choose_local_fallback_tries = 0;
165  	    crush->choose_total_tries = 50;
166  	    crush->chooseleaf_descend_once = 1;
167  	    crush->chooseleaf_vary_r = 1;
168  	    crush->chooseleaf_stable = 0;
169  	    crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
170  	  }
171  	  void set_tunables_hammer() {
172  	    crush->choose_local_tries = 0;
173  	    crush->choose_local_fallback_tries = 0;
174  	    crush->choose_total_tries = 50;
175  	    crush->chooseleaf_descend_once = 1;
176  	    crush->chooseleaf_vary_r = 1;
177  	    crush->chooseleaf_stable = 0;
178  	    crush->allowed_bucket_algs =
179  	      (1 << CRUSH_BUCKET_UNIFORM) |
180  	      (1 << CRUSH_BUCKET_LIST) |
181  	      (1 << CRUSH_BUCKET_STRAW) |
182  	      (1 << CRUSH_BUCKET_STRAW2);
183  	  }
184  	  void set_tunables_jewel() {
185  	    crush->choose_local_tries = 0;
186  	    crush->choose_local_fallback_tries = 0;
187  	    crush->choose_total_tries = 50;
188  	    crush->chooseleaf_descend_once = 1;
189  	    crush->chooseleaf_vary_r = 1;
190  	    crush->chooseleaf_stable = 1;
191  	    crush->allowed_bucket_algs =
192  	      (1 << CRUSH_BUCKET_UNIFORM) |
193  	      (1 << CRUSH_BUCKET_LIST) |
194  	      (1 << CRUSH_BUCKET_STRAW) |
195  	      (1 << CRUSH_BUCKET_STRAW2);
196  	  }
197  	
198  	  void set_tunables_legacy() {
199  	    set_tunables_argonaut();
200  	    crush->straw_calc_version = 0;
201  	  }
202  	  void set_tunables_optimal() {
203  	    set_tunables_jewel();
204  	    crush->straw_calc_version = 1;
205  	  }
206  	  void set_tunables_default() {
207  	    set_tunables_jewel();
208  	    crush->straw_calc_version = 1;
209  	  }
210  	
211  	  int get_choose_local_tries() const {
212  	    return crush->choose_local_tries;
213  	  }
214  	  void set_choose_local_tries(int n) {
215  	    crush->choose_local_tries = n;
216  	  }
217  	
218  	  int get_choose_local_fallback_tries() const {
219  	    return crush->choose_local_fallback_tries;
220  	  }
221  	  void set_choose_local_fallback_tries(int n) {
222  	    crush->choose_local_fallback_tries = n;
223  	  }
224  	
225  	  int get_choose_total_tries() const {
226  	    return crush->choose_total_tries;
227  	  }
228  	  void set_choose_total_tries(int n) {
229  	    crush->choose_total_tries = n;
230  	  }
231  	
232  	  int get_chooseleaf_descend_once() const {
233  	    return crush->chooseleaf_descend_once;
234  	  }
235  	  void set_chooseleaf_descend_once(int n) {
236  	    crush->chooseleaf_descend_once = !!n;
237  	  }
238  	
239  	  int get_chooseleaf_vary_r() const {
240  	    return crush->chooseleaf_vary_r;
241  	  }
242  	  void set_chooseleaf_vary_r(int n) {
243  	    crush->chooseleaf_vary_r = n;
244  	  }
245  	
246  	  int get_chooseleaf_stable() const {
247  	    return crush->chooseleaf_stable;
248  	  }
249  	  void set_chooseleaf_stable(int n) {
250  	    crush->chooseleaf_stable = n;
251  	  }
252  	
253  	  int get_straw_calc_version() const {
254  	    return crush->straw_calc_version;
255  	  }
256  	  void set_straw_calc_version(int n) {
257  	    crush->straw_calc_version = n;
258  	  }
259  	
260  	  unsigned get_allowed_bucket_algs() const {
261  	    return crush->allowed_bucket_algs;
262  	  }
263  	  void set_allowed_bucket_algs(unsigned n) {
264  	    crush->allowed_bucket_algs = n;
265  	  }
266  	
267  	  bool has_argonaut_tunables() const {
268  	    return
269  	      crush->choose_local_tries == 2 &&
270  	      crush->choose_local_fallback_tries == 5 &&
271  	      crush->choose_total_tries == 19 &&
272  	      crush->chooseleaf_descend_once == 0 &&
273  	      crush->chooseleaf_vary_r == 0 &&
274  	      crush->chooseleaf_stable == 0 &&
275  	      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
276  	  }
277  	  bool has_bobtail_tunables() const {
278  	    return
279  	      crush->choose_local_tries == 0 &&
280  	      crush->choose_local_fallback_tries == 0 &&
281  	      crush->choose_total_tries == 50 &&
282  	      crush->chooseleaf_descend_once == 1 &&
283  	      crush->chooseleaf_vary_r == 0 &&
284  	      crush->chooseleaf_stable == 0 &&
285  	      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
286  	  }
287  	  bool has_firefly_tunables() const {
288  	    return
289  	      crush->choose_local_tries == 0 &&
290  	      crush->choose_local_fallback_tries == 0 &&
291  	      crush->choose_total_tries == 50 &&
292  	      crush->chooseleaf_descend_once == 1 &&
293  	      crush->chooseleaf_vary_r == 1 &&
294  	      crush->chooseleaf_stable == 0 &&
295  	      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
296  	  }
297  	  bool has_hammer_tunables() const {
298  	    return
299  	      crush->choose_local_tries == 0 &&
300  	      crush->choose_local_fallback_tries == 0 &&
301  	      crush->choose_total_tries == 50 &&
302  	      crush->chooseleaf_descend_once == 1 &&
303  	      crush->chooseleaf_vary_r == 1 &&
304  	      crush->chooseleaf_stable == 0 &&
305  	      crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
306  					      (1 << CRUSH_BUCKET_LIST) |
307  					      (1 << CRUSH_BUCKET_STRAW) |
308  					      (1 << CRUSH_BUCKET_STRAW2));
309  	  }
310  	  bool has_jewel_tunables() const {
311  	    return
312  	      crush->choose_local_tries == 0 &&
313  	      crush->choose_local_fallback_tries == 0 &&
314  	      crush->choose_total_tries == 50 &&
315  	      crush->chooseleaf_descend_once == 1 &&
316  	      crush->chooseleaf_vary_r == 1 &&
317  	      crush->chooseleaf_stable == 1 &&
318  	      crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
319  					      (1 << CRUSH_BUCKET_LIST) |
320  					      (1 << CRUSH_BUCKET_STRAW) |
321  					      (1 << CRUSH_BUCKET_STRAW2));
322  	  }
323  	
324  	  bool has_optimal_tunables() const {
325  	    return has_jewel_tunables();
326  	  }
327  	  bool has_legacy_tunables() const {
328  	    return has_argonaut_tunables();
329  	  }
330  	
331  	  bool has_nondefault_tunables() const {
332  	    return
333  	      (crush->choose_local_tries != 2 ||
334  	       crush->choose_local_fallback_tries != 5 ||
335  	       crush->choose_total_tries != 19);
336  	  }
337  	  bool has_nondefault_tunables2() const {
338  	    return
339  	      crush->chooseleaf_descend_once != 0;
340  	  }
341  	  bool has_nondefault_tunables3() const {
342  	    return
343  	      crush->chooseleaf_vary_r != 0;
344  	  }
345  	  bool has_nondefault_tunables5() const {
346  	    return
347  	        crush->chooseleaf_stable != 0;
348  	  }
349  	
350  	  bool has_v2_rules() const;
351  	  bool has_v3_rules() const;
352  	  bool has_v4_buckets() const;
353  	  bool has_v5_rules() const;
354  	  bool has_choose_args() const;          // any choose_args
355  	  bool has_incompat_choose_args() const; // choose_args that can't be made compat
356  	
357  	  bool is_v2_rule(unsigned ruleid) const;
358  	  bool is_v3_rule(unsigned ruleid) const;
359  	  bool is_v5_rule(unsigned ruleid) const;
360  	
361  	  std::string get_min_required_version() const {
362  	    if (has_v5_rules() || has_nondefault_tunables5())
363  	      return "jewel";
364  	    else if (has_v4_buckets())
365  	      return "hammer";
366  	    else if (has_nondefault_tunables3())
367  	      return "firefly";
368  	    else if (has_nondefault_tunables2() || has_nondefault_tunables())
369  	      return "bobtail";
370  	    else
371  	      return "argonaut";
372  	  }
373  	
374  	  // default bucket types
375  	  unsigned get_default_bucket_alg() const {
376  	    // in order of preference
377  	    if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2))
378  	      return CRUSH_BUCKET_STRAW2;
379  	    if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW))
380  	      return CRUSH_BUCKET_STRAW;
381  	    if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE))
382  	      return CRUSH_BUCKET_TREE;
383  	    if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST))
384  	      return CRUSH_BUCKET_LIST;
385  	    if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM))
386  	      return CRUSH_BUCKET_UNIFORM;
387  	    return 0;
388  	  }
389  	
390  	  // bucket types
391  	  int get_num_type_names() const {
392  	    return type_map.size();
393  	  }
394  	  int get_max_type_id() const {
395  	    if (type_map.empty())
396  	      return 0;
397  	    return type_map.rbegin()->first;
398  	  }
399  	  int get_type_id(const std::string& name) const {
400  	    build_rmaps();
401  	    if (type_rmap.count(name))
402  	      return type_rmap[name];
403  	    return -1;
404  	  }
405  	  const char *get_type_name(int t) const {
406  	    auto p = type_map.find(t);
407  	    if (p != type_map.end())
408  	      return p->second.c_str();
409  	    return 0;
410  	  }
411  	  void set_type_name(int i, const std::string& name) {
412  	    type_map[i] = name;
413  	    if (have_rmaps)
414  	      type_rmap[name] = i;
415  	  }
416  	
417  	  // item/bucket names
418  	  bool name_exists(const std::string& name) const {
419  	    build_rmaps();
420  	    return name_rmap.count(name);
421  	  }
422  	  bool item_exists(int i) const {
423  	    return name_map.count(i);
424  	  }
425  	  int get_item_id(const std::string& name) const {
426  	    build_rmaps();
427  	    if (name_rmap.count(name))
428  	      return name_rmap[name];
429  	    return 0;  /* hrm */
430  	  }
431  	  const char *get_item_name(int t) const {
432  	    std::map<int,std::string>::const_iterator p = name_map.find(t);
433  	    if (p != name_map.end())
434  	      return p->second.c_str();
435  	    return 0;
436  	  }
437  	  int set_item_name(int i, const std::string& name) {
438  	    if (!is_valid_crush_name(name))
439  	      return -EINVAL;
440  	    name_map[i] = name;
441  	    if (have_rmaps)
442  	      name_rmap[name] = i;
443  	    return 0;
444  	  }
445  	  void swap_names(int a, int b) {
446  	    std::string an = name_map[a];
447  	    std::string bn = name_map[b];
448  	    name_map[a] = bn;
449  	    name_map[b] = an;
450  	    if (have_rmaps) {
451  	      name_rmap[an] = b;
452  	      name_rmap[bn] = a;
453  	    }
454  	  }
455  	  int split_id_class(int i, int *idout, int *classout) const;
456  	
457  	  bool class_exists(const std::string& name) const {
458  	    return class_rname.count(name);
459  	  }
460  	  const char *get_class_name(int i) const {
461  	    auto p = class_name.find(i);
462  	    if (p != class_name.end())
463  	      return p->second.c_str();
464  	    return 0;
465  	  }
466  	  int get_class_id(const std::string& name) const {
467  	    auto p = class_rname.find(name);
468  	    if (p != class_rname.end())
469  	      return p->second;
470  	    else
471  	      return -EINVAL;
472  	  }
473  	  int remove_class_name(const std::string& name) {
474  	    auto p = class_rname.find(name);
475  	    if (p == class_rname.end())
476  	      return -ENOENT;
477  	    int class_id = p->second;
478  	    auto q = class_name.find(class_id);
479  	    if (q == class_name.end())
480  	      return -ENOENT;
481  	    class_rname.erase(name);
482  	    class_name.erase(class_id);
483  	    return 0;
484  	  }
485  	
486  	  int32_t _alloc_class_id() const;
487  	
488  	  int get_or_create_class_id(const std::string& name) {
489  	    int c = get_class_id(name);
490  	    if (c < 0) {
491  	      int i = _alloc_class_id();
492  	      class_name[i] = name;
493  	      class_rname[name] = i;
494  	      return i;
495  	    } else {
496  	      return c;
497  	    }
498  	  }
499  	
500  	  const char *get_item_class(int t) const {
501  	    std::map<int,int>::const_iterator p = class_map.find(t);
502  	    if (p == class_map.end())
503  	      return 0;
504  	    return get_class_name(p->second);
505  	  }
506  	  int get_item_class_id(int t) const {
507  	    auto p = class_map.find(t);
508  	    if (p == class_map.end())
509  	      return -ENOENT;
510  	    return p->second;
511  	  }
512  	  int set_item_class(int i, const std::string& name) {
513  	    if (!is_valid_crush_name(name))
514  	      return -EINVAL;
515  	    class_map[i] = get_or_create_class_id(name);
516  	    return 0;
517  	  }
518  	  int set_item_class(int i, int c) {
519  	    class_map[i] = c;
520  	    return c;
521  	  }
522  	  void get_devices_by_class(const std::string &name,
523  				    std::set<int> *devices) const {
524  	    ceph_assert(devices);
525  	    devices->clear();
526  	    if (!class_exists(name)) {
527  	      return;
528  	    }
529  	    auto cid = get_class_id(name);
530  	    for (auto& p : class_map) {
531  	      if (p.first >= 0 && p.second == cid) {
532  	        devices->insert(p.first);
533  	      }
534  	    }
535  	  }
536  	  void class_remove_item(int i) {
537  	    auto it = class_map.find(i);
538  	    if (it == class_map.end()) {
539  	      return;
540  	    }
541  	    class_map.erase(it);
542  	  }
543  	  int can_rename_item(const std::string& srcname,
544  			      const std::string& dstname,
545  			      std::ostream *ss) const;
546  	  int rename_item(const std::string& srcname,
547  			  const std::string& dstname,
548  			  std::ostream *ss);
549  	  int can_rename_bucket(const std::string& srcname,
550  				const std::string& dstname,
551  				std::ostream *ss) const;
552  	  int rename_bucket(const std::string& srcname,
553  			    const std::string& dstname,
554  			    std::ostream *ss);
555  	
556  	  // rule names
557  	  int rename_rule(const std::string& srcname,
558  	                  const std::string& dstname,
559  	                  std::ostream *ss);
560  	  bool rule_exists(std::string name) const {
561  	    build_rmaps();
562  	    return rule_name_rmap.count(name);
563  	  }
564  	  int get_rule_id(std::string name) const {
565  	    build_rmaps();
566  	    if (rule_name_rmap.count(name))
567  	      return rule_name_rmap[name];
568  	    return -ENOENT;
569  	  }
570  	  const char *get_rule_name(int t) const {
571  	    auto p = rule_name_map.find(t);
572  	    if (p != rule_name_map.end())
573  	      return p->second.c_str();
574  	    return 0;
575  	  }
576  	  void set_rule_name(int i, const std::string& name) {
577  	    rule_name_map[i] = name;
578  	    if (have_rmaps)
579  	      rule_name_rmap[name] = i;
580  	  }
581  	  bool is_shadow_item(int id) const {
582  	    const char *name = get_item_name(id);
583  	    return name && !is_valid_crush_name(name);
584  	  }
585  	
586  	
587  	  /**
588  	   * find tree nodes referenced by rules by a 'take' command
589  	   *
590  	   * Note that these may not be parentless roots.
591  	   */
592  	  void find_takes(std::set<int> *roots) const;
593  	  void find_takes_by_rule(int rule, std::set<int> *roots) const;
594  	
595  	  /**
596  	   * find tree roots
597  	   *
598  	   * These are parentless nodes in the map.
599  	   */
600  	  void find_roots(std::set<int> *roots) const;
601  	
602  	
603  	  /**
604  	   * find tree roots that contain shadow (device class) items only
605  	   */
606  	  void find_shadow_roots(std::set<int> *roots) const {
607  	    std::set<int> all;
608  	    find_roots(&all);
609  	    for (auto& p: all) {
610  	      if (is_shadow_item(p)) {
611  	        roots->insert(p);
612  	      }
613  	    }
614  	  }
615  	
616  	  /**
617  	   * find tree roots that are not shadow (device class) items
618  	   *
619  	   * These are parentless nodes in the map that are not shadow
620  	   * items for device classes.
621  	   */
622  	  void find_nonshadow_roots(std::set<int> *roots) const {
623  	    std::set<int> all;
624  	    find_roots(&all);
625  	    for (auto& p: all) {
626  	      if (!is_shadow_item(p)) {
627  	        roots->insert(p);
628  	      }
629  	    }
630  	  }
631  	
632  	  /**
633  	   * see if an item is contained within a subtree
634  	   *
635  	   * @param root haystack
636  	   * @param item needle
637  	   * @return true if the item is located beneath the given node
638  	   */
639  	  bool subtree_contains(int root, int item) const;
640  	
641  	private:
642  	  /**
643  	   * search for an item in any bucket
644  	   *
645  	   * @param i item
646  	   * @return true if present
647  	   */
648  	  bool _search_item_exists(int i) const;
649  	  bool is_parent_of(int child, int p) const;
650  	public:
651  	
652  	  /**
653  	   * see if item is located where we think it is
654  	   *
655  	   * This verifies that the given item is located at a particular
656  	   * location in the hierarchy.  However, that check is imprecise; we
657  	   * are actually verifying that the most specific location key/value
658  	   * is correct.  For example, if loc specifies that rack=foo and
659  	   * host=bar, it will verify that host=bar is correct; any placement
660  	   * above that level in the hierarchy is ignored.  This matches the
661  	   * semantics for insert_item().
662  	   *
663  	   * @param cct cct
664  	   * @param item item id
665  	   * @param loc location to check (map of type to bucket names)
666  	   * @param weight optional pointer to weight of item at that location
667  	   * @return true if item is at specified location
668  	   */
669  	  bool check_item_loc(CephContext *cct, int item,
670  			      const std::map<std::string,std::string>& loc,
671  			      int *iweight);
672  	  bool check_item_loc(CephContext *cct, int item,
673  			      const std::map<std::string,std::string>& loc,
674  			      float *weight) {
675  	    int iweight;
676  	    bool ret = check_item_loc(cct, item, loc, &iweight);
677  	    if (weight)
678  	      *weight = (float)iweight / (float)0x10000;
679  	    return ret;
680  	  }
681  	
682  	
683  	  /**
684  	   * returns the (type, name) of the parent bucket of id
685  	   *
686  	   * FIXME: ambiguous for items that occur multiple times in the map
687  	   */
688  	  std::pair<std::string,std::string> get_immediate_parent(int id, int *ret = NULL) const;
689  	
690  	  int get_immediate_parent_id(int id, int *parent) const;
691  	
692  	  /**
693  	   * return ancestor of the given type, or 0 if none
694  	   * can pass in a specific crush **rule** to return ancestor from that rule only 
695  	   * (parent is always a bucket and thus <0)
696  	   */
697  	  int get_parent_of_type(int id, int type, int rule = -1) const;
698  	
699  	  /**
700  	   * get the fully qualified location of a device by successively finding
701  	   * parents beginning at ID and ending at highest type number specified in
702  	   * the CRUSH map which assumes that if device foo is under device bar, the
703  	   * type_id of foo < bar where type_id is the integer specified in the CRUSH map
704  	   *
705  	   * returns the location in the form of (type=foo) where type is a type of bucket
706  	   * specified in the CRUSH map and foo is a name specified in the CRUSH map
707  	   */
708  	  std::map<std::string, std::string> get_full_location(int id) const;
709  	
710  	  /**
711  	   * return location map for a item, by name
712  	   */
713  	  int get_full_location(
714  	    const std::string& name,
715  	    std::map<std::string,std::string> *ploc);
716  	
717  	  /*
718  	   * identical to get_full_location(int id) although it returns the type/name
719  	   * pairs in the order they occur in the hierarchy.
720  	   *
721  	   * returns -ENOENT if id is not found.
722  	   */
723  	  int get_full_location_ordered(
724  	    int id,
725  	    std::vector<std::pair<std::string, std::string> >& path) const;
726  	
727  	  /*
728  	   * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path),
729  	   * although it returns a concatenated string with the type/name pairs in descending
730  	   * hierarchical order with format key1=val1,key2=val2.
731  	   *
732  	   * returns the location in descending hierarchy as a string.
733  	   */
734  	  std::string get_full_location_ordered_string(int id) const;
735  	
736  	  /**
737  	   * returns (type_id, type) of all parent buckets between id and
738  	   * default, can be used to check for anomalous CRUSH maps
739  	   */
740  	  std::map<int, std::string> get_parent_hierarchy(int id) const;
741  	
742  	  /**
743  	   * enumerate immediate children of given node
744  	   *
745  	   * @param id parent bucket or device id
746  	   * @return number of items, or error
747  	   */
748  	  int get_children(int id, std::list<int> *children) const;
749  	 /**
750  	   * enumerate all children of given node
751  	   *
752  	   * @param id parent bucket or device id
753  	   * @return number of items, or error
754  	   */
755  	  int get_all_children(int id, std::set<int> *children) const;
756  	  void get_children_of_type(int id,
757  	                            int type,
758  				    std::vector<int> *children,
759  				    bool exclude_shadow = true) const;
760  	  /**
761  	   * enumerate all subtrees by type
762  	   */
763  	  void get_subtree_of_type(int type, std::vector<int> *subtrees);
764  	
765  	
766  	  /**
767  	   * verify upmapping results.
768  	   * return 0 on success or a negative errno on error.
769  	   */
770  	  int verify_upmap(CephContext *cct,
771  	                   int rule_id,
772  	                   int pool_size,
773  	                   const std::vector<int>& up);
774  	
775  	  /**
776  	    * enumerate leaves(devices) of given node
777  	    *
778  	    * @param name parent bucket name
779  	    * @return 0 on success or a negative errno on error.
780  	    */
781  	  int get_leaves(const std::string &name, std::set<int> *leaves) const;
782  	
783  	private:
784  	  int _get_leaves(int id, std::list<int> *leaves) const; // worker
785  	
786  	public:
787  	  /**
788  	   * insert an item into the map at a specific position
789  	   *
790  	   * Add an item as a specific location of the hierarchy.
791  	   * Specifically, we look for the most specific location constraint
792  	   * for which a bucket already exists, and then create intervening
793  	   * buckets beneath that in order to place the item.
794  	   *
795  	   * Note that any location specifiers *above* the most specific match
796  	   * are ignored.  For example, if we specify that osd.12 goes in
797  	   * host=foo, rack=bar, and row=baz, and rack=bar is the most
798  	   * specific match, we will create host=foo beneath that point and
799  	   * put osd.12 inside it.  However, we will not verify that rack=bar
800  	   * is beneath row=baz or move it.
801  	   *
802  	   * In short, we will build out a hierarchy, and move leaves around,
803  	   * but not adjust the hierarchy's internal structure.  Yet.
804  	   *
805  	   * If the item is already present in the map, we will return EEXIST.
806  	   * If the location key/value pairs are nonsensical
807  	   * (rack=nameofdevice), or location specifies that do not attach us
808  	   * to any existing part of the hierarchy, we will return EINVAL.
809  	   *
810  	   * @param cct cct
811  	   * @param id item id
812  	   * @param weight item weight
813  	   * @param name item name
814  	   * @param loc location (map of type to bucket names)
815  	   * @param init_weight_sets initialize weight-set weights to weight (vs 0)
816  	   * @return 0 for success, negative on error
817  	   */
818  	  int insert_item(CephContext *cct, int id, float weight, std::string name,
819  			  const std::map<std::string,std::string>& loc,
820  			  bool init_weight_sets=true);
821  	
822  	  /**
823  	   * move a bucket in the hierarchy to the given location
824  	   *
825  	   * This has the same location and ancestor creation behavior as
826  	   * insert_item(), but will relocate the specified existing bucket.
827  	   *
828  	   * @param cct cct
829  	   * @param id bucket id
830  	   * @param loc location (map of type to bucket names)
831  	   * @return 0 for success, negative on error
832  	   */
833  	  int move_bucket(CephContext *cct, int id, const std::map<std::string,std::string>& loc);
834  	
835  	  /**
836  	   * swap bucket contents of two buckets without touching bucket ids
837  	   *
838  	   * @param cct cct
839  	   * @param src bucket a
840  	   * @param dst bucket b
841  	   * @return 0 for success, negative on error
842  	   */
843  	  int swap_bucket(CephContext *cct, int src, int dst);
844  	
845  	  /**
846  	   * add a link to an existing bucket in the hierarchy to the new location
847  	   *
848  	   * This has the same location and ancestor creation behavior as
849  	   * insert_item(), but will add a new link to the specified existing
850  	   * bucket.
851  	   *
852  	   * @param cct cct
853  	   * @param id bucket id
854  	   * @param loc location (map of type to bucket names)
855  	   * @return 0 for success, negative on error
856  	   */
857  	  int link_bucket(CephContext *cct, int id,
858  			  const std::map<std::string,std::string>& loc);
859  	
860  	  /**
861  	   * add or update an item's position in the map
862  	   *
863  	   * This is analogous to insert_item, except we will move an item if
864  	   * it is already present.
865  	   *
866  	   * @param cct cct
867  	   * @param id item id
868  	   * @param weight item weight
869  	   * @param name item name
870  	   * @param loc location (map of type to bucket names)
871  	   * @return 0 for no change, 1 for successful change, negative on error
872  	   */
873  	  int update_item(CephContext *cct, int id, float weight, std::string name,
874  			  const std::map<std::string, std::string>& loc);
875  	
876  	  /**
877  	   * create or move an item, but do not adjust its weight if it already exists
878  	   *
879  	   * @param cct cct
880  	   * @param item item id
881  	   * @param weight initial item weight (if we need to create it)
882  	   * @param name item name
883  	   * @param loc location (map of type to bucket names)
884  	   * @param init_weight_sets initialize weight-set values to weight (vs 0)
885  	   * @return 0 for no change, 1 for successful change, negative on error
886  	   */
887  	  int create_or_move_item(CephContext *cct, int item, float weight,
888  				  std::string name,
889  				  const std::map<std::string,std::string>& loc,
890  				  bool init_weight_sets=true);
891  	
892  	  /**
893  	   * remove all instances of an item from the map
894  	   *
895  	   * @param cct cct
896  	   * @param id item id to remove
897  	   * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty)
898  	   * @return 0 on success, negative on error
899  	   */
900  	  int remove_item(CephContext *cct, int id, bool unlink_only);
901  	
902  	  /**
903  	   * recursively remove buckets starting at item and stop removing
904  	   * when a bucket is in use.
905  	   *
906  	   * @param item id to remove
907  	   * @return 0 on success, negative on error
908  	   */
909  	  int remove_root(CephContext *cct, int item);
910  	
911  	  /**
912  	   * remove all instances of an item nested beneath a certain point from the map
913  	   *
914  	   * @param cct cct
915  	   * @param id item id to remove
916  	   * @param ancestor ancestor item id under which to search for id
917  	   * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty)
918  	   * @return 0 on success, negative on error
919  	   */
920  	private:
921  	  bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only);
922  	  int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
923  	  bool _bucket_is_in_use(int id);
924  	public:
925  	  int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
926  	
927  	  /**
928  	   * calculate the locality/distance from a given id to a crush location map
929  	   *
930  	   * Specifically, we look for the lowest-valued type for which the
931  	   * location of id matches that described in loc.
932  	   *
933  	   * @param cct cct
934  	   * @param id the existing id in the map
935  	   * @param loc a set of key=value pairs describing a location in the hierarchy
936  	   */
937  	  int get_common_ancestor_distance(CephContext *cct, int id,
938  					   const std::multimap<std::string,std::string>& loc) const;
939  	
940  	  /**
941  	   * parse a set of key/value pairs out of a string vector
942  	   *
943  	   * These are used to describe a location in the CRUSH hierarchy.
944  	   *
945  	   * @param args list of strings (each key= or key=value)
946  	   * @param ploc pointer to a resulting location map or multimap
947  	   */
948  	  static int parse_loc_map(const std::vector<std::string>& args,
949  				   std::map<std::string,std::string> *ploc);
950  	  static int parse_loc_multimap(const std::vector<std::string>& args,
951  					std::multimap<std::string,std::string> *ploc);
952  	
953  	
954  	  /**
955  	   * get an item's weight
956  	   *
957  	   * Will return the weight for the first instance it finds.
958  	   *
959  	   * @param id item id to check
960  	   * @return weight of item
961  	   */
962  	  int get_item_weight(int id) const;
963  	  float get_item_weightf(int id) const {
964  	    return (float)get_item_weight(id) / (float)0x10000;
965  	  }
966  	  int get_item_weight_in_loc(int id,
967  				     const std::map<std::string, std::string> &loc);
968  	  float get_item_weightf_in_loc(int id,
969  					const std::map<std::string, std::string> &loc) {
970  	    return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
971  	  }
972  	
973  	  int validate_weightf(float weight) {
974  	    uint64_t iweight = weight * 0x10000;
975  	    if (iweight > static_cast<uint64_t>(std::numeric_limits<int>::max())) {
976  	      return -EOVERFLOW;
977  	    }
978  	    return 0;
979  	  }
980  	  int adjust_item_weight(CephContext *cct, int id, int weight,
981  				 bool update_weight_sets=true);
982  	  int adjust_item_weightf(CephContext *cct, int id, float weight,
983  				  bool update_weight_sets=true) {
984  	    int r = validate_weightf(weight);
985  	    if (r < 0) {
986  	      return r;
987  	    }
988  	    return adjust_item_weight(cct, id, (int)(weight * (float)0x10000),
989  				      update_weight_sets);
990  	  }
991  	  int adjust_item_weight_in_bucket(CephContext *cct, int id, int weight,
992  					   int bucket_id,
993  					   bool update_weight_sets);
994  	  int adjust_item_weight_in_loc(CephContext *cct, int id, int weight,
995  					const std::map<std::string,std::string>& loc,
996  					bool update_weight_sets=true);
997  	  int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight,
998  					 const std::map<std::string,std::string>& loc,
999  					 bool update_weight_sets=true) {
1000 	    int r = validate_weightf(weight);
1001 	    if (r < 0) {
1002 	      return r;
1003 	    }
1004 	    return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000),
1005 					     loc, update_weight_sets);
1006 	  }
1007 	  void reweight(CephContext *cct);
1008 	  void reweight_bucket(crush_bucket *b,
1009 			       crush_choose_arg_map& arg_map,
1010 			       std::vector<uint32_t> *weightv);
1011 	
1012 	  int adjust_subtree_weight(CephContext *cct, int id, int weight,
1013 				    bool update_weight_sets=true);
1014 	  int adjust_subtree_weightf(CephContext *cct, int id, float weight,
1015 				     bool update_weight_sets=true) {
1016 	    int r = validate_weightf(weight);
1017 	    if (r < 0) {
1018 	      return r;
1019 	    }
1020 	    return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000),
1021 					 update_weight_sets);
1022 	  }
1023 	
1024 	  /// check if item id is present in the map hierarchy
1025 	  bool check_item_present(int id) const;
1026 	
1027 	
1028 	  /*** devices ***/
1029 	  int get_max_devices() const {
1030 	    if (!crush) return 0;
1031 	    return crush->max_devices;
1032 	  }
1033 	
1034 	
1035 	  /*** rules ***/
1036 	private:
1037 	  crush_rule *get_rule(unsigned ruleno) const {
1038 	    if (!crush) return (crush_rule *)(-ENOENT);
1039 	    if (ruleno >= crush->max_rules)
1040 	      return 0;
1041 	    return crush->rules[ruleno];
1042 	  }
1043 	  crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const {
1044 	    crush_rule *n = get_rule(ruleno);
1045 	    if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL);
1046 	    if (step >= n->len) return (crush_rule_step *)(-EINVAL);
1047 	    return &n->steps[step];
1048 	  }
1049 	
1050 	public:
1051 	  /* accessors */
1052 	  int get_max_rules() const {
1053 	    if (!crush) return 0;
1054 	    return crush->max_rules;
1055 	  }
1056 	  bool rule_exists(unsigned ruleno) const {
1057 	    if (!crush) return false;
1058 	    if (ruleno < crush->max_rules &&
1059 		crush->rules[ruleno] != NULL)
1060 	      return true;
1061 	    return false;
1062 	  }
1063 	  bool rule_has_take(unsigned ruleno, int take) const {
1064 	    if (!crush) return false;
1065 	    crush_rule *rule = get_rule(ruleno);
1066 	    for (unsigned i = 0; i < rule->len; ++i) {
1067 	      if (rule->steps[i].op == CRUSH_RULE_TAKE &&
1068 		  rule->steps[i].arg1 == take) {
1069 		return true;
1070 	      }
1071 	    }
1072 	    return false;
1073 	  }
1074 	  int get_rule_len(unsigned ruleno) const {
1075 	    crush_rule *r = get_rule(ruleno);
1076 	    if (IS_ERR(r)) return PTR_ERR(r);
1077 	    return r->len;
1078 	  }
1079 	  int get_rule_mask_ruleset(unsigned ruleno) const {
1080 	    crush_rule *r = get_rule(ruleno);
1081 	    if (IS_ERR(r)) return -1;
1082 	    return r->mask.ruleset;
1083 	  }
1084 	  int get_rule_mask_type(unsigned ruleno) const {
1085 	    crush_rule *r = get_rule(ruleno);
1086 	    if (IS_ERR(r)) return -1;
1087 	    return r->mask.type;
1088 	  }
1089 	  int get_rule_mask_min_size(unsigned ruleno) const {
1090 	    crush_rule *r = get_rule(ruleno);
1091 	    if (IS_ERR(r)) return -1;
1092 	    return r->mask.min_size;
1093 	  }
1094 	  int get_rule_mask_max_size(unsigned ruleno) const {
1095 	    crush_rule *r = get_rule(ruleno);
1096 	    if (IS_ERR(r)) return -1;
1097 	    return r->mask.max_size;
1098 	  }
1099 	  int get_rule_op(unsigned ruleno, unsigned step) const {
1100 	    crush_rule_step *s = get_rule_step(ruleno, step);
1101 	    if (IS_ERR(s)) return PTR_ERR(s);
1102 	    return s->op;
1103 	  }
1104 	  int get_rule_arg1(unsigned ruleno, unsigned step) const {
1105 	    crush_rule_step *s = get_rule_step(ruleno, step);
1106 	    if (IS_ERR(s)) return PTR_ERR(s);
1107 	    return s->arg1;
1108 	  }
1109 	  int get_rule_arg2(unsigned ruleno, unsigned step) const {
1110 	    crush_rule_step *s = get_rule_step(ruleno, step);
1111 	    if (IS_ERR(s)) return PTR_ERR(s);
1112 	    return s->arg2;
1113 	  }
1114 	
1115 	private:
1116 	  float _get_take_weight_osd_map(int root, std::map<int,float> *pmap) const;
1117 	  void _normalize_weight_map(float sum, const std::map<int,float>& m,
1118 				     std::map<int,float> *pmap) const;
1119 	
1120 	public:
1121 	  /**
1122 	   * calculate a map of osds to weights for a given rule
1123 	   *
1124 	   * Generate a map of which OSDs get how much relative weight for a
1125 	   * given rule.
1126 	   *
1127 	   * @param ruleno [in] rule id
1128 	   * @param pmap [out] map of osd to weight
1129 	   * @return 0 for success, or negative error code
1130 	   */
1131 	  int get_rule_weight_osd_map(unsigned ruleno, std::map<int,float> *pmap) const;
1132 	
1133 	  /**
1134 	   * calculate a map of osds to weights for a given starting root
1135 	   *
1136 	   * Generate a map of which OSDs get how much relative weight for a
1137 	   * given starting root
1138 	   *
1139 	   * @param root node
1140 	   * @param pmap [out] map of osd to weight
1141 	   * @return 0 for success, or negative error code
1142 	   */
1143 	  int get_take_weight_osd_map(int root, std::map<int,float> *pmap) const;
1144 	
1145 	  /* modifiers */
1146 	
1147 	  int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
1148 	    if (!crush) return -ENOENT;
1149 	    crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
1150 	    ceph_assert(n);
1151 	    ruleno = crush_add_rule(crush, n, ruleno);
1152 	    return ruleno;
1153 	  }
1154 	  int set_rule_mask_max_size(unsigned ruleno, int max_size) {
1155 	    crush_rule *r = get_rule(ruleno);
1156 	    if (IS_ERR(r)) return -1;
1157 	    return r->mask.max_size = max_size;
1158 	  }
1159 	  int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
1160 	    if (!crush) return -ENOENT;
1161 	    crush_rule *n = get_rule(ruleno);
1162 	    if (!n) return -1;
1163 	    crush_rule_set_step(n, step, op, arg1, arg2);
1164 	    return 0;
1165 	  }
1166 	  int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
1167 	    return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
1168 	  }
1169 	  int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) {
1170 	    return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0);
1171 	  }
1172 	  int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) {
1173 	    return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0);
1174 	  }
1175 	  int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) {
1176 	    return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0);
1177 	  }
1178 	  int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) {
1179 	    return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0);
1180 	  }
1181 	  int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) {
1182 	    return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0);
1183 	  }
1184 	  int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
1185 	    return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
1186 	  }
1187 	  int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
1188 	    return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
1189 	  }
1190 	  int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
1191 	    return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
1192 	  }
1193 	  int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) {
1194 	    return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type);
1195 	  }
1196 	  int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
1197 	    return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
1198 	  }
1199 	  int set_rule_step_emit(unsigned ruleno, unsigned step) {
1200 	    return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
1201 	  }
1202 	
1203 	  int add_simple_rule(
1204 	    std::string name, std::string root_name, std::string failure_domain_type,
1205 	    std::string device_class, std::string mode, int rule_type,
1206 	    std::ostream *err = 0);
1207 	
1208 	  /**
1209 	   * @param rno rule[set] id to use, -1 to pick the lowest available
1210 	   */
1211 	  int add_simple_rule_at(
1212 	    std::string name, std::string root_name,
1213 	    std::string failure_domain_type, std::string device_class, std::string mode,
1214 	    int rule_type, int rno, std::ostream *err = 0);
1215 	
1216 	  int remove_rule(int ruleno);
1217 	
1218 	
1219 	  /** buckets **/
1220 	  const crush_bucket *get_bucket(int id) const {
1221 	    if (!crush)
1222 	      return (crush_bucket *)(-EINVAL);
1223 	    unsigned int pos = (unsigned int)(-1 - id);
1224 	    unsigned int max_buckets = crush->max_buckets;
1225 	    if (pos >= max_buckets)
1226 	      return (crush_bucket *)(-ENOENT);
1227 	    crush_bucket *ret = crush->buckets[pos];
1228 	    if (ret == NULL)
1229 	      return (crush_bucket *)(-ENOENT);
1230 	    return ret;
1231 	  }
1232 	private:
1233 	  crush_bucket *get_bucket(int id) {
1234 	    if (!crush)
1235 	      return (crush_bucket *)(-EINVAL);
1236 	    unsigned int pos = (unsigned int)(-1 - id);
1237 	    unsigned int max_buckets = crush->max_buckets;
1238 	    if (pos >= max_buckets)
1239 	      return (crush_bucket *)(-ENOENT);
1240 	    crush_bucket *ret = crush->buckets[pos];
1241 	    if (ret == NULL)
1242 	      return (crush_bucket *)(-ENOENT);
1243 	    return ret;
1244 	  }
1245 	  /**
1246 	   * detach a bucket from its parent and adjust the parent weight
1247 	   *
1248 	   * returns the weight of the detached bucket
1249 	   **/
1250 	  int detach_bucket(CephContext *cct, int item);
1251 	
1252 	  int get_new_bucket_id();
1253 	
1254 	public:
1255 	  int get_max_buckets() const {
1256 	    if (!crush) return -EINVAL;
1257 	    return crush->max_buckets;
1258 	  }
1259 	  int get_next_bucket_id() const {
1260 	    if (!crush) return -EINVAL;
1261 	    return crush_get_next_bucket_id(crush);
1262 	  }
1263 	  bool bucket_exists(int id) const {
1264 	    const crush_bucket *b = get_bucket(id);
1265 	    if (IS_ERR(b))
1266 	      return false;
1267 	    return true;
1268 	  }
1269 	  int get_bucket_weight(int id) const {
1270 	    const crush_bucket *b = get_bucket(id);
1271 	    if (IS_ERR(b)) return PTR_ERR(b);
1272 	    return b->weight;
1273 	  }
1274 	  float get_bucket_weightf(int id) const {
1275 	    const crush_bucket *b = get_bucket(id);
1276 	    if (IS_ERR(b)) return 0;
1277 	    return b->weight / (float)0x10000;
1278 	  }
1279 	  int get_bucket_type(int id) const {
1280 	    const crush_bucket *b = get_bucket(id);
1281 	    if (IS_ERR(b)) return PTR_ERR(b);
1282 	    return b->type;
1283 	  }
1284 	  int get_bucket_alg(int id) const {
1285 	    const crush_bucket *b = get_bucket(id);
1286 	    if (IS_ERR(b)) return PTR_ERR(b);
1287 	    return b->alg;
1288 	  }
1289 	  int get_bucket_hash(int id) const {
1290 	    const crush_bucket *b = get_bucket(id);
1291 	    if (IS_ERR(b)) return PTR_ERR(b);
1292 	    return b->hash;
1293 	  }
1294 	  int get_bucket_size(int id) const {
1295 	    const crush_bucket *b = get_bucket(id);
1296 	    if (IS_ERR(b)) return PTR_ERR(b);
1297 	    return b->size;
1298 	  }
1299 	  int get_bucket_item(int id, int pos) const {
1300 	    const crush_bucket *b = get_bucket(id);
1301 	    if (IS_ERR(b)) return PTR_ERR(b);
1302 	    if ((__u32)pos >= b->size)
1303 	      return PTR_ERR(b);
1304 	    return b->items[pos];
1305 	  }
1306 	  int get_bucket_item_weight(int id, int pos) const {
1307 	    const crush_bucket *b = get_bucket(id);
1308 	    if (IS_ERR(b)) return PTR_ERR(b);
1309 	    return crush_get_bucket_item_weight(b, pos);
1310 	  }
1311 	  float get_bucket_item_weightf(int id, int pos) const {
1312 	    const crush_bucket *b = get_bucket(id);
1313 	    if (IS_ERR(b)) return 0;
1314 	    return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000;
1315 	  }
1316 	
1317 	  /* modifiers */
1318 	  int add_bucket(int bucketno, int alg, int hash, int type, int size,
1319 			 int *items, int *weights, int *idout);
1320 	  int bucket_add_item(crush_bucket *bucket, int item, int weight);
1321 	  int bucket_remove_item(struct crush_bucket *bucket, int item);
1322 	  int bucket_adjust_item_weight(
1323 	    CephContext *cct, struct crush_bucket *bucket, int item, int weight,
1324 	    bool adjust_weight_sets);
1325 	
1326 	  void finalize() {
1327 	    ceph_assert(crush);
1328 	    crush_finalize(crush);
1329 	    if (!name_map.empty() &&
1330 		name_map.rbegin()->first >= crush->max_devices) {
1331 	      crush->max_devices = name_map.rbegin()->first + 1;
1332 	    }
1333 	    have_uniform_rules = !has_legacy_rule_ids();
1334 	  }
1335 	  int bucket_set_alg(int id, int alg);
1336 	
1337 	  int update_device_class(int id, const std::string& class_name,
1338 				  const std::string& name, std::ostream *ss);
1339 	  int remove_device_class(CephContext *cct, int id, std::ostream *ss);
1340 	  int device_class_clone(
1341 	    int original, int device_class,
1342 	    const std::map<int32_t, std::map<int32_t, int32_t>>& old_class_bucket,
1343 	    const std::set<int32_t>& used_ids,
1344 	    int *clone,
1345 	    std::map<int, std::map<int,std::vector<int>>> *cmap_item_weight);
1346 	  bool class_is_in_use(int class_id, std::ostream *ss = nullptr);
1347 	  int rename_class(const std::string& srcname, const std::string& dstname);
1348 	  int populate_classes(
1349 	    const std::map<int32_t, std::map<int32_t, int32_t>>& old_class_bucket);
1350 	  int get_rules_by_class(const std::string &class_name, std::set<int> *rules);
1351 	  int get_rules_by_osd(int osd, std::set<int> *rules);
1352 	  bool _class_is_dead(int class_id);
1353 	  void cleanup_dead_classes();
1354 	  int rebuild_roots_with_classes(CephContext *cct);
1355 	  /* remove unused roots generated for class devices */
1356 	  int trim_roots_with_class(CephContext *cct);
1357 	
1358 	  int reclassify(
1359 	    CephContext *cct,
1360 	    std::ostream& out,
1361 	    const std::map<std::string,std::string>& classify_root,
1362 	    const std::map<std::string,std::pair<std::string,std::string>>& classify_bucket
1363 	    );
1364 	
1365 	  int set_subtree_class(const std::string& name, const std::string& class_name);
1366 	
1367 	  void start_choose_profile() {
1368 	    free(crush->choose_tries);
1369 	    /*
1370 	     * the original choose_total_tries value was off by one (it
1371 	     * counted "retries" and not "tries").  add one to alloc.
1372 	     */
1373 	    crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
1374 						  (crush->choose_total_tries + 1));
1375 	    memset(crush->choose_tries, 0,
1376 		   sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1377 	  }
1378 	  void stop_choose_profile() {
1379 	    free(crush->choose_tries);
1380 	    crush->choose_tries = 0;
1381 	  }
1382 	
1383 	  int get_choose_profile(__u32 **vec) {
1384 	    if (crush->choose_tries) {
1385 	      *vec = crush->choose_tries;
1386 	      return crush->choose_total_tries;
1387 	    }
1388 	    return 0;
1389 	  }
1390 	
1391 	
1392 	  void set_max_devices(int m) {
1393 	    crush->max_devices = m;
1394 	  }
1395 	
1396 	  int find_rule(int ruleset, int type, int size) const {
1397 	    if (!crush) return -1;
1398 	    if (have_uniform_rules &&
1399 		ruleset < (int)crush->max_rules &&
1400 		crush->rules[ruleset] &&
1401 		crush->rules[ruleset]->mask.type == type &&
1402 		crush->rules[ruleset]->mask.min_size <= size &&
1403 		crush->rules[ruleset]->mask.max_size >= size) {
1404 	      return ruleset;
1405 	    }
1406 	    return crush_find_rule(crush, ruleset, type, size);
1407 	  }
1408 	
1409 	  bool ruleset_exists(const int ruleset) const {
1410 	    for (size_t i = 0; i < crush->max_rules; ++i) {
1411 	      if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
1412 		return true;
1413 	      }
1414 	    }
1415 	
1416 	    return false;
1417 	  }
1418 	
1419 	  /**
1420 	   * Return the lowest numbered ruleset of type `type`
1421 	   *
1422 	   * @returns a ruleset ID, or -1 if no matching rules found.
1423 	   */
1424 	  int find_first_ruleset(int type) const {
1425 	    int result = -1;
1426 	
1427 	    for (size_t i = 0; i < crush->max_rules; ++i) {
1428 	      if (crush->rules[i]
1429 	          && crush->rules[i]->mask.type == type
1430 	          && (crush->rules[i]->mask.ruleset < result || result == -1)) {
1431 	        result = crush->rules[i]->mask.ruleset;
1432 	      }
1433 	    }
1434 	
1435 	    return result;
1436 	  }
1437 	
1438 	  bool have_choose_args(int64_t choose_args_index) const {
1439 	    return choose_args.count(choose_args_index);
1440 	  }
1441 	
1442 	  crush_choose_arg_map choose_args_get_with_fallback(
1443 	    int64_t choose_args_index) const {
1444 	    auto i = choose_args.find(choose_args_index);
1445 	    if (i == choose_args.end()) {
1446 	      i = choose_args.find(DEFAULT_CHOOSE_ARGS);
1447 	    }
1448 	    if (i == choose_args.end()) {
1449 	      crush_choose_arg_map arg_map;
1450 	      arg_map.args = NULL;
1451 	      arg_map.size = 0;
1452 	      return arg_map;
1453 	    } else {
1454 	      return i->second;
1455 	    }
1456 	  }
1457 	  crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
1458 	    auto i = choose_args.find(choose_args_index);
1459 	    if (i == choose_args.end()) {
1460 	      crush_choose_arg_map arg_map;
1461 	      arg_map.args = NULL;
1462 	      arg_map.size = 0;
1463 	      return arg_map;
1464 	    } else {
1465 	      return i->second;
1466 	    }
1467 	  }
1468 	
1469 	  void destroy_choose_args(crush_choose_arg_map arg_map) {
1470 	    for (__u32 i = 0; i < arg_map.size; i++) {
1471 	      crush_choose_arg *arg = &arg_map.args[i];
1472 	      for (__u32 j = 0; j < arg->weight_set_positions; j++) {
1473 		crush_weight_set *weight_set = &arg->weight_set[j];
1474 		free(weight_set->weights);
1475 	      }
1476 	      if (arg->weight_set)
1477 		free(arg->weight_set);
1478 	      if (arg->ids)
1479 		free(arg->ids);
1480 	    }
1481 	    free(arg_map.args);
1482 	  }
1483 	
1484 	  bool create_choose_args(int64_t id, int positions) {
1485 	    if (choose_args.count(id))
1486 	      return false;
1487 	    ceph_assert(positions);
1488 	    auto &cmap = choose_args[id];
1489 	    cmap.args = static_cast<crush_choose_arg*>(calloc(sizeof(crush_choose_arg),
1490 						  crush->max_buckets));
1491 	    cmap.size = crush->max_buckets;
1492 	    for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
1493 	      crush_bucket *b = crush->buckets[bidx];
1494 	      auto &carg = cmap.args[bidx];
1495 	      carg.ids = NULL;
1496 	      carg.ids_size = 0;
1497 	      if (b && b->alg == CRUSH_BUCKET_STRAW2) {
1498 		crush_bucket_straw2 *sb = reinterpret_cast<crush_bucket_straw2*>(b);
1499 		carg.weight_set_positions = positions;
1500 		carg.weight_set = static_cast<crush_weight_set*>(calloc(sizeof(crush_weight_set),
1501 							    carg.weight_set_positions));
1502 		// initialize with canonical weights
1503 		for (int pos = 0; pos < positions; ++pos) {
1504 		  carg.weight_set[pos].size = b->size;
1505 		  carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
1506 		  for (unsigned i = 0; i < b->size; ++i) {
1507 		    carg.weight_set[pos].weights[i] = sb->item_weights[i];
1508 		  }
1509 		}
1510 	      } else {
1511 		carg.weight_set = NULL;
1512 		carg.weight_set_positions = 0;
1513 	      }
1514 	    }
1515 	    return true;
1516 	  }
1517 	
1518 	  void rm_choose_args(int64_t id) {
1519 	    auto p = choose_args.find(id);
1520 	    if (p != choose_args.end()) {
1521 	      destroy_choose_args(p->second);
1522 	      choose_args.erase(p);
1523 	    }
1524 	  }
1525 	
1526 	  void choose_args_clear() {
1527 	    for (auto w : choose_args)
1528 	      destroy_choose_args(w.second);
1529 	    choose_args.clear();
1530 	  }
1531 	
1532 	  // remove choose_args for buckets that no longer exist, create them for new buckets
1533 	  void update_choose_args(CephContext *cct);
1534 	
1535 	  // adjust choose_args_map weight, preserving the hierarchical summation
1536 	  // property.  used by callers optimizing layouts by tweaking weights.
1537 	  int _choose_args_adjust_item_weight_in_bucket(
1538 	    CephContext *cct,
1539 	    crush_choose_arg_map cmap,
1540 	    int bucketid,
1541 	    int id,
1542 	    const std::vector<int>& weight,
1543 	    std::ostream *ss);
1544 	  int choose_args_adjust_item_weight(
1545 	    CephContext *cct,
1546 	    crush_choose_arg_map cmap,
1547 	    int id, const std::vector<int>& weight,
1548 	    std::ostream *ss);
1549 	  int choose_args_adjust_item_weightf(
1550 	    CephContext *cct,
1551 	    crush_choose_arg_map cmap,
1552 	    int id, const std::vector<double>& weightf,
1553 	    std::ostream *ss) {
1554 	    std::vector<int> weight(weightf.size());
1555 	    for (unsigned i = 0; i < weightf.size(); ++i) {
1556 	      weight[i] = (int)(weightf[i] * (double)0x10000);
1557 	    }
1558 	    return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
1559 	  }
1560 	
1561 	  int get_choose_args_positions(crush_choose_arg_map cmap) {
1562 	    // infer positions from other buckets
1563 	    for (unsigned j = 0; j < cmap.size; ++j) {
1564 	      if (cmap.args[j].weight_set_positions) {
1565 		return cmap.args[j].weight_set_positions;
1566 	      }
1567 	    }
1568 	    return 1;
1569 	  }
1570 	
1571 	  template<typename WeightVector>
1572 	  void do_rule(int rule, int x, std::vector<int>& out, int maxout,
1573 		       const WeightVector& weight,
1574 		       uint64_t choose_args_index) const {
1575 	    int rawout[maxout];
1576 	    char work[crush_work_size(crush, maxout)];
1577 	    crush_init_workspace(crush, work);
1578 	    crush_choose_arg_map arg_map = choose_args_get_with_fallback(
1579 	      choose_args_index);
1580 	    int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
1581 				       weight.size(), work, arg_map.args);
1582 	    if (numrep < 0)
1583 	      numrep = 0;
1584 	    out.resize(numrep);
1585 	    for (int i=0; i<numrep; i++)
1586 	      out[i] = rawout[i];
1587 	  }
1588 	
1589 	  int _choose_type_stack(
1590 	    CephContext *cct,
1591 	    const std::vector<std::pair<int,int>>& stack,
1592 	    const std::set<int>& overfull,
1593 	    const std::vector<int>& underfull,
1594 	    const std::vector<int>& orig,
1595 	    std::vector<int>::const_iterator& i,
1596 	    std::set<int>& used,
1597 	    std::vector<int> *pw,
1598 	    int root_bucket) const;
1599 	
1600 	  int try_remap_rule(
1601 	    CephContext *cct,
1602 	    int rule,
1603 	    int maxout,
1604 	    const std::set<int>& overfull,
1605 	    const std::vector<int>& underfull,
1606 	    const std::vector<int>& orig,
1607 	    std::vector<int> *out) const;
1608 	
1609 	  bool check_crush_rule(int ruleset, int type, int size, std::ostream& ss) {
1610 	    ceph_assert(crush);
1611 	
1612 	    __u32 i;
1613 	    for (i = 0; i < crush->max_rules; i++) {
1614 	      if (crush->rules[i] &&
1615 		  crush->rules[i]->mask.ruleset == ruleset &&
1616 		  crush->rules[i]->mask.type == type) {
1617 	
1618 	        if (crush->rules[i]->mask.min_size <= size &&
1619 	            crush->rules[i]->mask.max_size >= size) {
1620 	          return true;
1621 	        } else if (size < crush->rules[i]->mask.min_size) {
1622 	          ss << "pool size is smaller than the crush rule min size";
1623 	          return false;
1624 	        } else {
1625 	          ss << "pool size is bigger than the crush rule max size";
1626 	          return false;
1627 	        }
1628 	      }
1629 	    }
1630 	
1631 	    return false;
1632 	  }
1633 	
1634 	  void encode(ceph::buffer::list &bl, uint64_t features) const;
1635 	  void decode(ceph::buffer::list::const_iterator &blp);
1636 	  void decode_crush_bucket(crush_bucket** bptr,
1637 				   ceph::buffer::list::const_iterator &blp);
1638 	  void dump(ceph::Formatter *f) const;
1639 	  void dump_rules(ceph::Formatter *f) const;
1640 	  void dump_rule(int ruleset, ceph::Formatter *f) const;
1641 	  void dump_tunables(ceph::Formatter *f) const;
1642 	  void dump_choose_args(ceph::Formatter *f) const;
1643 	  void list_rules(ceph::Formatter *f) const;
1644 	  void list_rules(std::ostream *ss) const;
1645 	  void dump_tree(std::ostream *out,
1646 	                 ceph::Formatter *f,
1647 			 const CrushTreeDumper::name_map_t& ws,
1648 	                 bool show_shadow = false) const;
1649 	  void dump_tree(std::ostream *out, ceph::Formatter *f) {
1650 	    dump_tree(out, f, CrushTreeDumper::name_map_t());
1651 	  }
1652 	  void dump_tree(ceph::Formatter *f,
1653 			 const CrushTreeDumper::name_map_t& ws) const;
1654 	  static void generate_test_instances(std::list<CrushWrapper*>& o);
1655 	
1656 	  int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
1657 	
1658 	  static bool is_valid_crush_name(const std::string& s);
1659 	  static bool is_valid_crush_loc(CephContext *cct,
1660 					 const std::map<std::string,std::string>& loc);
1661 	};
1662 	WRITE_CLASS_ENCODER_FEATURES(CrushWrapper)
1663 	
1664 	#endif
1665