1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
2    	// vim: ts=8 sw=2 smarttab
3    	/*
4    	 * Ceph - scalable distributed file system
5    	 *
6    	 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7    	 *
8    	 * This is free software; you can redistribute it and/or
9    	 * modify it under the terms of the GNU Lesser General Public
10   	 * License version 2.1, as published by the Free Software 
11   	 * Foundation.  See file COPYING.
12   	 * 
13   	 */
14   	
15   	
16   	#include "FSMap.h"
17   	
18   	#include "common/StackStringStream.h"
19   	
20   	#include <sstream>
21   	#ifdef WITH_SEASTAR
22   	#include "crimson/common/config_proxy.h"
23   	#else
24   	#include "common/config_proxy.h"
25   	#endif
26   	#include "global/global_context.h"
27   	#include "mon/health_check.h"
28   	
29   	using std::stringstream;
30   	
31   	void Filesystem::dump(Formatter *f) const
32   	{
33   	  f->open_object_section("mdsmap");
34   	  mds_map.dump(f);
35   	  f->close_section();
36   	  f->dump_int("id", fscid);
37   	}
38   	
39   	void FSMap::dump(Formatter *f) const
40   	{
41   	  f->dump_int("epoch", epoch);
42   	  // Use 'default' naming to match 'set-default' CLI
43   	  f->dump_int("default_fscid", legacy_client_fscid);
44   	
45   	  f->open_object_section("compat");
46   	  compat.dump(f);
47   	  f->close_section();
48   	
49   	  f->open_object_section("feature_flags");
50   	  f->dump_bool("enable_multiple", enable_multiple);
51   	  f->dump_bool("ever_enabled_multiple", ever_enabled_multiple);
52   	  f->close_section();
53   	
54   	  f->open_array_section("standbys");
55   	  for (const auto &i : standby_daemons) {
56   	    f->open_object_section("info");
57   	    i.second.dump(f);
58   	    f->dump_int("epoch", standby_epochs.at(i.first));
59   	    f->close_section();
60   	  }
61   	  f->close_section();
62   	
63   	  f->open_array_section("filesystems");
64   	  for (const auto &fs : filesystems) {
65   	    f->open_object_section("filesystem");
66   	    fs.second->dump(f);
67   	    f->close_section();
68   	  }
69   	  f->close_section();
70   	}
71   	
72   	FSMap &FSMap::operator=(const FSMap &rhs)
73   	{
74   	  epoch = rhs.epoch;
75   	  next_filesystem_id = rhs.next_filesystem_id;
76   	  legacy_client_fscid = rhs.legacy_client_fscid;
77   	  compat = rhs.compat;
78   	  enable_multiple = rhs.enable_multiple;
79   	  mds_roles = rhs.mds_roles;
80   	  standby_daemons = rhs.standby_daemons;
81   	  standby_epochs = rhs.standby_epochs;
82   	
83   	  filesystems.clear();
84   	  for (const auto &i : rhs.filesystems) {
85   	    const auto &fs = i.second;
86   	    filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
87   	  }
88   	
89   	  return *this;
90   	}
91   	
92   	void FSMap::generate_test_instances(std::list<FSMap*>& ls)
93   	{
94   	  FSMap *m = new FSMap();
95   	
96   	  std::list<MDSMap*> mds_map_instances;
97   	  MDSMap::generate_test_instances(mds_map_instances);
98   	
99   	  int k = 20;
100  	  for (auto i : mds_map_instances) {
101  	    auto fs = Filesystem::create();
102  	    fs->fscid = k++;
103  	    fs->mds_map = *i;
104  	    delete i;
105  	    m->filesystems[fs->fscid] = fs;
106  	  }
107  	  mds_map_instances.clear();
108  	
109  	  ls.push_back(m);
110  	}
111  	
112  	void FSMap::print(ostream& out) const
113  	{
114  	  out << "e" << epoch << std::endl;
115  	  out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << ","
116  	      << ever_enabled_multiple << std::endl;
117  	  out << "compat: " << compat << std::endl;
118  	  out << "legacy client fscid: " << legacy_client_fscid << std::endl;
119  	  out << " " << std::endl;
120  	
121  	  if (filesystems.empty()) {
122  	    out << "No filesystems configured" << std::endl;
123  	  }
124  	
125  	  for (const auto& p : filesystems) {
126  	    p.second->print(out);
127  	    out << " " << std::endl << " " << std::endl;  // Space out a bit
128  	  }
129  	
130  	  if (!standby_daemons.empty()) {
131  	    out << "Standby daemons:" << std::endl << " " << std::endl;
132  	  }
133  	
134  	  for (const auto &p : standby_daemons) {
135  	    p.second.print_summary(out);
136  	    out << std::endl;
137  	  }
138  	}
139  	
// Produce a condensed summary of the map.
//
// @param f    if non-null, the summary is written as structured fields
//             through this formatter and `out` is not touched
// @param out  text destination; dereferenced only when `f` is null
//
// The same walk over mds_roles feeds both modes: with a formatter every
// ranked daemon is dumped into a "by_rank" array, in text mode the daemons
// are collected first and printed afterwards.
void FSMap::print_summary(Formatter *f, ostream *out) const
{
  if (f) {
    f->dump_unsigned("epoch", get_epoch());
    // Note: the per-filesystem fields are dumped with the same key names
    // for every filesystem, without an enclosing section.
    for (const auto &p : filesystems) {
      auto& fs = p.second;
      f->dump_unsigned("id", fs->fscid);
      f->dump_unsigned("up", fs->mds_map.up.size());
      f->dump_unsigned("in", fs->mds_map.in.size());
      f->dump_unsigned("max", fs->mds_map.max_mds);
    }
  } else {
    auto count = filesystems.size();
    if (count <= 3) {
      // Few filesystems: list each one as "name:in", or "name:up/in" when
      // it is degraded, separated by single spaces.
      bool first = true;
      for (const auto& p : filesystems) {
        const auto& fs = p.second;
        if (!first) {
          *out << " ";
        }
        if (fs->mds_map.is_degraded()) {
          *out << fs->mds_map.fs_name << ":" << fs->mds_map.up.size() << "/" << fs->mds_map.in.size();
        } else {
          *out << fs->mds_map.fs_name << ":" << fs->mds_map.in.size();
        }
        first = false;
      }
    } else {
      // Many filesystems: print only the count, plus the degraded ones.
      *out << count << " fs";
      unsigned degraded = 0;
      // Built speculatively; only emitted if 1..3 filesystems turn out to
      // be degraded.  Entries are appended without a separator.
      CachedStackStringStream css;
      *css << " (degraded: ";
      for (const auto& p : filesystems) {
        const auto& fs = p.second;
        if (fs->mds_map.is_degraded()) {
          degraded++;
          if (degraded <= 3) {
            *css << fs->mds_map.fs_name << ":" << fs->mds_map.up.size() << "/" << fs->mds_map.in.size();
          }
        }
      }
      if (degraded > 0) {
        if (degraded <= 3) {
          *css << ")";
          *out << css->strv();
        } else {
          // Too many to name individually: print just the number.
          *out << " (degraded: " << degraded << " fs)";
        }
      }
    }
  }

  if (f) {
    f->open_array_section("by_rank");
  }

  // by_state: number of daemons per state (standbys seeded from
  //           standby_daemons, the rest counted in the loop below).
  // by_rank:  text mode only; (state, "name=status") per role, excluding
  //           standby-replay daemons.
  std::map<MDSMap::DaemonState,unsigned> by_state;
  std::map<mds_role_t, std::pair<MDSMap::DaemonState, std::string>> by_rank;
  by_state[MDSMap::DaemonState::STATE_STANDBY] = standby_daemons.size();
  for (const auto& [gid, fscid] : mds_roles) {
    // Daemons without a filesystem are the standbys, already counted above.
    if (fscid == FS_CLUSTER_ID_NONE)
      continue;

    const auto& info = filesystems.at(fscid)->mds_map.get_info_gid(gid);
    auto s = std::string(ceph_mds_state_name(info.state));
    if (info.laggy()) {
      s += "(laggy or crashed)";
    }

    if (f) {
      f->open_object_section("mds");
      f->dump_unsigned("filesystem_id", fscid);
      f->dump_unsigned("rank", info.rank);
      f->dump_string("name", info.name);
      f->dump_string("status", s);
      f->dump_unsigned("gid", gid);
      f->close_section();
    } else if (info.state != MDSMap::DaemonState::STATE_STANDBY_REPLAY) {
      by_rank[mds_role_t(fscid, info.rank)] = std::make_pair(info.state, info.name + "=" + s);
    }
    by_state[info.state]++;
  }

  if (f) {
    f->close_section();
  } else {
    // Individual daemons are printed only when there are between 1 and 4
    // of them; otherwise only the per-state counts below are shown.
    if (0 < by_rank.size() && by_rank.size() < 5) {
      if (filesystems.size() > 1) {
        // Disambiguate filesystems
        std::map<std::string, std::string> pretty;
        for (const auto& [role,status] : by_rank) {
          const auto &fs_name = filesystems.at(role.fscid)->mds_map.fs_name;
          CachedStackStringStream css;
          *css << fs_name << ":" << role.rank;
          pretty.emplace(std::piecewise_construct, std::forward_as_tuple(css->strv()), std::forward_as_tuple(status.second));
          --by_state[status.first]; /* already printed! */
        }
        *out << " " << pretty;
      } else {
        // Omit FSCID in output when only one filesystem exists
        std::map<mds_rank_t, std::string> shortened;
        for (const auto& [role,status] : by_rank) {
          shortened[role.rank] = status.second;
          --by_state[status.first]; /* already printed! */
        }
        *out << " " << shortened;
      }
    }
    // Whatever was not printed individually is reported as "<count> <state>".
    for (const auto& [state, count] : by_state) {
      if (count > 0) {
        auto s = std::string_view(ceph_mds_state_name(state));
        *out << " " << count << " " << s;
      }
    }
  }

  if (f) {
    // Standby count, keyed by the standby state's name.
    const auto state = MDSMap::DaemonState::STATE_STANDBY;
    auto&& name = ceph_mds_state_name(state);
    auto count = standby_daemons.size();
    f->dump_unsigned(name, count);
  }

  // Failed and damaged ranks, summed over all filesystems; each is
  // reported only when non-zero.
  size_t failed = 0;
  size_t damaged = 0;
  for (const auto& p : filesystems) {
    auto& fs = p.second;
    failed += fs->mds_map.failed.size();
    damaged += fs->mds_map.damaged.size();
  }

  if (failed > 0) {
    if (f) {
      f->dump_unsigned("failed", failed);
    } else {
      *out << ", " << failed << " failed";
    }
  }

  if (damaged > 0) {
    if (f) {
      f->dump_unsigned("damaged", damaged);
    } else {
      *out << ", " << damaged << " damaged";
    }
  }
  //if (stopped.size())
  //out << ", " << stopped.size() << " stopped";
}
289  	
290  	mds_gid_t Filesystem::get_standby_replay(mds_gid_t who) const
291  	{
292  	  for (const auto &i : mds_map.mds_info) {
293  	    const auto &info = i.second;
294  	    if (info.state == MDSMap::STATE_STANDBY_REPLAY
295  	        && info.rank == mds_map.mds_info.at(who).rank) {
296  	      return info.global_id;
297  	    }
298  	  }
299  	  return MDS_GID_NONE;
300  	}
301  	
302  	Filesystem::ref FSMap::create_filesystem(std::string_view name,
303  	    int64_t metadata_pool, int64_t data_pool, uint64_t features)
304  	{
305  	  auto fs = Filesystem::create();
306  	  fs->mds_map.epoch = epoch;
307  	  fs->mds_map.fs_name = name;
308  	  fs->mds_map.data_pools.push_back(data_pool);
309  	  fs->mds_map.metadata_pool = metadata_pool;
310  	  fs->mds_map.cas_pool = -1;
311  	  fs->mds_map.compat = compat;
312  	  fs->mds_map.created = ceph_clock_now();
313  	  fs->mds_map.modified = ceph_clock_now();
314  	  fs->mds_map.enabled = true;
315  	  fs->fscid = next_filesystem_id++;
316  	  // ANONYMOUS is only for upgrades from legacy mdsmaps, we should
317  	  // have initialized next_filesystem_id such that it's never used here.
318  	  ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS);
319  	  filesystems[fs->fscid] = fs;
320  	
321  	  // Created first filesystem?  Set it as the one
322  	  // for legacy clients to use
323  	  if (filesystems.size() == 1) {
324  	    legacy_client_fscid = fs->fscid;
325  	  }
326  	
327  	  return fs;
328  	}
329  	
330  	Filesystem::const_ref FSMap::get_filesystem(std::string_view name) const
331  	{
332  	  for (const auto& p : filesystems) {
333  	    if (p.second->mds_map.fs_name == name) {
334  	      return p.second;
335  	    }
336  	  }
337  	  return nullptr;
338  	}
339  	
340  	std::vector<Filesystem::const_ref> FSMap::get_filesystems(void) const
341  	{
342  	  std::vector<Filesystem::const_ref> ret;
343  	  for (const auto& p : filesystems) {
344  	    ret.push_back(p.second);
345  	  }
346  	  return ret;
347  	}
348  	
349  	void FSMap::reset_filesystem(fs_cluster_id_t fscid)
350  	{
351  	  auto fs = get_filesystem(fscid);
352  	  auto new_fs = Filesystem::create();
353  	
354  	  // Populate rank 0 as existing (so don't go into CREATING)
355  	  // but failed (so that next available MDS is assigned the rank)
356  	  new_fs->mds_map.in.insert(mds_rank_t(0));
357  	  new_fs->mds_map.failed.insert(mds_rank_t(0));
358  	
359  	  // Carry forward what makes sense
360  	  new_fs->fscid = fs->fscid;
361  	  new_fs->mds_map.inline_data_enabled = fs->mds_map.inline_data_enabled;
362  	  new_fs->mds_map.data_pools = fs->mds_map.data_pools;
363  	  new_fs->mds_map.metadata_pool = fs->mds_map.metadata_pool;
364  	  new_fs->mds_map.cas_pool = fs->mds_map.cas_pool;
365  	  new_fs->mds_map.fs_name = fs->mds_map.fs_name;
366  	  new_fs->mds_map.compat = compat;
367  	  new_fs->mds_map.created = ceph_clock_now();
368  	  new_fs->mds_map.modified = ceph_clock_now();
369  	  new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted;
370  	  new_fs->mds_map.enabled = true;
371  	
372  	  // Remember mds ranks that have ever started. (They should load old inotable
373  	  // instead of creating new one if they start again.)
374  	  new_fs->mds_map.stopped.insert(fs->mds_map.in.begin(), fs->mds_map.in.end());
375  	  new_fs->mds_map.stopped.insert(fs->mds_map.stopped.begin(), fs->mds_map.stopped.end());
376  	  new_fs->mds_map.stopped.erase(mds_rank_t(0));
377  	
378  	  // Persist the new FSMap
379  	  filesystems[new_fs->fscid] = new_fs;
380  	}
381  	
382  	void FSMap::get_health(list<pair<health_status_t,string> >& summary,
383  				list<pair<health_status_t,string> > *detail) const
384  	{
385  	  mds_rank_t standby_count_wanted = 0;
386  	  for (const auto &i : filesystems) {
387  	    const auto &fs = i.second;
388  	
389  	    // TODO: move get_health up into here so that we can qualify
390  	    // all the messages with what filesystem they're talking about
391  	    fs->mds_map.get_health(summary, detail);
392  	
393  	    standby_count_wanted = std::max(standby_count_wanted, fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size()));
394  	  }
395  	
396  	  if (standby_count_wanted) {
397  	    std::ostringstream oss;
398  	    oss << "insufficient standby daemons available: have " << standby_daemons.size() << "; want " << standby_count_wanted << " more";
399  	    summary.push_back(make_pair(HEALTH_WARN, oss.str()));
400  	  }
401  	}
402  	
403  	bool FSMap::check_health(void)
404  	{
405  	  bool changed = false;
406  	  for (auto &i : filesystems) {
407  	    changed |= i.second->mds_map.check_health((mds_rank_t)standby_daemons.size());
408  	  }
409  	  return changed;
410  	}
411  	
412  	void FSMap::get_health_checks(health_check_map_t *checks) const
413  	{
414  	  mds_rank_t standby_count_wanted = 0;
415  	  for (const auto &i : filesystems) {
416  	    const auto &fs = i.second;
417  	    health_check_map_t fschecks;
418  	
419  	    fs->mds_map.get_health_checks(&fschecks);
420  	
421  	    // Some of the failed ranks might be transient (i.e. there are standbys
422  	    // ready to replace them).  We will report only on "stuck" failed, i.e.
423  	    // ranks which are failed and have no standby replacement available.
424  	    std::set<mds_rank_t> stuck_failed;
425  	
426  	    for (const auto &rank : fs->mds_map.failed) {
427  	      auto&& replacement = find_replacement_for({fs->fscid, rank}, {});
428  	      if (replacement == MDS_GID_NONE) {
429  	        stuck_failed.insert(rank);
430  	      }
431  	    }
432  	
433  	    // FS_WITH_FAILED_MDS
434  	    if (!stuck_failed.empty()) {
435  	      health_check_t& fscheck = checks->get_or_add(
436  	        "FS_WITH_FAILED_MDS", HEALTH_WARN,
437  	        "%num% filesystem%plurals% %hasorhave% a failed mds daemon", 1);
438  	      ostringstream ss;
439  	      ss << "fs " << fs->mds_map.fs_name << " has " << stuck_failed.size()
440  	         << " failed mds" << (stuck_failed.size() > 1 ? "s" : "");
441  	      fscheck.detail.push_back(ss.str()); }
442  	
443  	    checks->merge(fschecks);
444  	    standby_count_wanted = std::max(
445  	      standby_count_wanted,
446  	      fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size()));
447  	  }
448  	
449  	  // MDS_INSUFFICIENT_STANDBY
450  	  if (standby_count_wanted) {
451  	    std::ostringstream oss, dss;
452  	    oss << "insufficient standby MDS daemons available";
453  	    auto& d = checks->get_or_add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN, oss.str(), 1);
454  	    dss << "have " << standby_daemons.size() << "; want " << standby_count_wanted
455  		<< " more";
456  	    d.detail.push_back(dss.str());
457  	  }
458  	}
459  	
460  	void FSMap::update_compat(const CompatSet &c)
461  	{
462  	  // We could do something more complicated here to enable
463  	  // different filesystems to be served by different MDS versions,
464  	  // but this is a lot simpler because it doesn't require us to
465  	  // track the compat versions for standby daemons.
466  	  compat = c;
467  	  for (const auto &i : filesystems) {
468  	    MDSMap &mds_map = i.second->mds_map;
469  	    mds_map.compat = c;
470  	    mds_map.epoch = epoch;
471  	  }
472  	}
473  	
// Serialize this FSMap into `bl`.
//
// @param bl        buffer the encoding is appended to
// @param features  passed on to the encoders of the filesystems and of
//                  standby_daemons
//
// The field order below is what FSMap::decode reads back in its
// struct_v >= 6 branch, so it must not be rearranged.
void FSMap::encode(bufferlist& bl, uint64_t features) const
{
  ENCODE_START(7, 6, bl);
  encode(epoch, bl);
  encode(next_filesystem_id, bl);
  encode(legacy_client_fscid, bl);
  encode(compat, bl);
  encode(enable_multiple, bl);
  {
    // Filesystems are written as a flat vector of references; decode
    // rebuilds the container keyed by each filesystem's fscid.
    std::vector<Filesystem::ref> v;
    v.reserve(filesystems.size());
    for (auto& p : filesystems) v.emplace_back(p.second);
    encode(v, bl, features);
  }
  encode(mds_roles, bl);
  encode(standby_daemons, bl, features);
  encode(standby_epochs, bl);
  // Last field; decode reads it only when struct_v >= 7.
  encode(ever_enabled_multiple, bl);
  ENCODE_FINISH(bl);
}
494  	
495  	void FSMap::decode(bufferlist::const_iterator& p)
496  	{
497  	  // The highest MDSMap encoding version before we changed the
498  	  // MDSMonitor to store an FSMap instead of an MDSMap was
499  	  // 5, so anything older than 6 is decoded as an MDSMap,
500  	  // and anything newer is decoded as an FSMap.
501  	  DECODE_START_LEGACY_COMPAT_LEN_16(7, 4, 4, p);
502  	  if (struct_v < 6) {
503  	    // Because the mon used to store an MDSMap where we now
504  	    // store an FSMap, FSMap knows how to decode the legacy
505  	    // MDSMap format (it never needs to encode it though).
506  	    MDSMap legacy_mds_map;
507  	
508  	    // Decoding an MDSMap (upgrade)
509  	    decode(epoch, p);
510  	    decode(legacy_mds_map.flags, p);
511  	    decode(legacy_mds_map.last_failure, p);
512  	    decode(legacy_mds_map.root, p);
513  	    decode(legacy_mds_map.session_timeout, p);
514  	    decode(legacy_mds_map.session_autoclose, p);
515  	    decode(legacy_mds_map.max_file_size, p);
516  	    decode(legacy_mds_map.max_mds, p);
517  	    decode(legacy_mds_map.mds_info, p);
518  	    if (struct_v < 3) {
519  	      __u32 n;
520  	      decode(n, p);
521  	      while (n--) {
522  	        __u32 m;
523  	        decode(m, p);
524  	        legacy_mds_map.data_pools.push_back(m);
525  	      }
526  	      __s32 s;
527  	      decode(s, p);
528  	      legacy_mds_map.cas_pool = s;
529  	    } else {
530  	      decode(legacy_mds_map.data_pools, p);
531  	      decode(legacy_mds_map.cas_pool, p);
532  	    }
533  	
534  	    // kclient ignores everything from here
535  	    __u16 ev = 1;
536  	    if (struct_v >= 2)
537  	      decode(ev, p);
538  	    if (ev >= 3)
539  	      decode(legacy_mds_map.compat, p);
540  	    else
541  	      legacy_mds_map.compat = MDSMap::get_compat_set_base();
542  	    if (ev < 5) {
543  	      __u32 n;
544  	      decode(n, p);
545  	      legacy_mds_map.metadata_pool = n;
546  	    } else {
547  	      decode(legacy_mds_map.metadata_pool, p);
548  	    }
549  	    decode(legacy_mds_map.created, p);
550  	    decode(legacy_mds_map.modified, p);
551  	    decode(legacy_mds_map.tableserver, p);
552  	    decode(legacy_mds_map.in, p);
553  	    std::map<mds_rank_t,int32_t> inc;  // Legacy field, parse and drop
554  	    decode(inc, p);
555  	    decode(legacy_mds_map.up, p);
556  	    decode(legacy_mds_map.failed, p);
557  	    decode(legacy_mds_map.stopped, p);
558  	    if (ev >= 4)
559  	      decode(legacy_mds_map.last_failure_osd_epoch, p);
560  	    if (ev >= 6) {
561  	      if (ev < 10) {
562  		// previously this was a bool about snaps, not a flag map
563  		bool flag;
564  		decode(flag, p);
565  		legacy_mds_map.ever_allowed_features = flag ?
566  		  CEPH_MDSMAP_ALLOW_SNAPS : 0;
567  		decode(flag, p);
568  		legacy_mds_map.explicitly_allowed_features = flag ?
569  		  CEPH_MDSMAP_ALLOW_SNAPS : 0;
570  	      } else {
571  		decode(legacy_mds_map.ever_allowed_features, p);
572  		decode(legacy_mds_map.explicitly_allowed_features, p);
573  	      }
574  	    } else {
575  	      legacy_mds_map.ever_allowed_features = 0;
576  	      legacy_mds_map.explicitly_allowed_features = 0;
577  	    }
578  	    if (ev >= 7)
579  	      decode(legacy_mds_map.inline_data_enabled, p);
580  	
581  	    if (ev >= 8) {
582  	      ceph_assert(struct_v >= 5);
583  	      decode(legacy_mds_map.enabled, p);
584  	      decode(legacy_mds_map.fs_name, p);
585  	    } else {
586  	      legacy_mds_map.fs_name = "default";
587  	      if (epoch > 1) {
588  	        // If an MDS has ever been started, epoch will be greater than 1,
589  	        // assume filesystem is enabled.
590  	        legacy_mds_map.enabled = true;
591  	      } else {
592  	        // Upgrading from a cluster that never used an MDS, switch off
593  	        // filesystem until it's explicitly enabled.
594  	        legacy_mds_map.enabled = false;
595  	      }
596  	    }
597  	
598  	    if (ev >= 9) {
599  	      decode(legacy_mds_map.damaged, p);
600  	    }
601  	
602  	    // We're upgrading, populate filesystems from the legacy fields
603  	    filesystems.clear();
604  	    standby_daemons.clear();
605  	    standby_epochs.clear();
606  	    mds_roles.clear();
607  	    compat = legacy_mds_map.compat;
608  	    enable_multiple = false;
609  	
610  	    // Synthesise a Filesystem from legacy_mds_map, if enabled
611  	    if (legacy_mds_map.enabled) {
612  	      // Construct a Filesystem from the legacy MDSMap
613  	      auto migrate_fs = Filesystem::create();
614  	      migrate_fs->fscid = FS_CLUSTER_ID_ANONYMOUS;
615  	      migrate_fs->mds_map = legacy_mds_map;
616  	      migrate_fs->mds_map.epoch = epoch;
617  	      filesystems[migrate_fs->fscid] = migrate_fs;
618  	
619  	      // List of GIDs that had invalid states
620  	      std::set<mds_gid_t> drop_gids;
621  	
622  	      // Construct mds_roles, standby_daemons, and remove
623  	      // standbys from the MDSMap in the Filesystem.
624  	      for (const auto& [gid, info] : migrate_fs->mds_map.mds_info) {
625  	        if (info.state == MDSMap::STATE_STANDBY_REPLAY) {
626  	          /* drop any legacy standby-replay daemons */
627  	          drop_gids.insert(gid);
628  	        } else if (info.rank == MDS_RANK_NONE) {
629  	          if (info.state != MDSMap::STATE_STANDBY) {
630  	            // Old MDSMaps can have down:dne here, which
631  	            // is invalid in an FSMap (#17837)
632  	            drop_gids.insert(gid);
633  	          } else {
634  	            insert(info); // into standby_daemons
635  	          }
636  	        } else {
637  	          mds_roles[gid] = migrate_fs->fscid;
638  	        }
639  	      }
(1) Event parameter_hidden: declaration hides parameter "p" (declared at line 495)
(2) Event caretline: ^
640  	      for (const auto &p : standby_daemons) {
641  	        // Erase from this Filesystem's MDSMap, because it has
642  	        // been copied into FSMap::Standby_daemons above
643  	        migrate_fs->mds_map.mds_info.erase(p.first);
644  	      }
645  	      for (const auto &gid : drop_gids) {
646  	        // Throw away all info for this MDS because it was identified
647  	        // as having invalid state above.
648  	        migrate_fs->mds_map.mds_info.erase(gid);
649  	      }
650  	
651  	      legacy_client_fscid = migrate_fs->fscid;
652  	    } else {
653  	      legacy_client_fscid = FS_CLUSTER_ID_NONE;
654  	    }
655  	  } else {
656  	    decode(epoch, p);
657  	    decode(next_filesystem_id, p);
658  	    decode(legacy_client_fscid, p);
659  	    decode(compat, p);
660  	    decode(enable_multiple, p);
661  	    {
662  	      std::vector<Filesystem::ref> v;
663  	      decode(v, p);
664  	      filesystems.clear();
665  	      for (auto& ref : v) {
666  	        auto em = filesystems.emplace(std::piecewise_construct, std::forward_as_tuple(ref->fscid), std::forward_as_tuple(std::move(ref)));
667  	        ceph_assert(em.second);
668  	      }
669  	    }
670  	    decode(mds_roles, p);
671  	    decode(standby_daemons, p);
672  	    decode(standby_epochs, p);
673  	    if (struct_v >= 7) {
674  	      decode(ever_enabled_multiple, p);
675  	    }
676  	  }
677  	
678  	  DECODE_FINISH(p);
679  	}
680  	
681  	void FSMap::sanitize(const std::function<bool(int64_t pool)>& pool_exists)
682  	{
683  	  for (auto &fs : filesystems) {
684  	    fs.second->mds_map.sanitize(pool_exists);
685  	  }
686  	}
687  	
688  	void Filesystem::encode(bufferlist& bl, uint64_t features) const
689  	{
690  	  ENCODE_START(1, 1, bl);
691  	  encode(fscid, bl);
692  	  bufferlist mdsmap_bl;
693  	  mds_map.encode(mdsmap_bl, features);
694  	  encode(mdsmap_bl, bl);
695  	  ENCODE_FINISH(bl);
696  	}
697  	
698  	void Filesystem::decode(bufferlist::const_iterator& p)
699  	{
700  	  DECODE_START(1, p);
701  	  decode(fscid, p);
702  	  bufferlist mdsmap_bl;
703  	  decode(mdsmap_bl, p);
704  	  auto mdsmap_bl_iter = mdsmap_bl.cbegin();
705  	  mds_map.decode(mdsmap_bl_iter);
706  	  DECODE_FINISH(p);
707  	}
708  	
709  	int FSMap::parse_filesystem(
710  	      std::string_view ns_str,
711  	      Filesystem::const_ref* result
712  	      ) const
713  	{
714  	  std::string ns_err;
715  	  std::string s(ns_str);
716  	  fs_cluster_id_t fscid = strict_strtol(s.c_str(), 10, &ns_err);
717  	  if (!ns_err.empty() || filesystems.count(fscid) == 0) {
718  	    for (auto &fs : filesystems) {
719  	      if (fs.second->mds_map.fs_name == s) {
720  	        *result = std::const_pointer_cast<const Filesystem>(fs.second);
721  	        return 0;
722  	      }
723  	    }
724  	    return -ENOENT;
725  	  } else {
726  	    *result = get_filesystem(fscid);
727  	    return 0;
728  	  }
729  	}
730  	
731  	void Filesystem::print(std::ostream &out) const
732  	{
733  	  out << "Filesystem '" << mds_map.fs_name
734  	      << "' (" << fscid << ")" << std::endl;
735  	  mds_map.print(out);
736  	}
737  	
738  	bool FSMap::is_any_degraded() const
739  	{
740  	  for (auto& i : filesystems) {
741  	    if (i.second->mds_map.is_degraded()) {
742  	      return true;
743  	    }
744  	  }
745  	  return false;
746  	}
747  	
748  	std::map<mds_gid_t, MDSMap::mds_info_t> FSMap::get_mds_info() const
749  	{
750  	  std::map<mds_gid_t, MDSMap::mds_info_t> result;
751  	  for (const auto &i : standby_daemons) {
752  	    result[i.first] = i.second;
753  	  }
754  	
755  	  for (const auto &i : filesystems) {
756  	    const auto &fs_info = i.second->mds_map.get_mds_info();
757  	    for (const auto &j : fs_info) {
758  	      result[j.first] = j.second;
759  	    }
760  	  }
761  	
762  	  return result;
763  	}
764  	
765  	mds_gid_t FSMap::get_available_standby() const
766  	{
767  	  for (const auto& [gid, info] : standby_daemons) {
768  	    ceph_assert(info.rank == MDS_RANK_NONE);
769  	    ceph_assert(info.state == MDSMap::STATE_STANDBY);
770  	
771  	    if (info.laggy() || info.is_frozen()) {
772  	      continue;
773  	    }
774  	
775  	    return gid;
776  	  }
777  	  return MDS_GID_NONE;
778  	}
779  	
780  	mds_gid_t FSMap::find_mds_gid_by_name(std::string_view s) const
781  	{
782  	  const auto info = get_mds_info();
783  	  for (const auto &p : info) {
784  	    if (p.second.name == s) {
785  	      return p.first;
786  	    }
787  	  }
788  	  return MDS_GID_NONE;
789  	}
790  	
791  	const MDSMap::mds_info_t* FSMap::find_by_name(std::string_view name) const
792  	{
793  	  std::map<mds_gid_t, MDSMap::mds_info_t> result;
794  	  for (const auto &i : standby_daemons) {
795  	    if (i.second.name == name) {
796  	      return &(i.second);
797  	    }
798  	  }
799  	
800  	  for (const auto &i : filesystems) {
801  	    const auto &fs_info = i.second->mds_map.get_mds_info();
802  	    for (const auto &j : fs_info) {
803  	      if (j.second.name == name) {
804  	        return &(j.second);
805  	      }
806  	    }
807  	  }
808  	
809  	  return nullptr;
810  	}
811  	
812  	mds_gid_t FSMap::find_replacement_for(mds_role_t role, std::string_view name) const
813  	{
814  	  auto&& fs = get_filesystem(role.fscid);
815  	
816  	  // First see if we have a STANDBY_REPLAY
817  	  for (const auto& [gid, info] : fs->mds_map.mds_info) {
818  	    if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) {
819  	      if (info.is_frozen()) {
820  	        /* the standby-replay is frozen, do nothing! */
821  	        return MDS_GID_NONE;
822  	      } else {
823  	        return gid;
824  	      }
825  	    }
826  	  }
827  	
828  	  return get_available_standby();
829  	}
830  	
// Assert the internal consistency of the map.  Nothing is modified and
// nothing is returned; a violated invariant trips a ceph_assert.
//
// Checked invariants, in order:
//  - the default fscid, if set, names an existing filesystem;
//  - per filesystem: compat matches the FSMap's, the stored fscid matches
//    its key, and every daemon and every "up" rank is cross-referenced
//    correctly;
//  - standby_daemons, standby_epochs and mds_roles agree with each other
//    and with the filesystems.
void FSMap::sanity() const
{
  if (legacy_client_fscid != FS_CLUSTER_ID_NONE) {
    ceph_assert(filesystems.count(legacy_client_fscid) == 1);
  }

  for (const auto &i : filesystems) {
    auto fs = i.second;
    ceph_assert(fs->mds_map.compat.compare(compat) == 0);
    ceph_assert(fs->fscid == i.first);
    // Every daemon inside a filesystem holds a rank, has a role pointing
    // at this filesystem, and is not also tracked as a standby.
    for (const auto &j : fs->mds_map.mds_info) {
      ceph_assert(j.second.rank != MDS_RANK_NONE);
      ceph_assert(mds_roles.count(j.first) == 1);
      ceph_assert(standby_daemons.count(j.first) == 0);
      ceph_assert(standby_epochs.count(j.first) == 0);
      ceph_assert(mds_roles.at(j.first) == i.first);
      // Standby-replay daemons share a rank with another daemon, so only
      // the others must be the "up" holder of their rank.
      if (j.second.state != MDSMap::STATE_STANDBY_REPLAY) {
        ceph_assert(fs->mds_map.up.at(j.second.rank) == j.first);
        ceph_assert(fs->mds_map.failed.count(j.second.rank) == 0);
        ceph_assert(fs->mds_map.damaged.count(j.second.rank) == 0);
      }
    }

    // Every "up" rank is "in" and is held by a daemon this map knows.
    for (const auto &j : fs->mds_map.up) {
      mds_rank_t rank = j.first;
      ceph_assert(fs->mds_map.in.count(rank) == 1);
      mds_gid_t gid = j.second;
      ceph_assert(fs->mds_map.mds_info.count(gid) == 1);
    }
  }

  // Standbys: right state, no rank, keyed by their own gid, with an epoch
  // and a role of "no filesystem".
  for (const auto &i : standby_daemons) {
    ceph_assert(i.second.state == MDSMap::STATE_STANDBY);
    ceph_assert(i.second.rank == MDS_RANK_NONE);
    ceph_assert(i.second.global_id == i.first);
    ceph_assert(standby_epochs.count(i.first) == 1);
    ceph_assert(mds_roles.count(i.first) == 1);
    ceph_assert(mds_roles.at(i.first) == FS_CLUSTER_ID_NONE);
  }

  // No epoch is recorded for a gid that is not a standby.
  for (const auto &i : standby_epochs) {
    ceph_assert(standby_daemons.count(i.first) == 1);
  }

  // Every role points either at a standby or at a daemon that exists in
  // the named filesystem.
  for (const auto &i : mds_roles) {
    if (i.second == FS_CLUSTER_ID_NONE) {
      ceph_assert(standby_daemons.count(i.first) == 1);
    } else {
      ceph_assert(filesystems.count(i.second) == 1);
      ceph_assert(filesystems.at(i.second)->mds_map.mds_info.count(i.first) == 1);
    }
  }
}
884  	
// Give daemon `standby_gid` the rank `assigned_rank` in `filesystem`.
//
// The daemon is either a plain standby (tracked in standby_daemons) or a
// standby-replay daemon that already lives in the filesystem's MDSMap.
//
// @param standby_gid    daemon to promote; must exist in this map
// @param filesystem     filesystem that receives the daemon
// @param assigned_rank  rank the daemon takes over
void FSMap::promote(
    mds_gid_t standby_gid,
    Filesystem& filesystem,
    mds_rank_t assigned_rank)
{
  ceph_assert(gid_exists(standby_gid));
  // A daemon whose role already names a filesystem is treated as
  // standby-replay; a plain standby has the role FS_CLUSTER_ID_NONE.
  bool is_standby_replay = mds_roles.at(standby_gid) != FS_CLUSTER_ID_NONE;
  if (!is_standby_replay) {
    ceph_assert(standby_daemons.count(standby_gid));
    ceph_assert(standby_daemons.at(standby_gid).state == MDSMap::STATE_STANDBY);
  }

  MDSMap &mds_map = filesystem.mds_map;

  // Insert daemon state to Filesystem
  if (!is_standby_replay) {
    mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid);
  } else {
    // Already present in the MDSMap: it must be following the very rank
    // it is now being given.
    ceph_assert(mds_map.mds_info.count(standby_gid));
    ceph_assert(mds_map.mds_info.at(standby_gid).state == MDSMap::STATE_STANDBY_REPLAY);
    ceph_assert(mds_map.mds_info.at(standby_gid).rank == assigned_rank);
  }
  MDSMap::mds_info_t &info = mds_map.mds_info[standby_gid];

  // The new state depends on the rank's history.  Note that the first
  // condition also removes the rank from `stopped` as a side effect.
  if (mds_map.stopped.erase(assigned_rank)) {
    // The cluster is being expanded with a stopped rank
    info.state = MDSMap::STATE_STARTING;
  } else if (!mds_map.is_in(assigned_rank)) {
    // The cluster is being expanded with a new rank
    info.state = MDSMap::STATE_CREATING;
  } else {
    // An existing rank is being assigned to a replacement
    info.state = MDSMap::STATE_REPLAY;
    mds_map.failed.erase(assigned_rank);
  }
  info.rank = assigned_rank;
  info.inc = epoch;
  mds_roles[standby_gid] = filesystem.fscid;

  // Update the rank state in Filesystem
  mds_map.in.insert(assigned_rank);
  mds_map.up[assigned_rank] = standby_gid;

  // Remove from the list of standbys
  // (only after its info was copied into the MDSMap above)
  if (!is_standby_replay) {
    standby_daemons.erase(standby_gid);
    standby_epochs.erase(standby_gid);
  }

  // Indicate that Filesystem has been modified
  mds_map.epoch = epoch;
}
937  	
938  	void FSMap::assign_standby_replay(
939  	    const mds_gid_t standby_gid,
940  	    const fs_cluster_id_t leader_ns,
941  	    const mds_rank_t leader_rank)
942  	{
943  	  ceph_assert(mds_roles.at(standby_gid) == FS_CLUSTER_ID_NONE);
944  	  ceph_assert(gid_exists(standby_gid));
945  	  ceph_assert(!gid_has_rank(standby_gid));
946  	  ceph_assert(standby_daemons.count(standby_gid));
947  	
948  	  // Insert to the filesystem
949  	  auto fs = filesystems.at(leader_ns);
950  	  fs->mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid);
951  	  fs->mds_map.mds_info[standby_gid].rank = leader_rank;
952  	  fs->mds_map.mds_info[standby_gid].state = MDSMap::STATE_STANDBY_REPLAY;
953  	  mds_roles[standby_gid] = leader_ns;
954  	
955  	  // Remove from the list of standbys
956  	  standby_daemons.erase(standby_gid);
957  	  standby_epochs.erase(standby_gid);
958  	
959  	  // Indicate that Filesystem has been modified
960  	  fs->mds_map.epoch = epoch;
961  	}
962  	
963  	void FSMap::erase(mds_gid_t who, epoch_t blacklist_epoch)
964  	{
965  	  if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) {
966  	    standby_daemons.erase(who);
967  	    standby_epochs.erase(who);
968  	  } else {
969  	    auto &fs = filesystems.at(mds_roles.at(who));
970  	    const auto &info = fs->mds_map.mds_info.at(who);
971  	    if (info.state != MDSMap::STATE_STANDBY_REPLAY) {
972  	      if (info.state == MDSMap::STATE_CREATING) {
973  	        // If this gid didn't make it past CREATING, then forget
974  	        // the rank ever existed so that next time it's handed out
975  	        // to a gid it'll go back into CREATING.
976  	        fs->mds_map.in.erase(info.rank);
977  	      } else {
978  	        // Put this rank into the failed list so that the next available
979  	        // STANDBY will pick it up.
980  	        fs->mds_map.failed.insert(info.rank);
981  	      }
982  	      ceph_assert(fs->mds_map.up.at(info.rank) == info.global_id);
983  	      fs->mds_map.up.erase(info.rank);
984  	    }
985  	    fs->mds_map.mds_info.erase(who);
986  	    fs->mds_map.last_failure_osd_epoch = blacklist_epoch;
987  	    fs->mds_map.epoch = epoch;
988  	  }
989  	
990  	  mds_roles.erase(who);
991  	}
992  	
993  	void FSMap::damaged(mds_gid_t who, epoch_t blacklist_epoch)
994  	{
995  	  ceph_assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE);
996  	  auto fs = filesystems.at(mds_roles.at(who));
997  	  mds_rank_t rank = fs->mds_map.mds_info[who].rank;
998  	
999  	  erase(who, blacklist_epoch);
1000 	  fs->mds_map.failed.erase(rank);
1001 	  fs->mds_map.damaged.insert(rank);
1002 	
1003 	  ceph_assert(fs->mds_map.epoch == epoch);
1004 	}
1005 	
1006 	/**
1007 	 * Update to indicate that the rank `rank` is to be removed
1008 	 * from the damaged list of the filesystem `fscid`
1009 	 */
1010 	bool FSMap::undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank)
1011 	{
1012 	  auto fs = filesystems.at(fscid);
1013 	
1014 	  if (fs->mds_map.damaged.erase(rank)) {
1015 	    fs->mds_map.failed.insert(rank);
1016 	    fs->mds_map.epoch = epoch;
1017 	    return true;
1018 	  } else {
1019 	    return false;
1020 	  }
1021 	}
1022 	
1023 	void FSMap::insert(const MDSMap::mds_info_t &new_info)
1024 	{
1025 	  ceph_assert(new_info.state == MDSMap::STATE_STANDBY);
1026 	  ceph_assert(new_info.rank == MDS_RANK_NONE);
1027 	  mds_roles[new_info.global_id] = FS_CLUSTER_ID_NONE;
1028 	  standby_daemons[new_info.global_id] = new_info;
1029 	  standby_epochs[new_info.global_id] = epoch;
1030 	}
1031 	
1032 	std::vector<mds_gid_t> FSMap::stop(mds_gid_t who)
1033 	{
1034 	  ceph_assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE);
1035 	  auto fs = filesystems.at(mds_roles.at(who));
1036 	  const auto &info = fs->mds_map.mds_info.at(who);
1037 	  fs->mds_map.up.erase(info.rank);
1038 	  fs->mds_map.in.erase(info.rank);
1039 	  fs->mds_map.stopped.insert(info.rank);
1040 	
1041 	  // Also drop any standby replays that were following this rank
1042 	  std::vector<mds_gid_t> standbys;
1043 	  for (const auto &i : fs->mds_map.mds_info) {
1044 	    const auto &other_gid = i.first;
1045 	    const auto &other_info = i.second;
1046 	    if (other_info.rank == info.rank
1047 	        && other_info.state == MDSMap::STATE_STANDBY_REPLAY) {
1048 	      standbys.push_back(other_gid);
1049 	      erase(other_gid, 0);
1050 	    }
1051 	  }
1052 	
1053 	  fs->mds_map.mds_info.erase(who);
1054 	  mds_roles.erase(who);
1055 	
1056 	  fs->mds_map.epoch = epoch;
1057 	
1058 	  return standbys;
1059 	}
1060 	
1061 	
1062 	/**
1063 	 * Given one of the following forms:
1064 	 *   <fs name>:<rank>
1065 	 *   <fs id>:<rank>
1066 	 *   <rank>
1067 	 *
1068 	 * Parse into a mds_role_t.  The rank-only form is only valid
1069 	 * if legacy_client_ns is set.
1070 	 */
1071 	int FSMap::parse_role(
1072 	    std::string_view role_str,
1073 	    mds_role_t *role,
1074 	    std::ostream &ss) const
1075 	{
1076 	  size_t colon_pos = role_str.find(":");
1077 	  size_t rank_pos;
1078 	  Filesystem::const_ref fs;
1079 	  if (colon_pos == std::string::npos) {
1080 	    if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
1081 	      ss << "No filesystem selected";
1082 	      return -ENOENT;
1083 	    }
1084 	    fs = get_filesystem(legacy_client_fscid);
1085 	    rank_pos = 0;
1086 	  } else {
1087 	    if (parse_filesystem(role_str.substr(0, colon_pos), &fs) < 0) {
1088 	      ss << "Invalid filesystem";
1089 	      return -ENOENT;
1090 	    }
1091 	    rank_pos = colon_pos+1;
1092 	  }
1093 	
1094 	  mds_rank_t rank;
1095 	  std::string err;
1096 	  std::string rank_str(role_str.substr(rank_pos));
1097 	  long rank_i = strict_strtol(rank_str.c_str(), 10, &err);
1098 	  if (rank_i < 0 || !err.empty()) {
1099 	    ss << "Invalid rank '" << rank_str << "'";
1100 	    return -EINVAL;
1101 	  } else {
1102 	    rank = rank_i;
1103 	  }
1104 	
1105 	  if (fs->mds_map.in.count(rank) == 0) {
1106 	    ss << "Rank '" << rank << "' not found";
1107 	    return -ENOENT;
1108 	  }
1109 	
1110 	  *role = {fs->fscid, rank};
1111 	
1112 	  return 0;
1113 	}
1114 	
1115 	bool FSMap::pool_in_use(int64_t poolid) const
1116 	{
1117 	  for (auto const &i : filesystems) {
1118 	    if (i.second->mds_map.is_data_pool(poolid)
1119 	        || i.second->mds_map.metadata_pool == poolid) {
1120 	      return true;
1121 	    }
1122 	  }
1123 	  return false;
1124 	}
1125