1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
2    	// vim: ts=8 sw=2 smarttab
3    	/*
4    	 * Ceph - scalable distributed file system
5    	 *
6    	 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7    	 *
8    	 * This is free software; you can redistribute it and/or
9    	 * modify it under the terms of the GNU Lesser General Public
10   	 * License version 2.1, as published by the Free Software 
11   	 * Foundation.  See file COPYING.
12   	 * 
13   	 */
14   	
15   	#include "common/debug.h"
16   	#include "mon/health_check.h"
17   	
18   	#include "MDSMap.h"
19   	
20   	#include <sstream>
21   	using std::stringstream;
22   	
23   	#define dout_context g_ceph_context
24   	#define dout_subsys ceph_subsys_
25   	
26   	// features
27   	CompatSet MDSMap::get_compat_set_all() {
28   	  CompatSet::FeatureSet feature_compat;
29   	  CompatSet::FeatureSet feature_ro_compat;
30   	  CompatSet::FeatureSet feature_incompat;
31   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE);
32   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
33   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
34   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
35   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
36   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG);
37   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
38   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR);
39   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2);
40   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_SNAPREALM_V2);
41   	
42   	  return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
43   	}
44   	
45   	CompatSet MDSMap::get_compat_set_default() {
46   	  CompatSet::FeatureSet feature_compat;
47   	  CompatSet::FeatureSet feature_ro_compat;
48   	  CompatSet::FeatureSet feature_incompat;
49   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE);
50   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
51   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
52   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
53   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
54   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG);
55   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR);
56   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2);
57   	  feature_incompat.insert(MDS_FEATURE_INCOMPAT_SNAPREALM_V2);
58   	
59   	  return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
60   	}
61   	
62   	// base (pre v0.20)
63   	CompatSet MDSMap::get_compat_set_base() {
64   	  CompatSet::FeatureSet feature_compat_base;
65   	  CompatSet::FeatureSet feature_incompat_base;
66   	  feature_incompat_base.insert(MDS_FEATURE_INCOMPAT_BASE);
67   	  CompatSet::FeatureSet feature_ro_compat_base;
68   	
69   	  return CompatSet(feature_compat_base, feature_ro_compat_base, feature_incompat_base);
70   	}
71   	
72   	void MDSMap::mds_info_t::dump(Formatter *f) const
73   	{
74   	  f->dump_unsigned("gid", global_id);
75   	  f->dump_string("name", name);
76   	  f->dump_int("rank", rank);
77   	  f->dump_int("incarnation", inc);
78   	  f->dump_stream("state") << ceph_mds_state_name(state);
79   	  f->dump_int("state_seq", state_seq);
80   	  f->dump_stream("addr") << addrs.get_legacy_str();
81   	  f->dump_object("addrs", addrs);
82   	  if (laggy_since != utime_t())
83   	    f->dump_stream("laggy_since") << laggy_since;
84   	  
85   	  f->open_array_section("export_targets");
86   	  for (set<mds_rank_t>::iterator p = export_targets.begin();
87   	       p != export_targets.end(); ++p) {
88   	    f->dump_int("mds", *p);
89   	  }
90   	  f->close_section();
91   	  f->dump_unsigned("features", mds_features);
92   	  f->dump_unsigned("flags", flags);
93   	}
94   	
95   	void MDSMap::mds_info_t::print_summary(ostream &out) const
96   	{
97   	  out << global_id << ":\t"
98   	      << addrs
99   	      << " '" << name << "'"
100  	      << " mds." << rank
101  	      << "." << inc
102  	      << " " << ceph_mds_state_name(state)
103  	      << " seq " << state_seq;
104  	  if (laggy()) {
105  	    out << " laggy since " << laggy_since;
106  	  }
107  	  if (!export_targets.empty()) {
108  	    out << " export_targets=" << export_targets;
109  	  }
110  	  if (is_frozen()) {
111  	    out << " frozen";
112  	  }
113  	}
114  	
115  	void MDSMap::mds_info_t::generate_test_instances(std::list<mds_info_t*>& ls)
116  	{
117  	  mds_info_t *sample = new mds_info_t();
118  	  ls.push_back(sample);
119  	  sample = new mds_info_t();
120  	  sample->global_id = 1;
121  	  sample->name = "test_instance";
122  	  sample->rank = 0;
123  	  ls.push_back(sample);
124  	}
125  	
126  	void MDSMap::dump(Formatter *f) const
127  	{
128  	  f->dump_int("epoch", epoch);
129  	  f->dump_unsigned("flags", flags);
130  	  f->dump_unsigned("ever_allowed_features", ever_allowed_features);
131  	  f->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features);
132  	  f->dump_stream("created") << created;
133  	  f->dump_stream("modified") << modified;
134  	  f->dump_int("tableserver", tableserver);
135  	  f->dump_int("root", root);
136  	  f->dump_int("session_timeout", session_timeout);
137  	  f->dump_int("session_autoclose", session_autoclose);
138  	  f->dump_stream("min_compat_client") << ceph::to_integer<int>(min_compat_client) << " ("
139  					      << min_compat_client << ")";
140  	  f->dump_int("max_file_size", max_file_size);
141  	  f->dump_int("last_failure", last_failure);
142  	  f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch);
143  	  f->open_object_section("compat");
144  	  compat.dump(f);
145  	  f->close_section();
146  	  f->dump_int("max_mds", max_mds);
147  	  f->open_array_section("in");
148  	  for (set<mds_rank_t>::const_iterator p = in.begin(); p != in.end(); ++p)
149  	    f->dump_int("mds", *p);
150  	  f->close_section();
151  	  f->open_object_section("up");
152  	  for (map<mds_rank_t,mds_gid_t>::const_iterator p = up.begin(); p != up.end(); ++p) {
153  	    char s[14];
154  	    sprintf(s, "mds_%d", int(p->first));
155  	    f->dump_int(s, p->second);
156  	  }
157  	  f->close_section();
158  	  f->open_array_section("failed");
159  	  for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p)
160  	    f->dump_int("mds", *p);
161  	  f->close_section();
162  	  f->open_array_section("damaged");
163  	  for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p)
164  	    f->dump_int("mds", *p);
165  	  f->close_section();
166  	  f->open_array_section("stopped");
167  	  for (set<mds_rank_t>::const_iterator p = stopped.begin(); p != stopped.end(); ++p)
168  	    f->dump_int("mds", *p);
169  	  f->close_section();
170  	  f->open_object_section("info");
171  	  for (map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) {
172  	    char s[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0'
173  	    sprintf(s, "gid_%llu", (long long unsigned)p->first);
174  	    f->open_object_section(s);
175  	    p->second.dump(f);
176  	    f->close_section();
177  	  }
178  	  f->close_section();
179  	  f->open_array_section("data_pools");
180  	  for (const auto p: data_pools)
181  	    f->dump_int("pool", p);
182  	  f->close_section();
183  	  f->dump_int("metadata_pool", metadata_pool);
184  	  f->dump_bool("enabled", enabled);
185  	  f->dump_string("fs_name", fs_name);
186  	  f->dump_string("balancer", balancer);
187  	  f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted));
188  	}
189  	
190  	void MDSMap::generate_test_instances(std::list<MDSMap*>& ls)
191  	{
192  	  MDSMap *m = new MDSMap();
193  	  m->max_mds = 1;
194  	  m->data_pools.push_back(0);
195  	  m->metadata_pool = 1;
196  	  m->cas_pool = 2;
197  	  m->compat = get_compat_set_all();
198  	
199  	  // these aren't the defaults, just in case anybody gets confused
200  	  m->session_timeout = 61;
201  	  m->session_autoclose = 301;
202  	  m->max_file_size = 1<<24;
203  	  ls.push_back(m);
204  	}
205  	
206  	void MDSMap::print(ostream& out) const
207  	{
208  	  out << "fs_name\t" << fs_name << "\n";
209  	  out << "epoch\t" << epoch << "\n";
210  	  out << "flags\t" << hex << flags << dec << "\n";
211  	  out << "created\t" << created << "\n";
212  	  out << "modified\t" << modified << "\n";
213  	  out << "tableserver\t" << tableserver << "\n";
214  	  out << "root\t" << root << "\n";
215  	  out << "session_timeout\t" << session_timeout << "\n"
216  	      << "session_autoclose\t" << session_autoclose << "\n";
217  	  out << "max_file_size\t" << max_file_size << "\n";
218  	  out << "min_compat_client\t" << ceph::to_integer<int>(min_compat_client) << " ("
219  				       << min_compat_client << ")\n";
220  	  out << "last_failure\t" << last_failure << "\n"
221  	      << "last_failure_osd_epoch\t" << last_failure_osd_epoch << "\n";
222  	  out << "compat\t" << compat << "\n";
223  	  out << "max_mds\t" << max_mds << "\n";
224  	  out << "in\t" << in << "\n"
225  	      << "up\t" << up << "\n"
226  	      << "failed\t" << failed << "\n"
227  	      << "damaged\t" << damaged << "\n"
228  	      << "stopped\t" << stopped << "\n";
229  	  out << "data_pools\t" << data_pools << "\n";
230  	  out << "metadata_pool\t" << metadata_pool << "\n";
231  	  out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n";
232  	  out << "balancer\t" << balancer << "\n";
233  	  out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n";
234  	
235  	  multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo;
236  	  for (const auto &p : mds_info) {
237  	    foo.insert(std::make_pair(
238  	          std::make_pair(p.second.rank, p.second.inc-1), p.first));
239  	  }
240  	
241  	  for (const auto &p : foo) {
242  	    const mds_info_t& info = mds_info.at(p.second);
243  	    info.print_summary(out);
244  	    out << "\n";
245  	  }
246  	}
247  	
248  	
249  	
250  	void MDSMap::print_summary(Formatter *f, ostream *out) const
251  	{
252  	  map<mds_rank_t,string> by_rank;
253  	  map<string,int> by_state;
254  	
255  	  if (f) {
256  	    f->dump_unsigned("epoch", get_epoch());
257  	    f->dump_unsigned("up", up.size());
258  	    f->dump_unsigned("in", in.size());
259  	    f->dump_unsigned("max", max_mds);
260  	  } else {
261  	    *out << "e" << get_epoch() << ": " << up.size() << "/" << in.size() << "/" << max_mds << " up";
262  	  }
263  	
264  	  if (f)
265  	    f->open_array_section("by_rank");
266  	  for (const auto &p : mds_info) {
267  	    string s = ceph_mds_state_name(p.second.state);
268  	    if (p.second.laggy())
269  	      s += "(laggy or crashed)";
270  	
271  	    if (p.second.rank >= 0 && p.second.state != MDSMap::STATE_STANDBY_REPLAY) {
272  	      if (f) {
273  		f->open_object_section("mds");
274  		f->dump_unsigned("rank", p.second.rank);
275  		f->dump_string("name", p.second.name);
276  		f->dump_string("status", s);
277  		f->close_section();
278  	      } else {
279  		by_rank[p.second.rank] = p.second.name + "=" + s;
280  	      }
281  	    } else {
282  	      by_state[s]++;
283  	    }
284  	  }
285  	  if (f) {
286  	    f->close_section();
287  	  } else {
288  	    if (!by_rank.empty())
289  	      *out << " " << by_rank;
290  	  }
291  	
292  	  for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) {
293  	    if (f) {
294  	      f->dump_unsigned(p->first.c_str(), p->second);
295  	    } else {
296  	      *out << ", " << p->second << " " << p->first;
297  	    }
298  	  }
299  	
300  	  if (!failed.empty()) {
301  	    if (f) {
302  	      f->dump_unsigned("failed", failed.size());
303  	    } else {
304  	      *out << ", " << failed.size() << " failed";
305  	    }
306  	  }
307  	
308  	  if (!damaged.empty()) {
309  	    if (f) {
310  	      f->dump_unsigned("damaged", damaged.size());
311  	    } else {
312  	      *out << ", " << damaged.size() << " damaged";
313  	    }
314  	  }
315  	  //if (stopped.size())
316  	  //out << ", " << stopped.size() << " stopped";
317  	}
318  	
319  	void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
320  				list<pair<health_status_t,string> > *detail) const
321  	{
322  	  if (!failed.empty()) {
323  	    std::ostringstream oss;
324  	    oss << "mds rank"
325  		<< ((failed.size() > 1) ? "s ":" ")
326  		<< failed
327  		<< ((failed.size() > 1) ? " have":" has")
328  		<< " failed";
329  	    summary.push_back(make_pair(HEALTH_ERR, oss.str()));
330  	    if (detail) {
331  	      for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) {
332  		std::ostringstream oss;
333  		oss << "mds." << *p << " has failed";
334  		detail->push_back(make_pair(HEALTH_ERR, oss.str()));
335  	      }
336  	    }
337  	  }
338  	
339  	  if (!damaged.empty()) {
340  	    std::ostringstream oss;
341  	    oss << "mds rank"
342  		<< ((damaged.size() > 1) ? "s ":" ")
343  		<< damaged
344  		<< ((damaged.size() > 1) ? " are":" is")
345  		<< " damaged";
346  	    summary.push_back(make_pair(HEALTH_ERR, oss.str()));
347  	    if (detail) {
348  	      for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) {
349  		std::ostringstream oss;
350  		oss << "mds." << *p << " is damaged";
351  		detail->push_back(make_pair(HEALTH_ERR, oss.str()));
352  	      }
353  	    }
354  	  }
355  	
356  	  if (is_degraded()) {
357  	    summary.push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
358  	    if (detail) {
359  	      detail->push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
360  	      for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) {
361  		if (!is_up(i))
362  		  continue;
363  		mds_gid_t gid = up.find(i)->second;
364  		map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid);
365  		stringstream ss;
366  		if (is_resolve(i))
367  		  ss << "mds." << info->second.name << " at " << info->second.addrs
368  		     << " rank " << i << " is resolving";
369  		if (is_replay(i))
370  		  ss << "mds." << info->second.name << " at " << info->second.addrs
371  		     << " rank " << i << " is replaying journal";
372  		if (is_rejoin(i))
373  		  ss << "mds." << info->second.name << " at " << info->second.addrs
374  		     << " rank " << i << " is rejoining";
375  		if (is_reconnect(i))
376  		  ss << "mds." << info->second.name << " at " << info->second.addrs
377  		     << " rank " << i << " is reconnecting to clients";
378  		if (ss.str().length())
379  		  detail->push_back(make_pair(HEALTH_WARN, ss.str()));
380  	      }
381  	    }
382  	  }
383  	
384  	  {
385  	  stringstream ss;
386  	  ss << fs_name << " max_mds " << max_mds;
387  	  summary.push_back(make_pair(HEALTH_WARN, ss.str()));
388  	  }
389  	
390  	  if ((mds_rank_t)up.size() < max_mds) {
391  	    stringstream ss;
392  	    ss << fs_name << " has " << up.size()
393  	       << " active MDS(s), but has max_mds of " << max_mds;
394  	    summary.push_back(make_pair(HEALTH_WARN, ss.str()));
395  	  }
396  	
397  	  map<mds_gid_t, mds_info_t>::const_iterator m_end = mds_info.end();
398  	  set<string> laggy;
399  	  for (const auto &u : up) {
400  	    map<mds_gid_t, mds_info_t>::const_iterator m = mds_info.find(u.second);
401  	    if (m == m_end) {
402  	      std::cerr << "Up rank " << u.first << " GID " << u.second << " not found!" << std::endl;
403  	    }
404  	    ceph_assert(m != m_end);
405  	    const mds_info_t &mds_info(m->second);
406  	    if (mds_info.laggy()) {
407  	      laggy.insert(mds_info.name);
408  	      if (detail) {
409  		std::ostringstream oss;
410  		oss << "mds." << mds_info.name << " at " << mds_info.addrs
411  		    << " is laggy/unresponsive";
412  		detail->push_back(make_pair(HEALTH_WARN, oss.str()));
413  	      }
414  	    }
415  	  }
416  	
417  	  if (!laggy.empty()) {
418  	    std::ostringstream oss;
419  	    oss << "mds " << laggy
420  		<< ((laggy.size() > 1) ? " are":" is")
421  		<< " laggy";
422  	    summary.push_back(make_pair(HEALTH_WARN, oss.str()));
423  	  }
424  	
425  	  if (get_max_mds() > 1 &&
426  	      was_snaps_ever_allowed() && !allows_multimds_snaps()) {
427  	    std::ostringstream oss;
428  	    oss << "multi-active mds while there are snapshots possibly created by pre-mimic MDS";
429  	    summary.push_back(make_pair(HEALTH_WARN, oss.str()));
430  	  }
431  	}
432  	
433  	void MDSMap::get_health_checks(health_check_map_t *checks) const
434  	{
435  	  // MDS_DAMAGE
436  	  if (!damaged.empty()) {
437  	    health_check_t& check = checks->get_or_add("MDS_DAMAGE", HEALTH_ERR,
438  						       "%num% mds daemon%plurals% damaged",
439  						       damaged.size());
440  	    for (auto p : damaged) {
441  	      std::ostringstream oss;
442  	      oss << "fs " << fs_name << " mds." << p << " is damaged";
443  	      check.detail.push_back(oss.str());
444  	    }
445  	  }
446  	
447  	  // FS_DEGRADED
448  	  if (is_degraded()) {
449  	    health_check_t& fscheck = checks->get_or_add(
450  	      "FS_DEGRADED", HEALTH_WARN,
451  	      "%num% filesystem%plurals% %isorare% degraded", 1);
452  	    ostringstream ss;
453  	    ss << "fs " << fs_name << " is degraded";
454  	    fscheck.detail.push_back(ss.str());
455  	
456  	    list<string> detail;
457  	    for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) {
458  	      if (!is_up(i))
459  		continue;
460  	      mds_gid_t gid = up.find(i)->second;
461  	      map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid);
462  	      stringstream ss;
463  	      ss << "fs " << fs_name << " mds." << info->second.name << " at "
464  		 << info->second.addrs << " rank " << i;
465  	      if (is_resolve(i))
466  		ss << " is resolving";
467  	      if (is_replay(i))
468  		ss << " is replaying journal";
469  	      if (is_rejoin(i))
470  		ss << " is rejoining";
471  	      if (is_reconnect(i))
472  		ss << " is reconnecting to clients";
473  	      if (ss.str().length())
474  		detail.push_back(ss.str());
475  	    }
476  	  }
477  	
478  	  // MDS_UP_LESS_THAN_MAX
479  	  if ((mds_rank_t)get_num_in_mds() < get_max_mds()) {
480  	    health_check_t& check = checks->add(
481  	      "MDS_UP_LESS_THAN_MAX", HEALTH_WARN,
482  	      "%num% filesystem%plurals% %isorare% online with fewer MDS than max_mds", 1);
483  	    stringstream ss;
484  	    ss << "fs " << fs_name << " has " << get_num_in_mds()
485  	       << " MDS online, but wants " << get_max_mds();
486  	    check.detail.push_back(ss.str());
487  	  }
488  	
489  	  // MDS_ALL_DOWN
490  	  if ((mds_rank_t)get_num_up_mds() == 0 && get_max_mds() > 0) {
491  	    health_check_t &check = checks->add(
492  	      "MDS_ALL_DOWN", HEALTH_ERR,
493  	      "%num% filesystem%plurals% %isorare% offline", 1);
494  	    stringstream ss;
495  	    ss << "fs " << fs_name << " is offline because no MDS is active for it.";
496  	    check.detail.push_back(ss.str());
497  	  }
498  	
499  	  if (get_max_mds() > 1 &&
500  	      was_snaps_ever_allowed() && !allows_multimds_snaps()) {
501  	    health_check_t &check = checks->add(
502  	      "MULTIMDS_WITH_OLDSNAPS", HEALTH_ERR,
503  	      "%num% filesystem%plurals% %isorare% multi-active mds with old snapshots", 1);
504  	    stringstream ss;
505  	    ss << "multi-active mds while there are snapshots possibly created by pre-mimic MDS";
506  	    check.detail.push_back(ss.str());
507  	  }
508  	
509  	  if (get_inline_data_enabled()) {
510  	    health_check_t &check = checks->add(
511  	      "FS_INLINE_DATA_DEPRECATED", HEALTH_WARN,
512  	      "%num% filesystem%plurals% with deprecated feature inline_data", 1);
513  	    stringstream ss;
514  	    ss << "fs " << fs_name << " has deprecated feature inline_data enabled.";
515  	    check.detail.push_back(ss.str());
516  	  }
517  	}
518  	
519  	void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const
520  	{
521  	  __u8 v = 9;
522  	  if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
523  	    v = 7;
524  	  }
525  	  ENCODE_START(v, 4, bl);
526  	  encode(global_id, bl);
527  	  encode(name, bl);
528  	  encode(rank, bl);
529  	  encode(inc, bl);
530  	  encode((int32_t)state, bl);
531  	  encode(state_seq, bl);
532  	  if (v < 8) {
533  	    encode(addrs.legacy_addr(), bl, features);
534  	  } else {
535  	    encode(addrs, bl, features);
536  	  }
537  	  encode(laggy_since, bl);
538  	  encode(MDS_RANK_NONE, bl); /* standby_for_rank */
539  	  encode(std::string(), bl); /* standby_for_name */
540  	  encode(export_targets, bl);
541  	  encode(mds_features, bl);
542  	  encode(FS_CLUSTER_ID_NONE, bl); /* standby_for_fscid */
543  	  encode(false, bl);
544  	  if (v >= 9) {
545  	    encode(flags, bl);
546  	  }
547  	  ENCODE_FINISH(bl);
548  	}
549  	
550  	void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const
551  	{
(1) Event assignment: Assigning: "struct_v" = "3".
Also see events: [overrun-buffer-val]
552  	  __u8 struct_v = 3;
553  	  using ceph::encode;
(2) Event overrun-buffer-val: Overrunning buffer pointed to by "struct_v" of 1 bytes by passing it to a function which accesses it at byte offset 7. [details]
Also see events: [assignment]
554  	  encode(struct_v, bl);
555  	  encode(global_id, bl);
556  	  encode(name, bl);
557  	  encode(rank, bl);
558  	  encode(inc, bl);
559  	  encode((int32_t)state, bl);
560  	  encode(state_seq, bl);
561  	  encode(addrs.legacy_addr(), bl, 0);
562  	  encode(laggy_since, bl);
563  	  encode(MDS_RANK_NONE, bl);
564  	  encode(std::string(), bl);
565  	  encode(export_targets, bl);
566  	}
567  	
568  	void MDSMap::mds_info_t::decode(bufferlist::const_iterator& bl)
569  	{
570  	  DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl);
571  	  decode(global_id, bl);
572  	  decode(name, bl);
573  	  decode(rank, bl);
574  	  decode(inc, bl);
575  	  decode((int32_t&)(state), bl);
576  	  decode(state_seq, bl);
577  	  decode(addrs, bl);
578  	  decode(laggy_since, bl);
579  	  {
580  	    mds_rank_t standby_for_rank;
581  	    decode(standby_for_rank, bl);
582  	  }
583  	  {
584  	    std::string standby_for_name;
585  	    decode(standby_for_name, bl);
586  	  }
587  	  if (struct_v >= 2)
588  	    decode(export_targets, bl);
589  	  if (struct_v >= 5)
590  	    decode(mds_features, bl);
591  	  if (struct_v >= 6) {
592  	    fs_cluster_id_t standby_for_fscid;
593  	    decode(standby_for_fscid, bl);
594  	  }
595  	  if (struct_v >= 7) {
596  	    bool standby_replay;
597  	    decode(standby_replay, bl);
598  	  }
599  	  if (struct_v >= 9) {
600  	    decode(flags, bl);
601  	  }
602  	  DECODE_FINISH(bl);
603  	}
604  	
605  	std::string MDSMap::mds_info_t::human_name() const
606  	{
607  	  // Like "daemon mds.myhost restarted", "Activating daemon mds.myhost"
608  	  std::ostringstream out;
609  	  out << "daemon mds." << name;
610  	  return out.str();
611  	}
612  	
613  	void MDSMap::encode(bufferlist& bl, uint64_t features) const
614  	{
615  	  std::map<mds_rank_t,int32_t> inc;  // Legacy field, fake it so that
616  	                                     // old-mon peers have something sane
617  	                                     // during upgrade
618  	  for (const auto rank : in) {
619  	    inc.insert(std::make_pair(rank, epoch));
620  	  }
621  	
622  	  using ceph::encode;
623  	  if ((features & CEPH_FEATURE_PGID64) == 0) {
624  	    __u16 v = 2;
625  	    encode(v, bl);
626  	    encode(epoch, bl);
627  	    encode(flags, bl);
628  	    encode(last_failure, bl);
629  	    encode(root, bl);
630  	    encode(session_timeout, bl);
631  	    encode(session_autoclose, bl);
632  	    encode(max_file_size, bl);
633  	    encode(max_mds, bl);
634  	    __u32 n = mds_info.size();
635  	    encode(n, bl);
636  	    for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin();
637  		i != mds_info.end(); ++i) {
638  	      encode(i->first, bl);
639  	      encode(i->second, bl, features);
640  	    }
641  	    n = data_pools.size();
642  	    encode(n, bl);
643  	    for (const auto p: data_pools) {
644  	      n = p;
645  	      encode(n, bl);
646  	    }
647  	
648  	    int32_t m = cas_pool;
649  	    encode(m, bl);
650  	    return;
651  	  } else if ((features & CEPH_FEATURE_MDSENC) == 0) {
652  	    __u16 v = 3;
653  	    encode(v, bl);
654  	    encode(epoch, bl);
655  	    encode(flags, bl);
656  	    encode(last_failure, bl);
657  	    encode(root, bl);
658  	    encode(session_timeout, bl);
659  	    encode(session_autoclose, bl);
660  	    encode(max_file_size, bl);
661  	    encode(max_mds, bl);
662  	    __u32 n = mds_info.size();
663  	    encode(n, bl);
664  	    for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin();
665  		i != mds_info.end(); ++i) {
666  	      encode(i->first, bl);
667  	      encode(i->second, bl, features);
668  	    }
669  	    encode(data_pools, bl);
670  	    encode(cas_pool, bl);
671  	
672  	    // kclient ignores everything from here
673  	    __u16 ev = 5;
674  	    encode(ev, bl);
675  	    encode(compat, bl);
676  	    encode(metadata_pool, bl);
677  	    encode(created, bl);
678  	    encode(modified, bl);
679  	    encode(tableserver, bl);
680  	    encode(in, bl);
681  	    encode(inc, bl);
682  	    encode(up, bl);
683  	    encode(failed, bl);
684  	    encode(stopped, bl);
685  	    encode(last_failure_osd_epoch, bl);
686  	    return;
687  	  }
688  	
689  	  ENCODE_START(5, 4, bl);
690  	  encode(epoch, bl);
691  	  encode(flags, bl);
692  	  encode(last_failure, bl);
693  	  encode(root, bl);
694  	  encode(session_timeout, bl);
695  	  encode(session_autoclose, bl);
696  	  encode(max_file_size, bl);
697  	  encode(max_mds, bl);
698  	  encode(mds_info, bl, features);
699  	  encode(data_pools, bl);
700  	  encode(cas_pool, bl);
701  	
702  	  // kclient ignores everything from here
703  	  __u16 ev = 15;
704  	  encode(ev, bl);
705  	  encode(compat, bl);
706  	  encode(metadata_pool, bl);
707  	  encode(created, bl);
708  	  encode(modified, bl);
709  	  encode(tableserver, bl);
710  	  encode(in, bl);
711  	  encode(inc, bl);
712  	  encode(up, bl);
713  	  encode(failed, bl);
714  	  encode(stopped, bl);
715  	  encode(last_failure_osd_epoch, bl);
716  	  encode(ever_allowed_features, bl);
717  	  encode(explicitly_allowed_features, bl);
718  	  encode(inline_data_enabled, bl);
719  	  encode(enabled, bl);
720  	  encode(fs_name, bl);
721  	  encode(damaged, bl);
722  	  encode(balancer, bl);
723  	  encode(standby_count_wanted, bl);
724  	  encode(old_max_mds, bl);
725  	  encode(min_compat_client, bl);
726  	  ENCODE_FINISH(bl);
727  	}
728  	
729  	void MDSMap::sanitize(const std::function<bool(int64_t pool)>& pool_exists)
730  	{
731  	  /* Before we did stricter checking, it was possible to remove a data pool
732  	   * without also deleting it from the MDSMap. Check for that here after
733  	   * decoding the data pools.
734  	   */
735  	
736  	  for (auto it = data_pools.begin(); it != data_pools.end();) {
737  	    if (!pool_exists(*it)) {
738  	      dout(0) << "removed non-existant data pool " << *it << " from MDSMap" << dendl;
739  	      it = data_pools.erase(it);
740  	    } else {
741  	      it++;
742  	    }
743  	  }
744  	}
745  	
746  	void MDSMap::decode(bufferlist::const_iterator& p)
747  	{
748  	  std::map<mds_rank_t,int32_t> inc;  // Legacy field, parse and drop
749  	
750  	  cached_up_features = 0;
751  	  DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p);
752  	  decode(epoch, p);
753  	  decode(flags, p);
754  	  decode(last_failure, p);
755  	  decode(root, p);
756  	  decode(session_timeout, p);
757  	  decode(session_autoclose, p);
758  	  decode(max_file_size, p);
759  	  decode(max_mds, p);
760  	  decode(mds_info, p);
761  	  if (struct_v < 3) {
762  	    __u32 n;
763  	    decode(n, p);
764  	    while (n--) {
765  	      __u32 m;
766  	      decode(m, p);
767  	      data_pools.push_back(m);
768  	    }
769  	    __s32 s;
770  	    decode(s, p);
771  	    cas_pool = s;
772  	  } else {
773  	    decode(data_pools, p);
774  	    decode(cas_pool, p);
775  	  }
776  	
777  	  // kclient ignores everything from here
778  	  __u16 ev = 1;
779  	  if (struct_v >= 2)
780  	    decode(ev, p);
781  	  if (ev >= 3)
782  	    decode(compat, p);
783  	  else
784  	    compat = get_compat_set_base();
785  	  if (ev < 5) {
786  	    __u32 n;
787  	    decode(n, p);
788  	    metadata_pool = n;
789  	  } else {
790  	    decode(metadata_pool, p);
791  	  }
792  	  decode(created, p);
793  	  decode(modified, p);
794  	  decode(tableserver, p);
795  	  decode(in, p);
796  	  decode(inc, p);
797  	  decode(up, p);
798  	  decode(failed, p);
799  	  decode(stopped, p);
800  	  if (ev >= 4)
801  	    decode(last_failure_osd_epoch, p);
802  	  if (ev >= 6) {
803  	    if (ev < 10) {
804  	      // previously this was a bool about snaps, not a flag map
805  	      bool flag;
806  	      decode(flag, p);
807  	      ever_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
808  	      decode(flag, p);
809  	      explicitly_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
810  	    } else {
811  	      decode(ever_allowed_features, p);
812  	      decode(explicitly_allowed_features, p);
813  	    }
814  	  } else {
815  	    ever_allowed_features = 0;
816  	    explicitly_allowed_features = 0;
817  	  }
818  	  if (ev >= 7)
819  	    decode(inline_data_enabled, p);
820  	
821  	  if (ev >= 8) {
822  	    ceph_assert(struct_v >= 5);
823  	    decode(enabled, p);
824  	    decode(fs_name, p);
825  	  } else {
826  	    if (epoch > 1) {
827  	      // If an MDS has ever been started, epoch will be greater than 1,
828  	      // assume filesystem is enabled.
829  	      enabled = true;
830  	    } else {
831  	      // Upgrading from a cluster that never used an MDS, switch off
832  	      // filesystem until it's explicitly enabled.
833  	      enabled = false;
834  	    }
835  	  }
836  	
837  	  if (ev >= 9) {
838  	    decode(damaged, p);
839  	  }
840  	
841  	  if (ev >= 11) {
842  	    decode(balancer, p);
843  	  }
844  	
845  	  if (ev >= 12) {
846  	    decode(standby_count_wanted, p);
847  	  }
848  	
849  	  if (ev >= 13) {
850  	    decode(old_max_mds, p);
851  	  }
852  	
853  	  if (ev == 14) {
854  	    int8_t r;
855  	    decode(r, p);
856  	    if (r < 0) {
857  	      min_compat_client = ceph_release_t::unknown;
858  	    } else {
859  	      min_compat_client = ceph_release_t{static_cast<uint8_t>(r)};
860  	    }
861  	  } else if (ev > 14) {
862  	    decode(min_compat_client, p);
863  	  }
864  	
865  	  DECODE_FINISH(p);
866  	}
867  	
868  	MDSMap::availability_t MDSMap::is_cluster_available() const
869  	{
870  	  if (epoch == 0) {
871  	    // If I'm a client, this means I'm looking at an MDSMap instance
872  	    // that was never actually initialized from the mons.  Client should
873  	    // wait.
874  	    return TRANSIENT_UNAVAILABLE;
875  	  }
876  	
877  	  // If a rank is marked damage (unavailable until operator intervenes)
878  	  if (damaged.size()) {
879  	    return STUCK_UNAVAILABLE;
880  	  }
881  	
882  	  // If no ranks are created (filesystem not initialized)
883  	  if (in.empty()) {
884  	    return STUCK_UNAVAILABLE;
885  	  }
886  	
887  	  for (const auto rank : in) {
888  	    if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) {
889  	      // This might only be transient, but because we can't see
890  	      // standbys, we have no way of knowing whether there is a
891  	      // standby available to replace the laggy guy.
892  	      return STUCK_UNAVAILABLE;
893  	    }
894  	  }
895  	
896  	  if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
897  	    // Nobody looks stuck, so indicate to client they should go ahead
898  	    // and try mounting if anybody is active.  This may include e.g.
899  	    // one MDS failing over and another active: the client should
900  	    // proceed to start talking to the active one and let the
901  	    // transiently-unavailable guy catch up later.
902  	    return AVAILABLE;
903  	  } else {
904  	    // Nothing indicating we were stuck, but nobody active (yet)
905  	    //return TRANSIENT_UNAVAILABLE;
906  	
907  	    // Because we don't have standbys in the MDSMap any more, we can't
908  	    // reliably indicate transient vs. stuck, so always say stuck so
909  	    // that the client doesn't block.
910  	    return STUCK_UNAVAILABLE;
911  	  }
912  	}
913  	
914  	bool MDSMap::state_transition_valid(DaemonState prev, DaemonState next)
915  	{
916  	  bool state_valid = true;
917  	  if (next != prev) {
918  	    if (prev == MDSMap::STATE_REPLAY) {
919  	      if (next != MDSMap::STATE_RESOLVE && next != MDSMap::STATE_RECONNECT) {
920  	        state_valid = false;
921  	      }
922  	    } else if (prev == MDSMap::STATE_REJOIN) {
923  	      if (next != MDSMap::STATE_ACTIVE &&
924  		  next != MDSMap::STATE_CLIENTREPLAY &&
925  		  next != MDSMap::STATE_STOPPED) {
926  	        state_valid = false;
927  	      }
928  	    } else if (prev >= MDSMap::STATE_RESOLVE && prev < MDSMap::STATE_ACTIVE) {
929  	      // Once I have entered replay, the only allowable transitions are to
930  	      // the next next along in the sequence.
931  	      if (next != prev + 1) {
932  	        state_valid = false;
933  	      }
934  	    }
935  	  }
936  	
937  	  return state_valid;
938  	}
939  	
940  	bool MDSMap::check_health(mds_rank_t standby_daemon_count)
941  	{
942  	  std::set<mds_rank_t> standbys;
943  	  get_standby_replay_mds_set(standbys);
944  	  std::set<mds_rank_t> actives;
945  	  get_active_mds_set(actives);
946  	  mds_rank_t standbys_avail = (mds_rank_t)standbys.size()+standby_daemon_count;
947  	
948  	  /* If there are standby daemons available/replaying and
949  	   * standby_count_wanted is unset (default), then we set it to 1. This will
950  	   * happen during health checks by the mons. Also, during initial creation
951  	   * of the FS we will have no actives so we don't want to change the default
952  	   * yet.
953  	   */
954  	  if (standby_count_wanted == -1 && actives.size() > 0 && standbys_avail > 0) {
955  	    set_standby_count_wanted(1);
956  	    return true;
957  	  }
958  	  return false;
959  	}
960