1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2    	// vim: ts=8 sw=2 smarttab
3    	
4    	#include "common/debug.h"
5    	#include "common/errno.h"
6    	
7    	#include "librbd/Utils.h"
8    	#include "Policy.h"
9    	
10   	#define dout_context g_ceph_context
11   	#define dout_subsys ceph_subsys_rbd_mirror
12   	#undef dout_prefix
13   	#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
14   	                           << " " << __func__ << ": "
15   	
16   	namespace rbd {
17   	namespace mirror {
18   	namespace image_map {
19   	
20   	namespace {
21   	
22   	bool is_instance_action(ActionType action_type) {
23   	  switch (action_type) {
24   	  case ACTION_TYPE_ACQUIRE:
25   	  case ACTION_TYPE_RELEASE:
26   	    return true;
27   	  case ACTION_TYPE_NONE:
28   	  case ACTION_TYPE_MAP_UPDATE:
29   	  case ACTION_TYPE_MAP_REMOVE:
30   	    break;
31   	  }
32   	  return false;
33   	}
34   	
35   	} // anonymous namespace
36   	
37   	using ::operator<<;
38   	using librbd::util::unique_lock_name;
39   	
40   	Policy::Policy(librados::IoCtx &ioctx)
41   	  : m_ioctx(ioctx),
42   	    m_map_lock(ceph::make_shared_mutex(
43   	     unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this))) {
44   	
45   	  // map should at least have once instance
46   	  std::string instance_id = stringify(ioctx.get_instance_id());
47   	  m_map.emplace(instance_id, std::set<std::string>{});
48   	}
49   	
50   	void Policy::init(
51   	    const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
52   	  dout(20) << dendl;
53   	
54   	  std::unique_lock map_lock{m_map_lock};
55   	  for (auto& it : image_mapping) {
56   	    ceph_assert(!it.second.instance_id.empty());
57   	    auto map_result = m_map[it.second.instance_id].emplace(it.first);
58   	    ceph_assert(map_result.second);
59   	
60   	    auto image_state_result = m_image_states.emplace(
61   	      it.first, ImageState{it.second.instance_id, it.second.mapped_time});
62   	    ceph_assert(image_state_result.second);
63   	
64   	    // ensure we (re)send image acquire actions to the instance
65   	    auto& image_state = image_state_result.first->second;
66   	    auto start_action = set_state(&image_state,
67   	                                  StateTransition::STATE_INITIALIZING, false);
68   	    ceph_assert(start_action);
69   	  }
70   	}
71   	
72   	LookupInfo Policy::lookup(const std::string &global_image_id) {
73   	  dout(20) << "global_image_id=" << global_image_id << dendl;
74   	
75   	  std::shared_lock map_lock{m_map_lock};
76   	  LookupInfo info;
77   	
78   	  auto it = m_image_states.find(global_image_id);
79   	  if (it != m_image_states.end()) {
80   	    info.instance_id = it->second.instance_id;
81   	    info.mapped_time = it->second.mapped_time;
82   	  }
83   	  return info;
84   	}
85   	
86   	bool Policy::add_image(const std::string &global_image_id) {
87   	  dout(5) << "global_image_id=" << global_image_id << dendl;
88   	
89   	  std::unique_lock map_lock{m_map_lock};
90   	  auto image_state_result = m_image_states.emplace(global_image_id,
91   	                                                   ImageState{});
92   	  auto& image_state = image_state_result.first->second;
93   	  if (image_state.state == StateTransition::STATE_INITIALIZING) {
94   	    // avoid duplicate acquire notifications upon leader startup
95   	    return false;
96   	  }
97   	
98   	  return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false);
99   	}
100  	
101  	bool Policy::remove_image(const std::string &global_image_id) {
102  	  dout(5) << "global_image_id=" << global_image_id << dendl;
103  	
104  	  std::unique_lock map_lock{m_map_lock};
105  	  auto it = m_image_states.find(global_image_id);
106  	  if (it == m_image_states.end()) {
107  	    return false;
108  	  }
109  	
110  	  auto& image_state = it->second;
111  	  return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false);
112  	}
113  	
114  	void Policy::add_instances(const InstanceIds &instance_ids,
115  	                           GlobalImageIds* global_image_ids) {
116  	  dout(5) << "instance_ids=" << instance_ids << dendl;
117  	
118  	  std::unique_lock map_lock{m_map_lock};
119  	  for (auto& instance : instance_ids) {
120  	    ceph_assert(!instance.empty());
121  	    m_map.emplace(instance, std::set<std::string>{});
122  	  }
123  	
124  	  // post-failover, remove any dead instances and re-shuffle their images
125  	  if (m_initial_update) {
126  	    dout(5) << "initial instance update" << dendl;
127  	    m_initial_update = false;
128  	
129  	    std::set<std::string> alive_instances(instance_ids.begin(),
130  	                                          instance_ids.end());
131  	    InstanceIds dead_instances;
132  	    for (auto& map_pair : m_map) {
133  	      if (alive_instances.find(map_pair.first) == alive_instances.end()) {
134  	        dead_instances.push_back(map_pair.first);
135  	      }
136  	    }
137  	
138  	    if (!dead_instances.empty()) {
139  	      remove_instances(m_map_lock, dead_instances, global_image_ids);
140  	    }
141  	  }
142  	
143  	  GlobalImageIds shuffle_global_image_ids;
144  	  do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids);
145  	  dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids
146  	          << "]" << dendl;
147  	  for (auto& global_image_id : shuffle_global_image_ids) {
148  	    auto it = m_image_states.find(global_image_id);
149  	    ceph_assert(it != m_image_states.end());
150  	
151  	    auto& image_state = it->second;
152  	    if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) {
153  	      global_image_ids->emplace(global_image_id);
154  	    }
155  	  }
156  	}
157  	
158  	void Policy::remove_instances(const InstanceIds &instance_ids,
159  	                              GlobalImageIds* global_image_ids) {
160  	  std::unique_lock map_lock{m_map_lock};
161  	  remove_instances(m_map_lock, instance_ids, global_image_ids);
162  	}
163  	
164  	void Policy::remove_instances(const ceph::shared_mutex& lock,
165  	                              const InstanceIds &instance_ids,
166  	                              GlobalImageIds* global_image_ids) {
167  	  ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
168  	  dout(5) << "instance_ids=" << instance_ids << dendl;
169  	
170  	  for (auto& instance_id : instance_ids) {
171  	    auto map_it = m_map.find(instance_id);
172  	    if (map_it == m_map.end()) {
173  	      continue;
174  	    }
175  	
176  	    auto& instance_global_image_ids = map_it->second;
177  	    if (instance_global_image_ids.empty()) {
178  	      m_map.erase(map_it);
179  	      continue;
180  	    }
181  	
182  	    m_dead_instances.insert(instance_id);
183  	    dout(5) << "force shuffling: instance_id=" << instance_id << ", "
184  	            << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl;
185  	    for (auto& global_image_id : instance_global_image_ids) {
186  	      auto it = m_image_states.find(global_image_id);
187  	      ceph_assert(it != m_image_states.end());
188  	
189  	      auto& image_state = it->second;
190  	      if (is_state_scheduled(image_state,
191  	                             StateTransition::STATE_DISSOCIATING)) {
192  	        // don't shuffle images that no longer exist
193  	        continue;
194  	      }
195  	
196  	      if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) {
197  	        global_image_ids->emplace(global_image_id);
198  	      }
199  	    }
200  	  }
201  	}
202  	
203  	ActionType Policy::start_action(const std::string &global_image_id) {
(1) Event getlock: Acquiring lock named "_ZN4ceph18shared_mutex_debugE.rwlock". [details]
204  	  std::unique_lock map_lock{m_map_lock};
205  	
206  	  auto it = m_image_states.find(global_image_id);
(2) Event cond_true: Condition "it != this->m_image_states.end()", taking true branch.
207  	  ceph_assert(it != m_image_states.end());
208  	
209  	  auto& image_state = it->second;
210  	  auto& transition = image_state.transition;
(3) Event cond_true: Condition "transition.action_type != rbd::mirror::image_map::ACTION_TYPE_NONE", taking true branch.
211  	  ceph_assert(transition.action_type != ACTION_TYPE_NONE);
212  	
(4) Event cond_true: Condition "should_gather", taking true branch.
213  	  dout(5) << "global_image_id=" << global_image_id << ", "
214  	          << "state=" << image_state.state << ", "
215  	          << "action_type=" << transition.action_type << dendl;
(5) Event cond_false: Condition "transition.start_policy_action.operator bool()", taking false branch.
216  	  if (transition.start_policy_action) {
217  	    execute_policy_action(global_image_id, &image_state,
218  	                          *transition.start_policy_action);
219  	    transition.start_policy_action = boost::none;
(6) Event if_end: End of if statement.
220  	  }
221  	  return transition.action_type;
222  	}
223  	
224  	bool Policy::finish_action(const std::string &global_image_id, int r) {
225  	  std::unique_lock map_lock{m_map_lock};
226  	
227  	  auto it = m_image_states.find(global_image_id);
228  	  ceph_assert(it != m_image_states.end());
229  	
230  	  auto& image_state = it->second;
231  	  auto& transition = image_state.transition;
232  	  dout(5) << "global_image_id=" << global_image_id << ", "
233  	          << "state=" << image_state.state << ", "
234  	          << "action_type=" << transition.action_type << ", "
235  	          << "r=" << r << dendl;
236  	
237  	  // retry on failure unless it's an RPC message to an instance that is dead
238  	  if (r < 0 &&
239  	      (!is_instance_action(image_state.transition.action_type) ||
240  	       image_state.instance_id == UNMAPPED_INSTANCE_ID ||
241  	       m_dead_instances.find(image_state.instance_id) ==
242  	         m_dead_instances.end())) {
243  	    return true;
244  	  }
245  	
246  	  auto finish_policy_action = transition.finish_policy_action;
247  	  StateTransition::transit(image_state.state, &image_state.transition);
248  	  if (transition.finish_state) {
249  	    // in-progress state machine complete
250  	    ceph_assert(StateTransition::is_idle(*transition.finish_state));
251  	    image_state.state = *transition.finish_state;
252  	    image_state.transition = {};
253  	  }
254  	
255  	  if (StateTransition::is_idle(image_state.state) && image_state.next_state) {
256  	    // advance to pending state machine
257  	    bool start_action = set_state(&image_state, *image_state.next_state, false);
258  	    ceph_assert(start_action);
259  	  }
260  	
261  	  // image state may get purged in execute_policy_action()
262  	  bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE;
263  	  if (finish_policy_action) {
264  	    execute_policy_action(global_image_id, &image_state, *finish_policy_action);
265  	  }
266  	
267  	  return pending_action;
268  	}
269  	
270  	void Policy::execute_policy_action(
271  	    const std::string& global_image_id, ImageState* image_state,
272  	    StateTransition::PolicyAction policy_action) {
273  	  dout(5) << "global_image_id=" << global_image_id << ", "
274  	          << "policy_action=" << policy_action << dendl;
275  	
276  	  switch (policy_action) {
277  	  case StateTransition::POLICY_ACTION_MAP:
278  	    map(global_image_id, image_state);
279  	    break;
280  	  case StateTransition::POLICY_ACTION_UNMAP:
281  	    unmap(global_image_id, image_state);
282  	    break;
283  	  case StateTransition::POLICY_ACTION_REMOVE:
284  	    if (image_state->state == StateTransition::STATE_UNASSOCIATED) {
285  	      ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID);
286  	      ceph_assert(!image_state->next_state);
287  	      m_image_states.erase(global_image_id);
288  	    }
289  	    break;
290  	  }
291  	}
292  	
293  	void Policy::map(const std::string& global_image_id, ImageState* image_state) {
294  	  ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
295  	
296  	  std::string instance_id = image_state->instance_id;
297  	  if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
298  	    return;
299  	  }
300  	  if (is_dead_instance(instance_id)) {
301  	    unmap(global_image_id, image_state);
302  	  }
303  	
304  	  instance_id = do_map(m_map, global_image_id);
305  	  ceph_assert(!instance_id.empty());
306  	  dout(5) << "global_image_id=" << global_image_id << ", "
307  	          << "instance_id=" << instance_id << dendl;
308  	
309  	  image_state->instance_id = instance_id;
310  	  image_state->mapped_time = ceph_clock_now();
311  	
312  	  auto ins = m_map[instance_id].emplace(global_image_id);
313  	  ceph_assert(ins.second);
314  	}
315  	
316  	void Policy::unmap(const std::string &global_image_id,
317  	                   ImageState* image_state) {
318  	  ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
319  	
320  	  std::string instance_id = image_state->instance_id;
321  	  if (instance_id == UNMAPPED_INSTANCE_ID) {
322  	    return;
323  	  }
324  	
325  	  dout(5) << "global_image_id=" << global_image_id << ", "
326  	          << "instance_id=" << instance_id << dendl;
327  	
328  	  ceph_assert(!instance_id.empty());
329  	  m_map[instance_id].erase(global_image_id);
330  	  image_state->instance_id = UNMAPPED_INSTANCE_ID;
331  	  image_state->mapped_time = {};
332  	
333  	  if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
334  	    dout(5) << "removing dead instance_id=" << instance_id << dendl;
335  	    m_map.erase(instance_id);
336  	    m_dead_instances.erase(instance_id);
337  	  }
338  	}
339  	
340  	bool Policy::is_image_shuffling(const std::string &global_image_id) {
341  	  ceph_assert(ceph_mutex_is_locked(m_map_lock));
342  	
343  	  auto it = m_image_states.find(global_image_id);
344  	  ceph_assert(it != m_image_states.end());
345  	  auto& image_state = it->second;
346  	
347  	  // avoid attempting to re-shuffle a pending shuffle
348  	  auto result = is_state_scheduled(image_state,
349  	                                   StateTransition::STATE_SHUFFLING);
350  	  dout(20) << "global_image_id=" << global_image_id << ", "
351  	           << "result=" << result << dendl;
352  	  return result;
353  	}
354  	
355  	bool Policy::can_shuffle_image(const std::string &global_image_id) {
356  	  ceph_assert(ceph_mutex_is_locked(m_map_lock));
357  	
358  	  CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
359  	  int migration_throttle = cct->_conf.get_val<uint64_t>(
360  	    "rbd_mirror_image_policy_migration_throttle");
361  	
362  	  auto it = m_image_states.find(global_image_id);
363  	  ceph_assert(it != m_image_states.end());
364  	  auto& image_state = it->second;
365  	
366  	  utime_t last_shuffled_time = image_state.mapped_time;
367  	
368  	  // idle images that haven't been recently remapped can shuffle
369  	  utime_t now = ceph_clock_now();
370  	  auto result = (StateTransition::is_idle(image_state.state) &&
371  	                 ((migration_throttle <= 0) ||
372  	                  (now - last_shuffled_time >= migration_throttle)));
373  	  dout(10) << "global_image_id=" << global_image_id << ", "
374  	           << "migration_throttle=" << migration_throttle << ", "
375  	           << "last_shuffled_time=" << last_shuffled_time << ", "
376  	           << "result=" << result << dendl;
377  	  return result;
378  	}
379  	
380  	bool Policy::set_state(ImageState* image_state, StateTransition::State state,
381  	                       bool ignore_current_state) {
382  	  if (!ignore_current_state && image_state->state == state) {
383  	    return false;
384  	  } else if (StateTransition::is_idle(image_state->state)) {
385  	    image_state->state = state;
386  	    image_state->next_state = boost::none;
387  	
388  	    StateTransition::transit(image_state->state, &image_state->transition);
389  	    ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE);
390  	    ceph_assert(!image_state->transition.finish_state);
391  	    return true;
392  	  }
393  	
394  	  image_state->next_state = state;
395  	  return false;
396  	}
397  	
398  	bool Policy::is_state_scheduled(const ImageState& image_state,
399  	                                StateTransition::State state) const {
400  	  return (image_state.state == state ||
401  	          (image_state.next_state && *image_state.next_state == state));
402  	}
403  	
404  	} // namespace image_map
405  	} // namespace mirror
406  	} // namespace rbd
407