1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "common/debug.h"
5 #include "common/errno.h"
6
7 #include "librbd/Utils.h"
8 #include "Policy.h"
9
10 #define dout_context g_ceph_context
11 #define dout_subsys ceph_subsys_rbd_mirror
12 #undef dout_prefix
13 #define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
14 << " " << __func__ << ": "
15
16 namespace rbd {
17 namespace mirror {
18 namespace image_map {
19
20 namespace {
21
22 bool is_instance_action(ActionType action_type) {
23 switch (action_type) {
24 case ACTION_TYPE_ACQUIRE:
25 case ACTION_TYPE_RELEASE:
26 return true;
27 case ACTION_TYPE_NONE:
28 case ACTION_TYPE_MAP_UPDATE:
29 case ACTION_TYPE_MAP_REMOVE:
30 break;
31 }
32 return false;
33 }
34
35 } // anonymous namespace
36
37 using ::operator<<;
38 using librbd::util::unique_lock_name;
39
40 Policy::Policy(librados::IoCtx &ioctx)
41 : m_ioctx(ioctx),
42 m_map_lock(ceph::make_shared_mutex(
43 unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this))) {
44
45 // map should at least have once instance
46 std::string instance_id = stringify(ioctx.get_instance_id());
47 m_map.emplace(instance_id, std::set<std::string>{});
48 }
49
50 void Policy::init(
51 const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
52 dout(20) << dendl;
53
54 std::unique_lock map_lock{m_map_lock};
55 for (auto& it : image_mapping) {
56 ceph_assert(!it.second.instance_id.empty());
57 auto map_result = m_map[it.second.instance_id].emplace(it.first);
58 ceph_assert(map_result.second);
59
60 auto image_state_result = m_image_states.emplace(
61 it.first, ImageState{it.second.instance_id, it.second.mapped_time});
62 ceph_assert(image_state_result.second);
63
64 // ensure we (re)send image acquire actions to the instance
65 auto& image_state = image_state_result.first->second;
66 auto start_action = set_state(&image_state,
67 StateTransition::STATE_INITIALIZING, false);
68 ceph_assert(start_action);
69 }
70 }
71
72 LookupInfo Policy::lookup(const std::string &global_image_id) {
73 dout(20) << "global_image_id=" << global_image_id << dendl;
74
75 std::shared_lock map_lock{m_map_lock};
76 LookupInfo info;
77
78 auto it = m_image_states.find(global_image_id);
79 if (it != m_image_states.end()) {
80 info.instance_id = it->second.instance_id;
81 info.mapped_time = it->second.mapped_time;
82 }
83 return info;
84 }
85
86 bool Policy::add_image(const std::string &global_image_id) {
87 dout(5) << "global_image_id=" << global_image_id << dendl;
88
89 std::unique_lock map_lock{m_map_lock};
90 auto image_state_result = m_image_states.emplace(global_image_id,
91 ImageState{});
92 auto& image_state = image_state_result.first->second;
93 if (image_state.state == StateTransition::STATE_INITIALIZING) {
94 // avoid duplicate acquire notifications upon leader startup
95 return false;
96 }
97
98 return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false);
99 }
100
101 bool Policy::remove_image(const std::string &global_image_id) {
102 dout(5) << "global_image_id=" << global_image_id << dendl;
103
104 std::unique_lock map_lock{m_map_lock};
105 auto it = m_image_states.find(global_image_id);
106 if (it == m_image_states.end()) {
107 return false;
108 }
109
110 auto& image_state = it->second;
111 return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false);
112 }
113
114 void Policy::add_instances(const InstanceIds &instance_ids,
115 GlobalImageIds* global_image_ids) {
116 dout(5) << "instance_ids=" << instance_ids << dendl;
117
118 std::unique_lock map_lock{m_map_lock};
119 for (auto& instance : instance_ids) {
120 ceph_assert(!instance.empty());
121 m_map.emplace(instance, std::set<std::string>{});
122 }
123
124 // post-failover, remove any dead instances and re-shuffle their images
125 if (m_initial_update) {
126 dout(5) << "initial instance update" << dendl;
127 m_initial_update = false;
128
129 std::set<std::string> alive_instances(instance_ids.begin(),
130 instance_ids.end());
131 InstanceIds dead_instances;
132 for (auto& map_pair : m_map) {
133 if (alive_instances.find(map_pair.first) == alive_instances.end()) {
134 dead_instances.push_back(map_pair.first);
135 }
136 }
137
138 if (!dead_instances.empty()) {
139 remove_instances(m_map_lock, dead_instances, global_image_ids);
140 }
141 }
142
143 GlobalImageIds shuffle_global_image_ids;
144 do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids);
145 dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids
146 << "]" << dendl;
147 for (auto& global_image_id : shuffle_global_image_ids) {
148 auto it = m_image_states.find(global_image_id);
149 ceph_assert(it != m_image_states.end());
150
151 auto& image_state = it->second;
152 if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) {
153 global_image_ids->emplace(global_image_id);
154 }
155 }
156 }
157
158 void Policy::remove_instances(const InstanceIds &instance_ids,
159 GlobalImageIds* global_image_ids) {
160 std::unique_lock map_lock{m_map_lock};
161 remove_instances(m_map_lock, instance_ids, global_image_ids);
162 }
163
164 void Policy::remove_instances(const ceph::shared_mutex& lock,
165 const InstanceIds &instance_ids,
166 GlobalImageIds* global_image_ids) {
167 ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
168 dout(5) << "instance_ids=" << instance_ids << dendl;
169
170 for (auto& instance_id : instance_ids) {
171 auto map_it = m_map.find(instance_id);
172 if (map_it == m_map.end()) {
173 continue;
174 }
175
176 auto& instance_global_image_ids = map_it->second;
177 if (instance_global_image_ids.empty()) {
178 m_map.erase(map_it);
179 continue;
180 }
181
182 m_dead_instances.insert(instance_id);
183 dout(5) << "force shuffling: instance_id=" << instance_id << ", "
184 << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl;
185 for (auto& global_image_id : instance_global_image_ids) {
186 auto it = m_image_states.find(global_image_id);
187 ceph_assert(it != m_image_states.end());
188
189 auto& image_state = it->second;
190 if (is_state_scheduled(image_state,
191 StateTransition::STATE_DISSOCIATING)) {
192 // don't shuffle images that no longer exist
193 continue;
194 }
195
196 if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) {
197 global_image_ids->emplace(global_image_id);
198 }
199 }
200 }
201 }
202
203 ActionType Policy::start_action(const std::string &global_image_id) {
(1) Event getlock: |
Acquiring lock named "_ZN4ceph18shared_mutex_debugE.rwlock". [details] |
204 std::unique_lock map_lock{m_map_lock};
205
206 auto it = m_image_states.find(global_image_id);
(2) Event cond_true: |
Condition "it != this->m_image_states.end()", taking true branch. |
207 ceph_assert(it != m_image_states.end());
208
209 auto& image_state = it->second;
210 auto& transition = image_state.transition;
(3) Event cond_true: |
Condition "transition.action_type != rbd::mirror::image_map::ACTION_TYPE_NONE", taking true branch. |
211 ceph_assert(transition.action_type != ACTION_TYPE_NONE);
212
(4) Event cond_true: |
Condition "should_gather", taking true branch. |
213 dout(5) << "global_image_id=" << global_image_id << ", "
214 << "state=" << image_state.state << ", "
215 << "action_type=" << transition.action_type << dendl;
(5) Event cond_false: |
Condition "transition.start_policy_action.operator bool()", taking false branch. |
216 if (transition.start_policy_action) {
217 execute_policy_action(global_image_id, &image_state,
218 *transition.start_policy_action);
219 transition.start_policy_action = boost::none;
(6) Event if_end: |
End of if statement. |
220 }
221 return transition.action_type;
222 }
223
224 bool Policy::finish_action(const std::string &global_image_id, int r) {
225 std::unique_lock map_lock{m_map_lock};
226
227 auto it = m_image_states.find(global_image_id);
228 ceph_assert(it != m_image_states.end());
229
230 auto& image_state = it->second;
231 auto& transition = image_state.transition;
232 dout(5) << "global_image_id=" << global_image_id << ", "
233 << "state=" << image_state.state << ", "
234 << "action_type=" << transition.action_type << ", "
235 << "r=" << r << dendl;
236
237 // retry on failure unless it's an RPC message to an instance that is dead
238 if (r < 0 &&
239 (!is_instance_action(image_state.transition.action_type) ||
240 image_state.instance_id == UNMAPPED_INSTANCE_ID ||
241 m_dead_instances.find(image_state.instance_id) ==
242 m_dead_instances.end())) {
243 return true;
244 }
245
246 auto finish_policy_action = transition.finish_policy_action;
247 StateTransition::transit(image_state.state, &image_state.transition);
248 if (transition.finish_state) {
249 // in-progress state machine complete
250 ceph_assert(StateTransition::is_idle(*transition.finish_state));
251 image_state.state = *transition.finish_state;
252 image_state.transition = {};
253 }
254
255 if (StateTransition::is_idle(image_state.state) && image_state.next_state) {
256 // advance to pending state machine
257 bool start_action = set_state(&image_state, *image_state.next_state, false);
258 ceph_assert(start_action);
259 }
260
261 // image state may get purged in execute_policy_action()
262 bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE;
263 if (finish_policy_action) {
264 execute_policy_action(global_image_id, &image_state, *finish_policy_action);
265 }
266
267 return pending_action;
268 }
269
270 void Policy::execute_policy_action(
271 const std::string& global_image_id, ImageState* image_state,
272 StateTransition::PolicyAction policy_action) {
273 dout(5) << "global_image_id=" << global_image_id << ", "
274 << "policy_action=" << policy_action << dendl;
275
276 switch (policy_action) {
277 case StateTransition::POLICY_ACTION_MAP:
278 map(global_image_id, image_state);
279 break;
280 case StateTransition::POLICY_ACTION_UNMAP:
281 unmap(global_image_id, image_state);
282 break;
283 case StateTransition::POLICY_ACTION_REMOVE:
284 if (image_state->state == StateTransition::STATE_UNASSOCIATED) {
285 ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID);
286 ceph_assert(!image_state->next_state);
287 m_image_states.erase(global_image_id);
288 }
289 break;
290 }
291 }
292
293 void Policy::map(const std::string& global_image_id, ImageState* image_state) {
294 ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
295
296 std::string instance_id = image_state->instance_id;
297 if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
298 return;
299 }
300 if (is_dead_instance(instance_id)) {
301 unmap(global_image_id, image_state);
302 }
303
304 instance_id = do_map(m_map, global_image_id);
305 ceph_assert(!instance_id.empty());
306 dout(5) << "global_image_id=" << global_image_id << ", "
307 << "instance_id=" << instance_id << dendl;
308
309 image_state->instance_id = instance_id;
310 image_state->mapped_time = ceph_clock_now();
311
312 auto ins = m_map[instance_id].emplace(global_image_id);
313 ceph_assert(ins.second);
314 }
315
316 void Policy::unmap(const std::string &global_image_id,
317 ImageState* image_state) {
318 ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
319
320 std::string instance_id = image_state->instance_id;
321 if (instance_id == UNMAPPED_INSTANCE_ID) {
322 return;
323 }
324
325 dout(5) << "global_image_id=" << global_image_id << ", "
326 << "instance_id=" << instance_id << dendl;
327
328 ceph_assert(!instance_id.empty());
329 m_map[instance_id].erase(global_image_id);
330 image_state->instance_id = UNMAPPED_INSTANCE_ID;
331 image_state->mapped_time = {};
332
333 if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
334 dout(5) << "removing dead instance_id=" << instance_id << dendl;
335 m_map.erase(instance_id);
336 m_dead_instances.erase(instance_id);
337 }
338 }
339
340 bool Policy::is_image_shuffling(const std::string &global_image_id) {
341 ceph_assert(ceph_mutex_is_locked(m_map_lock));
342
343 auto it = m_image_states.find(global_image_id);
344 ceph_assert(it != m_image_states.end());
345 auto& image_state = it->second;
346
347 // avoid attempting to re-shuffle a pending shuffle
348 auto result = is_state_scheduled(image_state,
349 StateTransition::STATE_SHUFFLING);
350 dout(20) << "global_image_id=" << global_image_id << ", "
351 << "result=" << result << dendl;
352 return result;
353 }
354
355 bool Policy::can_shuffle_image(const std::string &global_image_id) {
356 ceph_assert(ceph_mutex_is_locked(m_map_lock));
357
358 CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
359 int migration_throttle = cct->_conf.get_val<uint64_t>(
360 "rbd_mirror_image_policy_migration_throttle");
361
362 auto it = m_image_states.find(global_image_id);
363 ceph_assert(it != m_image_states.end());
364 auto& image_state = it->second;
365
366 utime_t last_shuffled_time = image_state.mapped_time;
367
368 // idle images that haven't been recently remapped can shuffle
369 utime_t now = ceph_clock_now();
370 auto result = (StateTransition::is_idle(image_state.state) &&
371 ((migration_throttle <= 0) ||
372 (now - last_shuffled_time >= migration_throttle)));
373 dout(10) << "global_image_id=" << global_image_id << ", "
374 << "migration_throttle=" << migration_throttle << ", "
375 << "last_shuffled_time=" << last_shuffled_time << ", "
376 << "result=" << result << dendl;
377 return result;
378 }
379
380 bool Policy::set_state(ImageState* image_state, StateTransition::State state,
381 bool ignore_current_state) {
382 if (!ignore_current_state && image_state->state == state) {
383 return false;
384 } else if (StateTransition::is_idle(image_state->state)) {
385 image_state->state = state;
386 image_state->next_state = boost::none;
387
388 StateTransition::transit(image_state->state, &image_state->transition);
389 ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE);
390 ceph_assert(!image_state->transition.finish_state);
391 return true;
392 }
393
394 image_state->next_state = state;
395 return false;
396 }
397
398 bool Policy::is_state_scheduled(const ImageState& image_state,
399 StateTransition::State state) const {
400 return (image_state.state == state ||
401 (image_state.next_state && *image_state.next_state == state));
402 }
403
404 } // namespace image_map
405 } // namespace mirror
406 } // namespace rbd
407