1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <sys/utsname.h>
16
17 #include "include/compat.h"
18 #include "pthread.h"
19
20 #include "common/ceph_mutex.h"
21 #include "common/BackTrace.h"
22 #include "common/debug.h"
23 #include "common/safe_io.h"
24 #include "common/version.h"
25
26 #include "include/uuid.h"
27 #include "global/pidfile.h"
28 #include "global/signal_handler.h"
29
30 #include <poll.h>
31 #include <signal.h>
32 #include <sstream>
33 #include <stdlib.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include "common/errno.h"
37 #if defined(_AIX)
38 extern char *sys_siglist[];
39 #endif
40
41 #define dout_context g_ceph_context
42
43 void install_sighandler(int signum, signal_handler_t handler, int flags)
44 {
45 int ret;
46 struct sigaction oldact;
47 struct sigaction act;
48 memset(&act, 0, sizeof(act));
49
50 act.sa_handler = handler;
51 sigemptyset(&act.sa_mask);
52 act.sa_flags = flags;
53
54 ret = sigaction(signum, &act, &oldact);
55 if (ret != 0) {
56 char buf[1024];
57 #if defined(__sun)
58 char message[SIG2STR_MAX];
59 sig2str(signum,message);
60 snprintf(buf, sizeof(buf), "install_sighandler: sigaction returned "
61 "%d when trying to install a signal handler for %s\n",
62 ret, message);
63 #else
64 snprintf(buf, sizeof(buf), "install_sighandler: sigaction returned "
65 "%d when trying to install a signal handler for %s\n",
66 ret, sig_str(signum));
67 #endif
68 dout_emergency(buf);
69 exit(1);
70 }
71 }
72
73 void sighup_handler(int signum)
74 {
75 g_ceph_context->reopen_logs();
76 }
77
78 static void reraise_fatal(int signum)
79 {
80 // Use default handler to dump core
81 int ret = raise(signum);
82
83 // Normally, we won't get here. If we do, something is very weird.
84 char buf[1024];
85 if (ret) {
86 snprintf(buf, sizeof(buf), "reraise_fatal: failed to re-raise "
87 "signal %d\n", signum);
88 dout_emergency(buf);
89 }
90 else {
91 snprintf(buf, sizeof(buf), "reraise_fatal: default handler for "
92 "signal %d didn't terminate the process?\n", signum);
93 dout_emergency(buf);
94 }
95 exit(1);
96 }
97
98
// /etc/os-release looks like
//
//   NAME=Fedora
//   VERSION="28 (Server Edition)"
//   ID=fedora
//   VERSION_ID=28
//
// or
//
//   NAME="Ubuntu"
//   VERSION="16.04.3 LTS (Xenial Xerus)"
//   ID=ubuntu
//   ID_LIKE=debian
//
// parse_from_os_release("FOO=bar\nTHIS=\"that\"\n", "FOO=", ...) will
// write "bar\0" to out buffer, which is assumed to be as large as the input
// file.
//
// @param file  NUL-terminated contents of the os-release file
// @param key   key to look up, including the trailing '=' (e.g. "ID=")
// @param out   destination buffer for the NUL-terminated value
// @return 0 on success; -1 if the key is not found at the start of a line,
//         if the matching line is not terminated by '\n', or if the value
//         is empty.
//
// The key is only accepted at the very beginning of the buffer or right
// after a newline, so that "NAME=" does not match inside "PRETTY_NAME="
// and "ID=" does not match inside "VERSION_ID=".
static int parse_from_os_release(
  const char *file, const char *key,
  char *out)
{
  // find the first occurrence of key that starts a line
  const char *p = file;
  while ((p = strstr(p, key)) != nullptr) {
    if (p == file || p[-1] == '\n') {
      break;
    }
    ++p;  // mid-line match (e.g. inside a longer key); keep searching
  }
  if (!p) {
    return -1;
  }
  const char *start = p + strlen(key);
  const char *end = strchr(start, '\n');
  if (!end) {
    // unterminated (possibly truncated) line: do not trust it
    return -1;
  }
  // strip one pair of surrounding double quotes, if present
  if (*start == '"' && *(end - 1) == '"') {
    ++start;
    --end;
  }
  if (start >= end) {
    return -1;
  }
  memcpy(out, start, end - start);
  out[end - start] = 0;
  return 0;
}
140
141 static void handle_fatal_signal(int signum)
142 {
143 // This code may itself trigger a SIGSEGV if the heap is corrupt. In that
144 // case, SA_RESETHAND specifies that the default signal handler--
145 // presumably dump core-- will handle it.
146 char buf[1024];
147 char pthread_name[16] = {0}; //limited by 16B include terminating null byte.
148 int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name));
149 (void)r;
150 #if defined(__sun)
151 char message[SIG2STR_MAX];
152 sig2str(signum,message);
153 snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
154 "in thread %llx thread_name:%s\n", message, (unsigned long long)pthread_self(),
155 pthread_name);
156 #else
157 snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
158 "in thread %llx thread_name:%s\n", sig_str(signum), (unsigned long long)pthread_self(),
159 pthread_name);
160 #endif
161 dout_emergency(buf);
162 pidfile_remove();
163
164 // TODO: don't use an ostringstream here. It could call malloc(), which we
165 // don't want inside a signal handler.
166 // Also fix the backtrace code not to allocate memory.
167 BackTrace bt(1);
168 ostringstream oss;
169 bt.print(oss);
170 dout_emergency(oss.str());
171
172 char base[PATH_MAX] = { 0 };
173 if (g_ceph_context &&
174 g_ceph_context->_conf->crash_dir.size()) {
175 // -- crash dump --
176 // id
177 ostringstream idss;
178 utime_t now = ceph_clock_now();
179 now.gmtime(idss);
180 uuid_d uuid;
181 uuid.generate_random();
182 idss << "_" << uuid;
183 string id = idss.str();
184 std::replace(id.begin(), id.end(), ' ', '_');
185
186 snprintf(base, sizeof(base), "%s/%s",
187 g_ceph_context->_conf->crash_dir.c_str(),
188 id.c_str());
189 int r = ::mkdir(base, 0700);
190 if (r >= 0) {
191 char fn[PATH_MAX*2];
192 snprintf(fn, sizeof(fn)-1, "%s/meta", base);
193 int fd = ::open(fn, O_CREAT|O_WRONLY|O_CLOEXEC, 0600);
194 if (fd >= 0) {
195 JSONFormatter jf(true);
196 jf.open_object_section("crash");
197 jf.dump_string("crash_id", id);
198 now.gmtime(jf.dump_stream("timestamp"));
199 jf.dump_string("process_name", g_process_name);
200 jf.dump_string("entity_name", g_ceph_context->_conf->name.to_str());
201 jf.dump_string("ceph_version", ceph_version_to_str());
202
203 struct utsname u;
204 r = uname(&u);
205 if (r >= 0) {
206 jf.dump_string("utsname_hostname", u.nodename);
207 jf.dump_string("utsname_sysname", u.sysname);
208 jf.dump_string("utsname_release", u.release);
209 jf.dump_string("utsname_version", u.version);
210 jf.dump_string("utsname_machine", u.machine);
211 }
212 #if defined(__linux__)
213 // os-release
214 int in = ::open("/etc/os-release", O_RDONLY|O_CLOEXEC);
215 if (in >= 0) {
216 char buf[4096];
217 r = safe_read(in, buf, sizeof(buf)-1);
218 if (r >= 0) {
219 buf[r] = 0;
220 char v[4096];
221 if (parse_from_os_release(buf, "NAME=", v) >= 0) {
222 jf.dump_string("os_name", v);
223 }
224 if (parse_from_os_release(buf, "ID=", v) >= 0) {
225 jf.dump_string("os_id", v);
226 }
227 if (parse_from_os_release(buf, "VERSION_ID=", v) >= 0) {
228 jf.dump_string("os_version_id", v);
229 }
230 if (parse_from_os_release(buf, "VERSION=", v) >= 0) {
231 jf.dump_string("os_version", v);
232 }
233 }
234 ::close(in);
235 }
236 #endif
237
238 // assert?
239 if (g_assert_condition) {
240 jf.dump_string("assert_condition", g_assert_condition);
241 }
242 if (g_assert_func) {
243 jf.dump_string("assert_func", g_assert_func);
244 }
245 if (g_assert_file) {
246 jf.dump_string("assert_file", g_assert_file);
247 }
248 if (g_assert_line) {
249 jf.dump_unsigned("assert_line", g_assert_line);
250 }
251 if (g_assert_thread_name[0]) {
252 jf.dump_string("assert_thread_name", g_assert_thread_name);
253 }
254 if (g_assert_msg[0]) {
255 jf.dump_string("assert_msg", g_assert_msg);
256 }
257
258 // eio?
259 if (g_eio) {
260 jf.dump_bool("io_error", true);
261 if (g_eio_devname[0]) {
262 jf.dump_string("io_error_devname", g_eio_devname);
263 }
264 if (g_eio_path[0]) {
265 jf.dump_string("io_error_path", g_eio_path);
266 }
267 if (g_eio_error) {
268 jf.dump_int("io_error_code", g_eio_error);
269 }
270 if (g_eio_iotype) {
271 jf.dump_int("io_error_optype", g_eio_iotype);
272 }
273 if (g_eio_offset) {
274 jf.dump_unsigned("io_error_offset", g_eio_offset);
275 }
276 if (g_eio_length) {
277 jf.dump_unsigned("io_error_length", g_eio_length);
278 }
279 }
280
281 // backtrace
282 bt.dump(&jf);
283
284 jf.close_section();
285 ostringstream oss;
286 jf.flush(oss);
287 string s = oss.str();
288 r = safe_write(fd, s.c_str(), s.size());
289 (void)r;
290 ::close(fd);
291 }
292 snprintf(fn, sizeof(fn)-1, "%s/done", base);
293 ::creat(fn, 0444);
294 }
295 }
296
297 // avoid recursion back into logging code if that is where
298 // we got the SEGV.
299 if (g_ceph_context &&
300 g_ceph_context->_log &&
301 !g_ceph_context->_log->is_inside_log_lock()) {
302 // dump to log. this uses the heap extensively, but we're better
303 // off trying than not.
304 derr << buf << std::endl;
305 bt.print(*_dout);
306 *_dout << " NOTE: a copy of the executable, or `objdump -rdS <executable>` "
307 << "is needed to interpret this.\n"
308 << dendl;
309
310 g_ceph_context->_log->dump_recent();
311
312 if (base[0]) {
313 char fn[PATH_MAX*2];
314 snprintf(fn, sizeof(fn)-1, "%s/log", base);
315 g_ceph_context->_log->set_log_file(fn);
316 g_ceph_context->_log->reopen_log_file();
317 g_ceph_context->_log->dump_recent();
318 }
319 }
320
321 if (g_eio) {
322 // if this was an EIO crash, we don't need to trigger a core dump,
323 // since the problem is hardware, or some layer beneath us.
324 _exit(EIO);
325 } else {
326 reraise_fatal(signum);
327 }
328 }
329
330 void install_standard_sighandlers(void)
331 {
332 install_sighandler(SIGSEGV, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
333 install_sighandler(SIGABRT, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
334 install_sighandler(SIGBUS, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
335 install_sighandler(SIGILL, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
336 install_sighandler(SIGFPE, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
337 install_sighandler(SIGXCPU, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
338 install_sighandler(SIGXFSZ, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
339 install_sighandler(SIGSYS, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
340 }
341
342
343
344 /// --- safe handler ---
345
346 #include "common/Thread.h"
347 #include <errno.h>
348
349 #ifdef __APPLE__
350 #include <libproc.h>
351
352 string get_name_by_pid(pid_t pid)
353 {
354 char buf[PROC_PIDPATHINFO_MAXSIZE];
355 int ret = proc_pidpath(pid, buf, sizeof(buf));
356 if (ret == 0) {
357 derr << "Fail to proc_pidpath(" << pid << ")"
358 << " error = " << cpp_strerror(ret)
359 << dendl;
360 return "<unknown>";
361 }
362 return string(buf, ret);
363 }
364 #else
365 string get_name_by_pid(pid_t pid)
366 {
367 // If the PID is 0, its means the sender is the Kernel itself
368 if (pid == 0) {
369 return "Kernel";
370 }
371 char proc_pid_path[PATH_MAX] = {0};
372 snprintf(proc_pid_path, PATH_MAX, PROCPREFIX "/proc/%d/cmdline", pid);
373 int fd = open(proc_pid_path, O_RDONLY);
374
375 if (fd < 0) {
376 fd = -errno;
377 derr << "Fail to open '" << proc_pid_path
378 << "' error = " << cpp_strerror(fd)
379 << dendl;
380 return "<unknown>";
381 }
382 // assuming the cmdline length does not exceed PATH_MAX. if it
383 // really does, it's fine to return a truncated version.
384 char buf[PATH_MAX] = {0};
385 int ret = read(fd, buf, sizeof(buf));
386 close(fd);
387 if (ret < 0) {
388 ret = -errno;
389 derr << "Fail to read '" << proc_pid_path
390 << "' error = " << cpp_strerror(ret)
391 << dendl;
392 return "<unknown>";
393 }
394 std::replace(buf, buf + ret, '\0', ' ');
395 return string(buf, ret);
396 }
397 #endif
398
399 /**
400 * safe async signal handler / dispatcher
401 *
402 * This is an async unix signal handler based on the design from
403 *
404 * http://evbergen.home.xs4all.nl/unix-signals.html
405 *
406 * Features:
407 * - no unsafe work is done in the signal handler itself
408 * - callbacks are called from a regular thread
 *  - signals are not lost, unless the same signal is sent multiple times
 *    in quick succession.
411 */
412 struct SignalHandler : public Thread {
413 /// to kick the thread, for shutdown, new handlers, etc.
414 int pipefd[2]; // write to [1], read from [0]
415
416 /// to signal shutdown
417 bool stop = false;
418
419 /// for an individual signal
420 struct safe_handler {
421
422 safe_handler() {
423 memset(pipefd, 0, sizeof(pipefd));
424 memset(&handler, 0, sizeof(handler));
425 memset(&info_t, 0, sizeof(info_t));
426 }
427
428 siginfo_t info_t;
429 int pipefd[2]; // write to [1], read from [0]
430 signal_handler_t handler;
431 };
432
433 /// all handlers
434 safe_handler *handlers[32] = {nullptr};
435
436 /// to protect the handlers array
437 ceph::mutex lock = ceph::make_mutex("SignalHandler::lock");
438
439 SignalHandler() {
440 // create signal pipe
441 int r = pipe_cloexec(pipefd, 0);
442 ceph_assert(r == 0);
443 r = fcntl(pipefd[0], F_SETFL, O_NONBLOCK);
444 ceph_assert(r == 0);
445
446 // create thread
447 create("signal_handler");
448 }
449
(1) Event exn_spec_violation: |
An exception of type "_ZN5boost16exception_detail10clone_implINS0_19error_info_injectorINSt8ios_base7failureB5cxx11EEEEE" is thrown but the throw list "throw()" doesn't allow it to be thrown. This will cause a call to unexpected() which usually calls terminate(). |
Also see events: |
[fun_call_w_exception] |
450 ~SignalHandler() override {
(2) Event fun_call_w_exception: |
Called function throws an exception of type "_ZN5boost16exception_detail10clone_implINS0_19error_info_injectorINSt8ios_base7failureB5cxx11EEEEE". [details] |
Also see events: |
[exn_spec_violation] |
451 shutdown();
452 }
453
454 void signal_thread() {
455 int r = write(pipefd[1], "\0", 1);
(1) Event fun_call_w_exception: |
Called function throws an exception of type "_ZN5boost16exception_detail10clone_implINS0_19error_info_injectorINSt8ios_base7failureB5cxx11EEEEE". [details] |
456 ceph_assert(r == 1);
457 }
458
459 void shutdown() {
460 stop = true;
(1) Event fun_call_w_exception: |
Called function throws an exception of type "_ZN5boost16exception_detail10clone_implINS0_19error_info_injectorINSt8ios_base7failureB5cxx11EEEEE". [details] |
461 signal_thread();
462 join();
463 }
464
465 // thread entry point
466 void *entry() override {
467 while (!stop) {
468 // build fd list
469 struct pollfd fds[33];
470
471 lock.lock();
472 int num_fds = 0;
473 fds[num_fds].fd = pipefd[0];
474 fds[num_fds].events = POLLIN | POLLERR;
475 fds[num_fds].revents = 0;
476 ++num_fds;
477 for (unsigned i=0; i<32; i++) {
478 if (handlers[i]) {
479 fds[num_fds].fd = handlers[i]->pipefd[0];
480 fds[num_fds].events = POLLIN | POLLERR;
481 fds[num_fds].revents = 0;
482 ++num_fds;
483 }
484 }
485 lock.unlock();
486
487 // wait for data on any of those pipes
488 int r = poll(fds, num_fds, -1);
489 if (stop)
490 break;
491 if (r > 0) {
492 char v;
493
494 // consume byte from signal socket, if any.
495 TEMP_FAILURE_RETRY(read(pipefd[0], &v, 1));
496
497 lock.lock();
498 for (unsigned signum=0; signum<32; signum++) {
499 if (handlers[signum]) {
500 r = read(handlers[signum]->pipefd[0], &v, 1);
501 if (r == 1) {
502 siginfo_t * siginfo = &handlers[signum]->info_t;
503 ostringstream message;
504 message << "received signal: " << sig_str(signum);
505 switch (siginfo->si_code) {
506 case SI_USER:
507 message << " from " << get_name_by_pid(siginfo->si_pid);
508 // If PID is undefined, it doesn't have a meaning to be displayed
509 if (siginfo->si_pid) {
510 message << " (PID: " << siginfo->si_pid << ")";
511 } else {
512 message << " ( Could be generated by pthread_kill(), raise(), abort(), alarm() )";
513 }
514 message << " UID: " << siginfo->si_uid;
515 break;
516 default:
517 /* As we have a not expected signal, let's report the structure to help debugging */
518 message << ", si_code : " << siginfo->si_code;
519 message << ", si_value (int): " << siginfo->si_value.sival_int;
520 message << ", si_value (ptr): " << siginfo->si_value.sival_ptr;
521 message << ", si_errno: " << siginfo->si_errno;
522 message << ", si_pid : " << siginfo->si_pid;
523 message << ", si_uid : " << siginfo->si_uid;
524 message << ", si_addr" << siginfo->si_addr;
525 message << ", si_status" << siginfo->si_status;
526 break;
527 }
528 derr << message.str() << dendl;
529 handlers[signum]->handler(signum);
530 }
531 }
532 }
533 lock.unlock();
534 }
535 }
536 return NULL;
537 }
538
539 void queue_signal(int signum) {
540 // If this signal handler is registered, the callback must be
541 // defined. We can do this without the lock because we will never
542 // have the signal handler defined without the handlers entry also
543 // being filled in.
544 ceph_assert(handlers[signum]);
545 int r = write(handlers[signum]->pipefd[1], " ", 1);
546 ceph_assert(r == 1);
547 }
548
549 void queue_signal_info(int signum, siginfo_t *siginfo, void * content) {
550 // If this signal handler is registered, the callback must be
551 // defined. We can do this without the lock because we will never
552 // have the signal handler defined without the handlers entry also
553 // being filled in.
554 ceph_assert(handlers[signum]);
555 memcpy(&handlers[signum]->info_t, siginfo, sizeof(siginfo_t));
556 int r = write(handlers[signum]->pipefd[1], " ", 1);
557 ceph_assert(r == 1);
558 }
559
560 void register_handler(int signum, signal_handler_t handler, bool oneshot);
561 void unregister_handler(int signum, signal_handler_t handler);
562 };
563
564 static SignalHandler *g_signal_handler = NULL;
565
566 static void handler_signal_hook(int signum, siginfo_t * siginfo, void * content) {
567 g_signal_handler->queue_signal_info(signum, siginfo, content);
568 }
569
570 void SignalHandler::register_handler(int signum, signal_handler_t handler, bool oneshot)
571 {
572 int r;
573
574 ceph_assert(signum >= 0 && signum < 32);
575
576 safe_handler *h = new safe_handler;
577
578 r = pipe_cloexec(h->pipefd, 0);
579 ceph_assert(r == 0);
580 r = fcntl(h->pipefd[0], F_SETFL, O_NONBLOCK);
581 ceph_assert(r == 0);
582
583 h->handler = handler;
584 lock.lock();
585 handlers[signum] = h;
586 lock.unlock();
587
588 // signal thread so that it sees our new handler
589 signal_thread();
590
591 // install our handler
592 struct sigaction oldact;
593 struct sigaction act;
594 memset(&act, 0, sizeof(act));
595
596 act.sa_handler = (signal_handler_t)handler_signal_hook;
597 sigfillset(&act.sa_mask); // mask all signals in the handler
598 act.sa_flags = SA_SIGINFO | (oneshot ? SA_RESETHAND : 0);
599 int ret = sigaction(signum, &act, &oldact);
600 ceph_assert(ret == 0);
601 }
602
603 void SignalHandler::unregister_handler(int signum, signal_handler_t handler)
604 {
605 ceph_assert(signum >= 0 && signum < 32);
606 safe_handler *h = handlers[signum];
607 ceph_assert(h);
608 ceph_assert(h->handler == handler);
609
610 // restore to default
611 signal(signum, SIG_DFL);
612
613 // _then_ remove our handlers entry
614 lock.lock();
615 handlers[signum] = NULL;
616 lock.unlock();
617
618 // this will wake up select() so that worker thread sees our handler is gone
619 close(h->pipefd[0]);
620 close(h->pipefd[1]);
621 delete h;
622 }
623
624
625 // -------
626
627 void init_async_signal_handler()
628 {
629 ceph_assert(!g_signal_handler);
630 g_signal_handler = new SignalHandler;
631 }
632
633 void shutdown_async_signal_handler()
634 {
635 ceph_assert(g_signal_handler);
636 delete g_signal_handler;
637 g_signal_handler = NULL;
638 }
639
640 void queue_async_signal(int signum)
641 {
642 ceph_assert(g_signal_handler);
643 g_signal_handler->queue_signal(signum);
644 }
645
646 void register_async_signal_handler(int signum, signal_handler_t handler)
647 {
648 ceph_assert(g_signal_handler);
649 g_signal_handler->register_handler(signum, handler, false);
650 }
651
652 void register_async_signal_handler_oneshot(int signum, signal_handler_t handler)
653 {
654 ceph_assert(g_signal_handler);
655 g_signal_handler->register_handler(signum, handler, true);
656 }
657
658 void unregister_async_signal_handler(int signum, signal_handler_t handler)
659 {
660 ceph_assert(g_signal_handler);
661 g_signal_handler->unregister_handler(signum, handler);
662 }
663
664
665
666