1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2    	// vim: ts=8 sw=2 smarttab
3    	/*
4    	 * Ceph - scalable distributed file system
5    	 *
6    	 * Copyright (c) 2015 Hewlett-Packard Development Company, L.P.
7    	 *
8    	 * This is free software; you can redistribute it and/or
9    	 * modify it under the terms of the GNU Lesser General Public
10   	 * License version 2.1, as published by the Free Software
11   	 * Foundation.  See file COPYING.
12   	 *
13   	 */
14   	
15   	#include "include/compat.h"
16   	
17   	#ifdef __FreeBSD__
18   	#include <sys/param.h>
19   	#include <geom/geom_disk.h>
20   	#include <sys/disk.h>
21   	#include <fcntl.h>
22   	#endif
23   	
24   	#include <errno.h>
25   	#include <sys/ioctl.h>
26   	#include <sys/stat.h>
27   	#include <sys/types.h>
28   	#include <sys/stat.h>
29   	#include <fcntl.h>
30   	#include <dirent.h>
31   	#include <boost/algorithm/string/replace.hpp>
32   	//#include "common/debug.h"
33   	#include "include/scope_guard.h"
34   	#include "include/uuid.h"
35   	#include "include/stringify.h"
36   	#include "blkdev.h"
37   	#include "numa.h"
38   	
39   	#include "json_spirit/json_spirit_reader.h"
40   	
41   	int get_device_by_path(const char *path, char* partition, char* device,
42   			       size_t max)
43   	{
44   	  int fd = ::open(path, O_RDONLY|O_DIRECTORY);
45   	  if (fd < 0) {
46   	    return -errno;
47   	  }
48   	  auto close_fd = make_scope_guard([fd] {
49   	    ::close(fd);
50   	  });
51   	  BlkDev blkdev(fd);
52   	  if (auto ret = blkdev.partition(partition, max); ret) {
53   	    return ret;
54   	  }
55   	  if (auto ret = blkdev.wholedisk(device, max); ret) {
56   	    return ret;
57   	  }
58   	  return 0;
59   	}
60   	
61   	
62   	#include "common/blkdev.h"
63   	
64   	#ifdef __linux__
65   	#include <libudev.h>
66   	#include <linux/fs.h>
67   	#include <linux/kdev_t.h>
68   	#include <blkid/blkid.h>
69   	
70   	#include <set>
71   	
72   	#include "common/SubProcess.h"
73   	#include "common/errno.h"
74   	
75   	
76   	#define UUID_LEN 36
77   	
78   	#endif
79   	
80   	
// Construct from an already-open file descriptor; the value is stored in fd.
BlkDev::BlkDev(int f)
  : fd(f)
{}
84   	
// Construct from a device name (used as "/dev/<devname>" and as the sysfs
// block directory name by the lookups in this file).
BlkDev::BlkDev(const std::string& devname)
  : devname(devname)
{}
88   	
89   	int BlkDev::get_devid(dev_t *id) const
90   	{
91   	  struct stat st;
92   	  int r;
93   	  if (fd >= 0) {
94   	    r = fstat(fd, &st);
95   	  } else {
96   	    char path[PATH_MAX];
97   	    snprintf(path, sizeof(path), "/dev/%s", devname.c_str());
98   	    r = stat(path, &st);
99   	  }
100  	  if (r < 0) {
101  	    return -errno;
102  	  }
103  	  *id = S_ISBLK(st.st_mode) ? st.st_rdev : st.st_dev;
104  	  return 0;
105  	}
106  	
107  	#ifdef __linux__
// Indexed by blkdev_prop_t: the path of each property's sysfs file, relative
// to "<sysfsdir>/block/<device>/" (see BlkDev::get_string_property).
static const char *blkdev_props2strings[] = {
  [BLKDEV_PROP_DEV]                 = "dev",
  [BLKDEV_PROP_DISCARD_GRANULARITY] = "queue/discard_granularity",
  [BLKDEV_PROP_MODEL]               = "device/model",
  [BLKDEV_PROP_ROTATIONAL]          = "queue/rotational",
  [BLKDEV_PROP_SERIAL]              = "device/serial",
  [BLKDEV_PROP_VENDOR]              = "device/device/vendor",
  [BLKDEV_PROP_NUMA_NODE]           = "device/device/numa_node",
  [BLKDEV_PROP_NUMA_CPUS]           = "device/device/local_cpulist",
};
118  	
// Root of the sysfs tree that property paths are built under.
const char *BlkDev::sysfsdir() const {
  return "/sys";
}
122  	
123  	int BlkDev::get_size(int64_t *psize) const
124  	{
125  	#ifdef BLKGETSIZE64
126  	  int ret = ::ioctl(fd, BLKGETSIZE64, psize);
127  	#elif defined(BLKGETSIZE)
128  	  unsigned long sectors = 0;
129  	  int ret = ::ioctl(fd, BLKGETSIZE, &sectors);
130  	  *psize = sectors * 512ULL;
131  	#else
132  	// cppcheck-suppress preprocessorErrorDirective
133  	# error "Linux configuration error (get_size)"
134  	#endif
135  	  if (ret < 0)
136  	    ret = -errno;
137  	  return ret;
138  	}
139  	
140  	/**
141  	 * get a block device property as a string
142  	 *
143  	 * store property in *val, up to maxlen chars
144  	 * return 0 on success
145  	 * return negative error on error
146  	 */
147  	int64_t BlkDev::get_string_property(blkdev_prop_t prop,
148  					    char *val, size_t maxlen) const
149  	{
150  	  char filename[PATH_MAX], wd[PATH_MAX];
151  	  const char* dev = nullptr;
152  	  assert(prop < BLKDEV_PROP_NUMPROPS);
153  	  const char *propstr = blkdev_props2strings[prop];
154  	
155  	  if (fd >= 0) {
156  	    // sysfs isn't fully populated for partitions, so we need to lookup the sysfs
157  	    // entry for the underlying whole disk.
158  	    if (int r = wholedisk(wd, sizeof(wd)); r < 0)
159  	      return r;
160  	    dev = wd;
161  	  } else {
162  	    dev = devname.c_str();
163  	  }
164  	  if (snprintf(filename, sizeof(filename), "%s/block/%s/%s", sysfsdir(), dev,
165  		       propstr) >= static_cast<int>(sizeof(filename))) {
166  	    return -ERANGE;
167  	  }
168  	
169  	  FILE *fp = fopen(filename, "r");
170  	  if (fp == NULL) {
171  	    return -errno;
172  	  }
173  	
174  	  int r = 0;
175  	  if (fgets(val, maxlen - 1, fp)) {
176  	    // truncate at newline
177  	    char *p = val;
178  	    while (*p && *p != '\n')
179  	      ++p;
180  	    *p = 0;
181  	  } else {
182  	    r = -EINVAL;
183  	  }
184  	  fclose(fp);
185  	  return r;
186  	}
187  	
188  	/**
189  	 * get a block device property
190  	 *
191  	 * return the value (we assume it is positive)
192  	 * return negative error on error
193  	 */
194  	int64_t BlkDev::get_int_property(blkdev_prop_t prop) const
195  	{
196  	  char buff[256] = {0};
197  	  int r = get_string_property(prop, buff, sizeof(buff));
198  	  if (r < 0)
199  	    return r;
200  	  // take only digits
201  	  for (char *p = buff; *p; ++p) {
202  	    if (!isdigit(*p)) {
203  	      *p = 0;
204  	      break;
205  	    }
206  	  }
207  	  char *endptr = 0;
208  	  r = strtoll(buff, &endptr, 10);
209  	  if (endptr != buff + strlen(buff))
210  	    r = -EINVAL;
211  	  return r;
212  	}
213  	
// True when the queue/discard_granularity sysfs property reads as a
// positive number; read errors (negative results) count as "no".
bool BlkDev::support_discard() const
{
  return get_int_property(BLKDEV_PROP_DISCARD_GRANULARITY) > 0;
}
218  	
219  	int BlkDev::discard(int64_t offset, int64_t len) const
220  	{
221  	  uint64_t range[2] = {(uint64_t)offset, (uint64_t)len};
222  	  return ioctl(fd, BLKDISCARD, range);
223  	}
224  	
225  	bool BlkDev::is_nvme() const
226  	{
227  	  char vendor[80];
228  	  // nvme has a device/device/vendor property; infer from that.  There is
229  	  // probably a better way?
230  	  int r = get_string_property(BLKDEV_PROP_VENDOR, vendor, 80);
231  	  return (r == 0);
232  	}
233  	
// True when the queue/rotational sysfs property reads as a positive number;
// read errors (negative results) count as "not rotational".
bool BlkDev::is_rotational() const
{
  return get_int_property(BLKDEV_PROP_ROTATIONAL) > 0;
}
238  	
239  	int BlkDev::get_numa_node(int *node) const
240  	{
241  	  int numa = get_int_property(BLKDEV_PROP_NUMA_NODE);
242  	  if (numa < 0)
243  	    return -1;
244  	  *node = numa;
245  	  return 0;
246  	}
247  	
// Copy the contents of the "dev" sysfs property into dev (at most max bytes);
// returns the get_string_property() result.
int BlkDev::dev(char *dev, size_t max) const
{
  return get_string_property(BLKDEV_PROP_DEV, dev, max);
}
252  	
// Copy the "device/device/vendor" sysfs property into vendor (at most max
// bytes); returns the get_string_property() result.
int BlkDev::vendor(char *vendor, size_t max) const
{
  return get_string_property(BLKDEV_PROP_VENDOR, vendor, max);
}
257  	
// Copy the "device/model" sysfs property into model (at most max bytes);
// returns the get_string_property() result.
int BlkDev::model(char *model, size_t max) const
{
  return get_string_property(BLKDEV_PROP_MODEL, model, max);
}
262  	
// Copy the "device/serial" sysfs property into serial (at most max bytes);
// returns the get_string_property() result.
int BlkDev::serial(char *serial, size_t max) const
{
  return get_string_property(BLKDEV_PROP_SERIAL, serial, max);
}
267  	
268  	int BlkDev::partition(char *partition, size_t max) const
269  	{
270  	  dev_t id;
271  	  int r = get_devid(&id);
272  	  if (r < 0)
273  	    return -EINVAL;  // hrm.
274  	
275  	  char *t = blkid_devno_to_devname(id);
276  	  if (!t) {
277  	    return -EINVAL;
278  	  }
279  	  strncpy(partition, t, max);
280  	  free(t);
281  	  return 0;
282  	}
283  	
284  	int BlkDev::wholedisk(char *device, size_t max) const
285  	{
286  	  dev_t id;
287  	  int r = get_devid(&id);
288  	  if (r < 0)
289  	    return -EINVAL;  // hrm.
290  	
291  	  r = blkid_devno_to_wholedisk(id, device, max, nullptr);
292  	  if (r < 0) {
293  	    return -EINVAL;
294  	  }
295  	  return 0;
296  	}
297  	
/**
 * Collect the names of all entries in a directory, skipping "." and "..".
 *
 * @param dir directory to list
 * @param out set the entry names are inserted into
 * @return 0 on success, -errno if the directory cannot be opened
 */
static int easy_readdir(const std::string& dir, std::set<std::string> *out)
{
  DIR *handle = ::opendir(dir.c_str());
  if (handle == nullptr) {
    return -errno;
  }
  for (struct dirent *ent = ::readdir(handle);
       ent != nullptr;
       ent = ::readdir(handle)) {
    const bool is_dot = strcmp(ent->d_name, ".") == 0;
    const bool is_dotdot = strcmp(ent->d_name, "..") == 0;
    if (!is_dot && !is_dotdot) {
      out->insert(ent->d_name);
    }
  }
  closedir(handle);
  return 0;
}
315  	
316  	void get_dm_parents(const std::string& dev, std::set<std::string> *ls)
317  	{
318  	  std::string p = std::string("/sys/block/") + dev + "/slaves";
319  	  std::set<std::string> parents;
320  	  easy_readdir(p, &parents);
321  	  for (auto& d : parents) {
322  	    ls->insert(d);
323  	    // recurse in case it is dm-on-dm
324  	    if (d.find("dm-") == 0) {
325  	      get_dm_parents(d, ls);
326  	    }
327  	  }
328  	}
329  	
330  	void get_raw_devices(const std::string& in,
331  			     std::set<std::string> *ls)
332  	{
333  	  if (in.substr(0, 3) == "dm-") {
334  	    std::set<std::string> o;
335  	    get_dm_parents(in, &o);
336  	    for (auto& d : o) {
337  	      get_raw_devices(d, ls);
338  	    }
339  	  } else {
340  	    BlkDev d(in);
341  	    std::string wholedisk;
342  	    if (d.wholedisk(&wholedisk) == 0) {
343  	      ls->insert(wholedisk);
344  	    } else {
345  	      ls->insert(in);
346  	    }
347  	  }
348  	}
349  	
/**
 * Open the VDO statistics directory for a raw device name, if there is one.
 *
 * We need to go from the raw devname (e.g., dm-4) to the VDO volume name.
 * Currently the best way seems to be to look at /dev/mapper/*: an entry
 * whose symlink target is "../<devname>" names the volume, and its
 * statistics live under /sys/kvdo/<volume>/statistics.
 *
 * @param devname  raw device name
 * @param vdo_name receives the volume name when a handle is opened
 * @return an open file descriptor on success, otherwise a negative value
 */
int _get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
  const std::string wanted = std::string("../") + devname;  // expected symlink target
  DIR *mapper = ::opendir("/dev/mapper");
  if (mapper == nullptr) {
    return -1;
  }

  int stats_fd = -1;
  for (struct dirent *ent = ::readdir(mapper);
       ent != nullptr;
       ent = ::readdir(mapper)) {
    if (ent->d_name[0] == '.')
      continue;

    char link_path[4096], link_target[4096];
    snprintf(link_path, sizeof(link_path), "/dev/mapper/%s", ent->d_name);
    const int len = readlink(link_path, link_target, sizeof(link_target));
    if (len < 0 || len >= (int)sizeof(link_target))
      continue;
    link_target[len] = 0;
    if (wanted != link_target)
      continue;

    // a different path from the one readlink() inspected
    char stats_path[4096];
    snprintf(stats_path, sizeof(stats_path), "/sys/kvdo/%s/statistics",
	     ent->d_name);
    stats_fd = ::open(stats_path, O_RDONLY|O_CLOEXEC); //DIRECTORY);
    if (stats_fd >= 0) {
      *vdo_name = ent->d_name;
      break;
    }
  }
  closedir(mapper);
  return stats_fd;
}
383  	
384  	int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
385  	{
386  	  std::set<std::string> devs = { devname };
387  	  while (!devs.empty()) {
388  	    std::string dev = *devs.begin();
389  	    devs.erase(devs.begin());
390  	    int fd = _get_vdo_stats_handle(dev.c_str(), vdo_name);
391  	    if (fd >= 0) {
392  	      // yay, it's vdo
393  	      return fd;
394  	    }
395  	    // ok, see if there are constituent devices
396  	    if (dev.find("dm-") == 0) {
397  	      get_dm_parents(dev, &devs);
398  	    }
399  	  }
400  	  return -1;
401  	}
402  	
/**
 * Read one numeric statistic from a VDO statistics directory.
 *
 * @param vdo_fd   directory descriptor the property is opened relative to
 * @param property file name of the statistic
 * @return the value parsed with atoll(), or 0 if the file cannot be opened
 *         or nothing could be read
 */
int64_t get_vdo_stat(int vdo_fd, const char *property)
{
  const int fd = ::openat(vdo_fd, property, O_RDONLY|O_CLOEXEC);
  if (fd < 0) {
    return 0;
  }

  char text[1024];
  int64_t value = 0;
  const int nread = ::read(fd, text, sizeof(text) - 1);
  if (nread > 0) {
    text[nread] = 0;
    value = atoll(text);
  }
  TEMP_FAILURE_RETRY(::close(fd));
  return value;
}
419  	
420  	bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
421  	{
422  	  int64_t block_size = get_vdo_stat(fd, "block_size");
423  	  int64_t physical_blocks = get_vdo_stat(fd, "physical_blocks");
424  	  int64_t overhead_blocks_used = get_vdo_stat(fd, "overhead_blocks_used");
425  	  int64_t data_blocks_used = get_vdo_stat(fd, "data_blocks_used");
426  	  if (!block_size
427  	      || !physical_blocks
428  	      || !overhead_blocks_used
429  	      || !data_blocks_used) {
430  	    return false;
431  	  }
432  	  int64_t avail_blocks =
433  	    physical_blocks - overhead_blocks_used - data_blocks_used;
434  	  *total = block_size * physical_blocks;
435  	  *avail = block_size * avail_blocks;
436  	  return true;
437  	}
438  	
439  	std::string _decode_model_enc(const std::string& in)
440  	{
441  	  auto v = boost::replace_all_copy(in, "\\x20", " ");
442  	  if (auto found = v.find_last_not_of(" "); found != v.npos) {
443  	    v.erase(found + 1);
444  	  }
445  	  std::replace(v.begin(), v.end(), ' ', '_');
446  	  return v;
447  	}
448  	
449  	// trying to use udev first, and if it doesn't work, we fall back to 
450  	// reading /sys/block/$devname/device/(vendor/model/serial).
451  	std::string get_device_id(const std::string& devname,
452  				  std::string *err)
453  	{
454  	  struct udev_device *dev;
455  	  static struct udev *udev;
456  	  const char *data;
457  	
458  	  udev = udev_new();
459  	  if (!udev) {
460  	    if (err) {
461  	      *err = "udev_new failed";
462  	    }
463  	    return {};
464  	  }
465  	  dev = udev_device_new_from_subsystem_sysname(udev, "block", devname.c_str());
466  	  if (!dev) {
467  	    if (err) {
468  	      *err = std::string("udev_device_new_from_subsystem_sysname failed on '")
469  		+ devname + "'";
470  	    }
471  	    udev_unref(udev);
472  	    return {};
473  	  }
474  	
475  	  // ****
476  	  //   NOTE: please keep this implementation in sync with _get_device_id() in
477  	  //   src/ceph-volume/ceph_volume/util/device.py
478  	  // ****
479  	
480  	  std::string id_vendor, id_model, id_serial, id_serial_short, id_scsi_serial;
481  	  data = udev_device_get_property_value(dev, "ID_VENDOR");
482  	  if (data) {
483  	    id_vendor = data;
484  	  }
485  	  data = udev_device_get_property_value(dev, "ID_MODEL");
486  	  if (data) {
487  	    id_model = data;
488  	    // sometimes, ID_MODEL is "LVM ..." but ID_MODEL_ENC is correct (but
489  	    // encoded with \x20 for space).
490  	    if (id_model.substr(0, 7) == "LVM PV ") {
491  	      const char *enc = udev_device_get_property_value(dev, "ID_MODEL_ENC");
492  	      if (enc) {
493  		id_model = _decode_model_enc(enc);
494  	      } else {
495  		// ignore ID_MODEL then
496  		id_model.clear();
497  	      }
498  	    }
499  	  }
500  	  data = udev_device_get_property_value(dev, "ID_SERIAL_SHORT");
501  	  if (data) {
502  	    id_serial_short = data;
503  	  }
504  	  data = udev_device_get_property_value(dev, "ID_SCSI_SERIAL");
505  	  if (data) {
506  	    id_scsi_serial = data;
507  	  }
508  	  data = udev_device_get_property_value(dev, "ID_SERIAL");
509  	  if (data) {
510  	    id_serial = data;
511  	  }
512  	  udev_device_unref(dev);
513  	  udev_unref(udev);
514  	
515  	  // ID_SERIAL is usually $vendor_$model_$serial, but not always
516  	  // ID_SERIAL_SHORT is mostly always just the serial
517  	  // ID_MODEL is sometimes $vendor_$model, but
518  	  // ID_VENDOR is sometimes $vendor and ID_MODEL just $model and ID_SCSI_SERIAL the real serial number, with ID_SERIAL and ID_SERIAL_SHORT gibberish (ick)
519  	  std::string device_id;
520  	  if (id_vendor.size() && id_model.size() && id_scsi_serial.size()) {
521  	    device_id = id_vendor + '_' + id_model + '_' + id_scsi_serial;
522  	  } else if (id_model.size() && id_serial_short.size()) {
523  	    device_id = id_model + '_' + id_serial_short;
524  	  } else if (id_serial.size()) {
525  	    device_id = id_serial;
526  	    if (device_id.substr(0, 4) == "MTFD") {
527  	      // Micron NVMes hide the vendor
528  	      device_id = "Micron_" + device_id;
529  	    }
530  	  }
531  	  if (device_id.size()) {
532  	    std::replace(device_id.begin(), device_id.end(), ' ', '_');
533  	    return device_id;
534  	  }
535  	
536  	  // either udev_device_get_property_value() failed, or succeeded but
537  	  // returned nothing; trying to read from files.  note that the 'vendor'
538  	  // file rarely contains the actual vendor; it's usually 'ATA'.
539  	  std::string model, serial;
540  	  char buf[1024] = {0};
541  	  BlkDev blkdev(devname);
542  	  if (!blkdev.model(buf, sizeof(buf))) {
543  	    model = buf;
544  	  }
545  	  if (blkdev.serial(buf, sizeof(buf))) {
546  	    serial = buf;
547  	  }
548  	  if (!model.size() || serial.size()) {
549  	    if (err) {
550  	      *err = std::string("fallback method has serial '") + serial
551  		+ "'but no model";
552  	    }
553  	    return {};
554  	  }
555  	
556  	  device_id = model + "_" + serial;
557  	  std::replace(device_id.begin(), device_id.end(), ' ', '_');
558  	  return device_id;
559  	}
560  	
561  	static std::string get_device_vendor(const std::string& devname)
562  	{
563  	  struct udev_device *dev;
564  	  static struct udev *udev;
565  	  const char *data;
566  	
567  	  udev = udev_new();
568  	  if (!udev) {
569  	    return {};
570  	  }
571  	  dev = udev_device_new_from_subsystem_sysname(udev, "block", devname.c_str());
572  	  if (!dev) {
573  	    udev_unref(udev);
574  	    return {};
575  	  }
576  	
577  	  std::string id_vendor, id_model;
578  	  data = udev_device_get_property_value(dev, "ID_VENDOR");
579  	  if (data) {
580  	    id_vendor = data;
581  	  }
582  	  data = udev_device_get_property_value(dev, "ID_MODEL");
583  	  if (data) {
584  	    id_model = data;
585  	  }
586  	  udev_device_unref(dev);
587  	  udev_unref(udev);
588  	
589  	  std::transform(id_vendor.begin(), id_vendor.end(), id_vendor.begin(),
590  			 ::tolower);
591  	  std::transform(id_model.begin(), id_model.end(), id_model.begin(),
592  			 ::tolower);
593  	
594  	  if (id_vendor.size()) {
595  	    return id_vendor;
596  	  }
597  	  if (id_model.size()) {
598  	    int pos = id_model.find(" ");
599  	    if (pos > 0) {
600  	      return id_model.substr(0, pos);
601  	    } else {
602  	      return id_model;
603  	    }
604  	  }
605  	
606  	  std::string vendor, model;
607  	  char buf[1024] = {0};
608  	  BlkDev blkdev(devname);
609  	  if (!blkdev.vendor(buf, sizeof(buf))) {
610  	    vendor = buf;
611  	  }
612  	  if (!blkdev.model(buf, sizeof(buf))) {
613  	    model = buf;
614  	  }
615  	  if (vendor.size()) {
616  	    return vendor;
617  	  }
618  	  if (model.size()) {
619  	     int pos = model.find(" ");
620  	    if (pos > 0) {
621  	      return model.substr(0, pos);
622  	    } else {
623  	      return model;
624  	    }
625  	  }
626  	
627  	  return {};
628  	}
629  	
/**
 * Run "sudo nvme <vendor> smart-log-add --json /dev/<devname>" and capture
 * its standard output.
 *
 * @param devname block device name (without the "/dev/" prefix)
 * @param vendor  vendor plugin name passed to nvme
 * @param timeout timeout handed to SubProcessTimed
 * @param result  receives the command output on success, or an error
 *                description on failure
 * @return 0 on success; the spawn() error, the negative read_fd() result,
 *         or -EINVAL if join() reports a failure
 */
static int block_device_run_vendor_nvme(
  const string& devname, const string& vendor, int timeout,
  std::string *result)
{
  string device = "/dev/" + devname;

  SubProcessTimed nvmecli(
    "sudo", SubProcess::CLOSE, SubProcess::PIPE, SubProcess::CLOSE,
    timeout);
  nvmecli.add_cmd_args(
    "nvme",
    vendor.c_str(),
    "smart-log-add",
    "--json",
    device.c_str(),
    NULL);
  int ret = nvmecli.spawn();
  if (ret != 0) {
    *result = std::string("error spawning nvme command: ") + nvmecli.err();
    return ret;
  }

  // read at most 100 KiB of output
  bufferlist output;
  ret = output.read_fd(nvmecli.get_stdout(), 100*1024);
  if (ret < 0) {
    bufferlist err;
    err.read_fd(nvmecli.get_stderr(), 100 * 1024);
    *result = std::string("failed to execute nvme: ") + err.to_str();
  } else {
    ret = 0;
    *result = output.to_str();
  }

  // always join the child; a join failure overrides whatever was read
  if (nvmecli.join() != 0) {
    *result = std::string("nvme returned an error: ") + nvmecli.err();
    return -EINVAL;
  }

  return ret;
}
670  	
/**
 * Run "sudo smartctl -a --json=o /dev/<devname>" and capture its standard
 * output.
 *
 * @param devname block device name (without the "/dev/" prefix)
 * @param timeout timeout handed to SubProcessTimed
 * @param result  receives the command output on success, or an error
 *                description on failure
 * @return 0 on success; the spawn() error, the negative read_fd() result,
 *         or -EINVAL if the exit status has bit 0 or bit 1 set
 */
static int block_device_run_smartctl(const string& devname, int timeout,
				     std::string *result)
{
  string device = "/dev/" + devname;

  // when using --json, smartctl will report its errors in JSON format to stdout 
  SubProcessTimed smartctl(
    "sudo", SubProcess::CLOSE, SubProcess::PIPE, SubProcess::CLOSE,
    timeout);
  smartctl.add_cmd_args(
    "smartctl",
    "-a",
    //"-x",
    "--json=o",
    device.c_str(),
    NULL);

  int ret = smartctl.spawn();
  if (ret != 0) {
    *result = std::string("error spawning smartctl: ") + smartctl.err();
    return ret;
  }

  // read at most 100 KiB of output
  bufferlist output;
  ret = output.read_fd(smartctl.get_stdout(), 100*1024);
  if (ret < 0) {
    *result = std::string("failed read smartctl output: ") + cpp_strerror(-ret);
  } else {
    ret = 0;
    *result = output.to_str();
  }

  int joinerr = smartctl.join();
  // Bit 0: Command line did not parse.
  // Bit 1: Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode (see '-n' option above).
  // Bit 2: Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure (see '-b' option above).
  // Bit 3: SMART status check returned "DISK FAILING".
  // Bit 4: We found prefail Attributes <= threshold.
  // Bit 5: SMART status check returned "DISK OK" but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past.
  // Bit 6: The device error log contains records of errors.
  // Bit 7: The device self-test log contains records of errors.  [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored.
  // only bits 0 and 1 are treated as failures here
  if (joinerr & 3) {
    *result = "smartctl returned an error ("s + stringify(joinerr) +
      "): stderr:\n"s + smartctl.err() + "\nstdout:\n"s + *result;
    return -EINVAL;
  }

  return ret;
}
720  	
/**
 * Escape a string so it can be embedded inside a double-quoted JSON string.
 *
 * Every double quote is prefixed with a backslash.  Backslashes are escaped
 * too, so that an input backslash can never combine with an inserted one
 * and change the meaning of the result.
 *
 * @param s text to escape
 * @return a copy of @p s with '"' and '\' backslash-escaped
 */
static std::string escape_quotes(const std::string& s)
{
  std::string r;
  r.reserve(s.size());
  for (const char c : s) {
    if (c == '"' || c == '\\') {
      r.push_back('\\');
    }
    r.push_back(c);
  }
  return r;
}
731  	
732  	int block_device_get_metrics(const string& devname, int timeout,
733  				     json_spirit::mValue *result)
734  	{
735  	  std::string s;
736  	
737  	  // smartctl
738  	  if (int r = block_device_run_smartctl(devname, timeout, &s);
739  	      r != 0) {
740  	    string orig = s;
741  	    s = "{\"error\": \"smartctl failed\", \"dev\": \"/dev/";
742  	    s += devname;
743  	    s += "\", \"smartctl_error_code\": " + stringify(r);
744  	    s += ", \"smartctl_output\": \"" + escape_quotes(orig);
745  	    s += + "\"}";
746  	  } else if (!json_spirit::read(s, *result)) {
747  	    string orig = s;
748  	    s = "{\"error\": \"smartctl returned invalid JSON\", \"dev\": \"/dev/";
749  	    s += devname;
750  	    s += "\",\"output\":\"";
751  	    s += escape_quotes(orig);
752  	    s += "\"}";
753  	  }
754  	  if (!json_spirit::read(s, *result)) {
755  	    return -EINVAL;
756  	  }
757  	
758  	  json_spirit::mObject& base = result->get_obj();
759  	  string vendor = get_device_vendor(devname);
760  	  if (vendor.size()) {
761  	    base["nvme_vendor"] = vendor;
762  	    s.clear();
763  	    json_spirit::mValue nvme_json;
764  	    if (int r = block_device_run_vendor_nvme(devname, vendor, timeout, &s);
765  		r == 0) {
766  	      if (json_spirit::read(s, nvme_json) != 0) {
767  		base["nvme_smart_health_information_add_log"] = nvme_json;
768  	      } else {
769  		base["nvme_smart_health_information_add_log_error"] = "bad json output: "
770  		  + s;
771  	      }
772  	    } else {
773  	      base["nvme_smart_health_information_add_log_error_code"] = r;
774  	      base["nvme_smart_health_information_add_log_error"] = s;
775  	    }
776  	  } else {
777  	    base["nvme_vendor"] = "unknown";
778  	  }
779  	
780  	  return 0;
781  	}
782  	
783  	#elif defined(__APPLE__)
784  	#include <sys/disk.h>
785  	
// There is no sysfs on Apple platforms: asserts, and returns an empty
// string if assertions are compiled out.
const char *BlkDev::sysfsdir() const {
  assert(false);  // Should never be called on Apple
  return "";
}
790  	
791  	int BlkDev::dev(char *dev, size_t max) const
792  	{
793  	  struct stat sb;
794  	
795  	  if (fstat(fd, &sb) < 0)
796  	    return -errno;
797  	
798  	  snprintf(dev, max, "%" PRIu64, (uint64_t)sb.st_rdev);
799  	
800  	  return 0;
801  	}
802  	
803  	int BlkDev::get_size(int64_t *psize) const
804  	{
805  	  unsigned long blocksize = 0;
806  	  int ret = ::ioctl(fd, DKIOCGETBLOCKSIZE, &blocksize);
807  	  if (!ret) {
808  	    unsigned long nblocks;
809  	    ret = ::ioctl(fd, DKIOCGETBLOCKCOUNT, &nblocks);
810  	    if (!ret)
811  	      *psize = (int64_t)nblocks * blocksize;
812  	  }
813  	  if (ret < 0)
814  	    ret = -errno;
815  	  return ret;
816  	}
817  	
// Stub on Apple: no properties are available, always reports 0.
int64_t BlkDev::get_int_property(blkdev_prop_t prop) const
{
  return 0;
}
822  	
// Stub on Apple: discard is never reported as supported.
bool BlkDev::support_discard() const
{
  return false;
}
827  	
// Stub on Apple: always fails with -EOPNOTSUPP.
int BlkDev::discard(int64_t offset, int64_t len) const
{
  return -EOPNOTSUPP;
}
832  	
// Stub on Apple: always reports "not NVMe".
bool BlkDev::is_nvme() const
{
  return false;
}
837  	
// Stub on Apple: always reports "not rotational".
bool BlkDev::is_rotational() const
{
  return false;
}
842  	
// Stub on Apple: always fails with -1 and leaves *node untouched.
int BlkDev::get_numa_node(int *node) const
{
  return -1;
}
847  	
// Stub on Apple: always fails with -EOPNOTSUPP.
int BlkDev::model(char *model, size_t max) const
{
  return -EOPNOTSUPP;
}
852  	
// Stub on Apple: always fails with -EOPNOTSUPP.
int BlkDev::serial(char *serial, size_t max) const
{
  return -EOPNOTSUPP;
}
857  	
// Stub on Apple: always fails with -EOPNOTSUPP.
int BlkDev::partition(char *partition, size_t max) const
{
  return -EOPNOTSUPP;
}
862  	
863  	int BlkDev::wholedisk(char *device, size_t max) const
864  	{
865  	}
866  	
867  	
// Stub on Apple: does nothing, *ls is left unchanged.
void get_dm_parents(const std::string& dev, std::set<std::string> *ls)
{
}
871  	
// Stub on Apple: does nothing, *ls is left unchanged.
void get_raw_devices(const std::string& in,
		     std::set<std::string> *ls)
{
}
876  	
// Stub on Apple: never finds a VDO volume, always returns -1.
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
  return -1;
}
881  	
// Stub on Apple: always returns 0.
int64_t get_vdo_stat(int fd, const char *property)
{
  return 0;
}
886  	
// Stub on Apple: always returns false and leaves the outputs untouched.
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
  return false;
}
891  	
// Stub on Apple: returns an empty id and, when err is non-null, sets it to
// "not implemented".
std::string get_device_id(const std::string& devname,
			  std::string *err)
{
  // FIXME: implement me
  if (err) {
    *err = "not implemented";
  }
  return std::string();
}
901  	
902  	#elif defined(__FreeBSD__)
903  	
// There is no sysfs on FreeBSD: asserts, and returns an empty string if
// assertions are compiled out.
const char *BlkDev::sysfsdir() const {
  assert(false);  // Should never be called on FreeBSD
  return "";
}
908  	
909  	int BlkDev::dev(char *dev, size_t max) const
910  	{
911  	  struct stat sb;
912  	
913  	  if (fstat(fd, &sb) < 0)
914  	    return -errno;
915  	
916  	  snprintf(dev, max, "%" PRIu64, (uint64_t)sb.st_rdev);
917  	
918  	  return 0;
919  	}
920  	
921  	int BlkDev::get_size(int64_t *psize) const
922  	{
923  	  int ret = ::ioctl(fd, DIOCGMEDIASIZE, psize);
924  	  if (ret < 0)
925  	    ret = -errno;
926  	  return ret;
927  	}
928  	
// Stub on FreeBSD: no properties are available, always reports 0.
int64_t BlkDev::get_int_property(blkdev_prop_t prop) const
{
  return 0;
}
933  	
// Reports whether the device can discard.  Only when built with
// FREEBSD_WITH_TRIM is the "GEOM::candelete" attribute queried; otherwise
// (or if the query fails) the answer is false.
bool BlkDev::support_discard() const
{
#ifdef FREEBSD_WITH_TRIM
  // there is no point to claim support of discard, but
  // unable to do so.
  struct diocgattr_arg arg;

  strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
  arg.len = sizeof(arg.value.i);
  if (ioctl(fd, DIOCGATTR, &arg) == 0) {
    return (arg.value.i != 0);
  } else {
    return false;
  }
#endif
  return false;
}
951  	
// Stub on FreeBSD: always fails with -EOPNOTSUPP.
int BlkDev::discard(int64_t offset, int64_t len) const
{
  return -EOPNOTSUPP;
}
956  	
957  	bool BlkDev::is_nvme() const
958  	{
959  	  // FreeBSD doesn't have a good way to tell if a device's underlying protocol
960  	  // is NVME, especially since multiple GEOM transforms may be involved.  So
961  	  // we'll just guess based on the device name.
962  	  struct fiodgname_arg arg;
963  	  const char *nda = "nda";        //CAM-based attachment
964  	  const char *nvd = "nvd";        //CAM-less attachment
965  	  char devname[PATH_MAX];
966  	
967  	  arg.buf = devname;
968  	  arg.len = sizeof(devname);
969  	  if (ioctl(fd, FIODGNAME, &arg) < 0)
970  	    return false; //When in doubt, it's probably not NVME
971  	
972  	  return (strncmp(nvd, devname, strlen(nvd)) == 0 ||
973  	          strncmp(nda, devname, strlen(nda)) == 0);
974  	}
975  	
976  	bool BlkDev::is_rotational() const
977  	{
978  	#if __FreeBSD_version >= 1200049
979  	  struct diocgattr_arg arg;
980  	
981  	  strlcpy(arg.name, "GEOM::rotation_rate", sizeof(arg.name));
982  	  arg.len = sizeof(arg.value.u16);
983  	
984  	  int ioctl_ret = ioctl(fd, DIOCGATTR, &arg);
985  	  bool ret;
986  	  if (ioctl_ret < 0 || arg.value.u16 == DISK_RR_UNKNOWN)
987  	    // DISK_RR_UNKNOWN usually indicates an old drive, which is usually spinny
988  	    ret = true;
989  	  else if (arg.value.u16 == DISK_RR_NON_ROTATING)
990  	    ret = false;
991  	  else if (arg.value.u16 >= DISK_RR_MIN && arg.value.u16 <= DISK_RR_MAX)
992  	    ret = true;
993  	  else
994  	    ret = true;     // Invalid value.  Probably spinny?
995  	
996  	  return ret;
997  	#else
998  	  return true;      // When in doubt, it's probably spinny
999  	#endif
1000 	}
1001 	
1002 	int BlkDev::get_numa_node(int *node) const
1003 	{
1004 	  int numa = get_int_property(BLKDEV_PROP_NUMA_NODE);
1005 	  if (numa < 0)
1006 	    return -1;
1007 	  *node = numa;
1008 	  return 0;
1009 	}
1010 	
1011 	int BlkDev::model(char *model, size_t max) const
1012 	{
1013 	  struct diocgattr_arg arg;
1014 	
1015 	  strlcpy(arg.name, "GEOM::descr", sizeof(arg.name));
1016 	  arg.len = sizeof(arg.value.str);
1017 	  if (ioctl(fd, DIOCGATTR, &arg) < 0) {
1018 	    return -errno;
1019 	  }
1020 	
1021 	  // The GEOM description is of the form "vendor product" for SCSI disks
1022 	  // and "ATA device_model" for ATA disks.  Some vendors choose to put the
1023 	  // vendor name in device_model, and some don't.  Strip the first bit.
1024 	  char *p = arg.value.str;
1025 	  if (p == NULL || *p == '\0') {
1026 	    *model = '\0';
1027 	  } else {
1028 	    (void) strsep(&p, " ");
1029 	    snprintf(model, max, "%s", p);
1030 	  }
1031 	
1032 	  return 0;
1033 	}
1034 	
1035 	int BlkDev::serial(char *serial, size_t max) const
1036 	{
1037 	  char ident[DISK_IDENT_SIZE];
1038 	
1039 	  if (ioctl(fd, DIOCGIDENT, ident) < 0)
1040 	    return -errno;
1041 	
1042 	  snprintf(serial, max, "%s", ident);
1043 	
1044 	  return 0;
1045 	}
1046 	
// FreeBSD stub: no device-mapper parents are looked up, so *ls is left
// exactly as the caller passed it.
void get_dm_parents(const std::string& dev, std::set<std::string> *ls)
{
  (void)dev;
  (void)ls;
}
1050 	
// FreeBSD stub: no raw devices are resolved for `in`, so *ls is left exactly
// as the caller passed it.
void get_raw_devices(const std::string& in, std::set<std::string> *ls)
{
  (void)in;
  (void)ls;
}
1055 	
// FreeBSD stub: always returns -1 and never touches *vdo_name.
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
  (void)devname;
  (void)vdo_name;

  constexpr int no_handle = -1;
  return no_handle;
}
1060 	
// FreeBSD stub: every property reads as 0; fd is never used.
int64_t get_vdo_stat(int fd, const char *property)
{
  (void)fd;
  (void)property;
  return int64_t{0};
}
1065 	
// FreeBSD stub: always returns false and leaves *total and *avail untouched.
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
  (void)fd;
  (void)total;
  (void)avail;
  return false;
}
1070 	
// FreeBSD stub: no device id can be produced yet.
//
// Always returns an empty string; when err is non-null it receives a short
// explanation.
std::string get_device_id(const std::string& devname, std::string *err)
{
  (void)devname;

  // FIXME: implement me for freebsd
  if (err != nullptr) {
    err->assign("not implemented for FreeBSD");
  }
  return {};
}
1080 	
// FreeBSD stub: always returns -EOPNOTSUPP; *result is never written.
int block_device_run_smartctl(const char *device, int timeout,
                              std::string *result)
{
  (void)device;
  (void)timeout;
  (void)result;

  // FIXME: implement me for freebsd
  constexpr int unsupported = -EOPNOTSUPP;
  return unsupported;
}
1087 	
1088 	int block_device_get_metrics(const string& devname, int timeout,
1089 	                             json_spirit::mValue *result)
1090 	{
1091 	  // FIXME: implement me for freebsd
1092 	  return -EOPNOTSUPP;  
1093 	}
1094 	
// FreeBSD stub: always returns -EOPNOTSUPP; *result is never written.
int block_device_run_nvme(const char *device, const char *vendor, int timeout,
                          std::string *result)
{
  (void)device;
  (void)vendor;
  (void)timeout;
  (void)result;

  constexpr int unsupported = -EOPNOTSUPP;
  return unsupported;
}
1100 	
1101 	static int block_device_devname(int fd, char *devname, size_t max)
1102 	{
1103 	  struct fiodgname_arg arg;
1104 	
1105 	  arg.buf = devname;
1106 	  arg.len = max;
1107 	  if (ioctl(fd, FIODGNAME, &arg) < 0)
1108 	    return -errno;
1109 	  return 0;
1110 	}
1111 	
1112 	int BlkDev::partition(char *partition, size_t max) const
1113 	{
1114 	  char devname[PATH_MAX];
1115 	
1116 	  if (block_device_devname(fd, devname, sizeof(devname)) < 0)
1117 	    return -errno;
1118 	  snprintf(partition, max, "/dev/%s", devname);
1119 	  return 0;
1120 	}
1121 	
1122 	int BlkDev::wholedisk(char *wd, size_t max) const
1123 	{
1124 	  char devname[PATH_MAX];
1125 	
1126 	  if (block_device_devname(fd, devname, sizeof(devname)) < 0)
1127 	    return -errno;
1128 	
1129 	  size_t first_digit = strcspn(devname, "0123456789");
1130 	  // first_digit now indexes the first digit or null character of devname
1131 	  size_t next_nondigit = strspn(&devname[first_digit], "0123456789");
1132 	  next_nondigit += first_digit;
1133 	  // next_nondigit now indexes the first alphabetic or null character after the
1134 	  // unit number
1135 	  strlcpy(wd, devname, next_nondigit + 1);
1136 	  return 0;
1137 	}
1138 	
1139 	#else
1140 	
1141 	const char *BlkDev::sysfsdir() const {
1142 	  assert(false);  // Should never be called on non-Linux
1143 	  return "";
1144 	}
1145 	
1146 	int BlkDev::dev(char *dev, size_t max) const
1147 	{
1148 	  return -EOPNOTSUPP;
1149 	}
1150 	
1151 	int BlkDev::get_size(int64_t *psize) const
1152 	{
1153 	  return -EOPNOTSUPP;
1154 	}
1155 	
1156 	bool BlkDev::support_discard() const
1157 	{
1158 	  return false;
1159 	}
1160 	
1161 	int BlkDev::discard(int fd, int64_t offset, int64_t len) const
1162 	{
1163 	  return -EOPNOTSUPP;
1164 	}
1165 	
1166 	bool BlkDev::is_nvme(const char *devname) const
1167 	{
1168 	  return false;
1169 	}
1170 	
1171 	bool BlkDev::is_rotational(const char *devname) const
1172 	{
1173 	  return false;
1174 	}
1175 	
1176 	int BlkDev::model(char *model, size_t max) const
1177 	{
1178 	  return -EOPNOTSUPP;
1179 	}
1180 	
1181 	int BlkDev::serial(char *serial, size_t max) const
1182 	{
1183 	  return -EOPNOTSUPP;
1184 	}
1185 	
1186 	int BlkDev::partition(char *partition, size_t max) const
1187 	{
1188 	  return -EOPNOTSUPP;
1189 	}
1190 	
1191 	int BlkDev::wholedisk(char *wd, size_t max) const
1192 	{
1193 	  return -EOPNOTSUPP;
1194 	}
1195 	
// Fallback stub: no device-mapper parents are looked up, so *ls is left
// exactly as the caller passed it.
void get_dm_parents(const std::string& dev, std::set<std::string> *ls)
{
  (void)dev;
  (void)ls;
}
1199 	
// Fallback stub: no raw devices are resolved for `in`, so *ls is left
// exactly as the caller passed it.
void get_raw_devices(const std::string& in, std::set<std::string> *ls)
{
  (void)in;
  (void)ls;
}
1204 	
// Fallback stub: always returns -1 and never touches *vdo_name.
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
  (void)devname;
  (void)vdo_name;

  constexpr int no_handle = -1;
  return no_handle;
}
1209 	
// Fallback stub: every property reads as 0; fd is never used.
int64_t get_vdo_stat(int fd, const char *property)
{
  (void)fd;
  (void)property;
  return int64_t{0};
}
1214 	
// Fallback stub: always returns false and leaves *total and *avail untouched.
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
  (void)fd;
  (void)total;
  (void)avail;
  return false;
}
1219 	
// Fallback stub: no device id can be produced.
//
// Always returns an empty string; when err is non-null it receives a short
// explanation.
std::string get_device_id(const std::string& devname, std::string *err)
{
  (void)devname;

  // not implemented
  if (err != nullptr) {
    err->assign("not implemented");
  }
  return {};
}
1229 	
// Fallback stub: always returns -EOPNOTSUPP; *result is never written.
int block_device_run_smartctl(const char *device, int timeout,
                              std::string *result)
{
  (void)device;
  (void)timeout;
  (void)result;

  constexpr int unsupported = -EOPNOTSUPP;
  return unsupported;
}
1235 	
1236 	int block_device_get_metrics(const string& devname, int timeout,
1237 	                             json_spirit::mValue *result)
1238 	{
1239 	  return -EOPNOTSUPP;
1240 	}
1241 	
// Fallback stub: always returns -EOPNOTSUPP; *result is never written.
int block_device_run_nvme(const char *device, const char *vendor, int timeout,
                          std::string *result)
{
  (void)device;
  (void)vendor;
  (void)timeout;
  (void)result;

  constexpr int unsupported = -EOPNOTSUPP;
  return unsupported;
}
1247 	
1248 	#endif
1249