#define _GNU_SOURCE 1 #include #include #include #include #include #include #include /* * stats * create: * -c [ ] [ -g ] * delete: * -d [ ] [ -s starting_sector ] * query: * -q [ ] [ --clear ] [ --raw ] [ --bi ] * clear: * --clear [ ] * subdivide: * [ ] -s starting_sector [ -g ] */ extern void *xmalloc(size_t); extern void *xrealloc(void *, size_t); extern char *xstrdup(const char *); #define SYS_BLOCK "/sys/block" #define DEV "/dev/" #define STAT_NAME "statistics" #define ERROR_STATUS 1 #define ERROR_PARAMS 4 #define GRANULARITY_DEFAULT (4096 * 1024 / 512) #define GRANULARITY_SUBDIVIDE 16 #define READ_BATCH 50000 static int retval = 0; static const char *device_name; static int device_specified; #define MODE_CHECK 0 #define MODE_CREATE 1 #define MODE_DELETE 2 #define MODE_QUERY 3 static int mode = MODE_CHECK; static int clear = 0; static int force = 0; static int raw = 0; static int bi = 0; static long long granularity = 0; static int do_subdivide = 0; static long long subdivide = -1; struct collected_data { uint64_t reads; uint64_t reads_merged; uint64_t read_sectors; uint64_t read_msec; uint64_t writes; uint64_t writes_merged; uint64_t write_sectors; uint64_t write_msec; uint64_t io_in_progress; uint64_t io_msec; uint64_t weighted_io_msec; uint64_t reading_msec; uint64_t writing_msec; }; struct all_data { struct collected_data *c; struct range *r; uint64_t start; uint64_t len; size_t depth; }; struct range { int id; uint64_t start; uint64_t end; uint64_t step; size_t steps; struct collected_data *data; }; static void break_into_lines(char *msg, void (*function)(const char *)) { while (*msg) { char *eol = strchr(msg, '\n'); if (!eol) error(ERROR_STATUS, 0, "%s: unterminated line: \"%s\"", device_name, msg); *eol = 0; function(msg); msg = eol + 1; } } static char *get_dm_name(const char *dev) { struct dm_task *dmt; char *name; if (!(dmt = dm_task_create(DM_DEVICE_INFO))) error(ERROR_STATUS, 0, "%s: dm_task_create(DM_DEVICE_TARGET_MSG) failed", dev); if (!dm_task_set_name(dmt, dev)) error(ERROR_STATUS, 0, "%s: dm_task_set_name failed", dev); if (!dm_task_run(dmt)) error(ERROR_STATUS, 0, "%s: DM_DEVICE_INFO ioctl failed", dev); name = xstrdup(dm_task_get_name(dmt)); dm_task_destroy(dmt); return name; } static char *do_message(const char *msg) { struct dm_task *dmt; const char *result; char *result_ret; if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG))) error(ERROR_STATUS, 0, "%s: dm_task_create(DM_DEVICE_TARGET_MSG) failed", device_name); if (!dm_task_set_name(dmt, device_name)) error(ERROR_STATUS, 0, "%s: dm_task_set_name failed", device_name); if (!dm_task_set_sector(dmt, 0)) error(ERROR_STATUS, 0, "%s: dm_task_set_sector failed", device_name); if (!dm_task_set_message(dmt, msg)) error(ERROR_STATUS, 0, "%s: dm_task_set_message(%s) failed", device_name, msg); if (!dm_task_run(dmt)) error(ERROR_STATUS, 0, "%s: the kernel doesn't support statistics on this device", device_name); result = dm_task_get_message_result(dmt); if (!result) { result_ret = NULL; } else { result_ret = xstrdup(result); } dm_task_destroy(dmt); return result_ret; } __attribute__((__format__(__printf__, 1, 2))) static char *do_message_args(const char *msg, ...) { va_list va; char *str, *rs; va_start(va, msg); if (vasprintf(&str, msg, va) < 0) error(ERROR_STATUS, 0, "vasprintf failed (%s)", msg); va_end(va); rs = do_message(str); free(str); return rs; } static struct range *ranges; static size_t n_ranges; static void get_range(const char *line) { uint64_t steps; ranges = xrealloc(ranges, (n_ranges + 1) * sizeof(struct range)); if (sscanf(line, "%d: %"SCNu64"-%"SCNu64" %"SCNu64" STAT_NAME ", &ranges[n_ranges].id, &ranges[n_ranges].start, &ranges[n_ranges].end, &ranges[n_ranges].step) < 4) error(ERROR_STATUS, 0, "%s: invalid status line \"%s\"", device_name, line); steps = (ranges[n_ranges].end - ranges[n_ranges].start + ranges[n_ranges].step - 1) / ranges[n_ranges].step; if ((size_t)steps * sizeof(struct collected_data) / sizeof(struct collected_data) != steps) error(ERROR_STATUS, 0, "%s: integer overflow in steps (%"PRIu64")", device_name, steps); ranges[n_ranges].steps = steps; ranges[n_ranges].data = NULL; n_ranges++; if (!(n_ranges + 1)) error(ERROR_STATUS, 0, "%s: n_ranges overflow", device_name); } static void free_ranges(void) { size_t x; for (x = 0; x < n_ranges; x++) { if (ranges[x].data) free(ranges[x].data); } if (ranges) free(ranges); ranges = NULL; n_ranges = 0; } static void del(void) { size_t x; if (do_subdivide) { size_t best = (size_t)-1; for (x = 0; x < n_ranges; x++) { if (ranges[x].start == subdivide) { if (best == (size_t)-1 || ranges[best].end >= ranges[x].end) best = x; } } if (best == (size_t)-1) { if (!device_specified) return; error(ERROR_STATUS, 0, "%s: subdivide must match the start of existing range when deleting", device_name); } for (x = n_ranges; x > 0; ) { x--; if (ranges[x].start >= ranges[best].start && ranges[x].end <= ranges[best].end) do_message_args("@stats_delete %d", ranges[x].id); } } else { for (x = n_ranges; x > 0; ) { x--; do_message_args("@stats_delete %d", ranges[x].id); } } } static void create(void) { if (!granularity) granularity = GRANULARITY_DEFAULT; if (n_ranges) { if (force) { del(); } else { if (device_specified) error(ERROR_STATUS, 0, "%s: statistics already exist", device_name); return; } } do_message_args("@stats_create - %s%"PRIu64" %s", granularity >= 0 ? "" : "/", llabs(granularity), STAT_NAME ); } static struct range *q_r; static size_t q_idx, q_len; static void get_stats(const char *line) { uint64_t start, end; uint64_t reads, reads_merged, read_sectors, read_msec; uint64_t writes, writes_merged, write_sectors, write_msec; uint64_t io_in_progress, io_msec, weighted_io_msec; uint64_t reading_msec, writing_msec; char dummy; if (!q_len) error(ERROR_STATUS, 0, "%s: too many lines returned", device_name); if (sscanf(line, "%"SCNu64"-%"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64" %"SCNu64"%c", &start, &end, &reads, &reads_merged, &read_sectors, &read_msec, &writes, &writes_merged, &write_sectors, &write_msec, &io_in_progress, &io_msec, &weighted_io_msec, &reading_msec, &writing_msec, &dummy) != 15) error(ERROR_STATUS, 0, "%s: invalid line returned: \"%s\"", device_name, line); q_r->data[q_idx].reads = reads; q_r->data[q_idx].reads_merged = reads_merged; q_r->data[q_idx].read_sectors = read_sectors; q_r->data[q_idx].read_msec = read_msec; q_r->data[q_idx].writes = writes; q_r->data[q_idx].writes_merged = writes_merged; q_r->data[q_idx].write_sectors = write_sectors; q_r->data[q_idx].write_msec = write_msec; q_r->data[q_idx].io_in_progress = io_in_progress; q_r->data[q_idx].io_msec = io_msec; q_r->data[q_idx].weighted_io_msec = weighted_io_msec; q_r->data[q_idx].reading_msec = reading_msec; q_r->data[q_idx].writing_msec = writing_msec; q_idx++; q_len--; } static void query_range(struct range *r) { q_r = r; if (q_r->data) free(q_r->data); q_r->data = xmalloc(q_r->steps * sizeof(struct collected_data)); for (q_idx = 0; q_idx < q_r->steps; ) { char *q; q_len = READ_BATCH; if (q_idx + q_len < q_idx || q_idx + q_len > q_r->steps) q_len = q_r->steps - q_idx; q = do_message_args("@stats_print%s %d %zu %zu", clear ? "_clear" : "", q_r->id, q_idx, q_len); if (!q) error(ERROR_STATUS, 0, "%s: no data returned", device_name); break_into_lines(q, get_stats); if (q_len) error(ERROR_STATUS, 0, "%s: insufficient number of lines returned: %zu, %zu", device_name, q_idx, q_len); free(q); } } static int data_compare(const void *p1, const void *p2) { const struct all_data *a1 = p1; const struct all_data *a2 = p2; if (a1->start < a2->start) return -1; if (a1->start > a2->start) return 1; if (a1->len < a2->len) return 1; if (a1->len > a2->len) return -1; return 0; } static double transferred(uint64_t sectors) { return (double)sectors * (bi ? 512. / 1048576. : 512. / 1000000.); } static double throughput(uint64_t sectors, uint64_t msec) { if (!msec) return 0; return (double)sectors / (double)msec * (bi ? 512. / 1000. * 1000000. / 1048576. : 512. / 1000.); } static double latency(uint64_t requests, uint64_t msec) { if (!msec) return 0; return (double)requests / (double)msec; } static void query(void) { size_t x; size_t total_data = 0, pos; struct all_data *all; for (x = 0; x < n_ranges; x++) { query_range(&ranges[x]); if (total_data + ranges[x].steps < total_data) error(ERROR_STATUS, 0, "%s: integer overflow in total_data (%zu + %zu)", device_name, total_data, ranges[x].steps); total_data += ranges[x].steps; } if (total_data * sizeof(struct all_data) / sizeof(struct all_data) != total_data) error(ERROR_STATUS, 0, "%s: integer overflow in total_data (%zu)", device_name, total_data); if (mode != MODE_QUERY && !do_subdivide) return; all = xmalloc(total_data * sizeof(struct all_data)); pos = 0; for (x = 0; x < n_ranges; x++) { size_t y; uint64_t start = ranges[x].start; for (y = 0; y < ranges[x].steps; y++) { all[pos].c = &ranges[x].data[y]; all[pos].r = &ranges[x]; all[pos].start = start; all[pos].len = ranges[x].step; if (start + all[pos].len > ranges[x].end) all[pos].len = ranges[x].end - start; all[pos].depth = 0; start += ranges[x].step; pos++; } } qsort(all, total_data, sizeof(struct all_data), data_compare); for (x = 0; x < total_data; x++) { size_t y; for (y = x + 1; y < total_data; y++) { if (all[y].start >= all[x].start + all[x].len) break; all[y].depth++; } } if (do_subdivide) { long long g = granularity; for (x = total_data; x > 0; ) { x--; if (all[x].start == subdivide) goto subdivide_found; } if (!device_specified) goto skip_subdivide; error(ERROR_STATUS, 0, "%s: the subdivide argument must match the start of existing range", device_name); subdivide_found: if (all[x].len == 1) { if (!device_specified) goto skip_subdivide; error(ERROR_STATUS, 0, "%s: the range with 1 sector can't be subdivided", device_name); } if (!g) { g = 1; while (g < all[x].len / GRANULARITY_SUBDIVIDE) g *= 2; } if (g > 0 && g >= all[x].len) { if (!device_specified) goto skip_subdivide; error(ERROR_STATUS, 0, "%s: granularity must be smaller than existing range that is being subdivided", device_name); } do_message_args("@stats_create %"PRIu64"-%"PRIu64" %s%"PRIu64" %s", all[x].start, all[x].start + all[x].len, g >= 0 ? "" : "/", llabs(g), STAT_NAME ); } skip_subdivide: if (mode != MODE_QUERY) goto skip_print; if (total_data) if (!device_specified) printf("%s:\n", device_name); for (x = 0; x < total_data; x++) { size_t d; uint64_t end = all[x].start + all[x].len; if (end < all[x].start || end > all[x].r->end) end = all[x].r->end; putchar(' '); for (d = 0; d < all[x].depth; d++) putchar(' '); printf("%"PRIu64"-%"PRIu64":", all[x].start, end ); if (!raw) { printf(" %"PRIu64" %.1fM%sB %.3fM%sB/s %.3fms %"PRIu64" %.1fM%sB %.3fM%sB/s %.3fms", all[x].c->reads, transferred(all[x].c->read_sectors), bi ? "i" : "", throughput(all[x].c->read_sectors, all[x].c->reading_msec), bi ? "i" : "", latency(all[x].c->reads, all[x].c->read_msec), all[x].c->writes, transferred(all[x].c->write_sectors), bi ? "i" : "", throughput(all[x].c->write_sectors, all[x].c->writing_msec), bi ? "i" : "", latency(all[x].c->writes, all[x].c->write_msec) ); } else { printf(" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64"", all[x].c->reads, all[x].c->reads_merged, all[x].c->read_sectors, all[x].c->read_msec, all[x].c->writes, all[x].c->writes_merged, all[x].c->write_sectors, all[x].c->write_msec, all[x].c->io_in_progress, all[x].c->io_msec, all[x].c->weighted_io_msec, all[x].c->reading_msec, all[x].c->writing_msec ); } printf("\n"); } skip_print: free(all); } static void do_operation_for_device(void) { char *list; ranges = NULL; n_ranges = 0; list = do_message_args("@stats_list %s", STAT_NAME); if (!list) error(ERROR_STATUS, 0, "%s: no list returned for", device_name); break_into_lines(list, get_range); free(list); switch (mode) { case MODE_CHECK: if (clear || do_subdivide) query(); break; case MODE_CREATE: create(); break; case MODE_DELETE: del(); break; case MODE_QUERY: query(); break; } free_ranges(); } static const struct poptOption popt_options[] = { { "create", 'c', POPT_ARG_VAL, &mode, MODE_CREATE, "Start collecting statistics", NULL }, { "delete", 'd', POPT_ARG_VAL, &mode, MODE_DELETE, "Delete collected statistics", NULL }, { "query", 'q', POPT_ARG_VAL, &mode, MODE_QUERY, "Query statistics", NULL }, { "clear", 0, POPT_ARG_VAL, &clear, 1, "Clear statistics", NULL }, { "granularity", 'g', POPT_ARG_LONGLONG, &granularity, 0, "The nubmer of sectors in one area", NULL }, { "force", 'f', POPT_ARG_VAL, &force, 1, "Create new statistics and delete existing data", NULL }, { "raw", 'r', POPT_ARG_VAL, &raw, 1, "Report raw values", NULL }, { "bi", 'i', POPT_ARG_VAL, &bi, 1, "Report data in mebibytes rather than megabytes", NULL }, { "subdivide", 's', POPT_ARG_LONGLONG, &subdivide, 0, "Subdivide existing range", NULL }, POPT_TABLEEND }; int main(int argc, const char *argv[]) { poptContext popt_context; int r; const char *s; popt_context = poptGetContext("stats", argc, argv, popt_options, 0); r = poptGetNextOpt(popt_context); if (r < -1) error(ERROR_PARAMS, 0, "bad option %s", poptBadOption(popt_context, 0)); device_name = poptGetArg(popt_context); s = poptGetArg(popt_context); if (s) error(ERROR_PARAMS, 0, "extra argument %s", s); if (clear) { if (mode != MODE_QUERY && mode != MODE_CHECK) error(ERROR_PARAMS, 0, "clear parameter can't be used with start or stop"); } if (raw || bi) { if (mode != MODE_QUERY) error(ERROR_PARAMS, 0, "--raw or --bi is only allowed in query mode"); } if (granularity) { if (mode != MODE_CREATE && subdivide == -1) error(ERROR_PARAMS, 0, "granularity can only be specified with the -c or -s"); } if (subdivide != -1) { if (subdivide < -1) error(ERROR_PARAMS, 0, "invalid subdivide argument"); if (mode != MODE_CHECK && mode != MODE_DELETE) error(ERROR_PARAMS, 0, "subidivide may not be specified with -c or -q"); if (granularity == -1) error(ERROR_PARAMS, 0, "granularity must not be -1 when subdividing"); do_subdivide = 1; } if (!device_name) { DIR *sys_block; struct dirent *de; device_specified = 0; sys_block = opendir(SYS_BLOCK); if (!sys_block) error(ERROR_STATUS, errno, "Unable to open %s", SYS_BLOCK); while (errno = 0, de = readdir(sys_block)) { if (strlen(de->d_name) >= 4 && !memcmp(de->d_name, "dm-", 3)) { char *dn1, *dn2; dn1 = xmalloc(strlen(DEV) + strlen(de->d_name) + 1); strcpy(dn1, DEV); strcat(dn1, de->d_name); dn2 = get_dm_name(dn1); free(dn1); device_name = dn2; do_operation_for_device(); free(dn2); } } if (errno) error(ERROR_STATUS, errno, "Unable to read %s", SYS_BLOCK); if (closedir(sys_block)) error(ERROR_STATUS, errno, "Unable to close %s", SYS_BLOCK); } else { device_specified = 1; do_operation_for_device(); } poptFreeContext(popt_context); return retval; }