/*
 * Reproduces a nohz_full accounting bug.
 *
 * Usage:
 *
 * 1. Set a CPU to be nohz_full (ie. pass nohz_full= and isolcpus=)
 * 2. Build:
 *      $ cc -O2 -Wall -o acct-bug acct-bug.c
 * 3. Run, giving the CPU to pin to and the busy-loop duration in
 *    microseconds:
 *      # ./acct-bug <CPU> 995
 * 4. Run 'top -d1' and check user and system time for CPU
 *
 * On a nohz_full CPU, this reports 95% of system time utilization
 * and 5% idle. On a non-nohz_full CPU, this reports 95% user time
 * and 5% idle.
 *
 * The cycle counter is read with the x86 'rdtsc' instruction, so this
 * program only builds and runs on x86.
 *
 * Luiz Capitulino
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE	/* for cpu_set_t, CPU_*() and sched_setaffinity() */
#endif
#include <errno.h>
#include <limits.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

/*
 * Argument for nanosleep(). Static storage, so tv_sec = tv_nsec = 0:
 * the call is only used as a cheap way to enter the kernel.
 */
static struct timespec nanosleep_arg;

/*
 * Cannibalized from sysjitter
 */
static unsigned long cpu_mhz;

typedef uint64_t cycles_t;
typedef uint64_t stamp_t;

/* Minimum number of cycles to spin for during one frequency calibration. */
enum { CALIBRATION_CYCLES = 1000000 };

/* Store the current 64-bit time-stamp counter value in *pval. */
static inline void frc(uint64_t *pval)
{
	uint32_t low, high;

	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
	*pval = ((uint64_t) high << 32) | low;
}

/*
 * Convert a cycle count to microseconds using the calibrated cpu_mhz.
 * cpu_mhz must be non-zero (main() checks this after calibration).
 */
static inline uint64_t cycles_to_us(uint64_t cycles)
{
	return cycles / cpu_mhz;
}

/*
 * Take one estimate of the cycle counter frequency in Hz by spinning for
 * at least CALIBRATION_CYCLES cycles and timing the spin with
 * gettimeofday().
 *
 * Returns 0 if the measured wall-clock interval is not positive (for
 * example because the clock was stepped), so that the caller retries
 * instead of dividing by zero.
 */
static cycles_t measure_cpu_hz(void)
{
	struct timeval tvs, tve;
	stamp_t s, e;
	double sec;

	frc(&s);
	e = s;
	gettimeofday(&tvs, NULL);
	while (e - s < CALIBRATION_CYCLES)
		frc(&e);
	gettimeofday(&tve, NULL);

	sec = tve.tv_sec - tvs.tv_sec + (tve.tv_usec - tvs.tv_usec) / 1e6;
	if (sec <= 0)
		return 0;

	return (cycles_t) ((e - s) / sec);
}

/*
 * Repeat the frequency measurement until two consecutive estimates agree
 * to within 0.1%, then return the result in MHz.
 */
static unsigned measure_cpu_mhz(void)
{
	cycles_t m, mprev, d;

	mprev = measure_cpu_hz();
	do {
		m = measure_cpu_hz();
		if (m > mprev)
			d = m - mprev;
		else
			d = mprev - m;
		mprev = m;
	} while (d > m / 1000);

	return (unsigned) (m / 1000000);
}

/*
 * Pin the calling thread to the given CPU. Prints a message and exits
 * with status 1 if the CPU number does not fit in a cpu_set_t or if
 * sched_setaffinity() fails.
 */
static void move_to_cpu(int cpu)
{
	cpu_set_t set;
	int ret;

	if (cpu < 0 || cpu >= CPU_SETSIZE) {
		fprintf(stderr, "failed to move to CPU%d: %s\n",
				cpu, strerror(EINVAL));
		exit(1);
	}

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	/* The size argument is the size of the mask, not of the CPU number. */
	ret = sched_setaffinity(0, sizeof(set), &set);
	if (ret < 0) {
		fprintf(stderr, "failed to move to CPU%d: %s\n",
				cpu, strerror(errno));
		exit(1);
	}
}

/*
 * Enter the kernel via a zero-length nanosleep(). Prints a message and
 * exits with status 1 if the call fails.
 */
static inline void enter_kernel(void)
{
	if (nanosleep(&nanosleep_arg, NULL) < 0) {
		fprintf(stderr, "ERROR: couldn't sleep: %s\n",
				strerror(errno));
		exit(1);
	}
}

/*
 * Parse str as a non-negative decimal integer that fits in an int.
 * Returns 0 and stores the value in *out on success, -1 on malformed or
 * out-of-range input.
 */
static int parse_non_negative_int(const char *str, int *out)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(str, &end, 10);
	if (end == str || *end != '\0' || errno == ERANGE)
		return -1;
	if (val < 0 || val > INT_MAX)
		return -1;

	*out = (int) val;
	return 0;
}

/*
 * Pin to the requested CPU, calibrate the cycle counter, then loop
 * forever: spin in user space until more than the requested number of
 * microseconds has accumulated, enter the kernel once, and start over.
 * The program never terminates on its own; kill it when done.
 */
int main(int argc, char *argv[])
{
	stamp_t cur, prev, total = 0;
	uint64_t max_loop_us;
	int cpu, loop_us;

	if (argc != 3 ||
	    parse_non_negative_int(argv[1], &cpu) < 0 ||
	    parse_non_negative_int(argv[2], &loop_us) < 0) {
		fprintf(stderr,
			"usage: acct-bug <CPU> <busy-loop-duration in us>\n");
		exit(1);
	}
	/* Keep the loop comparison unsigned on both sides. */
	max_loop_us = (uint64_t) loop_us;

	move_to_cpu(cpu);

	cpu_mhz = measure_cpu_mhz();
	if (cpu_mhz == 0) {
		/* cycles_to_us() would divide by zero. */
		fprintf(stderr, "ERROR: couldn't measure CPU frequency\n");
		exit(1);
	}

	frc(&prev);
	for ( ;; ) {
		frc(&cur);
		total += (cur - prev);
		if (cycles_to_us(total) > max_loop_us) {
			enter_kernel();
			total = 0;
			/* Don't count the time spent in the kernel. */
			frc(&cur);
		}
		prev = cur;
	}
}