/*
 * Scaffolding for benchmarks that operate on a memory-mapped file.
 *
 * Usage pattern (as implied by the definitions below):
 *
 *	map(argc, argv);
 *	start_loop
 *		... one pass over m / m_l / m_p ...
 *	end_loop(accesses_per_pass, bytes_per_pass)
 *
 * argv[1] is the file to map; an optional argv[2] is the number of worker
 * processes to run in parallel.
 */

/*
 * glibc only declares MAP_SHARED_VALIDATE / MAP_ANONYMOUS, clock_gettime()
 * and the pthread barrier API when a feature-test macro is in effect, so
 * request them explicitly instead of relying on the compiler's default mode.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Size in bytes of the window each process maps. */
#define LEN (1UL*512*1024*1024)

/* How long (in seconds) a start_loop/end_loop pair keeps iterating. */
#ifndef BENCH_SECONDS
#define BENCH_SECONDS 5
#endif

/* Floating-point type used for all time and rate arithmetic. */
typedef long double t;

/*
 * Return the current CLOCK_MONOTONIC time in seconds.
 * Terminates the process with status 1 if the clock cannot be read.
 */
static t gett(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
		perror("clock_gettime");
		exit(1);
	}
	return (t)ts.tv_sec + (t)ts.tv_nsec * (t)1. / (t)1000000000.;
}

static int h;		/* descriptor of the file named by argv[1] */
static char *m;		/* base of this process's LEN-byte mapping */

/* The mapping viewed as an array of unsigned long / of pointers. */
#define m_l ((unsigned long *)m)
#define LEN_L (LEN / sizeof(unsigned long))
#define m_p ((void **)m)
#define LEN_P (LEN / sizeof(void *))

static int n_threads;			/* number of worker processes (multi-process mode) */
static int tid = -1;			/* this worker's index; stays -1 in single-process mode */
static bool mt = false;			/* true once multi-process mode is selected */
static pthread_barrier_t *bar;		/* process-shared start barrier */
static pthread_barrierattr_t bar_attr;
static long double *global;		/* shared array: one throughput slot per worker */

/* Print perror(what) and exit with status 1. */
static void bench_die(const char *what)
{
	perror(what);
	exit(1);
}

/* Print msg followed by a newline on stderr and exit with status 1. */
static void bench_fail(const char *msg)
{
	fprintf(stderr, "%s\n", msg);
	exit(1);
}

/*
 * Parse the worker-count argument. Anything that is not a positive decimal
 * integer fitting in an int is rejected with the "threads" error.
 */
static int bench_parse_threads(const char *arg)
{
	char *end;
	long v;

	errno = 0;
	v = strtol(arg, &end, 10);
	if (errno || end == arg || *end != '\0' || v <= 0 || v > INT_MAX)
		bench_fail("threads");
	return (int)v;
}

/* Allocate size bytes of zeroed anonymous memory shared across fork(). */
static void *bench_shared_alloc(size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		bench_die("mmap");
	return p;
}

/*
 * Fork n_threads - 1 workers. On return, tid is 0 in the original process
 * and 1..n_threads-1 in the workers.
 *
 * If a fork() fails the barrier could never be satisfied, so the workers
 * already started are killed and the process exits instead of hanging.
 */
static void bench_spawn_workers(void)
{
	pid_t *kids;
	int i;

	kids = calloc((size_t)n_threads, sizeof *kids);
	if (!kids)
		bench_die("calloc");

	tid = 0;
	for (i = 1; i < n_threads; i++) {
		pid_t pid = fork();

		if (pid < 0) {
			int saved = errno;

			while (--i >= 1)
				kill(kids[i], SIGKILL);
			errno = saved;
			bench_die("fork");
		}
		if (pid == 0) {
			tid = i;
			break;
		}
		kids[i] = pid;
	}
	free(kids);
}

/*
 * Open argv[1] and map a LEN-byte window of it at m.
 *
 * With a third argument, argv[2] worker processes are created first; they
 * share the `global` result array and a barrier, and every process waits on
 * that barrier before returning so that all workers start measuring together.
 *
 * Each process maps the window at file offset LEN * (pid & 63) and zero-fills
 * it with memset(). NOTE(review): this presumably requires the file/device to
 * be at least 64 * LEN bytes; nothing here checks that.
 *
 * Any failure prints a diagnostic and exits with status 1.
 */
static void map(int argc, char **argv)
{
	if (argc < 2)
		bench_fail("args");

	h = open(argv[1], O_RDWR);
	if (h < 0)
		bench_die(argv[1]);

	if (argc >= 3) {
		n_threads = bench_parse_threads(argv[2]);
		mt = true;

		global = bench_shared_alloc((size_t)n_threads * sizeof *global);
		bar = bench_shared_alloc(sizeof *bar);

		if (pthread_barrierattr_init(&bar_attr))
			bench_fail("pthread_barrierattr_init");
		if (pthread_barrierattr_setpshared(&bar_attr, PTHREAD_PROCESS_SHARED))
			bench_fail("pthread_barrierattr_setpshared");
		if (pthread_barrier_init(bar, &bar_attr, (unsigned)n_threads))
			bench_fail("pthread_barrier_init");
		/* The attribute object is not needed once the barrier exists. */
		pthread_barrierattr_destroy(&bar_attr);

		bench_spawn_workers();
	}

	/* Compute the offset in off_t so it cannot wrap in unsigned long. */
	m = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED_VALIDATE, h,
		 (off_t)LEN * (getpid() & 63));
	if (m == MAP_FAILED)
		bench_die("mmap");
	memset(m, 0, LEN);

	if (mt) {
		int r = pthread_barrier_wait(bar);

		/* Exactly one waiter gets PTHREAD_BARRIER_SERIAL_THREAD; that is success too. */
		if (r != 0 && r != PTHREAD_BARRIER_SERIAL_THREAD) {
			fprintf(stderr, "pthread_barrier_wait: %s\n", strerror(r));
			exit(1);
		}
	}
}

/*
 * start_loop opens a scope and a do-loop; the code between start_loop and
 * end_loop is the benchmark body and is repeated until BENCH_SECONDS have
 * elapsed. The body may use the scratch index `i`.
 */
#define start_loop							\
{									\
	long double lat, thrpt;						\
	__attribute__((unused)) unsigned long i;			\
	unsigned long loops = 0;					\
	t t1 = gett(), t2;						\
	do {

/*
 * end_loop(q, q2) closes the loop opened by start_loop and reports results.
 *   q  - divisor applied to the per-pass time to obtain `lat` (nanoseconds)
 *   q2 - quantity per pass used for `thrpt` (scaled by 1e-9 per second)
 * Each process prints its own figures; in multi-process mode the process
 * with tid 0 waits for its children and then prints the sum of all workers'
 * throughput values taken from `global`.
 */
#define end_loop(q, q2)							\
		loops++;						\
		t2 = gett();						\
	} while (t2 - t1 < BENCH_SECONDS);				\
	lat = ((t2 - t1) / loops / (q)) * 1000000000.;			\
	thrpt = (t)(q2) * loops / (t2 - t1) / 1000000000.;		\
	if (mt)								\
		global[tid] = thrpt;					\
	printf("thr: %Lf GB/s, lat: %Lf nsec\n", thrpt, lat);		\
	while (wait(NULL) > 0)						\
		;							\
	if (!tid && mt) {						\
		long double thrpt_sum = 0;				\
		int worker_;						\
		for (worker_ = 0; worker_ < n_threads; worker_++)	\
			thrpt_sum += global[worker_];			\
		printf("sum: %Lf GB/s\n", thrpt_sum);			\
	}								\
}