#include #include #include #include #include #define TEST_SIZE (32768-4096) #define TEST_ENTRIES (TEST_SIZE / sizeof(void *)) void *cache_test[TEST_ENTRIES] __attribute__((aligned(32768))); static void init_cache_test(void) { unsigned long last_p, next_p, i; memset(cache_test, 0, sizeof cache_test); last_p = 0; for (i = 0; i < TEST_ENTRIES - 1; i++) { do { next_p = random() % TEST_ENTRIES; } while (cache_test[next_p] != 0 || next_p == last_p); cache_test[last_p] = &cache_test[next_p]; last_p = next_p; } for (i = 0; i < TEST_ENTRIES; i += 8) { //asm volatile ("clwb %0" :: "m"(cache_test[i]): "memory"); } //asm volatile ("sfence"); } static void walk_cache(void) { void *p = &cache_test[0]; while (p) { p = *(void **)p; } } static unsigned long long rdtsc(void) { unsigned a, d; asm volatile("rdtsc" : "=a"(a), "=d"(d) ::); return a + ((unsigned long long)d << 32); } #if 1 #define LEN (1UL*512*1024*1024) #else #define LEN (1UL*4096) #endif static int h; static char *m; #define m_l ((unsigned long *)m) #define LEN_L (LEN / sizeof(unsigned long)) static void map(char *p) { unsigned long len = LEN; if (len < 2 * 1024 * 1024) len = 2 * 1024 * 1024; h = open(p, O_RDWR); if (h < 0) perror(p), exit(1); m = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED_VALIDATE, h, 0); if (m == MAP_FAILED) perror("mmap"), exit(1); memset(m, 0, LEN); } int main(__attribute__((unused)) int argc, __attribute__((unused)) char *argv[]) { unsigned long long t1, t2, t3, t4; unsigned long i; char mode; if (argc != 3) fprintf(stderr, "args\n"), exit(1); map(argv[1]); mode = argv[2][0]; init_cache_test(); walk_cache(); t1 = rdtsc(); walk_cache(); t2 = rdtsc(); if (mode == 'f') { for (i = 0; i < LEN_L; i += 8) { m_l[i+0] = 0x12345; m_l[i+1] = 0x12345; m_l[i+2] = 0x12345; m_l[i+3] = 0x12345; m_l[i+4] = 0x12345; m_l[i+5] = 0x12345; m_l[i+6] = 0x12345; m_l[i+7] = 0x12345; asm volatile ("clflushopt %0" :: "m"(m_l[i]): "memory"); } } else if (mode == 'w') { for (i = 0; i < LEN_L; i += 8) { m_l[i+0] = 0x12345; m_l[i+1] = 0x12345; m_l[i+2] = 0x12345; m_l[i+3] = 0x12345; m_l[i+4] = 0x12345; m_l[i+5] = 0x12345; m_l[i+6] = 0x12345; m_l[i+7] = 0x12345; asm volatile ("clwb %0" :: "m"(m_l[i]): "memory"); } } else if (mode == 'n') { for (i = 0; i < LEN; i += 8) { asm volatile ("movntiq %%rax, %0" : "=m"(m[i]) :: "rax", "memory"); } } else if (mode == 'm' ) { memset(m_l, 0, LEN); } else if (mode == 'c' ) { long l = atol(argv[2] + 1); while (l) { m_l[--l] = 0x12345; } } t3 = rdtsc(); walk_cache(); t4 = rdtsc(); fprintf(stderr, "%llu - %llu\n", t2 - t1, t4 - t3); }