numeric-linalg
Educational material on the SciPy implementation of numerical linear algebra algorithms
File Name | Size | Mode |
perf.h | 3428B | -rw-r--r-- |
#ifndef PERF_H_
#define PERF_H_

// perf.h: a tiny wrapper around the Linux perf_event_open(2) system call.
//
// Usage:
//
//     PerfRecorder pr;
//     perf_start_recording(&pr, cpu_id);
//     /* ... code being measured ... */
//     PerfResult res = perf_stop_recording(&pr);
//
// NOTE: the tables and functions below are *defined* (not merely declared)
// in this header with external linkage, so the header is meant to be
// included from a single translation unit of a program.

#ifndef __linux__
#error "The perf.h library can only be compiled in the Linux platform"
#endif // __linux__

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// glibc only declares syscall() in <unistd.h> when _DEFAULT_SOURCE or
// _GNU_SOURCE is in effect. Repeat the (compatible) prototype here so that
// this header also builds under strict ISO modes such as -std=c11.
long syscall(long number, ...);

// Indices of the events we record. The order of the constants must match the
// order of the fields of PerfResult (see perf_stop_recording).
enum {
  // Here we use L1-dcache-loads & L1-dcache-loads-misses instead of
  // cache-misses & cache-references because the L1 data cache is the only
  // CPU-specific cache accessible to perf: the LD cache is shared between
  // cores
  CACHE_LOADS = 0, /* L1-dcache-loads */
  CACHE_MISSES,    /* L1-dcache-loads-misses */
  CPU_CYCLES,      /* cpu-cycles */

  PERF_EVENT_COUNT
};

// Value of perf_event_attr.type for each event
static_assert(PERF_EVENT_COUNT == 3,
              "We should update this array if we add more events");
uint32_t perf_event_types[PERF_EVENT_COUNT] = {
  [CACHE_LOADS]  = PERF_TYPE_HW_CACHE,
  [CACHE_MISSES] = PERF_TYPE_HW_CACHE,
  [CPU_CYCLES]   = PERF_TYPE_HARDWARE,
};

// Value of perf_event_attr.config for each event. For PERF_TYPE_HW_CACHE
// events the config is assembled as: cache id | (op << 8) | (result << 16)
static_assert(PERF_EVENT_COUNT == 3,
              "We should update this array if we add more events");
uint64_t perf_event_configs[PERF_EVENT_COUNT] = {
  [CACHE_LOADS]  = PERF_COUNT_HW_CACHE_L1D
                 | (PERF_COUNT_HW_CACHE_OP_READ << 8)
                 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
  [CACHE_MISSES] = PERF_COUNT_HW_CACHE_L1D
                 | (PERF_COUNT_HW_CACHE_OP_READ << 8)
                 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
  [CPU_CYCLES]   = PERF_COUNT_HW_CPU_CYCLES,
};

// Human-readable name of each event, used in error messages
static_assert(PERF_EVENT_COUNT == 3,
              "We should update this array if we add more events");
const char *perf_event_str[PERF_EVENT_COUNT] = {
  [CACHE_LOADS]  = "CACHE_LOADS",
  [CACHE_MISSES] = "CACHE_MISSES",
  [CPU_CYCLES]   = "CPU_CYCLES",
};

// Counter values gathered by perf_stop_recording: one field per event, in
// the same order as the event enum.
static_assert(PERF_EVENT_COUNT == 3,
              "We should add more fields for this structure "
              "if we add more events");
typedef struct {
  uint64_t cache_loads;
  uint64_t cache_misses;
  uint64_t cpu_cycles;
} PerfResult;

// perf_stop_recording fills a PerfResult through a union with an array of
// PERF_EVENT_COUNT counters, so both views must have the exact same size.
static_assert(sizeof(PerfResult) == PERF_EVENT_COUNT * sizeof(uint64_t),
              "PerfResult must be layout-compatible with an array of "
              "PERF_EVENT_COUNT counters");

// File descriptors of the open perf events, indexed by the event enum
typedef struct {
  int fds[PERF_EVENT_COUNT];
} PerfRecorder;

// Opens one perf event per entry of the event enum, restricted to the CPU
// `cpu_id` and to the calling process (pid 0), then resets and enables each
// counter. The resulting file descriptors are stored in `pr->fds`.
//
// Kernel and hypervisor events are excluded from the counts. If any of the
// events can't be opened an error is printed to stderr and the process is
// terminated with EXIT_FAILURE.
void perf_start_recording(PerfRecorder *pr, size_t cpu_id)
{
  // Attributes shared by all of the events: only `type` and `config` change
  // from one event to the next
  struct perf_event_attr pe = {0};
  pe.size           = sizeof(struct perf_event_attr);
  pe.disabled       = 1; // start disabled
  pe.exclude_kernel = 1; // exclude kernel events
  pe.exclude_hv     = 1; // exclude hypervisor events

  for (size_t i = 0; i < PERF_EVENT_COUNT; i++) {
    pe.type   = perf_event_types[i];
    pe.config = perf_event_configs[i];

    long fd = syscall(SYS_perf_event_open, &pe, 0,
                      (int)cpu_id /* only count events in this specific CPU */,
                      -1, 0);
    if (fd == -1) {
      fprintf(stderr, "ERROR: Couldn't open perf event %s!\n",
              perf_event_str[i]);
      exit(EXIT_FAILURE);
    }

    pr->fds[i] = (int)fd;
    ioctl(pr->fds[i], PERF_EVENT_IOC_RESET, 0);  // reset the counter
    ioctl(pr->fds[i], PERF_EVENT_IOC_ENABLE, 0); // start counting
  }
}

// Disables every counter in `pr`, reads its value and closes its file
// descriptor. Returns the values read, one field per event.
//
// If a counter can't be read in full (read error, end-of-file or short read)
// an error is printed to stderr and the process is terminated with
// EXIT_FAILURE.
PerfResult perf_stop_recording(PerfRecorder *pr)
{
  // The counters are read into `raw_result` by index and handed back to the
  // caller through the layout-compatible `result` view
  union { uint64_t raw_result[PERF_EVENT_COUNT]; PerfResult result; } r;

  for (size_t i = 0; i < PERF_EVENT_COUNT; i++) {
    ioctl(pr->fds[i], PERF_EVENT_IOC_DISABLE, 0); // stop counting

    // read returns -1 on error and 0 on end-of-file: anything other than a
    // full 64-bit counter means we have no valid value to report
    ssize_t nread = read(pr->fds[i], &r.raw_result[i], sizeof(uint64_t));
    if (nread != (ssize_t)sizeof(uint64_t)) {
      fprintf(stderr, "ERROR: Couldn't read perf counter for %s!\n",
              perf_event_str[i]);
      exit(EXIT_FAILURE);
    }

    close(pr->fds[i]);
  }

  return r.result;
}

#endif // PERF_H_