numeric-linalg
Educational material on the SciPy implementation of numerical linear algebra algorithms
File Name | Size | Mode |
perf.h | 2829B | -rw-r--r-- |
1 #ifndef PERF_H_ 2 #define PERF_H_ 3 4 #include <sys/syscall.h> 5 #include <sys/ioctl.h> 6 #include <unistd.h> 7 8 #include <linux/perf_event.h> 9 #include <linux/hw_breakpoint.h> 10 11 #include <assert.h> 12 13 typedef enum { 14 // Here we use L1-dcache-loads & L1-dcache-loads-misses instead of 15 // cache-misses & cache-references because the L1 data cache is the only 16 // CPU-specific cache accessible to perf: the LD cache is shared between 17 // cores 18 CACHE_LOADS = 0, /* L1-dcache-loads */ 19 CACHE_MISSES, /* L1-dcache-loads-misses */ 20 21 PERF_EVENT_COUNT 22 } PerfEvent; 23 24 uint32_t perf_event_types[PERF_EVENT_COUNT] = { 25 [CACHE_LOADS] = PERF_TYPE_HW_CACHE, 26 [CACHE_MISSES] = PERF_TYPE_HW_CACHE, 27 }; 28 29 uint64_t perf_event_configs[PERF_EVENT_COUNT] = { 30 [CACHE_LOADS] = PERF_COUNT_HW_CACHE_L1D 31 | (PERF_COUNT_HW_CACHE_OP_READ << 8) 32 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), 33 [CACHE_MISSES] = PERF_COUNT_HW_CACHE_L1D 34 | (PERF_COUNT_HW_CACHE_OP_READ << 8) 35 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), 36 }; 37 38 const char *perf_event_str[PERF_EVENT_COUNT] = { 39 [CACHE_LOADS] = "CACHE_LOADS", 40 [CACHE_MISSES] = "CACHE_MISSES", 41 }; 42 43 static_assert(PERF_EVENT_COUNT == 2, 44 "We should add more filds for this structure" 45 "if we add more events"); 46 typedef struct { 47 uint64_t cache_loads; 48 uint64_t cache_misses; 49 } PerfResult; 50 51 typedef struct { 52 int fds[PERF_EVENT_COUNT]; 53 union { 54 uint64_t raw_result[PERF_EVENT_COUNT]; 55 PerfResult result; 56 }; 57 } PerfRecorder; 58 59 void perf_start_recording(PerfRecorder *pr, size_t cpu_id) 60 { 61 struct perf_event_attr pe = {0}; 62 63 for (size_t i = 0; i < PERF_EVENT_COUNT; i++) { 64 pe.type = perf_event_types[i]; 65 pe.size = sizeof(struct perf_event_attr); 66 pe.config = perf_event_configs[i]; 67 pe.disabled = 1; // start disabled 68 pe.exclude_kernel = 1; // exclude kernel events 69 pe.exclude_hv = 1; // exclude hypervisor events 70 71 int fd = syscall(SYS_perf_event_open, &pe, 0, 72 (int)cpu_id /* only count events in this specific CPU */, 73 -1, 0); 74 if (fd == -1) { 75 fprintf(stderr, "ERROR: Couldn't open perf event %s!", 76 perf_event_str[i]); 77 exit(EXIT_FAILURE); 78 } 79 80 pr->fds[i] = fd; 81 ioctl(fd, PERF_EVENT_IOC_RESET, 0); // reset the counter 82 ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); // start counting 83 } 84 } 85 86 void perf_stop_recording(PerfRecorder *pr) 87 { 88 for (size_t i = 0; i < PERF_EVENT_COUNT; i++) { 89 ioctl(pr->fds[i], PERF_EVENT_IOC_DISABLE, 0); // stop counting 90 if (!read(pr->fds[i], &pr->raw_result[i], sizeof(uint64_t))) { 91 fprintf(stderr, "ERROR: Coulnd't read perf counter for %s!\n", 92 perf_event_str[i]); 93 exit(EXIT_FAILURE); 94 } 95 close(pr->fds[i]); 96 } 97 } 98 99 #endif // PERF_H_