numeric-linalg

Educational material on the SciPy implementation of numerical linear algebra algorithms

File Name Size Mode
perf.h 2829B -rw-r--r--
  1 #ifndef PERF_H_
  2 #define PERF_H_
  3 
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>

#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 12 
 13 typedef enum {
 14   // Here we use L1-dcache-loads & L1-dcache-loads-misses instead of
 15   // cache-misses & cache-references because the L1 data cache is the only
 16   // CPU-specific cache accessible to perf: the LD cache is shared between
 17   // cores
 18   CACHE_LOADS = 0, /* L1-dcache-loads */
 19   CACHE_MISSES,    /* L1-dcache-loads-misses */
 20 
 21   PERF_EVENT_COUNT
 22 } PerfEvent;
 23 
 24 uint32_t perf_event_types[PERF_EVENT_COUNT] = {
 25   [CACHE_LOADS]  = PERF_TYPE_HW_CACHE,
 26   [CACHE_MISSES] = PERF_TYPE_HW_CACHE,
 27 };
 28 
 29 uint64_t perf_event_configs[PERF_EVENT_COUNT] = {
 30   [CACHE_LOADS]  = PERF_COUNT_HW_CACHE_L1D
 31                  | (PERF_COUNT_HW_CACHE_OP_READ << 8)
 32                  | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
 33   [CACHE_MISSES] = PERF_COUNT_HW_CACHE_L1D
 34                  | (PERF_COUNT_HW_CACHE_OP_READ << 8)
 35                  | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
 36 };
 37 
 38 const char *perf_event_str[PERF_EVENT_COUNT] = {
 39   [CACHE_LOADS]  = "CACHE_LOADS",
 40   [CACHE_MISSES] = "CACHE_MISSES",
 41 };
 42 
 43 static_assert(PERF_EVENT_COUNT == 2,
 44               "We should add more filds for this structure"
 45               "if we add more events");
 46 typedef struct {
 47   uint64_t cache_loads;
 48   uint64_t cache_misses;
 49 } PerfResult;
 50 
 51 typedef struct {
 52   int fds[PERF_EVENT_COUNT];
 53   union {
 54     uint64_t   raw_result[PERF_EVENT_COUNT];
 55     PerfResult result;
 56   };
 57 } PerfRecorder;
 58 
 59 void perf_start_recording(PerfRecorder *pr, size_t cpu_id)
 60 {
 61   struct perf_event_attr pe = {0};
 62 
 63   for (size_t i = 0; i < PERF_EVENT_COUNT; i++) {
 64     pe.type = perf_event_types[i];
 65     pe.size = sizeof(struct perf_event_attr);
 66     pe.config = perf_event_configs[i];
 67     pe.disabled = 1;       // start disabled
 68     pe.exclude_kernel = 1; // exclude kernel events
 69     pe.exclude_hv = 1;     // exclude hypervisor events
 70 
 71     int fd = syscall(SYS_perf_event_open, &pe, 0,
 72                      (int)cpu_id /* only count events in this specific CPU */,
 73                      -1, 0);
 74     if (fd == -1) {
 75       fprintf(stderr, "ERROR: Couldn't open perf event %s!",
 76               perf_event_str[i]);
 77       exit(EXIT_FAILURE);
 78     }
 79 
 80     pr->fds[i] = fd;
 81     ioctl(fd, PERF_EVENT_IOC_RESET,  0); // reset the counter
 82     ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); // start counting
 83   }
 84 }
 85 
 86 void perf_stop_recording(PerfRecorder *pr)
 87 {
 88   for (size_t i = 0; i < PERF_EVENT_COUNT; i++) {
 89     ioctl(pr->fds[i], PERF_EVENT_IOC_DISABLE, 0); // stop counting
 90     if (!read(pr->fds[i], &pr->raw_result[i], sizeof(uint64_t))) {
 91       fprintf(stderr, "ERROR: Coulnd't read perf counter for %s!\n",
 92               perf_event_str[i]);
 93       exit(EXIT_FAILURE);
 94     }
 95     close(pr->fds[i]);
 96   }
 97 }
 98 
 99 #endif // PERF_H_