numeric-linalg
Educational material on the SciPy implementation of numerical linear algebra algorithms
File Name | Size | Mode |
cache-ratio.c | 6076B | -rw-r--r-- |
1 #include <stdio.h> 2 #include <stdlib.h> 3 4 #define __USE_GNU 5 #include <sched.h> 6 #include <pthread.h> 7 8 #include <stdint.h> 9 #include <stdbool.h> 10 11 #include <string.h> 12 #include <errno.h> 13 14 #include "progress-bar.h" 15 #include "perf.h" 16 #include "config.h" 17 18 #define HISTOGRAM_SIZE (MAX_N/STEP) 19 #define LOGISTICS_INITIAL_CONDITION (-800.) 20 21 // X-macros for defining variables for each perf event 22 #define EVENTS EVENT(loads) EVENT(misses) 23 24 #define EVENT(name) uint64_t histogram_##name[HISTOGRAM_SIZE]; 25 EVENTS 26 #undef EVENT 27 28 void (*getrf)(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 29 30 ProgressBar progress = { 31 .total = HISTOGRAM_SIZE, 32 .count = 0, 33 .mutex = PTHREAD_MUTEX_INITIALIZER, 34 }; 35 36 // .data is a pointer because it should be allocated in the heap 37 // (.data DOES NOT fit in the stack 🤡) 38 typedef struct { 39 double *ref_data; 40 41 // Input parameters for dgetrf 42 double *data; 43 int32_t ipiv[MAX_N]; 44 int n, m, lda, info; 45 46 size_t id; 47 PerfRecorder recorder; 48 } Thread; 49 50 typedef struct { 51 Thread threads[N_THREADS]; 52 } Benchmarker; 53 54 /* 55 * LAPACK functions 56 */ 57 extern void dgetrf_(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 58 extern void dgetrf2_(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 59 60 PerfResult thread_run_benchmark(Thread *thread, size_t n) 61 { 62 // Reinitializes the values in the .data array: this avoids progressively 63 // moving larger values to the beginning of the array, which would decrise 64 // decrease the number of row interchanges required for computations 65 memcpy(thread->data, thread->ref_data, sizeof(double)*n*n); 66 thread->n = n; thread->m = n; thread->lda = n; 67 68 perf_start_recording(&thread->recorder, thread->id); 69 getrf(&thread->n, &thread->m, thread->data, &thread->lda, 70 thread->ipiv, &thread->info); 71 perf_stop_recording(&thread->recorder); 72 73 return thread->recorder.result; 74 } 75 76 void *thread_benchmark(void *arg) 77 { 78 Thread *thread = (Thread*) arg; 79 80 // We need to lock the running thread to some CPU so that we can mask 81 // perf_event_open with this specific CPU 82 cpu_set_t set; 83 CPU_SET(thread->id, &set); 84 if (sched_setaffinity(0, sizeof(set), &set) == -1) { 85 fprintf(stderr, "ERROR: Could not lock CPU affinity for thread %lu\n", 86 thread->id); 87 } 88 89 // Computations are distributed evenly across threads 90 for (size_t n = 1 + thread->id*STEP; n < MAX_N; n += STEP*N_THREADS) { 91 PerfResult result = thread_run_benchmark(thread, n); 92 size_t i = (n - 1)/STEP; 93 94 histogram_loads[i] = result.cache_loads; 95 histogram_misses[i] = result.cache_misses; 96 97 progress_bar_inc(&progress); 98 } 99 100 return NULL; 101 } 102 103 Benchmarker benchmarker_new(double *ref_data) 104 { 105 Benchmarker bench = {0}; 106 107 // This array will live for the entire duration of the program, 108 // so we might as well leak it 🤡 109 double *data = malloc(sizeof(double)*MAX_N*MAX_N*N_THREADS); 110 if (data == NULL) { 111 fprintf(stderr, "ERROR: Buy more RAM!\n"); 112 exit(EXIT_FAILURE); 113 } 114 115 for (size_t i = 0; i < N_THREADS; i++) { 116 bench.threads[i].data = data + i*MAX_N*MAX_N; 117 bench.threads[i].ref_data = ref_data; 118 bench.threads[i].id = i; 119 } 120 121 return bench; 122 } 123 124 void benchmarker_run(Benchmarker *bench) 125 { 126 pthread_t handles[N_THREADS] = {0}; 127 for (size_t i = 0; i < N_THREADS; i++) { 128 if (pthread_create(&handles[i], NULL, 129 thread_benchmark, 130 &bench->threads[i]) != 0) { 131 fprintf(stderr, "ERROR: Failed to spawn thread %lu!\n", i); 132 exit(EXIT_FAILURE); 133 } 134 } 135 136 for (size_t i = 0; i < N_THREADS; i++) { 137 pthread_join(handles[i], NULL); 138 } 139 } 140 141 142 int main(int argc, char **argv) 143 { 144 #define EVENT(name) const char *output_path_##name; 145 EVENTS 146 #undef EVENT 147 148 if (argc < 2 || strcmp(argv[1], "standard") == 0) { 149 getrf = dgetrf_; 150 #define EVENT(name) \ 151 output_path_##name = OUTPUT_DIR "cache-" #name ".bin"; 152 EVENTS 153 #undef EVENT 154 } else if (strcmp(argv[1], "unblocked") == 0) { 155 getrf = dgetrf2_; 156 #define EVENT(name) \ 157 output_path_##name = OUTPUT_DIR "unblocked-cache-" #name ".bin"; 158 EVENTS 159 #undef EVENT 160 } else { 161 fprintf(stderr, "ERROR: unknown command \"%s\"\n", argv[1]); 162 fprintf(stderr, "USAGE: %s [standard|naive|unblocked]\n", argv[0]); 163 exit(EXIT_FAILURE); 164 } 165 166 // ======================================================================== 167 printf("INFO: Initializing random input data... "); 168 169 // This array will live for the entire duration of the program, 170 // so we might as well leak it 🤡 171 double *ref_data = malloc(sizeof(double)*MAX_N*MAX_N); 172 if (ref_data == NULL) { 173 fprintf(stderr, "ERROR: Buy more RAM!\n"); 174 exit(EXIT_FAILURE); 175 } 176 177 // Pseudorandom data given by the logistic map 178 double acc = LOGISTICS_INITIAL_CONDITION; 179 for (size_t i = 0; i < MAX_N*MAX_N; i++) { 180 ref_data[i] = acc; 181 acc = 1000. - acc*acc/500.; 182 } 183 184 printf("done!\n"); 185 186 // ======================================================================== 187 printf("INFO: Benchmarking the cache miss ratio of dgetrf on %ux%u matrices... (using %u threads)\n", 188 MAX_N, MAX_N, N_THREADS); 189 190 Benchmarker bench = benchmarker_new(ref_data); 191 benchmarker_run(&bench); 192 193 // ======================================================================== 194 printf("INFO: Done with benchmarking! Saving histograms to disk...\n"); 195 196 #define EVENT(name) \ 197 do { \ 198 FILE *output = fopen(output_path_##name, "w"); \ 199 if (output == NULL) { \ 200 fprintf(stderr, \ 201 "ERROR: Coundn't open output file \"%s\": %s", \ 202 output_path_##name, strerror(errno)); \ 203 return EXIT_FAILURE; \ 204 } \ 205 size_t written = fwrite(&histogram_##name, sizeof(uint64_t), HISTOGRAM_SIZE, output); \ 206 if (written < HISTOGRAM_SIZE) { \ 207 fprintf(stderr, \ 208 "ERROR: Failed to write histogram to output file \"%s\": %s\n", \ 209 output_path_##name, strerror(errno)); \ 210 return EXIT_FAILURE; \ 211 } \ 212 } while(0); 213 EVENTS 214 #undef EVENT 215 216 printf("INFO: Done!\n"); 217 218 return EXIT_SUCCESS; 219 }