numeric-linalg
Educational material on the SciPy implementation of numerical linear algebra algorithms
File Name | Size | Mode |
main.c | 5396B | -rw-r--r-- |
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <pthread.h> 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <time.h> 8 9 #include <string.h> 10 #include <errno.h> 11 12 #include "progress-bar.h" 13 #include "config.h" 14 15 #define HISTOGRAM_SIZE (MAX_N/STEP) 16 #define CLOCKS_PER_MILLIS (CLOCKS_PER_SEC/1000) 17 #define LOGISTICS_INITIAL_CONDITION (-800.) 18 19 uint32_t histogram[HISTOGRAM_SIZE]; 20 void (*getrf)(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 21 22 ProgressBar progress = { 23 .total = HISTOGRAM_SIZE, 24 .count = 0, 25 .mutex = PTHREAD_MUTEX_INITIALIZER, 26 }; 27 28 // .data is a pointer because it should be allocated in the heap 29 // (.data DOES NOT fit in the stack 🤡) 30 typedef struct { 31 double *data; 32 double *ref_data; 33 int32_t ipiv[MAX_N]; 34 size_t id; 35 } Thread; 36 37 typedef struct { 38 Thread threads[N_THREADS]; 39 } Benchmarker; 40 41 /* 42 * LAPACK functions 43 */ 44 extern void dgetrf_(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 45 extern void dgetrf2_(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 46 extern void dgetrfnaive_(int *m, int *n, double *A, int *lda, int *ipiv, int *info); 47 48 uint32_t thread_run_benchmark(Thread *thread, size_t n) 49 { 50 int64_t total_time = 0; 51 clock_t start, end; 52 int l_n, l_m, lda, info; 53 54 for (size_t i = 0; i < N_TESTS; i++) { 55 // Reinitializes the values in the .data array: this avoids progressively 56 // moving larger values to the beginning of the array, which would decrise 57 // decrease the number of row interchanges required for computations 58 memcpy(thread->data, thread->ref_data, sizeof(double)*n*n); 59 l_n = n; l_m = n; lda = n; 60 61 start = clock(); 62 getrf(&l_m, &l_n, thread->data, &lda, thread->ipiv, &info); 63 end = clock(); 64 total_time += (end - start)/CLOCKS_PER_MILLIS; 65 } 66 67 return (uint32_t)(total_time/N_TESTS); 68 } 69 70 void *thread_benchmark(void *arg) 71 { 72 Thread *thread = (Thread*) arg; 73 74 // Computations are distributed evenly across threads 75 for (size_t n = 1 + thread->id*STEP; n < MAX_N; n += STEP*N_THREADS) { 76 uint32_t duration = thread_run_benchmark(thread, n); 77 78 histogram[(n - 1)/STEP] = duration; 79 progress_bar_inc(&progress); 80 } 81 82 return NULL; 83 } 84 85 Benchmarker benchmarker_new(double *ref_data) 86 { 87 Benchmarker bench = {0}; 88 89 // This array will live for the entire duration of the program, 90 // so we might as well leak it 🤡 91 double *data = malloc(sizeof(double)*MAX_N*MAX_N*N_THREADS); 92 if (data == NULL) { 93 fprintf(stderr, "ERROR: Buy more RAM!\n"); 94 exit(EXIT_FAILURE); 95 } 96 97 for (size_t i = 0; i < N_THREADS; i++) { 98 bench.threads[i].data = data + i*MAX_N*MAX_N; 99 bench.threads[i].ref_data = ref_data; 100 bench.threads[i].id = i; 101 } 102 103 return bench; 104 } 105 106 void benchmarker_run(Benchmarker *bench) 107 { 108 pthread_t handles[N_THREADS] = {0}; 109 for (size_t i = 0; i < N_THREADS; i++) { 110 if (pthread_create(&handles[i], NULL, 111 thread_benchmark, 112 &bench->threads[i]) != 0) { 113 fprintf(stderr, "ERROR: Failed to spawn thread %lu!\n", i); 114 exit(EXIT_FAILURE); 115 } 116 } 117 118 for (size_t i = 0; i < N_THREADS; i++) { 119 pthread_join(handles[i], NULL); 120 } 121 } 122 123 int main(int argc, char **argv) 124 { 125 const char *output_path; 126 127 if (argc < 2 || strcmp(argv[1], "standard") == 0) { 128 getrf = dgetrf_; 129 output_path = OUTPUT_DIR "histogram-dgetrf.bin"; 130 } else if (strcmp(argv[1], "naive") == 0) { 131 getrf = dgetrfnaive_; 132 output_path = OUTPUT_DIR "histogram-naive.bin"; 133 } else if (strcmp(argv[1], "unblocked") == 0) { 134 getrf = dgetrf2_; 135 output_path = OUTPUT_DIR "histogram-unblocked.bin"; 136 } else { 137 fprintf(stderr, "ERROR: unknown command \"%s\"\n", argv[1]); 138 fprintf(stderr, "USAGE: %s [standard|naive|unblocked]\n", argv[0]); 139 exit(EXIT_FAILURE); 140 } 141 142 // ======================================================================== 143 printf("INFO: Initializing random input data... "); 144 145 // This array will live for the entire duration of the program, 146 // so we might as well leak it 🤡 147 double *ref_data = malloc(sizeof(double)*MAX_N*MAX_N); 148 if (ref_data == NULL) { 149 fprintf(stderr, "ERROR: Buy more RAM!\n"); 150 exit(EXIT_FAILURE); 151 } 152 153 // Pseudorandom data given by the logistic map 154 double acc = LOGISTICS_INITIAL_CONDITION; 155 for (size_t i = 0; i < MAX_N*MAX_N; i++) { 156 ref_data[i] = acc; 157 acc = 1000. - acc*acc/500.; 158 } 159 160 printf("done!\n"); 161 162 // ======================================================================== 163 printf("INFO: Benchmarking the dgetrf function on %ux%u matrices... (using %u threads)\n", 164 MAX_N, MAX_N, N_THREADS); 165 166 Benchmarker bench = benchmarker_new(ref_data); 167 benchmarker_run(&bench); 168 169 // ======================================================================== 170 printf("INFO: Done with benchmarking! Saving histogram to disk...\n"); 171 172 FILE *output = fopen(output_path, "w"); 173 174 if (output == NULL) { 175 fprintf(stderr, 176 "ERROR: Coundn't open output file \"%s\": %s", 177 output_path, strerror(errno)); 178 return EXIT_FAILURE; 179 } 180 size_t written = fwrite(histogram, sizeof(uint32_t), HISTOGRAM_SIZE, output); 181 182 if (written < HISTOGRAM_SIZE) { 183 fprintf(stderr, 184 "ERROR: Failed to write histogram to output file \"%s\": %s\n", 185 output_path, strerror(errno)); 186 return EXIT_FAILURE; 187 } 188 189 printf("INFO: Done!\n"); 190 191 return EXIT_SUCCESS; 192 }