numeric-linalg

Educational material on the SciPy implementation of numerical linear algebra algorithms

File Name Size Mode
main.c 5396B -rw-r--r--
  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <pthread.h>
  4 
  5 #include <stdint.h>
  6 #include <stdbool.h>
  7 #include <time.h>
  8 
  9 #include <string.h>
 10 #include <errno.h>
 11 
 12 #include "progress-bar.h"
 13 #include "config.h"
 14 
 15 #define HISTOGRAM_SIZE              (MAX_N/STEP)
 16 #define CLOCKS_PER_MILLIS           (CLOCKS_PER_SEC/1000)
 17 #define LOGISTICS_INITIAL_CONDITION (-800.)
 18 
 19 uint32_t histogram[HISTOGRAM_SIZE];
 20 void (*getrf)(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
 21 
 22 ProgressBar progress = {
 23   .total = HISTOGRAM_SIZE,
 24   .count = 0,
 25   .mutex = PTHREAD_MUTEX_INITIALIZER,
 26 };
 27 
 28 // .data is a pointer because it should be allocated in the heap
 29 // (.data DOES NOT fit in the stack 🤡)
 30 typedef struct {
 31   double  *data;
 32   double  *ref_data;
 33   int32_t ipiv[MAX_N];
 34   size_t  id;
 35 } Thread;
 36 
 37 typedef struct {
 38   Thread threads[N_THREADS];
 39 } Benchmarker;
 40 
 41 /*
 42  * LAPACK functions
 43  */
 44 extern void dgetrf_(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
 45 extern void dgetrf2_(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
 46 extern void dgetrfnaive_(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
 47 
 48 uint32_t thread_run_benchmark(Thread *thread, size_t n)
 49 {
 50   int64_t total_time = 0;
 51   clock_t start, end;
 52   int l_n, l_m, lda, info;
 53 
 54   for (size_t i = 0; i < N_TESTS; i++) {
 55     // Reinitializes the values in the .data array: this avoids progressively
 56     // moving larger values to the beginning of the array, which would decrise
 57     // decrease the number of row interchanges required for computations
 58     memcpy(thread->data, thread->ref_data, sizeof(double)*n*n);
 59     l_n = n; l_m = n; lda = n;
 60 
 61     start = clock();
 62     getrf(&l_m, &l_n, thread->data, &lda, thread->ipiv, &info);
 63     end = clock();
 64     total_time += (end - start)/CLOCKS_PER_MILLIS;
 65   }
 66 
 67   return (uint32_t)(total_time/N_TESTS);
 68 }
 69 
 70 void *thread_benchmark(void *arg)
 71 {
 72   Thread *thread = (Thread*) arg;
 73 
 74   // Computations are distributed evenly across threads
 75   for (size_t n = 1 + thread->id*STEP; n < MAX_N; n += STEP*N_THREADS) {
 76     uint32_t duration = thread_run_benchmark(thread, n);
 77 
 78     histogram[(n - 1)/STEP] = duration;
 79     progress_bar_inc(&progress);
 80   }
 81 
 82   return NULL;
 83 }
 84 
 85 Benchmarker benchmarker_new(double *ref_data)
 86 {
 87   Benchmarker bench = {0};
 88 
 89   // This array will live for the entire duration of the program,
 90   // so we might as well leak it 🤡
 91   double *data = malloc(sizeof(double)*MAX_N*MAX_N*N_THREADS);
 92   if (data == NULL) {
 93     fprintf(stderr, "ERROR: Buy more RAM!\n");
 94     exit(EXIT_FAILURE);
 95   }
 96 
 97   for (size_t i = 0; i < N_THREADS; i++) {
 98     bench.threads[i].data     = data + i*MAX_N*MAX_N;
 99     bench.threads[i].ref_data = ref_data;
100     bench.threads[i].id       = i;
101   }
102 
103   return bench;
104 }
105 
106 void benchmarker_run(Benchmarker *bench)
107 {
108   pthread_t handles[N_THREADS] = {0};
109   for (size_t i = 0; i < N_THREADS; i++) {
110     if (pthread_create(&handles[i], NULL,
111                        thread_benchmark,
112                        &bench->threads[i]) != 0) {
113       fprintf(stderr, "ERROR: Failed to spawn thread %lu!\n", i);
114       exit(EXIT_FAILURE);
115     }
116   }
117 
118   for (size_t i = 0; i < N_THREADS; i++) {
119     pthread_join(handles[i], NULL);
120   }
121 }
122 
123 int main(int argc, char **argv)
124 {
125   const char *output_path;
126 
127   if (argc < 2 || strcmp(argv[1], "standard") == 0) {
128     getrf = dgetrf_;
129     output_path = OUTPUT_DIR "histogram-dgetrf.bin";
130   } else if (strcmp(argv[1], "naive") == 0) {
131     getrf = dgetrfnaive_;
132     output_path = OUTPUT_DIR "histogram-naive.bin";
133   } else if (strcmp(argv[1], "unblocked") == 0) {
134     getrf = dgetrf2_;
135     output_path = OUTPUT_DIR "histogram-unblocked.bin";
136   } else {
137     fprintf(stderr, "ERROR: unknown command \"%s\"\n", argv[1]);
138     fprintf(stderr, "USAGE: %s [standard|naive|unblocked]\n", argv[0]);
139     exit(EXIT_FAILURE);
140   }
141 
142   // ========================================================================
143   printf("INFO: Initializing random input data... ");
144 
145   // This array will live for the entire duration of the program,
146   // so we might as well leak it 🤡
147   double *ref_data = malloc(sizeof(double)*MAX_N*MAX_N);
148   if (ref_data == NULL) {
149     fprintf(stderr, "ERROR: Buy more RAM!\n");
150     exit(EXIT_FAILURE);
151   }
152 
153   // Pseudorandom data given by the logistic map
154   double acc = LOGISTICS_INITIAL_CONDITION;
155   for (size_t i = 0; i < MAX_N*MAX_N; i++) {
156     ref_data[i] = acc;
157     acc = 1000. - acc*acc/500.;
158   }
159 
160   printf("done!\n");
161 
162   // ========================================================================
163   printf("INFO: Benchmarking the dgetrf function on %ux%u matrices... (using %u threads)\n",
164          MAX_N, MAX_N, N_THREADS);
165 
166   Benchmarker bench = benchmarker_new(ref_data);
167   benchmarker_run(&bench);
168 
169   // ========================================================================
170   printf("INFO: Done with benchmarking! Saving histogram to disk...\n");
171 
172   FILE *output = fopen(output_path, "w");
173 
174   if (output == NULL) {
175     fprintf(stderr,
176             "ERROR: Coundn't open output file \"%s\": %s",
177             output_path, strerror(errno));
178     return EXIT_FAILURE;
179   }
180   size_t written = fwrite(histogram, sizeof(uint32_t), HISTOGRAM_SIZE, output);
181 
182   if (written < HISTOGRAM_SIZE) {
183     fprintf(stderr,
184             "ERROR: Failed to write histogram to output file \"%s\": %s\n",
185             output_path, strerror(errno));
186     return EXIT_FAILURE;
187   }
188 
189   printf("INFO: Done!\n");
190 
191   return EXIT_SUCCESS;
192 }