numeric-linalg

Educational material on the SciPy implementation of numerical linear algebra algorithms

main.c (4355B)

  1 #include <stdio.h>
  2 #include <lapacke.h>
  3 #include <pthread.h>
  4 
  5 #include <stdint.h>
  6 #include <stdbool.h>
  7 #include <time.h>
  8 
  9 #include <string.h>
 10 #include <errno.h>
 11 
 12 #include "config.h"
 13 
 14 #define HISTOGRAM_SIZE              (MAX_N/STEP)
 15 #define CLOCKS_PER_MILLIS           (CLOCKS_PER_SEC/1000)
 16 #define LOGISTICS_INITIAL_CONDITION (-800.)
 17 
 18 #define PROGRESS_BAR_TOTAL HISTOGRAM_SIZE
 19 #include "progress-bar.h"
 20 
 21 uint32_t histogram[HISTOGRAM_SIZE];
 22 
 23 // .data is a pointer because it should be allocated in the heap
 24 // (.data DOES NOT fit in the stack 🤡)
 25 typedef struct {
 26   double  *data;
 27   double  *ref_data;
 28   int32_t ipiv[MAX_N];
 29   size_t  id;
 30 } Thread;
 31 
 32 typedef struct {
 33   Thread threads[N_THREADS];
 34 } Benchmarker;
 35 
 36 uint32_t thread_run_benchmark(Thread *thread, size_t n)
 37 {
 38   int64_t total_time = 0;
 39   clock_t start, end;
 40 
 41   for (size_t i = 0; i < N_TESTS; i++) {
 42     // Reinitializes the values in the .data array: this avoids progressively
 43     // moving larger values to the beginning of the array, which would decrise
 44     // decrease the number of row interchanges required for computations
 45     memcpy(thread->data, thread->ref_data, sizeof(double)*n*n);
 46 
 47     start = clock();
 48     (void)LAPACKE_dgetrf(LAPACK_ROW_MAJOR,
 49                          (lapack_int)n, (lapack_int)n,
 50                          thread->data, (lapack_int)n, thread->ipiv);
 51     end = clock();
 52     total_time += (end - start)/CLOCKS_PER_MILLIS;
 53   }
 54 
 55   return (uint32_t)(total_time/N_TESTS);
 56 }
 57 
 58 void *thread_benchmark(void *arg)
 59 {
 60   Thread *thread = (Thread*) arg;
 61 
 62   // Computations are distributed evenly across threads
 63   for (size_t n = 1 + thread->id*STEP; n < MAX_N; n += STEP*N_THREADS) {
 64     uint32_t duration = thread_run_benchmark(thread, n);
 65 
 66     histogram[(n - 1)/STEP] = duration;
 67     progress_bar_inc();
 68   }
 69 
 70   return NULL;
 71 }
 72 
 73 Benchmarker benchmarker_new(double *ref_data)
 74 {
 75   Benchmarker bench = {0};
 76 
 77   // These arrays will live for the entire duration of the program,
 78   // so we just leak them 🤡
 79   double *data = malloc(sizeof(double)*MAX_N*MAX_N*N_THREADS);
 80   if (data == NULL) {
 81     fprintf(stderr, "ERROR: Buy more RAM!\n");
 82     exit(EXIT_FAILURE);
 83   }
 84 
 85   for (size_t i = 0; i < N_THREADS; i++) {
 86     bench.threads[i].data     = data + i*MAX_N*MAX_N;
 87     bench.threads[i].ref_data = ref_data;
 88     bench.threads[i].id       = i;
 89   }
 90 
 91   return bench;
 92 }
 93 
 94 void benchmarker_run(Benchmarker *bench)
 95 {
 96   pthread_t handles[N_THREADS] = {0};
 97   for (size_t i = 0; i < N_THREADS; i++) {
 98     if (pthread_create(&handles[i], NULL,
 99                        thread_benchmark,
100                        &bench->threads[i]) != 0) {
101       fprintf(stderr, "ERROR: Failed to spawn thread %lu!\n", i);
102       exit(EXIT_FAILURE);
103     }
104   }
105 
106   for (size_t i = 0; i < N_THREADS; i++) {
107     pthread_join(handles[i], NULL);
108   }
109 }
110 
111 int main(void)
112 {
113   const char *output_path = "histogram.bin";
114   FILE *output = fopen(output_path, "w");
115 
116   if (output == NULL) {
117     fprintf(stderr,
118             "ERROR: Coundn't open output file \"%s\": %s",
119             output_path, strerror(errno));
120     return EXIT_FAILURE;
121   }
122 
123   // ========================================================================
124   printf("INFO: Initializing random input data... ");
125 
126   double *ref_data = malloc(sizeof(double)*MAX_N*MAX_N);
127   if (ref_data == NULL) {
128     fprintf(stderr, "ERROR: Buy more RAM!\n");
129     exit(EXIT_FAILURE);
130   }
131 
132   // Pseudorandom data given by the logistic map
133   double acc = LOGISTICS_INITIAL_CONDITION;
134   for (size_t i = 0; i < MAX_N*MAX_N; i++) {
135     ref_data[i] = acc;
136     acc = 1000. - acc*acc/500.;
137   }
138 
139   printf("done!\n");
140 
141   // ========================================================================
142   printf("INFO: Benchmarking the dgetrf function on %ux%u matrices... (using %u threads)\n",
143          MAX_N, MAX_N, N_THREADS);
144 
145   Benchmarker bench = benchmarker_new(ref_data);
146   benchmarker_run(&bench);
147 
148   // ========================================================================
149   printf("INFO: Done with benchmarking! Saving histogram to disk...\n");
150 
151   size_t written = fwrite(histogram, sizeof(uint32_t), HISTOGRAM_SIZE, output);
152 
153   if (written < HISTOGRAM_SIZE) {
154     fprintf(stderr,
155             "ERROR: Failed to write histogram to output file \"%s\": %s\n",
156             output_path, strerror(errno));
157     return EXIT_FAILURE;
158   }
159 
160   printf("INFO: Done!\n");
161 
162   return EXIT_SUCCESS;
163 }