git.gag.com Git - debian/atlc/blob - src/non_gui/finite_difference_multi_threaded.c

   1 /* atlc - arbitrary transmission line calculator, for the analysis of
   2 transmission lines are directional couplers.
   3
   4 Copyright (C) 2002. Dr. David Kirkby, PhD (G8WRB).
   5
   6 This program is free software; you can redistribute it and/or
   7 modify it under the terms of the GNU General Public License
   8 as published by the Free Software Foundation; either package_version 2
   9 of the License, or (at your option) any later package_version.
  10
  11 This program is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with this program; if not, write to the Free Software
  18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  19 USA.
  20
  21 Dr. David Kirkby, e-mail drkirkby at ntlworld.com
  22
  23 */
  24
  25 #include "config.h"
  26
  27 #define SLOW
  28
  29 #ifndef ENABLE_POSIX_THREADS
  30
  31 /* If we are not comiling for multi-threaded use, a function
  32 is defined, but it is not used. This keeps the linker happy */
  33
  34 double finite_difference_multi_threaded()
  35 {
  36   return(0.0);
  37 }
  38 #endif
  39
  40
  41 #ifdef ENABLE_POSIX_THREADS
  42
  43 #ifdef HAVE_SYS_TYPES_H
  44 #include <sys/types.h>
  45 #endif
  46
  47 #ifdef HAVE_STDLIB_H
  48 #include <stdlib.h>
  49 #endif
  50
  51 #include <pthread.h>
  52
  53 #ifdef HAVE_SCHED_H
  54 #include <sched.h>
  55 #endif
  56
  57 #ifdef HAVE_STRING_H
  58 #include <string.h>
  59 #endif
  60
  61 #include "definitions.h"
  62 #include "exit_codes.h"
  63
  64 extern int coupler;
  65 extern int width, height;
  66 extern double **Vij, **Er;
  67 extern signed char **oddity;
  68 extern int number_of_workers;
  69
  70
  71 /* The algorithm for this is based on one in the book 'Foundations of Multithraded,
  72 Parallel and Distributed Programming' by G. A. Andrews, (2000). Especially
  73 chapter 11, Fig 11.6, "Red/Black Gauss-Seidel using shared vairables'.
  74
  75 Basically the array is a considered a chess board. Since we only use in the computation the
  76 values above, below, to the left and the right of the current pixel, these will all be the
  77 same colour on the chess board (black or white). So we compute all the white squares, which
  78 can all be done in parallel, since they don't depend on each other. Once the white squares
  79 are done, the black ones can be done. Again, these don't depend on the white squares, but
  80 only the black ones. So the algorithm does
  81
  82 1) Covers the array with a large chess board with black and white squares.
  83
  84
  85 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
  86 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
  87 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
  88 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
  89 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
  90 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
  91 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
  92 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
  93 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
  94 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
  95 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
  96 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
  97
  98
  99 2) Split the array into a number of columns, one for each CPU.
 100
 101 B W B W B W B   W B W B W B W   B W B W B W B   W B W B W B W
 102 W B W B W B W   B W B W B W B   W B W B W B W   B W B W B W B
 103 B W B W B W B   W B W B W B W   B W B W B W B   W B W B W B W
 104 W B W B W B W   B W B W B W B   W B W B W B W   B W B W B W B
 105 B W B W B W B   W B W B W B W   B W B W B W B   W B W B W B W
 106 W B W B W B W   B W B W B W B   W B W B W B W   B W B W B W B
 107 B W B W B W B   W B W B W B W   B W B W B W B   W B W B W B W
 108 W B W B W B W   B W B W B W B   W B W B W B W   B W B W B W B
 109 B W B W B W B   W B W B W B W   B W B W B W B   W B W B W B W
 110 W B W B W B W   B W B W B W B   W B W B W B W   B W B W B W B
 111 B W B W B W B   W B W B W B W   B W B W B W B   W B W B W B W
 112 W B W B W B W   B W B W B W B   W B W B W B W   B W B W B W B
 113
 114 3) Do all columns in parallel on the black squares.
 115
 116 B   B   B   B     B   B   B     B   B   B   B     B   B   B
 117   B   B   B     B   B   B   B     B   B   B     B   B   B   B
 118 B   B   B   B     B   B   B     B   B   B   B     B   B   B
 119   B   B   B     B   B   B   B     B   B   B     B   B   B   B
 120 B   B   B   B     B   B   B     B   B   B   B     B   B   B
 121   B   B   B     B   B   B   B     B   B   B     B   B   B   B
 122 B   B   B   B     B   B   B     B   B   B   B     B   B   B
 123   B   B   B     B   B   B   B     B   B   B     B   B   B   B
 124 B   B   B   B     B   B   B     B   B   B   B     B   B   B
 125   B   B   B     B   B   B   B     B   B   B     B   B   B   B
 126 B   B   B   B     B   B   B     B   B   B   B     B   B   B
 127   B   B   B     B   B   B   B     B   B   B     B   B   B   B
 128
 129 Thread 0          Thread 1        Thread 2         Thread 3
 130
 131 4) Wait until all the black squares are done.
 132
 133 5) Compute all the white squares in parallel.
 134
 135   W   W   W     W   W   W   W     W   W   W     W   W   W   W
 136 W   W   W   W     W   W   W     W   W   W   W     W   W   W
 137   W   W   W     W   W   W   W     W   W   W     W   W   W   W
 138 W   W   W   W     W   W   W     W   W   W   W     W   W   W
 139   W   W   W     W   W   W   W     W   W   W     W   W   W   W
 140 W   W   W   W     W   W   W     W   W   W   W     W   W   W
 141   W   W   W     W   W   W   W     W   W   W     W   W   W   W
 142 W   W   W   W     W   W   W     W   W   W   W     W   W   W
 143   W   W   W     W   W   W   W     W   W   W     W   W   W   W
 144 W   W   W   W     W   W   W     W   W   W   W     W   W   W
 145   W   W   W     W   W   W   W     W   W   W     W   W   W   W
 146 W   W   W   W     W   W   W     W   W   W   W     W   W   W
 147 Thread 0          Thread 1        Thread 2         Thread 3
 148
 149
 150 6) Repeat a large number (typically 100) times.
 151
 152 7) Check for convergence.
 153
 154 8) Stop if the solution has converged.
 155 */
 156
 157 extern double r;
 158
 159 pthread_mutex_t barrier;  /* mutex semaphore for the barrier */
 160 pthread_cond_t go;        /* condition variable for leaving */
 161 int numArrived = 0;       /* count of the number who have arrived */
 162
 163 #define BARRIER2
 164
 165 #ifdef BARRIER1
 166 void Barrier() {
 167 #ifndef DEBUG
 168   pthread_mutex_lock(&barrier);
 169 #else
 170   if( pthread_mutex_lock(&barrier) != 0 )
 171     exit_with_msg_and_exit_code("pthread_mutex_lock failed",PTHREAD_MUTEX_LOCK_FAILED);
 172 #endif
 173
 174   numArrived++;
 175   if (numArrived == number_of_workers) {
 176     numArrived = 0;
 177     pthread_cond_broadcast(&go);
 178   } else
 179     pthread_cond_wait(&go, &barrier);
 180 #ifndef DEBUG
 181   pthread_mutex_unlock(&barrier);
 182 #else
 183   if( pthread_mutex_unlock(&barrier) != 0 )
 184     exit_with_msg_and_exit_code("pthread_mutex_unlock failed",PTHREAD_MUTEX_UNLOCK_FAILED);
 185 #endif
 186 }
 187 #endif
 188
 189 #ifdef BARRIER2
 190 int numArrived2[2] = {0, 0};
 191 int barrierNdx = 0;
 192
 193 void Barrier() {
 194   int localNdx;
 195 #ifndef DEBUG
 196   pthread_mutex_lock(&barrier);
 197 #else
 198   if( pthread_mutex_lock(&barrier) != 0 )
 199     exit_with_msg_and_exit_code("pthread_mutex_lock failed in finite_difference_multi_threaded.c",PTHREAD_MUTEX_LOCK_FAILED);
 200 #endif
 201
 202   numArrived2[barrierNdx]++;
 203   if (numArrived2[barrierNdx] == number_of_workers) {
 204     barrierNdx = (barrierNdx + 1)%2;  /* toggle */
 205     numArrived2[barrierNdx] = 0; /* reset other count */
 206
 207 #ifndef DEBUG
 208     pthread_cond_broadcast(&go);
 209 #else
 210     if( pthread_cond_broadcast(&go) != 0)
 211       exit_with_msg_and_exit_code("pthread_cond_broadcast failed in finite_difference_multi_threaded.c",PTHREAD_COND_BROADCAST_FAILED);
 212 #endif
 213   }
 214   else
 215   {
 216     localNdx = barrierNdx; /* wait on "current" numArrived. */
 217     while (numArrived2[localNdx] != number_of_workers)
 218 #ifndef DEBUG
 219        pthread_cond_wait(&go, &barrier);
 220 #else
 221        if( pthread_cond_wait(&go, &barrier) != 0)
 222          exit_with_msg_and_exit_code("pthread_cond_wait failed finite_difference_multi_threaded.c",PTHREAD_COND_WAIT_FAILED);
 223 #endif
 224   }
 225 #ifndef DEBUG
 226   pthread_mutex_unlock(&barrier);
 227 #else
 228   if( pthread_mutex_unlock(&barrier) != 0 )
 229     exit_with_msg_and_exit_code("pthread_mutex_unlock failed finite_difference_multi_threaded.c",PTHREAD_MUTEX_UNLOCK_FAILED);
 230 #endif
 231 }
 232 #endif
 233
 234
 235
 236 /* Each Worker computes values in one strip of the grids.
 237    The main worker loop does two computations to avoid copying from
 238    one grid to the other.  */
 239
 240 void *worker(void *arg) {
 241   int myid = (int) arg;
 242   double r_over_4,a,b,c,d,e,f,g,h;
 243   double one_minus_r;
 244   int i, j, iters, jstart;
 245   int firstcol, lastcol;
 246   int strip_width=width/number_of_workers;
 247
 248   firstcol = myid*strip_width+1;
 249   lastcol = firstcol + strip_width - 1;
 250   r_over_4=r/4.0;
 251   one_minus_r=1-r;
 252   if(myid == number_of_workers -1)
 253     lastcol=width-2;
 254   Barrier();
 255   for (iters = 1; iters <= ITERATIONS; iters++)
 256   {
 257     for(i= firstcol ; i <= lastcol; ++i){
 258       if(i%2 ==1 )
 259         jstart=1;
 260       else
 261         jstart=2;
 262       for(j=jstart ; j < height-1 ; j+=2){
 263         if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
 264           Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
 265         else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
 266         {
 267           a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
 268           b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
 269           c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
 270           d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);
 271
 272           e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
 273           f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
 274           g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
 275           h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);
 276
 277           Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
 278         }
 279       }
 280     }
 281     Barrier();
 282     for(i= firstcol ; i <= lastcol; ++i){
 283       if(i%2 ==1 )
 284         jstart=2;
 285       else
 286         jstart=1;
 287       for(j=jstart ; j < height -1; j+=2){
 288         if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
 289           Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
 290         else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
 291         {
 292           a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
 293           b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
 294           c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
 295           d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);
 296
 297           e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
 298           f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
 299           g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
 300           h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);
 301
 302           Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
 303         }
 304       }
 305     }
 306     Barrier();
 307   }
 308   Barrier();
 309   return(0);
 310 }
 311
 312 double finite_difference_multi_threaded()
 313 {
 314   int i, j, ret, thread_number;
 315   pthread_t thread_id[MAXIMUM_PROCESSING_DEVICES];
 316
 317
 318   double capacitance_per_metre, energy_per_metre;
 319
 320
 321   /* initialize mutex and condition variable */
 322   pthread_mutex_init(&barrier, NULL);
 323   pthread_cond_init(&go, NULL);
 324   for(thread_number=0;thread_number<number_of_workers;thread_number++){
 325     ret=pthread_create(&thread_id[thread_number],NULL, worker,(void *)thread_number);
 326     if(ret != 0)
 327       exit_with_msg_and_exit_code("failed to create thread in finite_difference_multi_threaded.c",THREAD_CREATION_FAILED);
 328   }
 329   /* Wait for each thread to join - i.e. once they are all finished. */
 330   for(thread_number=0;thread_number<number_of_workers;thread_number++)
 331   {
 332     ret=pthread_join(thread_id[thread_number], NULL);
 333     if(ret != 0)
 334       exit_with_msg_and_exit_code("failed to join thread in finite_difference_multi_threaded.c",THREAD_FAILED_TO_JOIN);
 335       }
 336   energy_per_metre=0.0;
 337   /*
 338   The following code is wrong, but worked on all systems apart from AIX !!
 339
 340   for(i=0;i<width;++i)
 341     for(j=0;j<height;++j)
 342         energy_per_metre+=find_energy_per_metre(i,j);
 343         */
 344
 345   for(i=1;i<width-1;++i)
 346     for(j=1;j<height-1;++j)
 347         energy_per_metre+=find_energy_per_metre(i,j);
 348
 349   if(coupler==FALSE)
 350     capacitance_per_metre=2*energy_per_metre;
 351   else
 352     capacitance_per_metre=energy_per_metre;
 353   return(capacitance_per_metre);
 354 }
 355 #endif