1 /* atlc - arbitrary transmission line calculator, for the analysis of
2 transmission lines are directional couplers.
4 Copyright (C) 2002. Dr. David Kirkby, PhD (G8WRB).
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either package_version 2
9 of the License, or (at your option) any later package_version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
21 Dr. David Kirkby, e-mail drkirkby at ntlworld.com
29 #ifndef ENABLE_POSIX_THREADS
31 /* If we are not comiling for multi-threaded use, a function
32 is defined, but it is not used. This keeps the linker happy */
34 double finite_difference_multi_threaded()
41 #ifdef ENABLE_POSIX_THREADS
43 #ifdef HAVE_SYS_TYPES_H
44 #include <sys/types.h>
61 #include "definitions.h"
62 #include "exit_codes.h"
65 extern int width, height;
66 extern double **Vij, **Er;
67 extern signed char **oddity;
68 extern int number_of_workers;
71 /* The algorithm for this is based on one in the book 'Foundations of Multithraded,
72 Parallel and Distributed Programming' by G. A. Andrews, (2000). Especially
73 chapter 11, Fig 11.6, "Red/Black Gauss-Seidel using shared vairables'.
75 Basically the array is a considered a chess board. Since we only use in the computation the
76 values above, below, to the left and the right of the current pixel, these will all be the
77 same colour on the chess board (black or white). So we compute all the white squares, which
78 can all be done in parallel, since they don't depend on each other. Once the white squares
79 are done, the black ones can be done. Again, these don't depend on the white squares, but
80 only the black ones. So the algorithm does
82 1) Covers the array with a large chess board with black and white squares.
85 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
86 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
87 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
88 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
89 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
90 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
91 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
92 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
93 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
94 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
95 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
96 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
99 2) Split the array into a number of columns, one for each CPU.
101 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
102 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
103 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
104 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
105 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
106 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
107 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
108 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
109 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
110 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
111 B W B W B W B W B W B W B W B W B W B W B W B W B W B W
112 W B W B W B W B W B W B W B W B W B W B W B W B W B W B
114 3) Do all columns in parallel on the black squares.
116 B B B B B B B B B B B B B B
117 B B B B B B B B B B B B B B
118 B B B B B B B B B B B B B B
119 B B B B B B B B B B B B B B
120 B B B B B B B B B B B B B B
121 B B B B B B B B B B B B B B
122 B B B B B B B B B B B B B B
123 B B B B B B B B B B B B B B
124 B B B B B B B B B B B B B B
125 B B B B B B B B B B B B B B
126 B B B B B B B B B B B B B B
127 B B B B B B B B B B B B B B
129 Thread 0 Thread 1 Thread 2 Thread 3
131 4) Wait until all the black squares are done.
133 5) Compute all the white squares in parallel.
135 W W W W W W W W W W W W W W
136 W W W W W W W W W W W W W W
137 W W W W W W W W W W W W W W
138 W W W W W W W W W W W W W W
139 W W W W W W W W W W W W W W
140 W W W W W W W W W W W W W W
141 W W W W W W W W W W W W W W
142 W W W W W W W W W W W W W W
143 W W W W W W W W W W W W W W
144 W W W W W W W W W W W W W W
145 W W W W W W W W W W W W W W
146 W W W W W W W W W W W W W W
147 Thread 0 Thread 1 Thread 2 Thread 3
150 6) Repeat a large number (typically 100) times.
152 7) Check for convergence.
154 8) Stop if the solution has converged.
159 pthread_mutex_t barrier; /* mutex semaphore for the barrier */
160 pthread_cond_t go; /* condition variable for leaving */
161 int numArrived = 0; /* count of the number who have arrived */
168 pthread_mutex_lock(&barrier);
170 if( pthread_mutex_lock(&barrier) != 0 )
171 exit_with_msg_and_exit_code("pthread_mutex_lock failed",PTHREAD_MUTEX_LOCK_FAILED);
175 if (numArrived == number_of_workers) {
177 pthread_cond_broadcast(&go);
179 pthread_cond_wait(&go, &barrier);
181 pthread_mutex_unlock(&barrier);
183 if( pthread_mutex_unlock(&barrier) != 0 )
184 exit_with_msg_and_exit_code("pthread_mutex_unlock failed",PTHREAD_MUTEX_UNLOCK_FAILED);
190 int numArrived2[2] = {0, 0};
196 pthread_mutex_lock(&barrier);
198 if( pthread_mutex_lock(&barrier) != 0 )
199 exit_with_msg_and_exit_code("pthread_mutex_lock failed in finite_difference_multi_threaded.c",PTHREAD_MUTEX_LOCK_FAILED);
202 numArrived2[barrierNdx]++;
203 if (numArrived2[barrierNdx] == number_of_workers) {
204 barrierNdx = (barrierNdx + 1)%2; /* toggle */
205 numArrived2[barrierNdx] = 0; /* reset other count */
208 pthread_cond_broadcast(&go);
210 if( pthread_cond_broadcast(&go) != 0)
211 exit_with_msg_and_exit_code("pthread_cond_broadcast failed in finite_difference_multi_threaded.c",PTHREAD_COND_BROADCAST_FAILED);
216 localNdx = barrierNdx; /* wait on "current" numArrived. */
217 while (numArrived2[localNdx] != number_of_workers)
219 pthread_cond_wait(&go, &barrier);
221 if( pthread_cond_wait(&go, &barrier) != 0)
222 exit_with_msg_and_exit_code("pthread_cond_wait failed finite_difference_multi_threaded.c",PTHREAD_COND_WAIT_FAILED);
226 pthread_mutex_unlock(&barrier);
228 if( pthread_mutex_unlock(&barrier) != 0 )
229 exit_with_msg_and_exit_code("pthread_mutex_unlock failed finite_difference_multi_threaded.c",PTHREAD_MUTEX_UNLOCK_FAILED);
236 /* Each Worker computes values in one strip of the grids.
237 The main worker loop does two computations to avoid copying from
238 one grid to the other. */
240 void *worker(void *arg) {
241 int myid = (int) arg;
242 double r_over_4,a,b,c,d,e,f,g,h;
244 int i, j, iters, jstart;
245 int firstcol, lastcol;
246 int strip_width=width/number_of_workers;
248 firstcol = myid*strip_width+1;
249 lastcol = firstcol + strip_width - 1;
252 if(myid == number_of_workers -1)
255 for (iters = 1; iters <= ITERATIONS; iters++)
257 for(i= firstcol ; i <= lastcol; ++i){
262 for(j=jstart ; j < height-1 ; j+=2){
263 if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
264 Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
265 else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
267 a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
268 b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
269 c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
270 d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);
272 e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
273 f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
274 g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
275 h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);
277 Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
282 for(i= firstcol ; i <= lastcol; ++i){
287 for(j=jstart ; j < height -1; j+=2){
288 if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
289 Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
290 else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
292 a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
293 b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
294 c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
295 d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);
297 e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
298 f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
299 g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
300 h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);
302 Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
312 double finite_difference_multi_threaded()
314 int i, j, ret, thread_number;
315 pthread_t thread_id[MAXIMUM_PROCESSING_DEVICES];
318 double capacitance_per_metre, energy_per_metre;
321 /* initialize mutex and condition variable */
322 pthread_mutex_init(&barrier, NULL);
323 pthread_cond_init(&go, NULL);
324 for(thread_number=0;thread_number<number_of_workers;thread_number++){
325 ret=pthread_create(&thread_id[thread_number],NULL, worker,(void *)thread_number);
327 exit_with_msg_and_exit_code("failed to create thread in finite_difference_multi_threaded.c",THREAD_CREATION_FAILED);
329 /* Wait for each thread to join - i.e. once they are all finished. */
330 for(thread_number=0;thread_number<number_of_workers;thread_number++)
332 ret=pthread_join(thread_id[thread_number], NULL);
334 exit_with_msg_and_exit_code("failed to join thread in finite_difference_multi_threaded.c",THREAD_FAILED_TO_JOIN);
336 energy_per_metre=0.0;
338 The following code is wrong, but worked on all systems apart from AIX !!
341 for(j=0;j<height;++j)
342 energy_per_metre+=find_energy_per_metre(i,j);
345 for(i=1;i<width-1;++i)
346 for(j=1;j<height-1;++j)
347 energy_per_metre+=find_energy_per_metre(i,j);
350 capacitance_per_metre=2*energy_per_metre;
352 capacitance_per_metre=energy_per_metre;
353 return(capacitance_per_metre);