X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=src%2Ffinite_difference_multi_threaded.c;fp=src%2Ffinite_difference_multi_threaded.c;h=37f507b4a24c83aeb9b71d55e09fcc51c3ee3b73;hb=ec2d2ab8e90a0c4b86ffc2385655c2fb27fcacd9;hp=0000000000000000000000000000000000000000;hpb=795fb988211df268a8f7724bd2f4493c594abee9;p=debian%2Fatlc diff --git a/src/finite_difference_multi_threaded.c b/src/finite_difference_multi_threaded.c new file mode 100644 index 0000000..37f507b --- /dev/null +++ b/src/finite_difference_multi_threaded.c @@ -0,0 +1,355 @@ +/* atlc - arbitrary transmission line calculator, for the analysis of +transmission lines are directional couplers. + +Copyright (C) 2002. Dr. David Kirkby, PhD (G8WRB). + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either package_version 2 +of the License, or (at your option) any later package_version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +USA. + +Dr. David Kirkby, e-mail drkirkby at gmail.com + +*/ + +#include "config.h" + +#define SLOW + +#ifndef ENABLE_POSIX_THREADS + +/* If we are not comiling for multi-threaded use, a function +is defined, but it is not used. 
This keeps the linker happy */ + +double finite_difference_multi_threaded() +{ + return(0.0); +} +#endif + + +#ifdef ENABLE_POSIX_THREADS + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#ifdef HAVE_STDLIB_H +#include +#endif + +#include + +#ifdef HAVE_SCHED_H +#include +#endif + +#ifdef HAVE_STRING_H +#include +#endif + +#include "definitions.h" +#include "exit_codes.h" + +extern int coupler; +extern int width, height; +extern double **Vij, **Er; +extern signed char **oddity; +extern int number_of_workers; + + +/* The algorithm for this is based on one in the book 'Foundations of Multithraded, +Parallel and Distributed Programming' by G. A. Andrews, (2000). Especially +chapter 11, Fig 11.6, "Red/Black Gauss-Seidel using shared vairables'. + +Basically the array is a considered a chess board. Since we only use in the computation the +values above, below, to the left and the right of the current pixel, these will all be the +same colour on the chess board (black or white). So we compute all the white squares, which +can all be done in parallel, since they don't depend on each other. Once the white squares +are done, the black ones can be done. Again, these don't depend on the white squares, but +only the black ones. So the algorithm does + +1) Covers the array with a large chess board with black and white squares. 
+ + +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B + + +2) Split the array into a number of columns, one for each CPU. + +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B +B W B W B W B W B W B W B W B W B W B W B W B W B W B W +W B W B W B W B W B W B W B W B W B W B W B W B W B W B + +3) Do all columns in parallel on the black squares. + +B B B B B B B B B B B B B B + B B B B B B B B B B B B B B +B B B B B B B B B B B B B B + B B B B B B B B B B B B B B +B B B B B B B B B B B B B B + B B B B B B B B B B B B B B +B B B B B B B B B B B B B B + B B B B B B B B B B B B B B +B B B B B B B B B B B B B B + B B B B B B B B B B B B B B +B B B B B B B B B B B B B B + B B B B B B B B B B B B B B + +Thread 0 Thread 1 Thread 2 Thread 3 + +4) Wait until all the black squares are done. + +5) Compute all the white squares in parallel. 
 W W W W W W W W W W W W W W
W W W W W W W W W W W W W W
 W W W W W W W W W W W W W W
W W W W W W W W W W W W W W
 W W W W W W W W W W W W W W
W W W W W W W W W W W W W W
 W W W W W W W W W W W W W W
W W W W W W W W W W W W W W
 W W W W W W W W W W W W W W
W W W W W W W W W W W W W W
 W W W W W W W W W W W W W W
W W W W W W W W W W W W W W
Thread 0 Thread 1 Thread 2 Thread 3


6) Repeat a large number (typically 100) times.

7) Check for convergence.

8) Stop if the solution has converged.
*/

extern double r;                 /* over-relaxation parameter used in the SOR update */

pthread_mutex_t barrier;         /* mutex semaphore for the barrier */
pthread_cond_t go;               /* condition variable for leaving */
int numArrived = 0;              /* count of the number who have arrived */

/* Two barrier implementations exist below; BARRIER2 selects the
   two-generation version, which is the one compiled in. */
#define BARRIER2

#ifdef BARRIER1
/* Simple counting barrier: the last of number_of_workers arrivals resets the
   count and broadcasts; everyone else waits on the condition variable.
   NOTE(review): pthread_cond_wait is not re-checked in a predicate loop here,
   so a spurious wakeup could release a thread early; presumably why the
   BARRIER2 variant below is the one actually enabled -- confirm. */
void Barrier() {
#ifndef DEBUG
  pthread_mutex_lock(&barrier);
#else
  /* In DEBUG builds every pthread call's return value is checked. */
  if( pthread_mutex_lock(&barrier) != 0 )
    exit_with_msg_and_exit_code("pthread_mutex_lock failed",PTHREAD_MUTEX_LOCK_FAILED);
#endif

  numArrived++;
  if (numArrived == number_of_workers) {
    numArrived = 0;
    pthread_cond_broadcast(&go);
  } else
    pthread_cond_wait(&go, &barrier);
#ifndef DEBUG
  pthread_mutex_unlock(&barrier);
#else
  if( pthread_mutex_unlock(&barrier) != 0 )
    exit_with_msg_and_exit_code("pthread_mutex_unlock failed",PTHREAD_MUTEX_UNLOCK_FAILED);
#endif
}
#endif

#ifdef BARRIER2
/* Two-generation (sense-alternating) counting barrier: arrival counts are
   kept per generation in numArrived2[], indexed by barrierNdx, so a thread
   that re-enters the barrier quickly cannot corrupt the count the previous
   generation's waiters are still testing. */
int numArrived2[2] = {0, 0};     /* arrival count for each generation */
int barrierNdx = 0;              /* index of the current generation */

void Barrier() {
  int localNdx;
#ifndef DEBUG
  pthread_mutex_lock(&barrier);
#else
  if( pthread_mutex_lock(&barrier) != 0 )
    exit_with_msg_and_exit_code("pthread_mutex_lock failed in finite_difference_multi_threaded.c",PTHREAD_MUTEX_LOCK_FAILED);
#endif

  numArrived2[barrierNdx]++;
  if (numArrived2[barrierNdx] == number_of_workers) {
    /* Last thread to arrive: open the next generation and wake everyone.
       The count for the generation just completed is deliberately left at
       number_of_workers so the waiters' predicate below becomes true. */
    barrierNdx = (barrierNdx + 1)%2; /* toggle */
    numArrived2[barrierNdx] = 0; /* reset other count */

#ifndef DEBUG
    pthread_cond_broadcast(&go);
#else
    if( pthread_cond_broadcast(&go) != 0)
      exit_with_msg_and_exit_code("pthread_cond_broadcast failed in finite_difference_multi_threaded.c",PTHREAD_COND_BROADCAST_FAILED);
#endif
  }
  else
  {
    localNdx = barrierNdx; /* wait on "current" numArrived. */
    /* Predicate loop guards against spurious wakeups: only leave once this
       generation's count has reached number_of_workers. */
    while (numArrived2[localNdx] != number_of_workers)
#ifndef DEBUG
      pthread_cond_wait(&go, &barrier);
#else
      if( pthread_cond_wait(&go, &barrier) != 0)
        exit_with_msg_and_exit_code("pthread_cond_wait failed finite_difference_multi_threaded.c",PTHREAD_COND_WAIT_FAILED);
#endif
  }
#ifndef DEBUG
  pthread_mutex_unlock(&barrier);
#else
  if( pthread_mutex_unlock(&barrier) != 0 )
    exit_with_msg_and_exit_code("pthread_mutex_unlock failed finite_difference_multi_threaded.c",PTHREAD_MUTEX_UNLOCK_FAILED);
#endif
}
#endif



/* Each Worker computes values in one strip of the grids.
   The main worker loop does two computations to avoid copying from
   one grid to the other. */

/* Worker thread body.  arg carries the worker's id (0..number_of_workers-1);
   each worker owns a contiguous strip of columns and alternates red/black
   half-sweeps of the SOR update, separated by barriers.
   NOTE(review): the (int) cast of a void* id truncates on LP64 platforms --
   harmless for small thread counts but worth an intptr_t round-trip upstream. */
void *worker(void *arg) {
  int myid = (int) arg;
  double r_over_4,a,b,c,d,e,f,g,h;
  double one_minus_r;
  int i, j, iters, jstart;
  int firstcol, lastcol;
  int strip_width=width/number_of_workers;

  /* Columns [firstcol, lastcol] belong to this worker; column 0 and
     column width-1 are boundary cells and are never updated. */
  firstcol = myid*strip_width+1;
  lastcol = firstcol + strip_width - 1;
  r_over_4=r/4.0;
  one_minus_r=1-r;
  /* The last worker absorbs any leftover columns when width is not an
     exact multiple of number_of_workers. */
  if(myid == number_of_workers -1)
    lastcol=width-2;
  Barrier();
  for (iters = 1; iters <= ITERATIONS; iters++)
  {
    /* First half-sweep: one colour of the checkerboard.  jstart alternates
       with column parity so that (i+j) has constant parity. */
    for(i= firstcol ; i <= lastcol; ++i){
      if(i%2 ==1 )
        jstart=1;
      else
        jstart=2;
      for(j=jstart ; j < height-1 ; j+=2){
        if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
          Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
        else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
        {
          /* Mixed-dielectric point: weight each neighbour's contribution by
             the series combination of the two cells' permittivities. */
          a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
          b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
          c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
          d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);

          e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
          f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
          g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
          h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);

          Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
        }
      }
    }
    /* All workers must finish this colour before anyone starts the other. */
    Barrier();
    /* Second half-sweep: the opposite colour (jstart parity flipped). */
    for(i= firstcol ; i <= lastcol; ++i){
      if(i%2 ==1 )
        jstart=2;
      else
        jstart=1;
      for(j=jstart ; j < height -1; j+=2){
        if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
          Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
        else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
        {
          a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
          b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
          c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
          d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);

          e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
          f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
          g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
          h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);

          Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
        }
      }
    }
    Barrier();
  }
  Barrier();
  return(0);
}

/* NOTE(review): the remainder of this function is missing from this capture
   (the source is truncated mid-statement below); no change is attempted. */
double finite_difference_multi_threaded()
{
  int i, j, ret, thread_number;
  pthread_t thread_id[MAXIMUM_PROCESSING_DEVICES];


  double capacitance_per_metre, energy_per_metre;


  /* initialize mutex and condition variable */
  pthread_mutex_init(&barrier, NULL);
  pthread_cond_init(&go, NULL);
  for(thread_number=0;thread_number