+++ /dev/null
-/* atlc - arbitrary transmission line calculator, for the analysis of
-transmission lines are directional couplers.
-
-Copyright (C) 2002. Dr. David Kirkby, PhD (G8WRB).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either package_version 2
-of the License, or (at your option) any later package_version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-Dr. David Kirkby, e-mail drkirkby at ntlworld.com
-
-*/
-
-#include "config.h"
-
-#define SLOW
-
-#ifndef ENABLE_POSIX_THREADS
-
-/* If we are not comiling for multi-threaded use, a function
-is defined, but it is not used. This keeps the linker happy */
-
-double finite_difference_multi_threaded()
-{
- return(0.0);
-}
-#endif
-
-
-#ifdef ENABLE_POSIX_THREADS
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#include <pthread.h>
-
-#ifdef HAVE_SCHED_H
-#include <sched.h>
-#endif
-
-#ifdef HAVE_STRING_H
-#include <string.h>
-#endif
-
-#include "definitions.h"
-#include "exit_codes.h"
-
-extern int coupler;
-extern int width, height;
-extern double **Vij, **Er;
-extern signed char **oddity;
-extern int number_of_workers;
-
-
-/* The algorithm for this is based on one in the book 'Foundations of Multithraded,
-Parallel and Distributed Programming' by G. A. Andrews, (2000). Especially
-chapter 11, Fig 11.6, "Red/Black Gauss-Seidel using shared vairables'.
-
-Basically the array is a considered a chess board. Since we only use in the computation the
-values above, below, to the left and the right of the current pixel, these will all be the
-same colour on the chess board (black or white). So we compute all the white squares, which
-can all be done in parallel, since they don't depend on each other. Once the white squares
-are done, the black ones can be done. Again, these don't depend on the white squares, but
-only the black ones. So the algorithm does
-
-1) Covers the array with a large chess board with black and white squares.
-
-
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-
-
-2) Split the array into a number of columns, one for each CPU.
-
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-B W B W B W B W B W B W B W B W B W B W B W B W B W B W
-W B W B W B W B W B W B W B W B W B W B W B W B W B W B
-
-3) Do all columns in parallel on the black squares.
-
-B B B B B B B B B B B B B B
- B B B B B B B B B B B B B B
-B B B B B B B B B B B B B B
- B B B B B B B B B B B B B B
-B B B B B B B B B B B B B B
- B B B B B B B B B B B B B B
-B B B B B B B B B B B B B B
- B B B B B B B B B B B B B B
-B B B B B B B B B B B B B B
- B B B B B B B B B B B B B B
-B B B B B B B B B B B B B B
- B B B B B B B B B B B B B B
-
-Thread 0 Thread 1 Thread 2 Thread 3
-
-4) Wait until all the black squares are done.
-
-5) Compute all the white squares in parallel.
-
- W W W W W W W W W W W W W W
-W W W W W W W W W W W W W W
- W W W W W W W W W W W W W W
-W W W W W W W W W W W W W W
- W W W W W W W W W W W W W W
-W W W W W W W W W W W W W W
- W W W W W W W W W W W W W W
-W W W W W W W W W W W W W W
- W W W W W W W W W W W W W W
-W W W W W W W W W W W W W W
- W W W W W W W W W W W W W W
-W W W W W W W W W W W W W W
-Thread 0 Thread 1 Thread 2 Thread 3
-
-
-6) Repeat a large number (typically 100) times.
-
-7) Check for convergence.
-
-8) Stop if the solution has converged.
-*/
-
-extern double r;
-
-pthread_mutex_t barrier; /* mutex semaphore for the barrier */
-pthread_cond_t go; /* condition variable for leaving */
-int numArrived = 0; /* count of the number who have arrived */
-
-#define BARRIER2
-
-#ifdef BARRIER1
-void Barrier() {
-#ifndef DEBUG
- pthread_mutex_lock(&barrier);
-#else
- if( pthread_mutex_lock(&barrier) != 0 )
- exit_with_msg_and_exit_code("pthread_mutex_lock failed",PTHREAD_MUTEX_LOCK_FAILED);
-#endif
-
- numArrived++;
- if (numArrived == number_of_workers) {
- numArrived = 0;
- pthread_cond_broadcast(&go);
- } else
- pthread_cond_wait(&go, &barrier);
-#ifndef DEBUG
- pthread_mutex_unlock(&barrier);
-#else
- if( pthread_mutex_unlock(&barrier) != 0 )
- exit_with_msg_and_exit_code("pthread_mutex_unlock failed",PTHREAD_MUTEX_UNLOCK_FAILED);
-#endif
-}
-#endif
-
-#ifdef BARRIER2
-int numArrived2[2] = {0, 0};
-int barrierNdx = 0;
-
-void Barrier() {
- int localNdx;
-#ifndef DEBUG
- pthread_mutex_lock(&barrier);
-#else
- if( pthread_mutex_lock(&barrier) != 0 )
- exit_with_msg_and_exit_code("pthread_mutex_lock failed in finite_difference_multi_threaded.c",PTHREAD_MUTEX_LOCK_FAILED);
-#endif
-
- numArrived2[barrierNdx]++;
- if (numArrived2[barrierNdx] == number_of_workers) {
- barrierNdx = (barrierNdx + 1)%2; /* toggle */
- numArrived2[barrierNdx] = 0; /* reset other count */
-
-#ifndef DEBUG
- pthread_cond_broadcast(&go);
-#else
- if( pthread_cond_broadcast(&go) != 0)
- exit_with_msg_and_exit_code("pthread_cond_broadcast failed in finite_difference_multi_threaded.c",PTHREAD_COND_BROADCAST_FAILED);
-#endif
- }
- else
- {
- localNdx = barrierNdx; /* wait on "current" numArrived. */
- while (numArrived2[localNdx] != number_of_workers)
-#ifndef DEBUG
- pthread_cond_wait(&go, &barrier);
-#else
- if( pthread_cond_wait(&go, &barrier) != 0)
- exit_with_msg_and_exit_code("pthread_cond_wait failed finite_difference_multi_threaded.c",PTHREAD_COND_WAIT_FAILED);
-#endif
- }
-#ifndef DEBUG
- pthread_mutex_unlock(&barrier);
-#else
- if( pthread_mutex_unlock(&barrier) != 0 )
- exit_with_msg_and_exit_code("pthread_mutex_unlock failed finite_difference_multi_threaded.c",PTHREAD_MUTEX_UNLOCK_FAILED);
-#endif
-}
-#endif
-
-
-
-/* Each Worker computes values in one strip of the grids.
- The main worker loop does two computations to avoid copying from
- one grid to the other. */
-
-void *worker(void *arg) {
- int myid = (int) arg;
- double r_over_4,a,b,c,d,e,f,g,h;
- double one_minus_r;
- int i, j, iters, jstart;
- int firstcol, lastcol;
- int strip_width=width/number_of_workers;
-
- firstcol = myid*strip_width+1;
- lastcol = firstcol + strip_width - 1;
- r_over_4=r/4.0;
- one_minus_r=1-r;
- if(myid == number_of_workers -1)
- lastcol=width-2;
- Barrier();
- for (iters = 1; iters <= ITERATIONS; iters++)
- {
- for(i= firstcol ; i <= lastcol; ++i){
- if(i%2 ==1 )
- jstart=1;
- else
- jstart=2;
- for(j=jstart ; j < height-1 ; j+=2){
- if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
- Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
- else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
- {
- a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
- b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
- c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
- d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);
-
- e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
- f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
- g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
- h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);
-
- Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
- }
- }
- }
- Barrier();
- for(i= firstcol ; i <= lastcol; ++i){
- if(i%2 ==1 )
- jstart=2;
- else
- jstart=1;
- for(j=jstart ; j < height -1; j+=2){
- if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */
- Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j];
- else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */
- {
- a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]);
- b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]);
- c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]);
- d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]);
-
- e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]);
- f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]);
- g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]);
- h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]);
-
- Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j];
- }
- }
- }
- Barrier();
- }
- Barrier();
- return(0);
-}
-
-double finite_difference_multi_threaded()
-{
- int i, j, ret, thread_number;
- pthread_t thread_id[MAXIMUM_PROCESSING_DEVICES];
-
-
- double capacitance_per_metre, energy_per_metre;
-
-
- /* initialize mutex and condition variable */
- pthread_mutex_init(&barrier, NULL);
- pthread_cond_init(&go, NULL);
- for(thread_number=0;thread_number<number_of_workers;thread_number++){
- ret=pthread_create(&thread_id[thread_number],NULL, worker,(void *)thread_number);
- if(ret != 0)
- exit_with_msg_and_exit_code("failed to create thread in finite_difference_multi_threaded.c",THREAD_CREATION_FAILED);
- }
- /* Wait for each thread to join - i.e. once they are all finished. */
- for(thread_number=0;thread_number<number_of_workers;thread_number++)
- {
- ret=pthread_join(thread_id[thread_number], NULL);
- if(ret != 0)
- exit_with_msg_and_exit_code("failed to join thread in finite_difference_multi_threaded.c",THREAD_FAILED_TO_JOIN);
- }
- energy_per_metre=0.0;
- /*
- The following code is wrong, but worked on all systems apart from AIX !!
-
- for(i=0;i<width;++i)
- for(j=0;j<height;++j)
- energy_per_metre+=find_energy_per_metre(i,j);
- */
-
- for(i=1;i<width-1;++i)
- for(j=1;j<height-1;++j)
- energy_per_metre+=find_energy_per_metre(i,j);
-
- if(coupler==FALSE)
- capacitance_per_metre=2*energy_per_metre;
- else
- capacitance_per_metre=energy_per_metre;
- return(capacitance_per_metre);
-}
-#endif