/*--------------------------------------------------------------------
NAS Parallel Benchmarks 2.3 OpenMP C versions - BT
This benchmark is an OpenMP C version of the NPB BT code.
The OpenMP C versions are developed by RWCP and derived from the serial
Fortran versions in "NPB 2.3-serial" developed by NAS.
Permission to use, copy, distribute and modify this software for any
purpose with or without fee is hereby granted.
This software is provided "as is" without express or implied warranty.
Send comments on the OpenMP C versions to pdp-openmp@rwcp.or.jp
Information on OpenMP activities at RWCP is available at:
http://pdplab.trc.rwcp.or.jp/pdperf/Omni/
Information on NAS Parallel Benchmarks 2.3 is available at:
http://www.nas.nasa.gov/NAS/NPB/
--------------------------------------------------------------------*/
/*--------------------------------------------------------------------
Authors: R. Van der Wijngaart
T. Harris
M. Yarrow
OpenMP C version: S. Satoh
--------------------------------------------------------------------*/
#include "npb-C.h"
/* global variables */
#include "header.h"
/* function declarations */
static void add(void);
static void adi(void);
static void error_norm(double rms[5]);
static void rhs_norm(double rms[5]);
static void exact_rhs(void);
static void exact_solution(double xi, double eta, double zeta,
double dtemp[5]);
static void initialize(void);
static void lhsinit(void);
static void lhsx(void);
static void lhsy(void);
static void lhsz(void);
static void compute_rhs(void);
static void set_constants(void);
static void verify(int no_time_steps, char *class, boolean *verified);
static void x_solve(void);
static void x_backsubstitute(void);
static void x_solve_cell(void);
static void matvec_sub(double ablock[5][5], double avec[5], double bvec[5]);
static void matmul_sub(double ablock[5][5], double bblock[5][5],
double cblock[5][5]);
static void binvcrhs(double lhs[5][5], double c[5][5], double r[5]);
static void binvrhs(double lhs[5][5], double r[5]);
static void y_solve(void);
static void y_backsubstitute(void);
static void y_solve_cell(void);
static void z_solve(void);
static void z_backsubstitute(void);
static void z_solve_cell(void);
/*--------------------------------------------------------------------
program BT
c-------------------------------------------------------------------*/
int main(int argc, char **argv) {
int niter, step, n3;
int nthreads = 1;
double navg, mflops;
double tmax;
boolean verified;
char class;
FILE *fp;
/*--------------------------------------------------------------------
c Root node reads input file (if it exists) else takes
c defaults from parameters
c-------------------------------------------------------------------*/
printf("\n\n NAS Parallel Benchmarks 2.3 OpenMP C version"
" - BT Benchmark\n\n");
fp = fopen("inputbt.data", "r");
if (fp != NULL) {
printf(" Reading from input file inputbt.data");
fscanf(fp, "%d", &niter);
while (fgetc(fp) != '\n');
fscanf(fp, "%lg", &dt);
while (fgetc(fp) != '\n');
fscanf(fp, "%d%d%d",
&grid_points[0], &grid_points[1], &grid_points[2]);
fclose(fp);
} else {
printf(" No input file inputbt.data. Using compiled defaults\n");
niter = NITER_DEFAULT;
dt = DT_DEFAULT;
grid_points[0] = PROBLEM_SIZE;
grid_points[1] = PROBLEM_SIZE;
grid_points[2] = PROBLEM_SIZE;
}
printf(" Size: %3dx%3dx%3d\n",
grid_points[0], grid_points[1], grid_points[2]);
printf(" Iterations: %3d dt: %10.6f\n", niter, dt);
if (grid_points[0] > IMAX ||
grid_points[1] > JMAX ||
grid_points[2] > KMAX) {
printf(" %dx%dx%d\n", grid_points[0], grid_points[1], grid_points[2]);
printf(" Problem size too big for compiled array sizes\n");
exit(1);
}
set_constants();
#pragma omp parallel
{
initialize();
lhsinit();
exact_rhs();
/*--------------------------------------------------------------------
c do one time step to touch all code, and reinitialize
c-------------------------------------------------------------------*/
adi();
initialize();
} /* end parallel */
timer_clear(1);
timer_start(1);
#pragma omp parallel firstprivate(niter) private(step)
{
for (step = 1; step <= niter; step++) {
if (step%20 == 0 || step == 1) {
#pragma omp master
printf(" Time step %4d\n", step);
}
adi();
}
#if defined(_OPENMP)
#pragma omp master
nthreads = omp_get_num_threads();
#endif /* _OPENMP */
} /* end parallel */
timer_stop(1);
tmax = timer_read(1);
verify(niter, &class, &verified);
n3 = grid_points[0]*grid_points[1]*grid_points[2];
navg = (grid_points[0]+grid_points[1]+grid_points[2])/3.0;
if ( tmax != 0.0 ) {
mflops = 1.0e-6*(double)niter*
(3478.8*(double)n3-17655.7*pow2(navg)+28023.7*navg) / tmax;
} else {
mflops = 0.0;
}
c_print_results("BT", class, grid_points[0],
grid_points[1], grid_points[2], niter, nthreads,
tmax, mflops, " floating point",
verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5,
CS6, "(none)");
}
/*--------------------------------------------------------------------
c-------------------------------------------------------------------*/
static void add(void) {
/*--------------------------------------------------------------------
c addition of update to the vector u
c-------------------------------------------------------------------*/
int i, j, k, m;
#pragma omp for
for (i = 1; i < grid_points[0]-1; i++) {
for (j = 1; j < grid_points[1]-1; j++) {
for (k = 1; k < grid_points[2]-1; k++) {
for (m = 0; m < 5; m++) {
u[i][j][k][m] = u[i][j][k][m] + rhs[i][j][k][m];
}
}
}
}
}
/*--------------------------------------------------------------------
--------------------------------------------------------------------*/
static void adi(void) {
compute_rhs();
x_solve();
y_solve();
z_solve();
add();
}
/*--------------------------------------------------------------------
--------------------------------------------------------------------*/
static void error_norm(double rms[5]) {
/*--------------------------------------------------------------------
c this function computes the norm of the difference between the
c computed solution and the exact solution
c-------------------------------------------------------------------*/
int i, j, k, m, d;
double xi, eta, zeta, u_exact[5], add;
for (m = 0; m < 5; m++) {
rms[m] = 0.0;
}
for (i = 0; i < grid_points[0]; i++) {
xi = (double)i * dnxm1;
for (j = 0; j < grid_points[1]; j++) {
eta = (double)j * dnym1;
for (k = 0; k < grid_points[2]; k++) {
zeta = (double)k * dnzm1;
exact_solution(xi, eta, zeta, u_exact);
for (m = 0; m < 5; m++) {
add = u[i][j][k][m] - u_exact[m];
rms[m] = rms[m] + add*add;
}
}
}
}
for (m = 0; m < 5; m++) {
for (d = 0; d <= 2; d++) {
rms[m] = rms[m] / (double)(grid_points[d]-2);
}
rms[m] = sqrt(rms[m]);
}
}
/*--------------------------------------------------------------------
--------------------------------------------------------------------*/
static void rhs_norm(double rms[5]) {
/*--------------------------------------------------------------------
--------------------------------------------------------------------*/
int i, j, k, d, m;
double add;
for (m = 0; m < 5; m++) {
rms[m] = 0.0;
}
for (i = 1; i < grid_points[0]-1; i++) {
for (j = 1; j < grid_points[1]-1; j++) {
for (k = 1; k < grid_points[2]-1; k++) {
for (m = 0; m < 5; m++) {
add = rhs[i][j][k][m];
rms[m] = rms[m] + add*add;
}
}
}
}
for (m = 0; m < 5; m++) {
for (d = 0; d <= 2; d++) {
rms[m] = rms[m] / (double)(grid_points[d]-2);
}
rms[m] = sqrt(rms[m]);
}
}
/*--------------