openmp的使用例子_openmp安装linux资源-CSDN文库

共65个文件

c：14个

makefile：10个

h：7个

5星 · 超过95%的资源需积分: 34 3 浏览量 2013-09-26 12:20:33 上传评论 1 收藏 106KB RAR 举报

《OpenMP在气象计算中的应用实例》 OpenMP（Open Multi-Processing）是一种并行编程模型，主要用于共享内存多核处理器系统。它提供了一种简单、高效的方式来编写并行程序，通过在源代码中添加特定的编译器指令，使得程序员能够轻松地将串行代码转换为并行代码。在这个例子中，我们将探讨OpenMP如何应用于气象计算领域，这有助于理解其在科学计算中的强大功能。首先，我们需要理解气象计算的特点。气象模型通常涉及大量的数值模拟，如大气动力学、辐射平衡和水循环等。这些模拟属于计算密集型任务，且具有高度的可并行性，非常适合利用多核处理器进行加速。OpenMP通过任务并行和数据并行机制，可以有效地利用多核资源，提升气象模型的运行效率。压缩包中的“NPB2.3-omp-C”是NAS Parallel Benchmarks（NPB）基于OpenMP的C语言实现。NPB是一组基准测试程序，用于评估并行计算机的性能。这些测试涵盖了各种并行算法，包括气象模型中常见的流体动力学问题。其中的“omp”后缀表明这些程序已经集成了OpenMP的支持。在这些程序中，我们可以看到OpenMP的关键指令，如`#pragma omp parallel for`，用于指示编译器将for循环并行化。这种并行化方式允许不同的线程独立处理循环的不同迭代，大大减少了计算时间。此外，`#pragma omp sections`指令可以将相互独立的代码段分配给不同的线程并行执行，`#pragma omp single`指令则指定某段代码只由一个线程执行，二者可用于在并行区域内组织更复杂的控制流结构。 OpenMP还提供了同步原语，如`#pragma omp barrier`用于确保所有线程在继续执行之前都到达某个点，`#pragma omp critical`用于保护对共享数据的安全访问。在气象计算中，由于数据的依赖性和一致性要求，这些同步机制至关重要，可以防止数据竞争和由此产生的错误。为了充分利用OpenMP，我们需要考虑并行度的选择，这取决于可用的核心数和具体任务的性质。通过调用`omp_set_num_threads()`函数，我们可以动态调整并行线程的数量，以达到最佳性能。 OpenMP在气象计算中的应用展示了并行计算在解决复杂科学问题上的优势。通过理解和掌握OpenMP，气象科学家和工程师能够优化他们的模型，提高预测速度和准确性，从而更好地服务于天气预报、气候研究等领域。通过对“NPB2.3-omp-C”中的代码进行分析和实践，我们可以深入理解OpenMP的工作原理，并将其应用于实际的气象计算项目中。

资源推荐

资源详情

资源评论

收起资源包目录

345351767NPB2.3-omp-C.rar （65个子文件）

NPB2.3-omp-C

bin

config

suite.def.template 530B

make.def.template 6KB

NAS.samples

make.def.dec_alpha 308B

suite.def.small 40B

suite.def.bt 25B

suite.def.is 25B

suite.def.ep 25B

make.def.sun_ultra_sparc 850B

suite.def.mg 25B

suite.def.cg 25B

make.def.sp2_babbage 320B

suite.def.ft 25B

make.def.irix6.2 179B

README 394B

suite.def.lu 25B

suite.def.sp 25B

README.carefully 2KB

Makefile 412B

is.c 21KB

sp.c 90KB

inputsp.data.sample 116B

Makefile 877B

header.h 3KB

inputbt.data.sample 114B

bt.c 114KB

Makefile 879B

header.h 3KB

inputlu.data.sample 738B

applu.h 4KB

lu.c 107KB

Makefile 866B

LOG.omc 1KB

cg.c 28KB

README.carefully 834B

Makefile 392B

README.omc 4KB

README 256B

ep.c 7KB

Makefile 391B

common

npb-C.h 1KB

c_randdp.c 5KB

wtime_sgi64.c 2KB

c_print_results.c 3KB

wtime.c 228B

wtime.h 208B

c_timers.c 1KB

ft.c 34KB

README 276B

global.h 2KB

Makefile 433B

README.org 4KB

mg.input.sample 78B

README 6KB

mg.c 37KB

Makefile 391B

globals.h 2KB

Makefile 1KB

sys

make.common 2KB

print_instructions 790B

print_header 224B

setparams.c 25KB

README 1KB

Makefile 533B

Doc

README.install 5KB

README.report 124B

/*--------------------------------------------------------------------

  NAS Parallel Benchmarks 2.3 OpenMP C versions - BT

  This benchmark is an OpenMP C version of the NPB BT code.

  The OpenMP C versions are developed by RWCP and derived from the serial
  Fortran versions in "NPB 2.3-serial" developed by NAS.

  Permission to use, copy, distribute and modify this software for any
  purpose with or without fee is hereby granted.
  This software is provided "as is" without express or implied warranty.

  Send comments on the OpenMP C versions to pdp-openmp@rwcp.or.jp

  Information on OpenMP activities at RWCP is available at:

           http://pdplab.trc.rwcp.or.jp/pdperf/Omni/

  Information on NAS Parallel Benchmarks 2.3 is available at:

           http://www.nas.nasa.gov/NAS/NPB/

--------------------------------------------------------------------*/
/*--------------------------------------------------------------------

  Authors: R. Van der Wijngaart
           T. Harris
           M. Yarrow

  OpenMP C version: S. Satoh

--------------------------------------------------------------------*/

#include "npb-C.h"

/* global variables */
#include "header.h"

/* function declarations */
static void add(void);
static void adi(void);
static void error_norm(double rms[5]);
static void rhs_norm(double rms[5]);
static void exact_rhs(void);
static void exact_solution(double xi, double eta, double zeta,
                           double dtemp[5]);
static void initialize(void);
static void lhsinit(void);
static void lhsx(void);
static void lhsy(void);
static void lhsz(void);
static void compute_rhs(void);
static void set_constants(void);
static void verify(int no_time_steps, char *class, boolean *verified);
static void x_solve(void);
static void x_backsubstitute(void);
static void x_solve_cell(void);
static void matvec_sub(double ablock[5][5], double avec[5], double bvec[5]);
static void matmul_sub(double ablock[5][5], double bblock[5][5],
                       double cblock[5][5]);
static void binvcrhs(double lhs[5][5], double c[5][5], double r[5]);
static void binvrhs(double lhs[5][5], double r[5]);
static void y_solve(void);
static void y_backsubstitute(void);
static void y_solve_cell(void);
static void z_solve(void);
static void z_backsubstitute(void);
static void z_solve_cell(void);

/*--------------------------------------------------------------------
  Consume characters from fp up to and including the next newline.
  Also stops at end-of-file or on a read error, so a file whose last
  line has no terminating newline cannot cause an endless loop.
--------------------------------------------------------------------*/
static void skip_rest_of_line(FILE *fp) {
  int ch;

  do {
    ch = fgetc(fp);
  } while (ch != '\n' && ch != EOF);
}

/*--------------------------------------------------------------------
  program BT

  Reads the run parameters (iteration count, time step, grid size) from
  "inputbt.data" when that file exists, otherwise uses the compiled
  defaults; runs one untimed ADI step to touch all code, re-initializes,
  then times `niter` ADI steps, verifies the result and prints the
  benchmark report.

  Exits with status 1 if the input file is malformed or if the requested
  grid exceeds the compiled array sizes (IMAX/JMAX/KMAX).
--------------------------------------------------------------------*/
int main(int argc, char **argv) {

  int niter, step, n3;
  int nthreads = 1;
  double navg, mflops;

  double tmax;
  boolean verified;
  char class;
  FILE *fp;

/*--------------------------------------------------------------------
  Read the input file (if it exists), else take the
  defaults from the compiled parameters
--------------------------------------------------------------------*/

  printf("\n\n NAS Parallel Benchmarks 2.3 OpenMP C version"
         " - BT Benchmark\n\n");

  fp = fopen("inputbt.data", "r");
  if (fp != NULL) {
    int parsed_ok;

    printf(" Reading from input file inputbt.data");
    /* Line 1: iteration count; line 2: time step; line 3: grid size.
       Anything after the value on lines 1 and 2 is ignored. */
    parsed_ok = (fscanf(fp, "%d", &niter) == 1);
    skip_rest_of_line(fp);
    parsed_ok = parsed_ok && (fscanf(fp, "%lg", &dt) == 1);
    skip_rest_of_line(fp);
    parsed_ok = parsed_ok &&
      (fscanf(fp, "%d%d%d",
              &grid_points[0], &grid_points[1], &grid_points[2]) == 3);
    fclose(fp);

    /* Refuse to run on partially-read parameters instead of silently
       benchmarking with indeterminate values. */
    if (!parsed_ok) {
      printf("\n Malformed input file inputbt.data\n");
      exit(1);
    }
  } else {
    printf(" No input file inputbt.data. Using compiled defaults\n");

    niter = NITER_DEFAULT;
    dt = DT_DEFAULT;
    grid_points[0] = PROBLEM_SIZE;
    grid_points[1] = PROBLEM_SIZE;
    grid_points[2] = PROBLEM_SIZE;
  }

  printf(" Size: %3dx%3dx%3d\n",
         grid_points[0], grid_points[1], grid_points[2]);
  printf(" Iterations: %3d dt: %10.6f\n", niter, dt);

  if (grid_points[0] > IMAX ||
      grid_points[1] > JMAX ||
      grid_points[2] > KMAX) {
    printf(" %dx%dx%d\n", grid_points[0], grid_points[1], grid_points[2]);
    printf(" Problem size too big for compiled array sizes\n");
    exit(1);
  }

  set_constants();

#pragma omp parallel
{
  initialize();

  lhsinit();

  exact_rhs();

/*--------------------------------------------------------------------
  do one time step to touch all code, and reinitialize
--------------------------------------------------------------------*/
  adi();

  initialize();
} /* end parallel */

  timer_clear(1);
  timer_start(1);

#pragma omp parallel firstprivate(niter) private(step)
{
  for (step = 1; step <= niter; step++) {

    /* Progress report on the first step and every 20th step,
       printed by the master thread only. */
    if (step%20 == 0 || step == 1) {
#pragma omp master
      printf(" Time step %4d\n", step);
    }

    adi();
  }

#if defined(_OPENMP)
#pragma omp master
  nthreads = omp_get_num_threads();
#endif /* _OPENMP */
} /* end parallel */

  timer_stop(1);
  tmax = timer_read(1);

  verify(niter, &class, &verified);

  n3 = grid_points[0]*grid_points[1]*grid_points[2];
  navg = (grid_points[0]+grid_points[1]+grid_points[2])/3.0;
  /* Guard against a zero elapsed time before dividing. */
  if ( tmax != 0.0 ) {
    mflops = 1.0e-6*(double)niter*
      (3478.8*(double)n3-17655.7*pow2(navg)+28023.7*navg) / tmax;
  } else {
    mflops = 0.0;
  }
  c_print_results("BT", class, grid_points[0],
                  grid_points[1], grid_points[2], niter, nthreads,
                  tmax, mflops, " floating point",
                  verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5,
                  CS6, "(none)");

  return 0;
}

/*--------------------------------------------------------------------
  add: addition of update to the vector u

  Adds rhs to u at every interior grid point (indices 1 .. n-2 in each
  direction) for all 5 components. The outer loop is an orphaned
  `omp for`; main() reaches this function through adi() from inside a
  parallel region.
--------------------------------------------------------------------*/
static void add(void) {

  int i, j, k, m;

#pragma omp for
  for (i = 1; i < grid_points[0]-1; i++) {
    for (j = 1; j < grid_points[1]-1; j++) {
      for (k = 1; k < grid_points[2]-1; k++) {
        for (m = 0; m < 5; m++) {
          u[i][j][k][m] = u[i][j][k][m] + rhs[i][j][k][m];
        }
      }
    }
  }
}

/*--------------------------------------------------------------------
  adi: one time step - compute the right-hand side, run the x, y and z
  solves in that order, then add the update to u.
--------------------------------------------------------------------*/
static void adi(void) {
  compute_rhs();

  x_solve();

  y_solve();

  z_solve();

  add();
}

/*--------------------------------------------------------------------
  error_norm: computes the norm of the difference between the
  computed solution and the exact solution.

  rms[m] receives, for each of the 5 components, the square root of the
  sum over ALL grid points of (u - u_exact)^2 divided by
  (grid_points[d]-2) for each of the three directions d.
--------------------------------------------------------------------*/
static void error_norm(double rms[5]) {

  int i, j, k, m, d;
  double xi, eta, zeta, u_exact[5], diff;

  for (m = 0; m < 5; m++) {
    rms[m] = 0.0;
  }

  for (i = 0; i < grid_points[0]; i++) {
    /* dnxm1/dnym1/dnzm1 are globals set elsewhere; presumably the grid
       spacings 1/(n-1) - confirm in set_constants(). */
    xi = (double)i * dnxm1;
    for (j = 0; j < grid_points[1]; j++) {
      eta = (double)j * dnym1;
      for (k = 0; k < grid_points[2]; k++) {
        zeta = (double)k * dnzm1;
        exact_solution(xi, eta, zeta, u_exact);

        for (m = 0; m < 5; m++) {
          diff = u[i][j][k][m] - u_exact[m];
          rms[m] = rms[m] + diff*diff;
        }
      }
    }
  }

  for (m = 0; m < 5; m++) {
    for (d = 0; d <= 2; d++) {
      rms[m] = rms[m] / (double)(grid_points[d]-2);
    }
    rms[m] = sqrt(rms[m]);
  }
}

/*--------------------------------------------------------------------
  rhs_norm: computes the norm of the right-hand side.

  rms[m] receives, for each of the 5 components, the square root of the
  sum over the INTERIOR grid points of rhs^2 divided by
  (grid_points[d]-2) for each of the three directions d.
--------------------------------------------------------------------*/
static void rhs_norm(double rms[5]) {

  int i, j, k, d, m;
  double term;

  for (m = 0; m < 5; m++) {
    rms[m] = 0.0;
  }

  for (i = 1; i < grid_points[0]-1; i++) {
    for (j = 1; j < grid_points[1]-1; j++) {
      for (k = 1; k < grid_points[2]-1; k++) {
        for (m = 0; m < 5; m++) {
          term = rhs[i][j][k][m];
          rms[m] = rms[m] + term*term;
        }
      }
    }
  }

  for (m = 0; m < 5; m++) {
    for (d = 0; d <= 2; d++) {
      rms[m] = rms[m] / (double)(grid_points[d]-2);
    }
    rms[m] = sqrt(rms[m]);
  }
}

/*--------------

评论收藏

内容反馈