OpenACC 云水参数化方案

▶ 书上第十三章,用一系列步骤优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在旗下的表现

● 代码,文件较多,放在一起了

  1 ! main.f90
  2 PROGRAM main
  3     USE m_config,  ONLY: nstop
  4     USE m_physics, ONLY: physics
  5     USE m_io,      ONLY: write_output
  6     USE m_setup,   ONLY: initialize, cleanup
  7     USE m_timing,  ONLY: start_timer, end_timer, print_timers
  8 
  9     IMPLICIT NONE
 10 
 11     INTEGER :: ntstep
 12     INTEGER, parameter :: itimloop = 5
 13 
 14     CALL initialize()       ! 初始化计时器和设备
 15     
 16     WRITE(*,"(A)") "Start of time loop"
 17     CALL start_timer(itimloop, "Time loop")
 18 
 19     DO ntstep = 1, nstop    ! 计算
 20         CALL physics()
 21         CALL write_output( ntstep )
 22     END DO
 23 
 24     CALL end_timer( itimloop )
 25     WRITE(*,"(A)") "End of time loop"
 26 
 27     CALL print_timers()
 28     CALL cleanup()
 29 
 30 END PROGRAM main
 31 
 32 ! m_config.f90,运行参数
 33 MODULE m_config
 34     INTEGER, parameter :: nx    = 128  ! 经度网格数 
 35     INTEGER, parameter :: ny    = 128  ! 纬度网格数
 36     INTEGER, parameter :: nz    = 60   ! 海拔网格数
 37     INTEGER, parameter :: nstop = 100  ! 时间步数
 38     INTEGER, parameter :: nout  = 20   ! 输出间隔
 39     
 40 END MODULE m_config
 41 
 42 ! m_fields.f90,场参数
 43 MODULE m_fields
 44     REAL*8, ALLOCATABLE :: qv(:,:,:)  ! 水蒸汽含量
 45     REAL*8, ALLOCATABLE :: t(:,:,:)   ! 温度
 46   
 47 END MODULE m_fields
 48 
 49 ! m_io.f90,输入输出函数
 50 MODULE m_io
 51     USE m_config,  ONLY: nout, nx, ny, nz
 52     USE m_fields,  ONLY: qv
 53 
 54     IMPLICIT NONE
 55 
 56 CONTAINS
 57     SUBROUTINE write_output(ntstep)
 58         IMPLICIT NONE
 59 
 60         INTEGER, INTENT(IN) :: ntstep       ! 当前时间片
 61         INTEGER :: i, j, k
 62         REAL*8  :: qv_mean                  ! 水蒸汽含量平均值(标量)
 63         
 64         IF (MOD(ntstep, nout) /= 0) RETURN  ! 当前时间片不作输出            
 65 
 66         qv_mean = 0.0D0                     ! 计算均值并输出
 67         DO k = 1, nz
 68             DO j = 1, ny
 69                 DO i = 1, nx
 70                     qv_mean = qv_mean + qv(i,j,k)
 71                 END DO
 72             END DO
 73         END DO
 74         qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))
 75 
 76         WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
 77     END SUBROUTINE write_output
 78 
 79 END MODULE m_io
 80 
 81 ! m_parametrizations.f90,参数化方案
 82 MODULE m_parametrizations
 83     IMPLICIT NONE
 84 
 85     REAL*8, parameter ::  cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0
 86     REAL*8, parameter ::  cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0
 87 
 88 CONTAINS
 89     SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! 参数化方案一
 90         IMPLICIT NONE
 91     
 92         INTEGER, INTENT(IN)    :: npx, npy, nlev  ! 输入维度
 93         REAL*8,  INTENT(IN)    :: t(:,:,:)        ! 温度
 94         REAL*8,  INTENT(OUT)   :: qc(:,:,:)       ! 云水含量
 95         REAL*8,  INTENT(INOUT) :: qv(:,:,:)       ! 水蒸汽含量
 96         INTEGER :: i, j, k
 97 
 98         DO k = 1, nlev
 99             DO j = 1, npy
100                 DO i = 1, npx
101                     qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
102                     qc(i,j,k) = cs4 * qv(i,j,k)
103                 END DO
104             END DO
105         END DO
106     END SUBROUTINE saturation_adjustment
107 
108     SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)  ! 参数化方案二
109         IMPLICIT NONE
110 
111         INTEGER, INTENT(IN)   :: npx, npy, nlev
112         REAL*8, INTENT(INOUT) :: t(:,:,:)      
113         REAL*8, INTENT(IN)    :: qc(:,:,:)     
114         REAL*8, INTENT(INOUT) :: qv(:,:,:)     
115         INTEGER :: i, j, k
116 
117         DO k = 2, nlev
118             DO j = 1, npy
119                 DO i = 1, npx
120                     qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
121                     t(i, j, k)  = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
122                 END DO
123             END DO
124         END DO
125     END SUBROUTINE microphysics
126 
127 END MODULE m_parametrizations
128 
129 ! m_physics.f90,参数化方案的执行
130 MODULE m_physics
131     USE m_config,           ONLY: nx, ny, nz
132     USE m_fields,           ONLY: qv, t
133     USE m_parametrizations, ONLY: saturation_adjustment, microphysics
134 
135     IMPLICIT NONE
136 
137 CONTAINS
138     SUBROUTINE physics()
139         IMPLICIT NONE
140         REAL*8 :: qc(nx,ny,nz)                              ! 云水含量临时变量
141         CALL saturation_adjustment(nx, ny, nz, t, qc, qv)   ! 第一物理参数化  
142         CALL microphysics(nx, ny, nz, t, qc, qv)            ! 第二物理参数化
143     END SUBROUTINE physics
144     
145 END MODULE m_physics
146 
147 ! m_timming.f90,计时器
148 MODULE m_timing
149     IMPLICIT NONE
150 
151     INTEGER, PARAMETER :: ntimer=10             ! 计时器数量
152     REAL*8             :: rtimer(ntimer)        ! 计时器
153     CHARACTER(32)      :: timertag(ntimer)      ! 计时器标签
154     INTEGER            :: icountold(ntimer), &  ! tick (start of timer section)
155                         icountrate,          &  ! countrate of SYSTEM_CLOCK()
156                         icountmax               ! maximum counter value of SYSTEM_CLOCK()
157 
158 CONTAINS
159     SUBROUTINE init_timers()        ! 初始化计时器
160         IMPLICIT NONE
161 
162         rtimer(:)   = 0.0D0
163         timertag(:) = ""
164         icountold(:) = 0
165 
166         CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax )
167     END SUBROUTINE init_timers
168 
169     SUBROUTINE start_timer(id, tag) ! 开始计时
170         IMPLICIT NONE
171 
172         INTEGER, INTENT(IN)       :: id
173         CHARACTER(*), INTENT(IN) :: tag
174 
175         IF (id < 1 .OR. id > ntimer) THEN           ! 检查计时器编号范围
176           WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceeds maximum timer number", ntimer
177           STOP
178         END IF
179 
180 
181         IF (LEN_TRIM(timertag(id)) /= 0) THEN       ! 检查计时器是否已经开始运行
182           WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id
183           STOP
184         END IF
185 
186         IF (LEN_TRIM(tag) == 0) THEN                ! 检查计时器标签是否非空
187           WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id
188           STOP
189         END IF
190 
191         timertag(id) = TRIM(tag)                    ! 保存标签
192         !$acc wait
193         
194         CALL SYSTEM_CLOCK( COUNT=icountold(id) )    ! 开始计时
195     END SUBROUTINE start_timer
196 
197     SUBROUTINE end_timer(id) ! 结束计时
198         IMPLICIT NONE
199 
200         INTEGER, INTENT(IN) :: id
201         INTEGER             :: icountnew
202 
203         IF (id < 1 .OR. id > ntimer) THEN       ! 检查计时器编号范围
204           WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceed max timer number", ntimer
205           STOP
206         END IF
207 
208         IF (LEN_TRIM(timertag(id)) == 0) THEN   ! 检查计时器是否已经开始运行
209           WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id
210           STOP
211         END IF
212         !$acc wait
213         
214         CALL SYSTEM_CLOCK( COUNT=icountnew )    ! 获取当前时间,计算耗时
215         rtimer(id) = ( REAL(icountnew - icountold(id), KIND(rtimer(id))) ) / REAL(icountrate, KIND(rtimer(id)))
216     END SUBROUTINE end_timer
217 
218     SUBROUTINE print_timers()   ! 打印计时
219         IMPLICIT NONE
220 
221         INTEGER :: id
222 
223         WRITE(*,"(A)") "----------------------------"
224         WRITE(*,"(A)") "Timers:"
225         WRITE(*,"(A)") "----------------------------"
226         DO id = 1, ntimer
227             IF ( rtimer(id) > 0.0D0 ) THEN
228                 WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*1.0D3, " ms"
229             END IF
230         END DO    
231         WRITE(*,"(A)") "----------------------------"
232     END SUBROUTINE print_timers
233   
234 END MODULE m_timing
235 
236 ! m_setup.f90,初始化和清理
237 MODULE m_setup
238     USE m_config,  ONLY: nstop, nout, nx, ny, nz
239     USE m_fields,  ONLY: t,qv
240     USE m_timing,  ONLY: init_timers, start_timer, end_timer
241 
242     IMPLICIT NONE
243 
244 CONTAINS
245     SUBROUTINE initialize() ! 初始化计时器和设备
246         IMPLICIT NONE
247         
248         INTEGER, PARAMETER :: itiminit = 1  ! 计时器编号
249         INTEGER :: i, j, k                  
250 
251 #ifdef _OPENACC
252         WRITE(*,"(A)") "Running with OpenACC"       
253 #else
254         WRITE(*,"(A)") "Running without OpenACC"   
255 #endif
256 
257         WRITE(*,"(A)") "Initialize"
258 
259         CALL init_timers()
260         CALL start_timer( itiminit, "Initialization" )
261         ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
262 
263         DO k =1, nz
264             DO j = 1, ny
265                 DO i = 1, nx
266                     t(i,j,k)  = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
267                     qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
268                 END DO
269             END DO
270         END DO
271 
272 #ifdef _OPENACC
273         CALL initialize_gpu()
274 #endif
275 
276         CALL end_timer( itiminit )
277     END SUBROUTINE initialize
278 
279     SUBROUTINE initialize_gpu()! 让 GPU 跑一个小内核来初始化
280         IMPLICIT NONE
281 
282         INTEGER :: temp(16)
283         INTEGER :: i
284 
285         !$acc parallel loop
286         DO i = 1, 16
287             temp(i) = 1
288         END DO
289 
290         IF (SUM(temp) == 16) THEN
291             WRITE(*,"(A)") "GPU initialized"
292         ELSE
293             WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
294             STOP
295         END IF
296     END SUBROUTINE initialize_gpu
297 
298     SUBROUTINE cleanup()! 清扫 t 和 qv 的内存
299         IMPLICIT NONE
300 
301         DEALLOCATE( t, qv )
302     END SUBROUTINE cleanup
303 
304 END MODULE m_setup

● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90

  1 ! m_io.f90
  2 MODULE m_io
  3     USE m_config,  ONLY: nout, nx, ny, nz
  4     USE m_fields,  ONLY: qv
  5 
  6     IMPLICIT NONE
  7 
  8 CONTAINS
  9     SUBROUTINE write_output(ntstep)
 10         IMPLICIT NONE
 11 
 12         INTEGER, INTENT(IN) :: ntstep
 13         INTEGER :: i, j, k
 14         REAL*8  :: qv_mean           
 15         
 16         IF (MOD(ntstep, nout) /= 0) RETURN
 17 
 18         qv_mean = 0.0D0                   
 19         DO k = 1, nz
 20             !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean) 
 21             DO j = 1, ny
 22                 DO i = 1, nx
 23                     qv_mean = qv_mean + qv(i,j,k)
 24                 END DO
 25             END DO
 26         END DO
 27         qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))
 28 
 29         WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
 30     END SUBROUTINE write_output
 31 
 32 END MODULE m_io
 33 
 34 ! m_parametrizations.f90
 35 MODULE m_parametrizations
 36     IMPLICIT NONE
 37 
 38     REAL*8, parameter ::  cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0
 39     REAL*8, parameter ::  cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0
 40 
 41 CONTAINS
 42     SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
 43         IMPLICIT NONE
 44     
 45         INTEGER, INTENT(IN)    :: npx, npy, nlev
 46         REAL*8,  INTENT(IN)    :: t(:,:,:)      
 47         REAL*8,  INTENT(OUT)   :: qc(:,:,:)     
 48         REAL*8,  INTENT(INOUT) :: qv(:,:,:)     
 49         INTEGER :: i, j, k
 50 
 51         !$OMP PARALLEL
 52         DO k = 1, nlev
 53             !$OMP DO PRIVATE(i,j)
 54             DO j = 1, npy
 55                 DO i = 1, npx
 56                     qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
 57                     qc(i,j,k) = cs4 * qv(i,j,k)
 58                 END DO
 59             END DO
 60         END DO
 61         !$OMP END PARALLEL
 62     END SUBROUTINE saturation_adjustment
 63 
 64     SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
 65         IMPLICIT NONE
 66 
 67         INTEGER, INTENT(IN)   :: npx, npy, nlev
 68         REAL*8, INTENT(INOUT) :: t(:,:,:)      
 69         REAL*8, INTENT(IN)    :: qc(:,:,:)     
 70         REAL*8, INTENT(INOUT) :: qv(:,:,:)     
 71         INTEGER :: i, j, k
 72 
 73         !$OMP PARALLEL
 74         DO k = 2, nlev
 75             !$OMP DO PRIVATE(i,j)
 76             DO j = 1, npy
 77                 DO i = 1, npx
 78                     qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
 79                     t(i, j, k)  = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
 80                 END DO
 81             END DO
 82         END DO
 83         !$OMP END PARALLEL
 84     END SUBROUTINE microphysics
 85 
 86 END MODULE m_parametrizations
 87 
 88 ! m_setup.f90
 89 MODULE m_setup
 90     USE m_config,  ONLY: nstop, nout, nx, ny, nz
 91     USE m_fields,  ONLY: t,qv
 92     USE m_timing,  ONLY: init_timers, start_timer, end_timer
 93 
 94     IMPLICIT NONE
 95 
 96 CONTAINS
 97     SUBROUTINE initialize() ! 初始化计时器和设备
 98         IMPLICIT NONE
 99 
100         INTEGER, PARAMETER :: itiminit = 1  ! timer ID
101         INTEGER :: i, j, k                  ! loop indices
102         INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM
103         
104 #ifdef _OPENACC
105         WRITE(*,"(A)") "Running with OpenACC"       
106 #else
107         WRITE(*,"(A)") "Running without OpenACC"
108 #ifdef _OPENMP
109         !$OMP PARALLEL 
110         IF (OMP_GET_THREAD_NUM()==0) THEN
111             WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
112         END IF
113         !$OMP END PARALLEL
114 #endif
115 #endif
116         WRITE(*,"(A)") "Initialize"
117 
118         CALL init_timers()
119         CALL start_timer( itiminit, "Initialization" )
120         ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
121 
122         DO k =1, nz
123             DO j = 1, ny
124                 DO i = 1, nx
125                     t(i,j,k)  = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
126                     qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
127                 END DO
128             END DO
129         END DO
130 
131 #ifdef _OPENACC
132         CALL initialize_gpu()
133 #endif
134 
135         CALL end_timer( itiminit )
136     END SUBROUTINE initialize
137 
138     SUBROUTINE initialize_gpu()
139         IMPLICIT NONE
140 
141         INTEGER :: temp(16)
142         INTEGER :: i
143 
144         !$acc parallel loop
145         DO i = 1, 16
146             temp(i) = 1
147         END DO
148 
149         IF (SUM(temp) == 16) THEN
150             WRITE(*,"(A)") "GPU initialized"
151         ELSE
152             WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
153             STOP
154         END IF
155     END SUBROUTINE initialize_gpu
156 
157     SUBROUTINE cleanup()
158         IMPLICIT NONE
159 
160         DEALLOCATE( t, qv )
161     END SUBROUTINE cleanup
162 
163 END MODULE m_setup

● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。树上的优化 04 设计算法改动,没有参与比较

  1 ! m_io.f90
  2 MODULE m_io
  3     USE m_config,  ONLY: nout, nx, ny, nz
  4     USE m_fields,  ONLY: qv
  5 
  6     IMPLICIT NONE
  7 
  8 CONTAINS
  9     SUBROUTINE write_output(ntstep)
 10         IMPLICIT NONE
 11 
 12         INTEGER, INTENT(IN) :: ntstep    
 13         INTEGER :: i, j, k
 14         REAL*8  :: qv_mean               
 15         
 16         IF (MOD(ntstep, nout) /= 0) RETURN
 17     
 18         !$acc data present(qv)
 19         qv_mean = 0.0D0
 20         !$acc parallel 
 21         !$acc loop gang vector collapse(3) reduction(+:qv_mean)
 22         DO k = 1, nz
 23             DO j = 1, ny
 24                 DO i = 1, nx
 25                     qv_mean = qv_mean + qv(i,j,k)
 26                 END DO
 27             END DO
 28         END DO
 29         !$acc end parallel
 30         !$acc end data
 31         qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))
 32 
 33         WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
 34     END SUBROUTINE write_output
 35 
 36 END MODULE m_io
 37 
 38 ! m_parametrizations.f90
 39 MODULE m_parametrizations
 40     IMPLICIT NONE
 41 
 42     REAL*8, parameter ::  cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0
 43     REAL*8, parameter ::  cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0
 44 
 45 CONTAINS
 46     SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
 47         IMPLICIT NONE
 48     
 49         INTEGER, INTENT(IN)    :: npx, npy, nlev
 50         REAL*8,  INTENT(IN)    :: t(:,:,:)      
 51         REAL*8,  INTENT(OUT)   :: qc(:,:,:)     
 52         REAL*8,  INTENT(INOUT) :: qv(:,:,:)     
 53         INTEGER :: i, j, k
 54 
 55         !$acc data present(t,qv,qc)
 56         !$acc parallel
 57         !$acc loop gang vector collapse(3) 
 58         DO k = 1, nlev
 59             DO j = 1, npy
 60                 DO i = 1, npx
 61                     qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
 62                     qc(i,j,k) = cs4 * qv(i,j,k)
 63                 END DO
 64             END DO
 65         END DO
 66         !$acc end parallel
 67         !$acc end data
 68     END SUBROUTINE saturation_adjustment
 69 
 70     SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
 71         IMPLICIT NONE
 72 
 73         INTEGER, INTENT(IN)   :: npx, npy, nlev
 74         REAL*8, INTENT(INOUT) :: t(:,:,:)      
 75         REAL*8, INTENT(IN)    :: qc(:,:,:)     
 76         REAL*8, INTENT(INOUT) :: qv(:,:,:)     
 77         INTEGER :: i, j, k
 78         !$acc data present(t,qv,qc)    
 79         !$acc parallel
 80         !$acc loop seq
 81         DO k = 2, nlev
 82             !$acc loop gang 
 83             DO j = 1, npy
 84                 !$acc loop vector
 85                 DO i = 1, npx
 86                     qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
 87                     t(i, j, k)  = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
 88                 END DO
 89             END DO
 90         END DO
 91         !$acc end parallel
 92         !$acc end data
 93     END SUBROUTINE microphysics
 94 
 95 END MODULE m_parametrizations
 96 
 97 ! m_physics.f90
 98 MODULE m_physics
 99     USE m_config,           ONLY: nx, ny, nz
100     USE m_fields,           ONLY: qv, t
101     USE m_parametrizations, ONLY: saturation_adjustment, microphysics
102 
103     IMPLICIT NONE
104 
105     REAL*8, ALLOCATABLE :: qc(:,:,:)    ! 提前声明,由 init_physics 和 finalize_physics 来申请和释放
106 
107 CONTAINS
108     SUBROUTINE physics()
109         IMPLICIT NONE        
110         
111         CALL saturation_adjustment(nx, ny, nz, t, qc, qv)
112         CALL microphysics(nx, ny, nz, t, qc, qv)
113     END SUBROUTINE physics
114 
115     SUBROUTINE init_physics()
116         IMPLICIT NONE        
117         
118         ALLOCATE( qc(nx,ny,nz) )
119         !$acc enter data create(qc)
120     END SUBROUTINE init_physics
121 
122     SUBROUTINE finalize_physics()
123         IMPLICIT NONE
124         1
125         !$acc exit data delete(qc)
126         DEALLOCATE(qc)
127     END SUBROUTINE finalize_physics
128 
129 END MODULE m_physics
130 
131 ! m_setup.f90
132 MODULE m_setup
133     USE m_config,  ONLY: nstop, nout, nx, ny, nz
134     USE m_fields,  ONLY: t,qv
135     USE m_timing,  ONLY: init_timers, start_timer, end_timer
136     USE m_physics, ONLY: init_physics, finalize_physics
137     IMPLICIT NONE
138 
139 CONTAINS
140     SUBROUTINE initialize() ! 初始化计时器和设备
141         IMPLICIT NONE
142         
143         INTEGER, PARAMETER :: itiminit = 1
144         INTEGER :: i, j, k                  
145 
146 #ifdef _OPENACC
147         WRITE(*,"(A)") "Running with OpenACC"       
148 #else
149         WRITE(*,"(A)") "Running without OpenACC"   
150 #endif
151 
152         WRITE(*,"(A)") "Initialize"
153 
154         CALL init_timers()
155         CALL start_timer( itiminit, "Initialization" )
156         ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
157 
158         !$acc enter data create(t,qv)
159         DO k =1, nz
160             DO j = 1, ny
161                 DO i = 1, nx
162                     t(i,j,k)  = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
163                     qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
164                 END DO
165             END DO
166         END DO
167         !$acc update device(t,qv)
168 
169 #ifdef _OPENACC
170         CALL initialize_gpu()
171 #endif
172         CALL init_physics()
173         CALL end_timer( itiminit )
174     END SUBROUTINE initialize
175 
176     SUBROUTINE initialize_gpu()
177         IMPLICIT NONE
178 
179         INTEGER :: temp(16)
180         INTEGER :: i
181 
182         !$acc parallel loop
183         DO i = 1, 16
184             temp(i) = 1
185         END DO
186 
187         IF (SUM(temp) == 16) THEN
188             WRITE(*,"(A)") "GPU initialized"
189         ELSE
190             WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
191             STOP
192         END IF
193     END SUBROUTINE initialize_gpu
194 
195     SUBROUTINE cleanup()
196         IMPLICIT NONE
197     
198         !$acc exit data delete(t,qv)
199         DEALLOCATE( t, qv )
200         CALL finalize_physics()
201   END SUBROUTINE cleanup
202 
203 END MODULE m_setup

● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是将预编译器作用到 fortran 文件中,否则 m_setup.f90 中的 # 预编译命令会被当成错误

cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
make[1]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
compiling m_config.f90
compiling m_fields.f90
compiling m_io.f90
compiling m_parametrizations.f90
compiling m_physics.f90
compiling m_timing.f90
compiling m_setup.f90
compiling main.f90
make[1]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
make[1]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp'

... ! 类似上面的过程

make[1]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
Running without OpenACC
Initialize
Start of time loop
Step:     20, mean(qv) =    1.14302104E-04
Step:     40, mean(qv) =    1.34041461E-04
Step:     60, mean(qv) =    1.53710207E-04
Step:     80, mean(qv) =    1.73309068E-04
Step:    100, mean(qv) =    1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization :    17.28 ms
Time loop      :   978.08 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
Running without OpenACC
Running with OpenMP with    1 threads
Initialize
Start of time loop
Step:     20, mean(qv) =    1.14302104E-04
Step:     40, mean(qv) =    1.34041461E-04
Step:     60, mean(qv) =    1.53710207E-04
Step:     80, mean(qv) =    1.73309068E-04
Step:    100, mean(qv) =    1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization :    17.96 ms
Time loop      :   898.92 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step:     20, mean(qv) =    1.14302104E-04
Step:     40, mean(qv) =    1.34041461E-04
Step:     60, mean(qv) =    1.53710207E-04
Step:     80, mean(qv) =    1.73309068E-04
Step:    100, mean(qv) =    1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization :   191.11 ms
Time loop      :  1044.35 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step:     20, mean(qv) =    1.14302104E-04
Step:     40, mean(qv) =    1.34041461E-04
Step:     60, mean(qv) =    1.53710207E-04
Step:     80, mean(qv) =    1.73309068E-04
Step:    100, mean(qv) =    1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization :   176.72 ms
Time loop      :   142.11 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step:     20, mean(qv) =    1.14302104E-04
Step:     40, mean(qv) =    1.34041461E-04
Step:     60, mean(qv) =    1.53710207E-04
Step:     80, mean(qv) =    1.73309068E-04
Step:    100, mean(qv) =    1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization :   162.15 ms
Time loop      :   121.77 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step:     20, mean(qv) =    1.14302104E-04
Step:     40, mean(qv) =    1.34041461E-04
Step:     60, mean(qv) =    1.53710207E-04
Step:     80, mean(qv) =    1.73309068E-04
Step:    100, mean(qv) =    1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization :   152.47 ms
Time loop      :   166.53 ms
----------------------------

● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”

原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9494656.html