ROMS
Loading...
Searching...
No Matches
mod_parallel.F
Go to the documentation of this file.
1#include "cppdefs.h"
3!
4!git $Id$
5!================================================== Hernan G. Arango ===
6! Copyright (c) 2002-2025 The ROMS Group !
7! Licensed under a MIT/X style license !
8! See License_ROMS.md !
9!=======================================================================
10! !
11! This module contains all variables used for parallelization !
12! !
13!=======================================================================
14!
15 USE mod_kinds
16!
17 implicit none
18
19#ifdef MPI
20!
21 include 'mpif.h'
22#endif
23!
24 PUBLIC :: allocate_parallel
25 PUBLIC :: deallocate_parallel
26 PUBLIC :: initialize_parallel
27
28#if defined DISTRIBUTE && defined DISJOINTED
29 PUBLIC :: split_communicator
30 PUBLIC :: assign_communicator
31#endif
32!
33!-----------------------------------------------------------------------
34! Define variables in module.
35!-----------------------------------------------------------------------
36!
37! Switch to identify master processor. In serial and shared-memory
38! applications it is always true.
39!
40 logical :: master
41!
42! Switch to identify which thread is processing input/output files.
43! In distributed-memory applications, this thread can be the master
44! thread or all threads in case of parallel output. In serial and
45! shared-memory applications it is always true.
46!
47 logical :: inpthread
48 logical :: outthread
49
50!$OMP THREADPRIVATE (InpThread, OutThread)
51!
52! Number of shared-memory parallel threads or distributed-memory
53! parallel nodes.
54!
55 integer :: numthreads
56#ifndef DISJOINTED
57 integer :: forksize
58#endif
59!
60! First and last tile to process in a tiled application.
61!
62 integer, allocatable :: first_tile(:)
63 integer, allocatable :: last_tile(:)
64
65!$OMP THREADPRIVATE (first_tile, last_tile)
66
67#if defined ATM_COUPLING && defined MCT_LIB
68!
69! Parallel nodes assined to the atmosphere model.
70!
71 integer :: peatm_frst ! first atmosphere parallel node
72 integer :: peatm_last ! last atmosphere parallel node
73#endif
74#if defined WAV_COUPLING && defined MCT_LIB
75!
76! Parallel nodes assined to the wave model.
77!
78 integer :: pewav_frst ! first atmosphere parallel node
79 integer :: pewav_last ! last atmosphere parallel node
80#endif
81!
82! Parallel nodes assined to the ocean model.
83!
84 integer :: peocn_frst ! first ocean parallel node
85 integer :: peocn_last ! last ocean parallel node
86!
87! Parallel threads/nodes counters used in critical parallel regions.
88!
89 integer :: tile_count = 0
90 integer :: block_count = 0
91 integer :: thread_count = 0
92!
93! Profiling variables as function of parallel thread:
94!
95! proc Parallel process ID.
96! Cstr Starting time for program region.
97! Cend Ending time for program region.
98! Csum Accumulated time for progam region.
99! Ctotal Total time profiled
100! total_cpu Total elapsed CPU
101!
102 integer, allocatable :: proc(:,:,:)
103
105
106 real(r8), allocatable :: cstr(:,:,:)
107 real(r8), allocatable :: cend(:,:,:)
108 real(r8), allocatable :: csum(:,:,:)
109
110!$OMP THREADPRIVATE (proc)
111!$OMP THREADPRIVATE (Cstr, Cend)
112
113#if defined DISTRIBUTE && defined PROFILE
114!
115! Switch manage time clock in "mp_bcasts". During initialization is
116! set to .FALSE. because the profiling variables cannot be allocated
117! and initialized before the "Ngrids" parameter is known.
118!
119 logical :: lwclock = .false.
120#endif
121!
122! Distributed-memory master process.
123!
124 integer :: mymaster = 0
125
126#ifdef DISTRIBUTE
127# if defined DISJOINTED || \
128 (defined pio_lib && \
129 (defined asynchronous_pio || defined asynchronous_scorpio))
130!
131! Split communicator parameters.
132!
133# if defined DISJOINTED
134 logical :: fullmaster ! full communicator master process
135
136 integer :: forkcolor = -1 ! Fork split criteria: FullRank/ForkSize
137 integer :: forkkey ! Fork split rank ordering: FullRank
138 integer :: forksize ! number of processes in FORK_COMM_WORLD
139 integer :: fullsize ! number of precesses in FULL_COMM_WORLD
140 integer :: nsubgroups ! number of disjointed subgroups
141 integer :: taskcolor = -1 ! Task split criteria: FullRank/TaskSize
142 integer :: taskkey ! Task split rank ordering: FullRank
143 integer :: tasksize ! number of processes in TASK_COMM_WORLD
144# endif
145# if defined PIO_LIB && \
146 (defined asynchronous_pio || defined asynchronous_scorpio)
147 integer :: peerrank ! process rank in PEER_COMM_WORLD
148 integer :: peersize ! number of processes in PEER_COMM_WORLD
149# endif
150# endif
151#endif
152!
153! Rank of the parallel local process.
154!
155 integer :: fullrank = 0
156 integer :: taskrank = 0
157 integer :: myrank = 0
158 integer :: mythread = 0
159
160!$OMP THREADPRIVATE (MyThread)
161
162#ifdef DISTRIBUTE
163# ifdef MPI
164!
165! Distributed-memory group communicator handles.
166!
167# if defined PIO_LIB && defined DISTRIBUTE && \
168 (defined asynchronous_pio || defined asynchronous_scorpio)
169 integer :: peer_comm_world ! PIO full communicator
170 integer :: inter_comm_world ! PIO inter-communicator
171 integer :: io_comm_world ! I/O dedicated communicator
172# endif
173# ifdef DISJOINTED
174 integer :: full_comm_world ! full communicator
175 integer :: fork_comm_world ! fork communicator
176 integer :: task_comm_world ! task communicator
177# endif
178 integer :: ocn_comm_world ! internal ROMS communicator
179!
180! Set mpi_info opaque object handle.
181!
182 integer :: mp_info = mpi_info_null
183# endif
184!
185! Type of message-passage floating point bindings.
186!
187# ifdef DOUBLE_PRECISION
188# ifdef MPI
189 integer, parameter :: mp_float = mpi_double_precision
190!! integer, parameter :: MP_FLOAT = MPI_REAL8
191# endif
192# else
193# ifdef MPI
194 integer, parameter :: mp_float = mpi_real
195!! integer, parameter :: MP_FLOAT = MPI_REAL4
196# endif
197# endif
198# ifdef MPI
199 integer, parameter :: mp_double = mpi_double_precision
200# endif
201#endif
202!
203 CONTAINS
204!
205 SUBROUTINE allocate_parallel (Ngrids)
206!
207!=======================================================================
208! !
209! This routine allocates several variables in the module that depend !
210! on the number of nested grids. !
211! !
212!=======================================================================
213!
214 USE mod_strings, ONLY: nregion
215!
216! Imported variable declarations.
217!
218 integer, intent(in) :: ngrids
219!
220!-----------------------------------------------------------------------
221! Allocate and initialize module variables.
222!-----------------------------------------------------------------------
223!
224!$OMP PARALLEL
225! First and last tile to process in a tiled application.
226!
227 IF (.not.allocated(first_tile)) THEN
228 allocate ( first_tile(ngrids) )
229 END IF
230 IF (.not.allocated(last_tile)) THEN
231 allocate ( last_tile(ngrids) )
232 END IF
233!
234! Time profiling variables.
235!
236 IF (.not.allocated(proc)) THEN
237 allocate ( proc(0:1,4,ngrids) )
238 proc(0:1,1:4,1:ngrids)=0
239 END IF
240
241 IF (.not.allocated(cstr)) THEN
242 allocate ( cstr(0:nregion,4,ngrids) )
243 cstr(0:nregion,1:4,1:ngrids)=0.0_r8
244 END IF
245
246 IF (.not.allocated(cend)) THEN
247 allocate ( cend(0:nregion,4,ngrids) )
248 cend(0:nregion,1:4,1:ngrids)=0.0_r8
249 END IF
250!$OMP END PARALLEL
251
252 IF (.not.allocated(csum)) THEN
253 allocate ( csum(0:nregion,4,ngrids) )
254 csum(0:nregion,1:4,1:ngrids)=0.0_r8
255 END IF
256!
257! Initialize other profiling variables.
258!
259 ctotal=0.0_r8
260 total_cpu=0.0_r8
261 total_model=0.0_r8
262
263#if defined DISTRIBUTE && defined PROFILE
264!
265! Activate wall clock switch used only in "mp_bcasts". This switch
266! is set to .FALSE. during initialization before calling "inp_par.F"
267! because the above profiling variables are allocated and initialized
268! after the value of "Ngrids" is known.
269!
270 lwclock=.true.
271#endif
272
273 RETURN
274 END SUBROUTINE allocate_parallel
275!
277!
278!=======================================================================
279! !
280! This routine deallocates variables in the module. !
281! !
282!=======================================================================
283!
284!-----------------------------------------------------------------------
285! Deallocate variables in module.
286!-----------------------------------------------------------------------
287!
288!$OMP PARALLEL
289 IF (allocated(first_tile)) deallocate ( first_tile )
290 IF (allocated(last_tile)) deallocate ( last_tile )
291!
292 IF (allocated(proc)) deallocate ( proc )
293 IF (allocated(cstr)) deallocate ( cstr )
294 IF (allocated(cend)) deallocate ( cend )
295!$OMP END PARALLEL
296
297 IF (allocated(csum)) deallocate ( csum )
298!
299 RETURN
300 END SUBROUTINE deallocate_parallel
301!
303!
304!=======================================================================
305! !
306! This routine initializes and spawn distribute-memory nodes. !
307! !
308!=======================================================================
309!
310 USE mod_iounits
311 USE mod_scalars
312!
313! Local variable declarations.
314!
315 integer :: i
316#ifdef DISTRIBUTE
317 integer :: myerror
318#else
319 integer :: my_numthreads, my_threadnum
320#endif
321
322#if defined _OPENMP
323!
324!-----------------------------------------------------------------------
325! Initialize shared-memory (OpenMP) configuration.
326!-----------------------------------------------------------------------
327!
328! Disable dynamic adjustment, by the run-time environment, of the
329! number of threads available to execute parallel regions.
330!
331 CALL omp_set_dynamic (.false.)
332!
333! Inquire number of threads in parallel region. Set master and I/O
334! switches.
335!
337 IF (my_threadnum().eq.0) THEN
338 inpthread=.true.
339 outthread=.true.
340 ELSE
341 inpthread=.false.
342 outthread=.false.
343 END IF
344 master=.true.
345
346#elif defined DISTRIBUTE
347# ifdef MPI
348!
349!-----------------------------------------------------------------------
350! Initialize distributed-memory (MPI) configuration.
351!-----------------------------------------------------------------------
352!
353! Get the number of processes in the group associated with the world
354! communicator. Here FullRank and MyRank are the same. It is computed
355! for consistency wity concurrent applications.
356!
357 CALL mpi_comm_size (ocn_comm_world, numthreads, myerror)
358 IF (myerror.ne.0) THEN
359 WRITE (stdout,10)
360 10 FORMAT (/,' ROMS - Unable to inquire number of', &
361 & ' processors in the group.')
362 exit_flag=6
363 RETURN
364 END IF
365 CALL mpi_comm_rank (ocn_comm_world, fullrank, myerror)
367!
368! Identify master, input and output threads.
369!
370# ifdef PARALLEL_IO
371 master=.false.
372 inpthread=.true.
373 outthread=.true.
374 IF (myrank.eq.mymaster) THEN
375 master=.true.
376 END IF
377# else
378 master=.false.
379 inpthread=.false.
380 outthread=.false.
381 IF (myrank.eq.mymaster) THEN
382 master=.true.
383 inpthread=.true.
384 outthread=.true.
385 END IF
386# endif
387# endif
388#else
389!
390!-----------------------------------------------------------------------
391! Initialize serial configuration.
392!-----------------------------------------------------------------------
393!
395 master=.true.
396 inpthread=.true.
397 outthread=.true.
398#endif
399!
400 RETURN
401 END SUBROUTINE initialize_parallel
402
403#if defined DISTRIBUTE && defined DISJOINTED
404!
405 SUBROUTINE split_communicator (Nsplit, Ntasks)
406!
407!=======================================================================
408! !
409! This routine splits the main distributed-memory communicator object !
410! (OCN_COMM_WORLD saved as FULL_COMM_WORLD) into several disjointed !
411! subgroups processes with new communicator handle (FORK_COMM_WORLD). !
412! Notice that at entry, the OCN_COMM_WORLD handle is the same object !
413! as MPI_COMM_WORLD, if no multi-model coupling. !
414! !
415! If Ntask=2, a second split is done. Both tasks (1 and 2) have equal !
416! number processors. Task 1 uses the first half, whereas Task 2 uses !
417! the second half of all the processes assigned to ROMS. The extra !
418! communicator handle (TASK_COMM_WORLD) can be used for collective !
419! operations within each task section. !
420! !
421! On Input: !
422! !
423! Nsplit Number of disjointed by related subgroups (integer) !
424! Ntasks Number of different code sections tasks (integer) !
425! Usually, Ntasks=1 or Ntasks=2 !
426! !
427! Therefore, !
428! !
429! ForkSize Number of processes assigned for each subgroup: !
430! ForkSize = FullSize/(Nplit*Ntasks) !
431! !
432! FullSize Total number of processes assigned to ROMS !
433! !
434! TaskSize Number of processes assigned for each task !
435! TaskSize=FullSize/Ntasks !
436! !
437! NsubGroups Total number of disjointed subgroups !
438! NsubGroups=Nsplit*Ntasks !
439! !
440!=======================================================================
441!
442 USE mod_iounits
443 USE mod_scalars
444!
445! Imported variable declarations
446!
447 integer, intent(in) :: nsplit, ntasks
448!
449! Local variable declarations.
450!
451 integer :: myerror, mysize
452 integer :: i, lstr, serror
453!
454 integer, allocatable :: groupranks(:)
455!
456 character (len=MPI_MAX_ERROR_STRING) :: string
457!
458!-----------------------------------------------------------------------
459! Split ROMS communicator into disjointed subgroups, FORK_COMM_WORLD.
460! It is used, for example, for the concurrent time-stepping of the
461! ROMS kernel.
462!-----------------------------------------------------------------------
463!
464! Save full communicator (starting handle) and inquire about its size
465! and rank.
466!
467 full_comm_world=ocn_comm_world
468 CALL mpi_comm_rank (full_comm_world, fullrank, myerror)
469 CALL mpi_comm_size (full_comm_world, fullsize, myerror)
470 fullmaster=fullrank.eq.mymaster
471!
472! Split the full communicator into sub-communicators based on color and
473! key. It is a collective operation.
474!
475! For example, if FullSize=8 and Nsplit=2, we get ForkSize=4 and:
476!
477! FullRank: 0 1 2 3 4 5 6 7 FULL_COMM_WORLD
478! ForkColor: 0 0 0 0 1 1 1 1 split criteria
479! ForkKey: 0 1 2 3 4 5 6 7 same as FullRank
480! ForkRank: 0 1 2 3 0 1 2 3 FORK_COMM_WORLD
481!
482!
483! The color determines in which of the sub-communicators the current
484! process (spawn by FULL_COMM_WORLD) will fall.
485!
486! The key argument determines the rank ordering within the split
487! communicator. The process that passes in the smallest value for
488! key will be rank 0, the next smallest will be rank 1, and so on.
489! Here, the key is set to the rank of the full communicator since we
490! want all of the processes in the split communicator to be in the
491! same order that they were in the full communicator.
492!
493! The resulting communicator, FORK_COMM_WORLD, has the same handle
494! value (context ID) in all sub-communicators. However, each process
495! only have reference to the sub-communicator that it belongs. It is
496! an opaque object.
497!
498 nsubgroups=nsplit*ntasks
499 IF (mod(fullsize, nsubgroups).ne.0) THEN
500 IF (fullmaster) THEN
501 WRITE (stdout,10) nsubgroups, fullsize
502 END IF
503 exit_flag=2
504 RETURN
505 END IF
506 forksize=fullsize/nsubgroups ! integer operation
507 forkcolor=fullrank/forksize ! integer operation
508 forkkey=fullrank
509 CALL mpi_comm_split (full_comm_world, forkcolor, forkkey, &
510 & fork_comm_world, myerror)
511 IF (myerror.ne.mpi_success) THEN
512 CALL mpi_error_string (myerror, string, lstr, serror)
513 WRITE (stdout,20) 'MPI_COMM_SPLIT', 'TASK_COMM_WORLD', &
514 & fullrank, myerror, trim(string)
515 exit_flag=2
516 RETURN
517 END IF
518!
519!-----------------------------------------------------------------------
520! If Ntasks=2, split the FULL_COMM_WORLD into disjointed tasks
521! communicators.
522!-----------------------------------------------------------------------
523!
524 IF (ntasks.gt.1) THEN
525 tasksize=fullsize/ntasks
526 taskcolor=fullrank/tasksize
527 taskkey=fullrank
528 CALL mpi_comm_split (full_comm_world, taskcolor, taskkey, &
529 & task_comm_world, myerror)
530 IF (myerror.ne.mpi_success) THEN
531 CALL mpi_error_string (myerror, string, lstr, serror)
532 WRITE (stdout,20) 'MPI_COMM_SPLIT', 'TASK_COMM_WORLD', &
533 & fullrank, myerror, trim(string)
534 exit_flag=2
535 RETURN
536 END IF
537 CALL mpi_comm_rank (task_comm_world, taskrank, myerror)
538 ELSE
539 task_comm_world=full_comm_world
540 tasksize=fullsize
542 taskkey=0
543 taskcolor=0
544 END IF
545!
546 10 FORMAT (/,' SPLIT_COMMUNICATOR - illegal configuration, ', &
547 & 'Nsplit * Ntasks = ',i0,/,22x, &
548 & 'needs to be less, equal, or a multiple of the ', &
549 & 'communicator size, FullSize = ',i0,/,22x, &
550 & 'That is, MOD(FullSise, Nsplit*Ntask)=0')
551 20 FORMAT (/,' SPLIT_COMMUNICATOR - error during ',a, &
552 & ' call, Comm = ',a,', Rank = ',i0,' Error = ',i0,/,22x,a)
553!
554 RETURN
555 END SUBROUTINE split_communicator
556!
557 SUBROUTINE assign_communicator (choice)
558!
559!=======================================================================
560! !
561! This routine selects request distributed memory communicator and !
562! it associated parameters and control switches. It assigns the !
563! requested communicator to the OCN_COMM_WORLD handle. !
564! !
565! On Input: !
566! !
567! choice Requested communicator keyword (string) !
568! !
569!=======================================================================
570!
571! Imported variable declarations.
572!
573 character (len=*), intent(in) :: choice
574!
575! Local variable declarations.
576!
577 integer :: myerror
578!
579!-----------------------------------------------------------------------
580! Assing internal ROMS communicator, OCN_COMM_WORLD.
581!-----------------------------------------------------------------------
582!
583! Set communicator handle and get its size and rank.
584!
585 SELECT CASE (trim(choice))
586 CASE ('FORK', 'SPLIT', 'CONCURRENT')
587 ocn_comm_world=fork_comm_world
588 CALL mpi_comm_size (ocn_comm_world, numthreads, myerror)
589 CALL mpi_comm_rank (ocn_comm_world, myrank, myerror)
591 CASE ('FULL')
592 ocn_comm_world=full_comm_world
593 CALL mpi_comm_size (ocn_comm_world, numthreads, myerror)
594 CALL mpi_comm_rank (ocn_comm_world, myrank, myerror)
596 END SELECT
597 CALL mpi_barrier (ocn_comm_world, myerror)
598!
599! Initialize parallel control switches.
600!
601# ifdef PARALLEL_IO
602 inpthread=.true.
603 outthread=.true.
604# else
605 IF (myrank.eq.0) THEN
606 inpthread=.true.
607 outthread=.true.
608 ELSE
609 inpthread=.false.
610 outthread=.false.
611 END IF
612# endif
613!
614 RETURN
615 END SUBROUTINE assign_communicator
616#endif
617
618 END MODULE mod_parallel
integer function my_threadnum()
integer function my_numthreads()
Definition mp_routines.F:89
integer stdout
integer, parameter r8
Definition mod_kinds.F:28
integer, parameter mp_double
integer forksize
integer, parameter mp_float
subroutine, public initialize_parallel
integer block_count
integer taskrank
integer numthreads
integer, dimension(:), allocatable first_tile
integer peocn_last
integer fullrank
integer mythread
integer mp_info
logical inpthread
logical master
real(r8), dimension(:,:,:), allocatable cend
real(r8), dimension(:,:,:), allocatable cstr
real(r8), dimension(4) total_model
integer peocn_frst
integer mymaster
real(r8) total_cpu
logical outthread
integer thread_count
integer, dimension(:), allocatable last_tile
subroutine, public deallocate_parallel
real(r8) ctotal
integer tile_count
integer, dimension(:,:,:), allocatable proc
subroutine, public allocate_parallel(ngrids)
integer ocn_comm_world
real(r8), dimension(:,:,:), allocatable csum
logical lwclock
integer exit_flag
integer, parameter nregion