ROMS
Loading...
Searching...
No Matches
timers.F File Reference
#include "cppdefs.h"
Include dependency graph for timers.F:

Go to the source code of this file.

Functions/Subroutines

recursive subroutine wclock_on (ng, model, region, line, routine)
 
recursive subroutine wclock_off (ng, model, region, line, routine)
 

Function/Subroutine Documentation

◆ wclock_off()

recursive subroutine wclock_off ( integer, intent(in) ng,
integer, intent(in) model,
integer, intent(in) region,
integer, intent(in) line,
character (len=*), intent(in) routine )

Definition at line 147 of file timers.F.

148!
149!=======================================================================
150! !
151! This routine turns off wall clock to measure the elapsed time in !
152! seconds spent by each parallel thread in requested model region. !
153! !
154! On Input: !
155! !
156! ng Nested grid number (integer) !
157! model Calling model identifier (integer) !
158! region Profiling region number (integer) !
159! line Calling model routine line (integer) !
160! routine Calling model routine (string) !
161! !
162!=======================================================================
163!
164 USE mod_param
165 USE mod_parallel
166 USE mod_iounits
167 USE mod_strings
168
169#ifdef DISTRIBUTE
170!
172 USE distribute_mod, ONLY : mp_collect
173 USE strings_mod, ONLY : uppercase
174#endif
175!
176 implicit none
177!
178! Imported variable declarations.
179!
180 integer, intent(in) :: ng, model, region, line
181
182 character (len=*), intent(in) :: routine
183!
184! Local variable declarations.
185!
186 integer :: ig, imodel, iregion, MyModel, NSUB
187#ifdef DISTRIBUTE
188 integer :: MyCOMM, nPETs, PETrank
189# ifdef ROMS_STDOUT
190 integer :: node
191# endif
192#endif
193 integer :: my_threadnum
194
195 real(r8) :: percent, sumcpu, sumper, sumsum, total
196
197 real(r8), dimension(2) :: wtime
198
199 real(r8) :: my_wtime
200
201#ifdef DISTRIBUTE
202 real(r8) :: TendMin, TendMax
203
204 real(r8), parameter :: Tspv = 0.0_r8
205 real(r8), allocatable :: Tend(:)
206 real(r8), dimension(0:Nregion) :: rbuffer
207
208 character (len= 3), dimension(0:Nregion) :: op_handle
209#endif
210 character (len=14), dimension(4) :: label
211!
212!-----------------------------------------------------------------------
213! Compute elapsed wall time for all threads.
214!-----------------------------------------------------------------------
215#ifdef TRACING
216!
217 IF (master) THEN
218 WRITE (stdout,'(a,a)') '<== Exiting ', trim(routine)
219 FLUSH (stdout)
220 END IF
221#endif
222!
223! Set number of subdivisions, same as for global reductions.
224!
225#ifdef DISTRIBUTE
226# ifdef DISJOINTED
227 mycomm=full_comm_world
228 npets=fullsize
229 petrank=fullrank
230# else
231 mycomm=ocn_comm_world
232 npets=numthreads
233 petrank=myrank
234# endif
235 nsub=1
236#else
237 nsub=numthreads
238#endif
239!
240! Ensure that MyModel is not zero.
241!
242 mymodel=max(1,model)
243!
244! Compute elapsed CPU time (seconds) for each profile region, except
245! for region zero which is called by the main driver before the
246! simulation is stopped.
247!
248 IF (region.ne.0) THEN
249 cend(region,mymodel,ng)=cend(region,mymodel,ng)+ &
250 & (my_wtime(wtime)- &
251 & cstr(region,mymodel,ng))
252 END IF
253!
254!-----------------------------------------------------------------------
255! If simulation is completed, compute and report elapsed CPU time for
256! all regions.
257!-----------------------------------------------------------------------
258!
259 IF ((region.eq.0).and.(proc(1,mymodel,ng).eq.1)) THEN
260!
261! Compute elapsed wall time for the driver, region=0. Since it is
262! called only once, "MyModel" will have a value and the other models
263! will be zero.
264!
265 cend(region,mymodel,ng)=cend(region,mymodel,ng)+ &
266 & (my_wtime(wtime)- &
267 & cstr(region,mymodel,ng))
268 DO imodel=1,4
269 proc(1,imodel,ng)=0
270 END DO
271
272!$OMP CRITICAL (FINALIZE_WCLOCK)
273!
274! Report elapsed time (seconds) for each CPU. We get the same time
275! for all nested grids.
276!
277 IF (ng.eq.1) THEN
278#ifdef DISTRIBUTE
279 CALL mp_barrier (ng, model, mycomm)
280 IF (.not.allocated(tend)) THEN
281 allocate ( tend(npets) )
282 tend=0.0_r8
283 END IF
284 tend(petrank+1)=cend(region,mymodel,ng)
285 CALL mp_collect (ng, model, npets, tspv, tend, mycomm)
286 tendmin=minval(tend)
287 tendmax=maxval(tend)
288# ifdef ROMS_STDOUT
289 IF (master) THEN
290 DO node=1,npets
291 WRITE (stdout,10) ' Node #', node-1, &
292 & ' CPU:', tend(node)
293 END DO
294 END IF
295# else
296 WRITE (stdout,10) ' Node #', petrank, &
297 & ' CPU:', tend(petrank+1)
298# endif
299 FLUSH (stdout)
300#else
301 WRITE (stdout,10) ' Thread #', mythread, ' CPU:', &
302 & cend(region,mymodel,ng)
303#endif
304 10 FORMAT (a,i5,a,f12.3)
305 END IF
306!
307! Sum the elapsed time for each profile region by model.
308!
310 DO imodel=1,4
311 DO iregion=0,nregion
312 csum(iregion,imodel,ng)=csum(iregion,imodel,ng)+ &
313 & cend(iregion,imodel,ng)
314 END DO
315 END DO
316!
317! Compute total elapsed CPU wall time between all parallel processes.
318!
319 IF (thread_count.eq.nsub) THEN
321#ifdef DISTRIBUTE
322 op_handle(0:nregion)='SUM' ! Gather all values using a
323 DO imodel=1,4 ! reduced sum between nodes
324 DO iregion=0,nregion
325 rbuffer(iregion)=csum(iregion,imodel,ng)
326 END DO
327 CALL mp_reduce (ng, mymodel, nregion+1, rbuffer(0:), &
328 & op_handle(0:), mycomm)
329 DO iregion=0,nregion
330 csum(iregion,imodel,ng)=rbuffer(iregion)
331 END DO
332 END DO
333#endif
334 IF (master) THEN
335 IF (ng.eq.1) THEN ! Same for all nested grids
336 total_cpu=total_cpu+csum(region,model,ng)
337 END IF
338 DO imodel=1,4
339 total_model(imodel)=0.0_r8
340 DO iregion=1,nregion
341 total_model(imodel)=total_model(imodel)+ &
342 & csum(iregion,imodel,ng)
343 END DO
344 END DO
345 IF (ng.eq.1) THEN
346 WRITE (stdout,20) ' Total:', total_cpu
347 20 FORMAT (a,t18,f14.3)
348 IF (numthreads.gt.1) THEN
349 WRITE (stdout,20) ' Average:', total_cpu/numthreads
350#ifdef DISTRIBUTE
351 WRITE (stdout,20) ' Minimum:', tendmin
352 WRITE (stdout,20) ' Maximum:', tendmax
353#endif
354 END IF
355 END IF
356 END IF
357
358#ifdef DISTRIBUTE
359 IF (allocated(tend)) deallocate (tend)
360#endif
361
362#ifdef PROFILE
363!
364! Report profiling times.
365!
366 label(inlm)='Nonlinear '
367 label(itlm)='Tangent linear'
368 label(irpm)='Representer '
369 label(iadm)='Adjoint '
370 DO imodel=1,4
371 IF (master.and.(total_model(imodel).gt.0.0_r8)) THEN
372 WRITE (stdout,30) trim(label(imodel)), &
373 & 'model elapsed CPU time profile, Grid:',&
374 & ng
375 30 FORMAT (/,1x,a,1x,a,1x,i2.2/)
376 END IF
377 sumcpu=0.0_r8
378 sumper=0.0_r8
379 DO iregion=1,mregion-1
380 IF (master.and.(csum(iregion,imodel,ng).gt.0.0_r8)) THEN
381 percent=100.0_r8*csum(iregion, imodel,ng)/total_cpu
382 WRITE (stdout,40) pregion(iregion), &
383 & csum(iregion,imodel,ng), percent
384 sumcpu=sumcpu+csum(iregion,imodel,ng)
385 sumper=sumper+percent
386 END IF
387 END DO
388 ctotal=ctotal+sumcpu
389 40 FORMAT (2x,a,t53,f14.3,2x,'(',f7.4,' %)')
390 IF (master.and.(total_model(imodel).gt.0.0_r8)) THEN
391 WRITE (stdout,50) sumcpu, sumper
392 50 FORMAT (t47,'Total:',f14.3,2x,f8.4,' %')
393 END IF
394 END DO
395!
396! Sometimes the profiling does not fully account for all the CPU
397! spent outside of the ROMS kernels. In data assimilation algorithms,
398! there is a lot of CPU spent outside the kernels. A separate
399! profiling is reported below.
400!
401 IF (master.and.(ng.eq.ngrids)) THEN
402 percent=100.0_r8*ctotal/total_cpu
403 WRITE (stdout,60) ctotal, percent, &
404 & total_cpu-ctotal, 100.0_r8-percent
405 60 FORMAT (/,2x, &
406 & 'Unique kernel(s) regions profiled ................',&
407 & f14.3,2x,f8.4,' %'/,2x, &
408 & 'Residual, non-profiled code ......................',&
409 & f14.3,2x,f8.4,' %'/)
410 WRITE (stdout,70) total_cpu
411 70 FORMAT (/,' All percentages are with respect to', &
412 & ' total time =',5x,f12.3,/)
413 END IF
414
415# ifdef FOUR_DVAR
416!
417! Report elapsed time for 4D-Var algorithms.
418!
419 total=0.0_r8
420 DO iregion=fregion,nregion-3
421 DO imodel=1,4
422 total=total+csum(iregion,imodel,ng)
423 END DO
424 END DO
425 IF (master.and.(total.gt.0.0_r8)) THEN
426 WRITE (stdout,30) 'Variational', &
427 & 'data assimilation profile, Grid:', ng
428 END IF
429 IF (total.gt.0.0_r8) THEN
430 sumper=0.0_r8
431 sumsum=0.0_r8
432 DO iregion=fregion,nregion-3
433 sumcpu=0.0_r8
434 DO imodel=1,4
435 sumcpu=sumcpu+csum(iregion,imodel,ng)
436 END DO
437 IF (master.and.(sumcpu.gt.0.0_r8)) THEN
438 percent=100.0_r8*sumcpu/total_cpu
439 WRITE (stdout,40) pregion(iregion), sumcpu, percent
440 sumsum=sumsum+sumcpu
441 sumper=sumper+percent
442 END IF
443 END DO
444 IF (master.and.(total.gt.0.0_r8)) THEN
445 WRITE (stdout,80) sumsum
446 80 FORMAT (t47,'Total:',f14.3)
447 END IF
448 END IF
449!
450! The background, increment, and analysis should be close to 100
451! percent. They are listed last in Pregion.
452!
453 total=0.0_r8
454 DO iregion=nregion-2,nregion
455 DO imodel=1,4
456 total=total+csum(iregion,imodel,ng)
457 END DO
458 END DO
459 IF (master.and.(total.gt.0.0_r8)) THEN
460 WRITE (stdout,'(1x)')
461 END IF
462 IF (total.gt.0.0_r8) THEN
463 sumper=0.0_r8
464 sumsum=0.0_r8
465 DO iregion=nregion-2,nregion
466 sumcpu=0.0_r8
467 DO imodel=1,4
468 sumcpu=sumcpu+csum(iregion,imodel,ng)
469 END DO
470 IF (master.and.(sumcpu.gt.0.0_r8)) THEN
471 percent=100.0_r8*sumcpu/total_cpu
472 WRITE (stdout,40) pregion(iregion), sumcpu, percent
473 sumsum=sumsum+sumcpu
474 sumper=sumper+percent
475 END IF
476 END DO
477 IF (master.and.(total.gt.0.0_r8)) THEN
478 WRITE (stdout,50) sumsum, sumper
479 END IF
480 END IF
481# endif
482# ifdef DISTRIBUTE
483!
484! Report elapsed time for message passing communications.
485!
486 total=0.0_r8
487 DO iregion=mregion,fregion-1
488 DO imodel=1,4
489 total=total+csum(iregion,imodel,ng)
490 END DO
491 END DO
492 IF (master.and.(total.gt.0.0_r8)) THEN
493 WRITE (stdout,30) uppercase('mpi'), &
494 & 'communications profile, Grid:', ng
495 END IF
496 IF (total.gt.0.0_r8) THEN
497 sumper=0.0_r8
498 sumsum=0.0_r8
499 DO iregion=mregion,fregion-1
500 sumcpu=0.0_r8
501 DO imodel=1,4
502 sumcpu=sumcpu+csum(iregion,imodel,ng)
503 END DO
504 IF (master.and.(sumcpu.gt.0.0_r8)) THEN
505 percent=100.0_r8*sumcpu/total_cpu
506 WRITE (stdout,40) pregion(iregion), sumcpu, percent
507 sumsum=sumsum+sumcpu
508 sumper=sumper+percent
509 END IF
510 END DO
511 IF (master.and.(total.gt.0.0_r8)) THEN
512 WRITE (stdout,50) sumsum, sumper
513 END IF
514 END IF
515
516# ifdef NESTING
517!
518! Report total elapsed time for message passing communications for
519! all nested grids.
520!
521 IF (ng.eq.ngrids) THEN
522 total=0.0_r8
523 DO ig=1,ngrids
524 DO iregion=mregion,fregion-1
525 DO imodel=1,4
526 total=total+csum(iregion,imodel,ig)
527 END DO
528 END DO
529 END DO
530 IF (master.and.(total.gt.0.0_r8)) THEN
531 WRITE (stdout,90) uppercase('mpi'), &
532 & 'communications profile over all nested grids:'
533 90 FORMAT (/,1x,a,1x,a,/)
534 END IF
535 IF (total.gt.0.0_r8) THEN
536 sumper=0.0_r8
537 sumsum=0.0_r8
538 DO iregion=mregion,fregion-1
539 sumcpu=0.0_r8
540 DO ig=1,ngrids
541 DO imodel=1,4
542 sumcpu=sumcpu+csum(iregion,imodel,ig)
543 END DO
544 END DO
545 IF (master.and.(sumcpu.gt.0.0_r8)) THEN
546 percent=100.0_r8*sumcpu/total_cpu
547 WRITE (stdout,40) pregion(iregion), sumcpu, percent
548 sumsum=sumsum+sumcpu
549 sumper=sumper+percent
550 END IF
551 END DO
552 IF (master.and.(total.gt.0.0_r8)) THEN
553 WRITE (stdout,50) sumsum, sumper
554 END IF
555 END IF
556 END IF
557# endif
558# endif
559#endif
560 END IF
561!$OMP END CRITICAL (FINALIZE_WCLOCK)
562 END IF
563 RETURN
real(r8) function my_wtime(wtime)
subroutine mp_barrier(ng, model, inpcomm)
Definition distribute.F:126
integer stdout
integer numthreads
integer fullrank
integer mythread
logical master
real(r8), dimension(:,:,:), allocatable cend
real(r8), dimension(:,:,:), allocatable cstr
real(r8), dimension(4) total_model
real(r8) total_cpu
integer thread_count
real(r8) ctotal
integer, dimension(:,:,:), allocatable proc
integer ocn_comm_world
real(r8), dimension(:,:,:), allocatable csum
integer, parameter inlm
Definition mod_param.F:662
integer, parameter irpm
Definition mod_param.F:664
integer, parameter iadm
Definition mod_param.F:665
integer ngrids
Definition mod_param.F:113
integer, parameter itlm
Definition mod_param.F:663
integer, parameter mregion
character(len=50), dimension(nregion) pregion
integer, parameter fregion
integer, parameter nregion
character(len(sinp)) function, public uppercase(sinp)
Definition strings.F:582

References mod_parallel::cend, mod_parallel::cstr, mod_parallel::csum, mod_parallel::ctotal, mod_strings::fregion, mod_parallel::fullrank, mod_param::iadm, mod_param::inlm, mod_param::irpm, mod_param::itlm, mod_parallel::master, distribute_mod::mp_barrier(), mod_strings::mregion, my_wtime(), mod_parallel::myrank, mod_parallel::mythread, mod_param::ngrids, mod_strings::nregion, mod_parallel::numthreads, mod_parallel::ocn_comm_world, mod_strings::pregion, mod_parallel::proc, mod_iounits::stdout, mod_parallel::thread_count, mod_parallel::total_cpu, mod_parallel::total_model, and strings_mod::uppercase().

Referenced by ad_biology_mod::ad_biology(), ad_bulk_flux_mod::ad_bulk_flux(), ad_diag_mod::ad_diag(), dotproduct_mod::ad_dotproduct(), ad_frc_adjust_mod::ad_frc_adjust(), ad_get_data(), ad_get_idata(), ad_ini_fields_mod::ad_ini_fields(), ini_adjust_mod::ad_ini_perturb(), ad_ini_fields_mod::ad_ini_zeta(), ad_initial(), inner2state_mod::ad_inner2state(), mp_exchange_mod::ad_mp_exchange2d(), mp_exchange_mod::ad_mp_exchange2d_bry(), mp_exchange_mod::ad_mp_exchange3d(), mp_exchange_mod::ad_mp_exchange3d_bry(), mp_exchange_mod::ad_mp_exchange4d(), ad_obc_adjust_mod::ad_obc2d_adjust(), ad_obc_adjust_mod::ad_obc_adjust(), ad_omega_mod::ad_omega(), ad_ini_fields_mod::ad_out_fields(), ad_ini_fields_mod::ad_out_zeta(), ad_output(), ad_pack(), ad_pack_tile(), ad_pre_step3d_mod::ad_pre_step3d(), ad_prsgrd_mod::ad_prsgrd(), ad_rho_eos_mod::ad_rho_eos(), ad_rhs3d_mod::ad_rhs3d(), ad_set_avg_mod::ad_set_avg(), ad_set_data(), ad_set_depth_mod::ad_set_depth(), ad_set_depth_mod::ad_set_depth_bry(), ad_set_massflux_mod::ad_set_massflux(), ad_set_vbc_mod::ad_set_vbc(), ad_set_vbc_mod::ad_set_vbc_tile(), ad_set_zeta_mod::ad_set_zeta(), ad_ini_fields_mod::ad_set_zeta_timeavg(), dotproduct_mod::ad_statenorm(), ad_step2d_mod::ad_step2d(), ad_step3d_t_mod::ad_step3d_t(), ad_step3d_uv_mod::ad_step3d_uv(), ad_t3dmix2_mod::ad_t3dmix2(), ad_t3dmix4_mod::ad_t3dmix4(), ad_t3drelax_mod::ad_t3drelax(), ad_unpack(), ad_unpack_tile(), ad_uv3dmix2_mod::ad_uv3dmix2(), ad_uv3dmix4_mod::ad_uv3dmix4(), ad_uv3drelax_mod::ad_uv3drelax(), uv_var_change_mod::ad_uv_a2c_grid(), uv_var_change_mod::ad_uv_c2a_grid(), roms_kernel_mod::adm_initial(), i4dvar_mod::analysis(), r4dvar_mod::analysis(), rbl4dvar_mod::analysis(), i4dvar_mod::background(), r4dvar_mod::background(), rbl4dvar_mod::background(), rbl4dvar_mod::background_initialize(), bbl_mod::bblm(), biology_mod::biology(), biology_floats_mod::biology_floats(), bulk_flux_mod::bulk_flux(), cgradient_mod::cgradient(), congrad_mod::congrad(), 
convolve_mod::convolve(), diag_mod::diag(), convolve_mod::error_covariance(), frc_weak_mod::frc_adgather(), frc_adjust_mod::frc_adjust(), frc_weak_mod::frc_clear(), frc_iau_mod::frc_iau(), frc_iau_mod::frc_iau_ini(), get_data(), get_idata(), get_state_mod::get_state_nf90(), get_state_mod::get_state_pio(), gls_corstep_mod::gls_corstep(), gls_prestep_mod::gls_prestep(), ice_advect_mod::ice_advect(), ice_thermo_mod::ice_thermo(), i4dvar_mod::increment(), r4dvar_mod::increment(), rbl4dvar_mod::increment(), ini_adjust_mod::ini_adjust(), ini_adjust_mod::ini_adjust_tile(), inner2state_mod::ini_c_norm(), ini_fields_mod::ini_fields(), ini_lanczos_mod::ini_lanczos(), ini_adjust_mod::ini_perturb(), ini_fields_mod::ini_zeta(), initial(), lmd_vmix_mod::lmd_vmix(), ini_adjust_mod::load_adtotl(), frc_adjust_mod::load_frc(), obc_adjust_mod::load_obc(), ini_adjust_mod::load_tltoad(), distribute_mod::mp_aggregate2d(), distribute_mod::mp_aggregate3d(), distribute_mod::mp_assemble::mp_assemblef_1d(), distribute_mod::mp_assemble::mp_assemblef_2d(), distribute_mod::mp_assemble::mp_assemblef_3d(), distribute_mod::mp_assemble::mp_assemblei_1d(), distribute_mod::mp_assemble::mp_assemblei_2d(), distribute_mod::mp_barrier(), distribute_mod::mp_bcast_struc(), distribute_mod::mp_bcastf::mp_bcastf_0d(), distribute_mod::mp_bcastf::mp_bcastf_0dp(), distribute_mod::mp_bcastf::mp_bcastf_1d(), distribute_mod::mp_bcastf::mp_bcastf_1dp(), distribute_mod::mp_bcastf::mp_bcastf_2d(), distribute_mod::mp_bcastf::mp_bcastf_2dp(), distribute_mod::mp_bcastf::mp_bcastf_3d(), distribute_mod::mp_bcastf::mp_bcastf_3dp(), distribute_mod::mp_bcastf::mp_bcastf_4d(), distribute_mod::mp_bcasti::mp_bcasti_0d(), distribute_mod::mp_bcasti::mp_bcasti_1d(), distribute_mod::mp_bcasti::mp_bcasti_2d(), distribute_mod::mp_bcastl::mp_bcastl_0d(), distribute_mod::mp_bcastl::mp_bcastl_1d(), distribute_mod::mp_bcastl::mp_bcastl_2d(), distribute_mod::mp_bcasts::mp_bcasts_0d(), distribute_mod::mp_bcasts::mp_bcasts_1d(), 
distribute_mod::mp_bcasts::mp_bcasts_2d(), distribute_mod::mp_bcasts::mp_bcasts_3d(), distribute_mod::mp_boundary(), distribute_mod::mp_collect::mp_collect_f(), distribute_mod::mp_collect::mp_collect_i(), mp_exchange_mod::mp_exchange2d(), mp_exchange_mod::mp_exchange2d_bry(), mp_exchange_mod::mp_exchange3d(), mp_exchange_mod::mp_exchange3d_bry(), mp_exchange_mod::mp_exchange4d(), distribute_mod::mp_gather2d(), distribute_mod::mp_gather3d(), distribute_mod::mp_gather_state(), distribute_mod::mp_ncread1d(), distribute_mod::mp_ncread2d(), distribute_mod::mp_ncwrite1d(), distribute_mod::mp_ncwrite2d(), distribute_mod::mp_reduce2(), distribute_mod::mp_reduce::mp_reduce_0d(), distribute_mod::mp_reduce::mp_reduce_0dp(), distribute_mod::mp_reduce::mp_reduce_1d(), distribute_mod::mp_reduce::mp_reduce_1dp(), distribute_mod::mp_reduce::mp_reduce_i8(), distribute_mod::mp_scatter2d(), distribute_mod::mp_scatter3d(), distribute_mod::mp_scatter_state(), my25_corstep_mod::my25_corstep(), my25_prestep_mod::my25_prestep(), nesting_mod::nesting(), dotproduct_mod::nl_dotproduct(), roms_kernel_mod::nlm_initial(), obc_adjust_mod::obc_adjust(), omega_mod::omega(), output(), posterior_mod::posterior(), i4dvar_mod::posterior_analysis(), i4dvar_mod::posterior_analysis_initialize(), posterior_var_mod::posterior_var(), pre_step3d_mod::pre_step3d(), prsgrd_mod::prsgrd(), random_ic_mod::random_ic(), set_massflux_mod::reset_massflux(), rho_eos_mod::rho_eos(), rhs3d_mod::rhs3d(), mod_arrays::roms_allocate_arrays(), roms_kernel_mod::roms_finalize(), mod_arrays::roms_initialize_arrays(), roms_kernel_mod::roms_run(), roms_kernel_mod::roms_run(), rp_set_depth_mod::rp_bath(), rp_biology_mod::rp_biology(), rp_bulk_flux_mod::rp_bulk_flux(), rp_diag_mod::rp_diag(), rp_frc_adjust_mod::rp_frc_adjust(), rp_get_data(), rp_get_idata(), ini_adjust_mod::rp_ini_adjust(), rp_ini_fields_mod::rp_ini_fields(), rp_ini_fields_mod::rp_ini_zeta(), rp_initial(), rp_obc_adjust_mod::rp_obc2d_adjust(), 
rp_obc_adjust_mod::rp_obc_adjust(), rp_omega_mod::rp_omega(), rp_output(), rp_pre_step3d_mod::rp_pre_step3d(), rp_prsgrd_mod::rp_prsgrd(), rp_rho_eos_mod::rp_rho_eos(), rp_rhs3d_mod::rp_rhs3d(), rp_set_data(), rp_set_depth_mod::rp_set_depth(), rp_set_depth_mod::rp_set_depth_bry(), rp_set_massflux_mod::rp_set_massflux(), rp_set_vbc_mod::rp_set_vbc(), rp_set_vbc_mod::rp_set_vbc_tile(), rp_set_zeta_mod::rp_set_zeta(), rp_ini_fields_mod::rp_set_zeta_timeavg(), rp_step2d_mod::rp_step2d(), rp_step3d_t_mod::rp_step3d_t(), rp_step3d_uv_mod::rp_step3d_uv(), rp_t3dmix2_mod::rp_t3dmix2(), rp_t3dmix4_mod::rp_t3dmix4(), rp_t3drelax_mod::rp_t3drelax(), rp_uv3dmix2_mod::rp_uv3dmix2(), rp_uv3dmix4_mod::rp_uv3dmix4(), rp_uv3drelax_mod::rp_uv3drelax(), rpcg_lanczos_mod::rpcg_lanczos(), convolve_mod::saddlec(), sed_bed_mod::sed_bed(), sed_bedload(), sed_fluxes_mod::sed_fluxes(), sed_settling_mod::sed_settling(), sed_surface_mod::sed_surface(), set_avg_mod::set_avg(), set_data(), set_depth_mod::set_depth(), set_depth_mod::set_depth0(), set_depth_mod::set_depth_bry(), set_diags(), set_masks_mod::set_masks(), set_massflux_mod::set_massflux(), set_tides_mod::set_tides(), set_vbc_mod::set_vbc(), set_vbc_mod::set_vbc_tile(), set_zeta_mod::set_zeta(), ini_fields_mod::set_zeta_timeavg(), state_read_mod::state_read_nf90(), state_read_mod::state_read_pio(), step2d_mod::step2d(), step3d_t_mod::step3d_t(), step3d_uv_mod::step3d_uv(), step_floats_mod::step_floats(), t3dmix2_mod::t3dmix2(), t3dmix4_mod::t3dmix4(), tl_set_depth_mod::tl_bath(), tl_biology_mod::tl_biology(), tl_bulk_flux_mod::tl_bulk_flux(), tl_diag_mod::tl_diag(), dotproduct_mod::tl_dotproduct(), tl_frc_adjust_mod::tl_frc_adjust(), tl_get_data(), tl_get_idata(), tl_ini_fields_mod::tl_ini_fields(), ini_adjust_mod::tl_ini_perturb(), tl_ini_fields_mod::tl_ini_zeta(), tl_initial(), inner2state_mod::tl_inner2state(), tl_nesting_mod::tl_nesting(), tl_obc_adjust_mod::tl_obc2d_adjust(), tl_obc_adjust_mod::tl_obc_adjust(), 
tl_omega_mod::tl_omega(), tl_output(), tl_pack(), tl_pre_step3d_mod::tl_pre_step3d(), tl_prsgrd_mod::tl_prsgrd(), tl_rho_eos_mod::tl_rho_eos(), tl_rhs3d_mod::tl_rhs3d(), tl_set_avg_mod::tl_set_avg(), tl_set_data(), tl_set_depth_mod::tl_set_depth(), tl_set_depth_mod::tl_set_depth_bry(), tl_set_massflux_mod::tl_set_massflux(), tl_set_vbc_mod::tl_set_vbc(), tl_set_vbc_mod::tl_set_vbc_tile(), tl_set_zeta_mod::tl_set_zeta(), tl_ini_fields_mod::tl_set_zeta_timeavg(), dotproduct_mod::tl_statenorm(), tl_step2d_mod::tl_step2d(), tl_step3d_t_mod::tl_step3d_t(), tl_step3d_uv_mod::tl_step3d_uv(), tl_t3dmix2_mod::tl_t3dmix2(), tl_t3dmix4_mod::tl_t3dmix4(), tl_t3drelax_mod::tl_t3drelax(), tl_unpack(), tl_unpack_tile(), tl_uv3dmix2_mod::tl_uv3dmix2(), tl_uv3dmix4_mod::tl_uv3dmix4(), tl_uv3drelax_mod::tl_uv3drelax(), uv_var_change_mod::tl_uv_a2c_grid(), uv_var_change_mod::tl_uv_c2a_grid(), roms_kernel_mod::tlm_initial(), uv3dmix2_mod::uv3dmix2(), uv3dmix4_mod::uv3dmix4(), uv_var_change_mod::uv_a2c_grid(), uv_var_change_mod::uv_c2a_grid(), vorticity_mod::vorticity(), vwalk_floats_mod::vwalk_floats(), wrt_state_mod::wrt_state_nf90(), and wrt_state_mod::wrt_state_pio().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ wclock_on()

recursive subroutine wclock_on ( integer, intent(in) ng,
integer, intent(in) model,
integer, intent(in) region,
integer, intent(in) line,
character (len=*), intent(in) routine )

Definition at line 2 of file timers.F.

3!
4
5!git $Id$
6!================================================== Hernan G. Arango ===
7! Copyright (c) 2002-2025 The ROMS Group !
8! Licensed under a MIT/X style license !
9! See License_ROMS.md !
10!=======================================================================
11! !
12! This routine turns on wall clock to measure the elapsed time in !
13! seconds spent by each parallel thread in requested model region. !
14! !
15! On Input: !
16! !
17! ng Nested grid number (integer) !
18! model Calling model identifier (integer) !
19! region Profiling region number (integer) !
20! line Calling model routine line (integer) !
21! routine Calling model routine (string) !
22! !
23!=======================================================================
24!
25 USE mod_param
26 USE mod_parallel
27 USE mod_iounits
28 USE mod_strings
29
30#ifdef DISTRIBUTE
31!
32 USE distribute_mod, ONLY : mp_barrier
33# ifdef ROMS_STDOUT
34 USE distribute_mod, ONLY : mp_collect
35# endif
36#endif
37!
38 implicit none
39!
40! Imported variable declarations.
41!
42 integer, intent(in) :: ng, model, region, line
43
44 character (len=*), intent(in) :: routine
45!
46! Local variable declarations.
47!
48 integer :: iregion, MyModel, NSUB
49
50 integer :: my_getpid
51#ifndef DISTRIBUTE
52 integer :: my_threadnum
53#else
54 integer :: MyCOMM, nPETs, PETrank
55# ifdef ROMS_STDOUT
56 integer :: node
57 integer, parameter :: Pspv = 0
58 integer, allocatable :: Pids(:)
59# endif
60#endif
61 real(r8), dimension(2) :: wtime
62 real(r8) :: my_wtime
63!
64!-----------------------------------------------------------------------
65! Initialize timing for all threads.
66!-----------------------------------------------------------------------
67#ifdef TRACING
68!
69 IF (master) THEN
70 WRITE (stdout,'(a,a)') '==> Entering ', trim(routine)
71 FLUSH (stdout)
72 END IF
73#endif
74!
75! Set number of subdivisions, same as for global reductions.
76!
77#ifdef DISTRIBUTE
78# ifdef DISJOINTED
79 mycomm=full_comm_world
80 npets=fullsize
81 petrank=fullrank
82# else
83 mycomm=ocn_comm_world
84 npets=numthreads
85 petrank=myrank
86# endif
87 nsub=1
88#else
89 nsub=numthreads
90#endif
91!
92! Ensure that MyModel is not zero.
93!
94 mymodel=max(1,model)
95!
96! Start the wall CPU clock for specified region, model, and grid.
97!
98 cstr(region,mymodel,ng)=my_wtime(wtime)
99!
100! If region zero, indicating first call from main driver, initialize
101! time profiling arrays and set process ID.
102!
103 IF ((region.eq.0).and.(proc(1,mymodel,ng).eq.0)) THEN
104 DO iregion=1,nregion
105 cend(iregion,mymodel,ng)=0.0_r8
106 csum(iregion,mymodel,ng)=0.0_r8
107 END DO
108 proc(1,mymodel,ng)=1
109 proc(0,mymodel,ng)=my_getpid()
110
111!$OMP CRITICAL (START_WCLOCK)
112 IF (ng.eq.1) THEN
113#ifdef DISTRIBUTE
114 CALL mp_barrier (ng, model, mycomm)
115# ifdef ROMS_STDOUT
116 IF (.not.allocated(pids)) THEN
117 allocate ( pids(npets) )
118 pids=0
119 END IF
120 pids(petrank+1)=proc(0,mymodel,ng)
121 CALL mp_collect (ng, model, npets, pspv, pids, mycomm)
122 IF (master) THEN
123 DO node=1,npets
124 WRITE (stdout,10) ' Node #', node-1, &
125 & ' (pid=',pids(node),') is active.'
126 END DO
127 END IF
128 IF (allocated(pids)) deallocate (pids)
129# else
130 WRITE (stdout,10) ' Node #', petrank, &
131 & ' (pid=',proc(0,mymodel,ng),') is active.'
132# endif
133 FLUSH (stdout)
134#else
135 WRITE (stdout,10) ' Thread #', mythread, &
136 & ' (pid=',proc(0,mymodel,ng),') is active.'
137#endif
138 END IF
139 10 FORMAT (a,i5,a,i8,a)
141 IF (thread_count.eq.nsub) thread_count=0
142!$OMP END CRITICAL (START_WCLOCK)
143 END IF
144 RETURN
integer function my_getpid()
Definition mp_routines.F:60

References mod_parallel::cend, mod_parallel::cstr, mod_parallel::csum, mod_parallel::fullrank, mod_parallel::master, distribute_mod::mp_barrier(), my_getpid(), my_wtime(), mod_parallel::myrank, mod_parallel::mythread, mod_strings::nregion, mod_parallel::numthreads, mod_parallel::ocn_comm_world, mod_parallel::proc, mod_iounits::stdout, and mod_parallel::thread_count.

Referenced by ad_biology_mod::ad_biology(), ad_bulk_flux_mod::ad_bulk_flux(), ad_diag_mod::ad_diag(), dotproduct_mod::ad_dotproduct(), ad_frc_adjust_mod::ad_frc_adjust(), ad_get_data(), ad_get_idata(), ad_ini_fields_mod::ad_ini_fields(), ini_adjust_mod::ad_ini_perturb(), ad_ini_fields_mod::ad_ini_zeta(), ad_initial(), inner2state_mod::ad_inner2state(), mp_exchange_mod::ad_mp_exchange2d(), mp_exchange_mod::ad_mp_exchange2d_bry(), mp_exchange_mod::ad_mp_exchange3d(), mp_exchange_mod::ad_mp_exchange3d_bry(), mp_exchange_mod::ad_mp_exchange4d(), ad_obc_adjust_mod::ad_obc2d_adjust(), ad_obc_adjust_mod::ad_obc_adjust(), ad_omega_mod::ad_omega(), ad_ini_fields_mod::ad_out_fields(), ad_ini_fields_mod::ad_out_zeta(), ad_output(), ad_pack(), ad_pack_tile(), ad_pre_step3d_mod::ad_pre_step3d(), ad_prsgrd_mod::ad_prsgrd(), ad_rho_eos_mod::ad_rho_eos(), ad_rhs3d_mod::ad_rhs3d(), ad_set_avg_mod::ad_set_avg(), ad_set_data(), ad_set_depth_mod::ad_set_depth(), ad_set_depth_mod::ad_set_depth_bry(), ad_set_massflux_mod::ad_set_massflux(), ad_set_vbc_mod::ad_set_vbc(), ad_set_vbc_mod::ad_set_vbc_tile(), ad_set_zeta_mod::ad_set_zeta(), ad_ini_fields_mod::ad_set_zeta_timeavg(), dotproduct_mod::ad_statenorm(), ad_step2d_mod::ad_step2d(), ad_step3d_t_mod::ad_step3d_t(), ad_step3d_uv_mod::ad_step3d_uv(), ad_t3dmix2_mod::ad_t3dmix2(), ad_t3dmix4_mod::ad_t3dmix4(), ad_t3drelax_mod::ad_t3drelax(), ad_unpack(), ad_unpack_tile(), ad_uv3dmix2_mod::ad_uv3dmix2(), ad_uv3dmix4_mod::ad_uv3dmix4(), ad_uv3drelax_mod::ad_uv3drelax(), uv_var_change_mod::ad_uv_a2c_grid(), uv_var_change_mod::ad_uv_c2a_grid(), roms_kernel_mod::adm_initial(), i4dvar_mod::analysis(), r4dvar_mod::analysis(), rbl4dvar_mod::analysis(), rbl4dvar_mod::analysis_initialize(), i4dvar_mod::background(), r4dvar_mod::background(), rbl4dvar_mod::background(), i4dvar_mod::background_initialize(), rbl4dvar_mod::background_initialize(), bbl_mod::bblm(), biology_mod::biology(), biology_floats_mod::biology_floats(), 
bulk_flux_mod::bulk_flux(), cgradient_mod::cgradient(), congrad_mod::congrad(), convolve_mod::convolve(), diag_mod::diag(), convolve_mod::error_covariance(), frc_weak_mod::frc_adgather(), frc_adjust_mod::frc_adjust(), frc_weak_mod::frc_clear(), frc_iau_mod::frc_iau(), frc_iau_mod::frc_iau_ini(), get_data(), get_idata(), get_state_mod::get_state_nf90(), get_state_mod::get_state_pio(), gls_corstep_mod::gls_corstep(), gls_prestep_mod::gls_prestep(), ice_advect_mod::ice_advect(), ice_thermo_mod::ice_thermo(), i4dvar_mod::increment(), r4dvar_mod::increment(), rbl4dvar_mod::increment(), ini_adjust_mod::ini_adjust(), ini_adjust_mod::ini_adjust_tile(), inner2state_mod::ini_c_norm(), ini_fields_mod::ini_fields(), ini_lanczos_mod::ini_lanczos(), ini_adjust_mod::ini_perturb(), ini_fields_mod::ini_zeta(), initial(), lmd_vmix_mod::lmd_vmix(), ini_adjust_mod::load_adtotl(), frc_adjust_mod::load_frc(), obc_adjust_mod::load_obc(), ini_adjust_mod::load_tltoad(), distribute_mod::mp_aggregate2d(), distribute_mod::mp_aggregate3d(), distribute_mod::mp_assemble::mp_assemblef_1d(), distribute_mod::mp_assemble::mp_assemblef_2d(), distribute_mod::mp_assemble::mp_assemblef_3d(), distribute_mod::mp_assemble::mp_assemblei_1d(), distribute_mod::mp_assemble::mp_assemblei_2d(), distribute_mod::mp_barrier(), distribute_mod::mp_bcast_struc(), distribute_mod::mp_bcastf::mp_bcastf_0d(), distribute_mod::mp_bcastf::mp_bcastf_0dp(), distribute_mod::mp_bcastf::mp_bcastf_1d(), distribute_mod::mp_bcastf::mp_bcastf_1dp(), distribute_mod::mp_bcastf::mp_bcastf_2d(), distribute_mod::mp_bcastf::mp_bcastf_2dp(), distribute_mod::mp_bcastf::mp_bcastf_3d(), distribute_mod::mp_bcastf::mp_bcastf_3dp(), distribute_mod::mp_bcastf::mp_bcastf_4d(), distribute_mod::mp_bcasti::mp_bcasti_0d(), distribute_mod::mp_bcasti::mp_bcasti_1d(), distribute_mod::mp_bcasti::mp_bcasti_2d(), distribute_mod::mp_bcastl::mp_bcastl_0d(), distribute_mod::mp_bcastl::mp_bcastl_1d(), distribute_mod::mp_bcastl::mp_bcastl_2d(), 
distribute_mod::mp_bcasts::mp_bcasts_0d(), distribute_mod::mp_bcasts::mp_bcasts_1d(), distribute_mod::mp_bcasts::mp_bcasts_2d(), distribute_mod::mp_bcasts::mp_bcasts_3d(), distribute_mod::mp_boundary(), distribute_mod::mp_collect::mp_collect_f(), distribute_mod::mp_collect::mp_collect_i(), mp_exchange_mod::mp_exchange2d(), mp_exchange_mod::mp_exchange2d_bry(), mp_exchange_mod::mp_exchange3d(), mp_exchange_mod::mp_exchange3d_bry(), mp_exchange_mod::mp_exchange4d(), distribute_mod::mp_gather2d(), distribute_mod::mp_gather3d(), distribute_mod::mp_gather_state(), distribute_mod::mp_ncread1d(), distribute_mod::mp_ncread2d(), distribute_mod::mp_ncwrite1d(), distribute_mod::mp_ncwrite2d(), distribute_mod::mp_reduce2(), distribute_mod::mp_reduce::mp_reduce_0d(), distribute_mod::mp_reduce::mp_reduce_0dp(), distribute_mod::mp_reduce::mp_reduce_1d(), distribute_mod::mp_reduce::mp_reduce_1dp(), distribute_mod::mp_reduce::mp_reduce_i8(), distribute_mod::mp_scatter2d(), distribute_mod::mp_scatter3d(), distribute_mod::mp_scatter_state(), my25_corstep_mod::my25_corstep(), my25_prestep_mod::my25_prestep(), nesting_mod::nesting(), dotproduct_mod::nl_dotproduct(), roms_kernel_mod::nlm_initial(), obc_adjust_mod::obc_adjust(), omega_mod::omega(), output(), posterior_mod::posterior(), i4dvar_mod::posterior_analysis(), i4dvar_mod::posterior_analysis_initialize(), posterior_var_mod::posterior_var(), pre_step3d_mod::pre_step3d(), prsgrd_mod::prsgrd(), random_ic_mod::random_ic(), set_massflux_mod::reset_massflux(), rho_eos_mod::rho_eos(), rhs3d_mod::rhs3d(), mod_arrays::roms_allocate_arrays(), roms_kernel_mod::roms_initialize(), mod_arrays::roms_initialize_arrays(), roms_kernel_mod::roms_initializep1(), rp_set_depth_mod::rp_bath(), rp_biology_mod::rp_biology(), rp_bulk_flux_mod::rp_bulk_flux(), rp_diag_mod::rp_diag(), rp_frc_adjust_mod::rp_frc_adjust(), rp_get_data(), rp_get_idata(), ini_adjust_mod::rp_ini_adjust(), rp_ini_fields_mod::rp_ini_fields(), rp_ini_fields_mod::rp_ini_zeta(), 
rp_initial(), rp_obc_adjust_mod::rp_obc2d_adjust(), rp_obc_adjust_mod::rp_obc_adjust(), rp_omega_mod::rp_omega(), rp_output(), rp_pre_step3d_mod::rp_pre_step3d(), rp_prsgrd_mod::rp_prsgrd(), rp_rho_eos_mod::rp_rho_eos(), rp_rhs3d_mod::rp_rhs3d(), rp_set_data(), rp_set_depth_mod::rp_set_depth(), rp_set_depth_mod::rp_set_depth_bry(), rp_set_massflux_mod::rp_set_massflux(), rp_set_vbc_mod::rp_set_vbc(), rp_set_vbc_mod::rp_set_vbc_tile(), rp_set_zeta_mod::rp_set_zeta(), rp_ini_fields_mod::rp_set_zeta_timeavg(), rp_step2d_mod::rp_step2d(), rp_step3d_t_mod::rp_step3d_t(), rp_step3d_uv_mod::rp_step3d_uv(), rp_t3dmix2_mod::rp_t3dmix2(), rp_t3dmix4_mod::rp_t3dmix4(), rp_t3drelax_mod::rp_t3drelax(), rp_uv3dmix2_mod::rp_uv3dmix2(), rp_uv3dmix4_mod::rp_uv3dmix4(), rp_uv3drelax_mod::rp_uv3drelax(), rpcg_lanczos_mod::rpcg_lanczos(), convolve_mod::saddlec(), sed_bed_mod::sed_bed(), sed_bedload(), sed_fluxes_mod::sed_fluxes(), sed_settling_mod::sed_settling(), sed_surface_mod::sed_surface(), set_avg_mod::set_avg(), set_data(), set_depth_mod::set_depth(), set_depth_mod::set_depth0(), set_depth_mod::set_depth_bry(), set_diags(), set_masks_mod::set_masks(), set_massflux_mod::set_massflux(), set_tides_mod::set_tides(), set_vbc_mod::set_vbc(), set_vbc_mod::set_vbc_tile(), set_zeta_mod::set_zeta(), ini_fields_mod::set_zeta_timeavg(), state_read_mod::state_read_nf90(), state_read_mod::state_read_pio(), step2d_mod::step2d(), step3d_t_mod::step3d_t(), step3d_uv_mod::step3d_uv(), step_floats_mod::step_floats(), t3dmix2_mod::t3dmix2(), t3dmix4_mod::t3dmix4(), tl_set_depth_mod::tl_bath(), tl_biology_mod::tl_biology(), tl_bulk_flux_mod::tl_bulk_flux(), tl_diag_mod::tl_diag(), dotproduct_mod::tl_dotproduct(), tl_frc_adjust_mod::tl_frc_adjust(), tl_get_data(), tl_get_idata(), tl_ini_fields_mod::tl_ini_fields(), ini_adjust_mod::tl_ini_perturb(), tl_ini_fields_mod::tl_ini_zeta(), tl_initial(), inner2state_mod::tl_inner2state(), tl_nesting_mod::tl_nesting(), tl_obc_adjust_mod::tl_obc2d_adjust(), 
tl_obc_adjust_mod::tl_obc_adjust(), tl_omega_mod::tl_omega(), tl_output(), tl_pack(), tl_pre_step3d_mod::tl_pre_step3d(), tl_prsgrd_mod::tl_prsgrd(), tl_rho_eos_mod::tl_rho_eos(), tl_rhs3d_mod::tl_rhs3d(), tl_set_avg_mod::tl_set_avg(), tl_set_data(), tl_set_depth_mod::tl_set_depth(), tl_set_depth_mod::tl_set_depth_bry(), tl_set_massflux_mod::tl_set_massflux(), tl_set_vbc_mod::tl_set_vbc(), tl_set_vbc_mod::tl_set_vbc_tile(), tl_set_zeta_mod::tl_set_zeta(), tl_ini_fields_mod::tl_set_zeta_timeavg(), dotproduct_mod::tl_statenorm(), tl_step2d_mod::tl_step2d(), tl_step3d_t_mod::tl_step3d_t(), tl_step3d_uv_mod::tl_step3d_uv(), tl_t3dmix2_mod::tl_t3dmix2(), tl_t3dmix4_mod::tl_t3dmix4(), tl_t3drelax_mod::tl_t3drelax(), tl_unpack(), tl_unpack_tile(), tl_uv3dmix2_mod::tl_uv3dmix2(), tl_uv3dmix4_mod::tl_uv3dmix4(), tl_uv3drelax_mod::tl_uv3drelax(), uv_var_change_mod::tl_uv_a2c_grid(), uv_var_change_mod::tl_uv_c2a_grid(), roms_kernel_mod::tlm_initial(), uv3dmix2_mod::uv3dmix2(), uv3dmix4_mod::uv3dmix4(), uv_var_change_mod::uv_a2c_grid(), uv_var_change_mod::uv_c2a_grid(), vorticity_mod::vorticity(), vwalk_floats_mod::vwalk_floats(), wrt_state_mod::wrt_state_nf90(), and wrt_state_mod::wrt_state_pio().

Here is the call graph for this function:
Here is the caller graph for this function: