Kpp-oasis-mct

Revision as of 01:55, 18 March 2019 by S.wales (talk | contribs)
This is a stub page and needs expansion.

Couple KPP to UM using OASIS3-MCT

Changes to KPP

+++ Makefile    (working copy)
-FFLAGS=-fpp -xHost -O3 -r8 -I. -traceback -fp-model precise
+FFLAGS=-fpp -xHost -O3 -r8 -I. -traceback -fp-model precise -diag-disable 10010

-OASIS3_LIB=-lpsmile.MPI1 -lmpp_io
+OASIS3_LIB=-lpsmile.MPI1 -lmct -lmpeu -lscrip

+++ steves_3D_ocn.f     (working copy)
-      use mod_prism_proto
+      use mod_oasis

-      call prism_get_localcomm_proto(kpp_mpi_comm,mpierr)
+      call oasis_get_localcomm(kpp_mpi_comm,mpierr)

+++ init_oasis3.f       (working copy)
-      USE mod_kinds_model
-      USE mod_prism_proto
-      USE mod_prism_def_partition_proto
-      USE mod_prism_put_proto
-      USE mod_prism_get_proto
-      USE mod_prism_grids_writing
+      USE mod_oasis_kinds
+      USE mod_oasis

-      CALL prism_init_comp_proto(il_comp_id, cp_modnam, ierror)
-      IF (ierror .NE. PRISM_Ok) THEN
+      CALL oasis_init_comp(il_comp_id, cp_modnam, ierror)
+      IF (ierror .NE. OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_Init_Comp_Proto = ',ierror
-         CALL prism_abort_proto(il_comp_id,'KPP init_oasis3.f','abort1')
+     +        'OASIS_Init_Comp_Proto = ',ierror
+         CALL oasis_abort(il_comp_id,'KPP init_oasis3.f','abort1')
          ! Can/should we call MIXED_ABORT here as well?
       ELSE
-         WRITE(nuout,*) 'KPP: Successful call to PRISM_Init_Comp_Proto'
+         WRITE(nuout,*) 'KPP: Successful call to OASIS_Init_Comp_Proto'
       ENDIF

       ! Get local communicator
-      CALL prism_get_localcomm_proto(il_commlocal,ierror)
-      IF (ierror .NE. PRISM_Ok) THEN
+      CALL oasis_get_localcomm(il_commlocal,ierror)
+      IF (ierror .NE. OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_Get_LocalComm_Proto = ',ierror
+     +        'OASIS_Get_LocalComm_Proto = ',ierror
       ELSE
          WRITE(nuout,*) 'KPP: Successfully received local communicator'
       ENDIF
@@ -81,15 +81,15 @@

       ! Define the grids used by KPP (for master processor only)
       IF (il_rank .EQ. 0) THEN
-         CALL prism_start_grids_writing(il_flag)
+         CALL oasis_start_grids_writing(il_flag)
          IF (il_flag .EQ. 1) THEN
       ! Will we ever need to do this?  Do we need to support it?
             WRITE(nuout,*) 'KPP: il_flag=1, so we will write ',
-     +           'grids for PRISM'
-            CALL prism_terminate_grids_writing()
+     +           'grids for OASIS'
+            CALL oasis_terminate_grids_writing()
          ELSE
             WRITE(nuout,*) 'KPP: il_flag/=1, so we will not write ',
-     +           'grids for PRISM'
+     +           'grids for OASIS'
          ENDIF
       ENDIF

@@ -103,12 +103,12 @@
       il_paral ( clim_offset   ) = 0
       il_paral ( clim_length   ) = NX_GLOBE*NY_GLOBE

-      CALL prism_def_partition_proto(il_part_id,il_paral,ierror)
-      IF (ierror.NE.PRISM_Ok) THEN
+      CALL oasis_def_partition(il_part_id,il_paral,ierror)
+      IF (ierror.NE.OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_Def_Partition_Proto = ',ierror
+     +        'OASIS_Def_Partition_Proto = ',ierror
       ELSE
-         WRITE(nuout,*) 'KPP: Called PRISM_Def_Partition_Proto'
+         WRITE(nuout,*) 'KPP: Called OASIS_Def_Partition_Proto'
       ENDIF

 #ifdef TOYCLIM /* For the OASIS3 toy model - Exchange 1D fields */
@@ -136,15 +136,15 @@
       cl_writ(6)='SVNOCEAN'

       DO i=1,jpfldout
-         CALL prism_def_var_proto(il_var_id_out(i),cl_writ(i),
-     +        il_part_id,il_var_nodims,PRISM_Out,il_var_shape,
-     +        PRISM_Real,ierror)
-         IF (ierror.NE.PRISM_Ok) THEN
+         CALL oasis_def_var(il_var_id_out(i),cl_writ(i),
+     +        il_part_id,il_var_nodims,OASIS_Out,il_var_shape,
+     +        OASIS_Real,ierror)
+         IF (ierror.NE.OASIS_Ok) THEN
             WRITE(nuout,*) 'KPP: Received error from ',
-     +           'PRISM_Def_Var_Proto = ',ierror,'for variable ',
+     +           'OASIS_Def_Var_Proto = ',ierror,'for variable ',
      +           cl_writ(i),' (output field)'
          ELSE
-            WRITE(nuout,*) 'KPP: Called PRISM_Def_Var_Proto for ',
+            WRITE(nuout,*) 'KPP: Called OASIS_Def_Var_Proto for ',
      +           'variable ',cl_writ(i),' (output field)'
          ENDIF
       ENDDO
@@ -163,25 +163,25 @@
       cl_read(11)='TAUY'

       DO i=1,jpfldin
-         CALL prism_def_var_proto(il_var_id_in(i),cl_read(i),
-     +        il_part_id,il_var_nodims,PRISM_In,il_var_shape,
-     +        PRISM_Real,ierror)
-         IF (ierror.NE.PRISM_Ok) THEN
+         CALL oasis_def_var(il_var_id_in(i),cl_read(i),
+     +        il_part_id,il_var_nodims,OASIS_In,il_var_shape,
+     +        OASIS_Real,ierror)
+         IF (ierror.NE.OASIS_Ok) THEN
             WRITE(nuout,*) 'KPP: Received error from ',
-     +           'PRISM_Def_Var_Proto = ',ierror,'for variable',
+     +           'OASIS_Def_Var_Proto = ',ierror,'for variable',
      +           cl_read(i),' (input field)'
          ELSE
-            WRITE(nuout,*) 'KPP: Called PRISM_Def_Var_Proto for ',
+            WRITE(nuout,*) 'KPP: Called OASIS_Def_Var_Proto for ',
      +           'variable ',cl_read(i),' (input field)'
          ENDIF
       ENDDO

-      CALL prism_enddef_proto(ierror)
-      IF (ierror.NE.PRISM_Ok) THEN
+      CALL oasis_enddef(ierror)
+      IF (ierror.NE.OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_enddef_proto = ',ierror
+     +        'OASIS_enddef = ',ierror
       ELSE
-         WRITE(nuout,*) 'KPP: Called PRISM_Enddef_Proto'
+         WRITE(nuout,*) 'KPP: Called OASIS_Enddef_Proto'
       ENDIF

       RETURN

+++ couple_io_oasis3.f  (working copy)
-      USE mod_kinds_model
-      USE mod_prism_proto
-      USE mod_prism_def_partition_proto
-      USE mod_prism_put_proto
-      USE mod_prism_get_proto
-      USE mod_prism_grids_writing
+      USE mod_oasis_kinds
+      USE mod_oasis

@@ -28,6 +28,7 @@
 #include <times.com>
 #include <constants.com>
 #include <initialcon.com>
+      include "mpif.h"
 c
 c     Output variables on the KPP regional grid - returned to
 c     the calling routine (usually <fluxes>).
@@ -64,15 +65,15 @@
-      call prism_get_localcomm_proto(kpp_mpi_comm, ierror)
+      call oasis_get_localcomm(kpp_mpi_comm, ierror)
       if (ierror .ne. 0) then
-        call prism_abort_proto(il_comp_id,
+        call oasis_abort(il_comp_id,
      +      'couple_io_oasis3.f',
      +      'getcomm')
       end if
       call MPI_Comm_rank(kpp_mpi_comm, kpp_mpi_rank, ierror)
       if (ierror .ne. 0) then
-        call prism_abort_proto(il_comp_id,'couple_io_oasis3.f','rank')
+        call oasis_abort(il_comp_id,'couple_io_oasis3.f','rank')
       end if
-            CALL prism_get_proto(il_var_id_in(i),
+            CALL oasis_get(il_var_id_in(i),

-            IF (ierror.NE.PRISM_Ok .and. ierror .LT. PRISM_Recvd) THEN
+            IF (ierror.NE.OASIS_Ok .and. ierror .LT. OASIS_Recvd) THEN

-     +              'PRISM_Get_Proto =',ierror,' receiving variable ',
+     +              'OASIS_Get_Proto =',ierror,' receiving variable ',

-               CALL prism_abort_proto(il_comp_id,'couple_io_oasis3.f',
+               CALL oasis_abort(il_comp_id,'couple_io_oasis3.f',

@@ -191,15 +192,15 @@

-      USE mod_kinds_model
-      USE mod_prism_proto
-      USE mod_prism_def_partition_proto
-      USE mod_prism_put_proto
-      USE mod_prism_get_proto
-      USE mod_prism_grids_writing
+      USE mod_oasis_kinds
+      USE mod_oasis

@@ -418,22 +419,22 @@

-     +     'KPP: Calling PRISM_Put_Proto for variable ' // cl_writ(i) )
-         CALL prism_put_proto(il_var_id_out(i),
+     +     'KPP: Calling OASIS_Put_Proto for variable ' // cl_writ(i) )
+         CALL oasis_put(il_var_id_out(i),

-         IF (ierror.NE.PRISM_Ok.and.ierror.LT.PRISM_Sent) THEN
+         IF (ierror.NE.OASIS_Ok.and.ierror.LT.OASIS_Sent) THEN

-     +        'KPP: Received error from PRISM_Put_Proto =',ierror )
+     +        'KPP: Received error from OASIS_Put_Proto =',ierror )

-            CALL prism_abort_proto(il_comp_id,'couple_io_oasis3.f',
+            CALL oasis_abort(il_comp_id,'couple_io_oasis3.f',

-     +       'KPP: Successfully called PRISM_Put_Proto for variable ' //
+     +       'KPP: Successfully called OASIS_Put_Proto for variable ' //

-      USE mod_kinds_model
-      USE mod_prism_proto
+      USE mod_oasis_kinds
+      USE mod_oasis

-     +     'Calling prism_terminate_proto(ierror)' )
-      CALL prism_terminate_proto(ierror)
+     +     'Calling oasis_terminate(ierror)' )
+      CALL oasis_terminate(ierror)

-     +     'Called prism_terminate_proto(ierror)' )
-      IF (ierror .NE. PRISM_Ok) THEN
+     +     'Called oasis_terminate(ierror)' )
+      IF (ierror .NE. OASIS_Ok) THEN

-     +        'PRISM_Terminate_Proto =',ierror,
+     +        'OASIS_Terminate_Proto =',ierror,

Changes to UM

Mainly the configuration build:

    • ummodel/cfg/bld.cfg

Add:

excl_dep                                               USE::mod_oasis_kinds

and in tool::ldflags remove -lmpp_io and add -lmct -lscrip -lmpeu

    • umrecon/cfg/bld.cfg

In tool::ldflags remove -lmpp_io and add -lmct -lscrip -lmpeu

Current Crash

Runtime Crash:

NEMO_NPROC  CICE_NPROC
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
     Version 7.3 template, Unified Model ,  Non-Operational
     Created by UMUI version 7.3
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
PATH used = /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin:
                    /short/w35/hxw599/UM_ROUTDIR//vaapb/bin:/projects/access/umdir/vn7.3/normal/scripts:
                    /projects/access/umdir/vn7.3/normal/exec:/projects/access/umdir/fcm1.4/bin:
                    /projects/access/umdir/vn7.3/normal/utils:/projects/access/umdir/bin:
                    /projects/access/umdir/vn7.3/bin:/projects/access/umdir/umui2.0/bin:
                    /projects/access/bin:/projects/access/umdir/vn7.3/normal/runscripts:
                    /apps/openmpi/wrapper:/apps/openmpi/1.8.2/bin:/apps/x11vnc/0.9.13/bin:
                    /opt/bin:/bin:/usr/bin:/opt/pbs/default/bin:/projects/access/bin/:/home/599/hxw599/bin:.
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
     Job started at : Mon Nov  9 15:57:08 AEDT 2015
     Run started from UMUI
     Running from control files in /home/599/hxw599/umui_runs/vaapb-313155649
uamul (collab) - N48 KPP - sea ice
This job is running on machine r76,
using UM directory /projects/access/umdir,
*****'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
   Starting script :   qsexecute
   Starting time   :   Mon Nov  9 15:57:08 AEDT 2015
*****'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

KPP using 15 processors

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute: Executing setup

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qssetup: Job terminated normally

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute: Executing dump reconfiguration program

*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
RCF Executable : /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qxreconf
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute: Executing model run

*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
UM Executable : /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/vaapb.exe
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

No OASIS3 angles file will be used.
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''*
No existing rmp_* file directory specified
Any existing rmp_* files will be removed from
for safety
Generating rmp_* files at run time
NOTE: This will vastly increase your required run time
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''*
cp: cannot stat `/short/w35/hxw599/vaapb/kpp-scripts//namcouple': No such file or directory
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute[888]: /short/w35/hxw599/vaapb/kpp-scripts//kpp_run_pre.ksh: not found [No such file or directory]
readline() on closed filehandle F0 at /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/OASIS3_kpp line 60.
readline() on closed filehandle F1 at /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/OASIS3_kpp line 120.
seconds total = 0
oasis_init_comp: Calling MPI_Init
... line repeated to a total of 48 times == number of UM cores

oasis_init_comp: Not Calling MPI_Init
... line repeated to a total of 15 times == number of KPP cores

forrtl: error (78): process killed (SIGTERM)
... (snip 47 um sigterms and 15 kpp sigterms)

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute[1165]: /short/w35/hxw599/vaapb/kpp-scripts//kpp_run_post.ksh: not found [No such file or directory]
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
   Ending script   :   qsexecute
   Completion code :   1
   Completion time :   Mon Nov  9 15:57:16 AEDT 2015
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
\n\n\n

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsmaster: Failed in qsexecute in model vaapb
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
   Starting script :   qsfinal
   Starting time   :   Mon Nov  9 15:57:16 AEDT 2015
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsfinal: Model vaapb - Error: No history files
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
   Ending script   :   qsfinal
   Completion code :   135
   Completion time :   Mon Nov  9 15:57:16 AEDT 2015
*****''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
\n\n\n

/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsmaster: failed in final in model vaapb
 <<<< Information about How Many Lines of Output follow >>>>
 73  lines in main OUTPUT file.
 0 lines of O/P from pe0.
 <<<<         Lines of Output Information ends          >>>>

 '''*   *   *  ''''''*  ''''''   *   *  ''''''*
*   *  *   *    *    *   *  *   *    *
*   *  *   *    *    *   *  *   *    *
*   *  *   *    *    *   *  *   *    *
*   *  *   *    *    ''''''   *   *    *
*   *  *   *    *    *      *   *    *
*   *  *   *    *    *      *   *    *
*   *  *   *    *    *      *   *    *
 '''*    '''*     *    *       '''*     *

****    '''*   ''''''*         '''*   *   *  ''''''*  ''''''   *   *  ''''''*
*   *  *   *  *            *   *  *   *    *    *   *  *   *    *
*   *  *      *            *   *  *   *    *    *   *  *   *    *
*   *  *      *            *   *  *   *    *    *   *  *   *    *
****   *      ''''''         *   *  *   *    *    ''''''   *   *    *
**     *      *            *   *  *   *    *    *      *   *    *
* *    *      *            *   *  *   *    *    *      *   *    *
*  *   *   *  *            *   *  *   *    *    *      *   *    *
*   *   '''*   *             '''*    '''*     *    *       '''*     *

qsexecute:  %RECONA% Atmosphere reconfiguration step

 =====================================================
 GCOM Version 3.3
 openmpi/1.6.5,intel-fc/12.1.9.293
 Using precision : 64bit INTEGERs and 64bit REALs
 Built at Thu Aug 29 20:23:42 EST 2013
 =====================================================

 Parallel Reconfiguration using                      1  processor(s)
 divided into a LPG with nproc_x=                     1 and nproc_y=
                     1

OPEN:  Claimed 32000512 Bytes (4000064 Words) for Buffering
OPEN:  Buffer Address is                   F6A41040
CLOSE: File /short/w48/bxp565/ancils/lsm_claudia Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/qrparm.soil Closed on Unit 12
CLOSE: File /short/w48/bxp565/ancils/lsm_claudia Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/qrparm.orog Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/cable_vegfrac_N48.anc Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/cable_vegfunc_N48.anc Closed on Unit 12
CLOSE: File /short/w48/bxp565/ancils/lfrac_claudia Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/qrparm.soil.dust Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/TRIP_riv_store_ancil2 Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/riverrouting_access_v2 Closed on Unit 12
CLOSE: File /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/vaapb.astart Closed on Unit 11
CLOSE: File /short/w48/dxd565/UM_ROUTDIR/dxd565/ualdd//um-dump.restart Closed on Unit 10

*   *  *     *         '''*   *   *  ''''''*  ''''''   *   *  ''''''*
*   *  '''   '''        *   *  *   *    *    *   *  *   *    *
*   *  * * * *        *   *  *   *    *    *   *  *   *    *
*   *  *  *  *        *   *  *   *    *    *   *  *   *    *
*   *  *     *        *   *  *   *    *    ''''''   *   *    *
*   *  *     *        *   *  *   *    *    *      *   *    *
*   *  *     *        *   *  *   *    *    *      *   *    *
*   *  *     *        *   *  *   *    *    *      *   *    *
 '''*   *     *         '''*    '''*     *    *       '''*     *

USING KPP_PRERUN

qsexecute: %MODEL% output follows:-

UMMACHINE =  ALTIX
false
USING LINUXMPP
ACCESSRUNCMD  -n 48 ./um7.3x : -n 15 ./toyoce
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD
with errorcode 0.

NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.
--------------------------------------------------------------------------
USING KPP_POSTRUN
/short/w35/hxw599/vaapb/kpp-scripts//kpp_run_post.ksh exited with error code 127
0+1 records in
0+1 records out
3787 bytes (3.8 kB) copied, 0.000257916 s, 14.7 MB/s