Source code for CIME.case.case_submit

#!/usr/bin/env python

"""
case.submit - Submit a cesm workflow to the queueing system or run it
if there is no queueing system.  A cesm workflow may include multiple
jobs.
submit, check_case and check_DA_settings are members of class Case in file case.py
"""
from six.moves                      import configparser
from CIME.XML.standard_module_setup import *
from CIME.utils                     import expect, run_and_log_case_status, verbatim_success_msg, CIMEError
from CIME.locked_files              import unlock_file, lock_file
from CIME.test_status               import *

import socket

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

def _build_prereq_str(case, prev_job_ids):
    """Join the ids of previously submitted jobs into one prerequisite string.

    Args:
        case: object whose ``get_value("depend_separator")`` returns the
            separator to place between job ids.
        prev_job_ids: mapping whose values are the job ids; each one is
            converted with ``str()``.

    Returns:
        The job ids separated by the delimiter, with no trailing delimiter.
        An empty string when ``prev_job_ids`` has no entries.
    """
    delimiter = case.get_value("depend_separator")
    job_ids = [str(job_id) for job_id in prev_job_ids.values()]
    if not job_ids:
        # Nothing to join; do not touch the delimiter at all.
        return ""
    # join() places the delimiter only *between* ids, so this is correct for
    # separators of any length (slicing off one trailing character is not).
    return delimiter.join(job_ids)

def _submit(case, job=None, no_batch=False, prereq=None, allow_fail=False, resubmit=False,
            resubmit_immediate=False, skip_pnl=False, mail_user=None, mail_type=None,
            batch_args=None, workflow=True):
    """Prepare the case and hand its job(s) to ``case.submit_jobs``.

    Steps, in order: sanity-check a CONTINUE_RUN request, record the batch
    system in BATCH_SYSTEM and re-lock the env_batch file (regenerating the
    batch scripts if that file changed), update the resubmit-related case
    values, load the environment, submit, and record the resulting job ids
    in JOB_IDS.

    Args:
        case: the case object being submitted.
        job: name of the job to submit; defaults to ``case.get_first_job()``.
        no_batch: submit without a batch system.  Forced to True when the
            batch system type is "none", or when this is a resubmit and
            EXTERNAL_WORKFLOW is set.
        resubmit: True when this call is a resubmission; RESUBMIT is then
            decremented and the first-run checks are skipped.
        prereq, allow_fail, resubmit_immediate, skip_pnl, mail_user,
        mail_type, batch_args, workflow: passed through unchanged to
            ``case.submit_jobs``.

    Returns:
        A string of ``"jobname:jobid"`` items joined by ``", "`` for every
        submitted job with a truthy id; empty if there are none.

    Raises:
        Whatever ``expect`` raises when a CONTINUE_RUN precondition fails.
    """
    if job is None:
        job = case.get_first_job()

    # Check mediator
    hasMediator = True
    comp_classes = case.get_values("COMP_CLASSES")
    if 'CPL' not in comp_classes:
        hasMediator = False

    # Check if CONTINUE_RUN value makes sense
    if job != "case.test" and case.get_value("CONTINUE_RUN") and hasMediator:
        rundir = case.get_value("RUNDIR")
        expect(os.path.isdir(rundir),
               "CONTINUE_RUN is true but RUNDIR {} does not exist".format(rundir))
        # only checks for the first instance in a multidriver case
        if case.get_value("COMP_INTERFACE") == "nuopc":
            rpointer = "rpointer.cpl"
        elif case.get_value("MULTI_DRIVER"):
            rpointer = "rpointer.drv_0001"
        else:
            rpointer = "rpointer.drv"
        expect(os.path.exists(os.path.join(rundir,rpointer)),
               "CONTINUE_RUN is true but this case does not appear to have restart files staged in {} {}".format(rundir,rpointer))
        # Finally we open the rpointer file and check that it's correct
        casename = case.get_value("CASE")
        with open(os.path.join(rundir,rpointer), "r") as fd:
            # The first line of the rpointer file names the restart file.
            ncfile = fd.readline().strip()
            expect(ncfile.startswith(casename) and
                   os.path.exists(os.path.join(rundir,ncfile)),
                   "File {ncfile} not present or does not match case {casename}".
                   format(ncfile=os.path.join(rundir,ncfile),casename=casename))

    # if case.submit is called with the no_batch flag then we assume that this
    # flag will stay in effect for the duration of the RESUBMITs
    env_batch = case.get_env("batch")
    external_workflow = case.get_value("EXTERNAL_WORKFLOW")
    # Precedence: "none" batch system, OR (resubmit AND external workflow).
    if env_batch.get_batch_system_type() == "none" or resubmit and external_workflow:
        no_batch = True

    if no_batch:
        batch_system = "none"
    else:
        batch_system = env_batch.get_batch_system_type()
    unlock_file(os.path.basename(env_batch.filename))
    case.set_value("BATCH_SYSTEM", batch_system)

    env_batch_has_changed = False
    if not external_workflow:
        try:
            case.check_lockedfile(os.path.basename(env_batch.filename))
        except CIMEError:
            # Same exception type as the equivalent check further down; a bare
            # except here would also swallow KeyboardInterrupt.
            env_batch_has_changed = True

    if batch_system != "none" and env_batch_has_changed and not external_workflow:
        # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc)
        logger.warning(\
"""
env_batch.xml appears to have changed, regenerating batch scripts
manual edits to these file will be lost!
""")
        env_batch.make_all_batch_files(case)
    case.flush()
    lock_file(os.path.basename(env_batch.filename))

    if resubmit:
        # This is a resubmission, do not reinitialize test values
        if job == "case.test":
            case.set_value("IS_FIRST_RUN", False)

        resub = case.get_value("RESUBMIT")
        logger.info("Submitting job '{}', resubmit={:d}".format(job, resub))
        case.set_value("RESUBMIT", resub-1)
        if case.get_value("RESUBMIT_SETS_CONTINUE_RUN"):
            case.set_value("CONTINUE_RUN", True)

    else:
        if job == "case.test":
            case.set_value("IS_FIRST_RUN", True)

        # NOTE(review): the BATCH_SYSTEM / locked-file handling below repeats
        # what was already done above for every call; confirm both are needed.
        if no_batch:
            batch_system = "none"
        else:
            batch_system = env_batch.get_batch_system_type()

        case.set_value("BATCH_SYSTEM", batch_system)

        env_batch_has_changed = False
        try:
            case.check_lockedfile(os.path.basename(env_batch.filename))
        except CIMEError:
            env_batch_has_changed = True

        if env_batch.get_batch_system_type() != "none" and env_batch_has_changed:
            # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc)
            logger.warning(\
"""
env_batch.xml appears to have changed, regenerating batch scripts
manual edits to these file will be lost!
""")
            env_batch.make_all_batch_files(case)

        unlock_file(os.path.basename(env_batch.filename))
        lock_file(os.path.basename(env_batch.filename))

        case.check_case()
        if job == case.get_primary_job():
            case.check_DA_settings()
            if case.get_value("MACH") == "mira":
                # Written relative to the current working directory.
                with open(".original_host", "w") as fd:
                    fd.write( socket.gethostname())

    #Load Modules
    case.load_env()

    case.flush()

    logger.warning("submit_jobs {}".format(job))
    job_ids = case.submit_jobs(no_batch=no_batch, job=job, prereq=prereq,
                               skip_pnl=skip_pnl, resubmit_immediate=resubmit_immediate,
                               allow_fail=allow_fail, mail_user=mail_user,
                               mail_type=mail_type, batch_args=batch_args, workflow=workflow)

    # Keep only jobs that came back with a truthy id, as "name:id" items.
    xml_jobids = []
    for jobname, jobid in job_ids.items():
        logger.info("Submitted job {} with id {}".format(jobname, jobid))
        if jobid:
            xml_jobids.append("{}:{}".format(jobname, jobid))

    xml_jobid_text = ", ".join(xml_jobids)
    if xml_jobid_text:
        case.set_value("JOB_IDS", xml_jobid_text)

    return xml_jobid_text

def submit(self, job=None, no_batch=False, prereq=None, allow_fail=False, resubmit=False,
           resubmit_immediate=False, skip_pnl=False, mail_user=None, mail_type=None,
           batch_args=None, workflow=True):
    """Submit this case's job(s), logging the outcome as "case.submit".

    For a test case (TEST is set) the SUBMIT phase is marked as passed
    before submitting and marked as failed if the submission raises.  On a
    resubmit, options not given by the caller are read back from the
    ``.submit_options`` file in CASEROOT when that file exists.

    Args:
        job, no_batch, prereq, allow_fail, resubmit, workflow: forwarded to
            ``_submit``.
        resubmit_immediate: forwarded to ``_submit``; reset to False (with a
            warning) when MACH is 'mira' or 'cetus'.
        skip_pnl, mail_user, mail_type, batch_args: forwarded to
            ``_submit``; on a resubmit, a value that was not supplied is
            taken from the ``SubmitOptions`` section of ``.submit_options``.

    Raises:
        Re-raises anything raised during submission, after recording the
        SUBMIT phase failure for test cases.
    """
    if resubmit_immediate and self.get_value("MACH") in ['mira', 'cetus']:
        logger.warning("resubmit_immediate does not work on Mira/Cetus, submitting normally")
        resubmit_immediate = False

    caseroot = self.get_value("CASEROOT")
    if self.get_value("TEST"):
        casebaseid = self.get_value("CASEBASEID")
        # This should take care of the race condition where the submitted job
        # begins immediately and tries to set RUN phase. We proactively assume
        # a passed SUBMIT phase. If this state is already PASS, don't set it again
        # because then we'll lose RUN phase info if it's there. This info is important
        # for system_tests_common to know if it needs to reinitialize the test or not.
        with TestStatus(test_dir=caseroot, test_name=casebaseid) as ts:
            phase_status = ts.get_status(SUBMIT_PHASE)
            if phase_status != TEST_PASS_STATUS:
                ts.set_status(SUBMIT_PHASE, TEST_PASS_STATUS)

    # If this is a resubmit check the hidden file .submit_options for
    # any submit options used on the original submit and use them again
    submit_options = os.path.join(caseroot, ".submit_options")
    if resubmit and os.path.exists(submit_options):
        config = configparser.RawConfigParser()
        config.read(submit_options)
        if not skip_pnl and config.has_option('SubmitOptions','skip_pnl'):
            skip_pnl = config.getboolean('SubmitOptions', 'skip_pnl')
        if mail_user is None and config.has_option('SubmitOptions', 'mail_user'):
            mail_user = config.get('SubmitOptions', 'mail_user')
        if mail_type is None and config.has_option('SubmitOptions', 'mail_type'):
            # Stored as one comma-separated string; callers get a list.
            mail_type = str(config.get('SubmitOptions', 'mail_type')).split(',')
        if batch_args is None and config.has_option('SubmitOptions', 'batch_args'):
            batch_args = config.get('SubmitOptions', 'batch_args')

    try:
        functor = lambda: _submit(self, job=job, no_batch=no_batch, prereq=prereq,
                                  allow_fail=allow_fail, resubmit=resubmit,
                                  resubmit_immediate=resubmit_immediate, skip_pnl=skip_pnl,
                                  mail_user=mail_user, mail_type=mail_type,
                                  batch_args=batch_args, workflow=workflow)
        run_and_log_case_status(functor, "case.submit", caseroot=caseroot,
                                custom_success_msg_functor=verbatim_success_msg)
    except BaseException: # Want to catch KeyboardInterrupt too
        # If something failed in the batch system, make sure to mark
        # the test as failed if we are running a test.
        if self.get_value("TEST"):
            with TestStatus(test_dir=caseroot, test_name=casebaseid) as ts:
                ts.set_status(SUBMIT_PHASE, TEST_FAIL_STATUS)

        raise
def check_case(self):
    """Run the pre-submission checks for this case.

    Checks the locked files, creates the namelists, checks that all input
    data is available, and requires BUILD_COMPLETE to be true.

    Raises:
        Whatever ``expect`` raises when BUILD_COMPLETE is not true, plus
        anything raised by the individual check methods.
    """
    self.check_lockedfiles()
    self.create_namelists() # Must be called before check_all_input_data
    logger.info("Checking that inputdata is available as part of case submission")
    self.check_all_input_data()

    if self.get_value('COMP_WAV') == 'ww':
        # the ww3 buildnml has dependencies on inputdata so we must run it again
        self.create_namelists(component='WAV')

    expect(self.get_value("BUILD_COMPLETE"), "Build complete is "
           "not True please rebuild the model by calling case.build")
    logger.info("Check case OK")
def check_DA_settings(self):
    """Log a message when data assimilation is enabled for this case.

    Data assimilation counts as enabled when DATA_ASSIMILATION_SCRIPT is a
    non-empty path to an existing file and DATA_ASSIMILATION_CYCLES is
    greater than zero.  Nothing is raised or returned when it is not
    enabled.
    """
    script = self.get_value("DATA_ASSIMILATION_SCRIPT")
    cycles = self.get_value("DATA_ASSIMILATION_CYCLES")
    # Truthiness covers both an empty string and an unset (None) script,
    # where len() would raise on None.
    if script and os.path.isfile(script) and cycles > 0:
        logger.info("Data Assimilation enabled using script {} with {:d} cycles".format(script, cycles))