Source code for CIME.hist_utils

"""
Functions for actions pertaining to history files.
"""

from CIME.XML.standard_module_setup import *
from CIME.test_status import TEST_NO_BASELINES_COMMENT

from CIME.utils import get_current_commit, get_timestamp, get_model, safe_copy

import logging, glob, os, re, stat, filecmp
logger = logging.getLogger(__name__)

BLESS_LOG_NAME = "bless_log"

def _iter_model_file_substrs(case):
    models = case.get_compset_components()
    models.append('cpl')
    for model in models:
        yield model

def _get_all_hist_files(testcase, model, from_dir, suffix=""):
    suffix = (".{}".format(suffix) if suffix else "")

    # Match hist files produced by run
    test_hists = glob.glob("{}/{}.{}*.h?.nc{}".format(from_dir, testcase, model, suffix))
    test_hists.extend(glob.glob("{}/{}.{}*.h.nc{}".format(from_dir, testcase, model, suffix)))

    # Match multi-instance files produced by run
    test_hists.extend(glob.glob("{}/{}.{}*.h?.*.nc{}".format(from_dir, testcase, model, suffix)))
    test_hists.extend(glob.glob("{}/{}.{}*.h.*.nc{}".format(from_dir, testcase, model, suffix)))

    # suffix == "" implies baseline comparison, baseline hist files have simpler names

    if suffix == "":
        test_hists.extend(glob.glob("{}/{}*.h.nc".format(from_dir, model)))
        test_hists.extend(glob.glob("{}/{}*.h?.nc".format(from_dir, model)))
        test_hists.extend(glob.glob("{}/{}*.h.*.nc".format(from_dir, model)))
        test_hists.extend(glob.glob("{}/{}*.h?.*.nc".format(from_dir, model)))

    test_hists.sort()
    return test_hists

def _get_latest_hist_files(testcase, model, from_dir, suffix=""):
    test_hists = _get_all_hist_files(testcase, model, from_dir, suffix)
    latest_files = {}
    histlist = []
    for hist in test_hists:
        ext = get_extension(model, hist)
        latest_files[ext] = hist

    for key in latest_files.keys():
        histlist.append(latest_files[key])
    return histlist

[docs]def copy(case, suffix): """Copy the most recent batch of hist files in a case, adding the given suffix. This can allow you to temporarily "save" these files so they won't be blown away if you re-run the case. case - The case containing the files you want to save suffix - The string suffix you want to add to saved files, this can be used to find them later. """ rundir = case.get_value("RUNDIR") testcase = case.get_value("CASE") # Loop over models comments = "Copying hist files to suffix '{}'\n".format(suffix) num_copied = 0 for model in _iter_model_file_substrs(case): comments += " Copying hist files for model '{}'\n".format(model) test_hists = _get_latest_hist_files(testcase, model, rundir) num_copied += len(test_hists) for test_hist in test_hists: new_file = "{}.{}".format(test_hist, suffix) if os.path.exists(new_file): os.remove(new_file) comments += " Copying '{}' to '{}'\n".format(test_hist, new_file) # Need to copy rather than move in case there are some history files # that will need to continue to be filled on the next phase; this # can be the case for a restart run. # # (If it weren't for that possibility, a move/rename would be more # robust here: The problem with a copy is that there can be # confusion after the second run as to which files were created by # the first run and which by the second. For example, if the second # run fails to output any history files, the test will still pass, # because the test system will think that run1's files were output # by run2. But we live with that downside for the sake of the reason # noted above.) safe_copy(test_hist, new_file) expect(num_copied > 0, "copy failed: no hist files found in rundir '{}'".format(rundir)) return comments
def _hists_match(model, hists1, hists2, suffix1="", suffix2=""): """ return (num in set 1 but not 2 , num in set 2 but not 1, matchups) >>> hists1 = ['FOO.G.cpl.h1.nc', 'FOO.G.cpl.h2.nc', 'FOO.G.cpl.h3.nc'] >>> hists2 = ['cpl.h2.nc', 'cpl.h3.nc', 'cpl.h4.nc'] >>> _hists_match('cpl', hists1, hists2) (['FOO.G.cpl.h1.nc'], ['cpl.h4.nc'], [('FOO.G.cpl.h2.nc', 'cpl.h2.nc'), ('FOO.G.cpl.h3.nc', 'cpl.h3.nc')]) >>> hists1 = ['FOO.G.cpl.h1.nc.SUF1', 'FOO.G.cpl.h2.nc.SUF1', 'FOO.G.cpl.h3.nc.SUF1'] >>> hists2 = ['cpl.h2.nc.SUF2', 'cpl.h3.nc.SUF2', 'cpl.h4.nc.SUF2'] >>> _hists_match('cpl', hists1, hists2, 'SUF1', 'SUF2') (['FOO.G.cpl.h1.nc.SUF1'], ['cpl.h4.nc.SUF2'], [('FOO.G.cpl.h2.nc.SUF1', 'cpl.h2.nc.SUF2'), ('FOO.G.cpl.h3.nc.SUF1', 'cpl.h3.nc.SUF2')]) >>> hists1 = ['cam.h0.1850-01-08-00000.nc'] >>> hists2 = ['cam_0001.h0.1850-01-08-00000.nc','cam_0002.h0.1850-01-08-00000.nc'] >>> _hists_match('cam', hists1, hists2, '', '') ([], [], [('cam.h0.1850-01-08-00000.nc', 'cam_0001.h0.1850-01-08-00000.nc'), ('cam.h0.1850-01-08-00000.nc', 'cam_0002.h0.1850-01-08-00000.nc')]) >>> hists1 = ['cam_0001.h0.1850-01-08-00000.nc.base','cam_0002.h0.1850-01-08-00000.nc.base'] >>> hists2 = ['cam_0001.h0.1850-01-08-00000.nc.rest','cam_0002.h0.1850-01-08-00000.nc.rest'] >>> _hists_match('cam', hists1, hists2, 'base', 'rest') ([], [], [('cam_0001.h0.1850-01-08-00000.nc.base', 'cam_0001.h0.1850-01-08-00000.nc.rest'), ('cam_0002.h0.1850-01-08-00000.nc.base', 'cam_0002.h0.1850-01-08-00000.nc.rest')]) """ normalized1, normalized2 = [], [] multi_normalized1, multi_normalized2 = [], [] multiinst = False for hists, suffix, normalized, multi_normalized in [(hists1, suffix1, normalized1, multi_normalized1), (hists2, suffix2, normalized2, multi_normalized2)]: for hist in hists: normalized_name = hist[hist.rfind(model):] if suffix != "": expect(normalized_name.endswith(suffix), "How did '{}' not have suffix '{}'".format(hist, suffix)) normalized_name = normalized_name[:len(normalized_name) - len(suffix) - 1] m = re.search("(.+)_[0-9]{4}(.+.nc)",normalized_name) if m is not None: multiinst = True multi_normalized.append(m.group(1)+m.group(2)) normalized.append(normalized_name) set_of_1_not_2 = set(normalized1) - set(normalized2) set_of_2_not_1 = set(normalized2) - set(normalized1) one_not_two = sorted([hists1[normalized1.index(item)] for item in set_of_1_not_2]) two_not_one = sorted([hists2[normalized2.index(item)] for item in set_of_2_not_1]) both = set(normalized1) & set(normalized2) match_ups = sorted([ (hists1[normalized1.index(item)], hists2[normalized2.index(item)]) for item in both]) # Special case - comparing multiinstance to single instance files if multi_normalized1 != multi_normalized2: # in this case hists1 contains multiinstance hists2 does not if set(multi_normalized1) == set(normalized2): for idx, norm_hist1 in enumerate(multi_normalized1): for idx1, hist2 in enumerate(hists2): norm_hist2 = normalized2[idx1] if norm_hist1 == norm_hist2: match_ups.append((hists1[idx], hist2)) if hist2 in two_not_one: two_not_one.remove(hist2) if hists1[idx] in one_not_two: one_not_two.remove(hists1[idx]) # in this case hists2 contains multiinstance hists1 does not if set(multi_normalized2) == set(normalized1): for idx, norm_hist2 in enumerate(multi_normalized2): for idx1, hist1 in enumerate(hists1): norm_hist1 = normalized1[idx1] if norm_hist2 == norm_hist1: match_ups.append((hist1, hists2[idx])) if hist1 in one_not_two: one_not_two.remove(hist1) if hists2[idx] in two_not_one: two_not_one.remove(hists2[idx]) if not multiinst: expect(len(match_ups) + len(set_of_1_not_2) == len(hists1), "Programming error1") expect(len(match_ups) + len(set_of_2_not_1) == len(hists2), "Programming error2") return one_not_two, two_not_one, match_ups def _compare_hists(case, from_dir1, from_dir2, suffix1="", suffix2="", outfile_suffix=""): if from_dir1 == from_dir2: expect(suffix1 != suffix2, "Comparing files to themselves?") testcase = case.get_value("CASE") casedir = case.get_value("CASEROOT") all_success = True num_compared = 0 comments = "Comparing hists for case '{}' dir1='{}', suffix1='{}', dir2='{}' suffix2='{}'\n".format(testcase, from_dir1, suffix1, from_dir2, suffix2) multiinst_driver_compare = False for model in _iter_model_file_substrs(case): if model == 'cpl' and suffix2 == 'multiinst': multiinst_driver_compare = True comments += " comparing model '{}'\n".format(model) hists1 = _get_latest_hist_files(testcase, model, from_dir1, suffix1) hists2 = _get_latest_hist_files(testcase, model, from_dir2, suffix2) if len(hists1) == 0 and len(hists2) == 0: comments += " no hist files found for model {}\n".format(model) continue one_not_two, two_not_one, match_ups = _hists_match(model, hists1, hists2, suffix1, suffix2) for item in one_not_two: comments += " File '{}' had no counterpart in '{}' with suffix '{}'\n".format(item, from_dir2, suffix2) all_success = False for item in two_not_one: comments += " File '{}' had no counterpart in '{}' with suffix '{}'\n".format(item, from_dir1, suffix1) all_success = False num_compared += len(match_ups) for hist1, hist2 in match_ups: success, cprnc_log_file = cprnc(model, hist1, hist2, case, from_dir1, multiinst_driver_compare=multiinst_driver_compare, outfile_suffix=outfile_suffix) if success: comments += " {} matched {}\n".format(hist1, hist2) else: comments += " {} did NOT match {}\n".format(hist1, hist2) comments += " cat " + cprnc_log_file + "\n" expected_log_file = os.path.join(casedir, os.path.basename(cprnc_log_file)) if not (os.path.exists(expected_log_file) and filecmp.cmp(cprnc_log_file, expected_log_file)): try: safe_copy(cprnc_log_file, casedir) except (OSError, IOError) as _: logger.warning("Could not copy {} to {}".format(cprnc_log_file, casedir)) all_success = False if num_compared == 0: all_success = False comments += "Did not compare any hist files! Missing baselines?\n" comments += "PASS" if all_success else "FAIL" return all_success, comments
[docs]def compare_test(case, suffix1, suffix2): """ Compares two sets of component history files in the testcase directory case - The case containing the hist files to compare suffix1 - The suffix that identifies the first batch of hist files suffix1 - The suffix that identifies the second batch of hist files returns (SUCCESS, comments) """ rundir = case.get_value("RUNDIR") return _compare_hists(case, rundir, rundir, suffix1, suffix2)
[docs]def cprnc(model, file1, file2, case, rundir, multiinst_driver_compare=False, outfile_suffix=""): """ Run cprnc to compare two individual nc files file1 - the full or relative path of the first file file2 - the full or relative path of the second file case - the case containing the files rundir - the rundir for the case outfile_suffix - if non-blank, then the output file name ends with this suffix (with a '.' added before the given suffix). Use None to avoid permissions issues in the case dir. returns (True if the files matched, log_name) """ cprnc_exe = case.get_value("CCSM_CPRNC") basename = os.path.basename(file1) multiinst_regex = re.compile(r'.*%s[^_]*(_[0-9]{4})[.]h.?[.][^.]+?[.]nc' % model) mstr = '' mstr1 = '' mstr2 = '' # If one is a multiinstance file but the other is not add an instance string m1 = multiinst_regex.match(file1) m2 = multiinst_regex.match(file2) if m1 is not None: mstr1 = m1.group(1) if m2 is not None: mstr2 = m2.group(1) if mstr1 != mstr2: mstr = mstr1+mstr2 output_filename = os.path.join(rundir, "{}{}.cprnc.out".format(basename, mstr)) if outfile_suffix: output_filename += ".{}".format(outfile_suffix) if outfile_suffix is None: cpr_stat, out, _ = run_cmd("{} -m {} {}".format(cprnc_exe, file1, file2), combine_output=True) else: cpr_stat = run_cmd("{} -m {} {}".format(cprnc_exe, file1, file2), combine_output=True, arg_stdout=output_filename)[0] with open(output_filename, "r") as fd: out = fd.read() if multiinst_driver_compare: # In a multiinstance test the cpl hist file will have a different number of # dimensions and so cprnc will indicate that the files seem to be DIFFERENT # in this case we only want to check that the fields we are able to compare # have no differences. return (cpr_stat == 0 and " 0 had non-zero differences" in out, output_filename) else: return (cpr_stat == 0 and "files seem to be IDENTICAL" in out, output_filename)
[docs]def compare_baseline(case, baseline_dir=None, outfile_suffix=""): """ compare the current test output to a baseline result case - The case containing the hist files to be compared against baselines baseline_dir - Optionally, specify a specific baseline dir, otherwise it will be computed from case config outfile_suffix - if non-blank, then the cprnc output file name ends with this suffix (with a '.' added before the given suffix). if None, no output file saved. returns (SUCCESS, comments) SUCCESS means all hist files matched their corresponding baseline """ rundir = case.get_value("RUNDIR") if baseline_dir is None: baselineroot = case.get_value("BASELINE_ROOT") basecmp_dir = os.path.join(baselineroot, case.get_value("BASECMP_CASE")) dirs_to_check = (baselineroot, basecmp_dir) else: basecmp_dir = baseline_dir dirs_to_check = (basecmp_dir,) for bdir in dirs_to_check: if not os.path.isdir(bdir): return False, "ERROR {} baseline directory '{}' does not exist".format(TEST_NO_BASELINES_COMMENT,bdir) success, comments = _compare_hists(case, rundir, basecmp_dir, outfile_suffix=outfile_suffix) if get_model() == "e3sm": bless_log = os.path.join(basecmp_dir, BLESS_LOG_NAME) if os.path.exists(bless_log): last_line = open(bless_log, "r").readlines()[-1] comments += "\n Most recent bless: {}".format(last_line) return success, comments
[docs]def get_extension(model, filepath): """ For a hist file for the given model, return what we call the "extension" model - The component model filepath - The path of the hist file >>> get_extension("cpl", "cpl.hi.nc") 'hi' >>> get_extension("cpl", "cpl.h.nc") 'h' >>> get_extension("cpl", "cpl.h1.nc") 'h1' >>> get_extension("cpl", "TESTRUNDIFF_Mmpi-serial.f19_g16_rx1.A.melvin_gnu.C.fake_testing_only_20160816_164150-20160816_164240.cpl.hi.0.nc.base") 'hi' >>> get_extension("cpl", "TESTRUNDIFF_Mmpi-serial.f19_g16_rx1.A.melvin_gnu.C.fake_testing_only_20160816_164150-20160816_164240.cpl.h.nc") 'h' >>> get_extension("clm","clm2_0002.h0.1850-01-06-00000.nc") '0002.h0' >>> get_extension("pop","PFS.f09_g16.B1850.cheyenne_intel.allactive-default.GC.c2_0_b1f2_int.pop.h.ecosys.nday1.0001-01-02.nc") 'h' """ basename = os.path.basename(filepath) ext_regex = re.compile(r'.*%s[^_]*_?([0-9]{4})?[.](h.?)([.].*[^.])?[.]nc' % model) m = ext_regex.match(basename) expect(m is not None, "Failed to get extension for file '{}'".format(filepath)) if m.group(1) is not None: result = m.group(1)+'.'+m.group(2) else: result = m.group(2) return result
[docs]def generate_baseline(case, baseline_dir=None, allow_baseline_overwrite=False): """ copy the current test output to baseline result case - The case containing the hist files to be copied into baselines baseline_dir - Optionally, specify a specific baseline dir, otherwise it will be computed from case config allow_baseline_overwrite must be true to generate baselines to an existing directory. returns (SUCCESS, comments) """ rundir = case.get_value("RUNDIR") if baseline_dir is None: baselineroot = case.get_value("BASELINE_ROOT") basegen_dir = os.path.join(baselineroot, case.get_value("BASEGEN_CASE")) else: basegen_dir = baseline_dir testcase = case.get_value("CASE") if not os.path.isdir(basegen_dir): os.makedirs(basegen_dir) if (os.path.isdir(os.path.join(basegen_dir,testcase)) and not allow_baseline_overwrite): expect(False, " Cowardly refusing to overwrite existing baseline directory") comments = "Generating baselines into '{}'\n".format(basegen_dir) num_gen = 0 for model in _iter_model_file_substrs(case): comments += " generating for model '{}'\n".format(model) hists = _get_latest_hist_files(testcase, model, rundir) logger.debug("latest_files: {}".format(hists)) num_gen += len(hists) for hist in hists: basename = hist[hist.rfind(model):] baseline = os.path.join(basegen_dir, basename) if os.path.exists(baseline): os.remove(baseline) safe_copy(hist, baseline) comments += " generating baseline '{}' from file {}\n".format(baseline, hist) # copy latest cpl log to baseline # drop the date so that the name is generic newestcpllogfile = case.get_latest_cpl_log(coupler_log_path=case.get_value("RUNDIR")) if newestcpllogfile is None: logger.warning("No cpl.log file found in directory {}".format(case.get_value("RUNDIR"))) else: safe_copy(newestcpllogfile, os.path.join(basegen_dir, "cpl.log.gz")) expect(num_gen > 0, "Could not generate any hist files for case '{}', something is seriously wrong".format(os.path.join(rundir, testcase))) #make sure permissions are open in baseline directory for root, _, files in os.walk(basegen_dir): for name in files: try: os.chmod(os.path.join(root,name), stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH) except OSError: # We tried. Not worth hard failure here. pass if get_model() == "e3sm": bless_log = os.path.join(basegen_dir, BLESS_LOG_NAME) with open(bless_log, "a") as fd: fd.write("sha:{} date:{}\n".format(get_current_commit(repo=case.get_value("CIMEROOT")), get_timestamp(timestamp_format="%Y-%m-%d_%H:%M:%S"))) return True, comments