Source code for CIME.bless_test_results

import CIME.compare_namelists, CIME.simple_compare
from CIME.test_scheduler import NAMELIST_PHASE
from CIME.utils import (
    run_cmd,
    get_scripts_root,
    EnvironmentContext,
    parse_test_name,
    match_any,
)
from CIME.config import Config
from CIME.test_status import *
from CIME.hist_utils import generate_baseline, compare_baseline
from CIME.case import Case
from CIME.test_utils import get_test_status_files
from CIME.baselines.performance import (
    perf_compare_throughput_baseline,
    perf_compare_memory_baseline,
    perf_write_baseline,
)
import os, time

# Module-level logger, named after this module; used by every function below.
logger = logging.getLogger(__name__)


def _bless_throughput(
    case,
    test_name,
    baseline_root,
    baseline_name,
    report_only,
    force,
):
    """Compare a case's throughput against its baseline and optionally rewrite it.

    The baseline directory is ``baseline_root/baseline_name/<CASEBASEID>``.

    Args:
        case: Case object; only ``get_value("CASEBASEID")`` is used directly,
            the object is otherwise handed to the performance helpers.
        test_name: Test name, used only in the failure message.
        baseline_root: Root directory holding the baselines.
        baseline_name: Name of the baseline under ``baseline_root``.
        report_only: When true, log the comparison but never write.
        force: When true, write without asking for confirmation.

    Returns:
        ``(success, reason)``. ``success`` is False only when writing the
        baseline raised; ``reason`` then holds the message, otherwise None.
    """
    baseline_dir = os.path.join(
        baseline_root, baseline_name, case.get_value("CASEBASEID")
    )

    # Stays False when the comparison raises, so the diff is still reported.
    below_threshold = False
    try:
        below_threshold, comment = perf_compare_throughput_baseline(
            case, baseline_dir=baseline_dir
        )
    except FileNotFoundError as err:
        comment = f"Could not read throughput file: {err!s}"
    except Exception as err:
        comment = f"Error comparing throughput baseline: {err!s}"

    if below_threshold:
        logger.info("Throughput diff appears to have been already resolved.")
        return True, None

    logger.info(comment)

    if report_only:
        return True, None

    # Only prompt when the user did not force the update.
    if not force:
        answer = input("Update this diff (y/n)? ")
        if answer.upper() not in ("Y", "YES"):
            return True, None

    try:
        perf_write_baseline(case, baseline_dir, memory=False)
    except Exception as err:
        return (
            False,
            f"Failed to write baseline throughput for {test_name!r}: {err!s}",
        )

    return True, None


def _bless_memory(
    case,
    test_name,
    baseline_root,
    baseline_name,
    report_only,
    force,
):
    """Compare a case's memory usage against its baseline and optionally rewrite it.

    The baseline directory is ``baseline_root/baseline_name/<CASEBASEID>``.

    Args:
        case: Case object; only ``get_value("CASEBASEID")`` is used directly,
            the object is otherwise handed to the performance helpers.
        test_name: Test name, used only in the failure message.
        baseline_root: Root directory holding the baselines.
        baseline_name: Name of the baseline under ``baseline_root``.
        report_only: When true, log the comparison but never write.
        force: When true, write without asking for confirmation.

    Returns:
        ``(success, reason)``. ``success`` is False only when writing the
        baseline raised; ``reason`` then holds the message, otherwise None.
    """
    baseline_dir = os.path.join(
        baseline_root, baseline_name, case.get_value("CASEBASEID")
    )

    # Stays False when the comparison raises, so the diff is still reported.
    below_threshold = False
    try:
        below_threshold, comment = perf_compare_memory_baseline(
            case, baseline_dir=baseline_dir
        )
    except FileNotFoundError as err:
        comment = f"Could not read memory usage file: {err!s}"
    except Exception as err:
        comment = f"Error comparing memory baseline: {err!s}"

    if below_threshold:
        logger.info("Memory usage diff appears to have been already resolved.")
        return True, None

    logger.info(comment)

    if report_only:
        return True, None

    # Only prompt when the user did not force the update.
    if not force:
        answer = input("Update this diff (y/n)? ")
        if answer.upper() not in ("Y", "YES"):
            return True, None

    try:
        perf_write_baseline(case, baseline_dir, throughput=False)
    except Exception as err:
        return (
            False,
            f"Failed to write baseline memory usage for test {test_name!r}: {err!s}",
        )

    return True, None


###############################################################################

[docs]
def bless_namelists(
    test_name,
    report_only,
    force,
    pes_file,
    baseline_name,
    baseline_root,
    new_test_root=None,
    new_test_id=None,
):
    """Regenerate the baseline namelists of a test by re-running ``create_test``.

    Args:
        test_name: Name of the test whose namelists differ.
        report_only: When true, only log the diff; nothing is regenerated.
        force: When true, regenerate without asking for confirmation.
        pes_file: Optional value passed through as ``--pesfile``.
        baseline_name: Baseline name passed to the generate flag.
        baseline_root: Value passed as ``--baseline-root``.
        new_test_root: Optional value used for both ``--test-root`` and
            ``--output-root``.
        new_test_id: Optional value passed as ``-t``.

    Returns:
        ``(success, reason)``. ``success`` is False only when the
        ``create_test`` command exits non-zero; ``reason`` then contains its
        combined output, otherwise it is None.
    """
    # Be aware that restart test will overwrite the original namelist files
    # with versions of the files that should not be blessed. This forces us to
    # re-run create_test.
    logger.info("Test '{}' had namelist diff".format(test_name))

    if report_only:
        return True, None

    # Only prompt when the user did not force the update.
    if not force and input("Update namelists (y/n)? ").upper() not in ("Y", "YES"):
        return True, None

    # The generate flag is spelled differently depending on the configured mode.
    if Config.instance().create_test_flag_mode == "cesm":
        gen_args = " -g {} ".format(baseline_name)
    else:
        gen_args = " -g -b {} ".format(baseline_name)

    if new_test_root is not None:
        gen_args += " --test-root={0} --output-root={0} ".format(new_test_root)

    if new_test_id is not None:
        gen_args += " -t {}".format(new_test_id)

    if pes_file is not None:
        gen_args += " --pesfile {}".format(pes_file)

    command = "{}/create_test {} --namelists-only {} --baseline-root {} -o".format(
        get_scripts_root(), test_name, gen_args, baseline_root
    )
    stat, out, _ = run_cmd(command, combine_output=True)

    if stat != 0:
        return False, "Namelist regen failed: '{}'".format(out)

    return True, None




[docs]
def bless_history(test_name, case, baseline_name, baseline_root, report_only, force):
    """Compare a case's history output to its baseline and optionally regenerate it.

    The comparison and generation run inside an ``EnvironmentContext`` with
    ``USER`` set to the case's ``REALUSER`` value.

    Args:
        test_name: Test name, used only in the failure log message.
        case: Case object providing ``REALUSER`` and ``CASEBASEID``.
        baseline_name: Name of the baseline under ``baseline_root``.
        baseline_root: Root directory holding the baselines.
        report_only: When true, log the comparison but never regenerate.
        force: When true, regenerate without asking for confirmation.

    Returns:
        ``(success, reason)``. ``success`` is False only when
        ``generate_baseline`` reports failure; ``reason`` then contains its
        comments, otherwise it is None.
    """
    with EnvironmentContext(USER=case.get_value("REALUSER")):
        baseline_full_dir = os.path.join(
            baseline_root, baseline_name, case.get_value("CASEBASEID")
        )

        matches, compare_log = compare_baseline(
            case, baseline_dir=baseline_full_dir, outfile_suffix=None
        )
        if matches:
            logger.info("Diff appears to have been already resolved.")
            return True, None

        logger.info(compare_log)

        # The prompt is only shown when neither report_only nor force is set.
        approved = not report_only and (
            force or input("Update this diff (y/n)? ").upper() in ["Y", "YES"]
        )
        if not approved:
            return True, None

        generated, generate_log = generate_baseline(
            case, baseline_dir=baseline_full_dir
        )
        if generated:
            logger.info(generate_log)
            return True, None

        logger.warning("Hist file bless FAILED for test {}".format(test_name))
        return False, "Generate baseline failed: {}".format(generate_log)




[docs]
def bless_test_results(
    baseline_name,
    baseline_root,
    test_root,
    compiler,
    test_id=None,
    namelists_only=False,
    hist_only=False,
    report_only=False,
    force=False,
    pes_file=None,
    bless_tests=None,
    no_skip_pass=False,
    new_test_root=None,
    new_test_id=None,
    exclude=None,
    bless_tput=False,
    bless_mem=False,
    bless_perf=False,
    **_,  # Capture all for extra
):
    """Bless namelist, history and/or performance baselines for matched tests.

    TestStatus files are looked up with ``get_test_status_files``; when several
    test batches match, only the one with the greatest trailing directory
    suffix is processed. For each selected test the required blesses are
    determined from its TestStatus and then carried out.

    Args:
        baseline_name: Baseline name; when None it is taken from the case's
            ``BASELINE_NAME_CMP`` or, failing that, the current branch of the
            CIME root.
        baseline_root: Baseline root; when None the case's ``BASELINE_ROOT``.
        test_root: Passed to ``get_test_status_files``.
        compiler: Passed to ``get_test_status_files``.
        test_id: Passed to ``get_test_status_files``.
        namelists_only: Bless only namelists.
        hist_only: Bless only history files.
        report_only: Report diffs without updating anything.
        force: Update without prompting (also skips the 2 second pause).
        pes_file: Forwarded to ``bless_namelists``.
        bless_tests: Optional regex strings; only matching test names are
            processed. A warning is logged for each one that matched nothing.
        no_skip_pass: Bless namelists/history even if the phase passed.
        new_test_root: Forwarded to ``bless_namelists``.
        new_test_id: Forwarded to ``bless_namelists``.
        exclude: Optional regex strings; matching test names are skipped. An
            empty sequence excludes nothing.
        bless_tput: Bless throughput baselines.
        bless_mem: Bless memory baselines.
        bless_perf: Shorthand for ``bless_tput`` and ``bless_mem``.

    Returns:
        True when no broken bless was recorded, False otherwise.

    Raises:
        Whatever ``expect`` raises when performance and non-performance
        blesses are mixed.
    """
    if bless_perf:
        bless_mem = True
        bless_tput = True

    # Logical `or` (not bitwise `|`) so non-bool flags such as None do not raise.
    is_perf_bless = bless_mem or bless_tput
    bless_all_non_perf = not (namelists_only or hist_only or is_perf_bless)

    expect(
        not (is_perf_bless and hist_only) and not (is_perf_bless and namelists_only),
        "Do not mix performance and non-performance blesses",
    )

    test_status_files = get_test_status_files(test_root, compiler, test_id=test_id)

    # auto-adjust test-id if multiple rounds of tests were matched
    timestamps = {
        os.path.basename(os.path.dirname(test_status_file)).split(".")[-1]
        for test_status_file in test_status_files
    }

    if len(timestamps) > 1:
        logger.warning(
            "Multiple sets of tests were matched! Selected only most recent tests."
        )

    if timestamps:
        most_recent = max(timestamps)
        logger.info("Matched test batch is {}".format(most_recent))
    else:
        # Nothing matched: report it instead of failing with an IndexError.
        # The loop below has nothing to iterate over in this case.
        most_recent = None
        logger.warning(
            "No test status files were matched in test_root {} with compiler {} and test_id {}".format(
                test_root, compiler, test_id
            )
        )

    # Maps each requested test regex to the number of tests it matched.
    bless_tests_counts = (
        {bless_test: 0 for bless_test in bless_tests} if bless_tests else {}
    )

    # compile excludes into single regex; an empty list would otherwise yield
    # the empty pattern, which matches (and therefore excludes) every test.
    exclude_re = (
        re.compile("|".join([f"({x})" for x in exclude])) if exclude else None
    )

    broken_blesses = []
    for test_status_file in test_status_files:
        if most_recent not in test_status_file:
            logger.info("Skipping {}".format(test_status_file))
            continue

        test_dir = os.path.dirname(test_status_file)
        ts = TestStatus(test_dir=test_dir)
        test_name = ts.get_name()

        # Handle an invalid TestStatus before parsing the name; parsing a
        # missing name would raise and make this branch unreachable.
        if test_name is None:
            case_dir = os.path.basename(test_dir)
            test_name = CIME.utils.normalize_case_id(case_dir)
            if not bless_tests or match_any(test_name, bless_tests_counts):
                broken_blesses.append(
                    (
                        "unknown",
                        "test had invalid TestStatus file: '{}'".format(
                            test_status_file
                        ),
                    )
                )
            continue

        testopts = parse_test_name(test_name)[1]
        build_only = "B" in (testopts or [])

        # Must pass tests to continue
        has_no_tests = not bless_tests
        match_test_name = match_any(test_name, bless_tests_counts)
        excluded = exclude_re.match(test_name) if exclude_re is not None else False

        if (not has_no_tests and not match_test_name) or excluded:
            logger.debug("Skipping {!r}".format(test_name))

            continue

        overall_result, phase = ts.get_overall_test_status(
            ignore_namelists=True,
            ignore_memleak=True,
            ignore_diffs=is_perf_bless,
            check_throughput=bless_tput,
            check_memory=bless_mem,
        )

        # See if we need to bless namelist
        if namelists_only or bless_all_non_perf:
            if no_skip_pass:
                nl_bless = True
            else:
                nl_bless = ts.get_status(NAMELIST_PHASE) != TEST_PASS_STATUS
        else:
            nl_bless = False

        hist_bless, tput_bless, mem_bless = [False] * 3

        # Skip if test is build only i.e. testopts contains "B"
        if not build_only:
            # Check the bless mode first: is_hist_bless_needed records broken
            # blesses as a side effect, which must not happen when history
            # files are not being blessed at all.
            wants_hist = bool(hist_only or bless_all_non_perf)
            hist_bless = wants_hist and is_hist_bless_needed(
                test_name, ts, broken_blesses, overall_result, no_skip_pass, phase
            )
            tput_bless = (
                bless_tput and ts.get_status(THROUGHPUT_PHASE) != TEST_PASS_STATUS
            )
            mem_bless = bless_mem and ts.get_status(MEMCOMP_PHASE) != TEST_PASS_STATUS

        expect(
            not ((nl_bless or hist_bless) and (tput_bless or mem_bless)),
            "Do not mix performance and non-performance blessing",
        )

        # Now, do the bless
        if not nl_bless and not hist_bless and not tput_bless and not mem_bless:
            logger.info(
                "Nothing to bless for test: {}, overall status: {}".format(
                    test_name, overall_result
                )
            )
        else:
            logger.debug("Determined blesses for {!r}".format(test_name))
            logger.debug("nl_bless     = {}".format(nl_bless))
            logger.debug("hist_bless   = {}".format(hist_bless))
            logger.debug("tput_bless   = {}".format(tput_bless))
            logger.debug("mem_bless    = {}".format(mem_bless))

            logger.info(
                "###############################################################################"
            )
            logger.info(
                "Blessing results for test: {}, most recent result: {}".format(
                    test_name, overall_result
                )
            )
            logger.info("Case dir: {}".format(test_dir))
            logger.info(
                "###############################################################################"
            )
            # Pause so an interactive user can read the banner before prompts.
            if not force:
                time.sleep(2)

            with Case(test_dir) as case:
                # Resolve baseline_name and baseline_root
                if baseline_name is None:
                    baseline_name_resolved = case.get_value("BASELINE_NAME_CMP")
                    if not baseline_name_resolved:
                        cime_root = CIME.utils.get_cime_root()
                        baseline_name_resolved = CIME.utils.get_current_branch(
                            repo=cime_root
                        )
                else:
                    baseline_name_resolved = baseline_name

                if baseline_root is None:
                    baseline_root_resolved = case.get_value("BASELINE_ROOT")
                else:
                    baseline_root_resolved = baseline_root

                if baseline_name_resolved is None:
                    broken_blesses.append(
                        (test_name, "Could not determine baseline name")
                    )
                    continue

                if baseline_root_resolved is None:
                    broken_blesses.append(
                        (test_name, "Could not determine baseline root")
                    )
                    continue

                # Bless namelists
                if nl_bless:
                    success, reason = bless_namelists(
                        test_name,
                        report_only,
                        force,
                        pes_file,
                        baseline_name_resolved,
                        baseline_root_resolved,
                        new_test_root=new_test_root,
                        new_test_id=new_test_id,
                    )
                    if not success:
                        broken_blesses.append((test_name, reason))

                # Bless hist files
                if hist_bless:
                    if "HOMME" in test_name:
                        success = False
                        reason = "HOMME tests cannot be blessed with bless_for_tests"
                    else:
                        success, reason = bless_history(
                            test_name,
                            case,
                            baseline_name_resolved,
                            baseline_root_resolved,
                            report_only,
                            force,
                        )

                    if not success:
                        broken_blesses.append((test_name, reason))

                # Bless throughput
                if tput_bless:
                    success, reason = _bless_throughput(
                        case,
                        test_name,
                        baseline_root_resolved,
                        baseline_name_resolved,
                        report_only,
                        force,
                    )

                    if not success:
                        broken_blesses.append((test_name, reason))

                # Bless memory usage
                if mem_bless:
                    success, reason = _bless_memory(
                        case,
                        test_name,
                        baseline_root_resolved,
                        baseline_name_resolved,
                        report_only,
                        force,
                    )

                    if not success:
                        broken_blesses.append((test_name, reason))

    # Emit a warning if items in bless_tests did not match anything
    if bless_tests:
        for bless_test, bless_count in bless_tests_counts.items():
            if bless_count == 0:
                logger.warning(
                    """
bless test arg '{}' did not match any tests in test_root {} with
compiler {} and test_id {}. It's possible that one of these arguments
had a mistake (likely compiler or testid).""".format(
                        bless_test, test_root, compiler, test_id
                    )
                )

    # Make sure user knows that some tests were not blessed
    success = True
    for broken_bless, reason in broken_blesses:
        logger.warning(
            "FAILED TO BLESS TEST: {}, reason {}".format(broken_bless, reason)
        )
        success = False

    return success




[docs]
def is_hist_bless_needed(
    test_name, ts, broken_blesses, overall_result, no_skip_pass, phase
):
    """Decide whether a test's history baseline should be blessed.

    A test that never reached the run phase, whose run phase did not pass, or
    whose overall result is a failure is not blessed; in each of those cases a
    ``(test_name, reason)`` entry is appended to ``broken_blesses`` and a
    warning is logged.

    Args:
        test_name: Test name used in the log messages and broken-bless entry.
        ts: Test status object providing ``get_status`` and
            ``phase_statuses_dump``.
        broken_blesses: List mutated in place with ``(test_name, reason)``.
        overall_result: Overall status of the test.
        no_skip_pass: When true, bless even if the baseline phase passed.
        phase: Phase reported as responsible for ``overall_result``; used only
            in the warning text.

    Returns:
        True when the history baseline should be blessed, False otherwise.
    """
    run_result = ts.get_status(RUN_PHASE)

    if run_result is None:
        broken_blesses.append((test_name, "no run phase"))
        logger.warning("Test '{}' did not make it to run phase".format(test_name))
        return False

    if run_result != TEST_PASS_STATUS:
        broken_blesses.append((test_name, "run phase did not pass"))
        logger.warning(
            "Test '{}' run phase did not pass, not safe to bless, test status = {}".format(
                test_name, ts.phase_statuses_dump()
            )
        )
        return False

    if overall_result == TEST_FAIL_STATUS:
        broken_blesses.append((test_name, "test did not pass"))
        logger.warning(
            "Test '{}' did not pass due to phase {}, not safe to bless, test status = {}".format(
                test_name, phase, ts.phase_statuses_dump()
            )
        )
        return False

    # The run is healthy: bless unconditionally when asked to, otherwise only
    # when the baseline comparison phase did not pass.
    if no_skip_pass:
        return True

    return ts.get_status(BASELINE_PHASE) != TEST_PASS_STATUS