Source code for CIME.wait_for_tests

#pylint: disable=import-error
from six.moves import queue
import os, time, threading, socket, signal, shutil, glob
#pylint: disable=import-error
from distutils.spawn import find_executable
import logging
import xml.etree.ElementTree as xmlet

import CIME.utils
from CIME.utils import expect, Timeout, run_cmd_no_fail, safe_copy, CIMEError
from CIME.XML.machines import Machines
from CIME.test_status import *
from CIME.provenance import save_test_success
from CIME.case.case import Case

SIGNAL_RECEIVED           = False
E3SM_MAIN_CDASH           = "ACME_Climate"
CDASH_DEFAULT_BUILD_GROUP = "ACME_Latest"
SLEEP_INTERVAL_SEC        = .1

###############################################################################
[docs]def signal_handler(*_):
###############################################################################
    global SIGNAL_RECEIVED
    SIGNAL_RECEIVED = True

###############################################################################
[docs]def set_up_signal_handlers():
###############################################################################
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

###############################################################################
[docs]def get_test_time(test_path):
###############################################################################
    ts = TestStatus(test_dir=test_path)
    comment = ts.get_comment(RUN_PHASE)
    if comment is None or "time=" not in comment:
        logging.warning("No run-phase time data found in {}".format(test_path))
        return 0
    else:
        time_data = [token for token in comment.split() if token.startswith("time=")][0]
        return int(time_data.split("=")[1])

###############################################################################
[docs]def get_test_output(test_path):
###############################################################################
    output_file = os.path.join(test_path, "TestStatus.log")
    if (os.path.exists(output_file)):
        return open(output_file, 'r').read()
    else:
        logging.warning("File '{}' not found".format(output_file))
        return ""

###############################################################################
[docs]def create_cdash_xml_boiler(phase, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, git_commit):
###############################################################################
    site_elem = xmlet.Element("Site")

    if ("JENKINS_START_TIME" in os.environ):
        time_info_str = "Total testing time: {:d} seconds".format(int(current_time) - int(os.environ["JENKINS_START_TIME"]))
    else:
        time_info_str = ""

    site_elem.attrib["BuildName"] = cdash_build_name
    site_elem.attrib["BuildStamp"] = "{}-{}".format(utc_time, cdash_build_group)
    site_elem.attrib["Name"] = hostname
    site_elem.attrib["OSName"] = "Linux"
    site_elem.attrib["Hostname"] = hostname
    site_elem.attrib["OSVersion"] = "Commit: {}{}".format(git_commit, time_info_str)

    phase_elem = xmlet.SubElement(site_elem, phase)

    xmlet.SubElement(phase_elem, "StartDateTime").text = time.ctime(current_time)
    xmlet.SubElement(phase_elem, "Start{}Time".format("Test" if phase == "Testing" else phase)).text = str(int(current_time))

    return site_elem, phase_elem

###############################################################################
[docs]def create_cdash_config_xml(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, data_rel_path, git_commit):
###############################################################################
    site_elem, config_elem = create_cdash_xml_boiler("Configure", cdash_build_name, cdash_build_group, utc_time, current_time, hostname, git_commit)

    xmlet.SubElement(config_elem, "ConfigureCommand").text = "namelists"

    config_results = []
    for test_name in sorted(results):
        test_status = results[test_name][1]
        config_results.append("{} {} Config {}".format("" if test_status != NAMELIST_FAIL_STATUS else "CMake Warning:\n", test_name, "PASS" if test_status != NAMELIST_FAIL_STATUS else "NML DIFF"))

    xmlet.SubElement(config_elem, "Log").text = "\n".join(config_results)

    xmlet.SubElement(config_elem, "ConfigureStatus").text = "0"
    xmlet.SubElement(config_elem, "ElapsedMinutes").text = "0" # Skip for now

    etree = xmlet.ElementTree(site_elem)
    etree.write(os.path.join(data_rel_path, "Configure.xml"))

###############################################################################
[docs]def create_cdash_build_xml(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, data_rel_path, git_commit):
###############################################################################
    site_elem, build_elem = create_cdash_xml_boiler("Build", cdash_build_name, cdash_build_group, utc_time, current_time, hostname, git_commit)

    xmlet.SubElement(build_elem, "ConfigureCommand").text = "case.build"

    build_results = []
    for test_name in sorted(results):
        build_results.append(test_name)

    xmlet.SubElement(build_elem, "Log").text = "\n".join(build_results)

    for idx, test_name in enumerate(sorted(results)):
        test_path = results[test_name][0]
        test_norm_path = test_path if os.path.isdir(test_path) else os.path.dirname(test_path)
        if get_test_time(test_norm_path) == 0:
            error_elem = xmlet.SubElement(build_elem, "Error")
            xmlet.SubElement(error_elem, "Text").text = test_name
            xmlet.SubElement(error_elem, "BuildLogLine").text = str(idx)
            xmlet.SubElement(error_elem, "PreContext").text = test_name
            xmlet.SubElement(error_elem, "PostContext").text = ""
            xmlet.SubElement(error_elem, "RepeatCount").text = "0"

    xmlet.SubElement(build_elem, "ElapsedMinutes").text = "0" # Skip for now

    etree = xmlet.ElementTree(site_elem)
    etree.write(os.path.join(data_rel_path, "Build.xml"))

###############################################################################
[docs]def create_cdash_test_xml(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, data_rel_path, git_commit):
###############################################################################
    site_elem, testing_elem = create_cdash_xml_boiler("Testing", cdash_build_name, cdash_build_group, utc_time, current_time, hostname, git_commit)

    test_list_elem = xmlet.SubElement(testing_elem, "TestList")
    for test_name in sorted(results):
        xmlet.SubElement(test_list_elem, "Test").text = test_name

    for test_name in sorted(results):
        test_path, test_status = results[test_name]
        test_passed = test_status in [TEST_PASS_STATUS, NAMELIST_FAIL_STATUS]
        test_norm_path = test_path if os.path.isdir(test_path) else os.path.dirname(test_path)

        full_test_elem = xmlet.SubElement(testing_elem, "Test")
        if test_passed:
            full_test_elem.attrib["Status"] = "passed"
        elif (test_status == TEST_PEND_STATUS):
            full_test_elem.attrib["Status"] = "notrun"
        else:
            full_test_elem.attrib["Status"] = "failed"

        xmlet.SubElement(full_test_elem, "Name").text = test_name

        xmlet.SubElement(full_test_elem, "Path").text = test_norm_path

        xmlet.SubElement(full_test_elem, "FullName").text = test_name

        xmlet.SubElement(full_test_elem, "FullCommandLine")
        # text ?

        results_elem = xmlet.SubElement(full_test_elem, "Results")

        named_measurements = (
            ("text/string",    "Exit Code",         test_status),
            ("text/string",    "Exit Value",        "0" if test_passed else "1"),
            ("numeric_double", "Execution Time",    str(get_test_time(test_norm_path))),
            ("text/string",    "Completion Status", "Not Completed" if test_status == TEST_PEND_STATUS else "Completed"),
            ("text/string",    "Command line",      "create_test")
        )

        for type_attr, name_attr, value in named_measurements:
            named_measurement_elem = xmlet.SubElement(results_elem, "NamedMeasurement")
            named_measurement_elem.attrib["type"] = type_attr
            named_measurement_elem.attrib["name"] = name_attr

            xmlet.SubElement(named_measurement_elem, "Value").text = value

        measurement_elem = xmlet.SubElement(results_elem, "Measurement")

        value_elem = xmlet.SubElement(measurement_elem, "Value")
        value_elem.text = ''.join([item for item in get_test_output(test_norm_path) if ord(item) < 128])

    xmlet.SubElement(testing_elem, "ElapsedMinutes").text = "0" # Skip for now

    etree = xmlet.ElementTree(site_elem)

    etree.write(os.path.join(data_rel_path, "Test.xml"))

###############################################################################
[docs]def create_cdash_xml_fakes(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname):
###############################################################################
    # We assume all cases were created from the same code repo
    first_result_case = os.path.dirname(list(results.items())[0][1][0])
    try:
        srcroot = run_cmd_no_fail("./xmlquery --value CIMEROOT", from_dir=first_result_case)
    except CIMEError:
        # Use repo containing this script as last resort
        srcroot = CIME.utils.get_cime_root()

    git_commit = CIME.utils.get_current_commit(repo=srcroot)

    data_rel_path = os.path.join("Testing", utc_time)

    create_cdash_config_xml(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, data_rel_path, git_commit)

    create_cdash_build_xml(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, data_rel_path, git_commit)

    create_cdash_test_xml(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname, data_rel_path, git_commit)

###############################################################################
[docs]def create_cdash_upload_xml(results, cdash_build_name, cdash_build_group, utc_time, hostname, force_log_upload):
###############################################################################

    data_rel_path = os.path.join("Testing", utc_time)

    try:
        log_dir = "{}_logs".format(cdash_build_name)

        need_to_upload = False

        for test_name, test_data in results.items():
            test_path, test_status = test_data

            if test_status not in [TEST_PASS_STATUS, NAMELIST_FAIL_STATUS] or force_log_upload:
                test_case_dir = os.path.dirname(test_path)
                ts = TestStatus(test_case_dir)

                build_status    = ts.get_status(SHAREDLIB_BUILD_PHASE)
                build_status    = TEST_FAIL_STATUS if build_status == TEST_FAIL_STATUS else ts.get_status(MODEL_BUILD_PHASE)
                run_status      = ts.get_status(RUN_PHASE)
                baseline_status = ts.get_status(BASELINE_PHASE)

                if build_status == TEST_FAIL_STATUS or run_status == TEST_FAIL_STATUS or baseline_status == TEST_FAIL_STATUS or force_log_upload:
                    case_dirs = [test_case_dir]
                    case_base = os.path.basename(test_case_dir)
                    test_case2_dir = os.path.join(test_case_dir, "case2", case_base)
                    if os.path.exists(test_case2_dir):
                        case_dirs.append(test_case2_dir)

                    for case_dir in case_dirs:
                        param = "EXEROOT" if build_status == TEST_FAIL_STATUS else "RUNDIR"
                        log_src_dir = run_cmd_no_fail("./xmlquery {} --value".format(param), from_dir=case_dir)

                        log_dst_dir = os.path.join(log_dir, "{}{}_{}_logs".format(test_name, "" if case_dir == test_case_dir else ".case2", param))
                        os.makedirs(log_dst_dir)
                        for log_file in glob.glob(os.path.join(log_src_dir, "*log*")):
                            safe_copy(log_file, log_dst_dir)
                        for log_file in glob.glob(os.path.join(log_src_dir, "*.cprnc.out*")):
                            safe_copy(log_file, log_dst_dir)

                        need_to_upload = True

        if (need_to_upload):

            tarball = "{}.tar.gz".format(log_dir)
            if (os.path.exists(tarball)):
                os.remove(tarball)

            run_cmd_no_fail("tar -cf - {} | gzip -c".format(log_dir), arg_stdout=tarball)
            base64 = run_cmd_no_fail("base64 {}".format(tarball))

            xml_text = \
r"""<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="Dart/Source/Server/XSL/Build.xsl <file:///Dart/Source/Server/XSL/Build.xsl> "?>
<Site BuildName="{}" BuildStamp="{}-{}" Name="{}" Generator="ctest3.0.0">
<Upload>
<File filename="{}">
<Content encoding="base64">
{}
</Content>
</File>
</Upload>
</Site>
""".format(cdash_build_name, utc_time, cdash_build_group, hostname, os.path.abspath(tarball), base64)

            with open(os.path.join(data_rel_path, "Upload.xml"), "w") as fd:
                fd.write(xml_text)

    finally:
        if (os.path.isdir(log_dir)):
            shutil.rmtree(log_dir)

###############################################################################
[docs]def create_cdash_xml(results, cdash_build_name, cdash_project, cdash_build_group, force_log_upload=False):
###############################################################################

    #
    # Create dart config file
    #

    current_time = time.time()

    utc_time_tuple = time.gmtime(current_time)
    cdash_timestamp = time.strftime("%H:%M:%S", utc_time_tuple)

    hostname = Machines().get_machine_name()
    if (hostname is None):
        hostname = socket.gethostname().split(".")[0]
        logging.warning("Could not convert hostname '{}' into an E3SM machine name".format(hostname))

    dart_config = \
"""
SourceDirectory: {0}
BuildDirectory: {0}

# Site is something like machine.domain, i.e. pragmatic.crd
Site: {1}

# Build name is osname-revision-compiler, i.e. Linux-2.4.2-2smp-c++
BuildName: {2}

# Submission information
IsCDash: TRUE
CDashVersion:
QueryCDashVersion:
DropSite: my.cdash.org
DropLocation: /submit.php?project={3}
DropSiteUser:
DropSitePassword:
DropSiteMode:
DropMethod: http
TriggerSite:
ScpCommand: {4}

# Dashboard start time
NightlyStartTime: {5} UTC
""".format(os.getcwd(), hostname, cdash_build_name, cdash_project,
           find_executable("scp"), cdash_timestamp)

    with open("DartConfiguration.tcl", "w") as dart_fd:
        dart_fd.write(dart_config)

    utc_time = time.strftime('%Y%m%d-%H%M', utc_time_tuple)
    os.makedirs(os.path.join("Testing", utc_time))

    # Make tag file
    with open("Testing/TAG", "w") as tag_fd:
        tag_fd.write("{}\n{}\n".format(utc_time, cdash_build_group))

    create_cdash_xml_fakes(results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname)

    create_cdash_upload_xml(results, cdash_build_name, cdash_build_group, utc_time, hostname, force_log_upload)

    run_cmd_no_fail("ctest -VV -D NightlySubmit", verbose=True)

###############################################################################
[docs]def wait_for_test(test_path, results, wait, check_throughput, check_memory, ignore_namelists, ignore_memleak, no_run):
###############################################################################
    if (os.path.isdir(test_path)):
        test_status_filepath = os.path.join(test_path, TEST_STATUS_FILENAME)
    else:
        test_status_filepath = test_path

    logging.debug("Watching file: '{}'".format(test_status_filepath))
    test_log_path = os.path.join(os.path.dirname(test_status_filepath), ".internal_test_status.log")

    # We don't want to make it a requirement that wait_for_tests has write access
    # to all case directories
    try:
        fd = open(test_log_path, "w")
        fd.close()
    except (IOError, OSError):
        test_log_path = "/dev/null"

    prior_ts = None
    with open(test_log_path, "w") as log_fd:
        while (True):
            if (os.path.exists(test_status_filepath)):
                ts = TestStatus(test_dir=os.path.dirname(test_status_filepath))
                test_name = ts.get_name()
                test_status = ts.get_overall_test_status(wait_for_run=not no_run, # Important
                                                         no_run=no_run,
                                                         check_throughput=check_throughput,
                                                         check_memory=check_memory, ignore_namelists=ignore_namelists,
                                                         ignore_memleak=ignore_memleak)

                if prior_ts is not None and prior_ts != ts:
                    log_fd.write(ts.phase_statuses_dump())
                    log_fd.write("OVERALL: {}\n\n".format(test_status))

                prior_ts = ts

                if (test_status == TEST_PEND_STATUS and (wait and not SIGNAL_RECEIVED)):
                    time.sleep(SLEEP_INTERVAL_SEC)
                    logging.debug("Waiting for test to finish")
                else:
                    results.put( (test_name, test_path, test_status) )
                    break

            else:
                if (wait and not SIGNAL_RECEIVED):
                    logging.debug("File '{}' does not yet exist".format(test_status_filepath))
                    time.sleep(SLEEP_INTERVAL_SEC)
                else:
                    test_name = os.path.abspath(test_status_filepath).split("/")[-2]
                    results.put( (test_name, test_path, "File '{}' doesn't exist".format(test_status_filepath)) )
                    break

###############################################################################
[docs]def wait_for_tests_impl(test_paths, no_wait=False, check_throughput=False, check_memory=False, ignore_namelists=False, ignore_memleak=False, no_run=False):
###############################################################################
    results = queue.Queue()

    for test_path in test_paths:
        t = threading.Thread(target=wait_for_test, args=(test_path, results, not no_wait, check_throughput, check_memory, ignore_namelists, ignore_memleak, no_run))
        t.daemon = True
        t.start()

    while threading.active_count() > 1:
        time.sleep(1)

    test_results = {}
    completed_test_paths = []
    while (not results.empty()):
        test_name, test_path, test_status = results.get()
        if (test_name in test_results):
            prior_path, prior_status = test_results[test_name]
            if (test_status == prior_status):
                logging.warning("Test name '{}' was found in both '{}' and '{}'".format(test_name, test_path, prior_path))
            else:
                raise CIMEError("Test name '{}' was found in both '{}' and '{}' with different results".format(test_name, test_path, prior_path))

        test_results[test_name] = (test_path, test_status)
        completed_test_paths.append(test_path)

    expect(set(test_paths) == set(completed_test_paths),
           "Missing results for test paths: {}".format(set(test_paths) - set(completed_test_paths)))

    return test_results

###############################################################################
[docs]def wait_for_tests(test_paths,
                   no_wait=False,
                   check_throughput=False,
                   check_memory=False,
                   ignore_namelists=False,
                   ignore_memleak=False,
                   cdash_build_name=None,
                   cdash_project=E3SM_MAIN_CDASH,
                   cdash_build_group=CDASH_DEFAULT_BUILD_GROUP,
                   timeout=None,
                   force_log_upload=False,
                   no_run=False,
                   update_success=False):
###############################################################################
    # Set up signal handling, we want to print results before the program
    # is terminated
    set_up_signal_handlers()

    with Timeout(timeout, action=signal_handler):
        test_results = wait_for_tests_impl(test_paths, no_wait, check_throughput, check_memory, ignore_namelists, ignore_memleak, no_run)

    all_pass = True
    for test_name, test_data in sorted(test_results.items()):
        test_path, test_status = test_data
        logging.info("Test '{}' finished with status '{}'".format(test_name, test_status))
        logging.info("    Path: {}".format(test_path))
        all_pass &= test_status == TEST_PASS_STATUS

        if update_success:
            caseroot = os.path.dirname(test_data[0])
            with Case(caseroot, read_only=True) as case:
                srcroot = case.get_value("CIMEROOT")
                baseline_root = case.get_value("BASELINE_ROOT")
                save_test_success(baseline_root, srcroot, test_name, test_status in [TEST_PASS_STATUS, NAMELIST_FAIL_STATUS])

    if cdash_build_name:
        create_cdash_xml(test_results, cdash_build_name, cdash_project, cdash_build_group, force_log_upload)

    return all_pass