Source code for CIME.simple_compare

import os, re

from CIME.utils import expect

###############################################################################
def _normalize_string_value(value, case):
    ###############################################################################
    """
    Some of the strings are inherently prone to diffs, like file
    paths, etc. This function attempts to normalize that data so that
    it will not cause diffs.
    """
    # Any occurance of case must be normalized because test-ids might not match
    if case is not None:
        case_re = re.compile(r"{}[.]([GC])[.]([^./\s]+)".format(case))
        value = case_re.sub("{}.ACTION.TESTID".format(case), value)

    if "/" in value:
        # File path, just return the basename
        return os.path.basename(value)
    elif "username" in value:
        return ""
    elif ".log." in value:
        # Remove the part that's prone to diff
        components = value.split(".")
        return os.path.basename(".".join(components[0:-1]))
    else:
        return value


###############################################################################
def _skip_comments_and_whitespace(lines, idx):
    ###############################################################################
    """
    Starting at idx, return next valid idx of lines that contains real data
    """
    if idx == len(lines):
        return idx

    comment_re = re.compile(r"^[#!]")

    lines_slice = lines[idx:]
    for line in lines_slice:
        line = line.strip()
        if comment_re.match(line) is not None or line == "":
            idx += 1
        else:
            return idx

    return idx


###############################################################################
def _compare_data(gold_lines, comp_lines, case, offset_method=False):
    ###############################################################################
    """
    Compare two lists of lines, ignoring blank lines and lines whose first
    non-blank character is "#" or "!".

    gold_lines    : the reference lines
    comp_lines    : the lines being checked against the reference
    case          : case name passed on to _normalize_string_value (may be None)
    offset_method : when True, a mismatch advances only one side (gold if
                    gold_lines is the longer list, otherwise comp) instead
                    of both, so the comparison can realign after an
                    inserted or removed line

    Before comparing, each line is stripped, double quotes are replaced by
    single quotes and the result is passed through _normalize_string_value.

    Returns (comments, cnt): comments describes every difference found
    (empty when there are none) and cnt is the number of inequivalent line
    pairs. Trailing extra or missing lines are described in comments but
    are not counted in cnt.

    >>> teststr = '''
    ... data1
    ... data2 data3
    ... data4 data5 data6
    ...
    ... # Comment
    ... data7 data8 data9 data10
    ... '''
    >>> _compare_data(teststr.splitlines(), teststr.splitlines(), None)
    ('', 0)

    >>> teststr2 = '''
    ... data1
    ... data2 data30
    ... data4 data5 data6
    ... data7 data8 data9 data10
    ... data00
    ... '''
    >>> results,_ = _compare_data(teststr.splitlines(), teststr2.splitlines(), None)
    >>> print(results)
    Inequivalent lines data2 data3 != data2 data30
      NORMALIZED: data2 data3 != data2 data30
    Found extra lines
    data00
    <BLANKLINE>
    >>> results,_ = _compare_data(teststr2.splitlines(), teststr.splitlines(), None)
    >>> print(results)
    Inequivalent lines data2 data30 != data2 data3
      NORMALIZED: data2 data30 != data2 data3
    Missing lines
    data00
    <BLANKLINE>
    >>> teststr3 = '''
    ... data1
    ... data4 data5 data6
    ... data7 data8 data9 data10
    ... data00
    ... '''
    >>> results,_ = _compare_data(teststr3.splitlines(), teststr2.splitlines(), None, offset_method=True)
    >>> print(results)
    Inequivalent lines data4 data5 data6 != data2 data30
      NORMALIZED: data4 data5 data6 != data2 data30
    <BLANKLINE>
    """
    comments = ""
    cnt = 0
    gidx, cidx = 0, 0
    gnum, cnum = len(gold_lines), len(comp_lines)
    while gidx < gnum or cidx < cnum:
        # Move both cursors to their next line of real data
        gidx = _skip_comments_and_whitespace(gold_lines, gidx)
        cidx = _skip_comments_and_whitespace(comp_lines, cidx)

        if gidx == gnum:
            if cidx == cnum:
                return comments, cnt
            else:
                # Gold is exhausted but comp still has data
                comments += "Found extra lines\n"
                comments += "\n".join(comp_lines[cidx:]) + "\n"
                return comments, cnt
        elif cidx == cnum:
            # Comp is exhausted but gold still has data: report everything
            # that remains in gold, from the current position to the end.
            comments += "Missing lines\n"
            comments += "\n".join(gold_lines[gidx:]) + "\n"
            return comments, cnt

        # Quote style is not considered a difference
        gold_value = gold_lines[gidx].strip()
        gold_value = gold_value.replace('"', "'")
        comp_value = comp_lines[cidx].strip()
        comp_value = comp_value.replace('"', "'")

        norm_gold_value = _normalize_string_value(gold_value, case)
        norm_comp_value = _normalize_string_value(comp_value, case)
        differs = norm_gold_value != norm_comp_value

        if differs:
            comments += "Inequivalent lines {} != {}\n".format(gold_value, comp_value)
            comments += "  NORMALIZED: {} != {}\n".format(
                norm_gold_value, norm_comp_value
            )
            cnt += 1

        if offset_method and differs:
            # Advance only one side so the two inputs can line up again
            if gnum > cnum:
                gidx += 1
            else:
                cidx += 1
        else:
            gidx += 1
            cidx += 1

    return comments, cnt


###############################################################################

[docs]
def compare_files(gold_file, compare_file, case=None):
    ###############################################################################
    """
    Returns true if files are the same, comments are returned too:
    (success, comments)

    gold_file    : path of the reference file
    compare_file : path of the file checked against the reference
    case         : optional case name, forwarded to _compare_data

    Both paths are checked with expect() before anything is read. The
    files are first compared line against line; if that finds inequivalent
    lines, the comparison is repeated with offset_method=True and its
    comments are used instead when it finds fewer inequivalent lines.
    """
    expect(os.path.exists(gold_file), "File not found: {}".format(gold_file))
    expect(os.path.exists(compare_file), "File not found: {}".format(compare_file))

    # Read each file once and close it promptly; the same line lists are
    # reused for both comparison passes.
    with open(gold_file, "r") as gold_fd:
        gold_lines = gold_fd.readlines()
    with open(compare_file, "r") as compare_fd:
        compare_lines = compare_fd.readlines()

    comments, cnt = _compare_data(gold_lines, compare_lines, case)

    if cnt > 0:
        comments2, cnt2 = _compare_data(
            gold_lines,
            compare_lines,
            case,
            offset_method=True,
        )
        if cnt2 < cnt:
            comments = comments2

    return comments == "", comments



###############################################################################

[docs]
def compare_runconfigfiles(gold_file, compare_file, case=None):
    ###############################################################################
    """
    Compare two runconfig files by content rather than line by line.

    Returns (success, comments): success is true when no difference was
    found, and comments holds the description of the differences.
    """
    for required in (gold_file, compare_file):
        expect(os.path.exists(required), "File not found: {}".format(required))

    # Parse both files into dictionaries and diff those
    gold_cfg = _parse_runconfig(gold_file)
    compare_cfg = _parse_runconfig(compare_file)

    # findDiff talks about "d1"/"d2"; substitute the real file names
    report = (
        findDiff(gold_cfg, compare_cfg, case=case)
        .replace(" d1", " " + gold_file)
        .replace(" d2", " " + compare_file)
    )

    # Nothing found gold -> compare: look the other way round to catch
    # entries that exist only in the compare file (roles are swapped)
    if report == "":
        report = (
            findDiff(compare_cfg, gold_cfg, case=case)
            .replace(" d2", " " + gold_file)
            .replace(" d1", " " + compare_file)
        )

    return report == "", report



def _parse_runconfig(filename):
    runconfig = {}
    inrunseq = False
    insubsection = None
    subsection_re = re.compile(r"\s*(\S+)::")
    group_re = re.compile(r"\s*(\S+)\s*:\s*(\S+)")
    var_re = re.compile(r"\s*(\S+)\s*=\s*(\S+)")
    with open(filename, "r") as fd:
        for line in fd:
            # remove comments
            line = line.split("#")[0]
            subsection_match = subsection_re.match(line)
            group_match = group_re.match(line)
            var_match = var_re.match(line)
            if re.match(r"\s*runSeq\s*::", line):
                runconfig["runSeq"] = []
                inrunseq = True
            elif re.match(r"\s*::\s*", line):
                inrunseq = False
            elif inrunseq:
                runconfig["runSeq"].append(line)
            elif subsection_match:
                insubsection = subsection_match.group(1)
                runconfig[insubsection] = {}
            elif group_match:
                runconfig[group_match.group(1)] = group_match.group(2)
            elif insubsection and var_match:
                runconfig[insubsection][var_match.group(1)] = var_match.group(2)
    return runconfig



[docs]
def findDiff(d1, d2, path="", case=None):
    """
    Recursively describe how d2 differs from d1.

    d1, d2 : dictionaries; values may themselves be dictionaries
    path   : "->"-joined chain of keys leading to d1/d2, printed as the
             header line of every reported difference
    case   : optional; a non-dictionary value of d1 that contains case is
             not compared

    Returns the description as a string, empty when nothing differs. Only
    keys of d1 are visited, so keys present only in d2 are not reported.
    Values whose key contains "username" or "logfile" are never compared.
    """
    comment = ""
    for k, v1 in d1.items():
        if k not in d2:
            comment += path + ":\n"
            comment += k + " as key not in d2\n"
            continue

        v2 = d2[k]
        if isinstance(v1, dict):
            if isinstance(v2, dict):
                # Keep the child path in its own variable: reassigning
                # path here would leak this key into the path reported
                # for the following sibling keys.
                subpath = k if path == "" else path + "->" + k
                comment += findDiff(v1, v2, path=subpath, case=case)
            else:
                # A subsection on one side and a plain value on the other
                # is a difference in itself; there is nothing to descend into.
                comment += path + ":\n"
                comment += " - {} : {}\n".format(k, v1)
                comment += " + {} : {}\n".format(k, v2)
        elif case is not None and case in v1:
            # Values that embed the case name are expected to differ.
            # The None check matters: "None in <str>" raises TypeError.
            continue
        elif "username" in k or "logfile" in k:
            continue
        elif v1 != v2:
            comment += path + ":\n"
            comment += " - {} : {}\n".format(k, v1)
            comment += " + {} : {}\n".format(k, v2)
    return comment