# Source code for CIME.simple_compare

import os, re

from CIME.utils import expect

###############################################################################
def _normalize_string_value(value, case):
    ###############################################################################
    """
    Some of the strings are inherently prone to diffs, like file
    paths, etc. This function attempts to normalize that data so that
    it will not cause diffs.
    """
    # Any occurance of case must be normalized because test-ids might not match
    if case is not None:
        case_re = re.compile(r"{}[.]([GC])[.]([^./\s]+)".format(case))
        value = case_re.sub("{}.ACTION.TESTID".format(case), value)

    if "/" in value:
        # File path, just return the basename
        return os.path.basename(value)
    elif "username" in value:
        return ""
    elif ".log." in value:
        # Remove the part that's prone to diff
        components = value.split(".")
        return os.path.basename(".".join(components[0:-1]))
    else:
        return value


###############################################################################
def _skip_comments_and_whitespace(lines, idx):
    ###############################################################################
    """
    Starting at idx, return next valid idx of lines that contains real data
    """
    if idx == len(lines):
        return idx

    comment_re = re.compile(r"^[#!]")

    lines_slice = lines[idx:]
    for line in lines_slice:
        line = line.strip()
        if comment_re.match(line) is not None or line == "":
            idx += 1
        else:
            return idx

    return idx


###############################################################################
def _compare_data(gold_lines, comp_lines, case, offset_method=False):
    ###############################################################################
    """
    Compare two lists of lines, skipping whatever
    _skip_comments_and_whitespace skips and comparing the remaining lines
    after normalizing them with _normalize_string_value.

    gold_lines: reference lines.
    comp_lines: lines to check against the reference.
    case: case name forwarded to _normalize_string_value (may be None).
    offset_method: when True, a mismatch advances only one side (gold if
        it has more lines in total, otherwise comp) so that a single
        inserted or removed line does not make every later line differ.

    Returns (comments, cnt): comments is a human-readable description of
    the differences ("" when there are none) and cnt is the number of
    inequivalent line pairs. Extra or missing trailing lines are reported
    in comments but are not counted in cnt.

    >>> teststr = '''
    ... data1
    ... data2 data3
    ... data4 data5 data6
    ...
    ... # Comment
    ... data7 data8 data9 data10
    ... '''
    >>> _compare_data(teststr.splitlines(), teststr.splitlines(), None)
    ('', 0)

    >>> teststr2 = '''
    ... data1
    ... data2 data30
    ... data4 data5 data6
    ... data7 data8 data9 data10
    ... data00
    ... '''
    >>> results,_ = _compare_data(teststr.splitlines(), teststr2.splitlines(), None)
    >>> print(results)
    Inequivalent lines data2 data3 != data2 data30
      NORMALIZED: data2 data3 != data2 data30
    Found extra lines
    data00
    <BLANKLINE>
    >>> results,_ = _compare_data(teststr2.splitlines(), teststr.splitlines(), None)
    >>> print(results)
    Inequivalent lines data2 data30 != data2 data3
      NORMALIZED: data2 data30 != data2 data3
    Missing lines
    data00
    <BLANKLINE>
    >>> teststr3 = '''
    ... data1
    ... data4 data5 data6
    ... data7 data8 data9 data10
    ... data00
    ... '''
    >>> results,_ = _compare_data(teststr3.splitlines(), teststr2.splitlines(), None, offset_method=True)
    >>> print(results)
    Inequivalent lines data4 data5 data6 != data2 data30
      NORMALIZED: data4 data5 data6 != data2 data30
    <BLANKLINE>
    """
    comments = ""
    cnt = 0
    gidx, cidx = 0, 0
    gnum, cnum = len(gold_lines), len(comp_lines)
    while gidx < gnum or cidx < cnum:
        gidx = _skip_comments_and_whitespace(gold_lines, gidx)
        cidx = _skip_comments_and_whitespace(comp_lines, cidx)

        if gidx == gnum:
            if cidx == cnum:
                return comments, cnt
            else:
                comments += "Found extra lines\n"
                comments += "\n".join(comp_lines[cidx:]) + "\n"
                return comments, cnt
        elif cidx == cnum:
            # Gold still has data but comp is exhausted: report every
            # remaining gold line (the slice must run to the end of the list).
            comments += "Missing lines\n"
            comments += "\n".join(gold_lines[gidx:]) + "\n"
            return comments, cnt

        # Treat double and single quotes as equivalent.
        gold_value = gold_lines[gidx].strip()
        gold_value = gold_value.replace('"', "'")
        comp_value = comp_lines[cidx].strip()
        comp_value = comp_value.replace('"', "'")

        norm_gold_value = _normalize_string_value(gold_value, case)
        norm_comp_value = _normalize_string_value(comp_value, case)

        differs = norm_gold_value != norm_comp_value
        if differs:
            comments += "Inequivalent lines {} != {}\n".format(gold_value, comp_value)
            comments += "  NORMALIZED: {} != {}\n".format(
                norm_gold_value, norm_comp_value
            )
            cnt += 1

        if offset_method and differs:
            # Re-synchronize by stepping only the side with more lines in total.
            if gnum > cnum:
                gidx += 1
            else:
                cidx += 1
        else:
            gidx += 1
            cidx += 1

    return comments, cnt


###############################################################################
def compare_files(gold_file, compare_file, case=None):
    ###############################################################################
    """
    Returns true if files are the same, comments are returned too:
    (success, comments)

    gold_file: path of the reference file.
    compare_file: path of the file to check against the reference.
    case: optional case name forwarded to _compare_data.

    Both paths are checked with expect() before being read. The files are
    first compared line-by-line; if that finds inequivalent lines the
    comparison is repeated with offset_method=True and the result with
    fewer inequivalent lines is kept.
    """
    expect(os.path.exists(gold_file), "File not found: {}".format(gold_file))
    expect(os.path.exists(compare_file), "File not found: {}".format(compare_file))

    # Read each file once and close it promptly; the same line lists are
    # reused for the second comparison pass.
    with open(gold_file, "r") as gold_fd:
        gold_lines = gold_fd.readlines()
    with open(compare_file, "r") as comp_fd:
        comp_lines = comp_fd.readlines()

    comments, cnt = _compare_data(gold_lines, comp_lines, case)

    if cnt > 0:
        comments2, cnt2 = _compare_data(
            gold_lines,
            comp_lines,
            case,
            offset_method=True,
        )
        if cnt2 < cnt:
            comments = comments2

    return comments == "", comments
###############################################################################
def compare_runconfigfiles(gold_file, compare_file, case=None):
    ###############################################################################
    """
    Returns true if files are the same, comments are returned too:
    (success, comments)

    gold_file: path of the reference runconfig file.
    compare_file: path of the runconfig file to check.
    case: optional case name forwarded to findDiff.

    Both files are parsed with _parse_runconfig and the resulting
    dictionaries are diffed with findDiff. The " d1" / " d2" placeholders
    in the diff text are replaced by the corresponding file names.
    """
    for required in (gold_file, compare_file):
        expect(os.path.exists(required), "File not found: {}".format(required))

    # create dictionaries of the runconfig files and compare them
    gold_dict = _parse_runconfig(gold_file)
    compare_dict = _parse_runconfig(compare_file)

    comments = (
        findDiff(gold_dict, compare_dict, case=case)
        .replace(" d1", " " + gold_file)
        .replace(" d2", " " + compare_file)
    )

    # this picks up the case that an entry in compare is not in gold;
    # the roles of d1 and d2 are swapped, and so are the substitutions
    if not comments:
        comments = (
            findDiff(compare_dict, gold_dict, case=case)
            .replace(" d2", " " + gold_file)
            .replace(" d1", " " + compare_file)
        )

    return not comments, comments
def _parse_runconfig(filename): runconfig = {} inrunseq = False insubsection = None subsection_re = re.compile(r"\s*(\S+)::") group_re = re.compile(r"\s*(\S+)\s*:\s*(\S+)") var_re = re.compile(r"\s*(\S+)\s*=\s*(\S+)") with open(filename, "r") as fd: for line in fd: # remove comments line = line.split("#")[0] subsection_match = subsection_re.match(line) group_match = group_re.match(line) var_match = var_re.match(line) if re.match(r"\s*runSeq\s*::", line): runconfig["runSeq"] = [] inrunseq = True elif re.match(r"\s*::\s*", line): inrunseq = False elif inrunseq: runconfig["runSeq"].append(line) elif subsection_match: insubsection = subsection_match.group(1) runconfig[insubsection] = {} elif group_match: runconfig[group_match.group(1)] = group_match.group(2) elif insubsection and var_match: runconfig[insubsection][var_match.group(1)] = var_match.group(2) return runconfig
def findDiff(d1, d2, path="", case=None):
    """
    Describe the entries of d1 that are absent from, or differ in, d2.

    d1, d2: (possibly nested) dictionaries to compare. Only keys of d1 are
        visited, so keys present only in d2 are not reported.
    path: "->"-joined chain of keys leading to d1/d2; used as the heading
        of each reported difference.
    case: optional case name; a value of d1 that contains it is not
        compared. None disables this filter.

    Values whose key contains "username" or "logfile" are not compared
    either.

    Returns a string with one entry per difference ("" if none). The text
    refers to the second dictionary literally as "d2".
    """
    comment = ""
    for k in d1.keys():
        if k not in d2:
            comment += path + ":\n"
            comment += k + " as key not in d2\n"
            continue

        v1, v2 = d1[k], d2[k]

        if isinstance(v1, dict):
            if isinstance(v2, dict):
                # Build the child path in a local so that sibling keys do
                # not inherit the path of previously visited sub-dicts.
                sub_path = k if path == "" else path + "->" + k
                comment += findDiff(v1, v2, path=sub_path, case=case)
            else:
                # A section on one side and a plain value on the other is
                # a difference, not something to recurse into.
                comment += path + ":\n"
                comment += " - {} : {}\n".format(k, v1)
                comment += " + {} : {}\n".format(k, v2)
            continue

        # Guard against case=None: "None in some_string" raises TypeError.
        ignored = (
            (case is not None and case in v1)
            or "username" in k
            or "logfile" in k
        )
        if not ignored and v1 != v2:
            comment += path + ":\n"
            comment += " - {} : {}\n".format(k, v1)
            comment += " + {} : {}\n".format(k, v2)

    return comment