import os, re
from CIME.utils import expect
###############################################################################
def _normalize_string_value(value, case):
###############################################################################
"""
Some of the strings are inherently prone to diffs, like file
paths, etc. This function attempts to normalize that data so that
it will not cause diffs.
"""
# Any occurance of case must be normalized because test-ids might not match
if (case is not None):
case_re = re.compile(r'{}[.]([GC])[.]([^./\s]+)'.format(case))
value = case_re.sub("{}.ACTION.TESTID".format(case), value)
if ("/" in value):
# File path, just return the basename
return os.path.basename(value)
else:
return value
###############################################################################
def _skip_comments_and_whitespace(lines, idx):
###############################################################################
"""
Starting at idx, return next valid idx of lines that contains real data
"""
if (idx == len(lines)):
return idx
comment_re = re.compile(r'^[#!]')
lines_slice = lines[idx:]
for line in lines_slice:
line = line.strip()
if (comment_re.match(line) is not None or line == ""):
idx += 1
else:
return idx
return idx
###############################################################################
def _compare_data(gold_lines, comp_lines, case, offset_method=False):
###############################################################################
"""
>>> teststr = '''
... data1
... data2 data3
... data4 data5 data6
...
... # Comment
... data7 data8 data9 data10
... '''
>>> _compare_data(teststr.splitlines(), teststr.splitlines(), None)
('', 0)
>>> teststr2 = '''
... data1
... data2 data30
... data4 data5 data6
... data7 data8 data9 data10
... data00
... '''
>>> results,_ = _compare_data(teststr.splitlines(), teststr2.splitlines(), None)
>>> print(results)
Inequivalent lines data2 data3 != data2 data30
NORMALIZED: data2 data3 != data2 data30
Found extra lines
data00
<BLANKLINE>
>>> teststr3 = '''
... data1
... data4 data5 data6
... data7 data8 data9 data10
... data00
... '''
>>> results,_ = _compare_data(teststr3.splitlines(), teststr2.splitlines(), None, offset_method=True)
>>> print(results)
Inequivalent lines data4 data5 data6 != data2 data30
NORMALIZED: data4 data5 data6 != data2 data30
<BLANKLINE>
"""
comments = ""
cnt = 0
gidx, cidx = 0, 0
gnum, cnum = len(gold_lines), len(comp_lines)
while (gidx < gnum or cidx < cnum):
gidx = _skip_comments_and_whitespace(gold_lines, gidx)
cidx = _skip_comments_and_whitespace(comp_lines, cidx)
if (gidx == gnum):
if (cidx == cnum):
return comments, cnt
else:
comments += "Found extra lines\n"
comments += "\n".join(comp_lines[cidx:]) + "\n"
return comments, cnt
elif (cidx == cnum):
comments += "Missing lines\n"
comments += "\n".join(gold_lines[gidx:1]) + "\n"
return comments, cnt
gold_value = gold_lines[gidx].strip()
gold_value = gold_value.replace('"',"'")
comp_value = comp_lines[cidx].strip()
comp_value = comp_value.replace('"',"'")
norm_gold_value = _normalize_string_value(gold_value, case)
norm_comp_value = _normalize_string_value(comp_value, case)
if (norm_gold_value != norm_comp_value):
comments += "Inequivalent lines {} != {}\n".format(gold_value, comp_value)
comments += " NORMALIZED: {} != {}\n".format(norm_gold_value, norm_comp_value)
cnt += 1
if offset_method and (norm_gold_value != norm_comp_value):
if gnum > cnum:
gidx += 1
else:
cidx += 1
else:
gidx += 1
cidx += 1
return comments, cnt
###############################################################################
[docs]def compare_files(gold_file, compare_file, case=None):
###############################################################################
"""
Returns true if files are the same, comments are returned too:
(success, comments)
"""
expect(os.path.exists(gold_file), "File not found: {}".format(gold_file))
expect(os.path.exists(compare_file), "File not found: {}".format(compare_file))
comments, cnt = _compare_data(open(gold_file, "r").readlines(),
open(compare_file, "r").readlines(), case)
if cnt > 0:
comments2, cnt2 = _compare_data(open(gold_file, "r").readlines(),
open(compare_file, "r").readlines(),
case, offset_method=True)
if cnt2 < cnt:
comments = comments2
return comments == "", comments