Note: This site is currently "Under construction". I'm migrating to a new version of my site building software. Lots of things are in a state of disrepair as a result (for example, footnote links aren't working). It's all part of the process of building in public. Most things should still be readable though.

Get The Diff Between Two Files In Python

This is what I ended up with for real

Code

#!/usr/bin/env python3

import difflib
import normalize_dates
import unittest
import filecmp
import sys
import io


class TestNormalizeDates(unittest.TestCase):
    """Compare files in the output directory against the target files."""

    def test_first_file(self):
        """Call ``normalize_dates.make_files`` and check each output file.

        Each output file is read as a list of lines and compared with the
        matching target file via ``assertListEqual``.
        """
        input_dir = "data_for_tests/input"
        output_dir = "data_for_tests/output"
        target_dir = "data_for_tests/targets"

        normalize_dates.make_files(
            input_dir=input_dir,
            output_dir=output_dir
        )

        file_list = ['1', '2']

        for name in file_list:
            # subTest keeps checking the remaining files after a mismatch and
            # labels each failure with the file it belongs to.
            with self.subTest(file=name):
                a_file = f"{output_dir}/{name}.txt"
                b_file = f"{target_dir}/{name}.txt"
                with open(a_file) as _a:
                    a_list = _a.read().splitlines()
                with open(b_file) as _b:
                    b_list = _b.read().splitlines()
                self.assertListEqual(a_list, b_list)


# Run the tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()

The sections below are notes to put together later.

NOTE: `filecmp.cmp` has problems with newlines in some cases that I can't figure out (it looks like there shouldn't be a difference, but it reports one anyway).

I think this is the way I'm going to do it for testing (time passes... nope, gonna use difflib below since it's easier to see what's up). TODO: Figure out if there's a way to use the diff itself for the compare instead of filecmp.

Code

#!/usr/bin/env python3 is here

import difflib
import normalize_dates
import unittest
import filecmp
import sys
import io


class TestNormalizeDates(unittest.TestCase):
    """Compare an input file with its output file, line by line."""

    def test_first_file(self):
        """Call ``normalize_dates.make_files`` and compare ``1.txt`` files.

        The input and output copies of ``1.txt`` are read with
        ``readlines()`` and compared via ``assertListEqual``.
        """
        # Plain strings, with no trailing comma: a trailing comma would turn
        # the value into a one-element tuple instead of a path string.
        input_dir = "data_for_tests/input"
        output_dir = "data_for_tests/output"

        normalize_dates.make_files(
            input_dir=input_dir,
            output_dir=output_dir
        )

        input_file = "data_for_tests/input/1.txt"
        output_file = "data_for_tests/output/1.txt"

        with open(input_file) as _in_file:
            input_lines = _in_file.readlines()
        with open(output_file) as _out_file:
            output_lines = _out_file.readlines()

        self.assertListEqual(
            input_lines,
            output_lines
        )

# Run the tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()

This is the way I like to do it; it's very compact.

Code

import difflib
import sys

input_file = "/Users/alans/workshop/site_ksuid_migration/02_add_and_normalize_dates_in_frontmatter/data_for_tests/input/1.txt"
output_file = "/Users/alans/workshop/site_ksuid_migration/02_add_and_normalize_dates_in_frontmatter/data_for_tests/output/1.txt"

# Load each file as a list of lines (line endings kept) for difflib.
with open(input_file) as in_handle:
    input_data = list(in_handle)

with open(output_file) as out_handle:
    output_data = list(out_handle)

# n=0 asks for zero lines of context around each change.
diff_lines = difflib.unified_diff(input_data, output_data, n=0)
sys.stdout.writelines(diff_lines)

Here are the other examples:

Code

# Pass names for both sides of the diff via the fromfile/tofile arguments.
named_diff = difflib.unified_diff(
    input_data,
    output_data,
    fromfile='input_file.txt',
    tofile='output_file.txt',
)
sys.stdout.writelines(named_diff)



-- hr

category: Miscellaneous
-date: '2007-08-29'
+date: '2007-08-29T00:00:00'
slug: /a-few-more-dave-ramsey-thoughts
title: A few more Dave Ramsey thoughts
template: post


# Print blank lines before the next diff.
print("\n")
print("\n")


# compact output: file names for both sides, zero lines of context
compact_named_diff = difflib.unified_diff(
    input_data,
    output_data,
    fromfile='input_file.txt',
    tofile='output_file.txt',
    n=0,
)
sys.stdout.writelines(compact_named_diff)

Output

-date: '2007-08-29'
+date: '2007-08-29T00:00:00'


# Print blank lines before the next diff.
print("\n")
print("\n")



# compact output with no file names which looks a little cleaner to me
# there's no need to pass the file names and by reducing the context
# it's very clean
compact_diff = difflib.unified_diff(input_data, output_data, n=0)
sys.stdout.writelines(compact_diff)

TODO: Go through here and write up more examples:

https://docs.python.org/3/library/difflib.html#module-difflib

See also this, which looks interesting, but note that the `io.open` isn't closed (see the comment on that):

Code

# NOTE: the file objects returned by io.open() are never explicitly closed here.
self.assertListEqual(
    list(io.open(input_file)),
    list(io.open(output_file))
)