Personal tools
You are here: Home technobabble projects python recursive directory and file comparison diffTextDirectories.py

diffTextDirectories.py

by Craig McChesney last modified Nov 26, 2008 11:22 AM

This Python program recursively compares two directory structures and reports the differences between them. It uses Python's difflib module to compare text files. It also demonstrates os.path functions for file and directory checks such as exists, islink, and isdir, and it uses os.walk to iterate over the directory structure - a pretty cool mechanism. Finally, it shows how to write a main function that is callable from other contexts, such as the interactive Python prompt.

#!/usr/bin/env python2.5

# diffs the files in two directories
# for each file in the source directory
# complain if it doesn't exist in the target directory
# or if there is a diff between source and target

import sys
import os
import difflib

def usage():
    """Print the one-line command-line synopsis to standard output."""
    # Single-argument parenthesised form prints the same text whether it is
    # parsed as a Python 2 print statement or a Python 3 print() call.
    print("usage: diffTextDirectories.py sourceDir targetDir")



def _requireDirectory(label, path):
    """Exit with status 1 unless path exists and is a directory.

    label is the argument name ("sourceDir" or "targetDir") shown in the
    message printed before exiting.
    """
    if not os.path.exists(path):
        print("%s doesn't exist: %s" % (label, path))
        sys.exit(1)

    if not os.path.isdir(path):
        print("%s not a directory: %s" % (label, path))
        sys.exit(1)


def _targetPathFor(root, sourceDir, targetDir):
    """Return the directory under targetDir that mirrors root under sourceDir."""
    # os.walk builds every root by joining names onto sourceDir, so sourceDir
    # is always a literal prefix of root.  Slice off only that prefix: a
    # str.replace would also delete later occurrences of the same text and
    # would glue names together when sourceDir ends with a separator.
    separators = os.sep + (os.altsep or '')
    relative = root[len(sourceDir):].lstrip(separators)
    if not relative:
        return targetDir
    return os.path.join(targetDir, relative)


def _readLines(path):
    """Return the lines of the file at path, closing the file afterwards."""
    handle = open(path, "r")
    try:
        return handle.readlines()
    finally:
        handle.close()


def _unifiedDiffText(sourcePath, targetPath):
    """Return the unified diff of two files as one string ('' if identical)."""
    diffLines = difflib.unified_diff(_readLines(sourcePath),
                                     _readLines(targetPath))
    # Diff lines built from readlines() input already end with a newline;
    # strip it before joining so the output is not double-spaced, and so a
    # final line without a newline does not run into the next diff line.
    return '\n'.join([line.rstrip('\r\n') for line in diffLines])


def _printNameSection(header, names):
    """Print a blank line, header, and tab-indented names; nothing if empty."""
    if not names:
        return
    print("")
    print(header)
    for name in names:
        print("\t%s" % (name))


def main(args):
    """Report how the tree under args[1] differs from the tree under args[0].

    args is a sequence whose first two items are the source and target
    directory paths.  For every directory below the source, the report
    (written to standard output) lists, in this order:

      * the directory itself, if it has no counterpart directory in target;
      * files missing from the target directory;
      * files whose target counterpart exists but is not a regular file;
      * a unified diff for each file whose contents differ.

    Symbolic links in the source tree are skipped.  Files that exist only
    in the target tree are not reported.  Calls sys.exit(1) if either
    argument is missing on disk or is not a directory.
    """
    # extract command line args
    sourceDir = args[0]
    targetDir = args[1]

    _requireDirectory("sourceDir", sourceDir)
    _requireDirectory("targetDir", targetDir)

    # walk the sourceDirectory...
    for root, dirs, files in os.walk(sourceDir):

        targetSubDir = _targetPathFor(root, sourceDir, targetDir)

        # isdir() is False for missing paths too, so one test covers both
        if not os.path.isdir(targetSubDir):
            print("")
            print("sourceDir %s not found at %s" % (root, targetSubDir))
            continue

        # classify every non-link source file by the state of its counterpart
        missing = []
        notFiles = []
        comparable = []
        for sourceFile in files:

            # skip symbolic links
            if os.path.islink(os.path.join(root, sourceFile)):
                continue

            targetFile = os.path.join(targetSubDir, sourceFile)
            if not os.path.exists(targetFile):
                missing.append(sourceFile)
            elif not os.path.isfile(targetFile):
                notFiles.append(sourceFile)
            else:
                comparable.append(sourceFile)

        _printNameSection(
            "Files in %s missing from %s" % (root, targetSubDir), missing)
        _printNameSection(
            "Files in %s not valid files in %s" % (root, targetSubDir),
            notFiles)

        # now diff the source and target files
        headerPrinted = False
        for sourceFile in comparable:
            resultStr = _unifiedDiffText(os.path.join(root, sourceFile),
                                         os.path.join(targetSubDir, sourceFile))
            if not resultStr:
                continue

            # print the section header before the first differing file only
            if not headerPrinted:
                headerPrinted = True
                print("")
                print("Files that differ between %s and %s"
                      % (root, targetSubDir))

            print("")
            print("********************************")
            print("\t%s" % (sourceFile))
            print("********************************")
            print(resultStr)
            print("********************************")
            print("********************************")


   
if __name__ == '__main__':

    # Everything after the script name; exactly two directories are required.
    cliArgs = sys.argv[1:]
    if len(cliArgs) != 2:
        usage()
        sys.exit(2)

    main(cliArgs)

Document Actions