#!/bin/sh
#
# Tue Apr 28 08:30:40 MDT 2009 - 0.9
# cleaned up code, do checks if output files already exist
# no more need to manually disable parts of this script
# output files are left intact for manual removal/reuse
# added in 'maxdepth' to $findcmd to prevent directory
# traversal if not required - by default goes 999 dirs deep
# Thu Feb 21 08:20:55 MST 2008 - 0.5
# basics, more "options" coming soon
#
# script to recursively check for duplicate files in current directory
#
# scripts@pknet.net
# http://peterk.org/scripts/
#
# Fixed search path for the external tools this script runs.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/games:/usr/local/sbin:/usr/local/bin
export PATH
# user configurable variables
# work files - they are left in place after a run so they can be reused
cksumraw=~/.tmp.cksumraw
cksumsorted=~/.tmp.cksumsorted
cksumresults=~/.tmp.cksumresults
# Command that lists the files to checksum, NUL-terminated (-print0).
# -maxdepth is a global find option, so it is placed before tests such as
# -type; GNU find warns when it comes after them. Lower 999 to limit how
# deep the scan descends.
findcmd="/usr/bin/find ./ -maxdepth 999 -type f -print0"
# Command that prints a timestamp for the progress messages.
# The script expands this variable unquoted, which word-splits it but does
# NOT remove quote characters stored in the value - so the format must not
# be wrapped in quotes, or date would print them literally.
datecmd='/bin/date +%Y.%m.%d.%H%M.%S'
# end user configurable variables
# Every stage checks whether its output file already exists; if it does,
# the stage is skipped and the existing file is used by the next stage.
echo
# Stage 1: 'cksum' each file in the directory tree.
if [ ! -f "$cksumraw" ]
then
  echo "populating $cksumraw - $($datecmd)"
  # $findcmd is intentionally unquoted: it holds a command plus arguments
  # and relies on word splitting. The output path is quoted so a home
  # directory containing blanks or glob characters still works.
  $findcmd | /usr/bin/xargs -0 cksum >> "$cksumraw"
else
  echo "$cksumraw ALREADY exists, NOT repopulating"
  echo " continuing with sorting it"
  echo
fi
# Stage 2: sort the raw checksum list so that entries with the same CRC end
# up on neighbouring lines; the duplicate scan that follows only compares
# each line with the one remembered from just before it.
if [ ! -f "$cksumsorted" ]
then
  echo "populating $cksumsorted - $($datecmd)"
  # LC_ALL=C forces plain byte-wise ordering. Under other locales the
  # collation rules may give blanks and punctuation little or no weight,
  # which can interleave lines such as "123 45 ./a" and "1234 5 ./b" and
  # so separate entries that share a CRC.
  LC_ALL=C sort "$cksumraw" > "$cksumsorted"
else
  echo "$cksumsorted ALREADY exists, NOT repopulating"
  echo " continuing with analyzing it"
  echo
fi
# Stage 3: go through the sorted list to check for dups.
# cksum writes, per file: checksum CRC, total number of octets, the filename

# report_duplicates - read sorted cksum output on stdin and write, for each
# line whose CRC and octet count both equal those of the remembered line,
# the two-line report
#   file <first file of the run>
#    has duplicate <this file>
# to stdout. Lines that do not contain at least two blanks are ignored.
# The body is a subshell ( ... ) so its variables do not leak to the caller.
report_duplicates() (
  have_prev=0
  # IFS= and -r keep every byte of the line (backslashes, trailing blanks);
  # the || clause still processes a final line that lacks a newline
  while IFS= read -r line || [ -n "$line" ]
  do
    # need "CRC OCTETS FILENAME": skip blank or truncated lines
    case $line in
      *' '*' '*) ;;
      *) continue ;;
    esac
    # split on the first two blanks only, so the file name is kept verbatim
    crc=${line%% *}
    rest=${line#* }
    octets=${rest%% *}
    filename=${rest#* }
    # same CRC and same size as the remembered entry: it is a duplicate
    # (files of different size can never be identical, whatever the CRC)
    if [ "$have_prev" -eq 1 ] &&
       [ "$crc" = "$prevchksum" ] &&
       [ "$octets" = "$prevoctets" ]
    then
      # file names are passed as arguments, never as the format string,
      # so '%' and '\' in a name are printed as-is
      printf 'file %s \n has duplicate %s \n' "$prevfile" "$filename"
    else
      # first line, or start of a new run: remember it for comparison
      have_prev=1
      prevchksum=$crc
      prevoctets=$octets
      prevfile=$filename
    fi
  done
)

if [ ! -f "$cksumresults" ]
then
  echo "populating $cksumresults - $($datecmd)"
  # init the results file (it starts with one blank line)
  echo > "$cksumresults"
  # show each finding on the terminal and append it to the results file
  report_duplicates < "$cksumsorted" | tee -a "$cksumresults"
  echo "done populating $cksumresults - $($datecmd)"
else
  echo "$cksumresults ALREADY exists, NOT repopulating"
  echo
fi
# Final summary: list the work files that were left in place and say where
# the duplicate report was written.
echo 'temp files NOT deleted:'
echo " $cksumraw"
echo " $cksumsorted"
# quoted so a path containing blanks or glob characters is printed verbatim
echo "Output list of duplicates saved in $cksumresults"