Monday, 30 March 2020

Bash script to remove duplicate files

Identify duplicate files in the current directory and remove them.

vi remove_duplicate.sh
#!/bin/bash
#
# remove_duplicate.sh - delete duplicate regular files in the current directory.
#
# Two files are duplicates when their MD5 checksums are equal. For every group
# of identical files the name that sorts first (byte order) is kept and all
# other members of the group are deleted. Hidden files, directories and
# symbolic links are never touched (a symlink would otherwise look like a
# duplicate of its own target and could get the target deleted).
#
# Two report files are (re)written in the current directory on every run:
#   duplicate_files   every file that belongs to a group of duplicates
#   duplicate_sample  the one file per group that is kept
#
# Usage:    cd <directory> && ./remove_duplicate.sh
# Requires: md5sum, sort -z and xargs -0 (GNU coreutils / findutils).

readonly DUPLICATE_LIST=duplicate_files
readonly KEEP_LIST=duplicate_sample

#######################################
# Print one NUL-terminated "<md5> <name>" record for every candidate file in
# the current directory, sorted by checksum and then by name.
# Globals:   DUPLICATE_LIST, KEEP_LIST (read)
# Outputs:   NUL-delimited records on stdout
#######################################
hash_candidates() {
  local name sum
  for name in *; do
    # Regular files only; this also skips the literal "*" of an empty directory.
    [[ -f "$name" && ! -L "$name" ]] || continue
    # The report files are produced by this script, never inputs to it.
    [[ "$name" == "$DUPLICATE_LIST" || "$name" == "$KEEP_LIST" ]] && continue
    # Hash via stdin so names starting with "-" or containing special
    # characters cannot be misread by md5sum; unreadable files are skipped.
    sum=$(md5sum < "$name") || continue
    printf '%s %s\0' "${sum%% *}" "$name"
  done | LC_ALL=C sort -z
}

#######################################
# Write the given names, sorted and de-duplicated, one per line to a file.
# With no names the file is simply truncated.
# Arguments: $1 - target file; remaining arguments - names to write
# Returns:   non-zero if the target file cannot be written
#######################################
write_list() {
  local target=$1
  shift
  if (( $# )); then
    printf '%s\n' "$@" | LC_ALL=C sort -u
  fi > "$target"
}

#######################################
# Find duplicate files, write the reports and delete the surplus copies.
# Globals:   DUPLICATE_LIST, KEEP_LIST (read)
# Outputs:   progress messages on stdout, deleted names and errors on stderr
# Returns:   0 on success, 1 if a tool is missing, a report cannot be
#            written or a file cannot be removed
#######################################
main() {
  local record sum name prev_sum='' pending_keeper=''
  local -a group_members=() keepers=() extras=()

  if ! command -v md5sum > /dev/null; then
    echo "md5sum is required but was not found." >&2
    return 1
  fi

  # Records arrive sorted by checksum, so identical files are adjacent and
  # the first record of each run is the copy to keep.
  while IFS= read -r -d '' record; do
    sum=${record%% *}
    name=${record#* }
    if [[ "$sum" != "$prev_sum" ]]; then
      prev_sum=$sum
      pending_keeper=$name
      continue
    fi
    # Second member of a group: only now is the first one known to be a keeper.
    if [[ -n "$pending_keeper" ]]; then
      keepers+=("$pending_keeper")
      group_members+=("$pending_keeper")
      pending_keeper=''
    fi
    extras+=("$name")
    group_members+=("$name")
  done < <(hash_candidates)

  # Write the reports before deleting anything; abort if that is impossible.
  write_list "$DUPLICATE_LIST" "${group_members[@]}" || return 1
  write_list "$KEEP_LIST" "${keepers[@]}" || return 1

  if (( ${#extras[@]} == 0 )); then
    echo No duplicate files found.
    return 0
  fi

  echo Removing...
  printf '%s\n' "${extras[@]}" >&2
  # NUL-delimited hand-off keeps names with spaces or quotes intact; plain
  # "rm -f" is enough because only regular files are ever selected.
  if printf '%s\0' "${extras[@]}" | xargs -0 rm -f --; then
    echo Removed duplicate files successfully.
  else
    echo "Failed to remove some duplicate files." >&2
    return 1
  fi
}

main "$@"

No comments:

Post a comment