#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <ndbm.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "md5.h"
/* Path of the ndbm database passed to dbm_open() by lookup() and addfile(). */
#define DBNAME "/var/db/filedupe.db"
/* Returns 1 if the (md5sum, size) pair is already recorded, 0 if it is
   not, and -1 on failure. */
int lookup(void *md5sum, int size);
/* Appends size to the record stored under md5sum; returns 0 on success
   and -1 on failure. */
int addfile(void *md5sum, int size);
/* available options (the option string handed to getopt()) */
char *opts = "r";
/* flags for them: recurse is set to 1 by -r and makes dupe_dir() descend
   into sub-directories */
int recurse = 0;
/*
 * Write the command-line synopsis to stderr and terminate the process
 * with exit status 1.  This function does not return.
 */
void usage(char *progname)
{
    fprintf(stderr, "%s: [-r] <directory> [directory] ...\n", progname);
    fputs("\t-r: scan directories recursively\n", stderr);
    exit(1);
}
int parse_args(int ac, char *av[])
{
int opt;
int count = 0;
while((opt = getopt(ac, av, opts)) != EOF) {
switch(opt) {
case 'r':
recurse = 1;
break;
case '?':
usage(av[0]);
break;
}
count++;
}
return count;
}
/* Forward declaration: check_entry() recurses into sub-directories. */
int dupe_dir(char *dirname);

/*
 * Examine one directory entry: descend into it, skip it, print it as a
 * duplicate, or record it in the database.  Every problem is reported on
 * stderr and the entry is then left alone.
 */
static void check_entry(const char *dirname, const char *name)
{
    char filename[1024];
    struct stat statbuf;
    FILE *check;
    void *md5sum;
    int len;
    int found;

    /* skip ., .. */
    if (!strcmp(".", name) || !strcmp("..", name))
        return;

    /* bounded formatting: a deep tree must not overrun the path buffer */
    len = snprintf(filename, sizeof filename, "%s/%s", dirname, name);
    if (len < 0 || (size_t)len >= sizeof filename) {
        fprintf(stderr, "Path too long: %s/%s\n", dirname, name);
        return;
    }

    if (stat(filename, &statbuf) == -1) {
        fprintf(stderr, "Couldn't stat %s\n", filename);
        return;
    }
    if (recurse && S_ISDIR(statbuf.st_mode)) {
        dupe_dir(filename);
        return;
    }
    if (!S_ISREG(statbuf.st_mode)) {
        fprintf(stderr, "Skipping %s\n", filename);
        return;
    }
    if ((check = fopen(filename, "r")) == NULL) {
        fprintf(stderr, "Couldn't open %s\n", filename);
        return;
    }

    /* heap buffer keeps the digest suitably aligned for any md5 code */
    md5sum = malloc(16);
    if (md5sum == NULL) {
        fprintf(stderr, "Out of memory while checking %s\n", filename);
        fclose(check);
        return;
    }
    md5_stream(check, md5sum);
    fclose(check);

    /*
     * NOTE(review): lookup()/addfile() take the size as int, so st_size
     * is narrowed here; very large files may compare by a truncated size.
     */
    found = lookup(md5sum, statbuf.st_size);
    if (found < 0) {
        /* a database failure is not a match: do not report a duplicate */
        fprintf(stderr, "Unable to look up %s\n", filename);
    } else if (found > 0) {
        printf("%s\n", filename);
    } else if (addfile(md5sum, statbuf.st_size)) {
        fprintf(stderr, "Unable to add %s\n", filename);
    }
    free(md5sum);
}

/*
 * Scan a single directory.  The path of every regular file whose
 * (MD5 digest, size) pair is already in the database is printed on
 * stdout; files not seen before are added to the database.
 * Sub-directories are descended into only when the global recurse flag
 * is set; other non-regular entries are skipped with a note on stderr.
 *
 * Returns 0 once the directory has been processed, or -1 if it could
 * not be read at all.
 */
int dupe_dir(char *dirname)
{
    struct dirent **files = NULL;
    int total;
    int x;

    total = scandir(dirname, &files, NULL, NULL);
    if (total < 0) {
        fprintf(stderr, "Couldn't scan %s\n", dirname);
        return -1;
    }

    for (x = 0; x < total; x++) {
        check_entry(dirname, files[x]->d_name);
        /* scandir() allocates each entry; release it once handled */
        free(files[x]);
    }
    free(files);
    return 0;
}
/*
 * Entry point: parse the options, then scan every directory named on the
 * command line.  Prints the usage text and exits when no directory is
 * given.  Returns 0 if every directory was scanned and 1 if dupe_dir()
 * reported a failure for any of them.
 */
int main(int ac, char *av[]) {
    int i = 1;
    int status = 0;

    i += parse_args(ac, av);
    /* nothing left to scan: no arguments at all, or options only ("-r") */
    if (i >= ac) {
        usage(av[0]);
    }
    for (; i < ac; i++) {
        if (dupe_dir(av[i]) != 0) {
            status = 1;
        }
    }
    return status;
}
/*
 * Check and see if the database has anything for this md5sum with this
 * file size.
 *
 * md5sum  16-byte digest used as the database key
 * size    file size to search for among the ints stored under that key
 *
 * Returns 1 if a matching entry exists, 0 if it does not, and -1 if the
 * database could not be opened.
 */
int lookup(void *md5sum, int size) {
    DBM *db;
    datum key;
    datum data;
    int found = 0;

    key.dptr = md5sum;
    key.dsize = 16;
    /* open with O_RDWR and O_CREAT so that the dbm will be created
       if it doesn't exist. */
    if ((db = dbm_open(DBNAME, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == NULL)
        return -1;

    data = dbm_fetch(db, key);
    if (data.dptr != NULL && data.dsize > 0) {
        size_t count = (size_t)data.dsize / sizeof(int);
        size_t x;

        /*
         * Scan the record while the database is still open: the fetched
         * pointer refers to storage owned by the handle and must not be
         * read after dbm_close().  Each int is copied out with memcpy()
         * because the record is not guaranteed to be aligned.
         */
        for (x = 0; x < count; x++) {
            int stored;

            memcpy(&stored, (const char *)data.dptr + x * sizeof stored,
                   sizeof stored);
            if (stored == size) {
                found = 1;
                break;
            }
        }
    }
    dbm_close(db);
    return found;
}
/*
 * Record a file in the database by appending its size to the list of
 * ints stored under its MD5 digest (creating the record if the digest
 * has not been seen before).
 *
 * md5sum  16-byte digest used as the database key
 * size    file size to append
 *
 * Returns 0 on success and -1 if the database could not be opened, memory
 * ran out, or the record could not be stored.
 */
int addfile(void *md5sum, int size) {
    DBM *db;
    datum key;
    datum data;
    datum newdata;
    size_t wcount = 0;
    int *wsizes;
    int rc = -1;

    if ((db = dbm_open(DBNAME, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == NULL)
        return -1;

    key.dptr = md5sum;
    key.dsize = 16;
    data = dbm_fetch(db, key);
    if (data.dptr != NULL && data.dsize > 0) {
        /* only whole ints are carried over from the existing record */
        wcount = (size_t)data.dsize / sizeof(int);
    }

    wsizes = malloc((wcount + 1) * sizeof *wsizes);
    if (wsizes != NULL) {
        if (wcount > 0)
            memcpy(wsizes, data.dptr, wcount * sizeof *wsizes);
        wsizes[wcount] = size;

        newdata.dptr = (void *)wsizes;
        newdata.dsize = (wcount + 1) * sizeof *wsizes;
        /* a failed store must not be reported as success */
        if (dbm_store(db, key, newdata, DBM_REPLACE) == 0)
            rc = 0;
        free(wsizes);
    }
    dbm_close(db);
    return rc;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */