/*
 * Duplicate file finder.
 *
 * Walks the directories named on the command line, computes the MD5 digest
 * of every regular file and records (digest, size) pairs in an ndbm
 * database.  A file whose digest and size are already recorded is reported
 * on stdout as a duplicate.
 */
#include <sys/types.h>
#include <sys/stat.h>

#include <dirent.h>
#include <fcntl.h>
#include <ndbm.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "md5.h"

#define DBNAME "your_database"

/* length in bytes of the digest used as the database key */
#define MD5_LEN 16

int lookup(void *md5sum, int size);
int addfile(void *md5sum, int size);
int dupe_dir(char *dirname);

/* available options */
char *opts = "r";

/* flags for them */
int recurse = 0;

/*
 * Print a usage message to stderr and terminate with exit status 1.
 * Does not return.
 */
void usage(char *progname)
{
    fprintf(stderr, "%s: [-r] [directory] ...\n", progname);
    fprintf(stderr, "\t-r: scan directories recursively\n");
    exit(1);
}

/*
 * Parse the command line options and set the matching global flags.
 *
 * Returns the number of argv elements that were consumed by options, so
 * that (1 + return value) is the index of the first non-option argument.
 * This is derived from optind rather than from the number of option
 * characters seen, so clustered options such as "-rr" and a terminating
 * "--" are accounted for correctly.
 */
int parse_args(int ac, char *av[])
{
    int opt;

    while ((opt = getopt(ac, av, opts)) != -1) {
        switch (opt) {
        case 'r':
            recurse = 1;
            break;
        case '?':
        default:
            usage(av[0]);
            break;
        }
    }

    return optind - 1;
}

/*
 * Process one directory entry: recurse into it when it is a directory and
 * recursion is enabled, otherwise checksum it and either report it as a
 * duplicate or record it in the database.
 */
static void dupe_entry(const char *dirname, const char *name)
{
    unsigned char md5sum[MD5_LEN];
    struct stat statbuf;
    FILE *check;
    char *filename;
    size_t len;
    int found;

    /* skip ., .. */
    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
        return;

    /* room for "dirname" + "/" + "name" + NUL, whatever their lengths */
    len = strlen(dirname) + strlen(name) + 2;
    filename = malloc(len);
    if (filename == NULL) {
        fprintf(stderr, "Out of memory at %s/%s\n", dirname, name);
        return;
    }
    snprintf(filename, len, "%s/%s", dirname, name);

    if (stat(filename, &statbuf) == -1) {
        fprintf(stderr, "Couldn't stat %s\n", filename);
        goto out;
    }

    if (recurse && S_ISDIR(statbuf.st_mode)) {
        dupe_dir(filename);
        goto out;
    }

    if (!S_ISREG(statbuf.st_mode)) {
        fprintf(stderr, "Skipping %s\n", filename);
        goto out;
    }

    if ((check = fopen(filename, "rb")) == NULL) {
        fprintf(stderr, "Couldn't open %s\n", filename);
        goto out;
    }
    md5_stream(check, md5sum);
    fclose(check);

    /*
     * The database stores sizes as int, so st_size is narrowed here.
     * NOTE(review): sizes above INT_MAX are truncated; widening would
     * change the lookup()/addfile() interface and the stored format.
     */
    found = lookup(md5sum, (int)statbuf.st_size);
    if (found < 0) {
        /* a database failure is an error, not a duplicate */
        fprintf(stderr, "Unable to look up %s\n", filename);
    } else if (found > 0) {
        printf("%s\n", filename);
    } else if (addfile(md5sum, (int)statbuf.st_size)) {
        fprintf(stderr, "Unable to add %s\n", filename);
    }

out:
    free(filename);
}

/*
 * Scan through a single directory, handling every entry in it.
 *
 * Returns 0 on success, -1 when the directory could not be read.
 */
int dupe_dir(char *dirname)
{
    struct dirent **files;
    int total;
    int x;

    total = scandir(dirname, &files, NULL, NULL);
    if (total < 0) {
        fprintf(stderr, "Couldn't scan %s\n", dirname);
        return -1;
    }

    for (x = 0; x < total; x++) {
        dupe_entry(dirname, files[x]->d_name);
        /* scandir() allocates each entry; release it once handled */
        free(files[x]);
    }
    free(files);

    return 0;
}

/*
 * Entry point: parse the options, then scan every directory named on the
 * command line.
 */
int main(int ac, char *av[])
{
    int i = 1;

    if (ac < 2) {
        usage(av[0]);
    }

    i += parse_args(ac, av);

    for (; i < ac; i++) {
        dupe_dir(av[i]);
    }

    return 0;
}

/*
 * Check and see if the database has anything for this md5sum.
 *
 * md5sum points at an MD5_LEN byte digest; size is the file size to match.
 *
 * Returns 1 when the digest is recorded with this size, 0 when it is not,
 * and -1 when the database could not be opened.
 */
int lookup(void *md5sum, int size)
{
    DBM *db;
    datum key;
    datum data;
    size_t count;
    size_t x;
    int found = 0;

    key.dptr = md5sum;
    key.dsize = MD5_LEN;

    /* open with O_RDWR and O_CREAT so that the dbm will be created if it
       doesn't exist. */
    db = dbm_open(DBNAME, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (db == NULL)
        return -1;

    data = dbm_fetch(db, key);
    if (data.dptr != NULL) {
        count = (size_t)data.dsize / sizeof(int);
        for (x = 0; x < count; x++) {
            int stored;

            /* copy out each value: the record need not be int-aligned */
            memcpy(&stored, (const char *)data.dptr + x * sizeof stored,
                   sizeof stored);
            if (stored == size) {
                found = 1;
                break;
            }
        }
    }

    /* close only after the fetched record has been examined: the memory
       behind data.dptr belongs to the database handle */
    dbm_close(db);

    return found;
}

/*
 * Record a file size under the given md5sum, appending it to any sizes
 * that are already stored for that digest.
 *
 * md5sum points at an MD5_LEN byte digest; size is the file size to add.
 *
 * Returns 0 on success, -1 when the database could not be opened, memory
 * could not be allocated, or the record could not be stored.
 */
int addfile(void *md5sum, int size)
{
    DBM *db;
    datum key;
    datum data;
    datum newdata;
    size_t old_count = 0;
    int *wsizes;
    int rc = -1;

    db = dbm_open(DBNAME, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (db == NULL)
        return -1;

    key.dptr = md5sum;
    key.dsize = MD5_LEN;

    data = dbm_fetch(db, key);
    if (data.dptr != NULL)
        old_count = (size_t)data.dsize / sizeof(int);

    wsizes = malloc((old_count + 1) * sizeof *wsizes);
    if (wsizes == NULL)
        goto out;

    /* existing sizes first, the new one appended at the end */
    if (old_count > 0)
        memcpy(wsizes, data.dptr, old_count * sizeof *wsizes);
    wsizes[old_count] = size;

    newdata.dptr = (void *)wsizes;
    newdata.dsize = (old_count + 1) * sizeof *wsizes;

    if (dbm_store(db, key, newdata, DBM_REPLACE) == 0)
        rc = 0;

    free(wsizes);

out:
    dbm_close(db);
    return rc;
}