#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <ndbm.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "md5.h"

/* Path of the ndbm database opened by lookup() and addfile(). */
#define DBNAME "/var/db/filedupe.db"

/* Forward declarations; both functions are defined after main(). */
int lookup(void *md5sum, int size);
int addfile(void *md5sum, int size);

/* Option string handed to getopt() by parse_args(): only -r is accepted. */
char *opts = "r";

/* Set to 1 by -r; when nonzero, dupe_dir() descends into subdirectories. */
int recurse = 0;

/*
 * Write the command-line synopsis to stderr and terminate the process
 * with exit status 1.  This function does not return.
 *
 * progname: name to show in front of the synopsis (normally argv[0]).
 */
void usage(char *progname)
{
	fprintf(stderr, "%s: [-r] <directory> [directory] ...\n", progname);
	fputs("\t-r: scan directories recursively\n", stderr);
	exit(1);
}

/*
 * Parse the command-line options listed in `opts`.
 *
 * Sets `recurse` when -r is given.  An unrecognized option prints the
 * usage message, which exits the program.
 *
 * Returns the number of argv slots consumed by options (optind - 1), so
 * that 1 + the return value is the index of the first non-option argument.
 * Counting getopt() hits instead would be wrong for clustered options
 * ("-rr" is two hits in one slot) and for the "--" terminator (one slot,
 * no hit).
 */
int parse_args(int ac, char *av[])
{
	/* Normally declared by <unistd.h>; repeated so this function does not
	   depend on which feature-test macros are in effect. */
	extern int optind;
	int opt;

	/* POSIX specifies -1, not EOF, as getopt()'s end-of-options value. */
	while((opt = getopt(ac, av, opts)) != -1) {

		switch(opt) {
		case 'r':
			recurse = 1;
			break;

		case '?':
		default:
			usage(av[0]);
			break;
		}
	}

	return optind - 1;

}
	

	
/* dupe_dir() and dupe_entry() call each other when recursing. */
int dupe_dir(char *dirname);

/*
 * Examine one entry of a directory.
 *
 * "." and ".." are ignored.  Directories are handed back to dupe_dir()
 * when `recurse` is set.  Anything else that is not a regular file is
 * reported on stderr and skipped.  A regular file is hashed with
 * md5_stream(); if lookup() already knows the digest/size pair, the path
 * is printed on stdout, otherwise the pair is recorded with addfile().
 *
 * dirname: directory being scanned.
 * name:    entry name inside that directory.
 *
 * Note: st_size is narrowed to the int taken by lookup()/addfile().
 */
static void dupe_entry(const char *dirname, const char *name)
{
	char filename[1024];
	struct stat statbuf;
	FILE *check;
	void *md5sum;
	int len;
	int known;

	/* skip ., .. */
	if(strcmp(".", name) == 0 || strcmp("..", name) == 0)
		return;

	/* Bounded formatting: a deep tree must not overrun filename[]. */
	len = snprintf(filename, sizeof filename, "%s/%s", dirname, name);
	if(len < 0 || (size_t)len >= sizeof filename) {
		fprintf(stderr, "Skipping %s/%s: path too long\n", dirname, name);
		return;
	}

	if(stat(filename, &statbuf) == -1) {
		fprintf(stderr, "Couldn't stat %s\n", filename);
		return;
	}

	if(recurse && S_ISDIR(statbuf.st_mode)) {
		dupe_dir(filename);
		return;
	}

	if(!S_ISREG(statbuf.st_mode)) {
		fprintf(stderr, "Skipping %s\n", filename);
		return;
	}

	if((check = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "Couldn't open %s\n", filename);
		return;
	}

	/* An MD5 digest is 16 bytes; lookup()/addfile() use it as the key. */
	md5sum = malloc(16);
	if(md5sum == NULL) {
		fprintf(stderr, "Out of memory checking %s\n", filename);
		fclose(check);
		return;
	}
	md5_stream(check, md5sum);
	fclose(check);

	known = lookup(md5sum, statbuf.st_size);
	if(known < 0) {
		/* Database unavailable: not the same thing as "duplicate". */
		fprintf(stderr, "Unable to look up %s\n", filename);
	} else if(known) {
		printf("%s\n", filename);
	} else if(addfile(md5sum, statbuf.st_size)) {
		fprintf(stderr, "Unable to add %s\n", filename);
	}

	/* Single release point, so no path leaks the digest buffer. */
	free(md5sum);
}

/*
 * Scan through a single directory, checking every entry for duplicates.
 *
 * dirname: path of the directory to scan.
 *
 * Returns 0 after the directory has been processed, or -1 if it could not
 * be read at all.  Problems with individual entries are reported on stderr
 * and do not affect the return value.
 */
int dupe_dir(char *dirname)
{
	struct dirent **files;
	int total;
	int x;

	total = scandir(dirname, &files, NULL, NULL);
	if(total < 0) {
		fprintf(stderr, "Couldn't scan %s\n", dirname);
		return -1;
	}

	for(x = 0; x < total; x++) {
		dupe_entry(dirname, files[x]->d_name);
		/* scandir() allocates each entry; the caller must free it. */
		free(files[x]);
	}
	free(files);

	return 0;

}

/*
 * Program entry point: parse the options, then run dupe_dir() on every
 * directory named on the command line.
 *
 * Prints the usage message (which exits) when no directory is given,
 * whether or not options were supplied.  Returns 0 otherwise.
 */
int main(int ac, char *av[]) {

	int i;

	/* parse_args() reports how many argv slots the options used, so the
	   first directory sits one past that (slot 0 is the program name). */
	i = 1 + parse_args(ac, av);

	/* Covers both "no arguments at all" and "options but no directory". */
	if(i >= ac) {
		usage(av[0]);
	}

	for(; i < ac; i++) {
		dupe_dir(av[i]);
	}

	return 0;
}

/*
 * Check whether the database already records a file with this MD5 digest
 * and this size.
 *
 * md5sum: 16-byte digest, used as the database key.
 * size:   file size to search for among the int sizes stored under the key.
 *
 * Returns 1 if the digest is present with a matching size, 0 if it is not,
 * and -1 if the database could not be opened.
 */
int lookup(void *md5sum, int size) {

	DBM *db;
	datum key;
	datum data;

	const char *stored;
	size_t count;
	size_t x;
	int found = 0;

	key.dptr = md5sum;
	key.dsize = 16;

/* open with O_RDWR and O_CREAT so that the dbm will be created
   if it doesn't exist. */

	if((db = dbm_open(DBNAME, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR))
	   == NULL)
		return -1;

	data = dbm_fetch(db, key);
	if(data.dptr != NULL) {
		/* The fetched pointer refers to storage owned by the database
		   library, so every read of it happens before dbm_close(). */
		stored = data.dptr;
		count = data.dsize / sizeof(int);

		for(x = 0; x < count; x++) {
			int entry;

			/* memcpy avoids assuming the record is int-aligned. */
			memcpy(&entry, stored + x * sizeof entry, sizeof entry);
			if(entry == size) {
				found = 1;
				break;
			}
		}
	}
	dbm_close(db);

	return found;

}

/*
 * Record a file size under its MD5 digest in the database.
 *
 * The record for a digest is an array of int sizes; `size` is appended to
 * whatever is already stored there, or starts a new one-element record.
 *
 * md5sum: 16-byte digest, used as the database key.
 * size:   file size to append.
 *
 * Returns 0 on success and -1 if the database could not be opened, memory
 * could not be allocated, or the record could not be stored.
 */
int addfile(void *md5sum, int size) {

	DBM *db;
	datum key;
	datum data;
	datum newdata;

	size_t wcount = 0;
	int *wsizes;
	int status;

	if((db = dbm_open(DBNAME, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR))
	   == NULL)
		return -1;

	key.dptr = md5sum;
	key.dsize = 16;

	data = dbm_fetch(db, key);
	if(data.dptr != NULL)
		wcount = data.dsize / sizeof(int);

	/* Room for the existing sizes plus the one being added. */
	wsizes = malloc((wcount + 1) * sizeof *wsizes);
	if(wsizes == NULL) {
		dbm_close(db);
		return -1;
	}

	if(wcount > 0)
		memcpy(wsizes, data.dptr, wcount * sizeof *wsizes);
	wsizes[wcount] = size;
	wcount++;

	newdata.dptr = (void *)wsizes;
	newdata.dsize = wcount * sizeof *wsizes;

	/* Keep the result: callers must not be told a failed store worked. */
	status = dbm_store(db, key, newdata, DBM_REPLACE);
	dbm_close(db);
	free(wsizes);

	return status == 0 ? 0 : -1;

}


/* syntax highlighted by Code2HTML, v. 0.9.1 */