/*
 * Copyright (C) 2002
 * 	Hidetoshi Shimokawa. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *
 *	This product includes software developed by Hidetoshi Shimokawa.
 *
 * 4. Neither the name of the author nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
 * $Id: ufs_copy.c,v 1.36 2006/09/21 06:28:09 simokawa Exp $
 */

#undef USEMMAP

#include <sys/param.h>
#include <sys/time.h>
#include <sys/disklabel.h>
#include <sys/stat.h>
#ifdef USEMMAP
#include <sys/mman.h>
#endif
#include <sys/types.h>
#include <sys/resource.h>

#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <fstab.h>
#include <libufs.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

/* Transfer buffer size (bytes) copy_ufs() uses when neither -b nor -a is given. */
#define DEFAULT_BUFSIZE	(8*1024*1024)
/* Value assigned to asyncq by -a; async_init() allocates this many slots. */
#define DEFAULT_ASYNCQ	16
/* Shorthand for the superblock structure embedded in the source disk. */
#define	afs	src.d_fs

/*
 * Buffer that copy_cg() reads one cylinder-group block into.  The pad
 * member makes the union MAXBSIZE bytes so a read of afs.fs_bsize bytes
 * has room beyond sizeof(struct cg).
 */
union {
	struct cg cg;
	char pad[MAXBSIZE];
} cgun;
/* Shorthand for the cylinder-group header at the start of that buffer. */
#define	acg	cgun.cg

/*
 * Run-time options, set by main() from the command line:
 *   verbose  -v  print per-extent progress
 *   asyncq   -a  number of async I/O slots (0 = synchronous copy)
 *   bufsize      transfer buffer size in bytes, computed in copy_ufs()
 *   bfactor  -b  buffer size expressed in fragments (0 = use a default)
 *   skipbb   -B  clears it; when set, fragments in front of the first
 *                superblock are not copied
 *   noerror  -e  keep going after an unrecoverable read error
 *   comp     -c  write the output through the cloop_* routines
 */
int verbose, asyncq=0, bufsize, bfactor=0, skipbb=1, noerror=0, comp=0;
#ifdef USEMMAP
/* -m sets 1 (mmap the destination), -M sets 2 (mmap the source). */
int usemmap=0;
#endif
/* libufs handles for the source and the destination. */
struct uufsd src, dst;
/* Buffer for the synchronous copy path; allocated in copy_ufs(). */
char *buf;
/* Running count of fragments copied, accumulated by copy_used_blocks(). */
intmax_t total;
/* Start time stamp taken by copy_ufs(); getsec() measures from it. */
struct timeval st;

int	copy_ufs(const char *, const char *);
int	copy_cg(int);
void	copy_used_blocks(int, void *, int);
void	copy_blocks(int32_t, int32_t);
void	usage(void);
void	async_init(void);
void	async_copy_blocks(int32_t, int32_t);
void	async_wait(void);
int	retry_read(struct uufsd *, ufs2_daddr_t, void *, size_t);

/* Handle returned by cloop_create(); only used when comp is set. */
struct cloop_sc *cloop;

/* Output routines used with -c; their definitions are not in this file. */
struct cloop_sc *cloop_create(int fd, off_t size, uint block_size);
void cloop_write(struct cloop_sc *sc, off_t off, char *buf, size_t size);
void cloop_flush(struct cloop_sc *sc);


double
getsec() {
	struct timeval ct;
        gettimeofday(&ct, (struct timezone *)NULL);
	ct.tv_sec -= st.tv_sec;
	ct.tv_usec -= st.tv_usec;
	if (ct.tv_usec < 0)
		ct.tv_sec--, ct.tv_usec += 1000000;
	return (ct.tv_sec + (double)ct.tv_usec/1000000);
}

/*
 * Program entry point.
 *
 * Parses the options, optionally creates a snapshot with /sbin/mksnap_ffs
 * (-s), resolves the source through fstab when it names a mount point,
 * runs copy_ufs() and finally removes the snapshot file again.
 *
 * Options:
 *   -a  asynchronous I/O with DEFAULT_ASYNCQ slots
 *   -B  also copy the fragments in front of the first superblock
 *   -b  transfer buffer size, in fragments
 *   -c  compressed (cloop) output
 *   -e  do not exit on an unrecoverable read error
 *   -s  first argument handed to mksnap_ffs; `src' is the second one
 *   -v  verbose progress
 *
 * Exits with the value returned by copy_ufs(), or 1 on a usage or
 * snapshot error.
 */
int
main(int argc, char *argv[])
{
	struct fstab *fs;
	int ch, eval=0;
	char *snapshot = NULL;
	char *src, *dst;

#ifdef USEMMAP
	while ((ch = getopt(argc, argv, "aBb:ceMms:v")) != -1)
#else
	while ((ch = getopt(argc, argv, "aBb:ces:v")) != -1)
#endif
		switch(ch) {
		case 'a':
			asyncq = DEFAULT_ASYNCQ;
			break;
		case 'B':
			skipbb = 0;
			break;
		case 'b':
			bfactor = atoi(optarg);
			break;
		case 'c':
			comp = 1;
			break;
		case 'e':
			noerror = 1;
			break;
#ifdef USEMMAP
		case 'M':
			usemmap = 2;
			comp = 0;
			break;
		case 'm':
			usemmap = 1;
			comp = 0;
			break;
#endif
		case 's':
			snapshot = optarg;
			break;
		case 'v':
			verbose = 1;
			break;
		case '?':
		default:
			usage();
		}
	argc -= optind;
	argv += optind;

	/* Both operands are mandatory: argv[1] is used as dst below. */
	if (argc < 2)
		usage();

	src = argv[0];
	dst = argv[1];

	if (snapshot) {
		char cmd[1024];
		int len;

		/*
		 * NOTE: the operands are interpolated into a shell command
		 * line unquoted, so they must not contain shell
		 * metacharacters.
		 */
		len = snprintf(cmd, sizeof(cmd),
			"/sbin/mksnap_ffs %s %s", snapshot, src);
		if (len < 0 || (size_t)len >= sizeof(cmd))
			errx(1, "mksnap_ffs command line too long");
		fprintf(stderr, "%s\n", cmd);
		eval = system(cmd);
		/*
		 * system() returns a wait status, not an exit code; passing
		 * it to errx() could wrap around to exit status 0.
		 */
		if (eval)
			errx(1, "mksnap_ffs failed");
	}

	if ((fs = getfsfile(src)) == NULL) {
		/* Not a mount point listed in fstab: use the path as given. */
		endfsent();
		eval = copy_ufs(src, dst);
	} else {
		char spec[256];
		int len;

		/* Copy the device name out before endfsent() is called. */
		len = snprintf(spec, sizeof(spec), "%s", fs->fs_spec);
		endfsent();
		if (len < 0 || (size_t)len >= sizeof(spec))
			errx(1, "%s: device name too long", src);
		eval = copy_ufs(spec, dst);
	}

	if (snapshot) {
		fprintf(stderr, "removing snapshot file %s ... ", src);
		fflush(stderr);
		if (unlink(src) == 0)
			fprintf(stderr, "done\n");
		else
			fprintf(stderr, "failed\n");
	}
	exit(eval);
}

/*
 * Return the file system size field of the superblock that matches the
 * UFS version of `disk' (fs_old_size for version 1, fs_size for
 * version 2).  Any other version terminates the program through errx().
 */
static intmax_t
fs_size(struct uufsd *disk)
{
	if (disk->d_ufs == 1)
		return (disk->d_fs.fs_old_size);
	if (disk->d_ufs == 2)
		return (disk->d_fs.fs_size);
	errx(1, "invalid UFS version %d", disk->d_ufs);
}

/*
 * Return the superblock location constant for the UFS version of `disk'
 * (SBLOCK_UFS1 or SBLOCK_UFS2) divided by the fragment size.  Any other
 * version terminates the program through errx().
 */
static intmax_t
sblock(struct uufsd *disk)
{
	if (disk->d_ufs == 1)
		return (SBLOCK_UFS1/disk->d_fs.fs_fsize);
	if (disk->d_ufs == 2)
		return (SBLOCK_UFS2/disk->d_fs.fs_fsize);
	errx(1, "invalid UFS version %d", disk->d_ufs);
}

/*
 * Copy every in-use fragment of the UFS file system at src_path into
 * dst_path.
 *
 * The source is opened through ufs_disk_fillout(), the transfer buffer
 * size is chosen from -b, -a or DEFAULT_BUFSIZE, the destination is
 * opened (and created if necessary), and copy_cg() is run once per
 * cylinder group.  With -c the output goes through the cloop_* routines;
 * otherwise the destination is sized with ftruncate() and written in
 * place.  Progress and timing statistics go to stderr.
 *
 * Returns 0 on success and 1 when the source or destination could not be
 * set up.
 */
int
copy_ufs(const char *src_path, const char *dst_path)
{
	int i;
	struct rusage rusage;
	double sec;
	intmax_t nfrags;

	fprintf(stderr, "copying %s to %s\n", src_path, dst_path);
	gettimeofday(&st, (struct timezone *)NULL);

	if (ufs_disk_fillout(&src, src_path) == -1) {
		warnx("%s: %s", src_path, src.d_error);
		goto err;
	}

#if __FreeBSD_version >= 500110
	{
		/*
		 * Enabling O_DIRECT is best effort, so a failure is only
		 * reported.  F_GETFL is checked first so that an error value
		 * is never OR-ed into the new flag word.
		 */
		int fl = fcntl(src.d_fd, F_GETFL);

		if (fl == -1 ||
		    fcntl(src.d_fd, F_SETFL, fl | O_DIRECT) == -1)
			warn("%s: cannot set O_DIRECT", src_path);
	}
#endif

	if (bfactor > 0)
		bufsize = bfactor * afs.fs_fsize;
	else if (asyncq > 0)
		bufsize = MAXPHYS;
	else
		bufsize = DEFAULT_BUFSIZE;

	/* The async path owns its buffers; the mmap paths need none. */
	if (asyncq > 0)
		async_init();
#ifdef USEMMAP
	else if (!usemmap) {
#else
	else {
#endif
		buf = malloc(bufsize);
		if (buf == NULL) {
			warnx("malloc failed");
			goto err;
		}
	}

	/*
	 * The destination handle starts as a byte copy of the source, so
	 * it carries the same superblock; only the name and descriptor
	 * differ.
	 * NOTE(review): any pointers held inside struct uufsd are shared
	 * by both copies after this; confirm that closing both handles is
	 * safe with the libufs in use.
	 */
	bcopy(&src, &dst, sizeof(dst));
	dst.d_name = dst_path;
	dst.d_fd = open(dst_path, O_CREAT | O_WRONLY,  S_IRUSR |  S_IWUSR );
	if (dst.d_fd < 0) {
		warn("%s", dst_path);
		goto err;
	}

	nfrags = fs_size(&src);
	if (comp)
		/* The cloop block size is fixed at 64 KiB, not fs_bsize. */
		cloop = cloop_create(dst.d_fd,
			(off_t)nfrags * afs.fs_fsize,
			64*1024);
	else
		/*
		 * Result deliberately ignored, as before.
		 * NOTE(review): presumably because dst may be something
		 * that cannot be truncated (e.g. a device node); confirm.
		 */
		(void)ftruncate(dst.d_fd, (off_t)nfrags * afs.fs_fsize);

	total = 0;
	fprintf(stderr,
		"ufs_version: %d, cg_size: %.3lf MB, fs_size: %.3lf GB\n",
		src.d_ufs,
		(double)src.d_fs.fs_fpg * afs.fs_fsize / 1024 / 1024,
		(double)nfrags * afs.fs_fsize/1024/1024/1024);

	for (i = 0; i < afs.fs_ncg; i++)
		if (copy_cg(i))
			goto err2;

	fprintf(stderr,
		"%jd/%jd blocks (%.3lf/%.3lf Mbytes) (%jd%%) written\n",
		total, nfrags,
		(double)total * afs.fs_fsize / 1024.0 / 1024.0,
		(double)nfrags * afs.fs_fsize / 1024.0 / 1024.0,
		/* guard against a zero size field in the superblock */
		nfrags > 0 ? total * 100 / nfrags : (intmax_t)0
	);

	fprintf(stderr, "Syncing...\n");
	if (asyncq > 0)
		async_wait();
	if (comp)
		cloop_flush(cloop);
	if (fsync(dst.d_fd) == -1)
		warn("%s: fsync", dst_path);
	ufs_disk_close(&src);
	ufs_disk_close(&dst);

	getrusage(RUSAGE_SELF, &rusage);
	sec = getsec();
	/* tv_sec is a time_t; cast so the arguments match %ld. */
	fprintf(stderr,
		"sys %lds, user %lds, total %.3lfs, %.3lf MB/s\n",
		(long)rusage.ru_stime.tv_sec, (long)rusage.ru_utime.tv_sec,
		sec, (double)total * afs.fs_fsize / 1024.0 / 1024.0 / sec);
	return (0);

/* XXX error handling */
err2:
	ufs_disk_close(&dst);
err:
	ufs_disk_close(&src);
	return (1);
}

/*
 * Copy the in-use fragments of cylinder group `c'.
 *
 * Reads the cylinder-group block (afs.fs_bsize bytes) into the global
 * cgun buffer, hands its free-fragment bitmap and cg_ndblk to
 * copy_used_blocks(), and then fsync()s the destination.
 *
 * Terminates the program through err() when the cylinder-group block
 * cannot be read in full.  Always returns 0.
 */
int
copy_cg(int c)
{
	ssize_t ss;	/* bread() result */

	fprintf(stderr, "cg %4d/%-4d:", c, afs.fs_ncg);
	fflush(stderr);
	ss = bread(&src, fsbtodb(&afs, cgtod(&afs, c)), &acg, afs.fs_bsize);
	if (ss != afs.fs_bsize)
		/* Cast so the arguments match the %ju / %u conversions. */
		err(1, "read cg failed blkno=%ju size=%u",
		    (uintmax_t)cgtod(&afs, c), (unsigned int)afs.fs_bsize);
	copy_used_blocks(c, cg_blksfree(&acg), acg.cg_ndblk);
	/* Flush after every cylinder group. */
	fsync(dst.d_fd);
	return (0);
}

/*
 * Copy every run of in-use fragments of cylinder group `c'.
 *
 * vp  - free-fragment bitmap of the group; a clear bit marks a fragment
 *       that is in use
 * max - number of bits (fragments) to examine
 *
 * Consecutive in-use fragments are merged into one copy_blocks() call.
 * The number of fragments copied is added to the global `total', and a
 * one-line summary with the elapsed time and rate is printed on stderr.
 *
 * NOTE(review): copy_blocks() takes an int32_t fragment number, so the
 * value computed from cgbase() is narrowed here; confirm this is
 * acceptable for the file system sizes in use.
 */
void
copy_used_blocks(int c, void *vp, int max)
{
	char *freemap = vp;
	int col = 0, first, i, len, pct;
	int32_t sum = 0;
	double sec, rate;

	sec = getsec();
	for (i = 0; i < max; i++) {
		if (!isclr(freemap, i))
			continue;		/* free fragment */

		/* Extend the run as far as the fragments stay in use. */
		first = i;
		while (i + 1 < max && isclr(freemap, i + 1))
			i++;
		len = i - first + 1;

		/* In verbose mode start a new output line every 20 runs. */
		if (verbose && col++ % 20 == 0)
			fprintf(stderr, "\n");
		copy_blocks(cgbase(&afs, c) + first, len);
		sum += len;
	}
	total += sum;
	if (verbose)
		fprintf(stderr, "\n");
	sec = getsec() - sec;

	/*
	 * An empty bitmap (max == 0) or a zero elapsed time must not
	 * lead to a division by zero in the summary line.
	 */
	pct = (max > 0) ? (int)((intmax_t)sum * 100 / max) : 0;
	rate = (sec > 0) ? sum/sec * afs.fs_fsize/1024/1024 : 0.0;
	fprintf(stderr,
		"%6d/%6d blocks (%3d%%) %.3lf sec  %.3lf MB/s\n",
		sum, max, pct, sec, rate);
}

/*
 * Copy `count' fragments starting at fragment number `blkno' from the
 * source to the destination.
 *
 * Unless -B was given (skipbb == 0), fragments located in front of
 * sblock(&src) are skipped.  The remainder is transferred in chunks of
 * at most bufsize / afs.fs_fsize fragments, using one of:
 *   - the mmap paths (only compiled with USEMMAP),
 *   - async_copy_blocks() when asyncq > 0,
 *   - a synchronous bread() followed by cloop_write() (-c) or bwrite().
 *
 * On a short synchronous read the chunk is re-read through retry_read();
 * if that reports errors and -e was not given, the program exits with
 * status 1.  A short write terminates the program through err().
 *
 * NOTE(review): the asyncq > 0 branch never calls cloop_write(), so -a
 * combined with -c does not appear to produce compressed output; confirm
 * whether that combination is meant to be supported.
 */
void copy_blocks(int32_t blkno, int32_t count)
{
	int c, skip;
	ssize_t ss;

	/* protect boot blocks in front of the first superblock */
	if (skipbb && blkno < sblock(&src)) {
		skip = MIN(count, sblock(&src) - blkno);
		if (verbose)
			fprintf(stderr, " skiped(%d)", skip);
		blkno += skip;
		count -= skip;
		/* the whole run was in front of the superblock */
		if (count <= 0)
			return;
	}

	if (verbose)
#if 1
		fprintf(stderr, " %d", count);
#else
		fprintf(stderr, " %d(%d)", blkno, count);
#endif
	fflush(stderr);
	while (count > 0) {
		off_t offset;
		size_t size;
#if 0
		char *p;
		size_t wcount;
#endif

		/* c: fragments in this chunk; size/offset: the same in bytes */
		c = MIN(count, bufsize/afs.fs_fsize);
		size = c * afs.fs_fsize;
		offset = (off_t)blkno * afs.fs_fsize;
#ifdef USEMMAP
		switch(usemmap) {
		case 1:
			/* -m: map the destination and read straight into it */
			buf = mmap(NULL, size, PROT_WRITE, MAP_SHARED | MAP_NOSYNC,
				dst.d_fd, offset);
			if (buf == MAP_FAILED)
				err(1, "mmap failed");
			madvise(buf, size, MADV_SEQUENTIAL);
			ss = bread(&src, fsbtodb(&afs, blkno), buf, size);
			if (ss != size)
				err(1, "read block failed blkno=%zu size=%zu",
				    blkno, size);
			msync(buf, size, MS_ASYNC);
			munmap(buf, size);
			break;
		case 2:
			/* -M: map the source and write from the mapping */
			buf = mmap(NULL, size, PROT_READ, MAP_SHARED | MAP_NOSYNC,
				src.d_fd, offset);
			if (buf == MAP_FAILED)
				err(1, "mmap failed");
			madvise(buf, size, MADV_SEQUENTIAL);
#if 1
			ss = bwrite(&dst, fsbtodb(&afs, blkno), buf, size);
			if (ss != size)
				err(1, "write failed");
#else
			lseek(dst.d_fd, offset, SEEK_SET);
			p = buf;
			while (size > 0) {
				wcount = write(dst.d_fd, p, size);
				if (wcount <= 0)
					break;
				size -= wcount;
				p += wcount;
			}
			if (size > 0)
				err(1, "write failed");
#endif
			msync(buf, size, MS_ASYNC);
			munmap(buf, size);
			break;
		default:
#endif
#if 0
			fprintf(stderr, "blk: %d count: %d\n", blkno, size);
#endif
			if (asyncq > 0)
				/* note: the second argument is a byte count here */
				async_copy_blocks(blkno, size);
			else {
				ufs2_daddr_t db;
				/* db: blkno passed through fsbtodb(), as given to bread()/bwrite() */
				db = fsbtodb(&afs, blkno);
				ss = bread(&src, db, buf, size);
				if (ss != size) {
					warn("read block failed "
					    "blkno=%u size=%zu",
					    blkno, size);
					fprintf(stderr, "retrying...");
					/* give up only if errors remain and -e is off */
					if (retry_read(&src, db, buf, size) > 0
					    && noerror == 0)
						exit(1);
				}
				if (comp) {
					cloop_write(cloop, offset, buf, size);
				} else {
					ss = bwrite(&dst, db, buf, size);
					if (ss != size)
						err(1, "write failed");
				}
			}
#ifdef USEMMAP
		}
#endif
		blkno += c;
		count -= c;
	}
}

/*
 * Print the command synopsis on stderr and exit with status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
		"usage: ufs_copy"
		" [-a]"
		" [-B]"
		" [-c]"
		" [-e]"
		" [-b block_factor]"
#ifdef USEMMAP
		" [-M]"
		" [-m]"
#endif
		" [-s mount_point]"
		" [-v]"
		" src dst\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}

/* async IO */

#include <sys/event.h>
#include <aio.h>
/* kqueue descriptor on which AIO completions are delivered. */
int kq;
/*
 * last_free:   index of the slot async_copy_blocks() will use next
 * outstanding: number of copies started and not yet written out
 */
int last_free, outstanding;

/*
 * One slot of the async copy queue: the AIO control block plus the
 * phase the slot is currently in.
 */
struct async_control {
	struct aiocb cb;
	int status;
#define AC_FREE		0	/* slot is idle */
#define AC_READ		1	/* read from the source in flight */
#define AC_WRITE	2	/* write to the destination in flight */
};

/* Arrays of asyncq elements each, allocated by async_init(). */
struct async_control *ac_list;
struct kevent *kev_list;
/* All-zero timeout, used to poll for completions without blocking. */
struct timespec zero_timeout;

void async_event(struct timespec *timeout);

/*
 * Set up the asynchronous copy machinery: the kqueue, `asyncq' control
 * slots (each with its own bufsize-byte buffer) and the kevent result
 * array.  Every slot starts out AC_FREE and is configured to report its
 * completions to the kqueue with the slot pointer as user data.
 *
 * Terminates the program through err() when an allocation or kqueue()
 * fails.
 */
void
async_init(void)
{
	int i;
	struct async_control *ac;
	struct aiocb *cb;

	/*
	 * calloc() rather than malloc(): the aiocb structures are handed
	 * to aio_read()/aio_write() and must not contain stale data in
	 * the fields that are never assigned here.
	 */
	ac_list = calloc(asyncq, sizeof(*ac_list));
	kev_list = calloc(asyncq, sizeof(*kev_list));
	if (ac_list == NULL || kev_list == NULL)
		err(1, "malloc failed");
	if ((kq = kqueue()) == -1)
		err(1, "kqueue failed");
	for (i = 0; i < asyncq; i++) {
		ac = &ac_list[i];
		cb = &ac->cb;
		if ((cb->aio_buf = malloc(bufsize)) == NULL)
			err(1, "malloc failed");
		cb->aio_sigevent.sigev_notify = SIGEV_KEVENT;
		cb->aio_sigevent.sigev_notify_kqueue = kq;
		/* The member name differs between FreeBSD versions. */
#if __FreeBSD_version >= 700005
		cb->aio_sigevent.sigev_value.sival_ptr = ac;
#else
		cb->aio_sigevent.sigev_value.sigval_ptr = ac;
#endif
		ac->status = AC_FREE;
	}
	last_free = 0;
	outstanding = 0;
	zero_timeout.tv_sec = 0;
	zero_timeout.tv_nsec = 0;
}

/*
 * Queue an asynchronous copy.
 *
 * blkno - first fragment to copy; the byte offset is blkno * fs_fsize
 * count - number of BYTES to transfer (copy_blocks() passes a byte size)
 *
 * Blocks in async_event() until the next slot of the ring is free,
 * starts an aio_read() from the source on it and then polls once for
 * completions without waiting.  Terminates through err() when the read
 * cannot be queued.
 */
void
async_copy_blocks(int32_t blkno, int32_t count)
{
	struct async_control *slot;

	/* Wait for the slot at the ring position to become idle. */
	for (;;) {
		slot = &ac_list[last_free];
		if (slot->status == AC_FREE)
			break;
		async_event(NULL);
	}
	last_free = (last_free + 1) % asyncq;

	slot->cb.aio_fildes = src.d_fd;
	slot->cb.aio_offset = (off_t)blkno * afs.fs_fsize;
	slot->cb.aio_nbytes = count;
	slot->cb.aio_lio_opcode = LIO_READ;
	slot->status = AC_READ;

	if (aio_read(&slot->cb) == -1)
		err(1, "aio_read failed blkno=%u size=%u", blkno, count);
	outstanding += 1;

	/* Reap whatever has already completed, without blocking. */
	async_event(&zero_timeout);
}

/*
 * Collect AIO completions from the kqueue and advance each slot.
 *
 * timeout - passed to kevent(): NULL blocks until at least one event is
 *           available, a zero timespec only polls.
 *
 * A completed read is turned into a write of the same buffer and offset
 * to the destination; a completed write frees the slot and decrements
 * `outstanding'.  Any failed or short transfer terminates the program.
 */
void
async_event(struct timespec *timeout)
{
	int n, i;
	struct async_control *ac;
	struct aiocb *cb;
	ssize_t count;

	n = kevent(kq, NULL, 0, kev_list, asyncq, timeout);
	if (n == -1)
		err(1, "kevent failed");
	for (i = 0; i < n; i++) {
		/* udata carries the slot, ident the aiocb that completed. */
		ac = (struct async_control *)kev_list[i].udata;
		cb = (struct aiocb *) kev_list[i].ident;

		count = aio_return(cb);
		if (count == -1)
			err(1, "aio_return failed");
		/*
		 * A short transfer is not an errno condition, so report it
		 * with errx() instead of printing a stale errno string.
		 */
		if ((size_t)count != cb->aio_nbytes)
			errx(1, "aio_return failed: short %s (%zd of %zu bytes)",
			    ac->status == AC_READ ? "read" : "write",
			    count, (size_t)cb->aio_nbytes);

		if (ac->status == AC_READ) {
			/* Reuse the control block to write the data out. */
			cb->aio_fildes = dst.d_fd;
			cb->aio_lio_opcode = LIO_WRITE;
			ac->status = AC_WRITE;
			if (aio_write(cb) == -1)
				err(1, "aio_write failed");
		} else {
			ac->status = AC_FREE;
			outstanding --;
		}
	}
}

/*
 * Block until every queued asynchronous copy has completed, i.e. until
 * `outstanding' has dropped to zero.
 */
void
async_wait()
{
	for (;;) {
		if (outstanding <= 0)
			break;
		async_event(NULL);
	}
}


/* Unit, in bytes, in which retry_read() re-reads a failed chunk. */
#define RETRY_BLOCKSIZE 512
/* Number of attempts retry_read() allows for each unit. */
#define RETRY	10
/*
 * Try up to `retry' times to read `size' bytes at `offset' of `fd' into
 * `buf', logging each attempt on stderr.
 *
 * Returns 0 as soon as one attempt reads the full size.  When all
 * attempts fail, the buffer is filled with zeros and 1 is returned.
 */
int
retry_read_block(int fd, char *buf, size_t size, off_t offset,
    unsigned int retry)
{
	unsigned int attempt;
	ssize_t got;

	/* Honour the caller's limit instead of the RETRY constant. */
	for (attempt = 0; attempt < retry; attempt++) {
		fprintf(stderr, " %u ", attempt + 1);
		got = pread(fd, buf, size, offset);
		if (got >= 0 && (size_t)got == size) {
			fprintf(stderr, "ok;");
			return (0);
		}
		fprintf(stderr, "ng,");
	}
	fprintf(stderr, "\nretry count %u exhausted for offset=%ju size=%zu\n",
	    retry, (uintmax_t)offset, size);
	/* Hand back zeros rather than whatever a partial read left behind. */
	memset(buf, 0, size);
	return (1);
}

/*
 * Re-read a chunk that failed to read in one go, RETRY_BLOCKSIZE bytes
 * at a time.
 *
 * src  - disk to read from
 * db   - start of the chunk; multiplied by src->d_bsize to get the byte
 *        offset
 * buf  - destination buffer
 * size - chunk length in bytes; only whole RETRY_BLOCKSIZE units are read
 *
 * Returns the number of units that retry_read_block() could not read.
 */
int
retry_read(struct uufsd *src, ufs2_daddr_t db, void *buf, size_t size)
{
	const off_t base = (off_t)db * src->d_bsize;
	const u_int nunits = size / RETRY_BLOCKSIZE;
	char *dest = buf;
	int failed = 0;
	u_int unit, delta;

	for (unit = 0, delta = 0; unit < nunits;
	    unit++, delta += RETRY_BLOCKSIZE)
		failed += retry_read_block(src->d_fd, dest + delta,
		    RETRY_BLOCKSIZE, base + delta, RETRY);

	fprintf(stderr, "\n");
	return (failed);
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */