
/* This code is public domain. It'd be stupid to claim copyright on it. */

/* Version 1.1 - Changed duplicate bank listing (aka bank_list) to start
                 at one.  This avoids the "little" issue of there being
		 duplicates of bank 0 causing said duplicates to attempt
		 to be rematched again.  This only showed up when I was
		 getting over 100% compression on a 1MB file full of \0s.
   Version 1.0 - Initial release
   */

#define _GNU_SOURCE
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

/*
 * Parse a strictly positive decimal bank size.
 *
 * Returns the parsed size, or 0 when `text` is empty, has trailing
 * characters, or is zero or negative.
 */
static size_t parse_bank_size(const char *text)
{
    char *end;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0' || value <= 0) {
        return 0;
    }
    return (size_t) value;
}

/*
 * Report every bank that is a byte-for-byte copy of an earlier bank.
 *
 * hay        start of the mapped file contents
 * file_size  number of readable bytes at `hay`
 * bank_size  size of one bank in bytes (must be non-zero)
 *
 * Banks are the consecutive `bank_size`-byte chunks of the file.  Only
 * whole banks are compared; a short trailing bank can neither match nor be
 * matched.  One line is printed to stdout per duplicate found.
 *
 * Returns 0 on success, -1 if the bookkeeping array cannot be allocated.
 */
static int report_duplicate_banks(const char *hay, size_t file_size,
                                  size_t bank_size)
{
    size_t full_banks = file_size / bank_size;
    size_t bank_count = full_banks + (file_size % bank_size ? 1 : 0);
    size_t *bank_list;
    size_t i, k;

    /* Fewer than two banks means there is nothing to compare. */
    if (bank_count < 2) {
        return 0;
    }

    /* bank_list[k] == 0: bank k is not (yet) known to be a duplicate.
       bank_list[k] == n: bank k duplicates bank n - 1.  The mapping is
       one-based so that "duplicate of bank 0" differs from "no match". */
    bank_list = calloc(bank_count, sizeof *bank_list);
    if (bank_list == NULL) {
        perror("calloc");
        return -1;
    }

    /* The last bank has no later bank to be compared against. */
    for (i = 0; i + 1 < bank_count; i++) {
        /* A bank already known to be a duplicate was reported together
           with its original; do not match against it again. */
        if (bank_list[i] != 0) {
            continue;
        }
        /* Only bank-aligned matches matter, so compare at bank boundaries
           directly instead of scanning every byte offset. */
        for (k = i + 1; k < full_banks; k++) {
            if (memcmp(hay + i * bank_size, hay + k * bank_size,
                       bank_size) == 0) {
                bank_list[k] = i + 1;
                printf("Duplicate bank %zu (%zx) of %zu (%zx).\n",
                       k, k * bank_size, i, i * bank_size);
            }
        }
    }

    /* With bank_list filled in, remapping would be a simple secondary
       lookup; any entry that is still zero maps to itself. */
    free(bank_list);
    return 0;
}

/*
 * Usage: <program> <file> <bank size>
 *
 * Maps <file> read-only and prints every <bank size>-byte bank that is an
 * exact copy of an earlier bank.
 *
 * Returns EXIT_SUCCESS when the scan completed (including for an empty
 * file), EXIT_FAILURE on bad arguments or any I/O / allocation failure.
 */
int main(int argc, char *argv[])
{
    int file;
    int status = EXIT_FAILURE;
    size_t bank_size, file_size;
    struct stat buf1;
    char *hay;

    if (argc != 3) {
        printf("Usage: %s <file> <bank size>\n", argv[0]);
        return EXIT_FAILURE;
    }

    bank_size = parse_bank_size(argv[2]);
    if (bank_size == 0) {
        printf("You need to supply a valid bank size.\n");
        return EXIT_FAILURE;
    }

    file = open(argv[1], O_RDONLY);
    if (file < 0) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    if (fstat(file, &buf1) != 0) {
        perror("fstat");
        goto out_close;
    }

    /* Reject sizes that cannot be represented as a mapping length. */
    file_size = (size_t) buf1.st_size;
    if (buf1.st_size < 0 || (off_t) file_size != buf1.st_size) {
        fprintf(stderr, "%s: file too large to map\n", argv[1]);
        goto out_close;
    }

    /* mmap() rejects zero-length mappings; an empty file simply has no
       banks and therefore no duplicates. */
    if (file_size == 0) {
        status = EXIT_SUCCESS;
        goto out_close;
    }

    hay = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, file, 0);
    if (hay == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }

    if (report_duplicate_banks(hay, file_size, bank_size) == 0) {
        status = EXIT_SUCCESS;
    }

    munmap(hay, file_size);
out_close:
    close(file);
    return status;
}

