// Use complex type?

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>


/*	Default test parameters, used when the command line does not override
	them:  the first and last vector lengths to test and the first and last
	direction values to test.  They are also printed by usage().
*/
const int	nStartDefault	= 16,
			nEndDefault		= 65536,
			dStartDefault	= 1,
			dEndDefault		= 1;

/*	Tolerances that CompareVectors passes to CompareFloats:

		RelativeThreshold:  when the magnitude of the expected value exceeds
		this, the relative error is tested; otherwise the absolute error is
		tested.

		RelativeError:  largest accepted value of |1 - observed/expected|.

		AbsoluteError:  largest accepted value of |observed - expected|.
*/
static const float	RelativeThreshold	= 1e-4f,
					RelativeError		= 1e-2f,
					AbsoluteError		= 1e-6f;
					
/*	Longest vector for which the slow definition-based routine (DFT) will be
	used as an additional check in main.
*/
static const int DefinitionThreshold = 128;

// 2*pi, the full-circle angle used to form the complex exponentials.
static const double TwoPi = 2 * 3.1415926535897932384626433;


/*	Print a summary of the accepted command-line parameters, including the
	default ranges, to stderr.  Does not exit; the caller decides that.
*/
static void usage(void) {
	fprintf(stderr, "\n\
Parameters are:\n\
\n\
	seed=<initial seed for pseudo-random number generation>\n\
		Default is current time.\n\
\n\
	n=<first vector length to test>[-<last vector length to test>]\n\
		Default is n=%d-%d.\n\
\n\
	d=<first direction value to test>[-<last direction value to test>]\n\
		Default is d=%d-%d.\n",
		nStartDefault, nEndDefault, dStartDefault, dEndDefault);
}


/*	Complain on stderr that a command-line argument is malformed, quoting the
	offending argument, and follow up with the usage summary.  Returns to the
	caller, which is responsible for exiting.
*/
static void parameter(const char *argument) {
	fprintf(stderr,
		"Error, parameter \"%s\" is not in the form "
		"<key>=<value> or <key>=<start>-<end>.\n",
		argument);

	usage();
}


/*	Parse the command line into the test parameters.

	Each argument after the program name must be "-h" (print usage and exit
	successfully) or have the form <key>=<value> or <key>=<start>-<end>.
	Numbers are parsed by strtol with base 0, so decimal, octal and
	hexadecimal notations are accepted.  A keyword may be abbreviated to any
	non-empty prefix of "seed", "n" or "d"; candidates are tried in that
	order.

	Outputs:
		*seed			seed value; defaults to the current time.
		*nStart, *nEnd	vector length range; default nStartDefault,
						nEndDefault.
		*dStart, *dEnd	direction range; default dStartDefault, dEndDefault.

	When only one number is given, the end of the range equals the start.
	Any malformed, unrecognized or out-of-range argument is reported on
	stderr and terminates the program with exit status 1.
*/
static void ParseArguments(int argc, const char *argv[],
		unsigned long *seed, int *nStart, int *nEnd, int *dStart, int *dEnd) {
	int i;

	// Set default parameters.
	*seed = time(NULL);
	*dStart = dStartDefault;
	*dEnd = dEndDefault;
	*nStart = nStartDefault;
	*nEnd = nEndDefault;

	// Process each argument after program name.
	for (i = 1; i < argc; ++i) {
		const char *argument = argv[i];
		const char *p;
		char *q;
		size_t l;
		int secondPresent = 0;
		long first, second;

		if (strcmp(argument, "-h") == 0) {
			usage();
			exit(0);
		}

		// Scan for '='.
		p = strchr(argument, '=');

		/*	It is an error if there is no '=', if no keyword precedes it, or
			if nothing follows it.  The empty keyword must be rejected
			explicitly:  a zero-length strncmp would match every keyword.
		*/
		if (p == NULL || p == argument || p[1] == '\0') {
			parameter(argument);
			exit(1);
		}

		// Note the length of the keyword before the '='.
		l = (size_t) (p - argument);

		// Get the number after the '='.
		++p;
		errno = 0;
		first = strtol(p, &q, 0);

		// If q equals p, strtol did not find a number.
		if (q == p || errno == ERANGE) {
			parameter(argument);
			exit(1);
		}

		/*	The character where parsing of the number finished should be
			'\0' (end of string) or '-' (to indicate a second number follows).
		*/
		if (*q == '\0')
			second = first;	// Second number defaults to same as first.
		else if (*q == '-') {
			// There is a second number; flag it and parse after the '-'.
			secondPresent = 1;
			p = q + 1;
			errno = 0;
			second = strtol(p, &q, 0);

			/*	Report an error if no number was found, it overflowed, or
				there are extraneous characters beyond it.
			*/
			if (q == p || *q != '\0' || errno == ERANGE) {
				parameter(argument);
				exit(1);
			}
		}
		else {
			parameter(argument);
			exit(1);
		}

		// Process "seed".
		if (strncmp(argument, "seed", l) == 0) {
			*seed = first;

			if (secondPresent)
				fprintf(stderr, "Warning, an end value was given for seed, \
but only the start value is used.\n");
		}

		// Process "n" and "d", which are both stored as int ranges.
		else if (strncmp(argument, "n", l) == 0
				|| strncmp(argument, "d", l) == 0) {
			// Refuse values that would be silently truncated to int.
			if (first < INT_MIN || INT_MAX < first
					|| second < INT_MIN || INT_MAX < second) {
				fprintf(stderr,
					"Error, a value in parameter \"%s\" is out of range.\n",
					argument);
				usage();
				exit(1);
			}

			if (argument[0] == 'n') {
				*nStart = (int) first;
				*nEnd = (int) second;
			}
			else {
				*dStart = (int) first;
				*dEnd = (int) second;
			}
		}

		// If we reach here, the keyword is unknown.
		else {
			fprintf(stderr, "Error, parameter \"%s\" is not recognized.\n",
				argument);
			usage();
			exit(1);
		}
	}
}


/*	Return the next pseudo-random value from rand(), scaled in double
	precision by 1/(RAND_MAX+1) and then converted to float.
*/
static float frand(void) {
	const double scale = 1. / (RAND_MAX + 1.);

	return rand() * scale;
}


/*	Fill the first n elements of re and im with pseudo-random values from
	frand.  Values are drawn in interleaved order (re[0], im[0], re[1],
	im[1], ...), so the data is reproducible for a given seed.
*/
static void GenerateData(float *re, float *im, int n) {
	int index = 0;

	while (index < n) {
		re[index] = frand();
		im[index] = frand();
		++index;
	}
}


/*	Copy n floats from source to destination.  The arrays must not overlap.
	A count of zero or less copies nothing (a negative count would otherwise
	turn into an enormous size when converted for memcpy).
*/
static void CopyData(float *destination, const float *source, int n) {
	if (n <= 0)
		return;

	memcpy(destination, source, (size_t) n * sizeof *destination);
}


/*	Compute the discrete Fourier transform of an n-element complex vector
	directly from the definition, in place.

	re, im	real and imaginary parts; overwritten with the result.
	n		number of elements.
	d		+1 for the forward transform (exponent sign +, no scaling),
			-1 for the reverse transform (exponent sign -, scaled by 1/n).

	Returns 0 on success.  Returns 1, after printing a message to stderr, if
	d is not supported or working memory cannot be allocated.
*/
static int DFT(float *re, float *im, int n, int d) {
	double direction, factor;	// Exponent sign and output scale.
	double *inRe, *inIm;		// Double-precision copy of the input.
	int in, out;

	// Choose the transform variant.
	if (d == -1) {
		direction = -1;		// Reverse transform.
		factor = 1./n;		// Scaling by 1/n.
	}
	else if (d == +1) {
		direction = +1;		// Forward transform.
		factor = 1.;		// No scaling.
	}
	else {
		fprintf(stderr, "Error, direction %d not supported.\n", d);
		return 1;
	}

	/*	The output overwrites the input, so the input is copied first.  The
		copy and all arithmetic are in double precision so that this check
		is somewhat more accurate than the float routines it is compared to.
	*/
	inRe = malloc(n * sizeof *inRe);
	inIm = malloc(n * sizeof *inIm);

	if (inRe == NULL || inIm == NULL) {
		fprintf(stderr,
			"Error, failed to allocate memory for %d-element FFT.\n", n);
		free(inIm);
		free(inRe);
		return 1;
	}

	for (in = 0; in < n; ++in) {
		inRe[in] = re[in];
		inIm[in] = im[in];
	}

	// Each output element is a full sum over the input.
	for (out = 0; out < n; ++out) {
		double accRe = 0., accIm = 0.;

		for (in = 0; in < n; ++in) {
			// Components of e^(direction * 2*pi*i * in * out / n).
			const double angle = direction * TwoPi * in * out / n;
			const double c = cos(angle);
			const double s = sin(angle);

			// Accumulate the complex product of that factor and the input.
			accRe += c * inRe[in] - s * inIm[in];
			accIm += c * inIm[in] + s * inRe[in];
		}

		// Scale output as required.
		re[out] = factor * accRe;
		im[out] = factor * accIm;
	}

	free(inRe);
	free(inIm);

	return 0;
}


/*	Reverse the order of the low 32 bits of k:  bit 0 becomes bit 31, bit 1
	becomes bit 30, and so on.  Any bits above bit 31 are ignored and are
	zero in the result.

	The reversal swaps adjacent bits, then adjacent pairs, nibbles, bytes and
	finally half-words.  All arithmetic is unsigned, so no shift can reach
	the sign bit of a promoted int.
*/
static unsigned rw(unsigned int k)
{
	k &= 0xffffffffu;
	k = ((k >> 1) & 0x55555555u) | ((k & 0x55555555u) << 1);
	k = ((k >> 2) & 0x33333333u) | ((k & 0x33333333u) << 2);
	k = ((k >> 4) & 0x0f0f0f0fu) | ((k & 0x0f0f0f0fu) << 4);
	k = ((k >> 8) & 0x00ff00ffu) | ((k & 0x00ff00ffu) << 8);
	return ((k >> 16) | (k << 16)) & 0xffffffffu;
}


/*	Return the 32-bit reversal of k (see rw) scaled by 2^-32, that is, the
	bits of k read as a binary fraction with the least significant bit of k
	first after the binary point.  The product is formed in double precision
	and converted to float on return.
*/
static float r(unsigned int k)
{
	const double unit = 1./4294967296.;	// 2^-32.

	return unit * rw(k);
}


/*	Return the position of the highest set bit of n, that is, the base-two
	logarithm of n rounded down.  Returns 0 for both n == 0 and n == 1.
*/
static int ilog2(unsigned int n)
{
	int exponent = 0;

	while (n > 1) {
		n >>= 1;
		++exponent;
	}

	return exponent;
}


/*	Permute an n-element complex vector in place so that element i is
	exchanged with the element whose index is the reversal of the low
	log2(n) bits of i.

	re, im	real and imaginary parts; permuted identically.
	n		number of elements; intended to be a power of two.

	Vectors with fewer than two elements are left untouched.  This also keeps
	the shift count below 32:  for n == 1 it would equal the word width,
	which is undefined behaviour.
*/
void BitReversalPermute(float *re, float *im, int n) {
	float tmp;
	unsigned int i, j, shift;

	if (n < 2)
		return;

	// Figure out how much to shift to get high log2(n) bits to the right.
	shift = 32 - ilog2(n);

	for (i = 0; i < (unsigned int) n; ++i) {
		// Take i, reverse the whole word, and get just the high bits.
		j = rw(i) >> shift;

		// We don't want to swap each pair twice, so swap only when i < j.
		if (i < j) {
			tmp		= re[i];
			re[i]	= re[j];
			re[j]	= tmp;

			tmp		= im[i];
			im[i]	= im[j];
			im[j]	= tmp;
		}
	}
}


/*	Reference transform of an n-element complex vector, in place.

	re, im	real and imaginary parts; overwritten with the result.
	n		number of elements; must be a positive power of two.
	d		+1 for the forward transform (no scaling), -1 for the reverse
			transform (scaled by 1/n).

	Returns 0 on success, 1 if n is not a positive power of two or d is not
	+1 or -1.  Nothing is modified when 1 is returned.

	The reverse transform is obtained by exchanging the roles of the real and
	imaginary arrays and pre-scaling the data; the same butterfly passes are
	then used for both directions.
*/
static int FFT_reference(float *re, float *im, int n, int d) {
	float *t;
	int k0, k0End, k2, k2End;

	/*	Check that n is a positive power of two.  Testing n < 1 first rejects
		zero (which would pass the bit test) and keeps n-1 from overflowing.
	*/
	if (n < 1 || (n & (n-1)) != 0)
		return 1;

	/*	Set the pointers to perform a forward or reverse DFT, and perform
		scaling if needed.
	*/
	switch (d) {
		case -1: {	// Reverse transform, swap pointers and scale.
			int i;
			float scale = 1./n;

			// Swap pointers to perform reverse transform.
			t = im;
			im = re;
			re = t;

			// Scale the vector.
			for (i = 0; i < n; ++i) {
				re[i] *= scale;
				im[i] *= scale;
			}
			break;
		 }
		case +1:	// Forward transform, no changes needed.
			break;
		default:	// Unknown direction value.
			return 1;
	}

	/*	Each pass works on k0End groups of 2*k2End elements.  The group count
		doubles and the half-group length halves until the latter reaches 1.
	*/
	for (k0End = 1, k2End = n/2; 1 <= k2End; k0End <<= 1, k2End >>= 1)
		for (k0 = 0; k0 < k0End; ++k0) {
			// Twiddle factor for this group, from the bit-reversed index.
			const float x = TwoPi * r(2*k0);
			const float t1r = cos(x);
			const float t1i = sin(x);

			for (k2 = 0; k2 < k2End; ++k2) {

				// Get input elements.
				float a0r = re[(k0*2 + 0) * k2End + k2];
				float a0i = im[(k0*2 + 0) * k2End + k2];
				float a1r = re[(k0*2 + 1) * k2End + k2];
				float a1i = im[(k0*2 + 1) * k2End + k2];

				// Scale by twiddle factor.
				float b0r = a0r;
				float b0i = a0i;
				float b1r = t1r * a1r - t1i * a1i;
				float b1i = t1r * a1i + t1i * a1r;

				// Do butterfly.
				float c0r = b0r + b1r;
				float c0i = b0i + b1i;
				float c1r = b0r - b1r;
				float c1i = b0i - b1i;

				// Store output.
				re[(k0*2 + 0) * k2End + k2] = c0r;
				im[(k0*2 + 0) * k2End + k2] = c0i;
				re[(k0*2 + 1) * k2End + k2] = c1r;
				im[(k0*2 + 1) * k2End + k2] = c1i;
			}

		}

	// The passes above leave the results in bit-reversed order.
	BitReversalPermute(re, im, n);

	return 0;
}


/*	Compare an observed value to the expected value.

	expected, observed	the values to compare.
	RelativeThreshold	if |expected| exceeds this, the relative error
						|1 - observed/expected| is tested against
						RelativeError; otherwise the absolute error
						|observed - expected| is tested against
						AbsoluteError.
	RelativeError		largest accepted relative error.
	AbsoluteError		largest accepted absolute error.
	description			if not NULL, names the value in a message printed to
						stdout when the comparison fails.

	Returns 0 if the error is within the limit, 1 otherwise.  An error that
	is NaN (for example, because either value is NaN) counts as a failure.
*/
static int CompareFloats(float expected, float observed,
		float RelativeThreshold, float RelativeError, float AbsoluteError,
		const char *description) {

	// Which test should we use, relative or absolute?
	if (RelativeThreshold < fabsf(expected)) {
		// Check the relative error.
		const float error = fabsf(1 - observed/expected);

		/*	Test for "within limit" and negate, rather than testing for
			"beyond limit":  every comparison with NaN is false, so the
			latter would let NaN pass as correct.
		*/
		if (!(error <= RelativeError)) {
			if (description != NULL)
				printf("\t\tRelative error in %s is %g.\n", description, error);
			return 1;
		}
	}
	else {
		// Check the absolute error, again treating NaN as a failure.
		const float error = fabsf(observed - expected);

		if (!(error <= AbsoluteError)) {
			if (description != NULL)
				printf("\t\tAbsolute error in %s is %g.\n", description, error);
			return 1;
		}
	}

	// Return 0 when no error is found.
	return 0;
}


/*	Compare two n-element complex vectors element by element.

	Real and imaginary parts are each checked by CompareFloats using the
	file-scope tolerances RelativeThreshold, RelativeError and AbsoluteError.
	For every element in which either part is out of tolerance, the expected
	and observed values are printed to stdout, followed by the error of each
	offending part.

	Returns 0 if every element is within tolerance, 1 otherwise.
*/
static int CompareVectors(	float *reExpected, float *imExpected,
							float *reObserved, float *imObserved, int n) {
	int failed = 0;		// Becomes 1 once any element mismatches.
	int index;

	for (index = 0; index < n; ++index) {
		const float reWant = reExpected[index], imWant = imExpected[index];
		const float reGot = reObserved[index], imGot = imObserved[index];

		// Quiet checks first (NULL description prints nothing).
		const int reBad = CompareFloats(reWant, reGot, RelativeThreshold,
			RelativeError, AbsoluteError, NULL);
		const int imBad = CompareFloats(imWant, imGot, RelativeThreshold,
			RelativeError, AbsoluteError, NULL);

		if (!reBad && !imBad)
			continue;

		failed = 1;

		printf("\
	Error found in element output[%d]:\n\
		Expected value is %g + %g i.\n\
		Observed value is %g + %g i.\n",
			index, reWant, imWant, reGot, imGot);

		// Repeat the checks with descriptions to print the error details.
		CompareFloats(reWant, reGot, RelativeThreshold,
			RelativeError, AbsoluteError, "real part");
		CompareFloats(imWant, imGot, RelativeThreshold,
			RelativeError, AbsoluteError, "imaginary part");
	}

	return failed;
}


/*	Report that a transform routine returned an error, showing the routine
	name and the arguments of the failing call, then exit with status 1.
*/
static void ReportTransformFailure(const char *routine,
		float *re, float *im, int n, int d) {
	fprintf(stderr,
		"Error, %s(%p, %p, %d, %d) unexpectedly returned error.\n",
		routine, (void *) re, (void *) im, n, d);
	exit(1);
}


/*	Test driver.  For each requested direction and each vector length (the
	length doubles from nStart up to nEnd), random data is transformed by the
	external routine FFT and by FFT_reference, and the results are compared.
	For lengths up to DefinitionThreshold the FFT result is also compared to
	the definition-based DFT.

	Returns 0 if all comparisons passed, 1 if any comparison failed or the
	test could not be set up.  An unexpected error from a transform routine
	terminates the program with exit status 1.
*/
int main(int argc, const char *argv[]) {

	/*	Routine under test, defined elsewhere.  Note that it is called with
		the base-two logarithm of the vector length, not the length itself.
	*/
	extern int FFT(float *re, float *im, int N, int d);

	int result, status = 0;	// Status so far.

	unsigned long seed;		// Pseudo-random number seed.
	int	d, dStart, dEnd,	// Directions to test.
		n, nStart, nEnd;	// Vector lengths to test.

	// Places to hold input, expected results, and observed results;
	float *reInput, *imInput;
	float *reExpected, *imExpected;
	float *reObserved, *imObserved;

	ParseArguments(argc, argv, &seed, &nStart, &nEnd, &dStart, &dEnd);

	/*	A non-positive starting length would never grow by doubling, and a
		negative end length would request an absurd allocation.
	*/
	if (nStart < 1 || nEnd < nStart) {
		fprintf(stderr,
			"Error, vector lengths n=%d-%d are not a valid range.\n",
			nStart, nEnd);
		return 1;
	}

	printf("Using %lu as seed for pseudo-random number generator.\n", seed);
	srand((unsigned int) seed);

	// Get space to hold input, expected results, and observed results.
	reInput = malloc((size_t) nEnd * sizeof *reInput);
	imInput = malloc((size_t) nEnd * sizeof *imInput);
	reExpected = malloc((size_t) nEnd * sizeof *reExpected);
	imExpected = malloc((size_t) nEnd * sizeof *imExpected);
	reObserved = malloc((size_t) nEnd * sizeof *reObserved);
	imObserved = malloc((size_t) nEnd * sizeof *imObserved);
	if (	reInput == NULL || imInput == NULL ||
			reExpected == NULL || imExpected == NULL ||
			reObserved == NULL || imObserved == NULL) {
		free(imObserved);
		free(reObserved);
		free(imExpected);
		free(reExpected);
		free(imInput);
		free(reInput);

		fprintf(stderr,
			"Error, failed to allocate memory for %d-element FFT.\n", nEnd);

		// The buffers are gone; continuing would use freed memory.
		return 1;
	}

	// Test each requested direction.
	for (d = dStart; d <= dEnd; d += 2) {
		// Test each requested length.
		for (n = nStart; n <= nEnd; ) {
			GenerateData(reInput, imInput, n);
			CopyData(reExpected, reInput, n);
			CopyData(imExpected, imInput, n);
			CopyData(reObserved, reInput, n);
			CopyData(imObserved, imInput, n);

			// Perform transform by subject routine.
			result = FFT(reObserved, imObserved, ilog2(n), d);
			if (result != 0)
				ReportTransformFailure("FFT", reObserved, imObserved, n, d);

			// Perform transform by reference routine.
			result = FFT_reference(reExpected, imExpected, n, d);
			if (result != 0)
				ReportTransformFailure("FFT_reference",
					reExpected, imExpected, n, d);

			// Compare the expected results to the observed results.
			printf("Comparing FFT(n=%d, d=%d) to reference implementation.\n",
				n, d);
			if (0 != CompareVectors(reExpected, imExpected,
						reObserved, imObserved, n))
				status = 1;

			/*	If vector is short enough, compare using the slow
				definition-based routine.
			*/
			if (n <= DefinitionThreshold) {

				CopyData(reExpected, reInput, n);
				CopyData(imExpected, imInput, n);

				// Perform transform by definition routine.
				result = DFT(reExpected, imExpected, n, d);
				if (result != 0)
					ReportTransformFailure("DFT",
						reExpected, imExpected, n, d);

				// Compare the expected results to the observed results.
				printf(
"Comparing FFT(n=%d, d=%d) to definition implementation.\n",
					n, d);
				if (0 != CompareVectors(reExpected, imExpected,
							reObserved, imObserved, n))
					status = 1;
			}

			/*	Double the length for the next pass.  Stop first if doubling
				would pass nEnd; this also keeps n += n from overflowing.
			*/
			if (nEnd - n < n)
				break;
			n += n;
		}
	}

	free(imObserved);
	free(reObserved);
	free(imExpected);
	free(reExpected);
	free(imInput);
	free(reInput);

	printf("Finished with %serrors.\n", status ? "" : "no ");

	return status;
}
