[go back] -------------- #include "dft.h" #include <stdlib.h> #include <stdio.h> #include <math.h> /* X = real input, Y = imag. input A = real output, B = imag. output */ static double **GlobalA, **GlobalB, *X, *Y; static double Q; static int Size = 512; /* default */ /* performance counters */ unsigned int stat_add, stat_mult, stat_trig; int main(int argc, char **argv) { int c, i; extern char *optarg; extern int optind; int seed; char *names[] = {"DFTSimple", "DFTFaster", "DFTTable", "DFTTukey2", "DFTTukey2MultTable", "DFTTukey4", "DFTTukey4Mult"}; int sizes[] = {256, 1024, 4096, 16384}; int num_sizes = 4; while ((c = getopt(argc, argv, "r:s:")) != EOF) { switch (c) { case 's': sscanf(optarg, "%d", &Size); printf("Using size %d\n", Size); Q = M_PI * 2.0 /(double)Size; break; case 'r': sscanf(optarg, "%d", &seed); printf("Using seed %d\n", seed); srandom(seed); default: fprintf(stderr, "Invalid arguments.\n"); exit(1); } } for (i=0; i<num_sizes; i++) { Size = sizes[i]; Q = M_PI * 2.0 / (double)Size; InitDFT(7); DFTSimple(GlobalA[0], GlobalB[0]); DFTFaster(GlobalA[1], GlobalB[1]); DFTTable(GlobalA[2], GlobalB[2]); DFTTukey2(GlobalA[3], GlobalB[3]); DFTTukey2MultTable(GlobalA[4], GlobalB[4]); DFTTukey4(GlobalA[5], GlobalB[5]); DFTTukey4Mult(GlobalA[6], GlobalB[6]); FreeDFT(7); } //CheckDFT(6, names); exit(0); } void FreeDFT(int num) { int i; free(X); free(Y); for (i=0; i<num; i++) { free(GlobalA[i]); free(GlobalB[i]); } free(GlobalA); free(GlobalB); } void ClearCounters(void) { stat_add = stat_mult = stat_trig = 0; } void PrintCounters(char *type) { printf("%s, size %d\n", type, Size); printf("\tAdditions: %u\n", stat_add); printf("\tMultiplications: %u\n", stat_mult); printf("\tTrig. Functions: %u\n\n", stat_trig); } void InitDFT(int num) { int i,j; printf("Initializing %d output arrays of size %d\n", num, Size); X = (double *)calloc(Size, sizeof(double)); Y = (double *)calloc(Size, sizeof(double)); GlobalA = (double **)calloc(num, sizeof(double*)); GlobalB = (double **)calloc(num, sizeof(double*)); for (i=0; i<num; i++) { GlobalA[i] = (double*)calloc(Size, sizeof(double)); GlobalB[i] = (double*)calloc(Size, sizeof(double)); } for (i=0; i<Size; i++) { X[i] = random() & 0xfff; Y[i] = random() & 0xfff; for (j=0; j<num; j++) { GlobalA[j][i] = X[i]; GlobalB[j][i] = Y[i]; } } } void CheckDFT(int num, char **names) { int i,j; printf("%d\n", Size); for (i=1; i<num; i++) { for (j=100; j<Size; j++) { if (floor(GlobalA[i][j]) != floor(GlobalA[i-1][j])) { fprintf(stderr, "Inconsistency in %s's A[%d]: %f != %f\n", names[i], j, GlobalA[i][j], GlobalA[i-1][j]); exit(1); } printf("%s, A[%d]: %f == %f\n", names[i], j, GlobalA[i][j], GlobalA[i-1][j]); if (floor(GlobalB[i][j]) != floor(GlobalB[i-1][j])) { fprintf(stderr, "Inconsistency in %s's B[%d]: %f != %f\n", names[i], GlobalB[i][j], GlobalB[i-1][j]); exit(1); } } } printf("Everything checks out.\nExiting cleanly...\n"); } /* Simplistic N**2 DFT */ void DFTSimple(double *A, double *B) { int i,j; ClearCounters(); for (i=0; i<Size; i++) { A[i] = 0; B[i] = 0; for (j=0; j<Size; j++) { stat_add += 4; stat_mult += 4; stat_trig += 4; A[i] = A[i] + X[j]*cos((double)Q*i*j) + Y[j]*sin((double)Q*i*j); B[i] = B[i] + Y[j]*cos((double)Q*i*j) - X[j]*sin((double)Q*i*j); } } PrintCounters("DFTSimple"); } /* Simplistic DFT with fewer trig/int arith. */ void DFTFaster(double *A, double *B) { int i,j; double W, AT, BT, D, C, S; ClearCounters(); for (i=0; i<Size; i++) { int W = Q * i; AT = X[0]; BT = Y[0]; for (j=1; j<Size; j++) { stat_mult += 5; stat_add += 2; stat_trig += 2; D = W * j; C = cos(D); S = -sin(D); AT += C*X[j] - S*Y[j]; BT += C*Y[j] + S*X[j]; } A[i] = AT; B[i] = BT; } PrintCounters("DFTFaster"); } void DFTTable(double *A, double *B) { int i,j,k; double W, AT, BT; /* initialize lookup table */ double *C = (double*)calloc(Size, sizeof(double)); double *S = (double*)calloc(Size, sizeof(double)); ClearCounters(); for (i=0; i<Size; i++) { C[i] = cos(Q*i); S[i] = -sin(Q*i); stat_trig += 2; } for (i=0; i<Size; i++) { AT = X[0]; BT = Y[0]; k=i; for (j=1; j<Size; j++) { stat_mult += 4; stat_add += 5; AT += C[k]*X[j] - S[k]*Y[j]; BT += C[k]*Y[j] + S[k]*X[j]; k += i; if (k >= Size) { k -= Size; stat_add++; } } A[i] = AT; B[i] = BT; } PrintCounters("DFTTable"); free(C); free(S); } void DFTTukey2(double *A, double *B) { int N2, k, j, i, N1, L, M; double C, S, Z, XT, YT, E; ClearCounters(); M = (int)log2(Size); N2 = Size; for (k=0; k<M; k++) { N1 = N2; N2 = N2/2; // static div by 2! E = M_PI * 2 / (double)N1; Z = 0; stat_mult += 2; stat_add++; for (j=0; j<N2; j++) { C = cos(Z); S = sin(Z); stat_trig += 2; stat_mult += 1; Z = (j+1)*E; // array indices for (i = j; i<Size; i+=N1) { stat_add += 7; stat_mult += 4; L = i + N2; // array indices XT = A[i] - A[L]; A[i] = A[i] + A[L]; YT = B[i] - B[L]; B[i] = B[i] + B[L]; A[L] = C*XT + S*YT; B[L] = C*YT - S*XT; } } } j = 0; for (i=0; i<Size-2; i++) { if (i < j) { XT = A[j]; A[j] = A[i]; A[i] = XT; XT = B[j]; B[j] = B[i]; B[i] = XT; } stat_add += 1; k = Size/2; // static div by 2 while (k < j) { stat_add += 2; j = j-k; k = k/2; // static div by 2 } stat_add += 1; j+=k-1; } PrintCounters("DFTTukey2"); } void DFTTukey2Table(double *A, double *B) { int N2, k, j, i, N1, L, IE, IA, M; double C, S, XT, YT, Z; double *WR, *WI; ClearCounters(); WR = (double*)calloc(Size, sizeof(double)); WI = (double*)calloc(Size, sizeof(double)); for (k=0; k<Size; k++) { stat_mult += 1; stat_trig += 2; Z = k*Q; WR[k] = cos(Z); WI[k] = -sin(Z); } M = log2(Size); N2 = Size; for (k=0; k<M; k++) { N1 = N2; N2 = N2/2; // static div by 2 IE = Size/N1; IA = 0; stat_mult += 1; stat_add++; for (j=0; j<N2; j++) { C = WR[IA]; S = WI[IA]; IA += IE; stat_add += 1; for (i = j; i<Size; i+=N1) { stat_add += 7; stat_mult += 4; L = i + N2; XT = A[i] - A[L]; A[i] = A[i] + A[L]; YT = B[i] - B[L]; B[i] = B[i] + B[L]; A[L] = C*XT + S*YT; B[L] = C*YT - S*XT; } } } j = 1; for (i=0; i<Size-2; i++) { if (i < j) { XT = A[j]; A[j] = A[i]; A[i] = XT; XT = B[j]; B[j] = B[i]; B[i] = XT; } stat_add++; k = Size/2; // static div by 2 while (k < j) { stat_add += 2; j = j-k; k = k/2; // static div by 2 } stat_add += 1; j+=k; } free(WR); free(WI); PrintCounters("DFTTukey2Table"); } void DFTTukey2MultTable(double *A, double *B) { int N2, k, j, i, N1, L, IE, IA, M; double C, S, XT, YT, Z, T, TY; double *WR, *WI; ClearCounters(); WR = (double*)calloc(Size, sizeof(double)); WI = (double*)calloc(Size, sizeof(double)); for (k=0; k<Size; k++) { stat_mult += 1; stat_trig += 2; Z = k*Q; WR[k] = cos(Z); WI[k] = -sin(Z); } N2 = Size; M = log2(Size); for (k=0; k<M; k++) { N1 = N2; N2 /= 2; // static div by 2 stat_add++; for (i=0; i<Size; i+=N1) { stat_add += 5; L = i + N2; T = A[i] - A[L]; A[i] += A[L]; A[L] = T; T = B[i] - B[L]; B[i] += B[L]; B[L] = T; } if (k < M-1) { stat_mult += 1; IE = Size/N1; IA = 0; for (j = 1; j<N2; j++) { stat_add++; IA += IE; C = WR[IA]; S = WI[IA]; for (i=j; i<Size; i+=N1) { stat_add += 7; stat_mult += 4; L = i+N2; T = A[i] - A[L]; A[i] += A[L]; TY = B[i] - B[L]; B[i] += B[L]; A[L] = C*T + S*TY; B[L] = C*TY - S*T; } } } } j = 1; for (i=0; i<Size-2; i++) { if (i < j) { XT = A[j]; A[j] = A[i]; A[i] = XT; XT = B[j]; B[j] = B[i]; B[i] = XT; } stat_add += 1; k = Size/2; // static div by 2 while (k < j) { stat_add += 2; j = j-k; k = k/2; // static div by 2 } stat_add += 1; j+=k; } PrintCounters("DFTTukey2MultTable"); free(WR); free(WI); } void DFTTukey4(double *A, double *B) { int N2, M, k, i, j, N1, i1, i2, i3; double lA, lB, lC, CO1, CO2, CO3, SI1, SI2, SI3, R1, R2, R3, R4, S1, S2, S3, S4, E; ClearCounters(); N2 = Size; M = log2(Size); for (k=0; k<M; k++) { N1 = N2; N2 = N2/4; // static div by 4 E = M_PI * 2/N1; lA = 0; stat_mult += 2; stat_add++; for (j=0; j<N2; j++) { lB = lA + lA; lC = lA + lB; CO1 = cos(lA); CO2 = cos(lB); CO3 = cos(lC); SI1 = sin(lA); SI2 = sin(lB); SI3 = sin(lC); lA = j*E; stat_add += 2; stat_mult++; stat_trig += 6; for (i=j; i<Size; i+=N1) { i1 = i + N2; i2 = i1 + N2; i3 = i2 + N2; R1 = A[i] + A[i2]; R3 = A[i] - A[i2]; S1 = B[i] + B[i2]; S3 = B[i] - B[i2]; R2 = A[i1] + A[i3]; R4 = A[i1] - A[i3]; S2 = B[i1] + B[i3]; S4 = B[i1] - B[i3]; A[i] = R1 + R2; R2 = R1 - R2; R1 = R3 - S4; R3 = R3 + S4; B[i] = S1 + S2; S2 = S1 - S2; S1 = S3 + R4; S3 = S3 - R4; A[i1] = CO1*R3 + SI1*S3; B[i1] = CO1*S3 - SI1*R3; A[i2] = CO2*R2 + SI2*S2; B[i2] = CO2*S2 - SI2*R2; A[i3] = CO3*R1 + SI3*S1; B[i3] = CO3*S1 - SI3*R1; stat_add += 25; stat_mult += 12; } } } PrintCounters("DFTTukey4"); } void DFTTukey4Mult(double *A, double *B) { int N2, N1, JT, IE, IA1, IA2, IA3, k, M, i, i1, i2, i3, j; double R1, R2, R3, R4, S1, S2, S3, SI1, SI2, SI3, CS1, CS2, CS3; double CO1, CO2, CO3, T, Z; double *WR, *WI; WR = (double*)calloc(Size, sizeof(double)); WI = (double*)calloc(Size, sizeof(double)); ClearCounters(); for (k=0; k<Size; k++) { stat_mult += 1; stat_trig += 2; Z = k*Q; WR[k] = cos(Z); WI[k] = -sin(Z); } M = log4(Size); N2 = Size; for (k=0; k<M; k++) { N1 = N2; N2 = N2/4; JT = N2/2; // array indices stat_add += 2; for (i=0; i<Size; i+=N1) { i1 = i + N2; i2 = i1 + N2; i3 = i2 + N2; R1 = A[i] + A[i2]; R2 = A[i] - A[i2]; R3 = A[i1] + A[i3]; A[i] = R1 + R3; A[i2] = R1 - R3; R1 = B[i] + B[i2]; R4 = B[i] - B[i2]; R3 = B[i1] + B[i3]; B[i] = R1 + R3; B[i2] = R1 - R3; R1 = A[i1] - A[i3]; R3 = B[i1] - B[i3]; A[i1] = R2 + R3; A[i3] = R2 - R3; B[i1] = R4 - R1; B[i3] = R4 + R1; stat_add += 19; } if (k == M-1) goto ten; IE = Size/N1; stat_mult++; IA1 = 1; for (j = 1; j<N2; j++) { IA1 = IA1 + IE; stat_add++; if (j == JT) goto fifty; IA2 = IA1 + IA1 - 1; IA3 = IA2 + IA1 - 1; CO1 = WR[IA1]; CO2 = WR[IA2]; CO3 = WR[IA3]; SI1 = -WI[IA1] - CO1; SI2 = -WI[IA2] - CO2; SI3 = -WI[IA3] - CO3; CS1 = WI[IA1] - CO1; CS2 = WI[IA2] - CO2; CS3 = WI[IA3] - CO3; stat_add += 10; for (i=j; i<Size; i+=N1) { i1 = i + N2; i2 = i1 + N2; i3 = i2 + N2; R1 = A[i] + A[i2]; R2 = A[i] - X[i2]; T = A[i1] + A[i3]; A[i] = R1 + T; R1 = R1 - T; S1 = B[i] + B[i2]; S2 = B[i] - B[i2]; T = B[i1] + B[i3]; B[i] = S1 + T; S1 = S1 - T; T = (R1 + S1)*CO2; B[i2] = T + R1*SI2; A[i2] = T + S1*CS2; T = B[i1] - B[i3]; R1 = R2 + T; R2 = R2 - T; T = A[i1] - A[i3]; S1 = S2 - T; S2 = S2 + T; T = (R1 + S1)*CO1; B[i1] = T + R1*SI1; A[i1] = T + S1*CS1; T = (R2 + S2)*CO3; B[i3] = T + R2*SI3; A[i3] = T + S2*CS3; stat_add += 28; stat_mult += 9; } goto twenty; fifty: for (i=j; i<Size; i+=N1) { i1 = i + N2; i2 = i1 + N2; i3 = i2 + N2; R1 = A[i] + A[i2]; R2 = A[i] - A[i2]; S1 = B[i] + B[i2]; S2 = B[i] - B[i2]; T = A[i1] + A[i3]; A[i] = T + R1; B[i2] = T - R1; T = B[i1] + B[i3]; B[i] = S1 + T; A[i2] = S1 - T; R1 = X[i1] - X[i3]; S1 = B[i1] - B[i3]; T = R2 + S1; R2 = R2 - S1; S1 = S2 - R1; S2 = S2 + R1; A[i1] = (T + S1)*CS1; B[i1] = (S1 - T)*CS1; A[i3] = (S2 - R2)*CS1; B[i3] = -(S2 + R2)*CS1; stat_add += 23; stat_mult += 4; } } twenty: } ten: free(WI); free(WR); PrintCounters("DFTTukey4Mult"); } int log2(int n) { int i=0; int p = 1; while (p != n) { i++; p = (int)pow(2,i); } return i; } int log4(int n) { int i=0; int p = 1; while (p != n) { i++; p = (int)pow(4,i); } return i; } -------------- [go back]