[go back]
--------------
#include "dft.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
/* X = real input, Y = imag. input
A = real output, B = imag. output */
static double **GlobalA, **GlobalB, *X, *Y;
static double Q;
static int Size = 512; /* default */
/* performance counters */
unsigned int stat_add, stat_mult, stat_trig;
int main(int argc, char **argv) {
int c, i;
extern char *optarg;
extern int optind;
int seed;
char *names[] = {"DFTSimple", "DFTFaster", "DFTTable",
"DFTTukey2", "DFTTukey2MultTable", "DFTTukey4",
"DFTTukey4Mult"};
int sizes[] = {256, 1024, 4096, 16384};
int num_sizes = 4;
while ((c = getopt(argc, argv, "r:s:")) != EOF) {
switch (c) {
case 's':
sscanf(optarg, "%d", &Size);
printf("Using size %d\n", Size);
Q = M_PI * 2.0 /(double)Size;
break;
case 'r':
sscanf(optarg, "%d", &seed);
printf("Using seed %d\n", seed);
srandom(seed);
default:
fprintf(stderr, "Invalid arguments.\n");
exit(1);
}
}
for (i=0; i<num_sizes; i++) {
Size = sizes[i];
Q = M_PI * 2.0 / (double)Size;
InitDFT(7);
DFTSimple(GlobalA[0], GlobalB[0]);
DFTFaster(GlobalA[1], GlobalB[1]);
DFTTable(GlobalA[2], GlobalB[2]);
DFTTukey2(GlobalA[3], GlobalB[3]);
DFTTukey2MultTable(GlobalA[4], GlobalB[4]);
DFTTukey4(GlobalA[5], GlobalB[5]);
DFTTukey4Mult(GlobalA[6], GlobalB[6]);
FreeDFT(7);
}
//CheckDFT(6, names);
exit(0);
}
void FreeDFT(int num) {
int i;
free(X);
free(Y);
for (i=0; i<num; i++) {
free(GlobalA[i]);
free(GlobalB[i]);
}
free(GlobalA);
free(GlobalB);
}
void ClearCounters(void) {
stat_add = stat_mult = stat_trig = 0;
}
void PrintCounters(char *type) {
printf("%s, size %d\n", type, Size);
printf("\tAdditions: %u\n", stat_add);
printf("\tMultiplications: %u\n", stat_mult);
printf("\tTrig. Functions: %u\n\n", stat_trig);
}
void InitDFT(int num) {
int i,j;
printf("Initializing %d output arrays of size %d\n", num, Size);
X = (double *)calloc(Size, sizeof(double));
Y = (double *)calloc(Size, sizeof(double));
GlobalA = (double **)calloc(num, sizeof(double*));
GlobalB = (double **)calloc(num, sizeof(double*));
for (i=0; i<num; i++) {
GlobalA[i] = (double*)calloc(Size, sizeof(double));
GlobalB[i] = (double*)calloc(Size, sizeof(double));
}
for (i=0; i<Size; i++) {
X[i] = random() & 0xfff;
Y[i] = random() & 0xfff;
for (j=0; j<num; j++) {
GlobalA[j][i] = X[i];
GlobalB[j][i] = Y[i];
}
}
}
void CheckDFT(int num, char **names) {
int i,j;
printf("%d\n", Size);
for (i=1; i<num; i++) {
for (j=100; j<Size; j++) {
if (floor(GlobalA[i][j]) != floor(GlobalA[i-1][j])) {
fprintf(stderr, "Inconsistency in %s's A[%d]: %f != %f\n",
names[i], j, GlobalA[i][j], GlobalA[i-1][j]);
exit(1);
}
printf("%s, A[%d]: %f == %f\n", names[i], j, GlobalA[i][j], GlobalA[i-1][j]);
if (floor(GlobalB[i][j]) != floor(GlobalB[i-1][j])) {
fprintf(stderr, "Inconsistency in %s's B[%d]: %f != %f\n",
names[i], GlobalB[i][j], GlobalB[i-1][j]);
exit(1);
}
}
}
printf("Everything checks out.\nExiting cleanly...\n");
}
/* Simplistic N**2 DFT */
void DFTSimple(double *A, double *B) {
int i,j;
ClearCounters();
for (i=0; i<Size; i++) {
A[i] = 0;
B[i] = 0;
for (j=0; j<Size; j++) {
stat_add += 4;
stat_mult += 4;
stat_trig += 4;
A[i] = A[i] + X[j]*cos((double)Q*i*j) + Y[j]*sin((double)Q*i*j);
B[i] = B[i] + Y[j]*cos((double)Q*i*j) - X[j]*sin((double)Q*i*j);
}
}
PrintCounters("DFTSimple");
}
/* Simplistic DFT with fewer trig/int arith. */
void DFTFaster(double *A, double *B) {
int i,j;
double W, AT, BT, D, C, S;
ClearCounters();
for (i=0; i<Size; i++) {
int W = Q * i;
AT = X[0];
BT = Y[0];
for (j=1; j<Size; j++) {
stat_mult += 5;
stat_add += 2;
stat_trig += 2;
D = W * j;
C = cos(D);
S = -sin(D);
AT += C*X[j] - S*Y[j];
BT += C*Y[j] + S*X[j];
}
A[i] = AT;
B[i] = BT;
}
PrintCounters("DFTFaster");
}
void DFTTable(double *A, double *B) {
int i,j,k;
double W, AT, BT;
/* initialize lookup table */
double *C = (double*)calloc(Size, sizeof(double));
double *S = (double*)calloc(Size, sizeof(double));
ClearCounters();
for (i=0; i<Size; i++) {
C[i] = cos(Q*i);
S[i] = -sin(Q*i);
stat_trig += 2;
}
for (i=0; i<Size; i++) {
AT = X[0];
BT = Y[0];
k=i;
for (j=1; j<Size; j++) {
stat_mult += 4;
stat_add += 5;
AT += C[k]*X[j] - S[k]*Y[j];
BT += C[k]*Y[j] + S[k]*X[j];
k += i;
if (k >= Size) {
k -= Size;
stat_add++;
}
}
A[i] = AT;
B[i] = BT;
}
PrintCounters("DFTTable");
free(C);
free(S);
}
void DFTTukey2(double *A, double *B) {
int N2, k, j, i, N1, L, M;
double C, S, Z, XT, YT, E;
ClearCounters();
M = (int)log2(Size);
N2 = Size;
for (k=0; k<M; k++) {
N1 = N2;
N2 = N2/2; // static div by 2!
E = M_PI * 2 / (double)N1;
Z = 0;
stat_mult += 2;
stat_add++;
for (j=0; j<N2; j++) {
C = cos(Z);
S = sin(Z);
stat_trig += 2;
stat_mult += 1;
Z = (j+1)*E; // array indices
for (i = j; i<Size; i+=N1) {
stat_add += 7;
stat_mult += 4;
L = i + N2; // array indices
XT = A[i] - A[L];
A[i] = A[i] + A[L];
YT = B[i] - B[L];
B[i] = B[i] + B[L];
A[L] = C*XT + S*YT;
B[L] = C*YT - S*XT;
}
}
}
j = 0;
for (i=0; i<Size-2; i++) {
if (i < j) {
XT = A[j];
A[j] = A[i];
A[i] = XT;
XT = B[j];
B[j] = B[i];
B[i] = XT;
}
stat_add += 1;
k = Size/2; // static div by 2
while (k < j) {
stat_add += 2;
j = j-k;
k = k/2; // static div by 2
}
stat_add += 1;
j+=k-1;
}
PrintCounters("DFTTukey2");
}
void DFTTukey2Table(double *A, double *B) {
int N2, k, j, i, N1, L, IE, IA, M;
double C, S, XT, YT, Z;
double *WR, *WI;
ClearCounters();
WR = (double*)calloc(Size, sizeof(double));
WI = (double*)calloc(Size, sizeof(double));
for (k=0; k<Size; k++) {
stat_mult += 1;
stat_trig += 2;
Z = k*Q;
WR[k] = cos(Z);
WI[k] = -sin(Z);
}
M = log2(Size);
N2 = Size;
for (k=0; k<M; k++) {
N1 = N2;
N2 = N2/2; // static div by 2
IE = Size/N1;
IA = 0;
stat_mult += 1;
stat_add++;
for (j=0; j<N2; j++) {
C = WR[IA];
S = WI[IA];
IA += IE;
stat_add += 1;
for (i = j; i<Size; i+=N1) {
stat_add += 7;
stat_mult += 4;
L = i + N2;
XT = A[i] - A[L];
A[i] = A[i] + A[L];
YT = B[i] - B[L];
B[i] = B[i] + B[L];
A[L] = C*XT + S*YT;
B[L] = C*YT - S*XT;
}
}
}
j = 1;
for (i=0; i<Size-2; i++) {
if (i < j) {
XT = A[j];
A[j] = A[i];
A[i] = XT;
XT = B[j];
B[j] = B[i];
B[i] = XT;
}
stat_add++;
k = Size/2; // static div by 2
while (k < j) {
stat_add += 2;
j = j-k;
k = k/2; // static div by 2
}
stat_add += 1;
j+=k;
}
free(WR);
free(WI);
PrintCounters("DFTTukey2Table");
}
void DFTTukey2MultTable(double *A, double *B) {
int N2, k, j, i, N1, L, IE, IA, M;
double C, S, XT, YT, Z, T, TY;
double *WR, *WI;
ClearCounters();
WR = (double*)calloc(Size, sizeof(double));
WI = (double*)calloc(Size, sizeof(double));
for (k=0; k<Size; k++) {
stat_mult += 1;
stat_trig += 2;
Z = k*Q;
WR[k] = cos(Z);
WI[k] = -sin(Z);
}
N2 = Size;
M = log2(Size);
for (k=0; k<M; k++) {
N1 = N2;
N2 /= 2; // static div by 2
stat_add++;
for (i=0; i<Size; i+=N1) {
stat_add += 5;
L = i + N2;
T = A[i] - A[L];
A[i] += A[L];
A[L] = T;
T = B[i] - B[L];
B[i] += B[L];
B[L] = T;
}
if (k < M-1) {
stat_mult += 1;
IE = Size/N1;
IA = 0;
for (j = 1; j<N2; j++) {
stat_add++;
IA += IE;
C = WR[IA];
S = WI[IA];
for (i=j; i<Size; i+=N1) {
stat_add += 7;
stat_mult += 4;
L = i+N2;
T = A[i] - A[L];
A[i] += A[L];
TY = B[i] - B[L];
B[i] += B[L];
A[L] = C*T + S*TY;
B[L] = C*TY - S*T;
}
}
}
}
j = 1;
for (i=0; i<Size-2; i++) {
if (i < j) {
XT = A[j];
A[j] = A[i];
A[i] = XT;
XT = B[j];
B[j] = B[i];
B[i] = XT;
}
stat_add += 1;
k = Size/2; // static div by 2
while (k < j) {
stat_add += 2;
j = j-k;
k = k/2; // static div by 2
}
stat_add += 1;
j+=k;
}
PrintCounters("DFTTukey2MultTable");
free(WR);
free(WI);
}
void DFTTukey4(double *A, double *B) {
int N2, M, k, i, j, N1, i1, i2, i3;
double lA, lB, lC, CO1, CO2, CO3, SI1, SI2, SI3, R1, R2, R3, R4, S1, S2,
S3, S4, E;
ClearCounters();
N2 = Size;
M = log2(Size);
for (k=0; k<M; k++) {
N1 = N2;
N2 = N2/4; // static div by 4
E = M_PI * 2/N1;
lA = 0;
stat_mult += 2;
stat_add++;
for (j=0; j<N2; j++) {
lB = lA + lA;
lC = lA + lB;
CO1 = cos(lA);
CO2 = cos(lB);
CO3 = cos(lC);
SI1 = sin(lA);
SI2 = sin(lB);
SI3 = sin(lC);
lA = j*E;
stat_add += 2;
stat_mult++;
stat_trig += 6;
for (i=j; i<Size; i+=N1) {
i1 = i + N2;
i2 = i1 + N2;
i3 = i2 + N2;
R1 = A[i] + A[i2];
R3 = A[i] - A[i2];
S1 = B[i] + B[i2];
S3 = B[i] - B[i2];
R2 = A[i1] + A[i3];
R4 = A[i1] - A[i3];
S2 = B[i1] + B[i3];
S4 = B[i1] - B[i3];
A[i] = R1 + R2;
R2 = R1 - R2;
R1 = R3 - S4;
R3 = R3 + S4;
B[i] = S1 + S2;
S2 = S1 - S2;
S1 = S3 + R4;
S3 = S3 - R4;
A[i1] = CO1*R3 + SI1*S3;
B[i1] = CO1*S3 - SI1*R3;
A[i2] = CO2*R2 + SI2*S2;
B[i2] = CO2*S2 - SI2*R2;
A[i3] = CO3*R1 + SI3*S1;
B[i3] = CO3*S1 - SI3*R1;
stat_add += 25;
stat_mult += 12;
}
}
}
PrintCounters("DFTTukey4");
}
void DFTTukey4Mult(double *A, double *B) {
int N2, N1, JT, IE, IA1, IA2, IA3, k, M, i, i1, i2, i3, j;
double R1, R2, R3, R4, S1, S2, S3, SI1, SI2, SI3, CS1, CS2, CS3;
double CO1, CO2, CO3, T, Z;
double *WR, *WI;
WR = (double*)calloc(Size, sizeof(double));
WI = (double*)calloc(Size, sizeof(double));
ClearCounters();
for (k=0; k<Size; k++) {
stat_mult += 1;
stat_trig += 2;
Z = k*Q;
WR[k] = cos(Z);
WI[k] = -sin(Z);
}
M = log4(Size);
N2 = Size;
for (k=0; k<M; k++) {
N1 = N2;
N2 = N2/4;
JT = N2/2; // array indices
stat_add += 2;
for (i=0; i<Size; i+=N1) {
i1 = i + N2;
i2 = i1 + N2;
i3 = i2 + N2;
R1 = A[i] + A[i2];
R2 = A[i] - A[i2];
R3 = A[i1] + A[i3];
A[i] = R1 + R3;
A[i2] = R1 - R3;
R1 = B[i] + B[i2];
R4 = B[i] - B[i2];
R3 = B[i1] + B[i3];
B[i] = R1 + R3;
B[i2] = R1 - R3;
R1 = A[i1] - A[i3];
R3 = B[i1] - B[i3];
A[i1] = R2 + R3;
A[i3] = R2 - R3;
B[i1] = R4 - R1;
B[i3] = R4 + R1;
stat_add += 19;
}
if (k == M-1) goto ten;
IE = Size/N1;
stat_mult++;
IA1 = 1;
for (j = 1; j<N2; j++) {
IA1 = IA1 + IE;
stat_add++;
if (j == JT) goto fifty;
IA2 = IA1 + IA1 - 1;
IA3 = IA2 + IA1 - 1;
CO1 = WR[IA1];
CO2 = WR[IA2];
CO3 = WR[IA3];
SI1 = -WI[IA1] - CO1;
SI2 = -WI[IA2] - CO2;
SI3 = -WI[IA3] - CO3;
CS1 = WI[IA1] - CO1;
CS2 = WI[IA2] - CO2;
CS3 = WI[IA3] - CO3;
stat_add += 10;
for (i=j; i<Size; i+=N1) {
i1 = i + N2;
i2 = i1 + N2;
i3 = i2 + N2;
R1 = A[i] + A[i2];
R2 = A[i] - X[i2];
T = A[i1] + A[i3];
A[i] = R1 + T;
R1 = R1 - T;
S1 = B[i] + B[i2];
S2 = B[i] - B[i2];
T = B[i1] + B[i3];
B[i] = S1 + T;
S1 = S1 - T;
T = (R1 + S1)*CO2;
B[i2] = T + R1*SI2;
A[i2] = T + S1*CS2;
T = B[i1] - B[i3];
R1 = R2 + T;
R2 = R2 - T;
T = A[i1] - A[i3];
S1 = S2 - T;
S2 = S2 + T;
T = (R1 + S1)*CO1;
B[i1] = T + R1*SI1;
A[i1] = T + S1*CS1;
T = (R2 + S2)*CO3;
B[i3] = T + R2*SI3;
A[i3] = T + S2*CS3;
stat_add += 28;
stat_mult += 9;
}
goto twenty;
fifty:
for (i=j; i<Size; i+=N1) {
i1 = i + N2;
i2 = i1 + N2;
i3 = i2 + N2;
R1 = A[i] + A[i2];
R2 = A[i] - A[i2];
S1 = B[i] + B[i2];
S2 = B[i] - B[i2];
T = A[i1] + A[i3];
A[i] = T + R1;
B[i2] = T - R1;
T = B[i1] + B[i3];
B[i] = S1 + T;
A[i2] = S1 - T;
R1 = X[i1] - X[i3];
S1 = B[i1] - B[i3];
T = R2 + S1;
R2 = R2 - S1;
S1 = S2 - R1;
S2 = S2 + R1;
A[i1] = (T + S1)*CS1;
B[i1] = (S1 - T)*CS1;
A[i3] = (S2 - R2)*CS1;
B[i3] = -(S2 + R2)*CS1;
stat_add += 23;
stat_mult += 4;
}
}
twenty:
}
ten:
free(WI);
free(WR);
PrintCounters("DFTTukey4Mult");
}
int log2(int n) {
int i=0;
int p = 1;
while (p != n) {
i++;
p = (int)pow(2,i);
}
return i;
}
int log4(int n) {
int i=0;
int p = 1;
while (p != n) {
i++;
p = (int)pow(4,i);
}
return i;
}
--------------
[go back]