[go back]
--------------

#include "dft.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

/* X = real input, Y = imag. input
   A = real output, B = imag. output */
static double **GlobalA, **GlobalB, *X, *Y;
static double Q;
static int Size = 512; /* default */

/* performance counters */
unsigned int stat_add, stat_mult, stat_trig;

int main(int argc, char **argv) {
  int c, i;
  extern char *optarg;
  extern int optind;
  int seed;
  char *names[] = {"DFTSimple", "DFTFaster", "DFTTable",
           "DFTTukey2", "DFTTukey2MultTable", "DFTTukey4",
           "DFTTukey4Mult"};
  int sizes[] = {256, 1024, 4096, 16384};
  int num_sizes = 4;
  
  while ((c = getopt(argc, argv, "r:s:")) != EOF) {
    switch (c) {
    case 's':
      sscanf(optarg, "%d", &Size);
      printf("Using size %d\n", Size);
      Q = M_PI * 2.0 /(double)Size;
      break;
    case 'r':
      sscanf(optarg, "%d", &seed);
      printf("Using seed %d\n", seed);
      srandom(seed);
    default:
      fprintf(stderr, "Invalid arguments.\n");
      exit(1);
    }
  }

  for (i=0; i<num_sizes; i++) {
    Size = sizes[i];
    Q = M_PI * 2.0 / (double)Size;
    InitDFT(7);
    DFTSimple(GlobalA[0], GlobalB[0]);
    DFTFaster(GlobalA[1], GlobalB[1]);
    DFTTable(GlobalA[2], GlobalB[2]);
    DFTTukey2(GlobalA[3], GlobalB[3]);
    DFTTukey2MultTable(GlobalA[4], GlobalB[4]);
    DFTTukey4(GlobalA[5], GlobalB[5]);
    DFTTukey4Mult(GlobalA[6], GlobalB[6]);
    FreeDFT(7);
  }

  //CheckDFT(6, names);
  exit(0);
}

void FreeDFT(int num) {
  int i;
  free(X);
  free(Y);
  for (i=0; i<num; i++) {
    free(GlobalA[i]);
    free(GlobalB[i]);
  }
  free(GlobalA);
  free(GlobalB);
}

void ClearCounters(void) {
  stat_add = stat_mult = stat_trig = 0;
}

void PrintCounters(char *type) {
  printf("%s, size %d\n", type, Size);
  printf("\tAdditions:       %u\n", stat_add);
  printf("\tMultiplications: %u\n", stat_mult);
  printf("\tTrig. Functions: %u\n\n", stat_trig);
}

void InitDFT(int num) {
  int i,j;
  printf("Initializing %d output arrays of size %d\n", num, Size);
  X = (double *)calloc(Size, sizeof(double));
  Y = (double *)calloc(Size, sizeof(double));
  GlobalA = (double **)calloc(num, sizeof(double*));
  GlobalB = (double **)calloc(num, sizeof(double*));

  for (i=0; i<num; i++) {
    GlobalA[i] = (double*)calloc(Size, sizeof(double)); 
    GlobalB[i] = (double*)calloc(Size, sizeof(double)); 
  }
  for (i=0; i<Size; i++) {
    X[i] = random() & 0xfff; 
    Y[i] = random() & 0xfff;
    for (j=0; j<num; j++) {
      GlobalA[j][i] = X[i];
      GlobalB[j][i] = Y[i];
    }
  }
}

void CheckDFT(int num, char **names) {
  int i,j;
printf("%d\n", Size);
  for (i=1; i<num; i++) {
    for (j=100; j<Size; j++) {
      if (floor(GlobalA[i][j]) != floor(GlobalA[i-1][j])) {
        fprintf(stderr, "Inconsistency in %s's A[%d]: %f != %f\n",
                names[i], j, GlobalA[i][j], GlobalA[i-1][j]);
        exit(1);
      }
      printf("%s, A[%d]: %f == %f\n", names[i], j, GlobalA[i][j], GlobalA[i-1][j]);
      if (floor(GlobalB[i][j]) != floor(GlobalB[i-1][j])) {
        fprintf(stderr, "Inconsistency in %s's B[%d]: %f != %f\n",
                names[i], GlobalB[i][j], GlobalB[i-1][j]);
        exit(1);
      }
    }
  }
  printf("Everything checks out.\nExiting cleanly...\n");
}

/* Simplistic N**2 DFT */
void DFTSimple(double *A, double *B) {
  int i,j;
  ClearCounters();
  for (i=0; i<Size; i++) {
    A[i] = 0;
    B[i] = 0;
    for (j=0; j<Size; j++) {
      stat_add += 4;
      stat_mult += 4;
      stat_trig += 4;
      A[i] = A[i] + X[j]*cos((double)Q*i*j) + Y[j]*sin((double)Q*i*j);
      B[i] = B[i] + Y[j]*cos((double)Q*i*j) - X[j]*sin((double)Q*i*j);
    }
  }
  PrintCounters("DFTSimple");
}

/* Simplistic DFT with fewer trig/int arith. */
void DFTFaster(double *A, double *B) {
  int i,j;
  double W, AT, BT, D, C, S;
  ClearCounters();
  for (i=0; i<Size; i++) {
    int W = Q * i;
    AT = X[0];
    BT = Y[0];
    for (j=1; j<Size; j++) {
      stat_mult += 5;
      stat_add  += 2;
      stat_trig += 2;
      D = W * j;
      C = cos(D);
      S = -sin(D);
      AT += C*X[j] - S*Y[j];
      BT += C*Y[j] + S*X[j];
    }
    A[i] = AT;
    B[i] = BT;
  }
  PrintCounters("DFTFaster");
}

void DFTTable(double *A, double *B) {
  int i,j,k;
  double W, AT, BT;
  /* initialize lookup table */
  double *C = (double*)calloc(Size, sizeof(double));
  double *S = (double*)calloc(Size, sizeof(double));
  ClearCounters();

  for (i=0; i<Size; i++) {
    C[i] = cos(Q*i);
    S[i] = -sin(Q*i);
    stat_trig += 2;
  }

  for (i=0; i<Size; i++) {
    AT = X[0];
    BT = Y[0];
    k=i;
    for (j=1; j<Size; j++) {
      stat_mult += 4;
      stat_add  += 5;
      AT += C[k]*X[j] - S[k]*Y[j];
      BT += C[k]*Y[j] + S[k]*X[j];
      k += i;
      if (k >= Size) {
        k -= Size;
        stat_add++;
      }
    }
    A[i] = AT;
    B[i] = BT;
  }
  PrintCounters("DFTTable");
  free(C);
  free(S);
}

void DFTTukey2(double *A, double *B) {
  int N2, k, j, i, N1, L, M;
  double C, S, Z, XT, YT, E;
  ClearCounters();
  M = (int)log2(Size);
  N2 = Size;
  for (k=0; k<M; k++) {
    N1 = N2;
    N2 = N2/2; // static div by 2!
    E = M_PI * 2 / (double)N1;
    Z = 0;
    stat_mult += 2;
    stat_add++;
    for (j=0; j<N2; j++) {
      C = cos(Z);
      S = sin(Z);
      stat_trig += 2;
      stat_mult += 1;
      Z = (j+1)*E; // array indices
      for (i = j; i<Size; i+=N1) {
        stat_add += 7;
        stat_mult += 4;
        L = i + N2; // array indices
        XT = A[i] - A[L];
        A[i] = A[i] + A[L];
        YT = B[i] - B[L];
        B[i] = B[i] + B[L];
        A[L] = C*XT + S*YT;
        B[L] = C*YT - S*XT;
      }
    }
  }

  j = 0;
  for (i=0; i<Size-2; i++) {
    if (i < j) {
      XT = A[j];
      A[j] = A[i];
      A[i] = XT;
      XT = B[j];
      B[j] = B[i];
      B[i] = XT;
    }
    stat_add += 1;
    k = Size/2; // static div by 2
    while (k < j) {
      stat_add += 2;
      j = j-k;
      k = k/2; // static div by 2
    }
    stat_add += 1;
    j+=k-1;
  }
  PrintCounters("DFTTukey2");
}

void DFTTukey2Table(double *A, double *B) {
  int N2, k, j, i, N1, L, IE, IA, M;
  double C, S, XT, YT, Z;
  double *WR, *WI;
  ClearCounters();
  WR = (double*)calloc(Size, sizeof(double));
  WI = (double*)calloc(Size, sizeof(double));
  for (k=0; k<Size; k++) {
    stat_mult += 1;
    stat_trig += 2;
    Z = k*Q;
    WR[k] = cos(Z);
    WI[k] = -sin(Z);
  }
  M = log2(Size);
  N2 = Size;
  for (k=0; k<M; k++) {
    N1 = N2;
    N2 = N2/2; // static div by 2
    IE = Size/N1;
    IA = 0;
    stat_mult += 1;
    stat_add++;
    for (j=0; j<N2; j++) {
      C = WR[IA];
      S = WI[IA];
      IA += IE;
      stat_add += 1;
      for (i = j; i<Size; i+=N1) {
        stat_add += 7;
        stat_mult += 4;
        L = i + N2;
        XT = A[i] - A[L];
        A[i] = A[i] + A[L];
        YT = B[i] - B[L];
        B[i] = B[i] + B[L];
        A[L] = C*XT + S*YT;
        B[L] = C*YT - S*XT;
      }
    }
  }

  j = 1;
  for (i=0; i<Size-2; i++) {
    if (i < j) {
      XT = A[j];
      A[j] = A[i];
      A[i] = XT;
      XT = B[j];
      B[j] = B[i];
      B[i] = XT;
    }
    stat_add++;
    k = Size/2; // static div by 2
    while (k < j) {
      stat_add += 2;
      j = j-k;
      k = k/2; // static div by 2
    }
    stat_add += 1;
    j+=k;
  }
  free(WR);
  free(WI);
  PrintCounters("DFTTukey2Table");
}

void DFTTukey2MultTable(double *A, double *B) {
  int N2, k, j, i, N1, L, IE, IA, M;
  double C, S, XT, YT, Z, T, TY;
  double *WR, *WI;
  ClearCounters();
  WR = (double*)calloc(Size, sizeof(double));
  WI = (double*)calloc(Size, sizeof(double));
  for (k=0; k<Size; k++) {
    stat_mult += 1;
    stat_trig += 2;
    Z = k*Q;
    WR[k] = cos(Z);
    WI[k] = -sin(Z);
  }
  N2 = Size;
  M = log2(Size);
  for (k=0; k<M; k++) {
    N1 = N2;
    N2 /= 2; // static div by 2
    stat_add++;
    for (i=0; i<Size; i+=N1) {
      stat_add += 5;
      L = i + N2;
      T = A[i] - A[L];
      A[i] += A[L];
      A[L] = T;
      T = B[i] - B[L];
      B[i] += B[L];
      B[L] = T;
    }
    if (k < M-1) {
      stat_mult += 1;
      IE = Size/N1;
      IA = 0;
      for (j = 1; j<N2; j++) {
        stat_add++;
        IA += IE;
        C = WR[IA];
        S = WI[IA];
        for (i=j; i<Size; i+=N1) {
          stat_add += 7;
          stat_mult += 4;
          L = i+N2;
          T = A[i] - A[L];
          A[i] += A[L];
          TY = B[i] - B[L];
          B[i] += B[L];
          A[L] = C*T + S*TY;
          B[L] = C*TY - S*T;
        }
      }
    }
  }
  j = 1;
  for (i=0; i<Size-2; i++) {
    if (i < j) {
      XT = A[j];
      A[j] = A[i];
      A[i] = XT;
      XT = B[j];
      B[j] = B[i];
      B[i] = XT;
    }
    stat_add += 1;
    k = Size/2; // static div by 2
    while (k < j) {
      stat_add += 2;
      j = j-k;
      k = k/2; // static div by 2
    }
    stat_add += 1;
    j+=k;
  }
  PrintCounters("DFTTukey2MultTable");
  free(WR);
  free(WI);
}

void DFTTukey4(double *A, double *B) {
  int N2, M, k, i, j, N1, i1, i2, i3;
  double lA, lB, lC, CO1, CO2, CO3, SI1, SI2, SI3, R1, R2, R3, R4, S1, S2,
    S3, S4, E;
  ClearCounters();
  N2 = Size;
  M = log2(Size);
  for (k=0; k<M; k++) {
    N1 = N2;
    N2 = N2/4; // static div by 4
    E = M_PI * 2/N1;
    lA = 0;
    stat_mult += 2;
    stat_add++;
    for (j=0; j<N2; j++) {
      lB = lA + lA;
      lC = lA + lB;
      CO1 = cos(lA);
      CO2 = cos(lB);
      CO3 = cos(lC);
      SI1 = sin(lA);
      SI2 = sin(lB);
      SI3 = sin(lC);
      lA = j*E;
      stat_add += 2;
      stat_mult++;
      stat_trig += 6;
      for (i=j; i<Size; i+=N1) {
        i1 = i + N2;
        i2 = i1 + N2;
        i3 = i2 + N2;
        R1 = A[i] + A[i2];
        R3 = A[i] - A[i2];
        S1 = B[i] + B[i2];
        S3 = B[i] - B[i2];
        R2 = A[i1] + A[i3];
        R4 = A[i1] - A[i3];
        S2 = B[i1] + B[i3];
        S4 = B[i1] - B[i3];
        A[i] = R1 + R2;
        R2 = R1 - R2;
        R1 = R3 - S4;
        R3 = R3 + S4;
        B[i] = S1 + S2;
        S2 = S1 - S2;
        S1 = S3 + R4;
        S3 = S3 - R4;
        A[i1] = CO1*R3 + SI1*S3;
        B[i1] = CO1*S3 - SI1*R3;
        A[i2] = CO2*R2 + SI2*S2;
        B[i2] = CO2*S2 - SI2*R2;
        A[i3] = CO3*R1 + SI3*S1;
        B[i3] = CO3*S1 - SI3*R1;
        stat_add += 25;
        stat_mult += 12;
      }
    }
  }
  PrintCounters("DFTTukey4");
}

void DFTTukey4Mult(double *A, double *B) {
  int N2, N1, JT, IE, IA1, IA2, IA3, k, M, i, i1, i2, i3, j;
  double R1, R2, R3, R4, S1, S2, S3, SI1, SI2, SI3, CS1, CS2, CS3;
  double CO1, CO2, CO3, T, Z;
  double *WR, *WI;
  WR = (double*)calloc(Size, sizeof(double));
  WI = (double*)calloc(Size, sizeof(double));
  ClearCounters();
  for (k=0; k<Size; k++) {
    stat_mult += 1;
    stat_trig += 2;
    Z = k*Q;
    WR[k] = cos(Z);
    WI[k] = -sin(Z);
  }
  M = log4(Size);
  N2 = Size;
  for (k=0; k<M; k++) {
    N1 = N2;
    N2 = N2/4;
    JT = N2/2; // array indices
    stat_add += 2;
    for (i=0; i<Size; i+=N1) {
      i1 = i + N2;
      i2 = i1 + N2;
      i3 = i2 + N2;
      R1 = A[i] + A[i2];
      R2 = A[i] - A[i2];
      R3 = A[i1] + A[i3];
      A[i] = R1 + R3;
      A[i2] = R1 - R3;
      R1 = B[i] + B[i2];
      R4 = B[i] - B[i2];
      R3 = B[i1] + B[i3];
      B[i] = R1 + R3;
      B[i2] = R1 - R3;
      R1 = A[i1] - A[i3];
      R3 = B[i1] - B[i3];
      A[i1] = R2 + R3;
      A[i3] = R2 - R3;
      B[i1] = R4 - R1;
      B[i3] = R4 + R1;
      stat_add += 19;
    }
    if (k == M-1) goto ten;
    IE = Size/N1;
    stat_mult++;
    IA1 = 1;
    for (j = 1; j<N2; j++) {
      IA1 = IA1 + IE;
      stat_add++;
      if (j == JT) goto fifty;
      IA2 = IA1 + IA1 - 1;
      IA3 = IA2 + IA1 - 1;
      CO1 = WR[IA1];
      CO2 = WR[IA2];
      CO3 = WR[IA3];
      SI1 = -WI[IA1] - CO1;
      SI2 = -WI[IA2] - CO2;
      SI3 = -WI[IA3] - CO3;
      CS1 = WI[IA1]  - CO1;
      CS2 = WI[IA2]  - CO2;
      CS3 = WI[IA3]  - CO3;
      stat_add += 10;
      for (i=j; i<Size; i+=N1) {
        i1 = i + N2;
        i2 = i1 + N2;
        i3 = i2 + N2;
        R1 = A[i] + A[i2];
        R2 = A[i] - X[i2];
        T = A[i1] + A[i3];
        A[i] = R1 + T;
        R1 = R1 - T;
        S1 = B[i] + B[i2];
        S2 = B[i] - B[i2];
        T = B[i1] + B[i3];
        B[i] = S1 + T;
        S1 = S1 - T;
        T = (R1 + S1)*CO2;
        B[i2] = T + R1*SI2;
        A[i2] = T + S1*CS2;
        T = B[i1] - B[i3];
        R1 = R2 + T;
        R2 = R2 - T;
        T = A[i1] - A[i3];
        S1 = S2 - T;
        S2 = S2 + T;
        T = (R1 + S1)*CO1;
        B[i1] = T + R1*SI1;
        A[i1] = T + S1*CS1;
        T = (R2 + S2)*CO3;
        B[i3] = T + R2*SI3;
        A[i3] = T + S2*CS3;
        stat_add += 28;
        stat_mult += 9;
      }
      goto twenty;
    fifty:
      for (i=j; i<Size; i+=N1) {
        i1 = i + N2;
        i2 = i1 + N2;
        i3 = i2 + N2;
        R1 = A[i] + A[i2];
        R2 = A[i] - A[i2];
        S1 = B[i]  + B[i2];
        S2 = B[i] - B[i2];
        T = A[i1] + A[i3];
        A[i] = T + R1;
        B[i2] = T - R1;
        T = B[i1] + B[i3];
        B[i] = S1 + T;
        A[i2] = S1 - T;
        R1 = X[i1] - X[i3];
        S1 = B[i1] - B[i3];
        T = R2 + S1;
        R2 = R2 - S1;
        S1 = S2 - R1;
        S2 = S2 + R1;
        A[i1] = (T + S1)*CS1;
        B[i1] = (S1 - T)*CS1;
        A[i3] = (S2 - R2)*CS1;
        B[i3] = -(S2 + R2)*CS1;
        stat_add += 23;
        stat_mult += 4;
      }
    }
  twenty:
  }
 ten:
  free(WI);
  free(WR);
  PrintCounters("DFTTukey4Mult");
}

int log2(int n) {
  int i=0;
  int p = 1;
  while (p != n) {
    i++;
    p = (int)pow(2,i);
  }
  return i;
}

int log4(int n) {
  int i=0;
  int p = 1;
  while (p != n) {
    i++;
    p = (int)pow(4,i);
  }
  return i;
}


--------------
[go back]