Skip to content

Instantly share code, notes, and snippets.

@LIMPIX31
Last active February 28, 2026 07:47
Show Gist options
  • Select an option

  • Save LIMPIX31/f164982d3815fd8a28fe204bc6124389 to your computer and use it in GitHub Desktop.

Select an option

Save LIMPIX31/f164982d3815fd8a28fe204bc6124389 to your computer and use it in GitHub Desktop.
Radix-2^2 SDF FFT
// Copyright (c) 2026, Danil Karpenko
// All rights reserved.
// r22_fft1024
// Top level: five r22_sdf stages (S = 0..4) chained back to back, followed by
// one r22_round per component that brings the DW+10 bit result back to DW bits.
//
// Parameters
//   DW : width of the input and output samples
//   TW : twiddle width handed to every r22_sdf stage
// Ports
//   i_idx / o_idx : 10-bit index travelling alongside the data
//   i_re, i_im    : input sample
//   o_re, o_im    : rounded output sample
module r22_fft1024 #
( parameter int DW = 16
, parameter int TW = 16
)
( input  logic                 clk
, input  logic        [   9:0] i_idx
, output logic        [   9:0] o_idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [DW-1:0] o_re
, output logic signed [DW-1:0] o_im
);
  localparam int N = 1024;

  // Every stage output is two bits wider than its input.
  logic signed [DW+1:0] s0_re, s0_im;
  logic signed [DW+3:0] s1_re, s1_im;
  logic signed [DW+5:0] s2_re, s2_im;
  logic signed [DW+7:0] s3_re, s3_im;
  logic signed [DW+9:0] s4_re, s4_im;

  // Index as it leaves each stage.
  logic [9:0] s0_idx, s1_idx, s2_idx, s3_idx, s4_idx;

  r22_sdf #(.N(N), .S(0), .DW(DW + 0), .TW(TW)) u_sdf0
  ( .clk   (clk)
  , .i_idx (i_idx),  .o_idx (s0_idx)
  , .i_re  (i_re),   .i_im  (i_im)
  , .o_re  (s0_re),  .o_im  (s0_im)
  );

  r22_sdf #(.N(N), .S(1), .DW(DW + 2), .TW(TW)) u_sdf1
  ( .clk   (clk)
  , .i_idx (s0_idx), .o_idx (s1_idx)
  , .i_re  (s0_re),  .i_im  (s0_im)
  , .o_re  (s1_re),  .o_im  (s1_im)
  );

  r22_sdf #(.N(N), .S(2), .DW(DW + 4), .TW(TW)) u_sdf2
  ( .clk   (clk)
  , .i_idx (s1_idx), .o_idx (s2_idx)
  , .i_re  (s1_re),  .i_im  (s1_im)
  , .o_re  (s2_re),  .o_im  (s2_im)
  );

  r22_sdf #(.N(N), .S(3), .DW(DW + 6), .TW(TW)) u_sdf3
  ( .clk   (clk)
  , .i_idx (s2_idx), .o_idx (s3_idx)
  , .i_re  (s2_re),  .i_im  (s2_im)
  , .o_re  (s3_re),  .o_im  (s3_im)
  );

  r22_sdf #(.N(N), .S(4), .DW(DW + 8), .TW(TW)) u_sdf4
  ( .clk   (clk)
  , .i_idx (s3_idx), .o_idx (s4_idx)
  , .i_re  (s3_re),  .i_im  (s3_im)
  , .o_re  (s4_re),  .o_im  (s4_im)
  );

  // Final rounding from DW+10 bits down to DW bits (one register inside r22_round).
  r22_round #(.I(DW + 10), .O(DW)) u_round_re
  ( .clk (clk), .i (s4_re), .o (o_re) );

  r22_round #(.I(DW + 10), .O(DW)) u_round_im
  ( .clk (clk), .i (s4_im), .o (o_im) );

  // The index is registered once so it leaves together with the rounded data.
  always_ff @(posedge clk) begin
    o_idx <= s4_idx;
  end
endmodule : r22_fft1024
// r22_sdf
// One stage of the pipeline: an r22_rdx4 butterfly pair, optionally followed by
// an r22_twiddle multiplier. The twiddle is instantiated only while
// S < $clog2(N)/2 - 1; otherwise the butterfly outputs are wired straight out.
//
// Parameters
//   N  : transform size
//   S  : stage number
//   DW : input sample width (outputs are DW+2 bits)
//   TW : twiddle coefficient width
//   IV : forwarded unchanged to r22_twiddle
//   ID : index width, defaults to $clog2(N)
//   LV : index bit used by the first butterfly; the second one uses LV-1
module r22_sdf #
( parameter int N  = 4
, parameter int S  = 0
, parameter int DW = 16
, parameter int TW = 16
, parameter bit IV = 0
, parameter int ID = $clog2(N)
, parameter int LV = $clog2(N) - (S * 2) - 1
)
( input  logic                 clk
, input  logic        [ID-1:0] i_idx
, output logic        [ID-1:0] o_idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [DW+1:0] o_re
, output logic signed [DW+1:0] o_im
);
  // True when this stage is followed by a twiddle multiplication.
  localparam bit HAS_TWIDDLE = (S < $clog2(N) / 2 - 1);

  // Butterfly pair outputs.
  logic signed [DW+1:0] bf_re, bf_im;
  logic        [ID-1:0] bf_idx;

  r22_rdx4 #(.ID(ID), .DW(DW), .B1(LV), .B2(LV - 1)) u_rdx4
  ( .clk   (clk)
  , .i_idx (i_idx), .o_idx (bf_idx)
  , .i_re  (i_re),  .i_im  (i_im)
  , .o_re  (bf_re), .o_im  (bf_im)
  );

  generate
    if (HAS_TWIDDLE) begin : gen_twiddle
      r22_twiddle #(.N(N), .S(S), .DW(DW + 2), .TW(TW), .IV(IV)) u_tw
      ( .clk   (clk)
      , .i_idx (bf_idx), .o_idx (o_idx)
      , .i_re  (bf_re),  .i_im  (bf_im)
      , .o_re  (o_re),   .o_im  (o_im)
      );
    end else begin : gen_bypass
      // No twiddle on this stage: purely combinational pass-through.
      assign o_re  = bf_re;
      assign o_im  = bf_im;
      assign o_idx = bf_idx;
    end
  endgenerate
endmodule : r22_sdf
// r22_rdx4
// Two cascaded butterflies: r22_bf1 controlled by index bit B1 feeding r22_bf2
// controlled by index bit B2. Each butterfly adds one bit of width, so the
// output is DW+2 bits wide.
//
// Parameters
//   ID : index width
//   DW : input sample width
//   B1 : index bit (LV) given to r22_bf1
//   B2 : index bit (LV) given to r22_bf2
module r22_rdx4 #
( parameter int ID = 2
, parameter int DW = 16
, parameter int B1 = 1
, parameter int B2 = 0
)
( input  logic                 clk
, input  logic        [ID-1:0] i_idx
, output logic        [ID-1:0] o_idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [DW+1:0] o_re
, output logic signed [DW+1:0] o_im
);
  // Link between the first and the second butterfly (DW+1 bits).
  logic signed [DW:0]   mid_re, mid_im;
  logic        [ID-1:0] mid_idx;

  r22_bf1 #(.ID(ID), .LV(B1), .DW(DW + 0)) u_bf1
  ( .clk   (clk)
  , .i_idx (i_idx),  .o_idx (mid_idx)
  , .i_re  (i_re),   .i_im  (i_im)
  , .o_re  (mid_re), .o_im  (mid_im)
  );

  r22_bf2 #(.ID(ID), .LV(B2), .DW(DW + 1)) u_bf2
  ( .clk   (clk)
  , .i_idx (mid_idx), .o_idx (o_idx)
  , .i_re  (mid_re),  .i_im  (mid_im)
  , .o_re  (o_re),    .o_im  (o_im)
  );
endmodule : r22_rdx4
// r22_bf2
// Second butterfly of a radix-2^2 pair, built around an r22_delay feedback line.
//
// Selected by the index bits:
//   i_idx[LV] == 0                  : input goes into the delay line, the delay
//                                     line output is forwarded
//   i_idx[LV] == 1, i_idx[LV+1] == 0: plain butterfly, out = d + i, feedback = d - i
//   i_idx[LV] == 1, i_idx[LV+1] == 1: same butterfly with the input multiplied
//                                     by -j, i.e. (re, im) -> (im, -re)
//
// Outputs are registered and one bit wider than the inputs. o_idx is the
// registered i_idx minus (1 << LV).
module r22_bf2 #
( parameter int ID = 0
, parameter int LV = 0
, parameter int DW = 16
)
( input  logic                 clk
, input  logic        [ID-1:0] i_idx
, output logic        [ID-1:0] o_idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [  DW:0] o_re
, output logic signed [  DW:0] o_im
);
  logic               bfly_en, rot_en;
  logic signed [DW:0] fb_re, fb_im;   // value written into the delay line
  logic signed [DW:0] dl_re, dl_im;   // value read from the delay line

  assign bfly_en = i_idx[LV];
  assign rot_en  = i_idx[LV+1] & i_idx[LV];   // implies bfly_en

  r22_delay #(.LV(LV), .DW(DW + 1)) u_delay
  ( .clk  (clk)
  , .idx  (i_idx[LV:0])
  , .i_re (fb_re), .i_im (fb_im)
  , .o_re (dl_re), .o_im (dl_im)
  );

  // Feedback path (combinational).
  always_comb begin
    if (rot_en) begin
      // d - (-j * i)
      fb_re = dl_re - i_im;
      fb_im = dl_im + i_re;
    end else if (bfly_en) begin
      fb_re = dl_re - i_re;
      fb_im = dl_im - i_im;
    end else begin
      fb_re = i_re;
      fb_im = i_im;
    end
  end

  // Forward path and index (registered).
  always_ff @(posedge clk) begin
    if (rot_en) begin
      // d + (-j * i)
      o_re <= dl_re + i_im;
      o_im <= dl_im - i_re;
    end else if (bfly_en) begin
      o_re <= dl_re + i_re;
      o_im <= dl_im + i_im;
    end else begin
      o_re <= dl_re;
      o_im <= dl_im;
    end

    o_idx <= i_idx - ID'(1 << LV);
  end
endmodule : r22_bf2
// r22_bf1
// First butterfly of a radix-2^2 pair, built around an r22_delay feedback line.
//
// While i_idx[LV] is 0 the input is written into the delay line and the delay
// line output is forwarded. While i_idx[LV] is 1 the module outputs d + i and
// writes d - i back into the delay line, d being the delay line output.
//
// Outputs are registered and one bit wider than the inputs. o_idx is the
// registered i_idx minus (1 << LV).
module r22_bf1 #
( parameter int ID = 0
, parameter int LV = 0
, parameter int DW = 16
)
( input  logic                 clk
, input  logic        [ID-1:0] i_idx
, output logic        [ID-1:0] o_idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [  DW:0] o_re
, output logic signed [  DW:0] o_im
);
  logic signed [DW:0] fb_re, fb_im;   // value written into the delay line
  logic signed [DW:0] dl_re, dl_im;   // value read from the delay line

  wire bfly_en = i_idx[LV];

  r22_delay #(.LV(LV), .DW(DW + 1)) u_delay
  ( .clk  (clk)
  , .idx  (i_idx[LV:0])
  , .i_re (fb_re), .i_im (fb_im)
  , .o_re (dl_re), .o_im (dl_im)
  );

  // Feedback path: store the input by default, the difference when enabled.
  always_comb begin
    fb_re = i_re;
    fb_im = i_im;
    if (bfly_en) begin
      fb_re = dl_re - i_re;
      fb_im = dl_im - i_im;
    end
  end

  // Forward path: delay line output by default, the sum when enabled.
  always_ff @(posedge clk) begin
    o_re <= dl_re;
    o_im <= dl_im;
    if (bfly_en) begin
      o_re <= dl_re + i_re;
      o_im <= dl_im + i_im;
    end

    o_idx <= i_idx - ID'(1 << LV);
  end
endmodule : r22_bf1
// r22_delay
// Delay element for a complex sample.
//
//   LV > 0 : a memory of (1 << LV) words addressed by idx[LV-1:0]. On every
//            clock the current word is written at that address and the word at
//            address + 1 (wrapping) is read into the output register.
//   LV == 0: a single register stage.
//
// idx[LV] is part of the port but is not used inside this module.
module r22_delay #
( parameter int LV = 0
, parameter int DW = 16
)
( input  logic                 clk
, input  logic        [  LV:0] idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [DW-1:0] o_re
, output logic signed [DW-1:0] o_im
);
  generate
    if (LV > 0) begin : gen_delay
      logic [DW*2-1:0] ring [1 << LV];

      wire [LV-1:0] wr_ptr = idx[LV-1:0];
      wire [LV-1:0] rd_ptr = wr_ptr + LV'(1);   // wraps at LV bits

      always_ff @(posedge clk) begin
        ring[wr_ptr] <= {i_re, i_im};
        {o_re, o_im} <= ring[rd_ptr];
      end
    end else begin : gen_reg
      always_ff @(posedge clk) begin
        o_re <= i_re;
        o_im <= i_im;
      end
    end
  endgenerate
endmodule : r22_delay
// r22_twiddle
// Multiplies the incoming complex sample by a coefficient read from r22_rom,
// using r22_rotate for the complex product.
//
// The sample is registered once here while the ROM performs its registered
// read, so both reach r22_rotate on the same clock. o_idx is the registered
// i_idx minus 4.
// NOTE(review): the constant 4 presumably compensates the register stages of
// r22_rotate and assumes the index advances by one per clock - confirm.
//
// Parameters
//   N, S : forwarded to r22_rom
//   DW   : sample width (input and output)
//   TW   : coefficient width
//   IV   : forwarded to r22_rom
//   ID   : index width
//   AW   : number of low index bits used as ROM address
module r22_twiddle #
( parameter int N  = 16
, parameter int S  = 0
, parameter int DW = 16
, parameter int TW = 16
, parameter bit IV = 0
, parameter int ID = $clog2(N)
, parameter int AW = $clog2(N) - (S * 2)
)
( input  logic                 clk
, input  logic        [ID-1:0] i_idx
, output logic        [ID-1:0] o_idx
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, output logic signed [DW-1:0] o_re
, output logic signed [DW-1:0] o_im
);
  logic signed [DW-1:0] smp_re, smp_im;   // sample aligned with the ROM output
  logic signed [TW-1:0] tw_re,  tw_im;    // coefficient from the ROM

  always_ff @(posedge clk) begin
    smp_re <= i_re;
    smp_im <= i_im;
    o_idx  <= i_idx - ID'(4);
  end

  r22_rom #(.N(N), .S(S), .W(TW), .IV(IV)) u_rom
  ( .clk  (clk)
  , .addr (i_idx[AW-1:0])
  , .re   (tw_re)
  , .im   (tw_im)
  );

  r22_rotate #(.DW(DW), .TW(TW)) u_rotate
  ( .clk  (clk)
  , .i_re (smp_re), .i_im (smp_im)
  , .w_re (tw_re),  .w_im (tw_im)
  , .o_re (o_re),   .o_im (o_im)
  );
endmodule : r22_twiddle
// r22_rotate
// Pipelined complex multiplication (i_re + j*i_im) * (w_re + j*w_im) that uses
// three real multipliers instead of four:
//
//   m1 = (i_re - i_im) * w_re
//   m2 = (w_re - w_im) * i_im
//   m3 = (i_re + i_im) * w_im
//   re = m1 + m2 = i_re*w_re - i_im*w_im
//   im = m2 + m3 = i_re*w_im + i_im*w_re
//
// Three register stages (pre-add, multiply, post-add) are followed by an
// r22_round with M = 1 that reduces the DW+TW bit result to DW bits.
module r22_rotate #
( parameter int DW = 16
, parameter int TW = 16
)
( input  logic                 clk
, input  logic signed [DW-1:0] i_re
, input  logic signed [DW-1:0] i_im
, input  logic signed [TW-1:0] w_re
, input  logic signed [TW-1:0] w_im
, output logic signed [DW-1:0] o_re
, output logic signed [DW-1:0] o_im
);
  // ---- Stage 1: pre-adders, plus operands carried along to the multipliers
  logic signed [DW:0]   smp_diff;   // i_re - i_im
  logic signed [DW:0]   smp_sum;    // i_re + i_im
  logic signed [TW:0]   tw_diff;    // w_re - w_im
  logic signed [TW-1:0] tw_re_q, tw_im_q;
  logic signed [DW-1:0] smp_im_q;

  always_ff @(posedge clk) begin
    smp_diff <= i_re - i_im;
    tw_diff  <= w_re - w_im;
    smp_sum  <= i_re + i_im;
    tw_re_q  <= w_re;
    tw_im_q  <= w_im;
    smp_im_q <= i_im;
  end

  // ---- Stage 2: the three products
  logic signed [DW+TW:0] m1;
  logic signed [DW+TW:0] m2;
  logic signed [DW+TW:0] m3;

  always_ff @(posedge clk) begin
    m1 <= smp_diff * tw_re_q;
    m2 <= tw_diff  * smp_im_q;
    m3 <= smp_sum  * tw_im_q;
  end

  // ---- Stage 3: post-adders, truncated to DW+TW bits
  logic signed [DW+TW-1:0] acc_re, acc_im;

  always_ff @(posedge clk) begin
    acc_re <= (DW+TW)'(m1 + m2);
    acc_im <= (DW+TW)'(m2 + m3);
  end

  // ---- Stage 4: rounding back to DW bits
  r22_round #(.I(DW+TW), .O(DW), .M(1)) u_round_re
  ( .clk (clk), .i (acc_re), .o (o_re) );

  r22_round #(.I(DW+TW), .O(DW), .M(1)) u_round_im
  ( .clk (clk), .i (acc_im), .o (o_im) );
endmodule : r22_rotate
// r22_round
// Registered round-half-to-even with saturation.
//
// The D = I - O - M least significant bits of `i` are discarded using
// round-half-to-even. The remaining O + M bits are then clamped to the signed
// O-bit range, so a value that does not fit produces the most positive or the
// most negative O-bit number instead of wrapping.
//
// Parameters
//   I : input width
//   O : output width
//   M : number (0 or 1) of extra most significant bits that are dropped by
//       saturation rather than by rounding
// Requirements: D >= 0 and O >= 1.
//
// Latency: one clock.
module r22_round #
( parameter int I = 16
, parameter int O = 12
, parameter bit M = 0
)
( input  logic                clk
, input  logic signed [I-1:0] i
, output logic signed [O-1:0] o
);
  localparam int D = I - O - M;   // discarded LSBs
  localparam int K = O + M;       // bits kept ahead of saturation

  // Two zero bits are appended below the input so that the half-way and sticky
  // selects stay legal (and read as zero) when D is 0 or 1.
  wire [I+1:0] padded = {i, 2'b00};

  wire lsb      = padded[D+2];      // i[D]
  wire halfway  = padded[D+1];      // i[D-1], or 0 when D == 0
  wire sticky   = |padded[D:0];     // |i[D-2:0], or 0 when D < 2
  wire round_up = halfway & (sticky | lsb);

  // Kept bits, sign-extended by one guard bit so the increment cannot wrap.
  wire signed [K:0] kept = $signed({i[I-1], padded[I+1:D+2]});
  wire signed [K:0] sum  = kept + $signed({1'b0, round_up});

  // The value fits in O bits only if bits K..O-1 are all copies of the sign.
  wire pos_ovf = ~sum[K] &  (|sum[K-1:O-1]);
  wire neg_ovf =  sum[K] & ~(&sum[K-1:O-1]);

  always_ff @(posedge clk) begin
    if (pos_ovf)
      o <= {1'b0, {(O-1){1'b1}}};
    else if (neg_ovf)
      o <= {1'b1, {(O-1){1'b0}}};
    else
      o <= sum[O-1:0];
  end
endmodule : r22_round
// r22_rom
// Coefficient ROM with a registered read: {re, im} <= rom[addr].
//
// Contents are computed in an initial block. The address space is treated as
// four consecutive quarters of A = N / 2^(2 + 2*S) entries. In quarter c the
// entry at offset m holds the angle 2*pi*k/N with
//     k = bitreverse2(c) * 4^S * m
// and stores { cos, sin } of -angle (IV == 0) or +angle (IV == 1), each scaled
// to a signed W-bit value by scale().
//
// Parameters
//   N  : transform size
//   S  : stage number
//   W  : coefficient width
//   IV : selects the sign of the angle (see above)
//   AW : address width
module r22_rom #
( parameter int N  = 16
, parameter int S  = 0
, parameter int W  = 16
, parameter bit IV = 0
, parameter int AW = $clog2(N) - (S * 2)
)
( input  logic                 clk
, input  logic        [AW-1:0] addr
, output logic signed [ W-1:0] re
, output logic signed [ W-1:0] im
);
  localparam int A = N / (2 ** (2 + 2 * S));   // entries per quarter
  localparam int D = N / (4 ** S);             // not referenced in this module

  logic [W*2-1:0] rom [1 << AW];

  // Converts a real number to a signed W-bit value: multiply by 2^(W-1),
  // round half away from zero, clamp to the representable range.
  function automatic signed [W-1:0] scale(real x);
    localparam int MAX = ((1 << W - 1) - 1);
    localparam int MIN = (0 - (1 << W - 1));
    real scaled;
    int  q;
    scaled = x * (1 << W - 1);
    if (scaled >= 0.0)
      q = $rtoi(scaled + 0.5);
    else
      q = $rtoi(scaled - 0.5);
    if (q > MAX) q = MAX;
    if (q < MIN) q = MIN;
    return q;
  endfunction

  // Registered read port.
  always_ff @(posedge clk) begin
    {re, im} <= rom[addr];
  end

  // Table generation, one pass over all 4*A entries.
  initial begin : init_rom
    int  x, quarter, step, k, coef_re, coef_im;
    real angle;
    for (x = 0; x < 4 * A; x++) begin
      quarter = x / A;
      // Quarter number with its two bits swapped, times 4^S.
      step    = {quarter[0], quarter[1]} * (4 ** S);
      k       = step * (x - quarter * A);
      angle   = $atan(1.0) * 8 * (real'(k) / real'(N));   // 8*atan(1) = 2*pi
      if (!IV) angle = -angle;
      coef_re = scale($cos(angle));
      coef_im = scale($sin(angle));
      rom[x]  = {coef_re[W-1:0], coef_im[W-1:0]};
    end
  end
endmodule : r22_rom
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment