// Copyright (c) 2026, Danil Karpenko
// All rights reserved.

// -----------------------------------------------------------------------------
// r22_fft1024
//
// Top level: a chain of five r22_sdf stages (S = 0..4, N = 1024) followed by
// one r22_round per component. Every stage output is declared two bits wider
// than its input (DW+2, DW+4, ... DW+10); the final rounders take the DW+10
// bit result back to DW bits.
//
// Parameters
//   DW : width of the input and output samples
//   TW : width handed to every stage as its TW parameter
//
// Ports
//   i_idx / o_idx : 10-bit sample index travelling alongside the data
//   i_re  / i_im  : signed input sample
//   o_re  / o_im  : signed, rounded output sample
// -----------------------------------------------------------------------------
module r22_fft1024 #(
    parameter int DW = 16,
    parameter int TW = 16
) (
    input  logic                 clk,
    input  logic        [   9:0] i_idx,
    output logic        [   9:0] o_idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [DW-1:0] o_re,
    output logic signed [DW-1:0] o_im
);

    localparam int N = 1024;

    // Stage outputs; the suffix is the number of the stage driving the net.
    logic signed [DW+1:0] s0_re, s0_im;
    logic signed [DW+3:0] s1_re, s1_im;
    logic signed [DW+5:0] s2_re, s2_im;
    logic signed [DW+7:0] s3_re, s3_im;
    logic signed [DW+9:0] s4_re, s4_im;

    logic [9:0] s0_idx, s1_idx, s2_idx, s3_idx, s4_idx;

    r22_sdf #(.N(N), .S(0), .DW(DW + 0), .TW(TW)) u_sdf0 (
        .clk  (clk),
        .i_idx(i_idx),  .o_idx(s0_idx),
        .i_re (i_re),   .i_im (i_im),
        .o_re (s0_re),  .o_im (s0_im)
    );

    r22_sdf #(.N(N), .S(1), .DW(DW + 2), .TW(TW)) u_sdf1 (
        .clk  (clk),
        .i_idx(s0_idx), .o_idx(s1_idx),
        .i_re (s0_re),  .i_im (s0_im),
        .o_re (s1_re),  .o_im (s1_im)
    );

    r22_sdf #(.N(N), .S(2), .DW(DW + 4), .TW(TW)) u_sdf2 (
        .clk  (clk),
        .i_idx(s1_idx), .o_idx(s2_idx),
        .i_re (s1_re),  .i_im (s1_im),
        .o_re (s2_re),  .o_im (s2_im)
    );

    r22_sdf #(.N(N), .S(3), .DW(DW + 6), .TW(TW)) u_sdf3 (
        .clk  (clk),
        .i_idx(s2_idx), .o_idx(s3_idx),
        .i_re (s2_re),  .i_im (s2_im),
        .o_re (s3_re),  .o_im (s3_im)
    );

    r22_sdf #(.N(N), .S(4), .DW(DW + 8), .TW(TW)) u_sdf4 (
        .clk  (clk),
        .i_idx(s3_idx), .o_idx(s4_idx),
        .i_re (s3_re),  .i_im (s3_im),
        .o_re (s4_re),  .o_im (s4_im)
    );

    // Bring the DW+10 bit result back to DW bits, one rounder per component.
    r22_round #(.I(DW + 10), .O(DW)) u_round_re (.clk(clk), .i(s4_re), .o(o_re));
    r22_round #(.I(DW + 10), .O(DW)) u_round_im (.clk(clk), .i(s4_im), .o(o_im));

    // The index is registered once here, next to the registered rounders.
    always_ff @(posedge clk) begin
        o_idx <= s4_idx;
    end

endmodule : r22_fft1024

// -----------------------------------------------------------------------------
// r22_sdf
//
// One pipeline stage: an r22_rdx4 block, followed by an r22_twiddle block for
// every stage except the last one (S == $clog2(N)/2 - 1), where the r22_rdx4
// result is wired straight to the outputs.
//
// Parameters
//   N  : transform length
//   S  : stage number
//   DW : input sample width; outputs are DW+2 bits
//   TW : width forwarded to r22_twiddle
//   IV : forwarded to r22_twiddle only
//   ID : index width, defaults to $clog2(N)
//   LV : index bit given to r22_rdx4 as B1 (B2 = LV - 1);
//        defaults to $clog2(N) - 2*S - 1
//
// NOTE(review): IV is not passed to r22_rdx4, so nothing in that block depends
// on it - confirm this is intended before instantiating with IV = 1.
// -----------------------------------------------------------------------------
module r22_sdf #(
    parameter int N  = 4,
    parameter int S  = 0,
    parameter int DW = 16,
    parameter int TW = 16,
    parameter bit IV = 0,
    parameter int ID = $clog2(N),
    parameter int LV = $clog2(N) - (S * 2) - 1
) (
    input  logic                 clk,
    input  logic        [ID-1:0] i_idx,
    output logic        [ID-1:0] o_idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [DW+1:0] o_re,
    output logic signed [DW+1:0] o_im
);

    // Number of the stage that has no twiddle block behind it.
    localparam int LAST_STAGE = $clog2(N) / 2 - 1;

    // r22_rdx4 result, ahead of the optional twiddle block.
    logic signed [DW+1:0] bf_re, bf_im;
    logic        [ID-1:0] bf_idx;

    r22_rdx4 #(.ID(ID), .DW(DW), .B1(LV), .B2(LV - 1)) u_rdx4 (
        .clk  (clk),
        .i_idx(i_idx), .o_idx(bf_idx),
        .i_re (i_re),  .i_im (i_im),
        .o_re (bf_re), .o_im (bf_im)
    );

    generate
        if (S < LAST_STAGE) begin : gen_twiddle
            r22_twiddle #(.N(N), .S(S), .DW(DW + 2), .TW(TW), .IV(IV)) u_tw (
                .clk  (clk),
                .i_idx(bf_idx), .o_idx(o_idx),
                .i_re (bf_re),  .i_im (bf_im),
                .o_re (o_re),   .o_im (o_im)
            );
        end else begin
            // Last stage: plain pass-through.
            assign o_re  = bf_re;
            assign o_im  = bf_im;
            assign o_idx = bf_idx;
        end
    endgenerate

endmodule : r22_sdf
// -----------------------------------------------------------------------------
// r22_rdx4
//
// Two butterflies in series: r22_bf1 keyed on index bit B1, then r22_bf2 keyed
// on index bit B2. Each butterfly adds one bit, so outputs are DW+2 bits wide.
//
// Parameters
//   ID : index width
//   DW : input sample width
//   B1 : index bit used by r22_bf1 (its LV parameter)
//   B2 : index bit used by r22_bf2 (its LV parameter)
// -----------------------------------------------------------------------------
module r22_rdx4 #(
    parameter int ID = 2,
    parameter int DW = 16,
    parameter int B1 = 1,
    parameter int B2 = 0
) (
    input  logic                 clk,
    input  logic        [ID-1:0] i_idx,
    output logic        [ID-1:0] o_idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [DW+1:0] o_re,
    output logic signed [DW+1:0] o_im
);

    // Result of the first butterfly.
    logic signed [DW:0]   bf1_re, bf1_im;
    logic        [ID-1:0] bf1_idx;

    r22_bf1 #(.ID(ID), .LV(B1), .DW(DW + 0)) u_bf1 (
        .clk  (clk),
        .i_idx(i_idx),   .o_idx(bf1_idx),
        .i_re (i_re),    .i_im (i_im),
        .o_re (bf1_re),  .o_im (bf1_im)
    );

    r22_bf2 #(.ID(ID), .LV(B2), .DW(DW + 1)) u_bf2 (
        .clk  (clk),
        .i_idx(bf1_idx), .o_idx(o_idx),
        .i_re (bf1_re),  .i_im (bf1_im),
        .o_re (o_re),    .o_im (o_im)
    );

endmodule : r22_rdx4

// -----------------------------------------------------------------------------
// r22_bf2
//
// Feedback butterfly with an optional re/im swap.
//
//   mux  = i_idx[LV]
//   swap = i_idx[LV+1] & i_idx[LV]
//
//   mux = 0         : input goes into the feedback delay, delay output is
//                     forwarded to the output register.
//   mux = 1, !swap  : output <= d + i, feedback <= d - i.
//   mux = 1,  swap  : the input is combined as (i_im, -i_re), i.e. multiplied
//                     by -j: output <= d + (-j)i, feedback <= d - (-j)i.
//
// The index is registered and decremented by 2**LV. Requires LV + 1 < ID.
// -----------------------------------------------------------------------------
module r22_bf2 #(
    parameter int ID = 0,
    parameter int LV = 0,
    parameter int DW = 16
) (
    input  logic                 clk,
    input  logic        [ID-1:0] i_idx,
    output logic        [ID-1:0] o_idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [  DW:0] o_re,
    output logic signed [  DW:0] o_im
);

    logic mux, swap;
    logic signed [DW:0] f_re, f_im;   // value written into the feedback delay
    logic signed [DW:0] d_re, d_im;   // value read from the feedback delay

    assign mux  = i_idx[LV];
    assign swap = i_idx[LV+1] & i_idx[LV];

    // Feedback path (combinational, registered inside r22_delay).
    always_comb begin
        if (mux) begin
            if (swap) begin
                f_re = d_re - i_im;
                f_im = d_im + i_re;
            end else begin
                f_re = d_re - i_re;
                f_im = d_im - i_im;
            end
        end else begin
            f_re = i_re;
            f_im = i_im;
        end
    end

    // Forward path.
    always_ff @(posedge clk) begin
        if (mux) begin
            if (swap) begin
                o_re <= d_re + i_im;
                o_im <= d_im - i_re;
            end else begin
                o_re <= d_re + i_re;
                o_im <= d_im + i_im;
            end
        end else begin
            o_re <= d_re;
            o_im <= d_im;
        end
    end

    always_ff @(posedge clk) begin
        o_idx <= i_idx - ID'(1 << LV);
    end

    r22_delay #(.LV(LV), .DW(DW + 1)) u_delay (
        .clk (clk),
        .idx (i_idx[LV:0]),
        .i_re(f_re), .i_im(f_im),
        .o_re(d_re), .o_im(d_im)
    );

endmodule : r22_bf2

// -----------------------------------------------------------------------------
// r22_bf1
//
// Feedback butterfly without the swap: same structure as r22_bf2, keyed only
// on mux = i_idx[LV].
//
//   mux = 0 : input goes into the feedback delay, delay output is forwarded.
//   mux = 1 : output <= d + i, feedback <= d - i.
//
// The index is registered and decremented by 2**LV.
// -----------------------------------------------------------------------------
module r22_bf1 #(
    parameter int ID = 0,
    parameter int LV = 0,
    parameter int DW = 16
) (
    input  logic                 clk,
    input  logic        [ID-1:0] i_idx,
    output logic        [ID-1:0] o_idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [  DW:0] o_re,
    output logic signed [  DW:0] o_im
);

    logic signed [DW:0] f_re, f_im;   // value written into the feedback delay
    logic signed [DW:0] d_re, d_im;   // value read from the feedback delay

    wire mux = i_idx[LV];

    // Feedback path (combinational, registered inside r22_delay).
    always_comb begin
        if (mux) begin
            f_re = d_re - i_re;
            f_im = d_im - i_im;
        end else begin
            f_re = i_re;
            f_im = i_im;
        end
    end

    // Forward path.
    always_ff @(posedge clk) begin
        if (mux) begin
            o_re <= d_re + i_re;
            o_im <= d_im + i_im;
        end else begin
            o_re <= d_re;
            o_im <= d_im;
        end
    end

    always_ff @(posedge clk) begin
        o_idx <= i_idx - ID'(1 << LV);
    end

    r22_delay #(.LV(LV), .DW(DW + 1)) u_delay (
        .clk (clk),
        .idx (i_idx[LV:0]),
        .i_re(f_re), .i_im(f_im),
        .o_re(d_re), .o_im(d_im)
    );

endmodule : r22_bf1

// -----------------------------------------------------------------------------
// r22_delay
//
// Delay element for a complex sample.
//
//   LV > 0 : a 2**LV entry buffer addressed by idx[LV-1:0]. Every clock the
//            input is written at addr and the entry at addr + 1 (wrapping in
//            LV bits) is read into the output register. With idx advancing by
//            one per clock that is a delay of 2**LV clocks.
//   LV = 0 : a single register.
//
// idx[LV] is part of the port but is not used.
// -----------------------------------------------------------------------------
module r22_delay #(
    parameter int LV = 0,
    parameter int DW = 16
) (
    input  logic                 clk,
    input  logic        [  LV:0] idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [DW-1:0] o_re,
    output logic signed [DW-1:0] o_im
);

    generate
        if (LV > 0) begin : gen_delay
            logic [DW*2-1:0] buffer [1 << LV];
            wire  [LV-1:0]   addr = idx[LV-1:0];

            always_ff @(posedge clk) begin
                buffer[addr]  <= {i_re, i_im};
                {o_re, o_im}  <= buffer[addr + LV'(1)];
            end
        end else begin
            always_ff @(posedge clk) begin
                {o_re, o_im} <= {i_re, i_im};
            end
        end
    endgenerate

endmodule : r22_delay

// -----------------------------------------------------------------------------
// r22_twiddle
//
// Multiplies the sample by a coefficient read from r22_rom.
//
// The sample is registered once so that it reaches r22_rotate together with
// the (registered) ROM output addressed by i_idx[AW-1:0]. The index is
// registered once and decremented by 4; presumably this accounts for the four
// further register stages the data crosses inside r22_rotate - confirm when
// changing the pipeline depth.
//
// Parameters
//   N, S, IV : forwarded to r22_rom
//   DW       : sample width (input and output)
//   TW       : coefficient width
//   ID       : index width, defaults to $clog2(N)
//   AW       : ROM address width, defaults to $clog2(N) - 2*S
// -----------------------------------------------------------------------------
module r22_twiddle #(
    parameter int N  = 16,
    parameter int S  = 0,
    parameter int DW = 16,
    parameter int TW = 16,
    parameter bit IV = 0,
    parameter int ID = $clog2(N),
    parameter int AW = $clog2(N) - (S * 2)
) (
    input  logic                 clk,
    input  logic        [ID-1:0] i_idx,
    output logic        [ID-1:0] o_idx,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    output logic signed [DW-1:0] o_re,
    output logic signed [DW-1:0] o_im
);

    logic signed [DW-1:0] r_re, r_im;   // sample, aligned with the ROM output
    logic signed [TW-1:0] w_re, w_im;   // coefficient from the ROM

    always_ff @(posedge clk) begin
        {r_re, r_im} <= {i_re, i_im};
        o_idx        <= i_idx - ID'(4);
    end

    r22_rom #(.N(N), .S(S), .W(TW), .IV(IV)) u_rom (
        .clk (clk),
        .addr(i_idx[AW-1:0]),
        .re  (w_re),
        .im  (w_im)
    );

    r22_rotate #(.DW(DW), .TW(TW)) u_rotate (
        .clk (clk),
        .i_re(r_re), .i_im(r_im),
        .w_re(w_re), .w_im(w_im),
        .o_re(o_re), .o_im(o_im)
    );

endmodule : r22_twiddle

// -----------------------------------------------------------------------------
// r22_rotate
//
// Complex multiply (i_re + j*i_im) * (w_re + j*w_im) with three multipliers:
//
//   s1 = i_re - i_im      p1 = s1 * w_re
//   s2 = w_re - w_im      p2 = s2 * i_im
//   s3 = i_re + i_im      p3 = s3 * w_im
//
//   re = p1 + p2 = i_re*w_re - i_im*w_im
//   im = p2 + p3 = i_re*w_im + i_im*w_re
//
// Three register stages (pre-add, multiply, post-add) are followed by a
// registered r22_round (M = 1) that brings the DW+TW bit result back to DW
// bits.
// -----------------------------------------------------------------------------
module r22_rotate #(
    parameter int DW = 16,
    parameter int TW = 16
) (
    input  logic                 clk,
    input  logic signed [DW-1:0] i_re,
    input  logic signed [DW-1:0] i_im,
    input  logic signed [TW-1:0] w_re,
    input  logic signed [TW-1:0] w_im,
    output logic signed [DW-1:0] o_re,
    output logic signed [DW-1:0] o_im
);

    // Stage 1: pre-adders, plus copies of the operands still needed in stage 2.
    logic signed [DW:0]   s1;
    logic signed [TW:0]   s2;
    logic signed [DW:0]   s3;
    logic signed [TW-1:0] w_re_0, w_im_0;
    logic signed [DW-1:0] i_im_0;

    always_ff @(posedge clk) begin
        s1     <= i_re - i_im;
        s2     <= w_re - w_im;
        s3     <= i_re + i_im;
        w_re_0 <= w_re;
        w_im_0 <= w_im;
        i_im_0 <= i_im;
    end

    // Stage 2: the three products.
    logic signed [DW+TW:0] p1;
    logic signed [DW+TW:0] p2;
    logic signed [DW+TW:0] p3;

    always_ff @(posedge clk) begin
        p1 <= s1 * w_re_0;
        p2 <= s2 * i_im_0;
        p3 <= s3 * w_im_0;
    end

    // Stage 3: post-adders; the sums are cut to DW+TW bits.
    logic signed [DW+TW-1:0] r_re, r_im;

    always_ff @(posedge clk) begin
        r_re <= (DW+TW)'(p1 + p2);
        r_im <= (DW+TW)'(p2 + p3);
    end

    r22_round #(.I(DW+TW), .O(DW), .M(1)) u_round_re (.clk(clk), .i(r_re), .o(o_re));
    r22_round #(.I(DW+TW), .O(DW), .M(1)) u_round_im (.clk(clk), .i(r_im), .o(o_im));

endmodule : r22_rotate

// -----------------------------------------------------------------------------
// r22_round
//
// Registered round-half-to-even with saturation.
//
// The D = I - O - M least significant bits of i are discarded and the
// remaining O + M bits are rounded to nearest, ties to even. The rounded value
// is then clamped to the signed O-bit range:
//
//   value >  2**(O-1) - 1  ->  2**(O-1) - 1
//   value < -2**(O-1)      -> -2**(O-1)
//
// With M = 0 only the positive clamp can trigger (rounding up from the
// largest positive value). With M = 1 the kept window is one bit wider than
// the output, so both clamps are reachable.
//
// Parameters
//   I : input width
//   O : output width (O >= 2)
//   M : number of extra MSBs in the kept window (0 or 1)
//
// Requires D >= 0.
// -----------------------------------------------------------------------------
module r22_round #(
    parameter int I = 16,
    parameter int O = 12,
    parameter bit M = 0
) (
    input  logic                clk,
    input  logic signed [I-1:0] i,
    output logic signed [O-1:0] o
);

    localparam int D = I - O - M;   // discarded LSBs
    localparam int K = I - D;       // kept bits (= O + M)

    localparam logic signed [O-1:0] SAT_MAX = {1'b0, {(O-1){1'b1}}};
    localparam logic signed [O-1:0] SAT_MIN = {1'b1, {(O-1){1'b0}}};

    // Two zero bits below the LSB keep every select in range for D = 0 and
    // D = 1; for D >= 2 the selected bits are i[D], i[D-1] and i[D-2:0].
    wire [I+1:0] padded   = {i, 2'b00};
    wire         lsb      = padded[D+2];
    wire         halfway  = padded[D+1];
    wire         sticky   = |padded[D:0];
    wire         round_up = halfway & (sticky | lsb);

    // The add is done one bit wider than the kept window so that it cannot
    // wrap; the comparison against the limits is therefore exact.
    wire signed [K-1:0] kept    = i[I-1:D];
    wire signed [K:0]   rounded = kept + $signed({1'b0, round_up});

    wire too_big   = rounded > SAT_MAX;
    wire too_small = rounded < SAT_MIN;

    always_ff @(posedge clk) begin
        if (too_big)
            o <= SAT_MAX;
        else if (too_small)
            o <= SAT_MIN;
        else
            o <= rounded[O-1:0];
    end

endmodule : r22_round

// -----------------------------------------------------------------------------
// r22_rom
//
// Registered coefficient ROM with 2**AW entries of {re, im}, W bits each.
//
// Contents (filled in an initial block): four segments of A = N / 4**(S+1)
// entries. Segment c (0..3) uses the multiplier s = {c[0], c[1]} * 4**S, i.e.
// 0, 2, 1, 3 times 4**S. Entry x of a segment starting at l holds
//
//   k     = s * (x - l)
//   theta = 2*pi * k / N            ($atan(1.0) * 8 is 2*pi)
//   re    = scale(cos(-theta)),  im = scale(sin(-theta))     when IV = 0
//   re    = scale(cos(+theta)),  im = scale(sin(+theta))     when IV = 1
//
// Parameters
//   N  : transform length
//   S  : stage number
//   W  : coefficient width
//   IV : selects the sign of the angle
//   AW : address width, defaults to $clog2(N) - 2*S
// -----------------------------------------------------------------------------
module r22_rom #(
    parameter int N  = 16,
    parameter int S  = 0,
    parameter int W  = 16,
    parameter bit IV = 0,
    parameter int AW = $clog2(N) - (S * 2)
) (
    input  logic                clk,
    input  logic       [AW-1:0] addr,
    output logic signed [W-1:0] re,
    output logic signed [W-1:0] im
);

    localparam int A = N / (2 ** (2 + 2 * S));   // entries per segment

    logic [W*2-1:0] rom [1 << AW];

    always_ff @(posedge clk) begin
        {re, im} <= rom[addr];
    end

    initial begin : init_rom
        int c, x;
        for (c = 0; c < 4; c++) begin
            var automatic real theta;
            var automatic int  s, l, k, cos, sin;
            s = {c[0], c[1]} * (4 ** S);   // segment multiplier: 0, 2, 1, 3
            l = c * A;                     // first address of the segment
            for (x = c * A; x < c * A + A; x++) begin
                k     = s * (x - l);
                theta = $atan(1.0) * 8 * (real'(k) / real'(N));
                cos   = scale($cos(IV ? theta : -theta));
                sin   = scale($sin(IV ? theta : -theta));
                rom[x] = {cos[W-1:0], sin[W-1:0]};
            end
        end
    end

    // Converts x to W-bit fixed point: multiplies by 2**(W-1), rounds half
    // away from zero and clamps to [-2**(W-1), 2**(W-1) - 1].
    function automatic signed [W-1:0] scale(real x);
        localparam int MAX = ((1 << W - 1) - 1);
        localparam int MIN = (0 - (1 << W - 1));
        var automatic real scaled = x * (1 << W - 1);
        var automatic int  v      = $rtoi(scaled >= 0.0 ? scaled + 0.5 : scaled - 0.5);
        if (v > MAX) v = MAX;
        if (v < MIN) v = MIN;
        return v;
    endfunction

endmodule : r22_rom