| Latency(CLK) | 3 | 
            
    
    module product(
            iData0, iData1, iData2, iData3,
            iVld, iStall,
            oData, oVld, oStall,
            reset, clk);
    
            parameter       W       = 32;
            parameter       S1      = 2'h1,
                            S2      = 2'h2,
                            S3      = 2'h3;
    
            input   [W-1:0] iData0, iData1, iData2, iData3;
            input           iVld;
            output          iStall;
            output  [W-1:0] oData;          // Result
            output          oVld;
            input           oStall;
            input           reset;
            input           clk;
    
            reg     [1:0]   state, stateD;
            wire            Stall   = oVld & oStall;
            reg     [W-1:0] aIn, bIn;
            wire    [W-1:0] yOut;
    
            reg             oVld;
            reg     [W-1:0] oData, oDataD;
            wire            iAlloc  = iVld & !iStall;
            wire            oAlloc  = oVld & !oStall;
    
            assign yOut     = aIn * bIn;  // Mul Resource (32bit clip)
    
            always @(state or Stall or
                    oData or yOut or
                    iVld or iData0 or iData1 or iData2 or iData3) begin
                    stateD  = state;        // Default
                    oDataD  = oData;
                    aIn     = {W{1'bx}};
                    bIn     = {W{1'bx}};
                    if (!Stall)
                            case (state)
                                    S1: if (iVld) begin
                                            stateD             = S2;
                                            {aIn, bIn, oDataD} = {iData0, iData1, yOut};
                                    end
                                    S2: begin
                                            stateD             = S3;
                                            {aIn, bIn, oDataD} = {oData,  iData2, yOut};
                                    end
                                    S3: begin
                                            stateD             = S1;
                                            {aIn, bIn, oDataD} = {oData,  iData3, yOut};
                                    end
                            endcase
            end
    
            always @(posedge clk)
                    if (reset)
                            state   <= #1 S1;
                    else
                            state   <= #1 stateD;
    
            always @(posedge clk)
                    if (reset)
                            oVld    <= #1 1'b0;
                    else
                            oVld    <= #1 (stateD == S3);
    
            always @(posedge clk)
                    oData   <= #1 oDataD;
    
            assign iStall   = iVld & (state != S3) | oStall;
    endmodule
    
             |