Learn ZYNQ (7)

矩阵相乘的例子(5×5 矩阵乘以 5 维向量)

参考博客:http://blog.csdn.net/kkk584520/article/details/18812321

MatrixMultiply.c

    typedef int data_type;
    #define N 5

    /*
     * Matrix-vector product cc = AA * bb.
     *
     * AA : N*N elements, row-major (element (r, c) is AA[r*N + c]).
     * bb : N input elements.
     * cc : N output elements; every entry is overwritten.
     */
    void MatrixMultiply(data_type AA[N*N],data_type bb[N],data_type cc[N])
    {
        int row;
        for (row = 0; row < N; ++row)
        {
            int col;
            int base = row * N;      /* offset of the first element of this row */
            data_type acc = 0;       /* running dot product of row and bb */

            for (col = 0; col < N; ++col)
                acc += AA[base + col] * bb[col];

            cc[row] = acc;
        }
    }

修改后:

#include <ap_cint.h>
typedef uint15 data_type;
    #define N 5

    /*
     * Matrix-vector product cc = AA * bb on uint15 elements (type from
     * ap_cint.h).
     *
     * AA : N*N elements, row-major (element (r, c) is AA[r*N + c]).
     * bb : N input elements.
     * cc : N output elements; every entry is overwritten.
     *
     * The two loop labels are kept unchanged.
     * NOTE(review): presumably they are referenced by HLS directives -
     * confirm before renaming them.
     */
    void MatrixMultiply(data_type AA[N*N],data_type bb[N],data_type cc[N])
    {
        int row,col;
        MatrixMultiply_label2:for(row = 0;row<N;row++)
        {
            data_type acc = 0;   /* running dot product of this row with bb */
            MatrixMultiply_label1:for(col = 0;col<N;col++)
            {
                acc += AA[row*N+col]*bb[col];
            }
            cc[row] = acc;
        }
    }

测试文件:TestMatrixMultiply.c:

#include <stdio.h>
#include <ap_cint.h>
/* Element type used by the testbench; uint15 is provided by ap_cint.h. */
typedef uint15 data_type;
/* Problem size: MatrixMultiply takes an N*N matrix and N-element vectors. */
#define N 5
/* Input matrix; the initializer list is pulled in from a.h. */
const data_type MatrixA[] = {
    #include "a.h"
};
/* Input vector; the initializer list is pulled in from b.h. */
const data_type Vector_b[] = {
    #include "b.h"
};
/* Reference result; the initializer list is pulled in from c.h. */
const data_type MatlabResult_c[] = {
    #include "c.h"
};
/* Output buffer that main() hands to MatrixMultiply; starts zeroed. */
data_type HLS_Result_c[N] = {0};
void CheckResult(data_type * matlab_result,data_type * your_result);

int main(void)
{
	int i;
     printf("Checking Results:
");
     MatrixMultiply(MatrixA,Vector_b,HLS_Result_c);
     CheckResult(MatlabResult_c,HLS_Result_c);
     return 0;
}
/*
 * Print, for each of the N result entries, the difference between the
 * reference value and the computed value (0 means the entries match).
 *
 * matlab_result : N reference values.
 * your_result   : N values produced by the implementation under test.
 */
void CheckResult(data_type * matlab_result,data_type * your_result)
{
     int i;
     for(i = 0;i<N;i++)
     {
         /* Convert to int first so the argument matches %d and the
          * difference is signed. */
         printf("Idx %d: Error = %d \n",i,(int)matlab_result[i]-(int)your_result[i]);
     }
}

a.h

{82},  {10},  {16},  {15},  {66},
{91},  {28},  {98},  {43},  {4},
{13},  {55},  {96},  {92},  {85},
{92},  {96},  {49},  {80},  {94},
{64},  {97},  {81},  {96},  {68}

b.h

{76},
{75},
{40},
{66},
{18}

c.h

{9800},
{15846},
{16555},
{23124},
{22939}

ip核顶层:

	// test_multiply_v1_0
	// AXI4-Stream wrapper: forwards the S00_AXIS slave interface and the
	// M00_AXIS master interface to a single my_stream_ip instance.
	//
	// Only s00_axis_aclk / s00_axis_aresetn are passed to the core, so both
	// stream interfaces are served from that one clock and reset;
	// m00_axis_aclk and m00_axis_aresetn are not used inside this module.
	// my_stream_ip has fixed 32-bit data ports, so the two TDATA width
	// parameters are expected to stay at 32.
	module test_multiply_v1_0 #
	(
		// Users to add parameters here

		// User parameters ends
		// Do not modify the parameters beyond this line


		// Parameters of Axi Slave Bus Interface S00_AXIS
		parameter integer C_S00_AXIS_TDATA_WIDTH	= 32,

		// Parameters of Axi Master Bus Interface M00_AXIS
		parameter integer C_M00_AXIS_TDATA_WIDTH	= 32,
		parameter integer C_M00_AXIS_START_COUNT	= 32
	)
	(
		// Users to add ports here

		// User ports ends
		// Do not modify the ports beyond this line


		// Ports of Axi Slave Bus Interface S00_AXIS
		input wire  s00_axis_aclk,
		input wire  s00_axis_aresetn,
		output wire  s00_axis_tready,
		input wire [C_S00_AXIS_TDATA_WIDTH-1 : 0] s00_axis_tdata,
		input wire [(C_S00_AXIS_TDATA_WIDTH/8)-1 : 0] s00_axis_tstrb,
		input wire  s00_axis_tlast,
		input wire  s00_axis_tvalid,

		// Ports of Axi Master Bus Interface M00_AXIS
		input wire  m00_axis_aclk,
		input wire  m00_axis_aresetn,
		output wire  m00_axis_tvalid,
		output wire [C_M00_AXIS_TDATA_WIDTH-1 : 0] m00_axis_tdata,
		output wire [(C_M00_AXIS_TDATA_WIDTH/8)-1 : 0] m00_axis_tstrb,
		output wire  m00_axis_tlast,
		input wire  m00_axis_tready
	);

	// my_stream_ip has no TSTRB port, so the master-side strobe would
	// otherwise be left undriven. Every byte of each output word is data,
	// hence all strobe bits are tied high.
	assign m00_axis_tstrb = {(C_M00_AXIS_TDATA_WIDTH/8){1'b1}};

	// Add user logic here
	// s00_axis_tstrb is intentionally unconnected: the core stores whole words.
	my_stream_ip my_stream_ip_v1_0_S01_AXIS_inst (
		.ACLK(s00_axis_aclk),
		.ARESETN(s00_axis_aresetn),
		.S_AXIS_TREADY(s00_axis_tready),
		.S_AXIS_TDATA(s00_axis_tdata),
		.S_AXIS_TLAST(s00_axis_tlast),
		.S_AXIS_TVALID(s00_axis_tvalid),
		.M_AXIS_TVALID(m00_axis_tvalid),
		.M_AXIS_TDATA(m00_axis_tdata),
		.M_AXIS_TLAST(m00_axis_tlast),
		.M_AXIS_TREADY(m00_axis_tready)
	);
	// User logic ends

	endmodule

ip核:(未完成)

`timescale 1ns / 1ps
//-----------------------------------------------------------------------------
// my_stream_ip
//
// AXI4-Stream front end for the MatrixMultiply core.
//
// Per frame the module
//   1. accepts NUMBER_OF_INPUT_WORDS (30) words on the slave stream: the
//      first 25 are stored in AA[0..24], the last 5 in bb[0..4];
//   2. starts the MatrixMultiply core and waits for it to report done;
//   3. sends the 5 result words cc[0..4] on the master stream, asserting
//      M_AXIS_TLAST together with the final word.
//
// S_AXIS_TLAST is ignored; the frame length is fixed by the word count.
//
// NOTE(review): the core's RTL is not part of this file. The handshake below
// assumes what the port names suggest - ap_start is held until ap_ready,
// ap_done marks completion, ap_rst is active high, and each cc_N is valid
// while cc_N_ap_vld is high. Confirm against the generated MatrixMultiply
// module.
// NOTE(review): data paths here are 32 bits wide; if the core was generated
// with a narrower element type, its port widths must be matched - confirm.
//-----------------------------------------------------------------------------
module my_stream_ip
 (
  ACLK,
  ARESETN,
  S_AXIS_TREADY,
  S_AXIS_TDATA,
  S_AXIS_TLAST,
  S_AXIS_TVALID,
  M_AXIS_TVALID,
  M_AXIS_TDATA,
  M_AXIS_TLAST,
  M_AXIS_TREADY
 );

input                                    ACLK;
input                                    ARESETN;
output                                   S_AXIS_TREADY;
input      [31 :0]                       S_AXIS_TDATA;
input                                    S_AXIS_TLAST;
input                                    S_AXIS_TVALID;
output                                   M_AXIS_TVALID;
output     [31 :0]                       M_AXIS_TDATA;
output                                   M_AXIS_TLAST;
input                                    M_AXIS_TREADY;

   // Frame geometry.
   localparam MATRIX_DIM             = 5;
   localparam MATRIX_WORDS           = MATRIX_DIM * MATRIX_DIM;      // 25
   localparam VECTOR_WORDS           = MATRIX_DIM;                   // 5
   localparam NUMBER_OF_INPUT_WORDS  = MATRIX_WORDS + VECTOR_WORDS;  // 30
   localparam NUMBER_OF_OUTPUT_WORDS = VECTOR_WORDS;                 // 5

   // FSM states.
   localparam Idle           = 3'b100;
   localparam Read_Inputs    = 3'b010;
   localparam Write_Outputs  = 3'b001;
   localparam Wait_Calculate = 3'b000;

   // Control signals driven into the core.
   reg start2;
   reg reset2;
   // Status signals coming back from the core.
   wire done2;
   wire idle2;
   wire ready2;

   // Operand storage and raw core outputs.
   reg  [31:0] AA     [0:MATRIX_WORDS-1];
   reg  [31:0] bb     [0:VECTOR_WORDS-1];
   wire [31:0] cc     [0:VECTOR_WORDS-1];
   wire        cc_val [0:VECTOR_WORDS-1];
   // Results captured while the matching valid strobe is high, so they stay
   // stable for the whole output phase.
   reg  [31:0] cc_reg [0:VECTOR_WORDS-1];

   reg [4:0] AA_index;      // next AA slot to fill (0..24)
   reg [2:0] bb_index;      // next bb slot to fill (0..4)
   reg [2:0] cc_index;      // result word currently presented (0..4)
   reg [2:0] state;

   reg [4:0] nr_of_reads;   // input words still to accept, minus one
   reg [2:0] nr_of_writes;  // output words still to send, minus one

   integer k;

   assign S_AXIS_TREADY = (state == Read_Inputs);
   assign M_AXIS_TVALID = (state == Write_Outputs);

   assign M_AXIS_TDATA  = cc_reg[cc_index];
   // The final beat is the one sent while the down-counter is at zero.
   assign M_AXIS_TLAST  = (state == Write_Outputs) && (nr_of_writes == 0);

   // Capture each result word whenever the core flags it as valid.
   always @(posedge ACLK)
   begin
      for (k = 0; k < VECTOR_WORDS; k = k + 1)
         if (cc_val[k])
            cc_reg[k] <= cc[k];
   end

   always @(posedge ACLK)
   begin
      if (!ARESETN)              // Synchronous reset (active low)
        begin
           state        <= Idle;
           nr_of_reads  <= 0;
           nr_of_writes <= 0;
           AA_index     <= 0;
           bb_index     <= 0;
           cc_index     <= 0;
           reset2       <= 1;
           start2       <= 0;
        end
      else
        case (state)
          Idle:
            if (S_AXIS_TVALID == 1)
              begin
                 state       <= Read_Inputs;
                 nr_of_reads <= NUMBER_OF_INPUT_WORDS - 1;
                 // Rewind the write pointers so every frame fills the
                 // buffers from slot 0.
                 AA_index    <= 0;
                 bb_index    <= 0;
                 // Release the core's reset well before it is started.
                 reset2      <= 0;
              end

          Read_Inputs:
            if (S_AXIS_TVALID == 1)
              begin
                 // nr_of_reads counts down from 29: values 29..5 are the
                 // 25 matrix words, values 4..0 are the 5 vector words.
                 if (nr_of_reads >= VECTOR_WORDS)
                   begin
                      AA[AA_index] <= S_AXIS_TDATA;
                      AA_index     <= AA_index + 1;
                   end
                 else
                   begin
                      bb[bb_index] <= S_AXIS_TDATA;
                      bb_index     <= bb_index + 1;
                   end

                 if (nr_of_reads == 0)
                   begin
                      // Last operand stored on this edge: start the core.
                      state  <= Wait_Calculate;
                      start2 <= 1;
                   end
                 else
                   nr_of_reads <= nr_of_reads - 1;
              end

          Wait_Calculate:
            begin
               // Hold start until the core acknowledges it.
               if (ready2 == 1)
                  start2 <= 0;
               if (done2 == 1)
                 begin
                    start2       <= 0;
                    cc_index     <= 0;
                    nr_of_writes <= NUMBER_OF_OUTPUT_WORDS - 1;
                    state        <= Write_Outputs;
                 end
            end

          Write_Outputs:
            if (M_AXIS_TREADY == 1)
              begin
                 if (nr_of_writes == 0)
                    state <= Idle;
                 else
                   begin
                      nr_of_writes <= nr_of_writes - 1;
                      cc_index     <= cc_index + 1;
                   end
              end

          default:
            state <= Idle;
        endcase
   end

   MatrixMultiply U1 (
      .ap_clk(ACLK),
      .ap_rst(reset2),
      .ap_start(start2),
      .ap_done(done2),
      .ap_idle(idle2),
      .ap_ready(ready2),
      .AA_0(AA[0]),
      .AA_1(AA[1]),
      .AA_2(AA[2]),
      .AA_3(AA[3]),
      .AA_4(AA[4]),
      .AA_5(AA[5]),
      .AA_6(AA[6]),
      .AA_7(AA[7]),
      .AA_8(AA[8]),
      .AA_9(AA[9]),
      .AA_10(AA[10]),
      .AA_11(AA[11]),
      .AA_12(AA[12]),
      .AA_13(AA[13]),
      .AA_14(AA[14]),
      .AA_15(AA[15]),
      .AA_16(AA[16]),
      .AA_17(AA[17]),
      .AA_18(AA[18]),
      .AA_19(AA[19]),
      .AA_20(AA[20]),
      .AA_21(AA[21]),
      .AA_22(AA[22]),
      .AA_23(AA[23]),
      .AA_24(AA[24]),
      .bb_0(bb[0]),
      .bb_1(bb[1]),
      .bb_2(bb[2]),
      .bb_3(bb[3]),
      .bb_4(bb[4]),
      .cc_0(cc[0]),
      .cc_0_ap_vld(cc_val[0]),
      .cc_1(cc[1]),
      .cc_1_ap_vld(cc_val[1]),
      .cc_2(cc[2]),
      .cc_2_ap_vld(cc_val[2]),
      .cc_3(cc[3]),
      .cc_3_ap_vld(cc_val[3]),
      .cc_4(cc[4]),
      .cc_4_ap_vld(cc_val[4])
      );
endmodule

ip核引用的是HLS从C语言生成的Verilog代码。

原文地址:https://www.cnblogs.com/shenerguang/p/3797144.html