#include <string.h>
#include "platform.h"
#include "xparameters.h"
#include<stdio.h>
#include "xil_printf.h"
#include "ff.h"
#include "xdevcfg.h"
#include "xtime_l.h"
#include"xconv.h"
#include"xil_cache.h"
#pragma GCC optimize("O3")
static FATFS fatfs;
/* Print the first `length` floats of buff, one value per line (debug aid). */
void print_float(float* buff,int length){
	int idx = 0;
	while (idx < length) {
		printf("%f\n", buff[idx]);
		idx++;
	}
}
/* Print the first `length` shorts of buff, one value per line (debug aid). */
void print_short(short* buff,int length){
	int idx;
	for (idx = 0; idx < length; ++idx) {
		printf("%d\n", buff[idx]);
	}
}
int SD_Init()
{
FRESULT rc;
rc = f_mount(&fatfs,"",0);
if(rc)
{
xil_printf("ERROR : f_mount returned %d\r\n",rc);
return XST_FAILURE;
}
return XST_SUCCESS;
}
/*
 * Read up to ByteLength bytes of file FileName from the SD card into the
 * buffer at physical/virtual address DestinationAddress.
 *
 * Returns XST_SUCCESS on success, XST_FAILURE on any FatFs error (the
 * FatFs code is printed over the UART).
 *
 * NOTE(review): a short read (fewer than ByteLength bytes actually read)
 * is not treated as an error here, matching the original behavior.
 */
int SD_Transfer_read(char *FileName,u32 DestinationAddress,u32 ByteLength)
{
	FIL fil;
	FRESULT rc;
	UINT br;

	rc = f_open(&fil, FileName, FA_READ);
	if (rc) {
		xil_printf("ERROR : f_open returned %d\r\n", rc);
		return XST_FAILURE;
	}
	/* Rewind to the start of the file before reading. */
	rc = f_lseek(&fil, 0);
	if (rc) {
		xil_printf("ERROR : f_lseek returned %d\r\n", rc);
		goto out_close; /* BUGFIX: close the file on error paths too */
	}
	rc = f_read(&fil, (void *)DestinationAddress, ByteLength, &br);
	if (rc) {
		xil_printf("ERROR : f_read returned %d\r\n", rc);
		goto out_close; /* BUGFIX: was leaking the open file handle */
	}
	rc = f_close(&fil);
	if (rc) {
		xil_printf(" ERROR : f_close returned %d\r\n", rc);
		return XST_FAILURE;
	}
	return XST_SUCCESS;

out_close:
	f_close(&fil);
	return XST_FAILURE;
}
/*
 * Write ByteLength bytes starting at SourceAddress to file FileName on the
 * SD card, creating or truncating the file (FA_CREATE_ALWAYS).
 *
 * Returns XST_SUCCESS on success, XST_FAILURE on any FatFs error (the
 * FatFs code is printed over the UART).
 *
 * NOTE(review): a short write (fewer than ByteLength bytes actually
 * written) is not treated as an error here, matching the original behavior.
 */
int SD_Transfer_write(char *FileName,u32 SourceAddress,u32 ByteLength)
{
	FIL fil;
	FRESULT rc;
	UINT bw;

	rc = f_open(&fil, FileName, FA_CREATE_ALWAYS | FA_WRITE);
	if (rc) {
		xil_printf("ERROR : f_open returned %d\r\n", rc);
		return XST_FAILURE;
	}
	/* Position at the start of the (freshly created) file. */
	rc = f_lseek(&fil, 0);
	if (rc) {
		xil_printf("ERROR : f_lseek returned %d\r\n", rc);
		goto out_close; /* BUGFIX: close the file on error paths too */
	}
	rc = f_write(&fil, (void *)SourceAddress, ByteLength, &bw);
	if (rc) {
		xil_printf("ERROR : f_write returned %d\r\n", rc);
		goto out_close; /* BUGFIX: was leaking the open file handle */
	}
	rc = f_close(&fil);
	if (rc) {
		xil_printf("ERROR : f_close returned %d\r\n", rc);
		return XST_FAILURE;
	}
	return XST_SUCCESS;

out_close:
	f_close(&fil);
	return XST_FAILURE;
}
/*
 * Convert `length` floats in buff2 into fixed-point shorts in buff1 by
 * scaling by 1024 (10 fractional bits) and truncating toward zero.
 * Assumes values fit in a short after scaling — no saturation is done.
 */
void float2short(short* buff1,float* buff2,int length){
	int idx;
	for (idx = 0; idx < length; ++idx)
		buff1[idx] = (short)(buff2[idx] * 1024);
}
/*
 * Fixed-point (scaled-by-1024, see float2short) copies of the network
 * inputs, weights and intermediate buffers, laid out as contiguous
 * [channel][row][col] arrays for the HLS accelerator.
 * NOTE(review): there is no conv2_out/conv4_out here — presumably the
 * accelerator fuses conv+pool and writes straight into pool*_out; confirm
 * against the HLS kernel.
 */
struct param_short{
short images[10000][1][28][28];   /* full MNIST test set, 1x28x28 each */
short conv1_w[64][1][5][5];       /* conv1: 64 out ch, 1 in ch, 5x5 kernel */
short conv1_b[64];
short conv1_out[64][28][28];
short conv2_w[64][64][5][5];
short conv2_b[64];
short pool1_out[64][14][14];      /* after 2x2 max pool of 28x28 maps */
short conv3_w[64][64][5][5];
short conv3_b[64];
short conv3_out[64][14][14];
short conv4_w[64][64][5][5];
short conv4_b[64];
short pool2_out[64][7][7];        /* after 2x2 max pool of 14x14 maps */
};
/*
 * Float32 network parameters and activation buffers, as loaded from the
 * *.bin files on the SD card. Same [channel][row][col] layout as
 * struct param_short; used by the software (ARM-side) reference path.
 */
struct param{
float conv1_w[64][1][5][5];   /* conv1: 64 out ch, 1 in ch, 5x5 kernel */
float conv1_b[64];
float conv1_out[64][28][28];
float conv2_w[64][64][5][5];
float conv2_b[64];
float conv2_out[64][28][28];
float pool1_out[64][14][14];  /* after 2x2 max pool of 28x28 maps */
float conv3_w[64][64][5][5];
float conv3_b[64];
float conv3_out[64][14][14];
float conv4_w[64][64][5][5];
float conv4_b[64];
float conv4_out[64][14][14];
float pool2_out[64][7][7];    /* after 2x2 max pool of 14x14 maps */
float fc1_w[10][64*7*7];      /* final classifier: 10 classes */
float fc1_b[10];
float fc1_out[10];            /* class scores (argmax = prediction) */
};
/*
 * 2x2 max pooling with stride 2 over a [ch][h][w] tensor.
 *
 * in  : input of ch channels, each h x w (h and w assumed even — all
 *       callers use 28x28 or 14x14 maps)
 * out : output of ch channels, each (h/2) x (w/2)
 *
 * Fix vs. original: removed the unused locals kx, ky.
 */
void pool(int h,int w,int ch,float* in,float* out){
int i,j,n;
for(i=0;i<h/2;i++)
for(j=0;j<w/2;j++)
for(n=0;n<ch;n++){
/* the 2x2 window in[n][2i..2i+1][2j..2j+1] */
float tmp1=*(in+n*h*w+2*i*w+2*j);
float tmp2=*(in+n*h*w+2*i*w+2*j+1);
float tmp3=*(in+n*h*w+(2*i+1)*w+2*j);
float tmp4=*(in+n*h*w+(2*i+1)*w+2*j+1);
float max1=(tmp1>tmp2)?tmp1:tmp2;
float max2=(tmp3>tmp4)?tmp3:tmp4;
/* out[n][i][j] in the (h/2)x(w/2) output map */
*(out+n*h*w/4+i*w/2+j)=(max1>max2)?max1:max2;
}
}
/*
 * Direct 2-D convolution followed by ReLU over [ch][h][w] tensors.
 *
 * in     : ch_in channels of h x w
 * weight : [ch_out][ch_in][k][k] kernels
 * bias   : per-output-channel bias, added before the accumulation
 * out    : ch_out channels of out_h x out_w, where
 *          out_h = (h - k + 2*pad)/stride + 1 (same for width).
 * Input taps that fall into the zero-padding region are simply skipped.
 */
void conv(int ch_in,int ch_out,int pad,int stride,int k,int h,int w,
float* in,float *weight,float *bias,float *out){
int out_h=(h-k+2*pad)/stride+1;
int out_w=(w-k+2*pad)/stride+1;
int r,c,oc,ic,u,v;
for(r=0;r<out_h;r++)
for(c=0;c<out_w;c++)
for(oc=0;oc<ch_out;oc++){
float acc=bias[oc];
for(ic=0;ic<ch_in;ic++)
for(u=0;u<k;u++)
for(v=0;v<k;v++){
int row=r*stride+u-pad;
int col=c*stride+v-pad;
/* skip taps that land in the zero padding */
if(row<0||row>=h||col<0||col>=w)
continue;
/* acc += in[ic][row][col] * weight[oc][ic][u][v] */
acc+=in[ic*h*w+row*w+col]*weight[oc*ch_in*k*k+ic*k*k+u*k+v];
}
/* ReLU, then store out[oc][r][c] */
out[oc*out_h*out_w+r*out_w+c]=(acc>0.0f)?acc:0.0f;
}
}
/*
 * Fully connected layer: out = w * in + b (no activation).
 *
 * w is row-major [nout][nin]; out[i] = b[i] + sum_j w[i][j]*in[j].
 */
void fc(float *in,float *w,float *b,float *out,int nin,int nout){
	int row, col;
	for (row = 0; row < nout; row++) {
		const float *wrow = w + row * nin; /* row `row` of the weight matrix */
		float acc = b[row];
		for (col = 0; col < nin; col++)
			acc += wrow[col] * in[col];
		out[row] = acc;
	}
}
/*
 * Run one convolution layer on the HLS accelerator (XConv IP) and block
 * until it finishes.
 *
 * IN/W/B/OUT : physical addresses of the fixed-point input, weight, bias
 *              and output buffers, passed to the IP as raw u32 addresses
 *              (caller must flush/invalidate caches around this call).
 * N, M, SIZE : layer geometry registers of the IP — presumably input
 *              channels, output channels and map size; confirm against
 *              the HLS kernel (conv.cpp).
 * pool       : IP mode flag — presumably enables the fused pooling stage;
 *              confirm against the HLS kernel.
 *
 * NOTE(review): HlsXConv is passed by value; the setters only use the
 * copied driver struct to reach the IP's memory-mapped registers, so this
 * works, but pass-by-pointer would be the conventional driver usage.
 */
void conv_pl(XConv HlsXConv,short *IN,short *W,short *B,short *OUT,int N,int M,int SIZE,int pool){
/* program the IP's argument registers */
XConv_Set_in_V(&HlsXConv,(u32)IN);
XConv_Set_weight_V(&HlsXConv,(u32)W);
XConv_Set_bias_V(&HlsXConv,(u32)B);
XConv_Set_out_V(&HlsXConv,(u32)OUT);
XConv_Set_N_V(&HlsXConv,N);
XConv_Set_M_V(&HlsXConv,M);
XConv_Set_SIZE_V(&HlsXConv,SIZE);
XConv_Set_pool_V(&HlsXConv,pool);
XConv_Start(&HlsXConv);
/* busy-wait until the accelerator raises ap_done */
while(XConv_IsDone(&HlsXConv)==0);
return;
}
int main()
{
init_platform();
XTime tEnd,tCur;
u32 tUsed;
int labels[10000];
float images[10000][1][28][28];
struct param my_param;
struct param_short my_param_short;
SD_Init();
SD_Transfer_read("test_labels.bin",(u32)labels,10000*sizeof(int));
SD_Transfer_read("test_images.bin",(u32)images,10000*1*28*28*sizeof(float));
//
SD_Transfer_read("conv1.weight.bin",(u32)my_param.conv1_w,64*1*5*5*sizeof(float));
SD_Transfer_read("conv1.bias.bin",(u32)my_param.conv1_b,64*sizeof(float));
SD_Transfer_read("conv2.weight.bin",(u32)my_param.conv2_w,64*64*5*5*sizeof(float));
SD_Transfer_read("conv2.bias.bin",(u32)my_param.conv2_b,64*sizeof(float));
SD_Transfer_read("conv3.weight.bin",(u32)my_param.conv3_w,64*64*5*5*sizeof(float));
SD_Transfer_read("conv3.bias.bin",(u32)my_param.conv3_b,64*sizeof(float));
SD_Transfer_read("conv4.weight.bin",(u32)my_param.conv4_w,64*64*5*5*sizeof(float));
SD_Transfer_read("conv4.bias.bin",(u32)my_param.conv4_b,64*sizeof(float));
SD_Transfer_read("fc1.bias.bin",(u32)my_param.fc1_b,10*sizeof(float));
SD_Transfer_read("fc1.weight.bin",(u32)my_param.fc1_w,10*64*7*7*sizeof(float));
print("SD read parameters over!\r\n");
float2short((short*)my_param_short.images,(float*)images,10000*1*28*28);
float2short((short*)my_param_short.conv1_w,(float*)my_param.conv1_w,64*1*5*5);
float2short((short*)my_param_short.conv1_b,(float*)my_param.conv1_b,64);
float2short((short*)my_param_short.conv2_w,(float*)my_param.conv2_w,64*64*5*5);
float2short((short*)my_param_short.conv2_b,(float*)my_param.conv2_b,64);
float2short((short*)my_param_short.conv3_w,(float*)my_param.conv3_w,64*64*5*5);
float2short((short*)my_param_short.conv3_b,(float*)my_param.conv3_b,64);
float2short((short*)my_param_short.conv4_w,(float*)my_param.conv4_w,64*64*5*5);
float2short((short*)my_param_short.conv4_b,(float*)my_param.conv4_b,64);
Xil_DCacheFlushRange((u32)my_param_short.images,10000*1*28*28*sizeof(short));
Xil_DCacheFlushRange((u32)my_param_short.con
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
一个HLS设计的卷积神经网络加速器,并在zynq7020开发板上部署成功。数据集采用的是MNIST手写体,加速的网络为一个拥有4层卷积,2层池化和1层全连接层的自定义小网络,适合初学者学习。
资源推荐
资源详情
资源评论
收起资源包目录
卷积神经网络加速器.rar (19个子文件)
卷积神经网络加速器
sdk
main.c 10KB
hls
test.cpp 3KB
conv.cpp 10KB
conv.h 394B
train
conv1.weight.bin 6KB
mnist.npz 10.96MB
conv2.weight.bin 400KB
fc1.bias.bin 40B
test_images.bin 29.91MB
train.py 3KB
conv4.weight.bin 400KB
conv1.bias.bin 256B
conv2.bias.bin 256B
conv3.weight.bin 400KB
conv4.bias.bin 256B
fc1.weight.bin 123KB
test_labels.bin 39KB
conv3.bias.bin 256B
演示视频.mp4 81.57MB
共 19 条
- 1
FPGA硅农
- 粉丝: 8199
- 资源: 9
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
- 3
- 4
前往页