#include <stdio.h>
#include <string.h>
#include "export.h"
#include <cula.h>
#define min(a,b) (((a) < (b)) ? (a) : (b))
/* DLM for single precision SVD on CUDA, called from IDL as
CUDA_SVD, A, S, U, VT
where
A is an N rows by M cols IDL array (input)
S is a min(M,N) array of singular values (output)
U is an N by min(M,N) array (output)
VT is a min(M,N) by M array (output)
such that A = U diag(S) VT
This DLM encapsulates the CULA basic function culaSgesvd
It is programmed, unnecessarily since there is no device
code (yet), as a CUDA C project.
NOTE: The input is an n by m IDL array representing AT
in column major form as required by culaSgesvd.
Since AT = V diag(S) UT CULA will return S, V, UT.
Therefore, switching the last two, the output
IDL arrays are S, UT, V which represent the matrices
S, U, VT.
Mort Canty (2010)
*/
/*
 * IDL system procedure CUDA_SVD, A, S, U, VT
 * (single precision SVD through the CULA routine culaSgesvd).
 *
 * argv[0]  input: must be a two-dimensional FLOAT array; with IDL
 *          dimensions [m, n] it is handed to CULA as a column-major
 *          m x n matrix. A scratch copy is factorised, so the caller's
 *          variable is left untouched.
 * argv[1]  output: min(m,n) singular values.
 * argv[2]  output: [min(m,n), n] array, the buffer culaSgesvd fills as VT.
 * argv[3]  output: [m, min(m,n)] array, the buffer culaSgesvd fills as U.
 *
 * The last two buffers are returned in swapped order on purpose; the
 * reasoning (the IDL array represents the transposed matrix) is given in
 * the comment at the top of this file.
 *
 * An IDL error is raised when the input is not a 2-D FLOAT array or when
 * an output argument is a constant or an expression. When CULA cannot be
 * initialised or the factorisation reports an error, an informational
 * message is issued and the output arrays are still returned (they are
 * all zeroes if CULA failed to initialise).
 */
void IDL_CDECL cuda_svd(int argc, IDL_VPTR argv[])
{
    // the input must be an array of single precision floats ...
    IDL_ENSURE_ARRAY(argv[0]);
    if (argv[0]->type != IDL_TYP_FLOAT)
        IDL_Message(IDL_M_NAMED_GENERIC, IDL_MSG_LONGJMP,
                    "CUDA_SVD: input array must be of type FLOAT.");
    // ... with exactly two dimensions, since dim[0] and dim[1] are read below
    IDL_ARRAY * in = argv[0]->value.arr;
    if (in->n_dim != 2)
        IDL_Message(IDL_M_NAMED_GENERIC, IDL_MSG_LONGJMP,
                    "CUDA_SVD: input array must have two dimensions.");
    // results are copied into these arguments, so they must not be
    // constants or expressions
    IDL_EXCLUDE_EXPR(argv[1]);
    IDL_EXCLUDE_EXPR(argv[2]);
    IDL_EXCLUDE_EXPR(argv[3]);

    // matrix dimensions as seen by CULA (column major m x n)
    int m = (int) in->dim[0];
    int n = (int) in->dim[1];
    int k = min(m,n);

    // Scratch copy of the input: following the LAPACK gesvd convention,
    // the contents of A are destroyed when neither job option is 'O'.
    IDL_VPTR ivAptr;
    float * a = (float *) IDL_MakeTempArray((int) IDL_TYP_FLOAT, 2,
                    in->dim, IDL_ARR_INI_NOP, &ivAptr);
    memcpy(a, in->data, (size_t) in->n_elts * sizeof(float));

    // IDL output arrays, zero-initialised; dimension vectors use IDL_MEMINT,
    // the type IDL_MakeTempArray expects
    IDL_VPTR ivWptr;
    IDL_MEMINT dim_w[] = {k};
    float * w = (float *) IDL_MakeTempArray((int) IDL_TYP_FLOAT, 1,
                    dim_w, IDL_ARR_INI_ZERO, &ivWptr);
    IDL_VPTR ivUptr;
    IDL_MEMINT dim_u[] = {m, k};
    float * u = (float *) IDL_MakeTempArray((int) IDL_TYP_FLOAT, 2,
                    dim_u, IDL_ARR_INI_ZERO, &ivUptr);
    IDL_VPTR ivVTptr;
    IDL_MEMINT dim_v[] = {k, n};
    float * vt = (float *) IDL_MakeTempArray((int) IDL_TYP_FLOAT, 2,
                    dim_v, IDL_ARR_INI_ZERO, &ivVTptr);

    // CULA general single precision SVD with host pointers
    culaStatus s = culaInitialize();
    if (s == culaNoError)
    {
        s = culaSgesvd('S','S',m,n,a,m,w,u,m,vt,k);
        culaShutdown();
    }
    // the scratch copy is no longer needed
    IDL_Deltmp(ivAptr);

    // tell the user when the results cannot be trusted, but keep the
    // original best-effort behaviour of returning the arrays anyway
    if (s != culaNoError)
    {
        char msg[256];
        sprintf(msg, "CUDA_SVD: CULA reported an error (%.180s).",
                culaGetStatusString(s));
        IDL_Message(IDL_M_NAMED_GENERIC, IDL_MSG_INFO, msg);
    }

    // return results to IDL; the VT and U buffers are swapped deliberately
    IDL_VarCopy(ivWptr,argv[1]);
    IDL_VarCopy(ivVTptr,argv[2]);
    IDL_VarCopy(ivUptr,argv[3]);
}
// the entry point, which loads the routine into IDL
int IDL_Load(void)
{
    // Routine table handed to IDL: cuda_svd is registered under the IDL
    // name CUDA_SVD. The two 4s are presumably the minimum and maximum
    // argument counts (see IDL_SYSFUN_DEF2 in export.h).
    // Static because only its address is passed on; whether IDL keeps
    // that pointer after this call returns is not visible from here.
    static IDL_SYSFUN_DEF2 svd_routines[] = {
        { (IDL_SYSRTN_GENERIC) cuda_svd, "CUDA_SVD", 4, 4, 0, 0 }
    };
    // number of entries in the table (one)
    const int routine_count =
        (int) (sizeof(svd_routines) / sizeof(svd_routines[0]));

    // IDL_FALSE is passed as the second argument; going by the original
    // table name (procedure_addr) this registers procedures, not functions.
    // The status from IDL_SysRtnAdd is returned to the caller unchanged.
    return IDL_SysRtnAdd(svd_routines, IDL_FALSE, routine_count);
}
svd.zip_SVD_zip
版权申诉
110 浏览量
2022-09-20
18:31:28
上传
评论
收藏 46KB ZIP 举报
weixin_42653672
- 粉丝: 93
- 资源: 1万+
最新资源
- WebCrawler.scr
- 【计算机专业毕业设计】大学生就业信息管理系统设计源码.zip
- YOLO 数据集:8种路面缺陷病害检测【包含划分好的数据集、类别class文件、数据可视化脚本】
- JAVA实现Modbus RTU或Modbus TCPIP案例.zip
- 基于YOLOv8的FPS TPS AI自动锁定源码+使用步骤说明.zip
- JAVA实现Modbus RTU或Modbus TCPIP案例.zip
- 基于yolov8+streamlit的火灾检测部署源码+模型.zip
- 测试aaaaaaabbbbb
- VID20240521070643.mp4
- Android系统原理与开发学习要点详解-培训课件.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈