#include "akaze.h"
#include "akazed.h"
#include "fed.h"
#include <memory>
#include <cmath>
//#define DEBUG_SHOW
#ifdef DEBUG_SHOW
#include <opencv2/core.hpp>
#endif // DEBUG_SHOW
#ifndef MIN
# define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
namespace akaze
{
/**
 * Prepare an AkazeData container for up to `max_pts` points.
 *
 * @param data    Container to fill in; its counters and both buffer pointers are overwritten.
 * @param max_pts Capacity, in points, used to size both buffers.
 * @param host    When true, a host buffer is obtained with malloc (may be NULL on failure).
 * @param dev     When true, a device buffer is obtained with cudaMalloc.
 */
void initAkazeData(AkazeData& data, const int max_pts, const bool host, const bool dev)
{
    // One AkazePoint record per potential point.
    const size_t nbytes = sizeof(AkazePoint) * max_pts;

    data.num_pts = 0;
    data.max_pts = max_pts;

    // Host side: only allocated on request, otherwise left NULL.
    if (host)
    {
        data.h_data = (AkazePoint*)malloc(nbytes);
    }
    else
    {
        data.h_data = NULL;
    }

    // Device side: start from NULL so the pointer is well defined when `dev` is false.
    data.d_data = NULL;
    if (dev)
    {
        CHECK(cudaMalloc((void**)&data.d_data, nbytes));
    }
}
/**
 * Release the buffers held by an AkazeData container and reset its counters.
 *
 * Both pointers are set to NULL after being released, so calling this function
 * again on the same container (or inspecting the pointers afterwards) is safe
 * instead of freeing the same memory twice.
 *
 * @param data Container whose device and host buffers are released.
 */
void freeAkazeData(AkazeData& data)
{
    if (data.d_data != NULL)
    {
        CHECK(cudaFree(data.d_data));
        data.d_data = NULL; // avoid a dangling device pointer / double cudaFree
    }
    if (data.h_data != NULL)
    {
        free(data.h_data);
        data.h_data = NULL; // avoid a dangling host pointer / double free
    }
    data.num_pts = 0;
    data.max_pts = 0;
}
/**
 * Run hMatch on two point sets and, if `result1` has a host buffer, copy the
 * match information of every point in `result1` back to the host.
 *
 * @param result1 First point set; receives the host copy of the match fields.
 * @param result2 Second point set, passed through to hMatch.
 */
void cuMatch(AkazeData& result1, AkazeData& result2)
{
    hMatch(result1, result2);

    // Nothing to mirror when the caller did not ask for a host buffer.
    if (!result1.h_data)
    {
        return;
    }

    // Strided copy: one row per point, rows are sizeof(AkazePoint) apart, and
    // each row transfers 4 * sizeof(float) bytes starting at the `match` field.
    // NOTE(review): which fields follow `match` inside those bytes is defined
    // by AkazePoint, which is not visible here.
    const size_t stride = sizeof(AkazePoint);
    const size_t row_bytes = 4 * sizeof(float);
    int* host_first = &result1.h_data[0].match;
    int* dev_first = &result1.d_data[0].match;
    CHECK(cudaMemcpy2D(host_first, stride, dev_first, stride, row_bytes, result1.num_pts, cudaMemcpyDeviceToHost));
}
/**
 * Construct an Akazer with no cached octave memory.
 *
 * The destructor passes `omem` to cudaFree and allocMemory() tests `!omem`
 * before allocating, so the pointer must hold a defined value from the moment
 * the object exists. It is cleared here explicitly; this is harmless if the
 * class declaration already provides a default initializer.
 */
Akazer::Akazer()
{
    omem = NULL;
    total_osize = 0;
}
/**
 * Release the cached octave memory, if any was allocated.
 *
 * The CUDA call is skipped when nothing is cached, so destroying an Akazer
 * that never ran detection does not enter the CUDA runtime (and cannot trip
 * CHECK) just to free a NULL pointer.
 */
Akazer::~Akazer()
{
    if (omem != NULL)
    {
        CHECK(cudaFree(omem));
        omem = NULL;
    }
}
/**
 * Store the detector configuration in the corresponding members and then call
 * setCompareIndices().
 *
 * @param whp0                     Reference image dimensions, copied component-wise into `whp`.
 * @param _noctaves                Stored in `noctaves`.
 * @param _max_scale               Stored in `max_scale`.
 * @param _per                     Stored in `per`.
 * @param _kcontrast               Stored in `kcontrast`.
 * @param _soffset                 Stored in `soffset`.
 * @param _reordering              Stored in `reordering`.
 * @param _derivative_factor       Stored in `derivative_factor`.
 * @param _dthreshold              Stored in `dthreshold`.
 * @param _diffusivity             Converted to DiffusivityType and stored in `diffusivity`.
 * @param _descriptor_pattern_size Stored in `descriptor_pattern_size`.
 */
void Akazer::init(int3 whp0, int _noctaves, int _max_scale, float _per, float _kcontrast, float _soffset, bool _reordering,
float _derivative_factor, float _dthreshold, int _diffusivity, int _descriptor_pattern_size)
{
    // Reference image geometry (compared against incoming images in detectAndCompute).
    this->whp.x = whp0.x;
    this->whp.y = whp0.y;
    this->whp.z = whp0.z;

    // Scale-space layout.
    this->noctaves = _noctaves;
    this->max_scale = _max_scale;

    // Diffusion / detection parameters.
    this->per = _per;
    this->kcontrast = _kcontrast;
    this->soffset = _soffset;
    this->reordering = _reordering;
    this->derivative_factor = _derivative_factor;
    this->dthreshold = _dthreshold;
    this->diffusivity = static_cast<DiffusivityType>(_diffusivity);

    // Descriptor parameters.
    this->descriptor_pattern_size = _descriptor_pattern_size;

    // Called last, after every member above has been assigned.
    setCompareIndices();
}
/**
 * Detect keypoints in a float image and optionally compute their orientations
 * and descriptors.
 *
 * @param image  Input image, forwarded unchanged to detect().
 * @param result Receives the detected points. When result.h_data is non-NULL,
 *               the per-point records are also copied back to the host.
 * @param whp0   Dimensions of `image`. When x and y equal the values stored by
 *               init(), the cached buffer `omem` is used; otherwise a
 *               temporary buffer is allocated and freed within this call.
 * @param desc   When true, hCalcOrient and hDescribe are run and the
 *               descriptor bytes are included in the host copy.
 */
void Akazer::detectAndCompute(float* image, AkazeData& result, int3 whp0, const bool desc)
{
    // Per-octave bookkeeping packed into one int array:
    //   [osizes: noctaves][offsets: noctaves + 1][owhps: noctaves * 3 ints]
    // The array form of unique_ptr is required so the new[] allocation is
    // released with delete[] rather than scalar delete.
    std::unique_ptr<int[]> oparams(new int[noctaves * 5 + 1]);
    int* osizes = oparams.get();
    int* offsets = osizes + noctaves;
    int3* owhps = (int3*)(offsets + noctaves + 1);

    // Allocate memory
    float* tmem = NULL;
    const bool reused = whp0.x == whp.x && whp0.y == whp.y;
    if (reused)
    {
        this->allocMemory((void**)&omem, whp0, owhps, osizes, offsets, reused);
        tmem = omem;
    }
    else
    {
        this->allocMemory((void**)&tmem, whp0, owhps, osizes, offsets, reused);
    }

    // Detect keypoints
    this->detect(result, tmem, image, owhps, osizes, offsets);

    // Compute descriptors
    if (desc)
    {
        // Compute orientations
        hCalcOrient(result, tmem, noctaves, max_scale);
        // Compute descriptors
        hDescribe(result, tmem, noctaves, max_scale, descriptor_pattern_size);
    }

    // Copy point data to host: one row per point, rows sizeof(AkazePoint)
    // apart, starting at field `x`. Each row carries 6 floats, plus FLEN
    // descriptor bytes when descriptors were computed.
    if (result.h_data != NULL)
    {
        float* h_ptr = &result.h_data[0].x;
        float* d_ptr = &result.d_data[0].x;
        CHECK(cudaMemcpy2D(h_ptr, sizeof(AkazePoint), d_ptr, sizeof(AkazePoint), (desc ? FLEN * sizeof(unsigned char) : 0) + 6 * sizeof(float), result.num_pts, cudaMemcpyDeviceToHost));
    }

    // Post-processing: the cached buffer is cleared for the next call, a
    // temporary buffer is released.
    if (reused)
    {
        CHECK(cudaMemset(omem, 0, total_osize));
    }
    else
    {
        CHECK(cudaFree(tmem));
    }
}
/**
 * Variant of detectAndCompute() that takes an 8-bit image and uses fastDetect()
 * together with the fastakaze:: orientation and descriptor routines.
 *
 * @param image  Input image, forwarded unchanged to fastDetect().
 * @param result Receives the detected points. When result.h_data is non-NULL,
 *               the per-point records are also copied back to the host.
 * @param whp0   Dimensions of `image`. When x and y equal the values stored by
 *               init(), the cached buffer `omem` is used; otherwise a
 *               temporary buffer is allocated and freed within this call.
 * @param desc   When true, fastakaze::hCalcOrient and fastakaze::hDescribe are
 *               run and the descriptor bytes are included in the host copy.
 */
void Akazer::fastDetectAndCompute(unsigned char* image, AkazeData& result, int3 whp0, const bool desc)
{
    // Per-octave bookkeeping packed into one int array:
    //   [osizes: noctaves][offsets: noctaves + 1][owhps: noctaves * 3 ints]
    // The array form of unique_ptr is required so the new[] allocation is
    // released with delete[] rather than scalar delete.
    std::unique_ptr<int[]> oparams(new int[noctaves * 5 + 1]);
    int* osizes = oparams.get();
    int* offsets = osizes + noctaves;
    int3* owhps = (int3*)(offsets + noctaves + 1);

    // Allocate memory
    void* tmem = NULL;
    const bool reused = whp0.x == whp.x && whp0.y == whp.y;
    if (reused)
    {
        this->allocMemory((void**)&omem, whp0, owhps, osizes, offsets, reused);
        tmem = omem;
    }
    else
    {
        this->allocMemory((void**)&tmem, whp0, owhps, osizes, offsets, reused);
    }

    // Detect keypoints
    this->fastDetect(result, tmem, image, owhps, osizes, offsets);

    // Compute descriptors
    if (desc)
    {
        // Compute orientations
        fastakaze::hCalcOrient(result, tmem, noctaves, max_scale);
        // Compute descriptors
        fastakaze::hDescribe(result, tmem, noctaves, max_scale, descriptor_pattern_size);
    }

    // Copy point data to host: one row per point, rows sizeof(AkazePoint)
    // apart, starting at field `x`. Each row carries 6 floats, plus FLEN
    // descriptor bytes when descriptors were computed.
    if (result.h_data != NULL)
    {
        float* h_ptr = &result.h_data[0].x;
        float* d_ptr = &result.d_data[0].x;
        CHECK(cudaMemcpy2D(h_ptr, sizeof(AkazePoint), d_ptr, sizeof(AkazePoint), (desc ? FLEN * sizeof(unsigned char) : 0) + 6 * sizeof(float), result.num_pts, cudaMemcpyDeviceToHost));
    }

    // Post-processing: the cached buffer is cleared for the next call, a
    // temporary buffer is released.
    if (reused)
    {
        CHECK(cudaMemset(omem, 0, total_osize));
    }
    else
    {
        CHECK(cudaFree(tmem));
    }
}
void Akazer::allocMemory(void** addr, int3& whp0, int3* owhps, int* osizes, int* offsets, const bool reused)
{
// Compute sizes
owhps[0] = whp0;
osizes[0] = whp0.y * whp0.z;
offsets[0] = 3 * osizes[0]; // Record response, scale and octave map for NMS
offsets[1] = offsets[0] + osizes[0] * max_scale * 4; // 4: Limg, Lsmooth, Lx, Ly
for (int i = 0, j = 1, k = 2; j < noctaves; i++, j++, k++)
{
owhps[j].x = (owhps[i].x >> 1);
owhps[j].y = (owhps[i].y >> 1);
if (owhps[j].x < 80 || owhps[j].y < 80)
{
noctaves = j;
break;
}
owhps[j].z = iAlignUp(owhps[j].x, 128);
osizes[j] = owhps[j].y * owhps[j].z;
offsets[k] = offsets[j] + osizes[j] * max_scale * 4;
}
// Allocate memory for images in octave
if ((reused && !omem) || !reused)
{
CHECK(cudaMalloc(addr, offsets[noctaves] * sizeof(float)));
}
if (reused)
{
total_osize = offsets[noctaves] * sizeof(float);
}
//return offsets[noctaves];
}
void Akazer::detect(AkazeData& result, float* tmem, float* image, int3* owhps, int* osizes, int* offsets)
{
// Get address of point counter
unsigned int* d_point_counter_addr;
getPointCounter((void**)&d_point_counter_addr);
CHECK(cudaMemset(d_point_counter_addr, 0, sizeof(unsigned int)));
setMaxNumPoints(result.max_pts);
int w, h, p, msz, ms_msz, mstep;
float* response_map = tmem;
float* size_map = tmem + osizes[0];
int* layer_map = (int*)(size_map + osizes[0]);
size_t nbytes = osizes[0] * sizeof(float);
float minv = 1e-6f;
int* iminv = (int*)&minv;
CHECK(cudaMemset(layer_map, -1, nbytes));
CHECK(cudaMemset(response_map, *iminv, nbytes));
CHECK(cudaMemset(size_map, *iminv, nbytes));
float* oldnld = NULL;
float* nldimg = NULL;
float* smooth = NULL;
float* flow = NULL;
float* temp = NULL;
float* dx = NULL;
float* dy = NULL;
float tmax = 0.25f;
float esigma = soffset;
float last_etime = 0.5 * soffset * soffset;
float curr_etime = 0;
float ttime = 0;
int naux = 0;
int oratio = 1;
int sigma_size = 0;
float smax = 1.0f;
if (FEATURE_TYPE == 0 || FEATURE_TYPE == 1 || FEATURE_TYPE == 4 || FEATURE_TYPE == 5)
{
smax = 10.0 * sqrtf(2.0f);
}
else if (FEATURE_TYPE == 2 || FEATURE_TYPE == 3)
{
smax = 12.0 * sqrtf(2.0f);
}
std::unique_ptr<float> exptr(new float[max_scale * 2]);
float* borders = exptr.get();
float* sizes = borders + max_scale;
float psz = 10000;
int neigh = 0;
//float* threshs = borders + max_scale;
#ifdef DEBUG_SHOW
cv::Mat nldshow, detshow, dxshow, dyshow;
cv::Mat response_show(owhps[0].y, owhps[0].x, CV_32FC1);
cv::Mat size_show(owhps[0].y, owhps[0].x, CV_32FC1);
cv::Mat layer_show(owhps[0].y, owhps[0].x, CV_32SC1);;
#endif // DEBUG_SHOW
for (int i = 0; i < noctaves; i++)
{
w = owhps[i].x;
h = owhps[i].y;
p = owhps[i].z;
msz = osizes[i];
ms_msz = msz * max_scale;
#ifdef DEBUG_SHOW
nldshow.create(h, w, CV_32FC1);
detshow.create(h, w, CV_32FC1);
dxshow.create(h, w, CV_32FC1);
dyshow.create(h, w, CV_32FC1);
#endif // DEBUG_SHOW
nldimg = tmem + offsets[i];
smooth = nldimg + ms_msz;
flow = smooth + ms_msz;
temp = flow + ms_msz;
dx = flow;
dy = temp;
// Create nonlinear space for current octave layer
for (int j = 0; j < max_scale; j++)
{
if (j == 0 && i == 0)
{
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
图像拼接CUDA.zip (79个子文件)
bak20240322
akazed.h 5KB
SIFT
sift_structrures.h 697B
sift.cpp 6KB
sift_d.h 8KB
sift_d.cu 45KB
sift.h 8KB
cuda_utils.h 4KB
ORB
orb_d.h 8KB
orb_d.cu 48KB
orb.h 8KB
orb_structures.h 566B
orb.cpp 5KB
result.jpg 5.23MB
AKAZE
cuda_utils.h 12KB
fed.cpp 5KB
akaze.cpp 13KB
test_registration.cpp 9KB
fed.h 3KB
akaze.h 8KB
akaze_d.h 8KB
akaze_d.cu 38KB
akaze_structures.h 895B
main2.cpp 14KB
ImageStitchingCuda.h 8KB
fed.cpp 4KB
data
akaze_show1.jpg 2.55MB
akaze_show2.jpg 3.14MB
akaze_show_matched.jpg 666KB
ImageStitchingCuda.cpp 22KB
ImageStitchingCuda.vcxproj 8KB
akazed.cu 113KB
akaze.cpp 24KB
main.cpp 3KB
x64
Release
vc143.pdb 2.09MB
test_registration.obj 1.68MB
orb_d.cu-1425680095.deps 14KB
akaze_d.cu882462689.deps 14KB
ImageStitchingCuda.log 12KB
ImageStitchingCuda.ipdb 321KB
orb.obj 1.03MB
surf.obj 1.03MB
fed.obj 251KB
akazed.cu1573422824.deps 14KB
akaze.obj 1.06MB
orb_d.cu.obj 188KB
surf_d.cu1297793917.deps 14KB
akazed.cu.obj 775KB
sift.obj 1.03MB
surfd.cu.cache 1KB
ImageStitchingCuda.obj 3.63MB
akazed.cu.cache 1KB
surf_d.cu.obj 221KB
surf_d.cu.cache 1KB
akaze_d.cu.cache 1KB
orb_d.cu.cache 1KB
sift_d.cu.obj 241KB
ImageStitchingCuda.exe.recipe 304B
ImageStitchingCuda.iobj 1021KB
akaze_d.cu.obj 295KB
main.obj 3.71MB
ImageSti.fa4a31c8.tlog
CL.write.1.tlog 8KB
CudaCompile.write.1u.tlog 380B
CudaCompile.read.1u.tlog 82KB
Cl.items.tlog 940B
link.secondary.1.tlog 879B
CL.command.1.tlog 10KB
link.command.1.tlog 5KB
link.read.1.tlog 14KB
link.write.1.tlog 2KB
ImageStitchingCuda.lastbuildstate 158B
CL.read.1.tlog 230KB
sift_d.cu683956913.deps 14KB
surfd.cu-520872766.deps 14KB
sift_d.cu.cache 1KB
ImageStitchingCuda.vcxproj.user 168B
fed.h 3KB
ImageStitchingCuda.vcxproj.filters 3KB
akaze.h 8KB
akaze_structures.h 848B
共 79 条
- 1
资源评论
爱吃橙子的哈士奇
- 粉丝: 30
- 资源: 17
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- foldcraftlauncher_262944.apk
- 珍藏多年的基于matlab实现潮流计算程序源代码集合,包含多个潮流计算程序.rar
- 使用FPGA实现串-并型乘法器
- 基于matlab实现针对基于双曲线定位的DV-Hop算法中误差误差出一种基于加权双曲线定位的DV-Hop改进算法.rar
- 基于matlab实现由遗传算法开发的整数规划,车辆调度问题.rar
- 电视家7.0(对电视配置要求高).apk
- 免费计算机毕业设计-基于JavaEE的医院病历管理系统设计与实现(包含论文+源码)
- 手机端 我的世界融合植物大战僵尸版.apk
- 植物大战僵尸 · 戴夫的老年生活 手机版.apk
- Runcraft · 我的世界跑酷游戏 手机端.apk
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功