# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Definition of 300 VGG-based SSD network.
This model was initially introduced in:
SSD: Single Shot MultiBox Detector
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
https://arxiv.org/abs/1512.02325
Two variants of the model are defined: the 300x300 and 512x512 models, the
latter obtaining a slightly better accuracy on Pascal VOC.
Usage:
with slim.arg_scope(ssd_vgg.ssd_vgg()):
outputs, end_points = ssd_vgg.ssd_vgg(inputs)
This network is a port of the original Caffe model. The padding in TF and
Caffe is slightly different, and can lead to a severe accuracy drop if not
handled correctly!
In Caffe, the output size of convolution and pooling layers is computed as
follows: h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1
Nevertheless, there is a subtle difference between both for stride > 1. In
the case of convolution:
top_size = floor((bottom_size + 2*pad - kernel_size) / stride) + 1
whereas for pooling:
top_size = ceil((bottom_size + 2*pad - kernel_size) / stride) + 1
Hence implicitly allowing some additional padding even if pad = 0. This
behaviour explains why pooling with stride and kernel of size 2 behaves
the same way in TensorFlow and Caffe.
Nevertheless, this is not the case anymore for other kernel sizes, hence
motivating the use of special padding layer for controlling these side-effects.
@@ssd_vgg_300
"""
import math
from collections import namedtuple
import numpy as np
import tensorflow as tf
import tf_extended as tfe
from nets import custom_layers
from nets import ssd_common
slim = tf.contrib.slim
# =========================================================================== #
# SSD class definition.
# =========================================================================== #
# Immutable bundle of every hyper-parameter describing an SSD model:
# input geometry, feature-map layout, anchor-box geometry and the
# scaling factors used when encoding ground-truth boxes.
SSDParams = namedtuple(
    'SSDParameters',
    ['img_shape', 'num_classes', 'no_annotation_label',
     'feat_layers', 'feat_shapes',
     'anchor_size_bounds', 'anchor_sizes', 'anchor_ratios',
     'anchor_steps', 'anchor_offset',
     'normalizations', 'prior_scaling'])
class SSDNet(object):
"""Implementation of the SSD VGG-based 300 network.
The default features layers with 300x300 image input are:
conv4 ==> 38 x 38
conv7 ==> 19 x 19
conv8 ==> 10 x 10
conv9 ==> 5 x 5
conv10 ==> 3 x 3
conv11 ==> 1 x 1
The default image size used to train this network is 300x300.
"""
default_params = SSDParams(
img_shape=(300, 300),
num_classes=21, # 自己的类别+1
no_annotation_label=21, # 自己的类别+1
feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
anchor_size_bounds=[0.15, 0.90],
# anchor_size_bounds=[0.20, 0.90],
anchor_sizes=[(21., 45.),
(45., 99.),
(99., 153.),
(153., 207.),
(207., 261.),
(261., 315.)],
# anchor_sizes=[(30., 60.),
# (60., 111.),
# (111., 162.),
# (162., 213.),
# (213., 264.),
# (264., 315.)],
anchor_ratios=[[2, .5],
[2, .5, 3, 1./3],
[2, .5, 3, 1./3],
[2, .5, 3, 1./3],
[2, .5],
[2, .5]],
anchor_steps=[8, 16, 32, 64, 100, 300],
anchor_offset=0.5,
normalizations=[20, -1, -1, -1, -1, -1],
prior_scaling=[0.1, 0.1, 0.2, 0.2]
)
def __init__(self, params=None):
"""Init the SSD net with some parameters. Use the default ones
if none provided.
"""
if isinstance(params, SSDParams):
self.params = params
else:
self.params = SSDNet.default_params
# ======================================================================= #
def net(self, inputs,
is_training=True,
update_feat_shapes=True,
dropout_keep_prob=0.5,
prediction_fn=slim.softmax,
reuse=None,
scope='ssd_300_vgg'):
"""SSD network definition.
"""
r = ssd_net(inputs,
num_classes=self.params.num_classes,
feat_layers=self.params.feat_layers,
anchor_sizes=self.params.anchor_sizes,
anchor_ratios=self.params.anchor_ratios,
normalizations=self.params.normalizations,
is_training=is_training,
dropout_keep_prob=dropout_keep_prob,
prediction_fn=prediction_fn,
reuse=reuse,
scope=scope)
# Update feature shapes (try at least!)
if update_feat_shapes:
shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
self.params = self.params._replace(feat_shapes=shapes)
return r
def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
"""Network arg_scope.
"""
return ssd_arg_scope(weight_decay, data_format=data_format)
def arg_scope_caffe(self, caffe_scope):
"""Caffe arg_scope used for weights importing.
"""
return ssd_arg_scope_caffe(caffe_scope)
# ======================================================================= #
def update_feature_shapes(self, predictions):
"""Update feature shapes from predictions collection (Tensor or Numpy
array).
"""
shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes)
self.params = self.params._replace(feat_shapes=shapes)
def anchors(self, img_shape, dtype=np.float32):
"""Compute the default anchor boxes, given an image shape.
"""
return ssd_anchors_all_layers(img_shape,
self.params.feat_shapes,
self.params.anchor_sizes,
self.params.anchor_ratios,
self.params.anchor_steps,
self.params.anchor_offset,
dtype)
def bboxes_encode(self, labels, bboxes, anchors,
scope=None):
"""Encode labels and bounding boxes.
"""
return ssd_common.tf_ssd_bboxes_encode(
labels, bboxes, anchors,
self.params.num_classes,
self
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
使用深度学习网络(目标检测/特征提取/特征匹配)建立的图像精准检索系统 整体说明 系统的实现背景基于:阿里天池的淘宝直播商品识别竞赛,直达链接;项目中的演示数据也是出于这里。可以将该竞赛理解为:输入直播视频,在商品库中进行检索,然后输出最匹配的商品。 该项目综合性程度很高的图像检索应用,包括:图像ROI区域检测与提取(采用了SSD)、ROI区域的分类(VGG-16与部分拼接网络)、ROI区域局部特征(LBP)/全局特征的提取(VGG-16与部分拼接网络,使用Triplet进行微调)和基于提取特征的检索算法(暴力匹配)的设计与实现。 系统检索效果上,在比赛官方的验证集上取得了Top20的成绩。 在自己的本地数据集上,随机抽取的100件商品集合中,目前的精确匹配率可以达到35%。
资源推荐
资源详情
资源评论
收起资源包目录
人工智能-项目实践-信息检索-使用深度学习网络(目标检测/特征提取/特征匹配)建立的图像精准检索系统 (307个子文件)
bootstrap.css 143KB
bootstrap.min.css 118KB
bootstrap-theme.css 26KB
bootstrap-theme.min.css 23KB
glyphicons-halflings-regular.eot 20KB
匹配正确的.gif 133B
匹配错误的.gif 133B
homepage.html 2KB
0.jpg 261KB
1.jpg 254KB
0.jpg 253KB
5.jpg 253KB
4.jpg 251KB
1.jpg 248KB
5.jpg 247KB
4.jpg 244KB
3.jpg 244KB
3.jpg 238KB
5.jpg 238KB
3.jpg 232KB
5.jpg 231KB
3.jpg 229KB
4.jpg 228KB
2.jpg 226KB
2.jpg 226KB
1.jpg 225KB
4.jpg 222KB
4.jpg 222KB
2.jpg 221KB
1.jpg 220KB
2.jpg 219KB
4.jpg 217KB
4.jpg 210KB
4.jpg 202KB
0.jpg 198KB
5.jpg 196KB
1.jpg 196KB
0.jpg 193KB
3.jpg 191KB
1.jpg 191KB
3.jpg 190KB
1.jpg 189KB
5.jpg 188KB
1.jpg 187KB
3.jpg 183KB
3.jpg 183KB
1.jpg 182KB
3.jpg 181KB
0.jpg 180KB
3.jpg 177KB
1.jpg 177KB
2.jpg 175KB
4.jpg 174KB
2.jpg 171KB
0.jpg 170KB
4.jpg 170KB
0.jpg 165KB
2.jpg 163KB
1.jpg 159KB
2.jpg 157KB
0.jpg 157KB
0.jpg 157KB
0.jpg 152KB
1.jpg 151KB
2.jpg 149KB
3.jpg 148KB
2.jpg 143KB
5.jpg 141KB
3.jpg 140KB
4.jpg 140KB
3.jpg 139KB
2.jpg 135KB
5.jpg 135KB
5.jpg 133KB
1.jpg 133KB
0.jpg 132KB
0.jpg 132KB
4.jpg 131KB
3.jpg 131KB
3.jpg 131KB
0.jpg 130KB
5.jpg 129KB
3.jpg 128KB
0.jpg 128KB
1.jpg 127KB
3.jpg 127KB
4.jpg 127KB
2.jpg 127KB
0.jpg 125KB
1.jpg 124KB
3.jpg 124KB
0.jpg 124KB
4.jpg 123KB
0.jpg 120KB
1.jpg 120KB
4.jpg 116KB
0.jpg 114KB
2.jpg 111KB
4.jpg 110KB
5.jpg 105KB
共 307 条
- 1
- 2
- 3
- 4
资源评论
博士僧小星
- 粉丝: 1774
- 资源: 5875
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功