# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import division
import argparse
import logging
import time
import numpy as np
from bottleneck import argpartition
import mxnet as mx
from data import cub200_iterator
from mxnet import gluon
from mxnet.gluon.model_zoo import vision as models
from mxnet import autograd as ag, nd
from model import MarginNet, MarginLoss
logging.basicConfig(level=logging.INFO)
# CLI
parser = argparse.ArgumentParser(description='train a margin-based embedding model for image retrieval.')
parser.add_argument('--data-path', type=str, default=r'D:\Work\Data/valid1',
                    help='path of data.')
parser.add_argument('--embed-dim', type=int, default=128,
help='dimensionality of image embedding. default is 128.')
parser.add_argument('--batch-size', type=int, default=70,
help='training batch size per device (CPU/GPU). default is 70.')
parser.add_argument('--batch-k', type=int, default=5,
help='number of images per class in a batch. default is 5.')
parser.add_argument('--gpus', type=str, default='',
help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
parser.add_argument('--epochs', type=int, default=20,
help='number of training epochs. default is 20.')
parser.add_argument('--optimizer', type=str, default='adam',
help='optimizer. default is adam.')
parser.add_argument('--lr', type=float, default=0.0001,
help='learning rate. default is 0.0001.')
parser.add_argument('--lr-beta', type=float, default=0.1,
help='learning rate for the beta in margin based loss. default is 0.1.')
parser.add_argument('--margin', type=float, default=0.2,
help='margin for the margin based loss. default is 0.2.')
parser.add_argument('--beta', type=float, default=1.2,
help='initial value for beta. default is 1.2.')
parser.add_argument('--nu', type=float, default=0.0,
help='regularization parameter for beta. default is 0.0.')
parser.add_argument('--factor', type=float, default=0.5,
help='learning rate schedule factor. default is 0.5.')
parser.add_argument('--steps', type=str, default='12,14,16,18',
help='epochs to update learning rate. default is 12,14,16,18.')
parser.add_argument('--wd', type=float, default=0.0001,
help='weight decay rate. default is 0.0001.')
parser.add_argument('--seed', type=int, default=123,
help='random seed to use. default=123.')
parser.add_argument('--model', type=str, default='resnet50_v2',
help='type of model to use. see vision_model for options.')
parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
help='prefix of models to be saved.')
parser.add_argument('--use-pretrained', action='store_true',
help='enable using pretrained model from gluon.')
parser.add_argument('--kvstore', type=str, default='device',
help='kvstore to use for trainer.')
parser.add_argument('--log-interval', type=int, default=20,
help='number of batches to wait before logging.')
opt = parser.parse_args()
logging.info(opt)
# Settings.
mx.random.seed(opt.seed)
np.random.seed(opt.seed)
batch_size = opt.batch_size
gpus = [] if not opt.gpus else [int(gpu) for gpu in opt.gpus.split(',')]
num_gpus = len(gpus)
batch_size *= max(1, num_gpus)
context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
steps = [int(step) for step in opt.steps.split(',')]
# Construct model.
kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
net = models.get_model(opt.model, **kwargs)
if opt.use_pretrained:
# Use a smaller learning rate for pre-trained convolutional layers.
for v in net.collect_params().values():
if 'conv' in v.name:
            v.lr_mult = 0.01
net.hybridize()
net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
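# One beta per class for the margin-based loss; the standard CUB-200-2011
# retrieval split trains on the first 100 of its 200 classes.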
beta = mx.gluon.Parameter('beta', shape=(100,))
# Get iterators.
train_data, val_data = cub200_iterator(opt.data_path, opt.batch_k, batch_size, (3, 224, 224))
def get_distance_matrix(x):
    """Compute the pairwise Euclidean distance matrix of a batch of embeddings. Used in testing."""
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for all pairs at once.
    square = nd.sum(x ** 2.0, axis=1, keepdims=True)
    distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
    # Clamp tiny negatives from floating-point error so sqrt does not return NaN.
    return nd.sqrt(nd.maximum(distance_square, 0.0))
def evaluate_emb(emb, labels):
"""Evaluate embeddings based on Recall@k."""
d_mat = get_distance_matrix(emb)
d_mat = d_mat.asnumpy()
labels = labels.asnumpy()
names = []
accs = []
for k in [1, 2, 4, 8, 16]:
names.append('Recall@%d' % k)
correct, cnt = 0.0, 0.0
for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10  # exclude the query itself from its neighbor set
            nns = argpartition(d_mat[i], k)[:k]  # indices of the k nearest neighbors
if any(labels[i] == labels[nn] for nn in nns):
correct += 1
cnt += 1
accs.append(correct/cnt)
return names, accs
def test(ctx):
"""Test a model."""
val_data.reset()
outputs = []
labels = []
for batch in val_data:
data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
for x in data:
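            # The last output of MarginNet is the embedding of the whole batch.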
outputs.append(net(x)[-1])
labels += label
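    # Trim to the first n_test rows to drop any samples the iterator padded
    # into the final batch.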
outputs = nd.concatenate(outputs, axis=0)[:val_data.n_test]
labels = nd.concatenate(labels, axis=0)[:val_data.n_test]
return evaluate_emb(outputs, labels)
def get_lr(lr, epoch, steps, factor):
"""Get learning rate based on schedule."""
for s in steps:
if epoch >= s:
lr *= factor
return lr
def train(epochs, ctx):
"""Training function."""
if isinstance(ctx, mx.Context):
ctx = [ctx]
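    # Xavier init only touches parameters that are still uninitialized;
    # pretrained weights loaded above are kept as-is.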
net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
    if opt.optimizer == 'sgd':
        opt_options['momentum'] = 0.9
    elif opt.optimizer == 'adam':
        opt_options['epsilon'] = 1e-7
trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
opt_options,
kvstore=opt.kvstore)
if opt.lr_beta > 0.0:
# Jointly train class-specific beta.
# See "sampling matters in deep embedding learning" paper for details.
beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
trainer_beta = gluon.Trainer([beta], 'sgd',
{'learning_rate': opt.lr_beta, 'momentum': 0.9},
kvstore=opt.kvstore)
loss = MarginLoss(margin=opt.margin, nu=opt.nu)
best_val = 0.0
for epoch in range(epochs):
tic = time.time()
prev_loss, cumulative_loss = 0.0, 0.0
# Learning rate schedule.
trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
if opt.lr_beta > 0.0:
            trainer_beta.set_learning_rate(
                get_lr(opt.lr_beta, epoch, steps, opt.factor))
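        # Inner training loop: a sketch that assumes net(x) returns
        # (a_indices, anchors, positives, negatives, embeddings), matching the
        # net(x)[-1] usage in test(), and that MarginLoss is called as
        # loss(anchors, positives, negatives, beta, labels).
        train_data.reset()
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            Ls = []
            with ag.record():
                for x, y in zip(data, label):
                    a_indices, anchors, positives, negatives, _ = net(x)
                    if opt.lr_beta > 0.0:
                        # Learned, class-specific beta.
                        L = loss(anchors, positives, negatives, beta, y[a_indices])
                    else:
                        # Fixed scalar beta.
                        L = loss(anchors, positives, negatives, opt.beta, None)
                    # Collect the losses and run backward only after the
                    # forward passes on all devices, for multi-GPU speed.
                    Ls.append(L)
                    cumulative_loss += nd.mean(L).asscalar()
                for L in Ls:
                    L.backward()
            # Update with the effective (global) batch size.
            trainer.step(batch.data[0].shape[0])
            if opt.lr_beta > 0.0:
                trainer_beta.step(batch.data[0].shape[0])
            if (i + 1) % opt.log_interval == 0:
                logging.info('[Epoch %d, Iter %d] training loss=%f',
                             epoch, i + 1, cumulative_loss - prev_loss)
                prev_loss = cumulative_loss
        logging.info('[Epoch %d] training loss=%f', epoch, cumulative_loss)
        logging.info('[Epoch %d] time cost: %f', epoch, time.time() - tic)
        # Validate and keep the parameters with the best Recall@1.
        names, val_accs = test(ctx)
        for name, val_acc in zip(names, val_accs):
            logging.info('[Epoch %d] validation: %s=%f', epoch, name, val_acc)
        if val_accs[0] > best_val:
            best_val = val_accs[0]
            logging.info('Saving %s.', opt.save_model_prefix)
            net.save_parameters('%s.params' % opt.save_model_prefix)
    return best_val


if __name__ == '__main__':
    best_val_recall = train(opt.epochs, context)
    print('Best validation Recall@1: %.2f.' % best_val_recall)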