from __future__ import absolute_import
import argparse
import csv
import logging
import os
import random
import sys
from io import open
import pandas as pd
import numpy as np
import torch
import time
import collections
import torch.nn as nn
from collections import defaultdict
import gc
import itertools
from multiprocessing import Pool
import functools
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
TensorDataset, Dataset)
from typing import Callable, Dict, List, Generator, Tuple
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm
import json
import math
from bert_utils import load_and_cache_examples, set_seed, get_f1
from itertools import cycle
from transformers import (
WEIGHTS_NAME,
AdamW,
BertConfig,
BertForSequenceClassification,
BertTokenizer,
AlbertConfig,
AlbertForSequenceClassification,
AlbertTokenizer,
DistilBertConfig,
DistilBertForSequenceClassification,
DistilBertTokenizer,
XLMConfig,
XLMForSequenceClassification,
XLMTokenizer,
get_linear_schedule_with_warmup,
)
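# Map each supported --model_type value to its (config, model, tokenizer) classes.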
MODEL_CLASSES = {
"bert": (BertConfig, BertForSequenceClassification, BertTokenizer),
"albert": (AlbertConfig, AlbertForSequenceClassification, BertTokenizer),
"xlm": (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
"distilbert": (DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer),
}
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S',
level = logging.INFO)
logger = logging.getLogger(__name__)
def main():
parser = argparse.ArgumentParser()
## Required parameters
parser.add_argument("--data_dir", default=None, type=str, required=True,
help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
help="")
parser.add_argument("--output_dir", default=None, type=str, required=True,
help="The output directory where the model predictions and checkpoints will be written.")
parser.add_argument("--train_language", default=None, type=str, required=True)
parser.add_argument("--model_type", type=str, required=True)
## Other parameters
parser.add_argument("--max_seq_length", default=256, type=int,
help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.")
parser.add_argument("--num_labels", default=2, type=int)
parser.add_argument("--overwrite_cache", action='store_true')
parser.add_argument("--config_name", default=None, type=str)
parser.add_argument("--tokenizer_name", default=None, type=str)
parser.add_argument("--do_train", action='store_true',
help="Whether to run training.")
parser.add_argument("--do_lower_case", action='store_true')
parser.add_argument("--do_test", action='store_true',
help="Whether to run training.")
parser.add_argument("--do_eval", action='store_true',
help="Whether to run eval on the dev set.")
parser.add_argument('--from_tf', action='store_true',
help='Whether to load weights from a TensorFlow checkpoint.')
parser.add_argument("--do_eval_train", action='store_true',
help="Whether to run eval on the train set.")
parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
help="Batch size per GPU/CPU for training.")
parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
help="Batch size per GPU/CPU for evaluation.")
parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument("--local_rank", default=-1, type=int)
parser.add_argument("--learning_rate", default=1e-4, type=float,
help="The initial learning rate for Adam.")
parser.add_argument("--weight_decay", default=0.0, type=float,
help="Weight deay if we apply some.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float,
help="Epsilon for Adam optimizer.")
parser.add_argument("--eval_steps", default=-1, type=int,
help="")
parser.add_argument("--train_steps", default=-1, type=int,
help="")
parser.add_argument("--warmup_steps", default=0, type=int,
help="Linear warmup over warmup_steps.")
parser.add_argument('--seed', type=int, default=1,
help="random seed for initialization")
args = parser.parse_args()
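# Run on GPU when CUDA is available; n_gpu drives DataParallel and batch-size scaling below.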
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
args.n_gpu = torch.cuda.device_count()
args.device = device
# Set seed
set_seed(args)
args.model_type = args.model_type.lower()
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path, num_labels=args.num_labels)
tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case)
logger.info("Training/evaluation parameters %s", args)
args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
# Training
if args.do_train:
# Prepare model
model = model_class.from_pretrained(args.model_name_or_path, from_tf=args.from_tf, config=config)
# fgm = FGM(model)
model.to(args.device)
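# Replicate the model across GPUs with DataParallel when more than one is available.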
if args.n_gpu > 1:
model = torch.nn.DataParallel(model)
args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
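# load_and_cache_examples (from bert_utils) is expected to build or reload a cached TensorDataset
# of tokenized features from args.data_dir for the training split.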
train_dataset = load_and_cache_examples(args, tokenizer, is_training=1)
train_sampler = RandomSampler(train_dataset)
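# Each forward pass uses train_batch_size // gradient_accumulation_steps examples, so the
# effective batch size per optimizer update remains train_batch_size.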
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size//args.gradient_accumulation_steps)
num_train_optimization_steps = args.train_steps
# Prepare optimizer
param_optimizer = list(model.named_parameters())
# Hack to remove the pooler, which is not used and thus produces None grads that break apex.
param_optimizer = [n for n in param_optimizer]
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
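# Parameters whose names match no_decay (biases, LayerNorm) get no weight decay; all others use args.weight_decay.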
optimizer_grouped_parameters = [
{'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
{'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
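# (Optional sketch, not in the original flow) a linear warmup schedule could be created from the imported helper:
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
#                                             num_training_steps=num_train_optimization_steps)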
global_step = 0
logger.info("***** Running training *****")
logger.info(" Num examples = %d", len(train_dataset))
logger.info(" Batch size = %d", args.train_batch_size)
logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
logger.info(" Num steps = %d", num_train_optimization_steps)
best_acc = 0
model.train()
tr_loss = 0
nb_tr_examples, nb_tr_steps = 0, 0
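# cycle() makes the DataLoader an infinite iterator, so training length is controlled by --train_steps, not epochs.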
bar = tqdm(range(num_train_optimization_steps), total=num_train_optimization_steps)
train_dataloader = cycle(train_dataloader)
output_dir = args.output_dir + "eval_results_{}_{}_{}_{}_{}_{}".format(
list(filter