From 0f83d3f3ccef022dd941cdd6b00edbe9d9b577ac Mon Sep 17 00:00:00 2001 From: "chongjiu.jin" Date: Tue, 24 Dec 2019 10:26:47 +0800 Subject: [PATCH] python bert code fix --- pytorch-bert-code/bert-example.py | 2 +- pytorch-bert-code/bert.py | 47 ++++++++++++++----------------- pytorch-bert-code/bert/README.md | 14 ++++++++- pytorch-bert-code/run.py | 3 +- pytorch-bert-code/train_eval.py | 18 +++++++++++- 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/pytorch-bert-code/bert-example.py b/pytorch-bert-code/bert-example.py index ff1999c..8935279 100644 --- a/pytorch-bert-code/bert-example.py +++ b/pytorch-bert-code/bert-example.py @@ -32,7 +32,7 @@ bert_config = BertConfig.from_json_file(bert_config_file) tokenizer = tokenization(vocab_file=vocab_file, do_lower_case=do_lower_case) # 加载模型 -model_bert = BertModel.from_pretrained(bert_path) +model_bert = BertModel.from_pretrained(bert_path,config=bert_config) model_bert.to(device) diff --git a/pytorch-bert-code/bert.py b/pytorch-bert-code/bert.py index f2990b2..2f37e2e 100644 --- a/pytorch-bert-code/bert.py +++ b/pytorch-bert-code/bert.py @@ -1,30 +1,29 @@ # coding: UTF-8 import torch import torch.nn as nn -import torch.nn.functional as F # from pytorch_pretrained_bert import BertModel, BertTokenizer -from transformers import BertModel, BertTokenizer - +from transformers import BertModel, BertTokenizer,BertConfig +import os class Config(object): """配置参数""" def __init__(self, dataset): self.model_name = 'bert' - self.train_path = dataset + '/data/train.txt' - self.dev_path = dataset + '/data/dev.txt' - self.test_path = dataset + '/data/test.txt' + self.train_path = dataset + '/data/train.txt' # 训练集 + self.dev_path = dataset + '/data/dev.txt' # 验证集 + self.test_path = dataset + '/data/test.txt' # 测试集 self.class_list = [x.strip() for x in open( - dataset + '/data/class.txt').readlines()] - self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' - self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + dataset + '/data/class.txt').readlines()] # 类别名单 + self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 - - self.num_classes = len(self.class_list) - self.num_epochs = 3 - self.batch_size = 128 - self.pad_size = 32 - self.learning_rate = 5e-5 + self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 + self.num_classes = len(self.class_list) # 类别数 + self.num_epochs = 3 # epoch数 + self.batch_size = 128 # mini-batch大小 + self.pad_size = 32 # 每句话处理成的长度(短填长切) + self.learning_rate = 5e-5 # 学习率 self.bert_path = './bert' self.tokenizer = BertTokenizer.from_pretrained(self.bert_path) self.hidden_size = 768 @@ -34,20 +33,16 @@ class Model(nn.Module): def __init__(self, config): super(Model, self).__init__() - self.bert = BertModel.from_pretrained(config.bert_path) + bert_config_file = os.path.join(config.bert_path, f'bert_config.json') + bert_config = BertConfig.from_json_file(bert_config_file) + self.bert = BertModel.from_pretrained(config.bert_path,config=bert_config) for param in self.bert.parameters(): param.requires_grad = True self.fc = nn.Linear(config.hidden_size, config.num_classes) - - def forward(self, input_ids,# 输入的句子 - input_mask,# 对padding部分进行mask,和句子一个size,padding部分用0表示,如:[1, 1, 1, 1, 0, 0] - segments_ids - ): - _, pooled = self.bert(input_ids, attention_mask=input_mask,token_type_ids=segments_ids)#pooled [batch_size, hidden_size] + def forward(self, x): + context = x[0] # 输入的句子 + mask = x[2] # 对padding部分进行mask,和句子一个size,padding部分用0表示,如:[1, 1, 1, 1, 0, 0] + _, pooled = self.bert(context, attention_mask=mask) out = self.fc(pooled) return out - def loss(self,outputs,labels): - criterion=F.cross_entropy - loss = criterion(outputs, labels) - return loss diff --git a/pytorch-bert-code/bert/README.md b/pytorch-bert-code/bert/README.md index 6b12a65..098ea2b 100644 --- a/pytorch-bert-code/bert/README.md +++ b/pytorch-bert-code/bert/README.md @@ -1,6 +1,18 @@ -### how to convert bert Converting Tensorflow Checkpoints to pytorch model file + +update to transformer 2.3.0 + +转换工具已经失效 + + chinese bert + + https://github.com/ymcui/Chinese-BERT-wwm/blob/master/README_EN.md + +下载 BERT-wwm-ext, Chinese 或者 BERT-wwm, Chinese pytorch模型 +------- + +transformer 2.1.1 ### 如何将bert model 的Tensorflow模型 转换为pytorch模型 diff --git a/pytorch-bert-code/run.py b/pytorch-bert-code/run.py index 5e9775e..23e2ab3 100644 --- a/pytorch-bert-code/run.py +++ b/pytorch-bert-code/run.py @@ -2,7 +2,8 @@ import time import torch import numpy as np -from train_eval import train +from train_eval import train, init_network +from importlib import import_module import argparse from utils import build_dataset, build_iterator, get_time_dif import bert diff --git a/pytorch-bert-code/train_eval.py b/pytorch-bert-code/train_eval.py index 5303307..50fbabe 100644 --- a/pytorch-bert-code/train_eval.py +++ b/pytorch-bert-code/train_eval.py @@ -9,7 +9,23 @@ from utils import get_time_dif from transformers.optimization import AdamW - +# 权重初始化,默认xavier +def init_network(model, method='xavier', exclude='embedding', seed=123): + for name, w in model.named_parameters(): + if exclude not in name: + if len(w.size()) < 2: + continue + if 'weight' in name: + if method == 'xavier': + nn.init.xavier_normal_(w) + elif method == 'kaiming': + nn.init.kaiming_normal_(w) + else: + nn.init.normal_(w) + elif 'bias' in name: + nn.init.constant_(w, 0) + else: + pass def train(config, model, train_iter, dev_iter, test_iter):