update to transformers

This commit is contained in:
Duzeyao
2019-10-25 23:37:34 +08:00
parent f4ab09186c
commit 9b630a53fe
8 changed files with 20 additions and 22 deletions

View File

@@ -2,7 +2,7 @@
## Description
- Chinese version of GPT2 training code, using BERT tokenizer or BPE tokenizer. It is based on the extremely awesome repository from HuggingFace team [Pytorch-Transformers](https://github.com/huggingface/pytorch-transformers). Can write poems, news, novels, or train general language models. Support char level, word level and BPE level. Support large training corpus.
- Chinese version of GPT2 training code, using either the BERT tokenizer or a BPE tokenizer. It is based on the extremely awesome repository from the HuggingFace team, [Transformers](https://github.com/huggingface/transformers). It can write poems, news, and novels, or train general language models. Supports char level, word level and BPE level. Supports large training corpora.
- 中文的GPT2训练代码，使用BERT的Tokenizer或GPT2自带的BPE Tokenizer或Sentencepiece的BPE model（感谢[kangzhonghua](https://github.com/kangzhonghua)的贡献）。可以写诗，新闻，小说，或是训练通用语言模型。支持字为单位或是分词模式或是BPE模式。支持大语料训练。
- 微信交流群请见Issue第一条。

View File

@@ -1,4 +1,4 @@
import pytorch_transformers
import transformers
import torch
import os
import json
@@ -65,7 +65,7 @@ def main():
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
print('config:\n' + model_config.to_json_string())
n_ctx = model_config.n_ctx
@@ -97,7 +97,7 @@ def main():
print('you need to specify a trained model.')
exit(1)
else:
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
model.eval()
model.to(device)

View File

@@ -3,7 +3,7 @@ import torch.nn.functional as F
import os
import argparse
from tqdm import trange
from pytorch_transformers import GPT2LMHeadModel
from transformers import GPT2LMHeadModel
def is_word(word):

View File

@@ -1,10 +1,9 @@
import torch
import torch.nn.functional as F
import pytorch_transformers
import os
import argparse
from tqdm import trange
from pytorch_transformers import GPT2LMHeadModel
from transformers import GPT2LMHeadModel
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" # 此处设置程序使用哪些显卡
@@ -139,7 +138,6 @@ def main():
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = tokenization_bert.BertTokenizer(vocab_file=args.tokenizer_path)
model_config = pytorch_transformers.GPT2Config.from_json_file(args.model_config)
model = GPT2LMHeadModel.from_pretrained(args.model_path)
model.to(device)
model.eval()

View File

@@ -1,4 +1,4 @@
pytorch-transformers
transformers
torch
numpy
tqdm

View File

@@ -22,7 +22,7 @@ import os
import unicodedata
from io import open
from pytorch_transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils import PreTrainedTokenizer
logger = logging.getLogger(__name__)

View File

@@ -1,4 +1,4 @@
import pytorch_transformers
import transformers
import torch
import os
import json
@@ -78,7 +78,7 @@ def main():
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
print('config:\n' + model_config.to_json_string())
n_ctx = model_config.n_ctx
@@ -118,9 +118,9 @@ def main():
print('files built')
if not args.pretrained_model:
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
else:
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
model.train()
model.to(device)
@@ -139,8 +139,8 @@ def main():
total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
print('total steps = {}'.format(total_steps))
optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
scheduler = transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
t_total=total_steps)
if fp16:
try:

View File

@@ -1,4 +1,4 @@
import pytorch_transformers
import transformers
import torch
import os
import json
@@ -69,7 +69,7 @@ def main():
from tokenizations import tokenization_bert
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
print('config:\n' + model_config.to_json_string())
n_ctx = model_config.n_ctx
@@ -101,9 +101,9 @@ def main():
print('files built')
if not args.pretrained_model:
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
else:
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
model.train()
model.to(device)
multi_gpu = False
@@ -115,8 +115,8 @@ def main():
total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
print('total steps = {}'.format(total_steps))
optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
scheduler = transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
t_total=total_steps)
if fp16:
try: