update to transformers
README.md

@@ -2,7 +2,7 @@
 ## Description

-- Chinese version of the GPT2 training code, using a BERT tokenizer or BPE tokenizer. It is based on the excellent [Pytorch-Transformers](https://github.com/huggingface/pytorch-transformers) repository from the HuggingFace team. It can write poems, news, or novels, or train general-purpose language models. Supports char-level, word-level, and BPE-level tokenization, as well as large training corpora.
+- Chinese version of the GPT2 training code, using a BERT tokenizer or BPE tokenizer. It is based on the excellent [Transformers](https://github.com/huggingface/transformers) repository from the HuggingFace team. It can write poems, news, or novels, or train general-purpose language models. Supports char-level, word-level, and BPE-level tokenization, as well as large training corpora.

 - Chinese GPT2 training code, using the BERT tokenizer, GPT2's built-in BPE tokenizer, or a Sentencepiece BPE model (thanks to [kangzhonghua](https://github.com/kangzhonghua) for the contribution). It can write poems, news, or novels, or train general-purpose language models. Supports char mode, word-segmentation mode, and BPE mode, as well as large training corpora.
 - WeChat discussion group: see the first Issue.
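As a quick illustration of the char-level path the README describes, the repo's bundled BERT-style tokenizer is built from a plain vocab file, exactly as the generation script in this diff does. A minimal sketch; the vocab path and sample string are assumptions, substitute your own:

# Char-level tokenization sketch using the repo's bundled BERT-style
# tokenizer. The vocab path is an assumption; point it at the vocab file
# you actually train with.
from tokenizations import tokenization_bert

tokenizer = tokenization_bert.BertTokenizer(vocab_file='cache/vocab_small.txt')
tokens = tokenizer.tokenize('秋水共长天一色')
print(tokenizer.convert_tokens_to_ids(tokens))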
eval.py (6 lines changed)
@@ -1,4 +1,4 @@
-import pytorch_transformers
+import transformers
 import torch
 import os
 import json
@@ -65,7 +65,7 @@ def main():

     os.environ["CUDA_VISIBLE_DEVICES"] = args.device  # set which GPUs the program uses

-    model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
+    model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
     print('config:\n' + model_config.to_json_string())

     n_ctx = model_config.n_ctx
@@ -97,7 +97,7 @@ def main():
         print('you need to specify a trained model.')
         exit(1)
     else:
-        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
+        model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
     model.eval()
     model.to(device)

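Note that the commit keeps the module-path style transformers.modeling_gpt2.*, which only resolves in early transformers releases. The same classes are exported at the package top level, which is the stable public API; a sketch of the equivalent flow, with placeholder file paths:

import transformers

# Same flow as eval.py, via the stable top-level names instead of the
# transformers.modeling_gpt2 module path. Paths are placeholders.
model_config = transformers.GPT2Config.from_json_file('config/model_config.json')
model = transformers.GPT2LMHeadModel.from_pretrained('model/final_model')
model.eval()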
@@ -3,7 +3,7 @@ import torch.nn.functional as F
 import os
 import argparse
 from tqdm import trange
-from pytorch_transformers import GPT2LMHeadModel
+from transformers import GPT2LMHeadModel


 def is_word(word):
@@ -1,10 +1,9 @@
 import torch
 import torch.nn.functional as F
-import pytorch_transformers
 import os
 import argparse
 from tqdm import trange
-from pytorch_transformers import GPT2LMHeadModel
+from transformers import GPT2LMHeadModel


 os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # set which GPUs the program uses
@@ -139,7 +138,6 @@ def main():
     device = "cuda" if torch.cuda.is_available() else "cpu"

     tokenizer = tokenization_bert.BertTokenizer(vocab_file=args.tokenizer_path)
-    model_config = pytorch_transformers.GPT2Config.from_json_file(args.model_config)
     model = GPT2LMHeadModel.from_pretrained(args.model_path)
     model.to(device)
     model.eval()

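The second hunk can delete the explicit GPT2Config load outright because from_pretrained() reads config.json from the model directory on its own. A minimal sketch; the directory name is a placeholder:

from transformers import GPT2LMHeadModel

# 'model/final_model' is a placeholder for a directory containing
# pytorch_model.bin plus config.json; the config is loaded implicitly
# alongside the weights, so no separate from_json_file() call is needed.
model = GPT2LMHeadModel.from_pretrained('model/final_model')
print(model.config.n_ctx)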
requirements.txt

@@ -1,4 +1,4 @@
-pytorch-transformers
+transformers
 torch
 numpy
 tqdm

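Because the training scripts still call transformers.WarmupLinearSchedule and the transformers.modeling_gpt2 module path, the unpinned requirement implicitly assumes an early transformers release; later releases dropped both. A quick post-install sanity check (the exact version window is an assumption):

import transformers

# WarmupLinearSchedule only exists in early transformers releases; later
# ones replaced it with get_linear_schedule_with_warmup().
print(transformers.__version__)
assert hasattr(transformers, 'WarmupLinearSchedule'), \
    'installed transformers is too new for this code'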
tokenizations/tokenization_bert.py

@@ -22,7 +22,7 @@ import os
 import unicodedata
 from io import open

-from pytorch_transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.tokenization_utils import PreTrainedTokenizer

 logger = logging.getLogger(__name__)

train.py (12 lines changed)
@@ -1,4 +1,4 @@
-import pytorch_transformers
+import transformers
 import torch
 import os
 import json
@@ -78,7 +78,7 @@ def main():

     os.environ["CUDA_VISIBLE_DEVICES"] = args.device  # set which GPUs the program uses

-    model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
+    model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
     print('config:\n' + model_config.to_json_string())

     n_ctx = model_config.n_ctx
@@ -118,9 +118,9 @@ def main():
     print('files built')

     if not args.pretrained_model:
-        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
+        model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
     else:
-        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
+        model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
     model.train()
     model.to(device)

@@ -139,8 +139,8 @@ def main():
     total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
     print('total steps = {}'.format(total_steps))

-    optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
-    scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
+    optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
+    scheduler = transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
                                                   t_total=total_steps)
     if fp16:
         try:

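If you later upgrade past the release that removed WarmupLinearSchedule, the optimizer/scheduler pair above maps onto the get_linear_schedule_with_warmup() helper (and transformers.AdamW was itself deprecated in favor of torch.optim.AdamW, which applies the bias correction that correct_bias=True requested). A self-contained sketch with illustrative values:

import torch
import transformers

model = torch.nn.Linear(4, 4)             # stand-in for the GPT-2 model
warmup_steps, total_steps = 2000, 100000  # illustrative values

optimizer = torch.optim.AdamW(model.parameters(), lr=1.5e-4)
# Replacement for WarmupLinearSchedule(optimizer, warmup_steps, t_total):
scheduler = transformers.get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)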
@@ -1,4 +1,4 @@
-import pytorch_transformers
+import transformers
 import torch
 import os
 import json
@@ -69,7 +69,7 @@ def main():
     from tokenizations import tokenization_bert

     os.environ["CUDA_VISIBLE_DEVICES"] = args.device  # set which GPUs the program uses
-    model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
+    model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
     print('config:\n' + model_config.to_json_string())

     n_ctx = model_config.n_ctx
@@ -101,9 +101,9 @@ def main():
     print('files built')

     if not args.pretrained_model:
-        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
+        model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
     else:
-        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
+        model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
     model.train()
     model.to(device)
     multi_gpu = False
@@ -115,8 +115,8 @@ def main():
     total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
     print('total steps = {}'.format(total_steps))

-    optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
-    scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
+    optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
+    scheduler = transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
                                                   t_total=total_steps)
     if fp16:
         try:

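The total_steps formula in these hunks counts optimizer steps, not batches: (full_len / stride) samples per epoch, times epochs, divided by batch_size and again by gradient_accumulation, because the optimizer only steps once every gradient_accumulation batches. A minimal sketch of that accumulation pattern, with all values illustrative:

import torch
import torch.nn.functional as F

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
gradient_accumulation = 4

for i in range(16):                              # 16 batches -> 4 optimizer steps
    x, y = torch.randn(8, 4), torch.randn(8, 1)
    loss = F.mse_loss(model(x), y)
    (loss / gradient_accumulation).backward()    # scale so accumulated grads average
    if (i + 1) % gradient_accumulation == 0:
        optimizer.step()
        optimizer.zero_grad()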