update to transformers
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
||||||
- Chinese version of GPT2 training code, using either the BERT tokenizer or a BPE tokenizer. It is based on the extremely awesome repository from the HuggingFace team, [Pytorch-Transformers](https://github.com/huggingface/pytorch-transformers). It can write poems, news, and novels, or train general language models. It supports char-level, word-level and BPE-level tokenization, as well as large training corpora.
|
- Chinese version of GPT2 training code, using either the BERT tokenizer or a BPE tokenizer. It is based on the extremely awesome repository from the HuggingFace team, [Transformers](https://github.com/huggingface/transformers). It can write poems, news, and novels, or train general language models. It supports char-level, word-level and BPE-level tokenization, as well as large training corpora.
|
||||||
- 中文的GPT2训练代码,使用BERT的Tokenizer或GPT2自带的BPE Tokenizer或Sentencepiece的BPE model(感谢[kangzhonghua](https://github.com/kangzhonghua)的贡献)。可以写诗,新闻,小说,或是训练通用语言模型。支持字为单位或是分词模式或是BPE模式。支持大语料训练。
|
- 中文的GPT2训练代码,使用BERT的Tokenizer或GPT2自带的BPE Tokenizer或Sentencepiece的BPE model(感谢[kangzhonghua](https://github.com/kangzhonghua)的贡献)。可以写诗,新闻,小说,或是训练通用语言模型。支持字为单位或是分词模式或是BPE模式。支持大语料训练。
|
||||||
- 微信交流群:请见Issue第一条。
|
- 微信交流群:请见Issue第一条。
|
||||||
|
|
||||||
|
|||||||
6
eval.py
6
eval.py
@@ -1,4 +1,4 @@
|
|||||||
import pytorch_transformers
|
import transformers
|
||||||
import torch
|
import torch
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
@@ -65,7 +65,7 @@ def main():
|
|||||||
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
|
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
|
||||||
|
|
||||||
model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
|
model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
|
||||||
print('config:\n' + model_config.to_json_string())
|
print('config:\n' + model_config.to_json_string())
|
||||||
|
|
||||||
n_ctx = model_config.n_ctx
|
n_ctx = model_config.n_ctx
|
||||||
@@ -97,7 +97,7 @@ def main():
|
|||||||
print('you need to specify a trained model.')
|
print('you need to specify a trained model.')
|
||||||
exit(1)
|
exit(1)
|
||||||
else:
|
else:
|
||||||
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
|
model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
|
||||||
model.eval()
|
model.eval()
|
||||||
model.to(device)
|
model.to(device)
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import torch.nn.functional as F
|
|||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
from tqdm import trange
|
from tqdm import trange
|
||||||
from pytorch_transformers import GPT2LMHeadModel
|
from transformers import GPT2LMHeadModel
|
||||||
|
|
||||||
|
|
||||||
def is_word(word):
|
def is_word(word):
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import pytorch_transformers
|
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
from tqdm import trange
|
from tqdm import trange
|
||||||
from pytorch_transformers import GPT2LMHeadModel
|
from transformers import GPT2LMHeadModel
|
||||||
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" # 此处设置程序使用哪些显卡
|
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" # 此处设置程序使用哪些显卡
|
||||||
|
|
||||||
@@ -139,7 +138,6 @@ def main():
|
|||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
tokenizer = tokenization_bert.BertTokenizer(vocab_file=args.tokenizer_path)
|
tokenizer = tokenization_bert.BertTokenizer(vocab_file=args.tokenizer_path)
|
||||||
model_config = pytorch_transformers.GPT2Config.from_json_file(args.model_config)
|
|
||||||
model = GPT2LMHeadModel.from_pretrained(args.model_path)
|
model = GPT2LMHeadModel.from_pretrained(args.model_path)
|
||||||
model.to(device)
|
model.to(device)
|
||||||
model.eval()
|
model.eval()
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
pytorch-transformers
|
transformers
|
||||||
torch
|
torch
|
||||||
numpy
|
numpy
|
||||||
tqdm
|
tqdm
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ import os
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
from io import open
|
from io import open
|
||||||
|
|
||||||
from pytorch_transformers.tokenization_utils import PreTrainedTokenizer
|
from transformers.tokenization_utils import PreTrainedTokenizer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
12
train.py
12
train.py
@@ -1,4 +1,4 @@
|
|||||||
import pytorch_transformers
|
import transformers
|
||||||
import torch
|
import torch
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
@@ -78,7 +78,7 @@ def main():
|
|||||||
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
|
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
|
||||||
|
|
||||||
model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
|
model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
|
||||||
print('config:\n' + model_config.to_json_string())
|
print('config:\n' + model_config.to_json_string())
|
||||||
|
|
||||||
n_ctx = model_config.n_ctx
|
n_ctx = model_config.n_ctx
|
||||||
@@ -118,9 +118,9 @@ def main():
|
|||||||
print('files built')
|
print('files built')
|
||||||
|
|
||||||
if not args.pretrained_model:
|
if not args.pretrained_model:
|
||||||
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
|
model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
|
||||||
else:
|
else:
|
||||||
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
|
model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
|
||||||
model.train()
|
model.train()
|
||||||
model.to(device)
|
model.to(device)
|
||||||
|
|
||||||
@@ -139,8 +139,8 @@ def main():
|
|||||||
total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
|
total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
|
||||||
print('total steps = {}'.format(total_steps))
|
print('total steps = {}'.format(total_steps))
|
||||||
|
|
||||||
optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
|
optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
|
||||||
scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
|
scheduler = transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
|
||||||
t_total=total_steps)
|
t_total=total_steps)
|
||||||
if fp16:
|
if fp16:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import pytorch_transformers
|
import transformers
|
||||||
import torch
|
import torch
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
@@ -69,7 +69,7 @@ def main():
|
|||||||
from tokenizations import tokenization_bert
|
from tokenizations import tokenization_bert
|
||||||
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
|
os.environ["CUDA_VISIBLE_DEVICES"] = args.device # 此处设置程序使用哪些显卡
|
||||||
model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
|
model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(args.model_config)
|
||||||
print('config:\n' + model_config.to_json_string())
|
print('config:\n' + model_config.to_json_string())
|
||||||
|
|
||||||
n_ctx = model_config.n_ctx
|
n_ctx = model_config.n_ctx
|
||||||
@@ -101,9 +101,9 @@ def main():
|
|||||||
print('files built')
|
print('files built')
|
||||||
|
|
||||||
if not args.pretrained_model:
|
if not args.pretrained_model:
|
||||||
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
|
model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
|
||||||
else:
|
else:
|
||||||
model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
|
model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(args.pretrained_model)
|
||||||
model.train()
|
model.train()
|
||||||
model.to(device)
|
model.to(device)
|
||||||
multi_gpu = False
|
multi_gpu = False
|
||||||
@@ -115,8 +115,8 @@ def main():
|
|||||||
total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
|
total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
|
||||||
print('total steps = {}'.format(total_steps))
|
print('total steps = {}'.format(total_steps))
|
||||||
|
|
||||||
optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
|
optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
|
||||||
scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
|
scheduler = transformers.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
|
||||||
t_total=total_steps)
|
t_total=total_steps)
|
||||||
if fp16:
|
if fp16:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user