del convert_tf_checkpoint_to_pytorch.py

2019-12-23 10:10:32 +08:00
parent 8a1f2ff0ce
commit 4a0c4f17b2
3 changed files with 42 additions and 106 deletions
--- a/pytorch-bert-code/bert/README.md
+++ b/pytorch-bert-code/bert/README.md
@@ -0,0 +1,39 @@
 ### How to convert BERT TensorFlow checkpoints to a PyTorch model file
 ### 如何将bert model 的Tensorflow模型 转换为pytorch模型
 下载Tensorflow模型文件
 解压缩到文件夹下
 应该有
 - bert_config.json
 - bert_model.ckpt.data-00000-of-00001
 - bert_model.ckpt.index
 - bert_model.ckpt.meta
 - vocab.txt
 这几个文件
 运行run.sh
 后生成对应pytorch_model.bin
 具体代码
 ```
 export BERT_BASE_DIR=./
 transformers bert \
  $BERT_BASE_DIR/bert_model.ckpt \
  $BERT_BASE_DIR/bert_config.json \
  $BERT_BASE_DIR/pytorch_model.bin
 ```
 原来convert_tf_checkpoint_to_pytorch.py被新版本废除
--- a/pytorch-bert-code/bert/convert_tf_checkpoint_to_pytorch.py
+++ b/pytorch-bert-code/bert/convert_tf_checkpoint_to_pytorch.py
@@ -1,105 +0,0 @@
 # coding=utf-8
 # Copyright 2018 The HugginFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Convert BERT checkpoint."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import re
 import argparse
 import tensorflow as tf
 import torch
 import numpy as np
 from modeling import BertConfig, BertModel
 # Command-line interface: three options, every one of them mandatory.
 parser = argparse.ArgumentParser()
 parser.add_argument(
     "--tf_checkpoint_path",
     type=str,
     default=None,
     required=True,
     help="Path the TensorFlow checkpoint path.",
 )
 parser.add_argument(
     "--bert_config_file",
     type=str,
     default=None,
     required=True,
     help=(
         "The config json file corresponding to the pre-trained BERT model. \n"
         "This specifies the model architecture."
     ),
 )
 parser.add_argument(
     "--pytorch_dump_path",
     type=str,
     default=None,
     required=True,
     help="Path to the output PyTorch model.",
 )
 # Parsed at module level; convert() reads its paths from this namespace.
 args = parser.parse_args()
 def convert():
    """Convert a TensorFlow BERT checkpoint into a saved PyTorch state dict.

    Reads the module-level ``args`` namespace: builds a ``BertModel`` from
    ``args.bert_config_file``, copies each variable listed in the checkpoint
    at ``args.tf_checkpoint_path`` onto the model attribute its name maps to,
    and writes ``model.state_dict()`` to ``args.pytorch_dump_path``.

    Raises:
        AssertionError: if a checkpoint array's shape differs from the shape
            of the PyTorch tensor it maps to. The exception ``args`` hold
            ``(pointer.shape, array.shape)``.
    """
    # Initialise PyTorch model
    config = BertConfig.from_json_file(args.bert_config_file)
    model = BertModel(config)
    # Load weights from TF model
    path = args.tf_checkpoint_path
    print("Converting TensorFlow checkpoint from {}".format(path))
    init_vars = tf.train.list_variables(path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading {} with shape {}".format(name, shape))
        array = tf.train.load_variable(path, name)
        print("Numpy array shape {}".format(array.shape))
        names.append(name)
        arrays.append(array)
    for name, array in zip(names, arrays):
        # Drop the first five characters, i.e. the "bert/" scope prefix.
        # The slice is applied blindly to every variable, so names from other
        # scopes lose their first five characters too -- which is why the skip
        # list below holds truncated names (presumably "cls/predictions" and
        # "cls/seq_relationship"; confirm against the checkpoint).
        name = name[5:]  # skip "bert/"
        print("Loading {}".format(name))
        name = name.split('/')
        if name[0] in ['redictions', 'eq_relationship']:
            print("Skipping")
            continue
        # Walk the model attribute by attribute, following the variable path.
        pointer = model
        for m_name in name:
            # "<letters>_<digits>" selects element <digits> of attribute <letters>.
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                parts = re.split(r'_(\d+)', m_name)
            else:
                parts = [m_name]
            # TF "kernel" corresponds to the PyTorch "weight" attribute.
            if parts[0] == 'kernel':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, parts[0])
            if len(parts) >= 2:
                num = int(parts[1])
                pointer = pointer[num]
        # m_name is the last path component here.
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            # Kernels are transposed before being compared and copied.
            array = np.transpose(array)
        # Explicit check rather than `assert`, so that the validation is not
        # stripped when Python runs with -O; the exception type and args match
        # what the assert-based version raised.
        if not (pointer.shape == array.shape):
            raise AssertionError(pointer.shape, array.shape)
        pointer.data = torch.from_numpy(array)
    # Save pytorch-model
    torch.save(model.state_dict(), args.pytorch_dump_path)
 # Script entry point: run the conversion only when executed directly.
 if __name__ == "__main__":
    convert()
--- a/pytorch-bert-code/bert/run.sh
+++ b/pytorch-bert-code/bert/run.sh
@@ -1 +1,3 @@
-python3 convert_tf_checkpoint_to_pytorch.py --tf_checkpoint_path bert_model.ckpt --bert_config_file bert_config.json --pytorch_dump_path bert_model.bin
+export BERT_BASE_DIR=./
 transformers bert   $BERT_BASE_DIR/bert_model.ckpt   $BERT_BASE_DIR/bert_config.json   $BERT_BASE_DIR/pytorch_model.bin
`@@ -1 +1,3 @@`
	`python3 convert_tf_checkpoint_to_pytorch.py --tf_checkpoint_path bert_model.ckpt --bert_config_file bert_config.json --pytorch_dump_path bert_model.bin`	`export BERT_BASE_DIR=./`

		`transformers bert $BERT_BASE_DIR/bert_model.ckpt $BERT_BASE_DIR/bert_config.json $BERT_BASE_DIR/pytorch_model.bin`