From 3f1b188e242d49b68c30e9e685904e11d2b6bc05 Mon Sep 17 00:00:00 2001
From: Duzeyao <330501241@qq.com>
Date: Wed, 13 Nov 2019 15:58:05 +0800
Subject: [PATCH] bugfix: scale reported loss by gradient_accumulation

---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index ae001cb..009e436 100644
--- a/train.py
+++ b/train.py
@@ -52,7 +52,7 @@ def main():
     parser.add_argument('--batch_size', default=8, type=int, required=False, help='training batch size')
     parser.add_argument('--lr', default=1.5e-4, type=float, required=False, help='learning rate')
     parser.add_argument('--warmup_steps', default=2000, type=int, required=False, help='number of warmup steps')
-    parser.add_argument('--log_step', default=1, type=int, required=False, help='report loss every this many steps')
+    parser.add_argument('--log_step', default=1, type=int, required=False, help='report loss every this many steps; set to an integer multiple of gradient accumulation')
     parser.add_argument('--stride', default=768, type=int, required=False, help='window stride for sampling the training data')
     parser.add_argument('--gradient_accumulation', default=1, type=int, required=False, help='gradient accumulation')
     parser.add_argument('--fp16', action='store_true', help='mixed precision')
@@ -221,7 +221,7 @@ def main():
                         step + 1,
                         piece_num,
                         epoch + 1,
-                        running_loss / log_step))
+                        running_loss * gradient_accumulation / log_step))
                     running_loss = 0
             piece_num += 1
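
Note on the fix: multiplying the report back by gradient_accumulation only makes sense if the per-batch loss was divided by gradient_accumulation before backward(), which is the usual PyTorch accumulation pattern; those lines are not shown in this hunk, so that is an assumption here. Under that assumption, running_loss accumulates the scaled values and the old report understated the loss by a factor of gradient_accumulation. A minimal, self-contained sketch of the arithmetic (the names and loss values are illustrative, not taken from train.py):

    # Hypothetical values to reproduce the bug and the fix.
    gradient_accumulation = 4
    log_step = 4  # an integer multiple of gradient_accumulation, per the new help text
    true_losses = [2.0, 2.2, 1.8, 2.4]  # per-batch losses before any scaling

    running_loss = 0.0
    for step, true_loss in enumerate(true_losses):
        # With accumulation, the loss is divided before backward(), so
        # running_loss collects the *scaled* values (assumed pattern).
        running_loss += true_loss / gradient_accumulation
        if (step + 1) % log_step == 0:
            old_report = running_loss / log_step                          # 0.525
            new_report = running_loss * gradient_accumulation / log_step  # 2.1
            print(old_report, new_report)  # true mean loss is 2.1
            running_loss = 0.0

The new help text exists because the report assumes every addition to running_loss was scaled by the same factor; making log_step a multiple of gradient_accumulation keeps each logging window aligned with whole accumulation cycles.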