From 56e8628d44976d0f6e7add03900fbb4e7723c4dc Mon Sep 17 00:00:00 2001
From: b2baccline <23131013+b2baccline@users.noreply.github.com>
Date: Wed, 23 Dec 2020 19:14:50 +0800
Subject: [PATCH] =?UTF-8?q?:sparkles:=20=E6=B7=BB=E5=8A=A0=20HTMLUtil?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
ballcat-common/ballcat-common-core/pom.xml | 4 ++
.../ballcat/common/core/util/HtmlUtil.java | 45 +++++++++++++++++++
ballcat-dependencies/pom.xml | 7 +++
3 files changed, 56 insertions(+)
create mode 100644 ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java
diff --git a/ballcat-common/ballcat-common-core/pom.xml b/ballcat-common/ballcat-common-core/pom.xml
index 4b2c5046..48e5f27b 100644
--- a/ballcat-common/ballcat-common-core/pom.xml
+++ b/ballcat-common/ballcat-common-core/pom.xml
@@ -58,5 +58,9 @@
compile
+ <dependency>
+ <groupId>org.jsoup</groupId>
+ <artifactId>jsoup</artifactId>
+ </dependency>
\ No newline at end of file
diff --git a/ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java b/ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java
new file mode 100644
index 00000000..9a7b3367
--- /dev/null
+++ b/ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java
@@ -0,0 +1,45 @@
+package com.hccake.ballcat.common.core.util;
+
+import cn.hutool.core.util.StrUtil;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.safety.Whitelist;
+
+/**
+ * @author Hccake 2020/12/21
+ * @version 1.0
+ */
+public class HtmlUtil {
+
+ /**
+ * Converts HTML to plain text while keeping the line breaks implied by {@code <br>} and {@code <p>} elements.
+ * @param html the HTML string; returned as-is when {@code StrUtil.isEmpty(html)} is true
+ * @param mergeLineBreak whether to collapse consecutive line breaks (and whitespace-only lines between them) into one
+ * @return the plain text with line breaks preserved
+ * @see <a href="https://stackoverflow.com/questions/5640334/how-do-i-preserve-line-breaks-when-using-jsoup-to-convert-html-to-plain-text">Stack Overflow: preserving line breaks when converting HTML to text with jsoup</a>
+ */
+ public static String toText(String html, boolean mergeLineBreak) {
+ if (StrUtil.isEmpty(html)) {
+ return html;
+ }
+ Document document = Jsoup.parse(html);
+ // disable pretty-printing so html() preserves line breaks and spacing
+ document.outputSettings(new Document.OutputSettings().prettyPrint(false));
+ document.select("br").append("\\n"); // marker: a literal backslash followed by 'n', not a newline character
+ document.select("p").prepend("\\n\\n"); // two markers in front of every paragraph
+ String s = document.html().replaceAll("\\\\n", "\n"); // markers -> real newlines; NOTE(review): a literal \n sequence already present in the input text is converted too
+ String result = Jsoup.clean(s, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); // Whitelist.none() permits no tags, leaving text only
+ // collapse each run of line breaks (with any whitespace-only lines in between) into a single "\n"
+ return mergeLineBreak ? result.replaceAll("(\r?\n(\\s*\r?\n)+)", "\n") : result;
+ }
+
+ /**
+ * Converts HTML to plain text, keeping line breaks and merging consecutive line breaks by default.
+ * @param html the HTML string
+ * @return the plain text with line breaks preserved
+ */
+ public static String toText(String html) {
+ return toText(html, true);
+ }
+
+}
diff --git a/ballcat-dependencies/pom.xml b/ballcat-dependencies/pom.xml
index 9ee8f130..de2e94dc 100644
--- a/ballcat-dependencies/pom.xml
+++ b/ballcat-dependencies/pom.xml
@@ -55,6 +55,7 @@
2.2.6
3.0.3
6.1.7.Final
+ <jsoup.version>1.13.1</jsoup.version>
@@ -254,6 +255,12 @@
${hibernate-validator.version}
+ <dependency>
+ <groupId>org.jsoup</groupId>
+ <artifactId>jsoup</artifactId>
+ <version>${jsoup.version}</version>
+ </dependency>
+