From 56e8628d44976d0f6e7add03900fbb4e7723c4dc Mon Sep 17 00:00:00 2001 From: b2baccline <23131013+b2baccline@users.noreply.github.com> Date: Wed, 23 Dec 2020 19:14:50 +0800 Subject: [PATCH] =?UTF-8?q?:sparkles:=20=E6=B7=BB=E5=8A=A0=20HTMLUtil?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ballcat-common/ballcat-common-core/pom.xml | 4 ++ .../ballcat/common/core/util/HtmlUtil.java | 45 +++++++++++++++++++ ballcat-dependencies/pom.xml | 7 +++ 3 files changed, 56 insertions(+) create mode 100644 ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java diff --git a/ballcat-common/ballcat-common-core/pom.xml b/ballcat-common/ballcat-common-core/pom.xml index 4b2c5046..48e5f27b 100644 --- a/ballcat-common/ballcat-common-core/pom.xml +++ b/ballcat-common/ballcat-common-core/pom.xml @@ -58,5 +58,9 @@ compile + + org.jsoup + jsoup + \ No newline at end of file diff --git a/ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java b/ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java new file mode 100644 index 00000000..9a7b3367 --- /dev/null +++ b/ballcat-common/ballcat-common-core/src/main/java/com/hccake/ballcat/common/core/util/HtmlUtil.java @@ -0,0 +1,45 @@ +package com.hccake.ballcat.common.core.util; + +import cn.hutool.core.util.StrUtil; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.safety.Whitelist; + +/** + * @author Hccake 2020/12/21 + * @version 1.0 + */ +public class HtmlUtil { + + /** + * Converts HTML to plain text while preserving line breaks: a literal backslash-n marker is appended to each br element and prepended (twice) to each p element, the markers are replaced with real newlines, and all tags are then stripped. + * @link https://stackoverflow.com/questions/5640334/how-do-i-preserve-line-breaks-when-using-jsoup-to-convert-html-to-plain-text + * @param html the HTML string; returned unchanged when StrUtil.isEmpty(html) is true + * @param mergeLineBreak whether runs of line breaks (possibly separated by whitespace) are collapsed into a single newline + * @return plain text with the line-break layout preserved + */ + public static String toText(String html, boolean mergeLineBreak) { + if (StrUtil.isEmpty(html)) { + return html; + } + Document document 
= Jsoup.parse(html); + // makes html() preserve linebreaks and spacing + document.outputSettings(new Document.OutputSettings().prettyPrint(false)); + document.select("br").append("\\n"); + document.select("p").prepend("\\n\\n"); + String s = document.html().replaceAll("\\\\n", "\n"); + String result = Jsoup.clean(s, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); + // merge multiple line breaks into one + return mergeLineBreak ? result.replaceAll("(\r?\n(\\s*\r?\n)+)", "\n") : result; + } + + /** + * Converts HTML to plain text while preserving line breaks; consecutive line breaks are merged by default. + * @param html the HTML string + * @return plain text with the line-break layout preserved + */ + public static String toText(String html) { + return toText(html, true); + } + +} diff --git a/ballcat-dependencies/pom.xml b/ballcat-dependencies/pom.xml index 9ee8f130..de2e94dc 100644 --- a/ballcat-dependencies/pom.xml +++ b/ballcat-dependencies/pom.xml @@ -55,6 +55,7 @@ 2.2.6 3.0.3 6.1.7.Final + 1.13.1 @@ -254,6 +255,12 @@ ${hibernate-validator.version} + + org.jsoup + jsoup + ${jsoup.version} + +