✨ 添加短信长度计算工具类

2021-05-15 10:35:37 +08:00
parent a5e25dba5e
commit 698bdcf266
2 changed files with 198 additions and 0 deletions
--- a/ballcat-common/ballcat-common-util/src/main/java/com/hccake/ballcat/common/charset/GSMCharset.java
+++ b/ballcat-common/ballcat-common-util/src/main/java/com/hccake/ballcat/common/charset/GSMCharset.java
@@ -0,0 +1,123 @@
+/**
+ * Copyright (C) 2011 Twitter, Inc.
+ * <p>
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+ * file except in compliance with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package com.hccake.ballcat.common.charset;
+
+/**
+ * This class encodes and decodes Java Strings to and from the SMS default alphabet. It
+ * also supports the default extension table. The default alphabet and its extension
+ * table are defined in GSM 03.38.
+ *
+ * @author joelauer
+ * @author hccake
+ */
+public class GSMCharset {
+
+	/**
+	 * Escape value (0x1B) that precedes every character taken from the extension table.
+	 */
+	public static final int EXTENDED_ESCAPE = 0x1b;
+
+	/**
+	 * Page break (extended table).
+	 */
+	public static final int PAGE_BREAK = 0x0a;
+
+	/**
+	 * Default alphabet table. Every character found here costs one septet. Position 0x1B
+	 * (the escape slot) holds a plain space as a placeholder.
+	 */
+	public static final char[] CHAR_TABLE = { '@', '\u00a3', '$', '\u00a5', '\u00e8', '\u00e9', '\u00f9', '\u00ec',
+			'\u00f2', '\u00c7', '\n', '\u00d8', '\u00f8', '\r', '\u00c5', '\u00e5', '\u0394', '_', '\u03a6', '\u0393',
+			'\u039b', '\u03a9', '\u03a0', '\u03a8', '\u03a3', '\u0398', '\u039e', ' ', '\u00c6', '\u00e6', '\u00df',
+			'\u00c9', ' ', '!', '"', '#', '\u00a4', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
+			'2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '\u00a1', 'A', 'B', 'C', 'D', 'E',
+			'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+			'\u00c4', '\u00d6', '\u00d1', '\u00dc', '\u00a7', '\u00bf', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
+			'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\u00e4', '\u00f6',
+			'\u00f1', '\u00fc', '\u00e0', };
+
+	/**
+	 * Extended character table. Characters in this table are accessed by the 'escape'
+	 * character in the base table and therefore cost two septets. A value of 0 marks an
+	 * unused ('inactive') slot; such slots must never be matched against input.
+	 *
+	 * @see #EXTENDED_ESCAPE
+	 */
+	public static final char[] EXT_CHAR_TABLE = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '^', 0, 0,
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '{', '}', 0, 0, 0, 0, 0, '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0,
+			0, 0, 0, '[', '~', ']', 0, '|', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\u20ac', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+			0, 0, 0, 0, };
+
+	/**
+	 * Verifies that this charset can represent every character in the Java String (char
+	 * sequence).
+	 * @param str0 The String to verify; {@code null} is treated as representable
+	 * @return True if the charset can represent every character in the Java String,
+	 * otherwise false.
+	 */
+	public static boolean canRepresent(CharSequence str0) {
+		return need7bitsNum(str0) >= 0;
+	}
+
+	/**
+	 * Gets the number of septets (7-bit units) the string needs under GSM-7 encoding.
+	 * Characters of the default alphabet count as one septet, characters of the extension
+	 * table count as two (escape + character).
+	 * @param str0 the text to measure, may be {@code null}
+	 * @return 0 for {@code null} or empty input, -1 if at least one character cannot be
+	 * encoded in GSM-7, otherwise the number of septets required
+	 */
+	public static int need7bitsNum(CharSequence str0) {
+		if (str0 == null) {
+			return 0;
+		}
+
+		int total = 0;
+		for (int i = 0, len = str0.length(); i < len; i++) {
+			int septets = septetsOf(str0.charAt(i));
+			if (septets < 0) {
+				// one unsupported character makes the whole string non GSM-7
+				return -1;
+			}
+			total += septets;
+		}
+		return total;
+	}
+
+	/**
+	 * Returns how many septets a single character needs: 1 for the default alphabet, 2
+	 * for the extension table, -1 if the character is in neither table.
+	 */
+	private static int septetsOf(char c) {
+		// Fast path for letters and digits. The letter ranges are tested separately
+		// because the ASCII characters between 'Z' and 'a' are not all single-septet:
+		// '[', '\', ']' and '^' are extension characters and the backtick is unsupported.
+		if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
+			return 1;
+		}
+		// NUL is the filler value of EXT_CHAR_TABLE and must never match an input char;
+		// the euro sign is the highest code point present in either table.
+		if (c == 0 || c > '\u20ac') {
+			return -1;
+		}
+		for (char candidate : CHAR_TABLE) {
+			if (c == candidate) {
+				return 1;
+			}
+		}
+		for (char candidate : EXT_CHAR_TABLE) {
+			if (c == candidate) {
+				return 2;
+			}
+		}
+		return -1;
+	}
+
+}
--- a/ballcat-common/ballcat-common-util/src/main/java/com/hccake/ballcat/common/util/SmsUtils.java
+++ b/ballcat-common/ballcat-common-util/src/main/java/com/hccake/ballcat/common/util/SmsUtils.java
@@ -0,0 +1,75 @@
+package com.hccake.ballcat.common.util;
+
+import com.hccake.ballcat.common.charset.GSMCharset;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * 短信的工具类
+ *
+ * @author hccake
+ */
+public class SmsUtils {
+
+	/**
+	 * Number of bytes available for the payload of a single SMS.
+	 */
+	public static final int SMS_PAYLOAD_BYTE_NUM = 140;
+
+	/**
+	 * Maximum number of characters of a single (non-concatenated) SMS under GSM-7
+	 * encoding: (140 * 8) / 7 = 160. GSM-7 uses one 7-bit unit per standard character;
+	 * extension characters such as the euro sign, ^ { } [ ] ~ | and the backslash take two
+	 * 7-bit units each.
+	 */
+	public static final int MAX_WORLD_NUM_IN_GSM = 160;
+
+	/**
+	 * Maximum number of characters of a single (non-concatenated) SMS under UCS-2
+	 * encoding, where every character takes two bytes: 140 / 2 = 70.
+	 */
+	public static final int MAX_WORLD_NUM_IN_UCS2 = 70;
+
+	/**
+	 * Size of the user data header (UDH) carried by every part of a concatenated SMS: 6
+	 * bytes / 48 bits.
+	 */
+	public static final int UDH_BYTE_NUM = 6;
+
+	/**
+	 * Characters per part of a concatenated GSM-7 SMS: ((140 - 6) * 8) / 7 = 153.
+	 */
+	private static final int GSM_SEGMENT_CAPACITY = (SMS_PAYLOAD_BYTE_NUM - UDH_BYTE_NUM) * 8 / 7;
+
+	/**
+	 * Characters per part of a concatenated UCS-2 SMS: (140 - 6) / 2 = 67.
+	 */
+	private static final int UCS2_SEGMENT_CAPACITY = (SMS_PAYLOAD_BYTE_NUM - UDH_BYTE_NUM) / 2;
+
+	/**
+	 * Computes how many SMS messages are needed to deliver the given content.
+	 *
+	 * The payload of one SMS is 140 bytes. Longer text is sent as a concatenated (long)
+	 * SMS whose parts each reserve 6 bytes (48 bits) of the payload for a user data header
+	 * (UDH) that lets the receiving device reorder the parts, which reduces the number of
+	 * characters each part can carry.
+	 *
+	 * The result is a plain ceiling division of the total length by the per-part
+	 * capacity; it does not account for an escape sequence or a surrogate pair that would
+	 * straddle a part boundary.
+	 * @param smsContent the SMS text, may be {@code null}
+	 * @return the number of SMS messages, 0 for {@code null} or empty content
+	 */
+	public static int smsNumber(String smsContent) {
+		int septets = GSMCharset.need7bitsNum(smsContent);
+		if (septets == 0) {
+			// null or empty content
+			return 0;
+		}
+
+		// a positive count means the whole text can be encoded in GSM-7
+		if (septets > 0) {
+			// a short SMS carries no UDH and holds up to 160 GSM-7 characters
+			if (septets <= MAX_WORLD_NUM_IN_GSM) {
+				return 1;
+			}
+			return segmentCount(septets, GSM_SEGMENT_CAPACITY);
+		}
+
+		// Not GSM-7 encodable: fall back to UCS-2 (big-endian by default). UTF-16 is
+		// compatible with UCS-2, so the UTF-16BE bytes are used; two bytes per character.
+		int ucs2Chars = smsContent.getBytes(StandardCharsets.UTF_16BE).length / 2;
+		// a short SMS carries no UDH and holds up to 70 UCS-2 characters
+		if (ucs2Chars <= MAX_WORLD_NUM_IN_UCS2) {
+			return 1;
+		}
+		return segmentCount(ucs2Chars, UCS2_SEGMENT_CAPACITY);
+	}
+
+	/**
+	 * Ceiling division: number of parts of size {@code capacity} needed for {@code units}.
+	 */
+	private static int segmentCount(int units, int capacity) {
+		return (units + capacity - 1) / capacity;
+	}
+
+}