EmojiFilter

微信特殊字符 emoji过滤器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

/**
 * Filter for WeChat special characters and emoji.
 *
 * <p>Only static helpers are provided; nothing in this class holds state.
 *
 * @author Beldon
 */
public abstract class EmojiFilter {

    /** Punctuation and symbol characters removed by the first pass of {@link #filterEmoji}. */
    private static final Pattern SYMBOLS =
            Pattern.compile("[`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]");

    /** Runs of characters outside the whitelist, removed by the second pass of {@link #filterEmoji}. */
    private static final Pattern NOT_WHITELISTED =
            Pattern.compile("[^0-9a-zA-Z\u4e00-\u9fa5.,,。?“”]+");

    /**
     * Checks whether the text contains an emoji-like character.
     *
     * <p>The check works on UTF-16 {@code char} units: a unit counts as emoji-like when
     * {@link #isNotEmojiCharacter(char)} rejects it. Surrogate halves (U+D800..U+DFFF) are
     * rejected, so every supplementary-plane character is detected. Symbols that live in the
     * Basic Multilingual Plane (for example U+2764) are accepted as ordinary characters and are
     * therefore NOT reported.
     *
     * @param source text to inspect; may be {@code null}
     * @return {@code true} as soon as one emoji-like character is found; {@code false} when none
     *         is found or when {@code source} is {@code null}, empty or whitespace-only
     */
    public static boolean containsEmoji(String source) {
        if (isBlank(source)) {
            return false;
        }

        for (int i = 0; i < source.length(); i++) {
            // A single unit outside the accepted ranges is enough to answer "yes".
            if (!isNotEmojiCharacter(source.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether a UTF-16 unit is an ordinary (non-emoji) character.
     *
     * <p>Accepted: NUL, tab, LF, CR, U+0020..U+D7FF and U+E000..U+FFFD. Everything else
     * (other control characters, surrogate halves, U+FFFE and U+FFFF) is rejected. A range
     * check above U+FFFF would be pointless here because a {@code char} cannot hold such a value.
     *
     * @param unit the UTF-16 unit to classify
     * @return {@code true} when the unit lies in one of the accepted ranges
     */
    private static boolean isNotEmojiCharacter(char unit) {
        return unit == 0x0
                || unit == 0x9
                || unit == 0xA
                || unit == 0xD
                || (unit >= 0x20 && unit <= 0xD7FF)
                || (unit >= 0xE000 && unit <= 0xFFFD);
    }

    /**
     * Null-safe blank test: {@code null}, empty, or made only of characters for which
     * {@link Character#isWhitespace(char)} is true.
     *
     * @param text the text to test; may be {@code null}
     * @return {@code true} when there is no non-whitespace character
     */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes emoji and every other non-text character from the input.
     *
     * <p>Two passes are applied: first the punctuation/symbol set is stripped, then everything
     * that is not whitelisted. Because the first pass already strips the punctuation that the
     * whitelist would allow, the result consists only of ASCII digits, ASCII letters and
     * characters in the range U+4E00..U+9FA5. Whitespace and surrogate halves are never
     * whitelisted, so no trimming and no additional emoji scan is required afterwards.
     *
     * @param source text to clean; may be {@code null}
     * @return the cleaned text, or an empty string when {@code source} is {@code null} or empty;
     *         never {@code null}
     */
    public static String filterEmoji(String source) {
        if (source == null || source.isEmpty()) {
            return "";
        }
        String withoutSymbols = SYMBOLS.matcher(source).replaceAll("");
        return NOT_WHITELISTED.matcher(withoutSymbols).replaceAll("");
    }

}