Java正则表达式过滤html标签

665 views

Published on

ddddddd

0 Comments
0 Likes
Statistics
Notes
  • Be the first to comment

  • Be the first to like this

No Downloads
Views
Total views
665
On SlideShare
0
From Embeds
0
Number of Embeds
14
Actions
Shares
0
Downloads
4
Comments
0
Likes
0
Embeds 0
No embeds

No notes for slide

Java正则表达式过滤html标签

  import java.util.regex.Matcher;
  import java.util.regex.Pattern;

  /**
   * Regular-expression helpers for HTML text: escaping markup characters,
   * stripping tags, and rewriting a tag into a custom start/end marker pair.
   *
   * <p>Copyright (c) 2006
   *
   * @author hejian
   * @version 1.0
   * @createtime 2006-10-16
   */
  public class HtmlRegexpUtil {

      /** Matches any run of text that starts with {@code <} and ends with {@code >}. */
      private static final Pattern HTML_TAG_PATTERN = Pattern.compile("<([^>]*)>");

      /** Matches an IMG tag that carries attributes. Not referenced by the methods below. */
      private static final String regxpForImgTag = "<\\s*img\\s+([^>]*)\\s*>";

      /** Matches the SRC attribute of an IMG tag. Not referenced by the methods below. */
      private static final String regxpForImaTagSrcAttrib = "src=\"([^\"]+)\"";

      /**
       * Creates a utility instance. Only {@link #replaceTag(String)} and
       * {@link #hasSpecialChars(String)} are instance methods; the filters are static.
       */
      public HtmlRegexpUtil() {
      }

      /**
       * Escapes HTML markup characters so the text is displayed literally.
       *
       * <p>Replaces {@code <}, {@code >}, {@code "} and {@code &} with
       * {@code &lt;}, {@code &gt;}, {@code &quot;} and {@code &amp;}. The single
       * quote is not escaped.
       *
       * @param input text to escape; may be {@code null}
       * @return the escaped text, or {@code input} itself (possibly {@code null})
       *         when it contains none of the four characters
       */
      public String replaceTag(String input) {
          if (!hasSpecialChars(input)) {
              return input;
          }
          StringBuilder filtered = new StringBuilder(input.length() + 16);
          for (int i = 0; i < input.length(); i++) {
              char c = input.charAt(i);
              switch (c) {
                  case '<':
                      filtered.append("&lt;");
                      break;
                  case '>':
                      filtered.append("&gt;");
                      break;
                  case '"':
                      filtered.append("&quot;");
                      break;
                  case '&':
                      filtered.append("&amp;");
                      break;
                  default:
                      filtered.append(c);
              }
          }
          return filtered.toString();
      }

      /**
       * Tells whether the text contains any of {@code <}, {@code >}, {@code "} or {@code &}.
       *
       * @param input text to inspect; may be {@code null}
       * @return {@code true} if at least one of the four characters occurs;
       *         {@code false} for {@code null} or empty input
       */
      public boolean hasSpecialChars(String input) {
          if (input == null) {
              return false;
          }
          for (int i = 0; i < input.length(); i++) {
              switch (input.charAt(i)) {
                  case '>':
                  case '<':
                  case '"':
                  case '&':
                      // One hit is enough; no need to scan the rest.
                      return true;
                  default:
                      break;
              }
          }
          return false;
      }

      /**
       * Removes every substring that starts with {@code <} and ends with {@code >}.
       *
       * @param str text to filter; may be {@code null}
       * @return the text with all such substrings removed, or {@code null} if
       *         {@code str} is {@code null}
       */
      public static String filterHtml(String str) {
          if (str == null) {
              return null;
          }
          return HTML_TAG_PATTERN.matcher(str).replaceAll("");
      }

      /**
       * Removes every opening tag with the given name that carries attributes.
       *
       * <p>The pattern requires whitespace after the tag name, so a bare
       * {@code <tag>} and the closing {@code </tag>} are left untouched. Matching
       * is case-sensitive. The method name (including its spelling) is kept for
       * compatibility with existing callers.
       *
       * @param str text to filter; may be {@code null}
       * @param tag tag name; it is inserted into the regular expression as-is,
       *            so regex metacharacters in it are interpreted
       * @return the filtered text, or {@code null} if {@code str} is {@code null}
       */
      public static String fiterHtmlTag(String str, String tag) {
          if (str == null) {
              return null;
          }
          Pattern pattern = Pattern.compile("<\\s*" + tag + "\\s+([^>]*)\\s*>");
          return pattern.matcher(str).replaceAll("");
      }

      /**
       * Replaces each matching tag with {@code startTag + attributeValue + endTag}.
       *
       * <p>Example: with {@code beforeTag = "img"}, {@code tagAttrib = "src"},
       * {@code startTag = "[img]"} and {@code endTag = "[/img]"}, the text
       * {@code <img src="a.png">} becomes {@code [img]a.png[/img]}. A matching tag
       * that does not contain the attribute is removed. The attribute value and
       * both markers are inserted literally; {@code $} and {@code \} in them have
       * no special meaning.
       *
       * @param str       text to process; may be {@code null}
       * @param beforeTag name of the tag to replace (inserted into the regex as-is)
       * @param tagAttrib name of the double-quoted attribute whose value is kept
       *                  (inserted into the regex as-is)
       * @param startTag  marker written before the attribute value
       * @param endTag    marker written after the attribute value
       * @return the rewritten text, or {@code null} if {@code str} is {@code null}
       */
      public static String replaceHtmlTag(String str, String beforeTag,
              String tagAttrib, String startTag, String endTag) {
          if (str == null) {
              return null;
          }
          Pattern patternForTag = Pattern.compile("<\\s*" + beforeTag + "\\s+([^>]*)\\s*>");
          Pattern patternForAttrib = Pattern.compile(tagAttrib + "=\"([^\"]+)\"");
          Matcher matcherForTag = patternForTag.matcher(str);
          // StringBuffer is required by Matcher.appendReplacement before Java 9.
          StringBuffer sb = new StringBuffer(str.length());
          while (matcherForTag.find()) {
              Matcher matcherForAttrib = patternForAttrib.matcher(matcherForTag.group(1));
              String replacement = "";
              if (matcherForAttrib.find()) {
                  // Build the replacement directly so attributes that precede the
                  // wanted one do not leak into the output.
                  replacement = startTag + matcherForAttrib.group(1) + endTag;
              }
              // Quote so '$' and '\' in the value are not read as group references.
              matcherForTag.appendReplacement(sb, Matcher.quoteReplacement(replacement));
          }
          matcherForTag.appendTail(sb);
          return sb.toString();
      }
  }

  // ---------------------------------------------------------------------------
  // Unrelated PHP snippet that followed the class in the original listing.
  // It compresses HTML markup only and does not affect JavaScript:
  //
  //   if ($this->compress) {
  //       $KindData = preg_replace("~>\s+\r~", ">", preg_replace("~>\s+\n~", ">", $KindData)); // modify: compress
  //       $KindData = preg_replace("~>\s+<~", "><", $KindData);
  //   }
  // ---------------------------------------------------------------------------

×