using System.Text.RegularExpressions;//需要引用 // 利用正则表达式去掉"<"和">"之间的内容 private string StripHT(string strHtml) { Regex regex=new Regex("<.+?>",RegexOptions.IgnoreCase); string strOutput=regex.Replace(strHtml,""); return strOutput; } //方法二(不知为什么此方法占用CPU100%)
public static string DropHTML(string strHtml) { string [] aryReg ={ @"<script[^>]*?>.*?</script>", @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""''])(\\[""''tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", @"([\r])[\s]+", @"&(quot|#34);", @"&(amp|#38);", @"&(lt|#60);", @"&(gt|#62);", @"&(nbsp|#160);", @"&(iexcl|#161);", @"&(cent|#162);", @"&(pound|#163);", @"&(copy|#169);", @"&#(\d+);", @"-->", @"<!--.*" }; string [] aryRep = { "", "", "", "\"", "&", "<", ">", " ", "\xa1",//chr(161), "\xa2",//chr(162), "\xa3",//chr(163), "\xa9",//chr(169), "", "\r", "" }; string newReg =aryReg[0]; string strOutput=strHtml; for(int i = 0;i<aryReg.Length;i++) { Regex regex = new Regex(aryReg[i],RegexOptions.IgnoreCase ); strOutput = regex.Replace(strOutput,aryRep[i]); } strOutput.Replace("<",""); strOutput.Replace(">",""); strOutput.Replace("\r",""); return strOutput; } |