mirror of
				https://github.com/rudollee/HTMLParser.git
				synced 2025-10-31 01:27:12 +00:00 
			
		
		
		
	new pattern for url
This commit is contained in:
		
							parent
							
								
									feabbc7e1b
								
							
						
					
					
						commit
						fd30db4aa2
					
				| @ -6,9 +6,8 @@ | ||||
| <head runat="server"> | ||||
|     <title></title> | ||||
| 	<style type="text/css"> | ||||
| 		#source { | ||||
| 			width: 293px; | ||||
| 			height: 99px; | ||||
| 		body { | ||||
| 			width: 600px; | ||||
| 		} | ||||
| 	</style> | ||||
| </head> | ||||
|  | ||||
| @ -21,9 +21,8 @@ namespace HTMLParser.Example | ||||
| 			ParserEx parse = new ParserEx(); | ||||
| 
 | ||||
| 			this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value); | ||||
| 			this.parsedYoutube.InnerHtml		= parse.ParseYoutube(this.txtSource.Value); | ||||
| 			this.parsedUrlAndYoutube.InnerHtml	= parse.ParseUrlAndYoutube(this.txtSource.Value); | ||||
| 		} | ||||
| 
 | ||||
| 			this.parsedYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First(); | ||||
| 			this.parsedUrlAndYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First() + parse.ParseUrl(this.txtSource.Value); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| @ -1,20 +1,52 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Text.RegularExpressions; | ||||
| 
 | ||||
| namespace HTMLParser | ||||
| { | ||||
|     public class ParserEx | ||||
|     { | ||||
| 		/// <summary> | ||||
| 		/// Prevent use of Html tags | ||||
| 		/// </summary> | ||||
| 		/// <param name="article"></param> | ||||
| 		/// <returns></returns> | ||||
| 		public string PreventHTML(string article) | ||||
| 		{ | ||||
| 			return article.Replace("<", "<").Replace(">", ">"); | ||||
| 		} | ||||
| 
 | ||||
| 		/// <summary> | ||||
| 		/// Prevent use of Risky Tags | ||||
| 		/// </summary> | ||||
| 		/// <param name="article"></param> | ||||
| 		/// <returns></returns> | ||||
| 		public string PreventRiskyTag(string article) | ||||
| 		{ | ||||
| 			return article.Replace("script", "").Replace("iframe", "").Replace("object", ""); | ||||
| 		} | ||||
| 
 | ||||
| 		/// <summary> | ||||
| 		/// Parse Url | ||||
| 		/// </summary> | ||||
| 		/// <param name="article"></param> | ||||
| 		/// <returns></returns> | ||||
| 		public string ParseUrl(string article) | ||||
| 		{ | ||||
| 			if (string.IsNullOrEmpty(article)) return string.Empty; | ||||
| 
 | ||||
| 			string Pttrn = @"(((http|https|ftp|telnet|news)://|www\.)[^youtube][a-z0-9-]+.[][a-zA-Z0-9:&#@=_~%;?/.+-]+)"; | ||||
| 			string Lnk = "<a href=\"$1\" target=\"_blank\">$1</a>"; | ||||
| 			string pttrn = @"((?:(?:https?|http|ftp|gopher|telnet|file|notes|ms-help):(?://|\\\\)(?:www\.)?|www\.)[\w\d:#@%/;$()~_?\+,\-=\\.&]+)"; | ||||
| 
 | ||||
| 			return Regex.Replace(article, Pttrn, Lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www."); | ||||
| 			string lnk = "<a href=\"$1\" target=\"_blank\">$1</a>"; | ||||
| 
 | ||||
| 			return Regex.Replace(article, pttrn, lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www."); | ||||
| 		} | ||||
| 
 | ||||
| 		/// <summary> | ||||
| 		/// Parse Youtube Url to script | ||||
| 		/// </summary> | ||||
| 		/// <param name="article"></param> | ||||
| 		/// <returns></returns> | ||||
| 		public string ParseYoutube(string article) | ||||
| 		{ | ||||
| 			if (string.IsNullOrEmpty(article)) return string.Empty; | ||||
| @ -25,6 +57,22 @@ namespace HTMLParser | ||||
| 			return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\""); | ||||
| 		} | ||||
| 
 | ||||
| 		public List<string> GenerateYoutubeScripts(string article) | ||||
| 		{ | ||||
| 			if (string.IsNullOrEmpty(article)) return null; | ||||
| 
 | ||||
| 			Regex regex = new Regex(@"youtu(?:\.be|be\.com)/(?:.*v(?:/|=)|(?:.*/)?)([a-zA-Z0-9-_]+)"); | ||||
| 			Match match = regex.Match(article); | ||||
| 			List<string> scripts = new List<string>(); | ||||
| 			while (match.Success) | ||||
| 			{ | ||||
| 				scripts.Add(string.Format("<div class=\"youtube\" ><iframe src=\"https://www.youtube.com/embed/{0}\" frameborder=\"0\" allowfullscreen></iframe></div>", match.Value.Replace("youtu.be/", ""))); | ||||
| 				match = match.NextMatch(); | ||||
| 			} | ||||
| 
 | ||||
| 			return scripts; | ||||
| 		} | ||||
| 
 | ||||
| 		public string ParseUrlAndYoutube(string article) | ||||
| 		{ | ||||
| 			return this.ParseYoutube(this.ParseUrl(article)); | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user