diff --git a/HTMLParser.Example/Parsed.aspx b/HTMLParser.Example/Parsed.aspx
index eaeeab2..38a4d8d 100644
--- a/HTMLParser.Example/Parsed.aspx
+++ b/HTMLParser.Example/Parsed.aspx
@@ -6,9 +6,8 @@
diff --git a/HTMLParser.Example/Parsed.aspx.cs b/HTMLParser.Example/Parsed.aspx.cs
index 44963d0..af9c477 100644
--- a/HTMLParser.Example/Parsed.aspx.cs
+++ b/HTMLParser.Example/Parsed.aspx.cs
@@ -20,10 +20,9 @@ namespace HTMLParser.Example
{
ParserEx parse = new ParserEx();
- this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
- this.parsedYoutube.InnerHtml = parse.ParseYoutube(this.txtSource.Value);
- this.parsedUrlAndYoutube.InnerHtml = parse.ParseUrlAndYoutube(this.txtSource.Value);
+ this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
+ this.parsedYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First();
+ this.parsedUrlAndYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First() + parse.ParseUrl(this.txtSource.Value);
}
-
}
}
\ No newline at end of file
diff --git a/HTMLParser/ParserEx.cs b/HTMLParser/ParserEx.cs
index 3a03f2c..2af4dcb 100644
--- a/HTMLParser/ParserEx.cs
+++ b/HTMLParser/ParserEx.cs
@@ -1,20 +1,52 @@
using System;
+using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace HTMLParser
{
public class ParserEx
{
+ /// <summary>
+ /// Prevent use of Html tags by replacing every angle bracket with its HTML entity,
+ /// so markup in the text is displayed literally instead of being interpreted.
+ /// </summary>
+ /// <param name="article">Text that may contain HTML markup.</param>
+ /// <returns>The escaped text, or an empty string when <paramref name="article"/> is null or empty.</returns>
+ public string PreventHTML(string article)
+ {
+     if (string.IsNullOrEmpty(article)) return string.Empty;
+
+     // The replacement has to be the entity, not the bracket itself; otherwise the call is a no-op.
+     return article.Replace("<", "&lt;").Replace(">", "&gt;");
+ }
+
+ /// <summary>
+ /// Prevent use of Risky Tags by removing the words "script", "iframe" and "object"
+ /// from the text, regardless of letter case.
+ /// </summary>
+ /// <param name="article">Text to strip the risky tag names from.</param>
+ /// <returns>
+ /// The text with every occurrence removed, including occurrences that only appear
+ /// once an inner one has been removed (e.g. "scrscriptipt"); an empty string when
+ /// <paramref name="article"/> is null or empty.
+ /// </returns>
+ public string PreventRiskyTag(string article)
+ {
+     if (string.IsNullOrEmpty(article)) return string.Empty;
+
+     System.Text.StringBuilder cleaned = new System.Text.StringBuilder(article.Length);
+     foreach (char current in article)
+     {
+         cleaned.Append(current);
+
+         // A risky word can only be completed by the character just appended, so checking
+         // the tail after every append catches nested spellings in a single linear pass.
+         foreach (string word in RiskyTagNames)
+         {
+             if (EndsWithIgnoreCase(cleaned, word))
+             {
+                 cleaned.Length -= word.Length;
+                 break;
+             }
+         }
+     }
+
+     return cleaned.ToString();
+ }
+
+ /// <summary>Lower-case tag names that <see cref="PreventRiskyTag"/> removes.</summary>
+ private static readonly string[] RiskyTagNames = { "script", "iframe", "object" };
+
+ /// <summary>Checks whether the builder currently ends with the given lower-case suffix, ignoring case.</summary>
+ private static bool EndsWithIgnoreCase(System.Text.StringBuilder text, string suffix)
+ {
+     if (text.Length < suffix.Length) return false;
+
+     int offset = text.Length - suffix.Length;
+     for (int i = 0; i < suffix.Length; i++)
+     {
+         if (char.ToLowerInvariant(text[offset + i]) != suffix[i]) return false;
+     }
+
+     return true;
+ }
+
+ /// <summary>
+ /// Parse Url: runs the URL pattern over the text, then rewrites any href that
+ /// starts with "www." so that it starts with "http://www.".
+ /// </summary>
+ /// <param name="article">Text to scan for URLs.</param>
+ /// <returns>An empty string when <paramref name="article"/> is null or empty; otherwise the processed text.</returns>
public string ParseUrl(string article)
{
if (string.IsNullOrEmpty(article)) return string.Empty;
- string Pttrn = @"(((http|https|ftp|telnet|news)://|www\.)[^youtube][a-z0-9-]+.[][a-zA-Z0-9:@=_~%;?/.+-]+)";
- string Lnk = "$1";
+ // Matches either a scheme (http, https, ftp, gopher, telnet, file, notes, ms-help) followed by
+ // "//" or two backslashes and an optional "www.", or a bare "www." prefix, then a run of URL characters.
+ string pttrn = @"((?:(?:https?|http|ftp|gopher|telnet|file|notes|ms-help):(?://|\\\\)(?:www\.)?|www\.)[\w\d:#@%/;$()~_?\+,\-=\\.&]+)";
- return Regex.Replace(article, Pttrn, Lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
+ // NOTE(review): as written the replacement is the match itself ("$1"), so Regex.Replace leaves
+ // the text unchanged; the href fix-up on the return line suggests an anchor template was
+ // intended here - confirm against the repository.
+ string lnk = "$1";
+
+ // Case-insensitive replace with a 150 ms match timeout.
+ return Regex.Replace(article, pttrn, lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
}
+ /// <summary>
+ /// Parses YouTube URLs in the text into scripts.
+ /// </summary>
+ /// <param name="article"></param>
+ /// <returns></returns>
public string ParseYoutube(string article)
{
if (string.IsNullOrEmpty(article)) return string.Empty;
@@ -25,6 +57,22 @@ namespace HTMLParser
return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https:// GenerateYoutubeScripts(string article)
+ {
+ if (string.IsNullOrEmpty(article)) return null;
+
+ Regex regex = new Regex(@"youtu(?:\.be|be\.com)/(?:.*v(?:/|=)|(?:.*/)?)([a-zA-Z0-9-_]+)");
+ Match match = regex.Match(article);
+ List
scripts = new List();
+ while (match.Success)
+ {
+ scripts.Add(string.Format("", match.Value.Replace("youtu.be/", "")));
+ match = match.NextMatch();
+ }
+
+ return scripts;
+ }
+
public string ParseUrlAndYoutube(string article)
{
return this.ParseYoutube(this.ParseUrl(article));