1
0
mirror of https://github.com/rudollee/HTMLParser.git synced 2025-06-07 16:06:09 +00:00

new pattern for url

This commit is contained in:
wook 2017-05-10 13:33:58 +09:00
parent feabbc7e1b
commit fd30db4aa2
3 changed files with 56 additions and 10 deletions

View File

@ -6,9 +6,8 @@
<head runat="server"> <head runat="server">
<title></title> <title></title>
<style type="text/css"> <style type="text/css">
#source { body {
width: 293px; width: 600px;
height: 99px;
} }
</style> </style>
</head> </head>

View File

@ -21,9 +21,8 @@ namespace HTMLParser.Example
ParserEx parse = new ParserEx(); ParserEx parse = new ParserEx();
this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value); this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
this.parsedYoutube.InnerHtml = parse.ParseYoutube(this.txtSource.Value); this.parsedYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First();
this.parsedUrlAndYoutube.InnerHtml = parse.ParseUrlAndYoutube(this.txtSource.Value); this.parsedUrlAndYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First() + parse.ParseUrl(this.txtSource.Value);
} }
} }
} }

View File

@ -1,20 +1,52 @@
using System; using System;
using System.Collections.Generic;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
namespace HTMLParser namespace HTMLParser
{ {
public class ParserEx public class ParserEx
{ {
/// <summary>
/// Neutralizes HTML markup by replacing angle brackets with their entities.
/// </summary>
/// <param name="article">Text to encode; null or empty input yields an empty string.</param>
/// <returns>The article with every "&lt;" replaced by "&amp;lt;" and every "&gt;" by "&amp;gt;".</returns>
public string PreventHTML(string article)
{
    // Same null/empty contract as ParseUrl and ParseYoutube; avoids a NullReferenceException.
    if (string.IsNullOrEmpty(article)) return string.Empty;

    return article.Replace("<", "&lt;").Replace(">", "&gt;");
}
/// <summary>
/// Strips the words "script", "iframe" and "object" from the article, ignoring case.
/// </summary>
/// <remarks>
/// This is a blunt word filter: the words are removed wherever they occur, including
/// in ordinary prose. It does not encode markup; use <c>PreventHTML</c> for that.
/// </remarks>
/// <param name="article">Text to filter; null or empty input yields an empty string.</param>
/// <returns>The article with every occurrence of the three words removed.</returns>
public string PreventRiskyTag(string article)
{
    if (string.IsNullOrEmpty(article)) return string.Empty;

    string previous;
    string current = article;
    do
    {
        previous = current;
        // CultureInvariant keeps "SCRIPT"/"IFRAME" matching under cultures with special
        // casing rules for 'I' (e.g. Turkish).
        current = Regex.Replace(
            previous,
            "script|iframe|object",
            string.Empty,
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    }
    // Repeat until stable: removing a word can splice a new one together
    // ("scrscriptipt" -> "script"). Each productive pass shortens the text, so this terminates.
    while (current.Length != previous.Length);

    return current;
}
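/// <summary>
/// Wraps every URL found in the article in an anchor tag that opens in a new window.
/// </summary>
/// <param name="article">Text to scan; null or empty input yields an empty string.</param>
/// <returns>The article with each matched URL turned into a link; hrefs that start with "www." are prefixed with "http://".</returns>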
public string ParseUrl(string article) public string ParseUrl(string article)
{ {
if (string.IsNullOrEmpty(article)) return string.Empty; if (string.IsNullOrEmpty(article)) return string.Empty;
string Pttrn = @"(((http|https|ftp|telnet|news)://|www\.)[^youtube][a-z0-9-]+.[][a-zA-Z0-9:&#@=_~%;?/.+-]+)"; string pttrn = @"((?:(?:https?|http|ftp|gopher|telnet|file|notes|ms-help):(?://|\\\\)(?:www\.)?|www\.)[\w\d:#@%/;$()~_?\+,\-=\\.&]+)";
string Lnk = "<a href=\"$1\" target=\"_blank\">$1</a>";
return Regex.Replace(article, Pttrn, Lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www."); string lnk = "<a href=\"$1\" target=\"_blank\">$1</a>";
return Regex.Replace(article, pttrn, lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
} }
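/// <summary>
/// Replaces YouTube URLs in the article with embed markup.
/// </summary>
/// <param name="article">Text to scan; null or empty input yields an empty string.</param>
/// <returns>The article after the YouTube pattern has been replaced by the embed markup.</returns>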
public string ParseYoutube(string article) public string ParseYoutube(string article)
{ {
if (string.IsNullOrEmpty(article)) return string.Empty; if (string.IsNullOrEmpty(article)) return string.Empty;
@ -25,6 +57,22 @@ namespace HTMLParser
return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\""); return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\"");
} }
/// <summary>
/// Builds one YouTube embed snippet for every YouTube URL found in the article.
/// </summary>
/// <param name="article">Text to scan for youtu.be / youtube.com URLs.</param>
/// <returns>
/// One embed snippet per URL, in order of appearance. The list is empty (never null)
/// when the article is null, empty, or contains no YouTube URL.
/// </returns>
public List<string> GenerateYoutubeScripts(string article)
{
    List<string> scripts = new List<string>();

    // Return an empty list rather than null so callers can enumerate the result safely.
    if (string.IsNullOrEmpty(article)) return scripts;

    // \S instead of '.' keeps a match inside a single URL, so several URLs on one line
    // are all found. Group 1 is the video id: whatever follows "v=" / "v/", or else the
    // last path segment.
    Regex regex = new Regex(
        @"youtu(?:\.be|be\.com)/(?:\S*v(?:/|=)|(?:\S*/)?)([a-zA-Z0-9_-]+)",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    for (Match match = regex.Match(article); match.Success; match = match.NextMatch())
    {
        // Use the captured id, not the whole match: the whole match still contains the
        // host and path for youtube.com URLs.
        string videoId = match.Groups[1].Value;
        scripts.Add(string.Format("<div class=\"youtube\" ><iframe src=\"https://www.youtube.com/embed/{0}\" frameborder=\"0\" allowfullscreen></iframe></div>", videoId));
    }

    return scripts;
}
public string ParseUrlAndYoutube(string article) public string ParseUrlAndYoutube(string article)
{ {
return this.ParseYoutube(this.ParseUrl(article)); return this.ParseYoutube(this.ParseUrl(article));