mirror of
https://github.com/rudollee/HTMLParser.git
synced 2025-06-07 16:06:09 +00:00
new pattern for url
This commit is contained in:
parent
feabbc7e1b
commit
fd30db4aa2
@ -6,9 +6,8 @@
|
|||||||
<head runat="server">
|
<head runat="server">
|
||||||
<title></title>
|
<title></title>
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
#source {
|
body {
|
||||||
width: 293px;
|
width: 600px;
|
||||||
height: 99px;
|
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
@ -21,9 +21,8 @@ namespace HTMLParser.Example
|
|||||||
ParserEx parse = new ParserEx();
|
ParserEx parse = new ParserEx();
|
||||||
|
|
||||||
this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
|
this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
|
||||||
this.parsedYoutube.InnerHtml = parse.ParseYoutube(this.txtSource.Value);
|
this.parsedYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First();
|
||||||
this.parsedUrlAndYoutube.InnerHtml = parse.ParseUrlAndYoutube(this.txtSource.Value);
|
this.parsedUrlAndYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First() + parse.ParseUrl(this.txtSource.Value);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,20 +1,52 @@
|
|||||||
using System;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace HTMLParser
|
namespace HTMLParser
|
||||||
{
|
{
|
||||||
public class ParserEx
|
public class ParserEx
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Prevent use of Html tags
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="article"></param>
|
||||||
|
/// <returns></returns>
|
||||||
|
public string PreventHTML(string article)
|
||||||
|
{
|
||||||
|
return article.Replace("<", "<").Replace(">", ">");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Prevent use of Risky Tags
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="article"></param>
|
||||||
|
/// <returns></returns>
|
||||||
|
public string PreventRiskyTag(string article)
|
||||||
|
{
|
||||||
|
return article.Replace("script", "").Replace("iframe", "").Replace("object", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Parse Url
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="article"></param>
|
||||||
|
/// <returns></returns>
|
||||||
public string ParseUrl(string article)
|
public string ParseUrl(string article)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrEmpty(article)) return string.Empty;
|
if (string.IsNullOrEmpty(article)) return string.Empty;
|
||||||
|
|
||||||
string Pttrn = @"(((http|https|ftp|telnet|news)://|www\.)[^youtube][a-z0-9-]+.[][a-zA-Z0-9:&#@=_~%;?/.+-]+)";
|
string pttrn = @"((?:(?:https?|http|ftp|gopher|telnet|file|notes|ms-help):(?://|\\\\)(?:www\.)?|www\.)[\w\d:#@%/;$()~_?\+,\-=\\.&]+)";
|
||||||
string Lnk = "<a href=\"$1\" target=\"_blank\">$1</a>";
|
|
||||||
|
|
||||||
return Regex.Replace(article, Pttrn, Lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
|
string lnk = "<a href=\"$1\" target=\"_blank\">$1</a>";
|
||||||
|
|
||||||
|
return Regex.Replace(article, pttrn, lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Parse Youtube Url to script
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="article"></param>
|
||||||
|
/// <returns></returns>
|
||||||
public string ParseYoutube(string article)
|
public string ParseYoutube(string article)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrEmpty(article)) return string.Empty;
|
if (string.IsNullOrEmpty(article)) return string.Empty;
|
||||||
@ -25,6 +57,22 @@ namespace HTMLParser
|
|||||||
return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\"");
|
return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<string> GenerateYoutubeScripts(string article)
|
||||||
|
{
|
||||||
|
if (string.IsNullOrEmpty(article)) return null;
|
||||||
|
|
||||||
|
Regex regex = new Regex(@"youtu(?:\.be|be\.com)/(?:.*v(?:/|=)|(?:.*/)?)([a-zA-Z0-9-_]+)");
|
||||||
|
Match match = regex.Match(article);
|
||||||
|
List<string> scripts = new List<string>();
|
||||||
|
while (match.Success)
|
||||||
|
{
|
||||||
|
scripts.Add(string.Format("<div class=\"youtube\" ><iframe src=\"https://www.youtube.com/embed/{0}\" frameborder=\"0\" allowfullscreen></iframe></div>", match.Value.Replace("youtu.be/", "")));
|
||||||
|
match = match.NextMatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
return scripts;
|
||||||
|
}
|
||||||
|
|
||||||
public string ParseUrlAndYoutube(string article)
|
public string ParseUrlAndYoutube(string article)
|
||||||
{
|
{
|
||||||
return this.ParseYoutube(this.ParseUrl(article));
|
return this.ParseYoutube(this.ParseUrl(article));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user