mirror of
https://github.com/rudollee/HTMLParser.git
synced 2025-06-07 07:56:09 +00:00
new pattern for url
This commit is contained in:
parent
feabbc7e1b
commit
fd30db4aa2
@ -6,9 +6,8 @@
|
||||
<head runat="server">
|
||||
<title></title>
|
||||
<style type="text/css">
|
||||
#source {
|
||||
width: 293px;
|
||||
height: 99px;
|
||||
body {
|
||||
width: 600px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
@ -20,10 +20,9 @@ namespace HTMLParser.Example
|
||||
{
|
||||
ParserEx parse = new ParserEx();
|
||||
|
||||
this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
|
||||
this.parsedYoutube.InnerHtml = parse.ParseYoutube(this.txtSource.Value);
|
||||
this.parsedUrlAndYoutube.InnerHtml = parse.ParseUrlAndYoutube(this.txtSource.Value);
|
||||
this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
|
||||
this.parsedYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First();
|
||||
this.parsedUrlAndYoutube.InnerHtml = parse.GenerateYoutubeScripts(this.txtSource.Value).First() + parse.ParseUrl(this.txtSource.Value);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -1,20 +1,52 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace HTMLParser
|
||||
{
|
||||
public class ParserEx
|
||||
{
|
||||
/// <summary>
/// Neutralizes HTML markup by turning angle brackets into character entities,
/// so that tags typed into the text are displayed instead of being rendered.
/// </summary>
/// <param name="article">Raw text that may contain HTML tags.</param>
/// <returns>
/// The text with every '&lt;' replaced by "&amp;lt;" and every '&gt;' replaced by "&amp;gt;";
/// an empty string when <paramref name="article"/> is null or empty.
/// </returns>
public string PreventHTML(string article)
{
    // Same null/empty contract as ParseUrl, rather than a NullReferenceException.
    if (string.IsNullOrEmpty(article)) return string.Empty;

    // The replacement has to be the entity: swapping a bracket for itself leaves tags intact.
    return article.Replace("<", "&lt;").Replace(">", "&gt;");
}
|
||||
|
||||
/// <summary>
/// Removes the words "script", "iframe" and "object" from the text, in any letter case.
/// </summary>
/// <remarks>
/// This is a plain word filter: the words are removed wherever they occur, including
/// ordinary prose, and the surrounding angle brackets are left in place.
/// </remarks>
/// <param name="article">Text to filter.</param>
/// <returns>
/// The text with every occurrence of the three words removed;
/// an empty string when <paramref name="article"/> is null or empty.
/// </returns>
public string PreventRiskyTag(string article)
{
    // Same null/empty contract as ParseUrl, rather than a NullReferenceException.
    if (string.IsNullOrEmpty(article)) return string.Empty;

    const string riskyWords = "script|iframe|object";

    // CultureInvariant keeps "IFRAME" matching "iframe" under cultures with special i/I casing.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    string current = article;
    string previous;
    do
    {
        // Removing a word can splice a new one together ("scrscriptipt" -> "script"),
        // so repeat until a pass removes nothing. Each pass shortens the string, so this ends.
        previous = current;
        current = Regex.Replace(previous, riskyWords, string.Empty, options);
    }
    while (current.Length != previous.Length);

    return current;
}
|
||||
|
||||
/// <summary>
/// Wraps every URL found in the text in an anchor tag that opens in a new window.
/// </summary>
/// <remarks>
/// A URL is either a scheme (http, https, ftp, gopher, telnet, file, notes, ms-help)
/// followed by "://" or two backslashes, or a bare "www." prefix. Bare "www." links get
/// "http://" prepended in the href so the browser does not treat them as relative paths.
/// </remarks>
/// <param name="article">Text that may contain URLs.</param>
/// <returns>
/// The text with each URL replaced by an anchor element;
/// an empty string when <paramref name="article"/> is null or empty.
/// </returns>
/// <exception cref="RegexMatchTimeoutException">Matching took longer than 150 ms.</exception>
public string ParseUrl(string article)
{
    if (string.IsNullOrEmpty(article)) return string.Empty;

    // Only the current pattern is kept. The superseded one used "[^youtube]", which is a
    // character class (not a negative look-ahead) and therefore rejected every host whose
    // first letter is y, o, u, t, b or e, and its early return made this pattern unreachable.
    string pttrn = @"((?:(?:https?|http|ftp|gopher|telnet|file|notes|ms-help):(?://|\\\\)(?:www\.)?|www\.)[\w\d:#@%/;$()~_?\+,\-=\\.&]+)";
    string lnk = "<a href=\"$1\" target=\"_blank\">$1</a>";

    return Regex.Replace(article, pttrn, lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse Youtube Url to script
|
||||
/// </summary>
|
||||
/// <param name="article"></param>
|
||||
/// <returns></returns>
|
||||
public string ParseYoutube(string article)
|
||||
{
|
||||
if (string.IsNullOrEmpty(article)) return string.Empty;
|
||||
@ -25,6 +57,22 @@ namespace HTMLParser
|
||||
return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\"");
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds one embeddable player snippet for every YouTube link found in the text.
/// </summary>
/// <remarks>
/// Recognizes "youtu.be/ID" and "youtube.com/..." links, where the ID follows "v=", "v/"
/// or the last path separator. The ID is made of letters, digits, '-' and '_'.
/// </remarks>
/// <param name="article">Text that may contain YouTube links.</param>
/// <returns>
/// The snippets in the order the links appear; an empty list when
/// <paramref name="article"/> is null or empty or contains no YouTube link.
/// </returns>
public List<string> GenerateYoutubeScripts(string article)
{
    List<string> scripts = new List<string>();

    // Return an empty list rather than null so callers can enumerate the result unconditionally.
    if (string.IsNullOrEmpty(article)) return scripts;

    // \S* (instead of .*) keeps a match inside one whitespace-delimited link, so several
    // links on the same line are reported separately instead of collapsing into the last one.
    Regex regex = new Regex(@"youtu(?:\.be|be\.com)/(?:\S*v(?:/|=)|(?:\S*/)?)([a-zA-Z0-9_-]+)");

    for (Match match = regex.Match(article); match.Success; match = match.NextMatch())
    {
        // Group 1 is the video id. The whole match also contains the host and path, which
        // would produce a broken embed address for "youtube.com/watch?v=..." links.
        scripts.Add(string.Format("<div class=\"youtube\" ><iframe src=\"https://www.youtube.com/embed/{0}\" frameborder=\"0\" allowfullscreen></iframe></div>", match.Groups[1].Value));
    }

    return scripts;
}
|
||||
|
||||
public string ParseUrlAndYoutube(string article)
|
||||
{
|
||||
return this.ParseYoutube(this.ParseUrl(article));
|
||||
|
Loading…
x
Reference in New Issue
Block a user