1
0
mirror of https://github.com/rudollee/HTMLParser.git synced 2025-06-06 15:36:08 +00:00
This commit is contained in:
wook 2017-05-08 21:40:38 +09:00
parent d210f53583
commit feabbc7e1b
14 changed files with 449 additions and 8 deletions

View File

@ -0,0 +1 @@
<%@ Application Codebehind="Global.asax.cs" Inherits="HTMLParser.Example.Global" Language="C#" %>

View File

@ -0,0 +1,16 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.SessionState;
namespace HTMLParser.Example
{
public class Global : System.Web.HttpApplication
{
protected void Application_Start(object sender, EventArgs e)
{
}
}
}

View File

@ -0,0 +1,141 @@
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.3\build\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props" Condition="Exists('..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.3\build\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props')" />
<Import Project="..\packages\Microsoft.Net.Compilers.1.3.2\build\Microsoft.Net.Compilers.props" Condition="Exists('..\packages\Microsoft.Net.Compilers.1.3.2\build\Microsoft.Net.Compilers.props')" />
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>
</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{40A82F71-B960-46A7-ABFB-51624688A52A}</ProjectGuid>
<ProjectTypeGuids>{349c5851-65df-11da-9384-00065b846f21};{fae04ec0-301f-11d3-bf4b-00c04f79efbc}</ProjectTypeGuids>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>HTMLParser.Example</RootNamespace>
<AssemblyName>HTMLParser.Example</AssemblyName>
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
<UseIISExpress>true</UseIISExpress>
<IISExpressSSLPort />
<IISExpressAnonymousAuthentication />
<IISExpressWindowsAuthentication />
<IISExpressUseClassicPipelineMode />
<UseGlobalApplicationHostFile />
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="HTMLParser, Version=1.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\HTMLParser\bin\Release\netstandard1.4\HTMLParser.dll</HintPath>
</Reference>
<Reference Include="Microsoft.CodeDom.Providers.DotNetCompilerPlatform, Version=1.0.3.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
<HintPath>..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.3\lib\net45\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.dll</HintPath>
</Reference>
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Web.DynamicData" />
<Reference Include="System.Web.Entity" />
<Reference Include="System.Web.ApplicationServices" />
<Reference Include="System.ComponentModel.DataAnnotations" />
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Core" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Web.Extensions" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Drawing" />
<Reference Include="System.Web" />
<Reference Include="System.Xml" />
<Reference Include="System.Configuration" />
<Reference Include="System.Web.Services" />
<Reference Include="System.EnterpriseServices" />
</ItemGroup>
<ItemGroup>
<Content Include="packages.config" />
<None Include="Web.Debug.config">
<DependentUpon>Web.config</DependentUpon>
</None>
<None Include="Web.Release.config">
<DependentUpon>Web.config</DependentUpon>
</None>
</ItemGroup>
<ItemGroup>
<Content Include="Global.asax" />
<Content Include="Parsed.aspx" />
<Content Include="Web.config" />
</ItemGroup>
<ItemGroup>
<Compile Include="Global.asax.cs">
<DependentUpon>Global.asax</DependentUpon>
</Compile>
<Compile Include="Parsed.aspx.cs">
<DependentUpon>Parsed.aspx</DependentUpon>
<SubType>ASPXCodeBehind</SubType>
</Compile>
<Compile Include="Parsed.aspx.designer.cs">
<DependentUpon>Parsed.aspx</DependentUpon>
</Compile>
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<Folder Include="App_Data\" />
<Folder Include="Models\" />
</ItemGroup>
<PropertyGroup>
<VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">10.0</VisualStudioVersion>
<VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
</PropertyGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
<Import Project="$(VSToolsPath)\WebApplications\Microsoft.WebApplication.targets" Condition="'$(VSToolsPath)' != ''" />
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v10.0\WebApplications\Microsoft.WebApplication.targets" Condition="false" />
<ProjectExtensions>
<VisualStudio>
<FlavorProperties GUID="{349c5851-65df-11da-9384-00065b846f21}">
<WebProjectProperties>
<UseIIS>True</UseIIS>
<AutoAssignPort>True</AutoAssignPort>
<DevelopmentServerPort>7207</DevelopmentServerPort>
<DevelopmentServerVPath>/</DevelopmentServerVPath>
<IISUrl>http://localhost:7207/</IISUrl>
<NTLMAuthentication>False</NTLMAuthentication>
<UseCustomServer>False</UseCustomServer>
<CustomServerUrl>
</CustomServerUrl>
<SaveServerSettingsInUserFile>False</SaveServerSettingsInUserFile>
</WebProjectProperties>
</FlavorProperties>
</VisualStudio>
</ProjectExtensions>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\Microsoft.Net.Compilers.1.3.2\build\Microsoft.Net.Compilers.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Net.Compilers.1.3.2\build\Microsoft.Net.Compilers.props'))" />
<Error Condition="!Exists('..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.3\build\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.1.0.3\build\Microsoft.CodeDom.Providers.DotNetCompilerPlatform.props'))" />
</Target>
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

View File

@ -0,0 +1,32 @@
<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="Parsed.aspx.cs" Inherits="HTMLParser.Example.Parsed" %>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title></title>
<style type="text/css">
#source {
width: 293px;
height: 99px;
}
</style>
</head>
<body>
<form id="form1" runat="server">
source<br />
<textarea id="txtSource" runat="server" style="width: 100%; min-height: 50px;"></textarea><br />
url parsed
<div id="ParsedUrl" runat="server" style="width: 100%; min-height: 50px; margin: 5px; padding: 5px; border: 1px solid #ddd" >&nbsp;</div>
youtube parsed
<div id="parsedYoutube" runat="server" style="width: 100%; min-height: 50px; margin: 5px; padding: 5px; border: 1px solid #ddd" >&nbsp;</div>
url and youtube parsed
<div id="parsedUrlAndYoutube" runat="server" style="width: 100%; min-height: 50px; margin: 5px; padding: 5px; border: 1px solid #ddd" >&nbsp;</div>
<asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="Parse" />
</form>
</body>
</html>

View File

@ -0,0 +1,29 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using HTMLParser;
using System.Text.RegularExpressions;
namespace HTMLParser.Example
{
public partial class Parsed : System.Web.UI.Page
{
protected void Page_Load(object sender, EventArgs e)
{
}
protected void Button1_Click(object sender, EventArgs e)
{
ParserEx parse = new ParserEx();
this.ParsedUrl.InnerHtml = parse.ParseUrl(this.txtSource.Value);
this.parsedYoutube.InnerHtml = parse.ParseYoutube(this.txtSource.Value);
this.parsedUrlAndYoutube.InnerHtml = parse.ParseUrlAndYoutube(this.txtSource.Value);
}
}
}

View File

@ -0,0 +1,69 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace HTMLParser.Example {
public partial class Parsed {
/// <summary>
/// form1 control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// </remarks>
protected global::System.Web.UI.HtmlControls.HtmlForm form1;
/// <summary>
/// txtSource control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// </remarks>
protected global::System.Web.UI.HtmlControls.HtmlTextArea txtSource;
/// <summary>
/// ParsedUrl control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// </remarks>
protected global::System.Web.UI.HtmlControls.HtmlGenericControl ParsedUrl;
/// <summary>
/// parsedYoutube control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// </remarks>
protected global::System.Web.UI.HtmlControls.HtmlGenericControl parsedYoutube;
/// <summary>
/// parsedUrlAndYoutube control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// </remarks>
protected global::System.Web.UI.HtmlControls.HtmlGenericControl parsedUrlAndYoutube;
/// <summary>
/// Button1 control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// </remarks>
protected global::System.Web.UI.WebControls.Button Button1;
}
}

View File

@ -0,0 +1,35 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("HTMLParser.Example")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("HTMLParser.Example")]
[assembly: AssemblyCopyright("Copyright © 2017")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("40a82f71-b960-46a7-abfb-51624688a52a")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Revision and Build Numbers
// by using the '*' as shown below:
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- For more information on using web.config transformation visit https://go.microsoft.com/fwlink/?LinkId=125889 -->
<configuration xmlns:xdt="http://schemas.microsoft.com/XML-Document-Transform">
<!--
In the example below, the "SetAttributes" transform will change the value of
"connectionString" to use "ReleaseSQLServer" only when the "Match" locator
finds an attribute "name" that has a value of "MyDB".
<connectionStrings>
<add name="MyDB"
connectionString="Data Source=ReleaseSQLServer;Initial Catalog=MyReleaseDB;Integrated Security=True"
xdt:Transform="SetAttributes" xdt:Locator="Match(name)"/>
</connectionStrings>
-->
<system.web>
<!--
In the example below, the "Replace" transform will replace the entire
<customErrors> section of your web.config file.
Note that because there is only one customErrors section under the
<system.web> node, there is no need to use the "xdt:Locator" attribute.
<customErrors defaultRedirect="GenericError.htm"
mode="RemoteOnly" xdt:Transform="Replace">
<error statusCode="500" redirect="InternalError.htm"/>
</customErrors>
-->
</system.web>
</configuration>

View File

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- For more information on using web.config transformation visit https://go.microsoft.com/fwlink/?LinkId=125889 -->
<configuration xmlns:xdt="http://schemas.microsoft.com/XML-Document-Transform">
<!--
In the example below, the "SetAttributes" transform will change the value of
"connectionString" to use "ReleaseSQLServer" only when the "Match" locator
finds an attribute "name" that has a value of "MyDB".
<connectionStrings>
<add name="MyDB"
connectionString="Data Source=ReleaseSQLServer;Initial Catalog=MyReleaseDB;Integrated Security=True"
xdt:Transform="SetAttributes" xdt:Locator="Match(name)"/>
</connectionStrings>
-->
<system.web>
<compilation xdt:Transform="RemoveAttributes(debug)" />
<!--
In the example below, the "Replace" transform will replace the entire
<customErrors> section of your web.config file.
Note that because there is only one customErrors section under the
<system.web> node, there is no need to use the "xdt:Locator" attribute.
<customErrors defaultRedirect="GenericError.htm"
mode="RemoteOnly" xdt:Transform="Replace">
<error statusCode="500" redirect="InternalError.htm"/>
</customErrors>
-->
</system.web>
</configuration>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
For more information on how to configure your ASP.NET application, please visit
https://go.microsoft.com/fwlink/?LinkId=169433
-->
<configuration>
<system.web>
<compilation debug="true" targetFramework="4.5.2"/>
<httpRuntime targetFramework="4.5.2"/>
</system.web>
<system.codedom>
<compilers>
<compiler language="c#;cs;csharp" extension=".cs"
type="Microsoft.CodeDom.Providers.DotNetCompilerPlatform.CSharpCodeProvider, Microsoft.CodeDom.Providers.DotNetCompilerPlatform, Version=1.0.3.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35"
warningLevel="4" compilerOptions="/langversion:6 /nowarn:1659;1699;1701"/>
<compiler language="vb;vbs;visualbasic;vbscript" extension=".vb"
type="Microsoft.CodeDom.Providers.DotNetCompilerPlatform.VBCodeProvider, Microsoft.CodeDom.Providers.DotNetCompilerPlatform, Version=1.0.3.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35"
warningLevel="4" compilerOptions="/langversion:14 /nowarn:41008 /define:_MYTYPE=\&quot;Web\&quot; /optionInfer+"/>
</compilers>
</system.codedom>
</configuration>

View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.CodeDom.Providers.DotNetCompilerPlatform" version="1.0.3" targetFramework="net452" />
<package id="Microsoft.Net.Compilers" version="1.3.2" targetFramework="net452" developmentDependency="true" />
</packages>

View File

@ -5,6 +5,8 @@ VisualStudioVersion = 15.0.26403.7
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HTMLParser", "HTMLParser\HTMLParser.csproj", "{0D713178-8D10-4521-BA52-DA3B3F5071CD}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HTMLParser.Example", "HTMLParser.Example\HTMLParser.Example.csproj", "{40A82F71-B960-46A7-ABFB-51624688A52A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -15,6 +17,10 @@ Global
{0D713178-8D10-4521-BA52-DA3B3F5071CD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0D713178-8D10-4521-BA52-DA3B3F5071CD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0D713178-8D10-4521-BA52-DA3B3F5071CD}.Release|Any CPU.Build.0 = Release|Any CPU
{40A82F71-B960-46A7-ABFB-51624688A52A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{40A82F71-B960-46A7-ABFB-51624688A52A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{40A82F71-B960-46A7-ABFB-51624688A52A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{40A82F71-B960-46A7-ABFB-51624688A52A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -1,8 +0,0 @@
using System;
namespace HTMLParser
{
public class Class1
{
}
}

33
HTMLParser/ParserEx.cs Normal file
View File

@ -0,0 +1,33 @@
using System;
using System.Text.RegularExpressions;
namespace HTMLParser
{
public class ParserEx
{
public string ParseUrl(string article)
{
if (string.IsNullOrEmpty(article)) return string.Empty;
string Pttrn = @"(((http|https|ftp|telnet|news)://|www\.)[^youtube][a-z0-9-]+.[][a-zA-Z0-9:&#@=_~%;?/.+-]+)";
string Lnk = "<a href=\"$1\" target=\"_blank\">$1</a>";
return Regex.Replace(article, Pttrn, Lnk, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("href=\"www.", "href=\"http://www.");
}
public string ParseYoutube(string article)
{
if (string.IsNullOrEmpty(article)) return string.Empty;
string pttrn = @"youtu(?:\.be|be\.com)/(?:.*v(?:/|=)|(?:.*/)?)([a-zA-Z0-9-_]+)";
string script = "<div class=\"youtube\" ><iframe width=\"600\" height=\"338\" src=\"https://www.youtube.com/embed/$1\" frameborder=\"0\" allowfullscreen></iframe></div>";
return Regex.Replace(article, pttrn, script, RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(150)).Replace("https://<div class=\"youtube\"", "<div class=\"youtube\"");
}
public string ParseUrlAndYoutube(string article)
{
return this.ParseYoutube(this.ParseUrl(article));
}
}
}