Mega Code Archive

 
Categories / ASP.Net Tutorial / Development
 

Read page headings (scrape the heading elements from a URL)

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="ScrapeHeadings" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
  <title>Scraping Headings</title>
</head>
<body>
    <form id="form1" runat="server">
      <asp:Panel ID="panUrl" runat="server" GroupingText="Search" CssClass="myPanel">
         Enter Url:
         <asp:TextBox ID="txtUrl" runat="server" Columns="50"></asp:TextBox><br />
         <asp:Button ID="btnSearch" runat="server" OnClick="btnSearch_Click" Text="Search" />
      </asp:Panel>

      <asp:Panel ID="panHeadings"
                 runat="server"
                 GroupingText="Headings in this Url"
                 CssClass="myPanel" >
         <asp:Literal ID="litContent" runat="server" />
      </asp:Panel>
    </form>
</body>
</html>

File: Default.aspx.cs

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Net;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Code-behind for a page that downloads the document at a user-supplied URL
/// and lists every heading element (h1 to h6) found in it.
/// </summary>
public partial class ScrapeHeadings : System.Web.UI.Page
{
   /// <summary>
   /// Matches one complete heading element.
   /// The lazy quantifier stops at the first matching close tag, so two
   /// headings on the same line are reported separately; the back-reference
   /// pairs an opening tag with a close tag of the same level; Singleline lets
   /// a heading span several lines; [^>]* accepts attributes on the open tag.
   /// </summary>
   private static readonly Regex HeadingPattern = new Regex(
      @"<h([1-6])\b[^>]*>.*?</h\1\s*>",
      RegexOptions.IgnoreCase | RegexOptions.Singleline);

   /// <summary>
   /// Clears the output literal on every request, before any click handler runs.
   /// </summary>
   protected void Page_Load(object sender, EventArgs e)
   {
      litContent.Text = "";
   }

   /// <summary>
   /// Downloads the page named in <c>txtUrl</c> and writes each heading found,
   /// HTML-encoded and separated by line breaks, into <c>litContent</c>.
   /// Any failure is reported as a "Could not connect" message.
   /// </summary>
   protected void btnSearch_Click(object sender, EventArgs e)
   {
      try
      {
         // Only fetch absolute http/https addresses. Without this check the
         // user could point WebClient at file:// or other schemes and make
         // the server read its own local resources.
         Uri target;
         if (!Uri.TryCreate(txtUrl.Text, UriKind.Absolute, out target) ||
             (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
         {
            ShowConnectionError();
            return;
         }

         string content;
         // WebClient is IDisposable; release it as soon as the download ends.
         using (WebClient client = new WebClient())
         {
            content = client.DownloadString(target);
         }

         litContent.Text = FormatHeadings(content);
      }
      catch (Exception)
      {
         // Deliberate best-effort: any download failure is shown to the user
         // as a connection problem instead of an unhandled error page.
         ShowConnectionError();
      }
   }

   /// <summary>
   /// Builds the display markup for every heading found in <paramref name="html"/>.
   /// </summary>
   /// <param name="html">The downloaded document text.</param>
   /// <returns>Each matched heading HTML-encoded and followed by a line break.</returns>
   private static string FormatHeadings(string html)
   {
      StringBuilder output = new StringBuilder();
      foreach (Match m in HeadingPattern.Matches(html))
      {
         // Encode the scraped markup so it is displayed as text, not rendered.
         output.Append(HttpUtility.HtmlEncode(m.Value)).Append("<br/>");
      }
      return output.ToString();
   }

   /// <summary>
   /// Shows the failure message. The URL is user input echoed into a Literal,
   /// so it must be HTML-encoded to avoid reflected script injection.
   /// </summary>
   private void ShowConnectionError()
   {
      litContent.Text = "Could not connect to " + HttpUtility.HtmlEncode(txtUrl.Text);
   }
}