// I had often heard of other people writing scraper programs; today I am trying my hand at one too.
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Data.SqlClient;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;

namespace Temp
{
    /// <summary>
    /// Page that, on its first (non-postback) load, downloads the bus-line pages
    /// numbered 1 through 999 from beijing.ibusdb.com, strips the markup from the
    /// body portion of each page and inserts the resulting text into the
    /// BeiJingBus table.
    /// </summary>
    public class GetHtmlSourceFromUrl : System.Web.UI.Page
    {
        /// <summary>Connection string of the database that holds the BeiJingBus table.</summary>
        private const string ConnectionString = @"Server=.xxxx;User ID=xxxx;Pwd=xxxxx;DataBase=Map";

        /// <summary>Parameterized insert statement for one bus line.</summary>
        private const string InsertSql = "insert into BeiJingBus(BusLineNumber,Html) Values(@BLN,@Html)";

        /// <summary>First bus-line number that is requested.</summary>
        private const int FirstBusLine = 1;

        /// <summary>Exclusive upper bound of the bus-line numbers that are requested.</summary>
        private const int BusLineLimit = 1000;

        /// <summary>
        /// Character offset at which the body text starts; per the original author's
        /// note, everything before it is advertising to be skipped.
        /// </summary>
        private const int BodyStartOffset = 3487;

        /// <summary>
        /// Number of trailing characters to cut off; per the original author's note,
        /// this tail is advertising as well.
        /// </summary>
        private const int TrailerLength = 4222;

        /// <summary>Matches a single HTML tag so that it can be removed from the body.</summary>
        private static readonly Regex TagPattern = new Regex(@"<[^>]+>");

        private SqlConnection con;

        /// <summary>
        /// Starts the import when the page is requested for the first time
        /// (that is, when the request is not a postback).
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data (unused).</param>
        private void Page_Load(object sender, System.EventArgs e)
        {
            // Put user code to initialize the page here.
            if (!IsPostBack)
            {
                InsertToDB();
            }
        }

        /// <summary>
        /// Downloads every bus-line page in the configured range and stores the
        /// tag-stripped body of each page that is long enough to contain one.
        /// Stops at the first <see cref="SqlException"/>, after writing its message
        /// to the response. Download errors are not caught and propagate to the caller.
        /// </summary>
        private void InsertToDB()
        {
            for (int i = FirstBusLine; i < BusLineLimit; i++)
            {
                string content = DownloadBusLinePage(i);
                string bodyText = ExtractBodyText(content);
                if (bodyText == null)
                {
                    // Page too short to contain a body: bus line i does not exist.
                    continue;
                }

                try
                {
                    SaveBusLine(i, bodyText);
                }
                catch (SqlException err)
                {
                    Response.Write(err.Message);
                    break;
                }
            }
        }

        /// <summary>
        /// Downloads the page for one bus line and decodes it as gb2312.
        /// </summary>
        /// <param name="busLine">Bus-line number placed in the query string.</param>
        /// <returns>The complete response body as text.</returns>
        private static string DownloadBusLinePage(int busLine)
        {
            // This fetches Beijing; go to the site's home page and change the
            // parameters to fetch other cities instead.
            string url = @"http://beijing.ibusdb.com/?busline=" + busLine + "&s=busline&x=31&y=18";

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // The using blocks release the response and the reader even when reading fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312")))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Cuts the fixed-size leading and trailing sections off a downloaded page
        /// and removes all HTML tags from what remains.
        /// </summary>
        /// <param name="content">Full text of the downloaded page.</param>
        /// <returns>
        /// The tag-stripped body text, or <c>null</c> when the page is too short to
        /// contain a body (treated as "this bus line does not exist").
        /// </returns>
        private static string ExtractBodyText(string content)
        {
            int start = BodyStartOffset;
            int end = content.Length - TrailerLength;
            if (end - start <= 0)
            {
                return null;
            }

            // Strip the HTML markup.
            return TagPattern.Replace(content.Substring(start, end - start), "");
        }

        /// <summary>
        /// Inserts one bus line into the BeiJingBus table.
        /// </summary>
        /// <param name="busLine">Value for the BusLineNumber column.</param>
        /// <param name="text">Value for the Html column.</param>
        /// <exception cref="SqlException">Opening the connection or running the insert failed.</exception>
        private void SaveBusLine(int busLine, string text)
        {
            con = new SqlConnection(ConnectionString);
            try
            {
                using (SqlCommand cmd = new SqlCommand(InsertSql, con))
                {
                    cmd.Parameters.Add("@BLN", SqlDbType.Int).Value = busLine;

                    // The Html column is nvarchar(max); a Unicode parameter (size -1 = max)
                    // avoids the code-page conversion that SqlDbType.Text would apply.
                    cmd.Parameters.Add("@Html", SqlDbType.NVarChar, -1).Value = text;

                    con.Open();
                    cmd.ExecuteNonQuery();
                }
            }
            finally
            {
                // Runs on success and on every kind of exception, so the connection never stays open.
                con.Close();
            }
        }

        // NOTE(review): the original listing ends the class here without showing any
        // designer-generated code; presumably Page_Load is wired up elsewhere
        // (AutoEventWireup or an InitializeComponent method) - confirm.
    }
}

/*
-- Table SQL script:
CREATE TABLE [dbo].[BeiJingBus](
    [id] [int] IDENTITY(1,1) NOT NULL,
    [BusLineNumber] [int] NULL,
    [Html] [nvarchar](max) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
*/