using System;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace web
{
/// <summary>
/// Code-behind page that downloads a book listing page, follows the product links found on it,
/// and writes selected fields of every product page (publication data, prices, synopsis,
/// table of contents, cover image URLs) to the response.
/// </summary>
public partial class dang : System.Web.UI.Page
{
    /// <summary>Options for the regexes that cut whole regions out of a downloaded page.</summary>
    private const RegexOptions CutOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    /// <summary>Address of the listing page. Placeholder value; replace with the real URL.</summary>
    private const string ListPageUrl = @"网址";

    /// <summary>
    /// Fields written for every product page, in output order. Each entry is the literal markup
    /// that precedes the value and the literal markup that follows it.
    /// </summary>
    private static readonly FieldPattern[] ProductFields = new FieldPattern[]
    {
        new FieldPattern("<li>出版时间:", "</li>"),   // publication date
        new FieldPattern("<li>字 数:", "</li>"),      // word count
        new FieldPattern("<li>版 次:", "</li>"),      // edition
        new FieldPattern("<li>页 数:", "</li>"),      // page count
        new FieldPattern("<li>印刷时间:", "</li>"),   // printing date
        new FieldPattern("<li>开 本:", "</li>"),      // book format
        new FieldPattern("<li>印 次:", "</li>"),      // impression
        // Fixed: the original pattern was "<li纸 张:" (missing '>'), so this field never matched.
        new FieldPattern("<li>纸 张:", "</li>"),      // paper
        new FieldPattern("<li>I S B N :", "</li>"),
        new FieldPattern("<li>包 装:", "</li>"),      // binding
        new FieldPattern("<div id='publisher_'>出 版 社:", "</div>"),   // publisher
        new FieldPattern("<div id='author_' >作 者:", "</div>"),        // author
        new FieldPattern(@"<span class=""gray87"">定价:<span class=""del"">", "</span></span>"),   // list price
        // The sale price is located by the label alone; the "<b>" is only removed from the result.
        new FieldPattern(@"<span class=""redc30"">价格:", @"<span class=""redc30"">价格:<b>", "</b></span>"),
        new FieldPattern(@"内容简介</h2> <div class=""right_content"">", @"</div><div class=""dashed"">"),   // synopsis
        new FieldPattern(@"目录</h2> <div class=""right_content"">", "</div>"),   // table of contents
    };

    /// <summary>Matches the tag of the small cover image on a product page.</summary>
    private static readonly Regex CoverImageRegex = new Regex(@"<img src="".*?id=""img_show_prd""/>");

    /// <summary>
    /// Downloads the listing page, scrapes every product page it links to, writes the scraped
    /// values to the response and finally saves a logo image next to the page.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // A single client is enough for all requests; disposing it releases its resources.
        using (WebClient client = new WebClient())
        {
            // NOTE(review): Encoding.Default is the system ANSI code page; this assumes it
            // matches the encoding of the remote pages - confirm.
            string listHtml = Encoding.Default.GetString(client.DownloadData(ListPageUrl));
            string[] productUrls = ExtractProductUrls(listHtml);

            // NOTE(review): the last entry is skipped exactly as in the original code
            // (presumably it is residue left after the final separator) - confirm against
            // a real listing page.
            for (int i = 0; i < productUrls.Length - 1; i++)
            {
                string detailHtml = Encoding.Default.GetString(client.DownloadData(productUrls[i]));
                detailHtml = TrimDetailPage(detailHtml);

                WriteProductFields(detailHtml);
                WriteCoverImage(detailHtml);
            }

            // Save an image to disk with WebClient.
            client.DownloadFile(@"http://www.XXX.com/img/XXX_logo.gif", Server.MapPath("XXX.gif"));
        }
    }

    /// <summary>
    /// Reduces the listing page to the product URLs it contains.
    /// </summary>
    /// <param name="listHtml">Markup of the listing page.</param>
    /// <returns>
    /// The pieces obtained by splitting the reduced page on ';'; an empty array when nothing
    /// is left after the reduction.
    /// </returns>
    private static string[] ExtractProductUrls(string listHtml)
    {
        string reduced = listHtml;

        // Cut away everything that is not part of the product list.
        reduced = Regex.Replace(reduced, @"<!DOCTYPE .*?第1页", string.Empty, CutOptions);
        reduced = Regex.Replace(reduced, @"<!--页尾 开始 -->.*?</html>", string.Empty, CutOptions);
        reduced = Regex.Replace(reduced, @"<div id=""divBottomPageNavi"".*?</div>.*?</div>.*?</div>.*?</div>", string.Empty, CutOptions);
        reduced = Regex.Replace(reduced, @"</div><div class='list_r_title_text3a'>.*?list_r_line""></div>", string.Empty, CutOptions);
        reduced = Regex.Replace(reduced, @"<div class=""clear"">.*?", string.Empty, CutOptions);
        reduced = Regex.Replace(reduced, @"<div class=""list_r_list"">.*?<h2>", string.Empty, CutOptions);
        reduced = Regex.Replace(reduced, @"</h2>.*?</div>.*?</div>", string.Empty, CutOptions);

        // Strip the anchor markup around each URL. The link text is removed together with the
        // closing part of the tag; the original replaced that part with a "ww" marker first,
        // which truncated every URL that itself contained "ww" (for example "www.").
        reduced = reduced.Replace(@"<a name=""link_prd_name"" href='", string.Empty);
        reduced = Regex.Replace(reduced, @"' target=""_blank"">.*?</a>", string.Empty);

        // The remaining closing tags become the separators between URLs.
        reduced = reduced.Replace("</div>", ";");

        if (reduced.Length == 0)
        {
            // Nothing left to split; the original threw from Substring here.
            return new string[0];
        }

        // Drop the final character before splitting, as the original did.
        return reduced.Substring(0, reduced.Length - 1).Split(';');
    }

    /// <summary>
    /// Cuts the regions of a product page that are not scraped.
    /// </summary>
    /// <param name="detailHtml">Markup of the product page.</param>
    /// <returns>The markup without the removed regions.</returns>
    private static string TrimDetailPage(string detailHtml)
    {
        string trimmed = detailHtml;
        trimmed = Regex.Replace(trimmed, @"<!DOCTYPE .*?您最近的浏览历史", string.Empty, CutOptions);
        trimmed = Regex.Replace(trimmed, @"<h2 class=""black14"">.*?<!--价格购买区结束-->", string.Empty, CutOptions);
        trimmed = Regex.Replace(trimmed, @"<a name=""review_point""></a>.*?</html>", string.Empty, CutOptions);
        return trimmed;
    }

    /// <summary>
    /// Writes every field of <see cref="ProductFields"/> that is present on the page,
    /// one per line, in table order.
    /// </summary>
    /// <param name="detailHtml">Trimmed markup of the product page.</param>
    private void WriteProductFields(string detailHtml)
    {
        foreach (FieldPattern field in ProductFields)
        {
            string value = ExtractValue(field.Matcher, detailHtml, field.StripPrefix, field.Suffix);
            if (value != null)
            {
                // NOTE(review): the value is remote markup written without HTML encoding;
                // encode it if the scraped site is not trusted.
                Response.Write(value.Trim() + "<br>");
            }
        }
    }

    /// <summary>
    /// Writes the file name and URL of the small cover image, and the URL of the large
    /// cover image derived from it, when the page contains a cover image tag.
    /// </summary>
    /// <param name="detailHtml">Trimmed markup of the product page.</param>
    private void WriteCoverImage(string detailHtml)
    {
        // Small image URL.
        string smallUrl = ExtractValue(CoverImageRegex, detailHtml, @"<img src=""", @""" id=""img_show_prd""/>");
        if (smallUrl == null)
        {
            return;
        }

        string fileName = Regex.Replace(smallUrl, @"http.*?\.com/\d.*/\d.*/", string.Empty, CutOptions);
        Response.Write("图片名" + fileName + "<br>");
        Response.Write(smallUrl + "<br>");

        // Large image URL: the last five characters of the small URL are replaced by "o.jpg".
        // URLs shorter than that cannot be converted; the original threw from Substring here.
        if (smallUrl.Length >= 5)
        {
            string largeUrl = smallUrl.Substring(0, smallUrl.Length - 5) + "o.jpg";
            Response.Write(largeUrl + "<br>");
        }
    }

    /// <summary>
    /// Returns the first match of <paramref name="matcher"/> in <paramref name="html"/> with
    /// every occurrence of the given prefix and suffix text removed.
    /// </summary>
    /// <param name="matcher">Regex locating the value together with its surrounding markup.</param>
    /// <param name="html">Markup to search.</param>
    /// <param name="stripPrefix">Literal text removed from the match (leading markup).</param>
    /// <param name="stripSuffix">Literal text removed from the match (trailing markup).</param>
    /// <returns>The stripped match, or <c>null</c> when there is no match.</returns>
    private static string ExtractValue(Regex matcher, string html, string stripPrefix, string stripSuffix)
    {
        Match match = matcher.Match(html);
        if (!match.Success)
        {
            return null;
        }

        return match.Value
            .Replace(stripPrefix, string.Empty)
            .Replace(stripSuffix, string.Empty);
    }

    /// <summary>
    /// Describes how one field is located on a product page and which markup is removed
    /// from the located text.
    /// </summary>
    private sealed class FieldPattern
    {
        /// <summary>Regex matching the prefix, the shortest possible value, and the suffix.</summary>
        public readonly Regex Matcher;

        /// <summary>Literal text removed from the front of the match.</summary>
        public readonly string StripPrefix;

        /// <summary>Literal text that ends the match and is removed from it.</summary>
        public readonly string Suffix;

        /// <summary>Creates a pattern whose match prefix is also the text to strip.</summary>
        public FieldPattern(string prefix, string suffix)
            : this(prefix, prefix, suffix)
        {
        }

        /// <summary>Creates a pattern that matches on one prefix but strips another.</summary>
        public FieldPattern(string matchPrefix, string stripPrefix, string suffix)
        {
            // The prefix and suffix are literal markup, so they are escaped before being
            // combined with the lazy wildcard. No options: '.' does not cross line breaks.
            Matcher = new Regex(Regex.Escape(matchPrefix) + ".*?" + Regex.Escape(suffix));
            StripPrefix = stripPrefix;
            Suffix = suffix;
        }
    }
}
}
此处只列出了.CS文件
分享到:
相关推荐
C# 数据采集系统 智能采集 数据分析 整套源码
用于智能电表自动抄取,学习交流。程序为C#开发,通过串口进行数据采集。
C# Modbus TCP/IP数据采集程序
C# 数据采集器读写 实例源码(硬件读写)
发送命令采集串口数据C# 串口程序采集串口数据
采集数据和信息,并对数据进行分析,采用C/C++架构
【程序老媛出品,必属精品,亲测...资源名:LCR仪器的数据采集C#程序源码 资源类型:程序源代码 源码说明:使用串口、正则表达式和数据IO卡与PLC同步采集LCR测试仪的测试数据 适合人群:新手及有一定经验的开发人员
NI数据采集卡,C#数据采集程序。。。。
C#编写的串口数据采集器,实时动态显示曲线!这是为一个医疗器械做的一个测试工具!还不错!动态显示动态保存数据!
通过opc协议读取dcs数据并保存到表格中,每天保存一个表格
1,窗体程序分两部分,一部分是485配置页面,另一部分是DTU数据采集; 2,485配置部分是针对RS485传感器modbus通讯所开发的,如果报文格式不同,请自行修改报文部分; 3,DTU数据采集部分,支持多个DTU,而且每个...
C#网页数据采集工具
这是个人做的一个实用小项目,主要用于与下位机设备通信,将下位机采集的数据实时显示并记录,界面设计参考Windows任务管理器 这是个人做的一个实用小项目,主要用于与下位机设备通信,将下位机采集的数据实时显示...
使用一个开源的技术来读写罗克韦尔PLC数据,使用的是基于以太网的TCP/IP实现,不需要额外的组件,读取操作只要放到后台线程就不会卡死线程,本组件支持超级方便的高性能读写操作 1)附件C#代码全开源,所有代码公开...
1,窗体程序一部分是485配置页面,另一部分是DTU数据采集; 2,485配置部分是针对RS485传感器modbus通讯所开发的,如果报文格式不同,请自行修改报文部分; 3,DTU数据采集部分,支持多个DTU,而且每个DTU下支持多个...
实现10路串口数据的采集,显示,解析保存,并能够实时图表显示
C#数据采集器读写程序源码
安装三菱CNC通讯.EXE文件,将默认文件夹内生产的DLL文件拷贝到C#程序文件内,对DLL文件进行引用
c# vs2008数据采集(正则采集),关于天气预报的数据采集。 和WebSevice接口数据采集
一个超强的采集类,使你免去好多时间和精力.全面方便