各种获取信息

商品信息

using Ivony.Html.Parser;
using Ivony.Html;
using OpenQA.Selenium;
using OpenQA.Selenium.Firefox;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Skay.WebBot;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json;
using System.Data.SqlClient;

namespace taobao
{
    /// <summary>
    /// Main form of the scraper. Clicking <c>button1</c> starts a worker thread that walks
    /// listing pages 1 to 10 on list.jd.com, opens every product found there, collects its
    /// attributes, price and comment counts, and inserts one row per product into the
    /// <c>jdfirst</c> table.
    /// </summary>
    public partial class Form1 : Form
    {
        // NOTE(review): the credentials are hard-coded exactly as before; move them to
        // configuration before pointing this at anything but a local test database.
        private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

        // Columns of jdfirst in insert order. Each name doubles as its SQL parameter name.
        private static readonly string[] Columns =
        {
            "name", "contents", "vender", "screen", "resolution_ratio", "camer_fore", "camer_back",
            "AllCots", "imgCots", "GoodCots", "MidCots", "lowCots", "weight", "Nametitle", "goodsId",
            "onselfTime", "From1", "hotPoint", "system1", "color", "priceEnd"
        };

        // Parameterized INSERT built once from Columns (declared above, so it is initialized first).
        private static readonly string InsertSql =
            "insert into jdfirst (" + string.Join(", ", Columns) + ") values (@" + string.Join(", @", Columns) + ")";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Worker thread that runs <see cref="JDData"/>; null until the first click.</summary>
        public static Thread th;

        /// <summary>
        /// Starts the scraper on a worker thread. A click while a previous run is still alive is
        /// ignored, so two scrapers never insert the same products concurrently.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (th != null && th.IsAlive)
            {
                return;
            }

            th = new Thread(new ThreadStart(JDData));
            // Background thread: closing the form must not leave the process scraping invisibly.
            th.IsBackground = true;
            th.Start();
        }

        /// <summary>
        /// Scrapes listing pages 1..10 and stores every product found on them.
        /// Runs on the worker thread started by <see cref="button1_Click"/>.
        /// </summary>
        void JDData()
        {
            for (int i = 1; i <= 10; i++)
            {
                string str = "http://list.jd.com/list.html?cat=9987,653,655&page=" + i + "&ext=502864::1943^^&go=0&JL=6_0_0";
                HttpUtility httpOne = new HttpUtility();
                string htmlOne = httpOne.GetHtmlText(str);
                var documenthtml = new JumonyParser().Parse(htmlOne);
                var items = documenthtml.Find(".gl-item");
                foreach (var item in items)
                {
                    string title = item.FindFirst(".p-name em").InnerText(); // product name
                    string contents = item.FindFirst(".p-commit strong a").InnerText(); // number of comments
                    string vender = item.FindFirst(".p-shop").Attribute("data-shop_name").Value(); // seller
                    string surl = item.FindFirst(".p-name a").Attribute("href").Value(); // product detail page

                    HttpUtility httpsurl = new HttpUtility();
                    string htmlsurl = httpsurl.GetHtmlText(surl, "gbk", "text/html; charset=gbk");
                    var documenthtmlsurl = new JumonyParser().Parse(htmlsurl);

                    string screen = "", resolution_ratio = "", camer_fore = "", camer_back = "";
                    try
                    {
                        // Headline specs. A missing element leaves the remaining fields empty.
                        screen = AfterFirstColon(documenthtmlsurl.FindFirst(".detail p").InnerText()); // screen size
                        resolution_ratio = AfterFirstColon(documenthtmlsurl.FindLast(".fore0 .detail p").InnerText()); // resolution
                        camer_fore = documenthtmlsurl.FindFirst(".fore1 .detail p").Attribute("title").Value(); // front camera
                        camer_back = documenthtmlsurl.FindLast(".fore1 .detail p").Attribute("title").Value(); // rear camera
                    }
                    catch (Exception ex)
                    {
                        // Best effort: these fields are optional, but the failure is no longer invisible.
                        LogFailure("headline specs for " + surl, ex);
                    }

                    string weight = "", Nametitle = "", goodsId = "", onselfTime = "", From1 = "";
                    string hotPoint = "", system1 = "", color = "";
                    var things = documenthtmlsurl.Find("#parameter2 li");
                    foreach (var thing in things)
                    {
                        try
                        {
                            // Each entry is "label:value"; entries without a colon are skipped.
                            string label, value;
                            if (!TrySplitSpec(thing.InnerText(), out label, out value))
                            {
                                continue;
                            }

                            switch (label)
                            {
                                case "商品毛重": weight = value; break;
                                case "商品名称": Nametitle = value; break;
                                case "商品编号": goodsId = value; break;
                                case "上架时间": onselfTime = value; break;
                                case "商品产地": From1 = value; break;
                                case "热点": hotPoint = value; break;
                                case "系统": system1 = value; break;
                                case "机身颜色": color = value; break;
                            }
                        }
                        catch (Exception ex)
                        {
                            LogFailure("parameter list entry for " + surl, ex);
                        }
                    }

                    // NOTE(review): earlier code also re-downloaded surl and the comment-count URL
                    // here and discarded both results; those two requests are dropped on the
                    // assumption that HttpUtility requests have no side effect the later calls
                    // rely on (confirm if the endpoints start rejecting requests).
                    string priceEnd = "";
                    string priceUrl = "http://p.3.cn/prices/get?type=1&area=1_72_4137&pdtk=&pduid=795687743&pdpin=&pdbp=0&skuid=J_" + goodsId + "&callback=cnp";
                    string Area_html = httpsurl.GetHtmlText(priceUrl, "gbk", "text/html;charset=gbk", "");
                    try
                    {
                        // Strip the "cnp([ ... ]);" wrapper so that a bare JSON object remains.
                        JObject Area_Jo = (JObject)JsonConvert.DeserializeObject(Area_html.Replace("cnp", "").Replace("(", "").Replace(")", "").Replace("[", "").Replace("]", "").Replace(";", ""));
                        priceEnd = Area_Jo["p"].ToString(); // price
                    }
                    catch (Exception ex)
                    {
                        LogFailure("price for goodsId '" + goodsId + "'", ex);
                        // NOTE(review): modal dialog raised from the worker thread; scraping pauses until it is dismissed.
                        MessageBox.Show("捕获异常");
                    }

                    string AllCots = "", imgCots = "", GoodCots = "", MidCots = "", lowCots = "";
                    string urlEnd = "http://club.jd.com/clubservice.aspx?method=GetCommentsCount&referenceIds=" + goodsId;
                    string Area_htmlEnd = httpsurl.GetHtmlText(urlEnd, "gbk", "text/html;charset=gbk", "");
                    try
                    {
                        // Keep only the text after the first '[' and drop the closing "]}" wrapper.
                        JObject Area_Jo = (JObject)JsonConvert.DeserializeObject(Area_htmlEnd.Split('[')[1].Replace("]}", ""));
                        AllCots = Area_Jo["CommentCount"].ToString();
                        imgCots = "500"; // hard-coded value; nothing in the response is read for this column
                        GoodCots = Area_Jo["GoodCount"].ToString();
                        MidCots = Area_Jo["GeneralCount"].ToString();
                        lowCots = Area_Jo["PoorCount"].ToString();
                    }
                    catch (Exception ex)
                    {
                        LogFailure("comment counts for goodsId '" + goodsId + "'", ex);
                        MessageBox.Show("异常");
                    }

                    // Same order as Columns.
                    InsertRow(new string[]
                    {
                        title, contents, vender, screen, resolution_ratio, camer_fore, camer_back,
                        AllCots, imgCots, GoodCots, MidCots, lowCots, weight, Nametitle, goodsId,
                        onselfTime, From1, hotPoint, system1, color, priceEnd
                    });
                }
            }
        }

        /// <summary>
        /// Splits "label:value" at the first colon only, so a value that itself contains
        /// colons (for example a timestamp) is kept whole.
        /// </summary>
        /// <param name="text">Raw text of one entry; may be null.</param>
        /// <param name="label">Trimmed text before the first colon, or null when there is none.</param>
        /// <param name="value">Untrimmed text after the first colon, or null when there is none.</param>
        /// <returns>True when <paramref name="text"/> contains a colon.</returns>
        private static bool TrySplitSpec(string text, out string label, out string value)
        {
            label = null;
            value = null;
            if (text == null)
            {
                return false;
            }

            int colon = text.IndexOf(':');
            if (colon < 0)
            {
                return false;
            }

            label = text.Substring(0, colon).Trim();
            value = text.Substring(colon + 1);
            return true;
        }

        /// <summary>Returns everything after the first colon of <paramref name="text"/>, or "" when there is no colon.</summary>
        private static string AfterFirstColon(string text)
        {
            string label, value;
            return TrySplitSpec(text, out label, out value) ? value : "";
        }

        /// <summary>
        /// Inserts one product row into <c>jdfirst</c> with a parameterized command, so quotes
        /// or braces in scraped text can neither break nor alter the statement.
        /// </summary>
        /// <param name="values">One value per entry of <see cref="Columns"/>, in the same order.</param>
        private static void InsertRow(string[] values)
        {
            // Dispose per row: the connection goes back to the pool instead of leaking.
            using (SqlConnection conn = new SqlConnection(ConnectionString))
            using (SqlCommand com = new SqlCommand(InsertSql, conn))
            {
                for (int c = 0; c < Columns.Length; c++)
                {
                    // A null value is stored as "" (what string concatenation produced before);
                    // a null parameter value would not be sent to the server at all.
                    com.Parameters.AddWithValue("@" + Columns[c], values[c] ?? "");
                }

                conn.Open();
                com.ExecuteNonQuery();
            }
        }

        /// <summary>Writes a swallowed exception to the trace listeners so the failure can be diagnosed.</summary>
        private static void LogFailure(string what, Exception ex)
        {
            System.Diagnostics.Trace.WriteLine("JDData: failed to read " + what + ": " + ex);
        }
    }
}

  

原文地址:https://www.cnblogs.com/Tinamei/p/5170045.html