C#获取网页信息并存入数据库

1. 获取一级及二级商品分类信息

给定一个网页,获取该网页上的商品分类信息,并将其存入数据库。

using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient;

namespace catchGoods
{
    /// <summary>
    /// Form that downloads the home page of http://www.htluxe.com on a worker
    /// thread, reads its category menu and stores the first- and second-level
    /// categories in the <c>exerciseOneSort</c> / <c>exerciseTwoSort</c> tables.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Worker thread started by the most recent click on button1.</summary>
        public static Thread th;

        /// <summary>Starts <see cref="GetJDData"/> on a separate thread.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            th = new Thread(GetJDData);
            // Background thread: closing the form must not leave the process running.
            th.IsBackground = true;
            th.Start();
        }

        /// <summary>
        /// Scrapes every <c>.categroup dl</c> element of the home page. The
        /// <c>h4 a</c> link of each element is stored as a first-level category
        /// (its text and the part of its href after the first '='); every
        /// <c>dt p a</c> link below it is stored as a second-level category that
        /// references the parent through the <c>idplus</c> column.
        /// </summary>
        /// <remarks>
        /// Rows are only inserted, nothing is cleared first, so running this
        /// twice stores every category twice.
        /// </remarks>
        public void GetJDData()
        {
            // NOTE(review): credentials are hard-coded in source; move them to configuration.
            using (SqlConnection conn = new SqlConnection("Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456"))
            {
                conn.Open();

                HttpUtility http = new HttpUtility();
                string html = http.GetHtmlText("http://www.htluxe.com");
                var documenthtml = new JumonyParser().Parse(html);

                foreach (var item in documenthtml.Find(".categroup dl"))
                {
                    var headerLink = item.FindFirst("h4 a");
                    string name = headerLink.InnerText();
                    string[] hrefParts = headerLink.Attribute("href").Value().Split('=');
                    if (hrefParts.Length < 2)
                    {
                        // No "key=value" in the link, so there is no category id to store.
                        continue;
                    }
                    string remark = hrefParts[1];

                    // Controls may only be touched from the UI thread, so the
                    // list box update is marshalled through Invoke.
                    this.Invoke((MethodInvoker)delegate
                    {
                        listBox1.Items.Add(name);
                    });

                    // Parameters instead of string concatenation: scraped text may
                    // contain quotes and must never become part of the SQL statement.
                    using (SqlCommand com = new SqlCommand(
                        "insert into exerciseOneSort (className, remark) values (@className, @remark)", conn))
                    {
                        com.Parameters.AddWithValue("@className", name);
                        com.Parameters.AddWithValue("@remark", remark);
                        com.ExecuteNonQuery();
                    }

                    foreach (var element in item.Find("dt p a"))
                    {
                        string nameTwo = element.InnerText();
                        string url = "http://www.htluxe.com/" + element.Attribute("href").Value();

                        using (SqlCommand comTwo = new SqlCommand(
                            "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)", conn))
                        {
                            comTwo.Parameters.AddWithValue("@className", nameTwo);
                            comTwo.Parameters.AddWithValue("@url", url);
                            comTwo.Parameters.AddWithValue("@idplus", remark);
                            comTwo.ExecuteNonQuery();
                        }
                    }
                }
            }
        }
    }
}

2. 完整版(在分类抓取的基础上,再按二级分类逐页抓取商品列表并存入数据库)

using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json;

namespace catchGoods
{
    /// <summary>
    /// Form that scrapes http://www.htluxe.com on worker threads.
    /// button1 stores the category menu (<c>exerciseOneSort</c> /
    /// <c>exerciseTwoSort</c>); button2 reads the stored second-level category
    /// URLs back, walks each listing page by page and stores every listed
    /// product in <c>GoodsList</c>.
    /// </summary>
    /// <remarks>
    /// Rows are only inserted, no table is cleared first, so repeating a run
    /// stores the same data again.
    /// </remarks>
    public partial class Form1 : Form
    {
        // NOTE(review): credentials are hard-coded in source; move them to configuration.
        private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

        // NOTE(review): the published listing splits the promotion text with an
        // empty char literal (Split('')), which does not compile -- the real
        // delimiter was lost. The ASCII and full-width colon used here are an
        // assumption; confirm against the live ".promotionMiddleTop p" markup.
        private static readonly char[] GoodsNumberSeparators = { ':', ':' };

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Worker thread started by the most recent click on button1.</summary>
        public static Thread th;

        /// <summary>Starts <see cref="GetJDDataOne"/> on a separate thread.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            th = new Thread(GetJDDataOne);
            // Background thread: closing the form must not leave the process running.
            th.IsBackground = true;
            th.Start();
        }

        /// <summary>
        /// Adds one line to <c>listBox1</c> from any thread. Controls may only be
        /// touched from the UI thread, so the call is marshalled through Invoke.
        /// </summary>
        private void AppendToList(string text)
        {
            this.Invoke((MethodInvoker)delegate
            {
                listBox1.Items.Add(text);
            });
        }

        /// <summary>
        /// Scrapes every <c>.categroup dl</c> element of the home page. The
        /// <c>h4 a</c> link of each element is stored as a first-level category
        /// (its text and the part of its href after the first '='); every
        /// <c>dt p a</c> link below it is stored as a second-level category that
        /// references the parent through the <c>idplus</c> column. Each stored
        /// row is also echoed to the list box.
        /// </summary>
        public void GetJDDataOne()
        {
            using (SqlConnection conn = new SqlConnection(ConnectionString))
            {
                conn.Open();

                HttpUtility http = new HttpUtility();
                string html = http.GetHtmlText("http://www.htluxe.com");
                var documenthtml = new JumonyParser().Parse(html);

                foreach (var item in documenthtml.Find(".categroup dl"))
                {
                    var headerLink = item.FindFirst("h4 a");
                    string name = headerLink.InnerText();
                    string[] hrefParts = headerLink.Attribute("href").Value().Split('=');
                    if (hrefParts.Length < 2)
                    {
                        // No "key=value" in the link, so there is no category id to store.
                        continue;
                    }
                    string remark = hrefParts[1];

                    AppendToList(name + "  " + remark);

                    // Parameters instead of string concatenation: scraped text may
                    // contain quotes and must never become part of the SQL statement.
                    using (SqlCommand com = new SqlCommand(
                        "insert into exerciseOneSort (className, remark) values (@className, @remark)", conn))
                    {
                        com.Parameters.AddWithValue("@className", name);
                        com.Parameters.AddWithValue("@remark", remark);
                        com.ExecuteNonQuery();
                    }

                    foreach (var element in item.Find("dt p a"))
                    {
                        string nameTwo = element.InnerText();
                        string url = "http://www.htluxe.com/" + element.Attribute("href").Value();

                        AppendToList(nameTwo + "  " + url + "  " + remark);

                        using (SqlCommand comTwo = new SqlCommand(
                            "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)", conn))
                        {
                            comTwo.Parameters.AddWithValue("@className", nameTwo);
                            comTwo.Parameters.AddWithValue("@url", url);
                            comTwo.Parameters.AddWithValue("@idplus", remark);
                            comTwo.ExecuteNonQuery();
                        }
                    }
                }
            }
        }

        // Number of listing pages of the category currently being scraped.
        // Written by threadTwo, read by GetJDData.
        int page = 0;

        // URL of the category currently being scraped.
        // Written by threadTwo, read by GetJDData.
        string surl;

        /// <summary>Worker thread started by the most recent click on button2.</summary>
        public static Thread th2;

        /// <summary>Clears the list box and starts <see cref="threadTwo"/> on a separate thread.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            listBox1.Items.Clear();
            th2 = new Thread(threadTwo);
            // Background thread: closing the form must not leave the process running.
            th2.IsBackground = true;
            th2.Start();
        }

        /// <summary>
        /// Loads every URL stored in <c>exerciseTwoSort</c>, reads the page count
        /// from the <c>.goods-page-min label</c> element of each category page
        /// and scrapes all of its listing pages through <see cref="GetJDData"/>.
        /// </summary>
        public void threadTwo()
        {
            DataTable dt = new DataTable();
            using (SqlConnection conn = new SqlConnection(ConnectionString))
            using (SqlDataAdapter dapt = new SqlDataAdapter("select url from exerciseTwoSort", conn))
            {
                // Fill opens and closes the connection itself.
                dapt.Fill(dt);
            }

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                surl = dt.Rows[i][0].ToString();
                HttpUtility httpTwo = new HttpUtility();
                string htmlTwo = httpTwo.GetHtmlText(surl);
                var documenthtml = new JumonyParser().Parse(htmlTwo);
                string pageto = documenthtml.FindFirst(".goods-page-min label").InnerText();
                page = ParsePageCount(pageto);
                GetJDData();
            }
        }

        /// <summary>
        /// Extracts the total page count from pager text of the form
        /// "current/total".
        /// </summary>
        /// <param name="pagerText">Text of the pager label.</param>
        /// <returns>
        /// The number after the first '/', or 1 when the text has no '/' or the
        /// part after it is not an integer, so that at least the first page is
        /// still scraped.
        /// </returns>
        private static int ParsePageCount(string pagerText)
        {
            string[] parts = (pagerText ?? string.Empty).Split('/');
            int total;
            if (parts.Length > 1 && int.TryParse(parts[1], out total))
            {
                return total;
            }
            return 1;
        }

        /// <summary>
        /// Scrapes listing pages 1..<c>page</c> of the category at <c>surl</c>.
        /// For every <c>.piclist li</c> item it reads the list-page fields,
        /// downloads the product's detail page for the goods number and star
        /// level, stores the product through <see cref="bufff"/> and echoes it to
        /// the list box.
        /// </summary>
        /// <remarks>
        /// A product that cannot be scraped or stored is skipped; the failure is
        /// written to the list box instead of a modal message box, so one bad
        /// item no longer blocks the whole run.
        /// </remarks>
        void GetJDData()
        {
            for (int j = 1; j <= page; j++)
            {
                string pageUrl = surl + "&price_min=0&price_max=0&page=" + j + "&sort=sort_order%20asc,last_update&order=DESC";
                HttpUtility httpMid = new HttpUtility();
                string htmlMid = httpMid.GetHtmlText(pageUrl);
                var documenthtmlMid = new JumonyParser().Parse(htmlMid);

                foreach (var item in documenthtmlMid.Find(".piclist li"))
                {
                    string line;
                    try
                    {
                        // The title is stored as-is: the insert is parameterized,
                        // so quotes no longer need to be doubled by hand.
                        string title = item.FindFirst(".base a").InnerText();
                        string nowPrice = item.FindFirst(".minprice").InnerText();
                        string oldPrice = item.FindFirst(".maxprice").InnerText();
                        string popular = item.FindFirst(".ratecount strong").InnerText();
                        string sales = item.FindFirst(".soldnum strong").InnerText();
                        string contents = item.FindFirst(".commentcount strong").InnerText();

                        string detailUrl = "http://www.htluxe.com/" + item.FindFirst("dt a").Attribute("href").Value();
                        HttpUtility httpThree = new HttpUtility();
                        var documenthtmlThree = new JumonyParser().Parse(httpThree.GetHtmlText(detailUrl));

                        string sben = documenthtmlThree.FindFirst(".promotionMiddleTop p").InnerText();
                        // Take the part after the separator; fall back to the whole
                        // text when no separator is present (see GoodsNumberSeparators).
                        string[] sbenParts = sben.Split(GoodsNumberSeparators);
                        string num = sbenParts.Length > 1 ? sbenParts[1] : sben;

                        string starLevel = documenthtmlThree.FindFirst(".m-ratescore i").InnerText();

                        bufff(title, nowPrice, oldPrice, popular, sales, num, contents, starLevel);
                        line = title + "  " + nowPrice + "  " + num + "  " + oldPrice + "  " + sales + "  " + popular + " " + contents + " " + starLevel;
                    }
                    catch (Exception ex)
                    {
                        line = "异常: " + ex.Message;
                    }

                    // Outside the try block so that a UI failure is not swallowed.
                    AppendToList(line);
                }
            }
        }

        /// <summary>Inserts one product row into <c>GoodsList</c>.</summary>
        /// <param name="title">Product name (column <c>name</c>).</param>
        /// <param name="nowPrice">Current price text (column <c>price</c>).</param>
        /// <param name="oldPrice">Previous price text (column <c>oldPrice</c>).</param>
        /// <param name="popular">Value stored in column <c>popular</c>.</param>
        /// <param name="sales">Value stored in column <c>sales</c>.</param>
        /// <param name="num">Value stored in column <c>num</c>.</param>
        /// <param name="contents">Value stored in column <c>contents</c>.</param>
        /// <param name="starLevel">Value stored in column <c>starLevel</c>.</param>
        private static void bufff(string title, string nowPrice, string oldPrice,
            string popular, string sales, string num, string contents, string starLevel)
        {
            const string sql =
                "insert into GoodsList (name, num, sales, popular, starLevel, contents, price, oldPrice) " +
                "values (@name, @num, @sales, @popular, @starLevel, @contents, @price, @oldPrice)";

            using (SqlConnection conn2 = new SqlConnection(ConnectionString))
            using (SqlCommand com2 = new SqlCommand(sql, conn2))
            {
                com2.Parameters.AddWithValue("@name", title);
                com2.Parameters.AddWithValue("@num", num);
                com2.Parameters.AddWithValue("@sales", sales);
                com2.Parameters.AddWithValue("@popular", popular);
                com2.Parameters.AddWithValue("@starLevel", starLevel);
                com2.Parameters.AddWithValue("@contents", contents);
                com2.Parameters.AddWithValue("@price", nowPrice);
                com2.Parameters.AddWithValue("@oldPrice", oldPrice);

                conn2.Open();
                com2.ExecuteNonQuery();
            }
        }
    }
}
原文地址:https://www.cnblogs.com/Tinamei/p/5162163.html