用C#.net开发的当当网信息查询工具

学生时代的小玩具

这是一个用 C#(.NET WinForms)开发的小工具,用于抓取当当网计算机类图书的信息(书名、作者、出版社、出版时间和描述)。


Program.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
namespace spider
{
    /// <summary>
    /// Holds the process entry point of the application.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        private static void Main()
        {
            // Both calls must run before the first window is created.
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            Form1 mainWindow = new Form1();
            Application.Run(mainWindow);
        }
    }
}

Form1.cs

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace spider
{
    /// <summary>
    /// Main window: loads one page of the Dangdang category listing in the embedded
    /// browser, shows the parsed books in a grid and can save them to a text file.
    /// </summary>
    public partial class Form1 : Form
    {
        // Listing URL without the page number; the page index is appended on navigation.
        private string url = @"http://category.dangdang.com/all/?category_path=01.54.26.00.00.00&page_index=";

        // Page currently requested. The handlers below never let it drop under 1.
        private static int page = 1;

        // Result of the most recent parse; null until a document has finished loading.
        private Parse p;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Restarts from the first page.</summary>
        private void buttonstart_Click(object sender, EventArgs e)
        {
            page = 1;
            Execute();
        }

        /// <summary>Goes back one page; does nothing when already on the first page.</summary>
        private void buttonprev_Click(object sender, EventArgs e)
        {
            if (page <= 1)
            {
                // There is no page 0; stay where we are instead of requesting it.
                return;
            }

            page--;
            Execute();
        }

        /// <summary>Advances to the next page.</summary>
        private void buttonnext_Click(object sender, EventArgs e)
        {
            page++;
            Execute();
        }

        /// <summary>
        /// Jumps to the page number typed into <c>textBox2</c>. Invalid input (empty,
        /// non-numeric or below 1) is reported to the user and the current page is kept.
        /// </summary>
        private void buttonjump_Click(object sender, EventArgs e)
        {
            int requested;
            if (!int.TryParse(textBox2.Text, out requested) || requested < 1)
            {
                MessageBox.Show("请输入有效的页码(不小于 1 的整数)");
                return;
            }

            page = requested;
            Execute();
        }

        /// <summary>
        /// Navigates the browser to the current page and mirrors the address in
        /// <c>textBox1</c>. Parsing happens later, in the DocumentCompleted handler.
        /// </summary>
        private void Execute()
        {
            string target = url + page.ToString();
            webBrowser1.Navigate(target);
            textBox1.Text = target;
            Cursor.Current = Cursors.WaitCursor;
        }

        /// <summary>
        /// Parses the loaded document and binds the resulting table to the grid.
        /// </summary>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            HtmlDocument doc = webBrowser1.Document;
            if (doc == null)
            {
                // Nothing to parse; just undo the wait cursor set by Execute().
                Cursor.Current = Cursors.Default;
                return;
            }

            p = new Parse(doc);
            dataGridView1.DataSource = p.dt;

            // Widths for: title, author, publisher, publication time, description.
            int[] widths = { 150, 150, 150, 80, 450 };
            for (int i = 0; i < widths.Length && i < dataGridView1.Columns.Count; i++)
            {
                dataGridView1.Columns[i].Width = widths[i];
            }

            Cursor.Current = Cursors.Default;
            MessageBox.Show("解析完成");
        }

        /// <summary>
        /// Writes the books of the last parsed page to a file chosen by the user.
        /// </summary>
        private void buttonsave_Click(object sender, EventArgs e)
        {
            if (p == null)
            {
                // No page has been parsed yet, so there is nothing to write.
                MessageBox.Show("没有可保存的数据,请先抓取页面");
                return;
            }

            using (SaveFileDialog sfd = new SaveFileDialog())
            {
                sfd.DefaultExt = "txt";
                if (sfd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                string path = sfd.FileName;
                StringBuilder sb = new StringBuilder();
                foreach (Book book in p.list)
                {
                    sb.Append(book.ToString());
                }

                try
                {
                    // Appends rather than overwrites, so saving several pages into the
                    // same file accumulates them.
                    File.AppendAllText(path, sb.ToString(), Encoding.Default);
                }
                catch (IOException ex)
                {
                    MessageBox.Show("保存失败\n" + ex.Message);
                    return;
                }
                catch (UnauthorizedAccessException ex)
                {
                    MessageBox.Show("保存失败\n" + ex.Message);
                    return;
                }

                MessageBox.Show("保存成功\n" + path);
            }
        }
    }
}


Book.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace spider
{
    /// <summary>
    /// Plain data holder for one book record.
    /// </summary>
    class Book
    {
        /// <summary>Book title.</summary>
        public string name { get; set; }

        /// <summary>Author name(s).</summary>
        public string author { get; set; }

        /// <summary>Publisher.</summary>
        public string pub { get; set; }

        /// <summary>Publication time, kept as text.</summary>
        public string time { get; set; }

        /// <summary>Description text.</summary>
        public string describ { get; set; }

        /// <summary>Creates a record with every field unset.</summary>
        public Book()
        {
        }

        /// <summary>Creates a record with every field supplied.</summary>
        public Book(string name, string author, string pub, string time, string describ)
        {
            this.name = name;
            this.author = author;
            this.pub = pub;
            this.time = time;
            this.describ = describ;
        }

        /// <summary>
        /// Renders the record as five labelled lines separated by CRLF, followed by one
        /// blank line. Unset (null) fields render as an empty value.
        /// </summary>
        public override string ToString()
        {
            StringBuilder text = new StringBuilder();
            text.Append("书名:").Append(name).Append("\r\n");
            text.Append("作者:").Append(author).Append("\r\n");
            text.Append("出版商:").Append(pub).Append("\r\n");
            text.Append("出版时间:").Append(time).Append("\r\n");
            text.Append("描述:").Append(describ).Append("\r\n\r\n");
            return text.ToString();
        }
    }
}

Parse.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Data;
using System.Text.RegularExpressions;
namespace spider
{
    /// <summary>
    /// Extracts book records from a loaded listing page and exposes them both as a
    /// <see cref="DataTable"/> (one row per book) and as a list of <see cref="Book"/>.
    /// </summary>
    class Parse
    {
        // Date in the form dddd-dd-dd, used to find the publication time.
        // Built once instead of once per publisher element.
        private static readonly Regex PublishDate = new Regex(@"(\d{4})\-(\d{2})\-(\d{2})");

        private HtmlDocument dom;

        /// <summary>Parsed books as a table with five text columns.</summary>
        public DataTable dt { get; set; }

        /// <summary>Parsed books as objects, in the same order as the table rows.</summary>
        public List<Book> list { get; set; }

        /// <summary>
        /// Creates the result containers and immediately parses <paramref name="dom"/>.
        /// </summary>
        /// <param name="dom">Document to read the book items from.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dom"/> is null.</exception>
        public Parse(HtmlDocument dom)
        {
            if (dom == null)
            {
                throw new ArgumentNullException("dom");
            }

            this.dom = dom;
            dt = new DataTable();
            list = new List<Book>();
            dt.Columns.Add("书名");
            dt.Columns.Add("作者");
            dt.Columns.Add("出版社");
            dt.Columns.Add("出版时间");
            dt.Columns.Add("描述");
            Execute();
        }

        /// <summary>
        /// Scans every <c>div</c> whose class is "listitem detail" and records one book
        /// per match. Previous results are discarded first, so calling this again
        /// re-parses the document instead of duplicating rows.
        /// </summary>
        public void Execute()
        {
            dt.Rows.Clear();
            list.Clear();

            foreach (HtmlElement item in dom.GetElementsByTagName("div"))
            {
                if (item.GetAttribute("classname") != "listitem detail")
                {
                    continue;
                }

                Book book = ReadBook(item);

                DataRow row = dt.NewRow();
                row["书名"] = book.name;
                row["作者"] = book.author;
                row["出版社"] = book.pub;
                row["出版时间"] = book.time;
                row["描述"] = book.describ;
                dt.Rows.Add(row);
                list.Add(book);
            }
        }

        // Builds a Book from the <li> children of one book item element.
        private static Book ReadBook(HtmlElement item)
        {
            Book book = new Book();
            foreach (HtmlElement li in item.GetElementsByTagName("li"))
            {
                switch (li.GetAttribute("classname"))
                {
                    case "maintitle": // title
                        book.name = li.OuterText;
                        break;
                    case "publisher_info": // authors, publisher, publication time
                        ReadPublisherInfo(li, book);
                        break;
                    case "describ": // description
                        book.describ = li.OuterText;
                        break;
                }
            }
            return book;
        }

        // Fills author, publisher and publication time from a "publisher_info" <li>.
        private static void ReadPublisherInfo(HtmlElement li, Book book)
        {
            StringBuilder authors = new StringBuilder();
            foreach (HtmlElement link in li.GetElementsByTagName("a"))
            {
                string linkName = link.GetAttribute("name");
                if (linkName == "Author")
                {
                    // Several author links are joined with commas.
                    if (authors.Length != 0)
                    {
                        authors.Append(",");
                    }
                    authors.Append(link.OuterText);
                }
                else if (linkName == "Pub")
                {
                    book.pub = link.OuterText;
                }
            }
            book.author = authors.ToString();

            // Regex.Match rejects null input, so fall back to an empty string.
            Match date = PublishDate.Match(li.OuterText ?? string.Empty);
            if (date.Success)
            {
                book.time = date.Value;
            }
        }
    }
}



原文地址:https://www.cnblogs.com/hanfeihan1992/p/4504078.html