分享一个天气历史数据的采集脚本

最近一个项目中需要用到过往的天气数据, 我找到了天气后报这个网站 (www.tianqihoubao.com), 并在 Spider Studio (下文简称 SS) 中完成了相关采集, 和大家分享一下.

首先分析这个网站提供了两种信息:

1. 省市关系

2. 天气记录

对应地, 我们创建如下数据结构:

/// <summary>
/// A province-level entry (municipality / province / special region) scraped
/// from the site's history index page.
/// </summary>
[Serializable]
public class Province
{
    /// <summary>Display name, taken from the text of the province link.</summary>
    public string ProvinceName;
    /// <summary>Absolute URL of the province page that lists its cities.</summary>
    public string ProvinceUrl;
}

/// <summary>
/// A city scraped from a province page.
/// </summary>
[Serializable]
public class City
{
    /// <summary>The province whose page this city was found on.</summary>
    public Province Province;
    /// <summary>Display name, taken from the text of the city link.</summary>
    public string CityName;
    /// <summary>Absolute URL of the city's weather-history index page.</summary>
    public string CityUrl;
}

/// <summary>
/// One entry of a city's weather-history list; it links to a page of daily records
/// (one month per page, according to the accompanying article).
/// </summary>
[Serializable]
public class WeatherDataSet
{
    /// <summary>The city this record set belongs to.</summary>
    public City City;
    /// <summary>Link text of the record-set entry.</summary>
    public string Title;
    /// <summary>Absolute URL of the page holding the daily records.</summary>
    public string Url;
}

/// <summary>
/// One table row of weather data. All values are kept as the raw cell text
/// scraped from the page; nothing is parsed.
/// </summary>
[Serializable]
public class WeatherData
{
    /// <summary>The record set (page) this row was read from.</summary>
    public WeatherDataSet DataSet;
    /// <summary>Text of the first table cell (date).</summary>
    public string Date;
    /// <summary>Text of the second table cell (weather description).</summary>
    public string TextWeather;
    /// <summary>Text of the third table cell (temperature).</summary>
    public string Temp;
    /// <summary>Text of the fourth table cell (wind).</summary>
    public string Wind;
}

 

>> 网站一共提供34个直辖市/省/特区的天气信息, 完整的列表在: http://www.tianqihoubao.com/lishi/index.htm

对应的采集语句是:

// Select the province links on the history index page (anchors inside the DT elements of #content).
var list = Default.SelectNodes("#content DT a");

>> 每个省都有下辖的城市列表, 如: http://www.tianqihoubao.com/lishi/hebei.htm

对应的采集语句是:

// Select the city links on a province page (anchors inside the DD elements of #content).
var list = Default.SelectNodes("#content DD a");

>> 每个城市都有一个历史天气记录列表, 如: http://www.tianqihoubao.com/lishi/shijiazhuang.html

对应的采集语句是:

// Select the history record-set links on a city page (anchors inside div.pcity directly under #content).
Default.SelectNodes("#content>div.pcity a");

>> 进入每条历史天气记录, 就可以得到当月的天气数据了:

对应的采集语句是:

// Walk the rows of the weather table. ":gt(0)" presumably skips the header row
// (jQuery-style index filter) -- confirm against the selector engine in use.
var rows = Default.SelectNodes("#content>table.b tr:gt(0)");
foreach (var row in rows)
{
    // Cells in page order: date, weather description, temperature, wind.
    var date = row.SelectSingleNode("td:eq(0)").Text();
    var textWeather = row.SelectSingleNode("td:eq(1)").Text();
    var temp = row.SelectSingleNode("td:eq(2)").Text();
    var wind = row.SelectSingleNode("td:eq(3)").Text();
}

将这些语句分别包装为方法, 并将结果绑定到最开始定义的数据结构中:

public List<Province> GetProvinceList() {...} // Get the municipalities / provinces / special regions
public List<City> GetCityList(Province province) {...} // Get the list of cities of a province
public List<WeatherDataSet> GetWeatherDataSet(City city) {...} // Get the weather-history record sets of the given city
public List<WeatherData> GetWeatherData(WeatherDataSet ds) {...} // Get the weather-history data of a record set

>> 完整的脚本: (复制到SS中即可直接运行)

SS下载地址为: http://www.gdtsearch.com/products.spiderstudio.docapi.htm

/// <summary>
/// Demo entry point. Scrapes one path through the site -- all provinces, the cities
/// of the second province, the record sets of that province's second city, and the
/// rows of the first record set -- logging every item found at each level.
/// </summary>
/// <remarks>
/// Each level is checked before it is indexed: if a scrape returns too few items the
/// method logs a message and returns instead of throwing ArgumentOutOfRangeException.
/// </remarks>
public void Run()
{
    Logger.ClearAll();
    Default.ScriptErrorsSuppressed = true;

    var pl = GetProvinceList();
    foreach (var p in pl)
    {
        Logger.Log(p.ProvinceName);
        Logger.Log(p.ProvinceUrl);
    }
    // The demo drills into the second province, so at least two are required.
    if (pl.Count < 2)
    {
        Logger.Log("Expected at least 2 provinces but found " + pl.Count + "; stopping.");
        return;
    }

    var cl = GetCityList(pl[1]);
    foreach (var c in cl)
    {
        Logger.Log(c.Province.ProvinceName);
        Logger.Log(c.Province.ProvinceUrl);
        Logger.Log(c.CityName);
        Logger.Log(c.CityUrl);
    }
    // Likewise the second city of that province is used.
    if (cl.Count < 2)
    {
        Logger.Log("Expected at least 2 cities but found " + cl.Count + "; stopping.");
        return;
    }

    var ds = GetWeatherDataSet(cl[1]);
    foreach (var d in ds)
    {
        Logger.Log(d.City.CityName);
        Logger.Log(d.Title);
        Logger.Log(d.Url);
    }
    // Only the first record set is fetched.
    if (ds.Count < 1)
    {
        Logger.Log("No weather record sets found; stopping.");
        return;
    }

    var dl = GetWeatherData(ds[0]);
    foreach (var d in dl)
    {
        Logger.Log(d.DataSet.Title);
        Logger.Log(d.Date);
        Logger.Log(d.TextWeather);
        Logger.Log(d.Temp);
        Logger.Log(d.Wind);
    }
}


/// <summary>
/// Opens the site's history index page and builds one <see cref="Province"/>
/// per province link found on it.
/// </summary>
/// <returns>The provinces in page order, each with an absolute URL.</returns>
public List<Province> GetProvinceList()
{
    Default.Navigate("http://www.tianqihoubao.com/lishi/index.htm");
    // Presumably blocks until the selector matches -- confirm against the Spider Studio API.
    Default.Ready("#content DT");

    var links = Default.SelectNodes("#content DT a");
    var provinces = new List<Province>();
    foreach (var link in links)
    {
        string name = link.Text();
        string href = link.Attr("href");
        // Resolve the href against the current page URL so the stored URL is absolute.
        string absoluteUrl = new Uri(Default.Url, href).ToString();
        provinces.Add(new Province { ProvinceName = name, ProvinceUrl = absoluteUrl });
    }
    return provinces;
}

/// <summary>
/// Opens the given province's page and builds one <see cref="City"/> per city link on it.
/// </summary>
/// <param name="province">Province whose <c>ProvinceUrl</c> is navigated to; it is also stored on every returned city.</param>
/// <returns>The cities in page order, each with an absolute URL.</returns>
public List<City> GetCityList(Province province)
{
    Default.Navigate(province.ProvinceUrl);
    // Presumably blocks until the selector matches -- confirm against the Spider Studio API.
    Default.Ready("#content DD");

    var links = Default.SelectNodes("#content DD a");
    var cities = new List<City>();
    foreach (var link in links)
    {
        string name = link.Text();
        string href = link.Attr("href");
        // Resolve the href against the current page URL so the stored URL is absolute.
        string absoluteUrl = new Uri(Default.Url, href).ToString();
        cities.Add(new City { Province = province, CityName = name, CityUrl = absoluteUrl });
    }
    return cities;
}

/// <summary>
/// Opens the given city's page and builds one <see cref="WeatherDataSet"/>
/// per history link found inside <c>div.pcity</c>.
/// </summary>
/// <param name="city">City whose <c>CityUrl</c> is navigated to; it is also stored on every returned record set.</param>
/// <returns>The record sets in page order, each with an absolute URL.</returns>
public List<WeatherDataSet> GetWeatherDataSet(City city)
{
    Default.Navigate(city.CityUrl);
    // Presumably blocks until the selector matches -- confirm against the Spider Studio API.
    Default.Ready("#content>div.pcity");

    var links = Default.SelectNodes("#content>div.pcity a");
    var dataSets = new List<WeatherDataSet>();
    foreach (var link in links)
    {
        string title = link.Text();
        string href = link.Attr("href");
        // Resolve the href against the current page URL so the stored URL is absolute.
        string absoluteUrl = new Uri(Default.Url, href).ToString();
        dataSets.Add(new WeatherDataSet { Title = title, Url = absoluteUrl, City = city });
    }
    return dataSets;
}

/// <summary>
/// Opens the given record set's page and reads the weather table into
/// one <see cref="WeatherData"/> per selected row.
/// </summary>
/// <param name="ds">Record set whose <c>Url</c> is navigated to; it is also stored on every returned row.</param>
/// <returns>The rows in table order; cell texts are stored unparsed.</returns>
public List<WeatherData> GetWeatherData(WeatherDataSet ds)
{
    Default.Navigate(ds.Url);
    // Presumably blocks until the selector matches -- confirm against the Spider Studio API.
    Default.Ready("#content>table.b");

    // ":gt(0)" presumably skips the header row (jQuery-style index filter) -- confirm.
    var rows = Default.SelectNodes("#content>table.b tr:gt(0)");
    var records = new List<WeatherData>();
    foreach (var row in rows)
    {
        // Cells in page order: date, weather description, temperature, wind.
        string date = row.SelectSingleNode("td:eq(0)").Text();
        string weather = row.SelectSingleNode("td:eq(1)").Text();
        string temperature = row.SelectSingleNode("td:eq(2)").Text();
        string wind = row.SelectSingleNode("td:eq(3)").Text();
        records.Add(new WeatherData
        {
            DataSet = ds,
            Date = date,
            TextWeather = weather,
            Temp = temperature,
            Wind = wind
        });
    }
    return records;
}

/// <summary>
/// A province-level entry (municipality / province / special region) scraped
/// from the site's history index page.
/// </summary>
[Serializable]
public class Province
{
    /// <summary>Display name, taken from the text of the province link.</summary>
    public string ProvinceName;
    /// <summary>Absolute URL of the province page that lists its cities.</summary>
    public string ProvinceUrl;
}

/// <summary>
/// A city scraped from a province page.
/// </summary>
[Serializable]
public class City
{
    /// <summary>The province whose page this city was found on.</summary>
    public Province Province;
    /// <summary>Display name, taken from the text of the city link.</summary>
    public string CityName;
    /// <summary>Absolute URL of the city's weather-history index page.</summary>
    public string CityUrl;
}

/// <summary>
/// One entry of a city's weather-history list; it links to a page of daily records
/// (one month per page, according to the accompanying article).
/// </summary>
[Serializable]
public class WeatherDataSet
{
    /// <summary>The city this record set belongs to.</summary>
    public City City;
    /// <summary>Link text of the record-set entry.</summary>
    public string Title;
    /// <summary>Absolute URL of the page holding the daily records.</summary>
    public string Url;
}

/// <summary>
/// One table row of weather data. All values are kept as the raw cell text
/// scraped from the page; nothing is parsed.
/// </summary>
[Serializable]
public class WeatherData
{
    /// <summary>The record set (page) this row was read from.</summary>
    public WeatherDataSet DataSet;
    /// <summary>Text of the first table cell (date).</summary>
    public string Date;
    /// <summary>Text of the second table cell (weather description).</summary>
    public string TextWeather;
    /// <summary>Text of the third table cell (temperature).</summary>
    public string Temp;
    /// <summary>Text of the fourth table cell (wind).</summary>
    public string Wind;
}
>> 运行效果: (运行结果截图请参见原文)

原文地址:https://www.cnblogs.com/iamzyf/p/3529460.html