some information

// ---------------------------------------------------------------------
// <copyright file="UserUtility.cs" company="Microsoft">
//    Copyright Microsoft Corporation, all rights reserved
// </copyright>
// ---------------------------------------------------------------------

namespace ReleaseTool.Common
{
    
using System;
    
using System.DirectoryServices;
    
using System.DirectoryServices.ActiveDirectory;
    
using System.IO;
    
using System.Net;
    
using System.Xml.Linq;

    
/// <summary>
    /// Resolves the display name of a domain account, first through the Who
    /// people-store service and, when that fails, through Active Directory.
    /// </summary>
    public sealed class UserUtility
    {
        /// <summary>
        /// Gets the display name for a domain account.
        /// </summary>
        /// <param name="account">The domain account, either "alias" or "DOMAIN\alias".</param>
        /// <returns>
        /// The display name, or an empty string when the directory lookup finds
        /// no entry or the entry carries no display name.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="account"/> is null or empty.
        /// </exception>
        public static string GetDisplayName(string account)
        {
            if (string.IsNullOrEmpty(account))
            {
                // The single-string constructor interprets its argument as the
                // parameter name, so the name and the message are passed separately.
                throw new ArgumentNullException("account", "The argument account must not be null or empty.");
            }

            try
            {
                return GetNameFromWho(account);
            }
            catch (Exception)
            {
                // Deliberate best-effort fallback: any failure of the Who
                // service is answered by querying the directory instead.
                return GetNameFromDomain(account);
            }
        }

        /// <summary>
        /// Gets the display name from the Who people-store service (http://who/).
        /// </summary>
        /// <param name="account">The domain account.</param>
        /// <returns>The name of the first person returned for the alias.</returns>
        /// <exception cref="InvalidOperationException">
        /// The service returned no person for the alias.
        /// </exception>
        private static string GetNameFromWho(string account)
        {
            string alias = GetAlias(account);

            // NOTE(review): the client is never closed here; if it is a generated
            // WCF proxy it should be closed/aborted after the call - confirm.
            Who.PeopleStoreSoapClient client = new Who.PeopleStoreSoapClient();
            var people = client.FindPeopleByAlias(new string[] { alias });

            if (people != null && people.Length > 0)
            {
                return people[0].Name;
            }

            throw new InvalidOperationException("No such alias from who");
        }

        /// <summary>
        /// Gets the display name from Active Directory.
        /// </summary>
        /// <param name="account">The domain account, either "alias" or "DOMAIN\alias".</param>
        /// <returns>
        /// The displayName attribute of the matching entry, or an empty string
        /// when no entry matches or the attribute is absent.
        /// </returns>
        private static string GetNameFromDomain(string account)
        {
            Domain currentDomain = Domain.GetCurrentDomain();
            Domain subDomain = currentDomain;

            // Only a qualified account ("DOMAIN\alias") needs the sibling-domain lookup.
            if (account.Contains("\\"))
            {
                // Parent is null when the current domain has no parent domain;
                // there are then no sibling domains to search.
                Domain parent = currentDomain.Parent;
                if (parent != null)
                {
                    subDomain = UserUtility.GetSubDomain(parent.Children, account) ?? currentDomain;
                }
            }

            // NOTE(review): the alias is concatenated into the LDAP filter unescaped;
            // escape it if accounts can come from untrusted input.
            using (DirectoryEntry entry = subDomain.GetDirectoryEntry())
            using (DirectorySearcher search = new DirectorySearcher(entry))
            {
                search.Filter = "(SAMAccountName=" + UserUtility.GetAlias(account) + ")";
                search.PropertiesToLoad.Add("displayName");

                SearchResult result = search.FindOne();
                if (result == null)
                {
                    return string.Empty;
                }

                // An entry without a displayName yields an empty value collection;
                // indexing [0] on it would throw, so check the count first.
                ResultPropertyValueCollection values = result.Properties["displayname"];
                if (values == null || values.Count == 0 || values[0] == null)
                {
                    return string.Empty;
                }

                return values[0].ToString();
            }
        }

        /// <summary>
        /// Finds the domain whose name contains the domain part of the account.
        /// </summary>
        /// <param name="domains">The candidate domains.</param>
        /// <param name="domainAccount">The domain account in "DOMAIN\alias" form.</param>
        /// <returns>The first matching domain, or null when none matches.</returns>
        private static Domain GetSubDomain(DomainCollection domains, string domainAccount)
        {
            string domainPart = domainAccount.Split('\\')[0];

            // An empty prefix ("\alias") would match every domain name.
            if (domainPart.Length == 0)
            {
                return null;
            }

            foreach (Domain domain in domains)
            {
                // Compare case-insensitively so a differently cased prefix still matches.
                if (domain.Name.IndexOf(domainPart, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    return domain;
                }
            }

            return null;
        }

        /// <summary>
        /// Extracts the alias from a domain account.
        /// </summary>
        /// <param name="domainAccount">The domain account.</param>
        /// <returns>
        /// The part after the backslash when the account has exactly one
        /// backslash; otherwise the account unchanged.
        /// </returns>
        private static string GetAlias(string domainAccount)
        {
            string[] parts = domainAccount.Split('\\');
            return parts.Length == 2 ? parts[1] : domainAccount;
        }
    }
}


    using System.IO;
    
using System.Xml;
    
using System.Xml.Xsl;

    
/// <summary>
    /// Helper for applying an XSLT stylesheet to XML text.
    /// </summary>
    public static class XsltHelper
    {
        /// <summary>
        /// Transforms XML text with the stylesheet stored at the given path.
        /// </summary>
        /// <param name="xml">The XML source text (not an XML file path).</param>
        /// <param name="xslt">The path of the XSLT stylesheet file (not the stylesheet text).</param>
        /// <returns>The transformed result.</returns>
        public static string Transform(string xml, string xslt)
        {
            // NOTE(review): XslTransform is marked obsolete in favour of
            // XslCompiledTransform; it is kept here so output stays unchanged.
            XslTransform xsltTransform = new XslTransform();
            xsltTransform.Load(xslt);

            XmlDocument doc = new XmlDocument();
            doc.LoadXml(xml);

            using (StringWriter sw = new StringWriter())
            {
                xsltTransform.Transform(doc, new XsltArgumentList(), sw);
                return sw.ToString();
            }
        }
    }

 using System;

using System.Collections.Generic;
using System.Linq;

using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;


namespace GetInfoByReg
{
    
/// <summary>
    /// Console scraper: downloads the paged listing at <c>uri</c>, extracts each
    /// entry's name and address with regular expressions and appends them to
    /// data.txt in the application's base directory.
    /// </summary>
    class Program
    {
        // Listing URL template; "{0}" is replaced by the page number.
        private static string uri;

        // Full path of the output file.
        private static string file;

        // Total number of pages; -1 until it has been read from the first page.
        private static int count = -1;

        /// <summary>
        /// Scrapes every page of the listing, pausing two seconds between pages.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            uri = "http://sh.fangjia.com/xiaoqu/--e-{0}|r-%E6%99%AE%E9%99%80%E5%8C%BA";
            file = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "data.txt");

            if (!File.Exists(file))
            {
                // File.Create returns an open FileStream; dispose it at once so
                // Save can reopen the file for appending.
                using (File.Create(file))
                {
                }
            }

            Console.WriteLine("--------------------------");
            Console.WriteLine("开始采集数据,请等待...");
            Console.WriteLine("--------------------------");

            int pages = 1;
            int counts = 0;

            do
            {
                Save("开始读取第" + pages.ToString() + "页数据!");
                counts += OperateInfo(pages++);

                // Pause between requests.
                System.Threading.Thread.Sleep(2000);
            }
            while (pages <= count);

            Console.WriteLine("采集完成!共" + counts + "条,文件存放在" + file);
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads one listing page and saves every name/address pair found on it.
        /// </summary>
        /// <param name="page">The 1-based page number.</param>
        /// <returns>The number of entries found on the page.</returns>
        static int OperateInfo(int page)
        {
            string pageUri = uri.Replace("{0}", page.ToString());

            string txt;
            using (WebClient client = new WebClient())
            {
                txt = Encoding.UTF8.GetString(client.DownloadData(pageUri));
            }

            /*
             * Shape of the markup matched below:
             *   <div class="fsize14 margin-bottom8">
             *       <strong>
             *       <a href="/xiaoqu-4796-..." target="_blank">
             *       (community name)</a>
             *       </strong>
             *   </div>
             *   <div class="margin-bottom5">
             *       (district)
             *       (street address),</div>
             */
            if (count == -1)
            {
                // The pager reads "1/<total>" on the first page. [0-9]+ also
                // accepts totals containing a zero (10, 20, 100, ...).
                Match pager = Regex.Match(
                    txt,
                    "<div class=\"pagination-simple right\">\\s+<span>1/([0-9]+)</span>");

                // Without a pager, scrape only this page instead of crashing.
                count = pager.Success ? int.Parse(pager.Groups[1].Value) : 1;
            }

            // Match the community list: group 1 is the name, group 2 the address.
            string pattern = "<div class=\"fsize14 margin-bottom8\">\\s+<strong>\\s+<a\\s+[^>]+>\\s+(.+?)</a>\\s+</strong>" +
                             "\\s+</div>\\s+<div class=\"margin-bottom5\">([^<]+)</div>";

            MatchCollection mc = Regex.Matches(txt, pattern);
            foreach (Match m in mc)
            {
                string name = m.Groups[1].Value;

                // Strip whitespace, commas, parentheses and "&nbsp;" from the address.
                string address = Regex.Replace(m.Groups[2].Value, "[\\s,( )]+", "").Replace("&nbsp;", "");

                Save(name + "----地址:" + address);
            }

            Console.WriteLine("" + page + "页采集到" + mc.Count + "条!");
            return mc.Count;
        }

        /// <summary>
        /// Appends one line of UTF-8 text to the output file.
        /// </summary>
        /// <param name="str">The line to append.</param>
        static void Save(string str)
        {
            // Disposing the writer flushes and closes it.
            using (StreamWriter sw = new StreamWriter(file, true, Encoding.UTF8))
            {
                sw.WriteLine(str);
            }
        }
    }
}
// Original article: https://www.cnblogs.com/bober/p/1962527.html