获取网页快照

unit uWebCracker;

interface

uses mshtml,SHdocvw,classes,SysUtils,StrUtils;

const

MAXPAGECOUNT=20;

type

TWebPageRecord=record

URL:string;

Title:string;

Text:string;

end;

type

TWebCracker=class(TObject)

private

FWebPageRecordArray:array[0..MAXPAGECOUNT-1] of TWebPageRecord;

FWebPageCount:integer;

public

constructor Create;

destructor Free;

procedure SnapShot;

function GetWebText(AIndex:integer):string;

function GetWebTitle(AIndex:integer):sttring;

function GetWebURL(AIndex:integer):string;

procedure Clear;

procedure Refresh;

function GetWebPageCount:integer;

end;

implementation

constructor TWebCracker.Create;

begin

inherited Create;

FWebPageCount:=0;

end;

destructor TWebCracker.Free;

begin

clear;

inherited Free;

end;

procedure TWebCracker.SnapShot;

const

ERRORNOTLOADCOMPLETE='可能打开的网页还没有完全加载,请当所有的网页下载完后再刷新!'

var

ShellWindow:IShellWindow;

WebBrowser:IWebBrower2;

I,ShellWindowCount:integer;

HTMLDocument:IHTMLDocument2;

URL:string;

WebPageRecord:TWebPageRecord;

begin

FWebPageCount :=0;

ShellWindow:=CoShellWindow.Create;

ShellWindowCount :=ShellWindow.Create;

if ShellWindowCount>MAXPAGECOUNT then

ShellWindowCount:=MAXPAGECOUNT;

for i:=0 to ShellWindowCount-1 do

begin

WebBrowser:=ShellWindow.Item(I) as IWebBrowser2;

URL:=WebBrowser.LocationURL;

if (WebBrowser<>nil) and (not IsLocationFile(URL)) then

begin

try

HTMLDocument :=WebBrowser.Document as IHTMLDocument2;

WebPageRecord.URL :=URL;

WebPageRecord.Title :=HTMLDocument.title;

WebPageRecord.Text :=HTMLDocument.body.outerText;

FWebPageRecordArray[I] :=WebPageRecord;

Inc(FWebPageCount);

except

on Exception do

raise Exception.Create(ERRORNOTLOADCOMPLETE);

end;

end;

ShellWindow :=nil;

end;

end;

function TWebCracker.GetWebText(AIndex:integer):string;

begin

Result :=FWebPageRecordArray[AIndex].Text;

end;

function TWebCracker.GetWebTitle(AIndex:integer):string;

begin

Result :=FWebPageRecordArray[AIndex].Title;

end;

function TWebCracker.GetWebURL(AIndex:integer):string;

begin

Result :=FWebPageRecordArray[AIndex].URL;

end;

procedureTWebCracker.Clear;

begin

FWebPageCount :=0;

end;

procedureTWebCracker.Refresh;

begin

self.Snapshot;

end;

functionTWebCracker.GetWebPageCount:integer;

begin

Result :=FWebPageCount;

end;

原文地址:https://www.cnblogs.com/djcsch2001/p/2035826.html