webbrowser

WebBrowser 操作记要

WebBrowser 操作记要 
WebBrowser1.GoHome; //到浏览器默认主页 
WebBrowser1.Refresh; //刷新 
WebBrowser1.GoBack; //后退 
WebBrowser1.GoForward; //前进 
WebBrowser1.Navigate('...'); //打开指定页面 
WebBrowser1.Navigate('about:blank'); //打开空页面 
________________________________________ 

//打开空页面, 并写入... 

WebBrowser1.Navigate('about: <head> <title>标题> </title> <body>页面内容 </body>'); 
________________________________________ 

//读取网页脚本中的变量: 

procedure TForm1.Button1Click(Sender: TObject); 
var 
s: string; 
i: Integer; 
begin 
s := WebBrowser1.OleObject.document.Script.str; 
i := WebBrowser1.OleObject.document.Script.num; 
ShowMessage(s); //Hello 
ShowMessage(IntToStr(i)); //99 

//也可以这样读: 
s := WebBrowser1.OleObject.document.parentWindow.str; 
i := WebBrowser1.OleObject.document.parentWindow.num; 
ShowMessage(s); //Hello 
ShowMessage(IntToStr(i)); //99 
end; 

&amp;lt;br&amp;gt;假如网页中有这样的语句:&amp;lt;br&amp;gt;&amp;amp;amp;lt;script&amp;amp;amp;gt;&amp;lt;br&amp;gt;var&amp;lt;br&amp;gt; str = "Hello";&amp;lt;br&amp;gt; i = 99;&amp;lt;br&amp;gt;&amp;amp;amp;lt;/script&amp;amp;amp;gt; 
________________________________________ 
//调用网页脚本中的函数: 

procedure TForm1.Button1Click(Sender: TObject); 
begin 
WebBrowser1.OleObject.document.parentWindow.MB(); //HTML-Js 

//如需指定脚本语言, 需要: 
WebBrowser1.OleObject.document.parentWindow.execScript('MB()','JavaScript'); //HTML-Js 
end; 

&amp;lt;br&amp;gt;假如有这样的脚本:&amp;lt;br&amp;gt;&amp;amp;amp;lt;script&amp;amp;amp;gt;&amp;lt;br&amp;gt;function MB(){ &amp;lt;br&amp;gt; alert('HTML-Js');&amp;lt;br&amp;gt;}&amp;lt;br&amp;gt;&amp;amp;amp;lt;/script&amp;amp;amp;gt; 
________________________________________ 
//判断网页及内部框架网页是否全部下载完毕 

procedure TForm1.WebBrowser1DocumentComplete(ASender: TObject; 
const pDisp: IDispatch; var URL: OleVariant); 
begin 
if WebBrowser1.Application = pDisp then 
begin 
Text := '网页下载完毕!'; 
end; 
end; 

________________________________________ 
//改变背景色或背景图片: 
WebBrowser1.OleObject.document.body.bgcolor := '#FF0000'; 
WebBrowser1.OleObject.document.body.background := '...图片地址'; 
________________________________________ 

//操作有 ID 标签的对象: 
var 
s: string; 
begin 
s := WebBrowser1.OleObject.document.getElementByID('span1').innerText; 
ShowMessage(s); //这是 span1 标签中的内容 

//或者: 
s := WebBrowser1.OleObject.document.parentWindow.span1.innerText; 
ShowMessage(s); //这是 span1 标签中的内容 

//隐藏它: 
WebBrowser1.OleObject.document.parentWindow.span1.style.display := 'none'; 
end; 

&amp;lt;br&amp;gt;假如网页中有这样的内容:&amp;lt;br&amp;gt;&amp;amp;amp;lt;span id=span1&amp;amp;amp;gt;这是 span1 标签中的内容&amp;amp;amp;lt;/span&amp;amp;amp;gt;&amp;lt;br&amp;gt; 
________________________________________ 
//获取网页源代码 
var 
s: string; 
begin 
s := WebBrowser1.OleObject.document.body.innerHTML; //body内的所有代码 
s := WebBrowser1.OleObject.document.body.outerHTML; //body内的所有代码, 包含body标签 
s := WebBrowser1.OleObject.document.documentElement.innerHTML; //html内的所有代码 
end; 

//获取网页全部源代码 
uses ActiveX; 
var 
ms: TMemoryStream; 
begin 
if not Assigned(WebBrowser1.Document) then Exit; 
ms := TMemoryStream.Create; 
(WebBrowser1.Document as IPersistStreamInit).Save(TStreamAdapter.Create(ms), True); 
ms.Position := 0; 
Memo1.Lines.LoadFromStream(ms, TEncoding.UTF8); 
// Memo1.Lines.LoadFromStream(ms, TEncoding.Default); {GB2312 等双字节} 
ms.Free; 
end; 
________________________________________ 

//WebBrowser 中的右键菜单 

//先要添加ApplicationEvents1,指定其Message事件 

//屏蔽右键菜单 
procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); 
begin 
with Msg do 
begin 
if not IsChild(WebBrowser1.Handle, hWnd) then Exit; 
Handled := (message = WM_RBUTTONDOWN) or (message = WM_RBUTTONUP) or (message = WM_CONTEXTMENU); 
end; 
end; 

//替换右键菜单 
procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); 
var mPoint: TPoint; 
begin 
if IsChild(WebBrowser1.Handle, Msg.Hwnd) and 
((Msg.Message = WM_RBUTTONDOWN) or (Msg.Message = WM_RBUTTONUP)) then 
begin 
GetCursorPos(mPoint); //得到光标位置 
PopupMenu1.Popup(mPoint.X, mPoint.Y); //弹出popupmenu1的菜单 
Handled:=True; 
end; 
end; 
________________________________________ 

//新页面写入 
begin 
WebBrowser1.Navigate('about:blank'); 
WebBrowser1.OleObject.Document.Writeln('ok'); 
end; 

//从流中写入: 
var 
ms: TMemoryStream; 
begin 
ms := TMemoryStream.Create; 
Memo1.Lines.SaveToStream(ms); 
ms.Position := 0; 
(WebBrowser1.Document as IPersistStreamInit).Load(TStreamAdapter.Create(ms)); 
ms.Free; 
end; 

//禁止提示脚步错误 
procedure TForm1.WebBrowser1NavigateComplete2(ASender: TObject; 
const pDisp: IDispatch; var URL: OleVariant); 
begin 
WebBrowser1.Silent := True; 
end; 

//禁止弹出窗口 
procedure TForm1.WebBrowser1NewWindow2(ASender: TObject; var ppDisp: IDispatch; 
var Cancel: WordBool); 
begin 
Cancel := True; 
end; 
________________________________________ 

procedure TMainFrm.btnTestClick(Sender: TObject); 
var 
HtmlDoc:IHTMLDocument2; 
myitem:Olevariant; 
i:integer; 
str:string; 
begin 

myitem:= Web.Document; 

if myitem.frames.length <>0 then 
myitem:=myitem.frames.item(2).document; 
for i := 0 to myitem.all.length - 1 do 
begin 
if myitem.all.item(i).tagName = 'SELECT' then // 下拉框选择 
begin 

myitem.all.item(i).selectedindex:= myitem.all.item(i).options.length-1; 
if strtoint(myitem.all.item(i).value) <0 then myitem.all.item(i).value:=0; 
end; 

if myitem.all.item(i).tagName = 'INPUT' then 
begin 

if Uppercase(myitem.all.item(i).type)='SUBMIT' then//提交表单 
myitem.all.item(i).click; 

end; 
end; 


end; 
View Code

webbrowser本窗口打开

unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, OleCtrls, SHDocVw, StdCtrls;

type
  TForm1 = class(TForm)
    Button1: TButton;
    WebBrowser1: TWebBrowser;
    procedure Button1Click(Sender: TObject);
    procedure WebBrowser1NewWindow2(Sender: TObject; var ppDisp: IDispatch;
      var Cancel: WordBool);
    procedure tempWBBeforeNavigate2(Sender: TObject;
      const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData,
      Headers: OleVariant; var Cancel: WordBool);
    procedure WebBrowser1DocumentComplete(Sender: TObject;
      const pDisp: IDispatch; var URL: OleVariant);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}
var
  tempWB : TWebBrowser;

procedure TForm1.Button1Click(Sender: TObject);
begin
  Self.WebBrowser1.Navigate('http://www.google.com.hk');
end;

procedure TForm1.WebBrowser1NewWindow2(Sender: TObject;
  var ppDisp: IDispatch; var Cancel: WordBool);
begin
  if not Assigned(tempWB) then tempWB := TWebBrowser.Create(Self);
  tempWB.OnBeforeNavigate2 := Self.tempWBBeforeNavigate2;
  ppDisp := tempWB.OleObject;
end;

procedure TForm1.tempWBBeforeNavigate2(Sender: TObject;
  const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData,
  Headers: OleVariant; var Cancel: WordBool);
begin
  Self.WebBrowser1.Navigate(Url);
  Cancel := True;
end;

procedure TForm1.WebBrowser1DocumentComplete(Sender: TObject;
  const pDisp: IDispatch; var URL: OleVariant);
begin
  if Assigned(tempWB) then FreeAndNil(tempWB);
end;

end.
View Code

webbrowser获取页面全部链接

unit Unit1;

interface

uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, OleCtrls, SHDocVw;

type
TForm1 = class(TForm)
WebBrowser1: TWebBrowser;
Button1: TButton;
Edit1: TEdit;
ListBox1: TListBox;
procedure Button1Click(Sender: TObject);
procedure WebBrowser1DocumentComplete(Sender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
private
{ Private declarations }
public
{ Public declarations }
end;

var
Form1: TForm1;

implementation

{$R *.dfm}

procedure TForm1.Button1Click(Sender: TObject);
var 
i: Integer;
begin


webbrowser1.Navigate(edit1.Text);

end;

procedure TForm1.WebBrowser1DocumentComplete(Sender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
var
i:integer;
begin
for i := 0 to Webbrowser1.OleObject.Document.links.Length - 1 do
Listbox1.Items.Add(Webbrowser1.OleObject.Document.Links.Item(i));
end;

end.
View Code

操作WebBrowser 元素值

1. 自动填写表单并发布

两种方法
 var
  i:integer;
  Doc:IHTMLDocument2;
  input:OleVariant;
  userinputelement,pwdinputelement,ValidateElement:ihtmlinputelement;
  ValidateImage: IHTMLImgElement;
  imagecount:integer;
  form:ihtmlformelement;
  myitem:Olevariant;

begin
  Doc:=WebBrowser1.document as ihtmldocument2;
  if doc=nil then exit;

// 第一种方式
  userinputelement:=(doc.all.item('nicknameId',0) as ihtmlinputelement);  
  userinputelement.value:=edit7.text;
  pwdinputelement:=(doc.all.item('pwd',0) as ihtmlinputelement);

  pwdinputelement.value:=edit8.text;

  pwdinputelement:=(doc.all.item('name',0) as ihtmlinputelement);
  pwdinputelement.value:=edit9.text;

  myitem:=WebBrowser1.document;

// 第二种方式 并可操作 combo radio select 元素表
  for i:=0 to myitem.all.length-1 do
  begin
    ///
    if myitem.all.item(i).tagName = 'SELECT' then // 下拉框选择
    begin
      if myitem.all.item(i).Name='birth_year' then  
       myitem.all.item(i).value:='1980'; //     

    end;

    if myitem.all.item(i).tagName = 'INPUT' then   
    begin
      if Uppercase(myitem.all.item(i).type)='RADIO' then  
      begin
        if myitem.all.item(i).value='男生' then myitem.all.item(i).checked:=true; // 选中值是 '求'的选项
      end;

     if Uppercase(myitem.all.item(i).type)=Text then  
      begin

          myitem.all.item(i).value='';
       end;
    end;

  end;

2.操作超链接

  var
 i:integer;
 myitem:Olevariant;
begin    //xid_reg_handle
  myitem:=WebBrowser1.document;

  // 第一种方式
 aVal:=myitem.getElementById('xid_reg_handle').href;
   myitem.getElementById('xid_reg_handle').click;  // 模拟点击超链接
  showmessage(InttosTr(myitem.Links.length));   

  for i:=0 to myitem.Links.length-1 do
  begin

     // myitem.Links.item(i).href // hrefUrl 可获取
    if myitem.Links.item(i).innertext='名称' then // <a href=''> 名称' </a>
      myitem.Links.item(i).click;// 模拟点击超链接
  end;
end;
View Code

通过MSHTML实现一个HTML解析类

最近经常会模拟网页提交返回网页源码,然后获得网页中相应的元素,于是需要常常解析Html中相应的各种元素,网络是个好东西,搜索一番,就找到了好几个Delphi版本的HtmlParser的类库,试着使用了几个,发现解析起来都不完整,或多或少的回出现一些问题!于是想到了如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交,界面上是不一定要出现WebBrowser的,肯定有办法,不通过WebBrowser就直接解析HTML的,那便是我不要WebBrowser这个外壳,只要他里面的Document文档接口对象就能实现对Html的解析了,查找了一番MSDN,然后Google一下,果然可行,构建方法如下:

//创建IHTMLDocument2接口
  CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);

接口创建好了之后就能够对文档元素进行解析了,很是爽快!

结合了我自己的特有操作,我对Combobox,Table,Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下:

这里只给出声明,代码请在最后下载 

复制代码
代码
(******************************************************)
(*                得闲工作室                          *)
(*              网页元素操作类库                      *)
(*                                                    *)
(*              DxHtmlElement Unit                    *)
(*    Copyright(c) 2008-2010  不得闲                  *)
(*    email:appleak46@yahoo.com.cn     QQ:75492895    *)
(******************************************************)
unit DxHtmlElement;

interface
uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;

{Get EleMent Type}
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
function IsInPutBtnElement(element: IHTMLElement): boolean;
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
{Get ImgElement Data}
function GetPicIndex(doc: IHTMLDocument2; Src: string; Alt: string): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string;src: string;Alt: string): IHTMLImgElement;
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string; Src: string; Alt: string): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture;overload;

type
  TObjectFromLResult = function(LRESULT: lResult;const IID: TIID; WPARAM: wParam;out pObject): HRESULT; stdcall;
  TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
  ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);


function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string;
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2;
         const TableIndex, RowIndex, ColIndex: Integer;var ResValue: string):   Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;

function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2;
         const TableIndex,RowIndex,ColIndex: Integer;var ResValue: string):   Boolean;
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2;
         const TableIndex,RowIndex: Integer;var ResValue: string):   Boolean;

type
  TDxWebFrameCollection = class;
  TDxWebElementCollection = class;

 
  TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);

  TDxWebFrame = class
  private
    FFrame: IHTMLWINDOW2;
    FElementCollections: TDxWebElementCollection;
    FWebFrameCollections: TDxWebFrameCollection;
    function GetSrc: string;
    function GetElementCount: integer;
    function GetWebFrameCollections: TDxWebFrameCollection;
    function GetElementCollections: TDxWebElementCollection;
    function GetDocument: IHTMLDOCUMENT2;
    function GetReadState: TLoadState;
    function GetIsLoaded: boolean;
    procedure SetFrame(const Value: IHTMLWINDOW2);
    function GetName: string;
  public
    Constructor Create(IFrame: IHTMLWINDOW2);
    Destructor Destroy;override;
    property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
    property Src: string read GetSrc;
    property Document: IHTMLDOCUMENT2 read GetDocument;
    property Name: string read GetName;
    property Frames: TDxWebFrameCollection read GetWebFrameCollections;
    property ElementCount: integer read GetElementCount;
    property ElementCollections: TDxWebElementCollection read GetElementCollections;
    property ReadyState: TLoadState read GetReadState;
    property IsLoaded: boolean read GetIsLoaded;  
  end;


  TDxWebFrameCollection = Class
  private
    FFrameCollection: IHTMLFramesCollection2;
    Frame: TDxWebFrame;
    function GetCount: integer;
    function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
    function GetFrameInterfaceByName(Name: string): IHTMLWINDOW2;
    function GetFrameByIndex(index: integer): TDxWebFrame;
    function GetFrameByName(Name: string): TDxWebFrame;
    procedure SetFrameCollection(const Value: IHTMLFramesCollection2);
  public
    Constructor Create(ACollection: IHTMLFramesCollection2);
    Destructor Destroy;override;
    property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection;
    property Count: integer read GetCount;
    property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex;
    property FrameInterfaceByName[Name: string]: IHTMLWINDOW2 read GetFrameInterfaceByName;

    property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
    property FrameByName[Name: string]: TDxWebFrame read GetFrameByName;
  end;
  
  TDxWebElementCollection = class
  private
    FCollection: IHTMLElementCollection;
    FChildCollection:  TDxWebElementCollection;
    function GetCollection(index: String): TDxWebElementCollection;
    function GetCount: integer;
    function GetElement(itemName: string; index: integer): IHTMLElement;
    function GetElementByName(itemName: string): IHTMLELEMENT;
    function GetElementByIndex(index: integer): IHTMLELEMENT;
    procedure SetCollection(const Value: IHTMLElementCollection);
  public
    Constructor Create(ACollection: IHTMLElementCollection);
    Destructor Destroy;override;
    property Collection: IHTMLElementCollection read FCollection write SetCollection;
    property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
    property ElementCount: integer read GetCount;
    property Element[itemName: string;index: integer]: IHTMLElement read GetElement;
    property ElementByName[itemName: string]: IHTMLELEMENT read GetElementByName;
    property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex;
  end;

  TLinkCollection = class(TDxWebElementCollection)
  
  end;
  TDxWebTable = class;

  TDxTableCollection = class
  private
    FTableCollection: IHTMLElementCollection;
    FDocument: IHTMLDOCUMENT2;
    FWebTable: TDxWebTable;
    function GetTableInterfaceByName(AName: string): IHTMLTABLE;
    procedure SetDocument(Value: IHTMLDOCUMENT2);
    function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
    function GetCount: integer;
    function GetTableByIndex(index: integer): TDxWebTable;
    function GetTableByName(AName: string): TDxWebTable;
  public
    Constructor Create(Doc: IHTMLDOCUMENT2);
    destructor Destroy;override;
    property TableInterfaceByName[AName: string]: IHTMLTABLE read GetTableInterfaceByName;
    property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex;

    property TableByName[AName: string]: TDxWebTable read GetTableByName;
    property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
    
    property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
    property Count: integer read GetCount;
  end;

  TDxWebTable = class
  private
    FTableInterface: IHTMLTABLE;
    function GetRowCount: integer;
    procedure SetTableInterface(const Value: IHTMLTABLE);
    function GetCell(ACol, ARow: integer): string;
    function GetRowColCount(RowIndex: integer): integer;
    function GetInnerHtml: string;
    function GetInnerText: string;
    function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
  public
    Constructor Create(ATable: IHTMLTABLE);
    property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
    property RowCount: integer read GetRowCount;
    property Cell[ACol: integer;ARow: integer]: string read GetCell;
    property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement;
    property RowColCount[RowIndex: integer]: integer read GetRowColCount;
    property InnerHtml: string read GetInnerHtml;
    property InnerText: string read GetInnerText;
  end;

  TDxWebCombobox = class
  private
    FHtmlSelect: IHTMLSelectElement;
    function GetCount: Integer;
    procedure SetItemIndex(const Value: Integer);
    function GetItemIndex: Integer;
    function GetName: string;
    procedure SetName(const Value: string);
    function GetValue: string;
    procedure SetValue(const Value: string);
    procedure SetCombInterface(const Value: IHTMLSelectElement);
    function GetItemByName(EleName: string): string;
    function GetItemByIndex(index: integer): string;
    function GetItemAttribute(index: Integer; AttribName: string): OleVariant;
  public
    constructor Create(AWebCombo: IHTMLSelectElement);
    procedure Add(Ele: IHTMLElement);
    procedure Insert(Ele: IHTMLElement;Index: Integer);
    procedure Remove(index: Integer);

    property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
    property Count: Integer read GetCount;
    property ItemIndex: Integer read GetItemIndex write SetItemIndex;
    property ItemByIndex[index: integer]: string read GetItemByIndex;
    property ItemByName[EleName: string]: string read GetItemByName;
    property ItemAttribute[index: Integer;AttribName: string]: OleVariant read GetItemAttribute;
    property Name: string read GetName write SetName;
    property value: string read GetValue write SetValue;
  end;

implementation
end.
复制代码


 HTMLParser解析类的代码实现单元

复制代码
代码
(******************************************************)
(*                得闲工作室                          *)
(*              HTML解析单元库                        *)
(*                                                    *)
(*              DxHtmlParser Unit                     *)
(*    Copyright(c) 2008-2010  不得闲                  *)
(*    email:appleak46@yahoo.com.cn     QQ:75492895    *)
(******************************************************)
unit DxHtmlParser;

interface
uses Windows,MSHTML,ActiveX,DxHtmlElement,Forms;

type
  TDxHtmlParser = class
  private
    FHtmlDoc: IHTMLDocument2;
    FHTML: string;
    FWebTables: TDxTableCollection;
    FWebElements: TDxWebElementCollection;
    FWebComb: TDxWebCombobox;
    procedure SetHTML(const Value: string);
    function GetWebCombobox(AName: string): TDxWebCombobox;
  public
    constructor Create;
    destructor Destroy;override;
    property HTML: string read FHTML write SetHTML;
    property WebTables: TDxTableCollection read FWebTables;
    property WebElements: TDxWebElementCollection read FWebElements;
    property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox;
  end;
implementation

{ TDxHtmlParser }

constructor TDxHtmlParser.Create;
begin
  CoInitialize(nil);
  //创建IHTMLDocument2接口
  CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
  Assert(FHtmlDoc<>nil,'构建HTMLDocument接口失败');
  FHtmlDoc.Set_designMode('On'); //设置为设计模式,不执行脚本
  while not (FHtmlDoc.readyState = 'complete') do
  begin
    sleep(1);
    Application.ProcessMessages;
  end;                   
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;

destructor TDxHtmlParser.Destroy;
begin
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  CoUninitialize;
  inherited;
end;

function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
begin
   if FWebElements.Collection <> nil then
   begin
     FWebComb.CombInterface := FWebElements.ElementByName[AName] as IHTMLSelectElement;
     Result := FWebComb;
   end
   else Result := nil;
end;

procedure TDxHtmlParser.SetHTML(const Value: string);
begin
  if FHTML <> Value then
  begin
    FHTML := Value;
    FHtmlDoc.body.innerHTML := FHTML;
    FWebElements.Collection := FHtmlDoc.all;
  end;
end;

end.
View Code

用MSHTML控制网页中所有元素

http://www.cnblogs.com/yuanbao/archive/2007/09/03/878213.html

前些日子用VS2005中的WebBrowser来控制网页中的元素,虽然VS2005封装了很多不错的功能,但用起来总觉得有所欠缺。比如我想如得到框架页内网页的源码,找来的去,就是找不到合适的方法。
        MSHTML是微软公司的一个COM组件,该组件封装了HTML语言中的所有元素及其属性,通过其提供的标准接口,可以访问指定网页的所有元素.MSHTML对象模型是由一些对象和集合组成的.处于根部的是HTML,描述了打开页面的1个窗口,包括一系列集合和对象。如Frames集合,History,Location,Navigator,Document,Vi—sum,Event对象等.其中描述呈现在客户窗口实际网页的是Document对象。由一系列的属性、方法、对象和集合组成.其中All集合中包含网页中所有标记(Tag)元素,其主要的方法和属性有:
  (1)Length(长度):即标记出现的个数,可以把标记的集合理解为从0开始的一维数组,其次序按照标记在网页位置排列;
  (2)Tags(标记):用于过滤出给定标记的集合,如Doc.Al1.Tags(P)得到所有分段标记P;
  (3)Item(项目):用于选择集合中的某1个元素,如object.item(0)得到集合的第1个元素,而object.item(i)得到第i+1个元素. 
此外,IHTMLElement也是个常用的集合对象,代表网页中指定标记的集合,通过这个集合对象,可以得到网页上特定标记的内容.IHTMLElement有4个主要属性:
  (1)InnerText:开始标记和结束标记之间的文本;
  (2)InnerHTML:开始标记和结束标记之间的文本和HTML;
  (3)OuterText:对象的文本;
  (4)OuterHTML:对象的文本和HTML.
自动提交

 

 procedure TForm1.Button1Click(Sender: TObject);
              var
              Doc:IHTMLDocument2;
              input:OleVariant;
              userinputelement,pwdinputelement:ihtmlinputelement;
              begin
              doc:=webbrowser1.document as ihtmldocument2;
              userinputelement:=(doc.all.item('user'(也就是网页中用户名控件的名字),0) as ihtmlinputelement);
              userinputelement.value:=edit1.text;(也就是你要向网页输入的东西)
              pwdinputelement:=(doc.all.item('password',0) as ihtmlinputelement);
              pwdinputelement.value:=edit2.text;
              input:=doc.all.item('submit',0);
              input.click;
              end;  
  当提交数据按钮没有NAME属性时,采用如下方法:

 procedure TForm1.Button1Click(Sender: TObject);
              var
              Doc:IHTMLDocument2;
              form:ithmlformelement;
              userinputelement,pwdinputelement:ihtmlinputelement;              
begin
              doc:=webbrowser1.document as ihtmldocument2;
              userinputelement:=(doc.all.item('user'(也就是网页中用户名控件的名字),0) as ihtmlinputelement);
              userinputelement.value:=edit1.text;(也就是你要向网页输入的东西)
              pwdinputelement:=(doc.all.item('password',0) as ihtmlinputelement);
              pwdinputelement:=edit2.text;
              form:=(doc.all.item('login_form',0) as ihtmlformelement):
              form.submit;
              end;              

              

当前页为框架页时,采用如下方法:
 procedure TForm1.Button1Click(Sender: TObject);
var
                  doc2:IHTMLDocument2;
                  o : Olevariant;
                  ole_index: OleVariant;
                  frame_dispatch: IDispatch;
                  frame_win: IHTMLWindow2;
                  frame_doc: IHTMLDocument2;      
begin
                begin
                    doc2 := WebBrowser1.Document as IHTMLDocument2;
                    ole_index:=0;
                    frame_dispatch := doc2.Frames.Item(ole_index);
                    if frame_dispatch <> nil then
                    begin
                        frame_win := frame_dispatch as IHTMLWindow2;
                        frame_doc := frame_win.document;
                       // memo1.lines.add(IHTMLDocument2(frame_doc).body.outerHTML);
                    End;
              end;              
0
0
(请您对文章做出评价)
View Code

TWebBrowser设置

去除滚动条和边框

((WebBrowser1.Document as IHTMLDocument2).body as HTMLBody).scroll := 'no';  
(WebBrowser1.Document as IHTMLDocument2).body.style.border := '0';  
(WebBrowser1.Document as IHTMLDocument2).body.style.borderStyle := 'none';  
(WebBrowser1.Document as IHTMLDocument2).body.style.margin := '0';  
(WebBrowser1.Document as IHTMLDocument2).body.style.padding := '0';  
(WebBrowser1.Document as IHTMLDocument2).body.style.overflow := 'hidden';  


模拟点击网页中的按钮
{模拟一个页面}
 WebBrowser1.Navigate( 'about:<head><title>标题</title><body bgcolor=#ff0000>'+
                                          '<form method="POST" action="http://del.cnblogs.com">'+ 
                                          '<input type="submit" value="提交" id="btnID" name="btnName">'+ 
                                          '</form></body>'); 

 {假如知道按钮名称, 譬如是: btnName}
WebBrowser1.OleObject.document.all.item('btnName').click;
WebBrowser1.OleObject.document.all.item('btnName', 0).click;

{假如知道按钮的 ID, 譬如是: btnID}
 WebBrowser1.OleObject.document.getElementByID('btnID').click; 

 {假如只知道是第几个按钮, 譬如是第一个}
 WebBrowser1.OleObject.document.getElementsByTagName('input').item(0).click; 

MaxScrollHeight := doc.body.getAttribute('ScrollHeight', 0); //获得滚动条最大高度
MaxScrollWidth := doc.body.getAttribute('ScrollWidth', 0);//获得滚动条最大宽度
Form1.WebBrowser1.OleObject.Document.ParentWindow.ScrollBy(MaxScrollWidth ,MaxScrollHeight  ); //滚动到最右最下
    //MaxScrollHeight := doc.body.getAttribute('ScrollHeight', 0); //获得滚动条最大高度
    MaxScrollWidth := doc.body.getAttribute('ScrollWidth', 0);//获得滚动条最大宽度
View Code

Webbrowser 一些特殊網頁元素的訪問

 在論壇上偶有朋友問及網頁中 表格數據的讀取!今天再遇到。便先寫在這。以後再遇到其他的元素再一一添加
    <一>.delphi 中 webbrowser 對表格數據的讀取
var  
      ovTable: OleVariant;
      i,j: integer;     
begin
       ovTable:=Wb.OleObject.Document.all.tags('TABLE').item(1);//取第二表格集合
       for i := 1 to (ovTable.Rows.Length - 1) do //循環行
       begin
                for j := 1 to (ovTable.Rows.Item(i).Cells.Length ) do// 循環列
                begin    
                       單元格數據 := ovTable.Rows.Item(i).Cells.Item(j-1).InnerText;
                end;   
       end;
end;
 
==============================2011年6月22日=================================
<二>對css中定義的背景圖片讀取方法
WebBrowser1.OleObject.document.getElementById('bgDiv').currentStyle.BackGroundImage
WebBrowser1.OleObject.document.body.currentStyle.BackGroundImage 
==============================2011年9月25日=================================
<三> Delphi歷遍网页中指定标签内的子元素
        首先取得我们想要提取的标签,比如www.baidu.com首页的某个 div id 为 ‘nv’ 
       申明 tags为 OleVariant;
       tags:= :=wb.OleObject.document.all.item('nv',0).all;
       获取nv标签内的第一个字元素A 的outerhtml为:  str := tags.item(0,0).outerhtml;//其中0就代理第一个元素
==============================2013年2月28日=================================
 document.getElementById('bet-race-num-1').parentNode.nextSibling.firstChild.value='abc'
View Code

通过webbrowser读取网页上确定标签ID的值

Edit1.text := ((wbmap.document as IHTMLDocument2).all.item('tname', 0) as ihtmlinputelement).value
 dit1.text:= wbmap.OleObject.document.getElementByID('tname').value;

wbmap:webbrowser;
tname:网页标签ID名;
View Code

js nextSibling属性和previousSibling属性

  1:nextSibling属性

        该属性表示当前节点的下一个节点(其后的节点与当前节点同属一个级别);如果其后没有与其同级的节点,则返回null。

       需要特别注意的是:该属性在不同的浏览器中的执行结果并不都相同,见下面例示:

      先来看一个例子:
view plaincopy to clipboardprint?
<body>   
<div>   
<input id="a4" type="button" onclick="alert(this.nextSibling);" value="d" />   
<input id="a5" type="button" onclick="alert(this.nextSibling);" value="e" />   
</div>   
</bod  
      该对象的结构表面上看,div的nextSibling只有2项——两个input节点。但实际上有5项——/n,input,/n,input,/n。这是因为input作为创建各种表单输入控件的标签,无论是生成button、checkbox、radio...等或其他表单控件,IE都会自动在后面创建一个1字节位的空白。

 

       IE将跳过在节点之间产生的空格文档节点(如:换行字符),而Mozilla不会这样——FF会把诸如空格换行之类的排版元素视作节点读取,因此,在ie中用nextSibling便可读取到的下一个节点元素,在FF中就需要这样写:nextSibling.nextSibling了。

       opera和safari对nextSibling的处理方式与FF一致

        2:previousSibling属性

        该属性与nextSibling属性的作用正好相反。例如:someTagObject.nextSibling.previousSibling其实返回的是该标签元素本身,但前提必须是:该标签元素的后面必须有一个同级的元素,否则就返回null了。

       3:通过nextSibling或者 previousSibling所获得的HTML标签元素对象的属性问题

       一般先通过nextSibling.nodeName来获知其标签名,或者通过nextSibling.nodeType来获知其标签类型,然后,如果该nextSibling.nodeName = #text,则通过nextSibling.nodeValue来获知其文本值;否则,可以通过nextSibling.innerHTML等其他常用标签元素属性来获取其属性。
View Code

遍历li

var  
  i, j,m,n: integer;  
  ovTable,ovTableul: OleVariant;  

//这一部分是取得“无序列表”的部分  
ovTable:=Webbrowser1.OleObject.Document.getElementsByTagName('ul').item(0);  
ovTableul:=ovTable.getElementsByTagName('li');  
   
n:=ovTableul.Length;  
   
if n>0 then  
begin  
  for i:=0 to n-1 do  
  begin  
    self.Memo1.Lines.Add(ovTableul.item(i).InnerText);  
  end;  
end;  
View Code

 Webbrowser无Name及ID时自动点击按钮

procedure TForm1.Button1Click(Sender: TObject);
var
    i:integer;
begin
    for i:=0 to wb1.OleObject.document.getElementsByTagName('a').length-1 do
    begin
    if wb1.OleObject.document.getElementsByTagName('a').item(i).innerhtml='唯一关键字1' then
    begin
      memo1.Lines.Add(wb1.OleObject.document.getElementsByTagName('a').item(i+1).innerhtml);
      if wb1.OleObject.document.getElementsByTagName('a').item(i+1).innerhtml<>'唯一关键字2'then
      wb1.OleObject.document.getElementsByTagName('a').item(i+1).click;
    end;
    if wb1.OleObject.document.getElementsByTagName('a').item(i).innerhtml='唯一关键字2' then
    begin
      memo1.Lines.Add(wb1.OleObject.document.getElementsByTagName('a').item(i-1).innerhtml);
      if wb1.OleObject.document.getElementsByTagName('a').item(i-1).innerhtml<>'唯一关键字1'then
        begin
          wb1.OleObject.document.getElementsByTagName('a').item(i-1).click;
          break;
        end;
     end;
  end;
end;
以上代码基本实现了无name和无id的自动点击。
View Code

设置WebBrowser 代理服务器 与 UserAgent

uses UrlMon, WinInet;

{-------------------------------------------------------------------------------
  过程名:    SetProcessProxy
  作者:      kelei
  日期:      2013.08.03
  参数:      aProxyServer代理服务器; aProxyPort代理服务器端口
  返回值:    True设置成功
  SetProcessProxy('127.0.0.1', 80);
-------------------------------------------------------------------------------}
function SetProcessProxy(const aProxyServer: string; const aProxyPort: Integer): Boolean;
var
  vProxyInfo: TInternetProxyInfo;
begin
  vProxyInfo.dwAccessType := INTERNET_OPEN_TYPE_PROXY;
  vProxyInfo.lpszProxy := PChar(Format('http=%s:%d', [aProxyServer, aProxyPort]));
  vProxyInfo.lpszProxyBypass := PChar('');
  Result := UrlMkSetSessionOption(INTERNET_OPTION_PROXY, @vProxyInfo, SizeOf(vProxyInfo, 0) = S_OK;
end;

{-------------------------------------------------------------------------------
  过程名:    SetProcessUserAgent
  作者:      kelei
  日期:      2013.08.03
  参数:      aUserAgent HTTP请求头UserAgent内容
  返回值:    True设置成功
  SetProcessUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3')
-------------------------------------------------------------------------------}
function SetProcessUserAgent(const aUserAgent: string): Boolean;
begin
  Result := UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, PChar(aUserAgent), Length(aUserAgent), 0) = S_OK;
end;
View Code

WebBrowser 点击任意元素 或图片

procedure TForm1.btnClickUrlClick(Sender: TObject);
var
   J:integer;
   spDisp: IDispatch;
   IDoc1: IHTMLDocument2;
   ielc: IHTMLElementCollection ;
   ihtml:IHTMLElement;
   iane:IHTMLAnchorElement;
begin
   WebNav.Document.QueryInterface(IHTMLDocument2,iDoc1);
   ielc:=idoc1.Get_all;
   for J:=0 to ielc.length-1 do
   begin
     Application.ProcessMessages;
     spDisp := ielc.item(J, 0);
     if SUCCEEDED(spDisp.QueryInterface(IHTMLAnchorElement ,iane))then
     begin
       if iane.href='http://www.nq51.com/' then //这里我在网页里的url是http://www.nq51.com调用的时候自动加上了'/'需要注意一下
       begin
         ihtml:=ielc.item(J,0) as IHTMLElement;
         ihtml.click;
       end;
     end;
   end;
end;
View Code

WebBrowser自动填表

1
<input type="text" name="xxx" size="20">
对于网页这种连接 我们可以用如下方式实现填写内容。
var
  o: Olevariant;
  all: IHTMLElementCollection;
  item: IHTMLElement;
begin
  o := WebBrowser1.oleobject.document.all.item('xxx', 0);
  o.value:='myValue';

2

o := Web1.oleobject.document.all.item('username',0);
               o.value:='liupan9999';
               Memo1.Lines.Add('填入密码');
               o := Web1.oleobject.document.all.item('password',0);
               o.value:='songbai1';
               Memo1.Lines.Add('登录');
               Web1.oleobject.document.Forms.Item('loginform', 0).submit;
View Code

delphi 几个实用的HTML解析函数

 1)HTML 标签值攫取函数,任意标签哦,纯字符串分析,可以配合IDHTTP编程

uses StrUtils;

function ExtractHtmlTagValues(const HtmlText: string; TagName, AttribName: string; var Values: TStringList): integer;

function FindFirstCharAfterSpace(const Line: string; StartPos: integer): Integer;
var i: integer; 
begin 
Result := -1; 
for i := StartPos to Length(Line) do 
begin 
if (Line[i] <> ' ') then 
begin 
Result := i; 
exit; 
end; 
end; 
end;

function FindFirstSpaceAfterChars(const Line: string; StartPos: integer): Integer; 
begin 
Result := PosEx(' ', Line, StartPos);
end;

function FindFirstSpaceBeforeChars(const Line: string; StartPos: integer): Integer;
var i: integer; 
begin 
Result := 1; 
for i := StartPos downto 1 do 
begin 
if (Line[i] = ' ') then 
begin 
Result := i; 
exit; 
end; 
end; 
end;

var InnerTag: string; 
LastPos, LastInnerPos: Integer; 
SPos, LPos, RPos: Integer; 
AttribValue: string; 
ClosingChar: char; 
TempAttribName: string; 
begin 
Result := 0;
LastPos := 1;
while (true) do
begin
// find outer tags '<' & '>'
LPos := PosEx('<', HtmlText, LastPos);
if (LPos <= 0) then break;
RPos := PosEx('>', HtmlText, LPos+1);
if (RPos <= 0) then
LastPos := LPos + 1
else
LastPos := RPos + 1;

// get inner tag 
InnerTag := Copy(HtmlText, LPos+1, RPos-LPos-1); 
InnerTag := Trim(InnerTag); // remove spaces 
if (Length(InnerTag) < Length(TagName)) then continue;

// check tag name 
if (SameText(Copy(InnerTag, 1, Length(TagName)), TagName)) then 
begin 
// found tag 
AttribValue := ''; 
LastInnerPos := Length(TagName)+1; 
while (LastInnerPos < Length(InnerTag)) do 
begin 
// find first '=' after LastInnerPos 
RPos := PosEx('=', InnerTag, LastInnerPos); 
if (RPos <= 0) then break;

// this way you can check for multiple attrib names and not a specific attrib 
SPos := FindFirstSpaceBeforeChars(InnerTag, RPos); 
TempAttribName := Trim(Copy(InnerTag, SPos, RPos-SPos)); 
if (true) then 
begin 
// found correct tag 
LPos := FindFirstCharAfterSpace(InnerTag, RPos+1); 
if (LPos <= 0) then 
begin 
LastInnerPos := RPos + 1; 
continue; 
end; 
LPos := FindFirstCharAfterSpace(InnerTag, LPos); // get to first char after '=' 
if (LPos <= 0) then continue; 
if ((InnerTag[LPos] <> '"') and (InnerTag[LPos] <> '''')) then 
begin 
// AttribValue is not between '"' or ''' so get it 
RPos := FindFirstSpaceAfterChars(InnerTag, LPos+1); 
if (RPos <= 0) then 
AttribValue := Copy(InnerTag, LPos, Length(InnerTag)-LPos+1) 
else 
AttribValue := Copy(InnerTag, LPos, RPos-LPos+1); 
end 
else 
begin 
// get url between '"' or ''' 
ClosingChar := InnerTag[LPos]; 
RPos := PosEx(ClosingChar, InnerTag, LPos+1); 
if (RPos <= 0) then 
AttribValue := Copy(InnerTag, LPos+1, Length(InnerTag)-LPos-1) 
else 
AttribValue := Copy(InnerTag, LPos+1, RPos-LPos-1) 
end;

if (SameText(TempAttribName, AttribName)) and (AttribValue <> '') then 
begin 
Values.Add(AttribValue);
inc(Result); 
end; 
end;

if (RPos <= 0) then 
LastInnerPos := Length(InnerTag) 
else 
LastInnerPos := RPos+1; 
end; 
end; 
end; 
end;


用法示例:
取得页面中所有链接
var
Links : TStringList;
LinkFound,i : Integer;
begin
Links := TStringList.Create;
LinkFound := ExtractHtmlTagValues(HtmlText,'A','HREF',Links);
for i:=0 to LinkFound-1 do
begin
//Add your own codes here
end;
Links.Free;
end;

2)表单元素值攫取函数,可以从HTML文本中按照给定的Input名称解析出其Value

function GetValByName(S, Sub: string) : string;
var
EleS,EleE,iPos: Integer;
ELeStr,ValSt: String;
St,Ct : Integer;
function FindEleRange(str: string ; front : boolean; posi : integer): Integer;
var
i: integer;
begin
if Front then
begin
for i:=posi-1 downto 1 do
if Str[i]='<' then
begin
Result := i;
break;
end;
end else begin
for i := posi+1 to length(Str) do
if Str[i]='>' then
begin
Result := i;
break;
end;
end;
end;
function FindEnd (str : string; posi : integer) : Integer;
var
i: integer;
begin
for i:=posi to length(str) do
begin
if (str[i] ='"') or (str[i] ='''') or (str[i] =' ') then
begin
result := i-1;
break;
end;
end;
end;
begin
iPos := Pos('name="'+lowercase(Sub)+'"',lowercase(S));
if iPos = 0 then iPos := Pos('name='+lowercase(Sub),lowercase(S));
if iPos = 0 then iPos := Pos('name='''+lowercase(Sub)+'''',lowercase(S));
if iPos = 0 then exit;
EleS := FindEleRange(S,TRUE,iPos);
EleE := FindEleRange(S,FALSE,iPos);
EleStr := Copy(S,EleS,EleE-EleS+1);
ValSt := 'value="';
iPos := Pos(ValSt,EleStr);
if iPos = 0 then
begin
ValSt := 'value=''';
iPos := Pos(ValSt,EleStr);
end;
if iPos = 0 then
begin
ValSt := 'value=';
iPos := Pos(ValSt,EleStr);
end;
St := iPos+length(ValSt);
Ct := FindEnd(EleStr,St)-St+1;
Result := Copy(EleStr,St,Ct);
end;

用法示例:
取得页面中名为 Submit 的表单项的值
var
InputValue : String;
begin
InputValue := GetValByName(HtmlText,'Submit'); 
end;

3)取某两个字符串中间的字符

function getStrFromHtml(var Source: String; SbStr, bStr, eStr: String): String;
var
I: Integer;
sbPos, bPos, ePos: Integer;
S: String;
begin
S := Source;

Result := '' ;
if SBStr <> '' then
Begin
sbPos := Pos(UpperCase(SbStr), UpperCase(S));
if sbPos > 0 then
Delete(S, 1, sbPos - 1 + length(sbStr))
Else
Exit;
End;

bPos := Pos(UpperCase(bStr), UpperCase(S));
if bPos > 0 then
Delete(S, 1, bPos - 1 + length(bStr))
Else
Exit;

ePos := pos(UpperCase(eStr), UpperCase(S));
if ePos > 0 then
Delete(S, ePos, length(S));

Result := S;
end;

用法实例:
FUserID := getStrFromHtml(reqStr, 'id="userID"', 'value="', '"');
View Code

WebBorwser 解决无法模拟Enter

procedure   TForm1.ApplicationEvents1Message(var   Msg:   tagMSG; 
    var   Handled:   Boolean); 
{   fixes   the   malfunction   of   some   keys   within   webbrowser   control   }
const 
    StdKeys   =   [VK_TAB,   VK_RETURN];   {   standard   keys   } 
    ExtKeys   =   [VK_DELETE,   VK_BACK,   VK_LEFT,   VK_RIGHT];   {   extended   keys   }
    fExtended   =   $01000000;   {   extended   key   flag   } 
begin 
    Handled   :=   False; 
    with   Msg   do 
        if   ((Message   > =   WM_KEYFIRST)   and   (Message   <=   WM_KEYLAST))   and
            ((wParam   in   StdKeys)   or   {$IFDEF   VER120}(GetKeyState(VK_CONTROL)   <   0)   or   {$ENDIF}
            (wParam   in   ExtKeys)   and   ((lParam   and   fExtended)   =   fExtended))   then
        try 
            if   IsChild(WebBrowser1.Handle,   hWnd)   then 
            {   handles   all   browser   related   messages   } 
            begin 
                with   WebBrowser1.Application   as   IOleInPlaceActiveObject   do
                    Handled   :=   TranslateAccelerator(Msg)   =   S_OK; 
                if   not   Handled   then 
                begin 
                    Handled   :=   True; 
                    TranslateMessage(Msg); 
                    DispatchMessage(Msg); 
                end; 
            end; 
        except   end; 
end;   //   IEMessageHandler 
uses   activex,   OleCtrls......
View Code

设置webbrowser 为单独代理不影响IE

我看到有一个VB写的程序,webbrowser可以单独设置代理,360,ie8和火狐的IP不变,(测试网页www.myip.cn或者百度 我的IP)360网络检查也没有看到代理,但是那个程序确实是通过http代理浏览网页,(代理IP如211.136.10.25:80)各位大虾知道在delphi程序中怎么实现吗,有代码更好,我自己网上找了一段VB代码,但测试不成功,100分,不够的话我再加分
VB代码如下


[程序设计]设置程序中使用的WebBrowser控件的代理,不影响系统IE浏览器
Option Explicit

Private Declare Function InternetSetOption Lib "wininet.dll" Alias "InternetSetOptionA" (ByVal hInternet As Long, ByVal dwOption As Long, ByRef lpBuffer As Any, ByVal dwBufferLength As Long) As Long

Private Type INTERNET_PROXY_INFO
        dwAccessType As Long
        lpszProxy As String
        lpszProxyBypass As String
End Type

Private Const INTERNET_OPTION_PROXY = 38
Private Const INTERNET_OPTION_SETTINGS_CHANGED = 39
Private Const INTERNET_OPEN_TYPE_DIRECT = 1
Private Const INTERNET_OPEN_TYPE_PROXY = 3
      
Private Sub SetProxy(aStrIP As String, aStrPort As String, aBolUseProxy As Boolean)

    Dim strProxy As String
    Dim inf As INTERNET_PROXY_INFO
    aStrIP = Trim(aStrIP)
    aStrPort = Trim(aStrPort)
    If (aStrIP + aStrPort = "") Or Not aBolUseProxy Then
       strProxy = ""
    Else
       strProxy = "http=" + aStrIP + ":" + aStrPort
    End If
            
    If Trim(strProxy) <> "" Then
       inf.dwAccessType = INTERNET_OPEN_TYPE_PROXY
       inf.lpszProxy = strProxy
       inf.lpszProxyBypass = ""
       Call InternetSetOption(0, INTERNET_OPTION_PROXY, inf, Len(inf))
       Call InternetSetOption(0, INTERNET_OPTION_SETTINGS_CHANGED, "", 0)
    Else
       inf.dwAccessType = INTERNET_OPEN_TYPE_DIRECT
       inf.lpszProxy = ""
       inf.lpszProxyBypass = ""
       Call InternetSetOption(0, INTERNET_OPTION_PROXY, inf, Len(inf))
       Call InternetSetOption(0, INTERNET_OPTION_SETTINGS_CHANGED, "", 0)
    End If
    
End Sub
   
'===使用代理上网
Private Sub Command1_Click()
    SetProxy txtIP.Text, txtPort.Text, True
    WebBrowser1.Navigate "http://ipseeker.cn"
End Sub
      
'===不使用代理上网
Private Sub Command2_Click()
    SetProxy txtIP.Text, txtPort.Text, False
    WebBrowser1.Navigate "http://ipseeker.cn"
End Sub

Private Sub Form_Load()
    WebBrowser1.Navigate "http://ipseeker.cn"
End Sub

原文地址 

http://www.agoil.cn/bbs/read-htm-tid-207697.html

  提取 webbrowser鼠标单击的超链接的文字内容

我使用万一的代码做了个例子

应该能满足你的需求

a.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>无标题文档</title>
</head>

<body>
<label>btn
<input id="aa" type="button" onclick="location='http://www.google.com'" name="Submit" value="提交" />
</label>
<a id="bb" href="http://www.baidu.com">ahref</a>
<br />
<label>btn
<input id="aa" type="button" onclick="" name="Submit" value="提交" />
</label>
<a href="http://so.com" target="_blank">so</a>
</body>
</html>


单元文件.  窗体上一个memo, 一个webBrowser
unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, OleCtrls, SHDocVw, MSHTML, ActiveX, StdCtrls;

type

  TObjectProcedure = procedure of object;

   TEventObject = class(TInterfacedObject, IDispatch)
   private
     FOnEvent: TObjectProcedure;

   protected
     function GetTypeInfoCount(out Count: Integer): HResult; stdcall;
     function GetTypeInfo(index, LocaleID: Integer; out TypeInfo): HResult; stdcall;
     function GetIDsOfNames(const IID: TGUID; Names: Pointer; NameCount, LocaleID: Integer;
       DispIDs: Pointer): HResult; stdcall;
     function Invoke(dispid: Integer; const IID: TGUID; LocaleID: Integer; Flags: Word;
       var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult; stdcall;

   public
     constructor Create(const OnEvent: TObjectProcedure);
     property OnEvent: TObjectProcedure read FOnEvent write FOnEvent;
   end;

  TForm1 = class(TForm)
    wb1: TWebBrowser;
    mmo1: TMemo;
    procedure wb1BeforeNavigate2(ASender: TObject; const pDisp: IDispatch;
      var URL, Flags, TargetFrameName, PostData, Headers: OleVariant;
      var Cancel: WordBool);
    procedure FormCreate(Sender: TObject);
    procedure wb1TitleChange(ASender: TObject; const Text: WideString);
    procedure wb1DocumentComplete(ASender: TObject; const pDisp: IDispatch;
      var URL: OleVariant);
  private
    { Private declarations }
    FCurrHrefText : string;
    procedure Document_OnMouseOver;
  public
    { Public declarations }
  end;

var
  Form1: TForm1;
  htmlDoc: IHTMLDocument2;

implementation


{$R *.dfm}

procedure TForm1.Document_OnMouseOver;
var
   element: IHTMLElement;
 begin
   FCurrHrefText := '';

   if htmlDoc = nil then
     Exit;

   element := htmlDoc.parentWindow.event.srcElement;
   mmo1.Clear;
   if LowerCase(element.tagName) = 'a' then
   begin
     mmo1.Lines.Add('LINK info...');
     mmo1.Lines.Add(Format('HREF : %s', [element.getAttribute('href', 0)]));
     mmo1.Lines.Add(Format('title : %s', [element.innerText]));

     FCurrHrefText := element.innerText;
   end
   else if LowerCase(element.tagName) = 'img' then
   begin
     mmo1.Lines.Add('IMAGE info...');
     mmo1.Lines.Add(Format('SRC : %s', [element.getAttribute('src', 0)]));
   end
   else
   begin
     mmo1.Lines.Add(Format('TAG : %s', [element.tagName]));
     mmo1.Lines.Add(Format('TAG : %s', [element.getAttribute('value', 0)]));
   end;
end;

procedure TForm1.FormCreate(Sender: TObject);
begin
  wb1.Navigate(ExtractFilePath(Application.ExeName)+'a.html');
//wb1.Navigate('http://passport.csdn.net/UserLogin.aspx');
   Mmo1.Clear;
   Mmo1.Lines.Add('Move your mouse over the document...');
end;

procedure TForm1.wb1BeforeNavigate2(ASender: TObject; const pDisp: IDispatch;
  var URL, Flags, TargetFrameName, PostData, Headers: OleVariant;
  var Cancel: WordBool);
begin
  //如果是直接打开新窗口, 也是同理获得元素信息

    if Pos('http:', URL) > 0 then
    begin
      ShowMessage('当前URL描述:' + FCurrHrefText);
//      Cancel := True;
    end;
  htmlDoc := nil;
end;

procedure TForm1.wb1DocumentComplete(ASender: TObject; const pDisp: IDispatch;
  var URL: OleVariant);
begin
if Assigned(wb1.Document) then
   begin
     htmlDoc := wb1.Document as IHTMLDocument2;
     if htmlDoc.frames.length > 0 then
     begin
       htmlDoc := (IDispatch(htmlDoc.frames.item(0)) as IHTMLWindow2).Document;
     end;
     htmlDoc.onmouseover := (TEventObject.Create(Document_OnMouseOver) as IDispatch);
   end;
end;

procedure TForm1.wb1TitleChange(ASender: TObject; const Text: WideString);
begin

end;

{ TEventObject }

 constructor TEventObject.Create(const OnEvent: TObjectProcedure);
 begin
   inherited Create;
   FOnEvent := OnEvent;
 end;

 function TEventObject.GetIDsOfNames(const IID: TGUID; Names: Pointer;
   NameCount, LocaleID: Integer; DispIDs: Pointer): HResult;
 begin
   Result := E_NOTIMPL;
 end;

 function TEventObject.GetTypeInfo(index, LocaleID: Integer; out TypeInfo): HResult;
 begin
   Result := E_NOTIMPL;
 end;

 function TEventObject.GetTypeInfoCount(out Count: Integer): HResult;
 begin
   Result := E_NOTIMPL;
 end;

 function TEventObject.Invoke(dispid: Integer; const IID: TGUID; LocaleID: Integer;
   Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;
 begin
   if (dispid = DISPID_VALUE) then
   begin
     if Assigned(FOnEvent) then
       FOnEvent;
     Result := S_OK;
   end
   else
     Result := E_NOTIMPL;
 end;

end.


刚才看错了 
正确的处理方法如下

//单元文件
//窗口控件及命名见单元文件内的定义
//已在Delphi xe测试通过

unit Unit11;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, OleCtrls, SHDocVw, ComCtrls, activeX;

type
  TForm11 = class(TForm)
    pgc1: TPageControl;
    ts1: TTabSheet;
    ts2: TTabSheet;
    wb1: TWebBrowser;
    wb2: TWebBrowser;
    procedure FormCreate(Sender: TObject);
    procedure pgc1Change(Sender: TObject);
  private
    { Private declarations }
    //当前激活的WebBrowser控件
    {当激活的WebBrowser控件变化时更新该字段的值, 在IEMessageHandler中使用}
    FCurrBW : TWebBrowser;

    procedure IEMessageHandler(var Msg: TMsg; var Handled: Boolean);
  public
    { Public declarations }
  end;

var
  Form11: TForm11;

implementation

{$R *.dfm}
procedure TForm11.IEMessageHandler(var Msg: TMsg; var Handled: Boolean);
const
  StdKeys = [VK_TAB, VK_RETURN]; { 标准键 }
  ExtKeys = [VK_DELETE, VK_BACK, VK_LEFT, VK_RIGHT]; { 扩展键 }
  fExtended = $01000000; { 扩展键标志 }
begin
  Handled := False;

  if (FCurrBW = nil) then
  begin
    Handled := False;
    Exit;
  end;

  with Msg do
  begin
    if ((Message >= WM_KEYFIRST) and (Message <= WM_KEYLAST)) and
      ((wParam in StdKeys) or (GetKeyState(VK_CONTROL) < 0) or
      (wParam in ExtKeys) and ((lParam and fExtended) = fExtended)) then
    begin
      try
        with FCurrBW.Application as IOleInPlaceActiveObject do
          Handled := TranslateAccelerator(Msg) = S_OK;

          if not Handled then
          begin
            Handled := True;
            TranslateMessage(Msg);
            DispatchMessage(Msg);
          end;

      except
      end;
    end;
  end;

end;
procedure TForm11.pgc1Change(Sender: TObject);
begin
  case pgc1.ActivePageIndex of
    0 : FCurrBW := wb1;
    1 : FCurrBW := wb2;
  end;
end;

procedure TForm11.FormCreate(Sender: TObject);
begin
  FCurrBW := wb1;
  Application.OnMessage := IEMessageHandler;
  wb1.Navigate('http://bbs.csdn.net/topics/390341172?page=1#post-393434373');
  wb2.Navigate('http://bbs.csdn.net/topics/390341172?page=1#post-393434373');
end;

end.
View Code

getElementBy系列

getElementBy系列
WEB标准下可以通过getElementById(), getElementsByName(), and getElementsByTagName_r()访问DOCUMENT中的任一个标签
1、getElementById()
getElementById()可以访问DOCUMENT中的某一特定元素,顾名思义,就是通过ID来取得元素,所以只能访问设置了ID的元素。
比如说有一个DIV的ID为docid:<div id="docid"></div>
那么就可以用getElementById("docid")来获得这个元素。返回具有指定 ID 属性值的第一个
2.getElementsByName()
这个是通过NAME来获得元素,但不知大家注意没有,这个是GET ELEMENTS,复数ELEMENTS代表获得的不是一个元素,为什么呢?
因 为DOCUMENT中每一个元素的ID是唯一的,但NAME却可以重复。打个比喻就像人的身份证号是唯一的(理论上,虽然现实中有重复),但名字重复的却 很多。如果一个文档中有两个以上的标签NAME相同,那么getElementsByName()就可以取得这些元素组成一个数组。
比如有两个DIV:
<div name="docname" id="docid1"></div>
<div name="docname" id="docid2"></div>
那么可以用getElementsByName("docname")获得这两个DIV,用getElementsByName("docname")[0]访问第一个DIV。
3、getElementsByTagName_r()
这 个呢就是通过TAGNAME(标签名称)来获得元素,一个DOCUMENT中当然会有相同的标签,所以这个方法也是取得一个数组。可以用 getElementsByTagName_r("div")来访问它们,用getElementsByTagName_r("div")[0]访问第一个 DIV,用getElementsByTagName_r("div")[1]访问第二个DIV。
如:
<body>
<div name="docname" id="docid1" onClick="bgcolor()"></div>
<div name="docname" id="docid2" onClick="bgcolor()"></div>
</body>
</html>
<script language="JavaScript" type="text/JavaScript">
<!--
function bgcolor(){
var docnObj=document.getElementsByTagName_r("div");
docnObj[0].style.backgroundColor = "black";
docnObj[1].style.backgroundColor = "black";
}
-->
</script>
总 结一下标准DOM,访问某一特定元素尽量用标准的getElementById(),访问标签用标准的getElementByTagName(),但 IE不支持getElementsByName(),所以就要避免使用getElementsByName(),但 getElementsByName()和不符合标准的document.all[]也不是全无是处,它们有自己的方便之处,用不用那就看网站的用户使用 什么浏览器,由你自己决定了。
Javascript中的getElementById十分常用,但在标准的页面中,一个id只能出现一次, 如果我想同时控制多个元素,例如点一个链接,让多个层隐藏,该怎么做?用class,当然,同一个class是可以允许在页面中重复出现的,那么有没有 getElementByClass呢?没有,但是可以解决:
//创建一个数组
var allPageTags = new Array();
function hideDivWithClasses(theClass) {
var allPageTags=document.getElementsByTagName_r("div");
//遍历页面中的所有标签
for (i=0; i<allPageTags.length; i++)
//找到我们需要改变的class
if (allPageTags[i].className==theClass) {
//改变这个class的样式
allPageTags[i].style.display='none';
}
}
}
——————————————应用(照片日记编辑图文排版)——————————
//图文右对齐
function phototxtright()
{
var Tags = new Array();
var j=0;
var theClassl='photo_edit';
var theClassc='photo_editcenter';
var theClassr='photo_editright';
var allHTMLTags=document.getElementsByTagName_r('*');
    for (var i=0; i<allHTMLTags.length; i++)
    {
    if (allHTMLTags[i].className==theClassl||allHTMLTags[i].className==theClassc) {
        Tags[j]=allHTMLTags[i];
        Tags[j].className="photo_editright";
          j++;
    }
    }
document.getElementByIdx_x("edit_box").style.textAlign="right";
}
//改变字号
function setfontsize(num)
{  
var Tags = new Array();
var j=0;
var theClasstxt='nofocused';
var allHTMLTags=document.getElementsByTagName_r('*');
    for (var i=0; i<allHTMLTags.length; i++)
    {
    if (allHTMLTags[i].className==theClasstxt) {
   Tags[j]=allHTMLTags[i];
    Tags[j].style.fontSize= num+"px";
           j++;
    }
    }
document.getElementByIdx_x("edit_box").style.fontSize= num+"px";
}
===================================
详见:
http://hi.baidu.com/dandan_ze/item/b91adaa7f30ef0dc5bf19116

  利用WebBorwser和MSHTML.tlb做广告过滤器完全源码公开

程序组成:
两个引用对象:Microsoft HTML Object Library,Microsoft Internet Object
两个窗体: frmAbout.frm frmMenu.frm
两个*.bas: APIs.bas,mSysTray.bas
两个Class: MyIE.cls, windows.cls(其中windows.cls是collection对象的扩展,放MyIE.cls)
下面公开这两个主要类的代码(如要全部代码请留email,要看演示上www.jjsoft.cn,版权归作者,要用于商业目的请和作者联系fazhu@163.net)
myIE.cls
------------------------------------------------------------------------------------------------------
Option EXPlicit

Private WithEvents mIE As SHDocVw.InternetExplorer
Private WithEvents IE_IFrame As MSHTML.HTMLIFrame
Private WithEvents win2 As MSHTML.HTMLWindow2
Private WithEvents doc2 As MSHTML.HTMLDocument
'///////////////////////////////////////////////////////
'判断Frame对象
Private tmpIE_IFrame As MSHTML.HTMLIFrame
Private IE_FCols As MSHTML.FramesCollection
'///////////////////////////////////////////////////////
Private body As MSHTML.HTMLBody
Private IElements As MSHTML.IHTMLElement
Private mHWnd As Long
Private mDoc As MSHTML.IHTMLDocument2
Private isLoaded As Integer
Private isClicked As Integer
Private isCleaned As Integer
Private tmpState As String
Private Const FlashClassID As String = "CLSID:D27CDB6E-AE6D-11CF-96B8-444553540000"
'determine the refresh button is clicked
'Private m_nPageCounter As Integer
'Private m_nObjCounter As Integer
Private m_bIsRefresh As Boolean
Private mSArrays As Variant
Private mPtr As POINTAPI
'//////////////////////////////////////////
Public Function Banding(item As SHDocVw.InternetExplorer) As SHDocVw.InternetExplorer
    On Error GoTo Err
    Dim tmpName As String, tmpie As SHDocVw.InternetExplorer
    'Dim tmpdoc As MSHTML.HTMLDocument
    Set tmpie = item
    If (tmpie Is Nothing) Then Exit Function

    If Not (TypeOf item Is IWebBrowser2) Then Exit Function
            
    tmpName = tmpie.FullName
    tmpName = Mid(tmpName, InStrRev(tmpName, "") + 1)
    If UCase(tmpName) = "IEXPLORE.EXE" Then
        Set mIE = tmpie
        mHWnd = mIE.hwnd
       ' Call BandingDoc(mIE2)
    End If
    tmpName = ""
    Set tmpie = Nothing
    Set Banding = mIE
Bye:
    
    If Not (tmpie Is Nothing) Then Set tmpie = Nothing
    Exit Function
Err:
    MsgBox "Error:" & Err.Description & " in Banding"
    Resume Bye
End Function
Public Property Get IEHandle() As Long
    IEHandle = mHWnd
End Property
Private Sub Class_Initialize()
    m_bIsRefresh = True
    
    '////////////////////////
    '非弹出式广告特征集
    mSArrays = Array("input", "a", "iframe", "area", "frame")
    '////////////////////////
End Sub
Private Sub Class_Terminate()
    Set mDoc = Nothing
    Set mIE = Nothing
End Sub
Private Sub mIE_BeforeNavigate2(ByVal pDisp As Object, URL As Variant, Flags As Variant, TargetFrameName As Variant, PostData As Variant, Headers As Variant, Cancel As Boolean)
    On Error Resume Next
    Dim tmpie As SHDocVw.InternetExplorer
    If Not (mDoc Is Nothing) Then
        Set mDoc = Nothing

    Else
        Exit Sub
    End If
    Call BandingDoc("mIE_BeforeNavigate2")
    'm_nPageCounter = m_nPageCounter + 1
End Sub
Private Sub mIE_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    On Error Resume Next
    'm_nPageCounter = m_nPageCounter - 1
    Call BandingDoc("mIE_DocumentComplete")
    If m_bIsRefresh Then
        If (tmpState = "interactive") Then _
            isLoaded = 1
            Call BandingDoc2(mIE)
    Else
        If (tmpState = "complete") Then _
            isLoaded = 1
            Call BandingDoc2(mIE)
    End If
End Sub
Private Sub mIE_DownloadBegin()
    On Error Resume Next
    If Not (mDoc Is Nothing) Then Set mDoc = Nothing
    Call BandingDoc("mIE_DownloadBegin")
    
    'Remarked by zdj 2004-02-02
    'If m_bIsRefresh = False Then m_bIsRefresh = True
    'm_nObjCounter = m_nObjCounter + 1
End Sub
Private Sub mIE_DownloadComplete()
    'm_nObjCounter = m_nObjCounter - 1
    'Call BandingDoc("mIE_DownloadComplete")
    'If (tmpState = "complete") Then
    '    isLoading = 0
    '    Call BandingDoc2(mIE)
    'End If
    '////////////////////////////////////////////
    'The refresh button is clicked

    'If Not (m_bIsRefresh) Then m_bIsRefresh = True
    'If m_nObjCounter = 1 Then m_nObjCounter = 0
    
    'Remarked by zdj 2004-02-02
    'If (m_bIsRefresh) Then
    '    isLoaded = 1
    '    Call BandingDoc2(mIE)
    'End If
    '
    
    '////////////////////////////////////////////
End Sub
Private Sub BandingDoc(ByVal strWhere As String)
    On Error GoTo Err:
    If mIE Is Nothing Then
        Exit Sub
    End If
    
    If mDoc Is Nothing Then Set mDoc = mIE.document
    tmpState = mDoc.readyState
    If tmpState <> "complete" Then isLoaded = 0
    'Debug.Print mDoc.readyState & " " & strWhere
Bye:
    Exit Sub
Err:
    If Err.Number = -2147467259 Then Resume Bye
    MsgBox Err.Number & Err.Description & strWhere
    Resume Bye
End Sub
Private Sub mIE_NavigateComplete2(ByVal pDisp As Object, URL As Variant)
        'm_nPageCounter = m_nPageCounter + 1
        'm_nObjCounter = m_nObjCounter + 1
        
        'Remarked by zdj 2004-02-02
        'm_bIsRefresh = False
End Sub
Private Sub mIE_NewWindow2(ppDisp As Object, Cancel As Boolean)
    Dim tmpobj As IHTMLDocument2, tmpString As String
    Dim notPopups As Boolean, tmpobj2 As IHTMLElement
    Dim i As Integer
    If (BlockedPopups = True) Then

        GetCursorPos mPtr
        Set tmpobj = mIE.document
        Set tmpobj2 = tmpobj.elementFromPoint(mPtr.X, mPtr.Y)
        If tmpobj2 Is Nothing Then
            notPopups = Not (isLoaded = 0)
        Else
            If (tmpobj2.document.activeElement) Is Nothing Then
                notPopups = Not (isLoaded = 0)
            Else
                tmpString = LCase(tmpobj2.document.activeElement.tagName)
                For i = LBound(mSArrays) To UBound(mSArrays)
                    If tmpString = CStr(mSArrays(i)) Then
                        notPopups = True
                        Exit For
                    End If
                Next i
            End If
        End If
        If notPopups = False Then
            Cancel = True

            If EnabledBeep Then Beep 500, 100
            isCleaned = isCleaned + 1
        End If
    End If
    Set tmpobj2 = Nothing
    Set tmpobj = Nothing
End Sub
Private Sub BandingDoc2(ByVal pDisp As Object)
    On Error Resume Next
    Dim tmpdoc As Object, iwin As MSHTML.HTMLWindow2
    Dim tmpdoc2 As MSHTML.HTMLDocument
    Dim i As Integer, j As Integer
    Dim ii As Integer, jj As Integer
    Dim k As Integer, killed As Boolean
    
    If TypeOf pDisp Is IWebBrowser2 Then
        Call pDisp.ExecWB(OLECMDID_SHOWMESSAGE, OLECMDEXECOPT_DONTPROMPTUSER)
        Set tmpdoc = pDisp.document
        
        If TypeName(tmpdoc) = "HTMLDocument" Then
          
            Set doc2 = tmpdoc
            Set win2 = doc2.parentWindow
            Set body = doc2.body
            
            'Skip the error message
            'win2.clearTimeout (0)
            
            '绑定flash对象
            If (BlockedFlash = True) Then

                i = cleanFlash(doc2.All.tags("OBJECT"), doc2.All.tags("EMBED"))
            End If
            
            '绑定动画对象
            If (BlockedAnimate = True) Then
                j = cleanAnimated(doc2.All.tags("IMG"))
            End If
            '/////////////////////////////////
            
            If (BlockedFlying = True) Then
                k = cleanFlyingAds(doc2.All.tags("DIV"))
            End If
            
            '////////////////////////////////////////////////
            '过滤框架中的广告
                If TypeName(doc2.body) = "HTMLFrameSetSite" Then
                  If doc2.readyState = "complete" Then
                    win2.Status = "正在阻止框架中的广告..."
                    ii = RecursivlyFlash(doc2.frames)

                    jj = RecursivlyAnimate(doc2.frames)
                    'win2.Status = "阻止完毕!"
                  End If
                End If
            '////////////////////////////////////////////////
            
            '//////////////////////////////////
            ' skip the onload event in body tag
            'body.onload = ""
            body.onunload = ""
            '//////////////////////////////////
            killed = (isCleaned > 0 Or i > 0 Or j > 0 Or ii > 0 Or jj > 0 Or k > 0)
            If (killed) Then
                Call showAlertInfo(isCleaned + i + j + ii + jj + k)
            End If
        End If
    End If
    isCleaned = 0
    Set tmpdoc = Nothing
End Sub
Private Function cleanFlash(ByVal item As MSHTML.IHTMLElementCollection, ByVal item2 As MSHTML.IHTMLElementCollection) As Integer
    

    On Error GoTo Errs
    Dim i As Integer
    Dim objelments As MSHTML.HTMLObjectElement, objstyle As MSHTML.IHTMLStyle
    Dim objembed As MSHTML.HTMLEmbed
    
    '网页中无此标签的对象
    If (item Is Nothing) Then Exit Function
    
    
    i = 0
    
    '/////////////////////////////////////////////////////////
    For Each objelments In item
        'DoEvents
        
        If Not (objelments Is Nothing) Then
            
            If (item.Length = 0) Then Exit For
            If UCase(objelments.classid) = FlashClassID Then
                
                Set objstyle = objelments.Style
                With objstyle
                    
                    .visibility = "Hidden"
                    '.Width = 0
                    '.Height = 0
                    

                End With
                Set objstyle = Nothing
                i = i + 1
            End If
         
         End If
    Next objelments
    '//////////////////////////////////////////////////////////
    
    '网页中无此标签的对象
    If (item2 Is Nothing) Then Exit Function
    
    
    For Each objembed In item2
        'DoEvents
        If Not (objembed Is Nothing) Then
            
            If (item2.Length = 0) Then Exit For
            If InStr(1, LCase(objembed.src), ".swf") > 0 Then
                
                Set objstyle = objembed.Style
                With objstyle
                    
                    .visibility = "Hidden"
                    '.Width = 0
                    '.Height = 0

                    
                End With
                Set objstyle = Nothing
            
            End If
        End If
    Next objembed
    cleanFlash = i
Bye:
    Exit Function
Errs:
    cleanFlash = -1
    Resume Bye
End Function
Private Function cleanAnimated(ByVal item As MSHTML.IHTMLElementCollection) As Integer
    
    On Error GoTo Errs
    Dim i As Integer
    Dim objImgs As MSHTML.IHTMLImgElement, objImg As MSHTML.HTMLImg
    Dim objstyle As MSHTML.IHTMLStyle
    
    '网页中无此标签的对象
    If (item Is Nothing) Then Exit Function
    i = 0
    
    For Each objImgs In item
        
        If Not (objImgs Is Nothing) Then
            
            If (item.Length = 0) Then Exit For
            
            Set objImg = objImgs
            
            Set objstyle = objImg.Style
            If InStr(1, LCase(objImg.src), ".gif") > 0 Then

                
                DoEvents
                With objstyle
                    
                    .visibility = "hidden"
                    '.Width = 0
                    '.Height = 0
                    
                End With
                i = i + 1
            
            End If
        End If
        
        Set objstyle = Nothing
        Set objImg = Nothing
       
    Next objImgs
    cleanAnimated = i
Bye:
    Exit Function
Errs:
    cleanAnimated = -1
    Resume Bye
End Function
Private Function RecursivlyFlash(ByRef frame As FramesCollection) As Integer
        On Error GoTo Errs
        Dim X As Object, ihtmle As IHTMLElementCollection

        Dim i As Integer, spWin As IHTMLWindow2
        
        Set X = frame.document.frames
        
        If X.Length = 0 Then Exit Function
        
        For i = 0 To X.Length - 1
             'DoEvents
             Call RecursivlyFlash(X(i))
             Set ihtmle = X(i).document.All
             
             If BlockedFlash Then
                
                RecursivlyFlash = cleanFlash(ihtmle.tags("OBJECT"), ihtmle.tags("EMBED"))
                
             End If
             
             Set ihtmle = Nothing
        Next i
Bye:
    Exit Function
Errs:
    RecursivlyFlash = -1
    Resume Bye
End Function
Private Function RecursivlyAnimate(ByRef frame As FramesCollection) As Integer
        
        On Error GoTo Errs
        Dim X As Object, ihtmle As IHTMLElementCollection

        Dim i As Integer, spWin As IHTMLWindow2
        
        Set X = frame.document.frames
        
        If X.Length = 0 Then Exit Function
        
        For i = 0 To X.Length - 1
             'DoEvents
             Call RecursivlyAnimate(X(i))
             Set ihtmle = X(i).document.All
             
             If BlockedAnimate Then
                
                RecursivlyAnimate = cleanAnimated(ihtmle.tags("IMG"))
                
             End If
             
             Set ihtmle = Nothing
        Next i
Bye:
    Exit Function
Errs:
    RecursivlyAnimate = -1
    Resume Bye
End Function
Private Function cleanFlyingAds(ByVal item As MSHTML.IHTMLElementCollection) As Integer
    On Error GoTo Errs
    Dim i As Integer, l As Integer, j As Integer
    Dim tmpobj As Object
    

    l = item.Length
    For i = 0 To l - 1
        DoEvents
        Set tmpobj = item(i)
        If (tmpobj.Style.position = "absolute") Then
            tmpobj.Style.visibility = "hidden"
            j = j + 1
        End If
        Set tmpobj = Nothing
    Next i
    cleanFlyingAds = j
Bye:
    Exit Function
Errs:
   cleanFlyingAds = -1
   Resume Bye
End Function
'/////////////////////////////////////////////////////////////
'显示警告语
Private Sub showAlertInfo(ByVal Count As Integer)
    With win2
        .Status = "已阻止网页中符合条件的" & Count & "个广告!(www.jjsoft.cn)"
    End With
    
End Sub
'////////////////////////////////////////////////////////////
Private Sub AlertBeep()
    Beep 500, 500
End Sub
Private Sub win2_onunload()
    On Error Resume Next
    
    ' the refresh button is clicked
    If mDoc.readyState = "complete" Then m_bIsRefresh = True
    isLoaded = 1
End Sub
------------------------------------------------------------------------------------------------------
Windows.cls
'局部变量,保存集合
Private mCol As Collection
Private WithEvents winShell As SHDocVw.ShellWindows
Private Function Add(Key As SHDocVw.InternetExplorer) As MyIE
    '创建新对象
    Dim objNewMember As MyIE
    Set objNewMember = New MyIE

    '设置传入方法的属性

    If Not objNewMember.Banding(Key) Is Nothing Then
        mCol.Add objNewMember, CStr(objNewMember.IEHandle)
    End If
    '返回已创建的对象
    Set Add = objNewMember
    Set objNewMember = Nothing

End Function
Public Property Get item(vntIndexKey As Variant) As MyIE
    '引用集合中的一个元素时使用。
    'vntIndexKey 包含集合的索引或关键字,
    '这是为什么要声明为 Variant 的原因
    '语法:Set foo = x.Item(xyz) or Set foo = x.Item(5)
  Set item = mCol(vntIndexKey)
End Property
 
Public Property Get Count() As Long
    '检索集合中的元素数时使用。语法:Debug.Print x.Count
    Count = mCol.Count
End Property

Public Sub Remove(vntIndexKey As Variant)
    '删除集合中的元素时使用。
    'vntIndexKey 包含索引或关键字,这是为什么要声明为 Variant 的原因
    '语法:x.Remove(xyz)

    mCol.Remove vntIndexKey
End Sub

Public Property Get NewEnum() As IUnknown
    '本属性允许用 For...Each 语法枚举该集合。
    Set NewEnum = mCol.[_NewEnum]
End Property

Private Sub Class_Initialize()
    '创建类后创建集合
    
    Call Refresh
End Sub

Private Sub Class_Terminate()
    '类终止后破坏集合
    Set mCol = Nothing
    Set winShell = Nothing
End Sub
Private Sub Refresh()
    
    On Error GoTo Proc_Err
    Dim SWs As New SHDocVw.ShellWindows
    Dim var As SHDocVw.InternetExplorer
    
    Set mCol = Nothing
    Set mCol = New Collection
    For Each var In SWs
       Add var

    Next
    
    
    If ObjPtr(winShell) <> ObjPtr(SWs) Then
        Set winShell = SWs
    End If
    Set SWs = Nothing
    Set var = Nothing
    Exit Sub
Proc_Err:
    
End Sub
Private Sub winShell_WindowRegistered(ByVal lCookie As Long)
    Call Refresh
End Sub
Private Sub winShell_WindowRevoked(ByVal lCookie As Long)
    Call Refresh
End Sub
-----------------------------------------------------------------------------------------------------

  始终用WebBrowser打开网页

要在同一个WebBrowser里显示,可以这样:
再放一个小的WebBrowser2,设置它在WebBrowser1下面(设置Visible为False好象无效),
// 在WebBrowser1的OnNewWindow2事件中:
procedure TForm1.WebBrowser1NewWindow2(Sender: TObject;
var ppDisp: IDispatch; var Cancel: WordBool);
begin
ppDisp := WebBrowser2.Application; // 新的窗口先指向WebBrowser2
end;
// 在WebBrowser2的OnBeforeNavigate2事件中:
procedure TForm1.WebBrowser2BeforeNavigate2(Sender: TObject;
const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData,
Headers: OleVariant; var Cancel: WordBool);
begin
WebBrowser1.Navigate(string(URL)); // 再指回WebBrowser1
Cancel := True;
end;

  关于delphi点击webbrowser中任意一点的问题

有时候我们需要delphi载入webbrowser1打开网页的时候 需要点击某一个点的位置 可能是坐标 可能是按钮 可能是其他的控件
应该如何来实现呢? 这里来简单说明一下点击坐标的过程
点击过程很明显我们移动鼠标来点击或者发送消息来点击
移动鼠标点击的比较常见 这里详细说明一下发送消息来点击的办法
发送消息来点击的思路是sendmessage()发送消息来实现的
导入句柄 点击的就可以了。但是这里的句柄(webbrowser的句柄)其实是不好找的。如果找到了合适的正确的句柄点击起来还是非常容易的
这里有一个过程可以清楚的说明sendmessage的点击过程
procedure sendclick(var x,y:integer) ;
begin
SendMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONDOWN,
////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text)));
MK_LBUTTON, MAKELONG(x,y));
sleep(500);
SendMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONUP,
////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text)));
MK_LBUTTON, MAKELONG(x,y)) ;
PostMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONDOWN,
////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text)));
MK_LBUTTON, MAKELONG(x,y));
sleep(500);
PostMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONUP,
////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text)));
MK_LBUTTON, MAKELONG(x,y)) ;
end;
这里定义了一个过程
GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD) 这是webbrowser的句柄
整个过程发送了 sendmessage和postmessage2个包 这里是防止一次没点中 所以再补点一次
关于点击的坐标是过程中导入的参数 x,y。
这里的X Y坐标是相对于窗体的坐标 就是相对于webbrowser的坐标 所以必须要用spy++来查找点击的坐标 

  具有自动查找Web页面上所有链接的网络浏览器

具有自动查找Web页面上所有链接的网络浏览器

1. 概述
WEB的应用已经深入到现在社会的方方面面,作为一个软件开发人员或其他技术人员,都有可能遇见在Internet上查询大量的资料和信息的情况,一般来说用的最多的就是WEB的搜索Engine。当我们使用它查出大量的资料链接后,还有可能遇到更多的链接,但要自己去看他们是否是链接,那确实是一件很麻烦的事情。这篇文章就是来讲,如何用Delphi的MSHTML_TLB.pas来开发一个具有自动查找Web页面上所有链接的简单网络浏览器。我是在IE5的环境下写的这个程序,当然它可以向下兼容,如IE4。
2.关于MSHTML_TLB.pas
MSHTML_TLB.pas是Deliphi里面自带的一个类。它的含义是Microsoft HTML对象库。它不能够包含在所有的工程或程序中,原因是它实在是太大了,整个文件的代码共有241,899l行,那么长。大小约有12M。下面我们来看看它是如何加入到程序中的。
1. 首先,我们打开Delphi,建立一个新的application。我把form1保存为MainFrm.pas,把application保存为FindLinks.dPR.
2. 要想实现IE 的功能我们就必须要使用Microsoft HTML对象库(MSHTML type library.)如何实现呢?如图1, Project->Import Type Library:

然后你会看到关于"Microsoft HTML Object Library (Version 4.0)"的列表,如图2。
接下来可能会遇到一些问题。比如,在列表里面没有出现"Microsoft HTML Object Library (Version 4.0)"。这是为什么呢?那是IE的问题,由于IE版本的不同(我用的是IE5)。我建议最好是先查询你的计算机里面有没有mshtml.tlb这个文件。
在9x里面它是存在与C:WINDOWSSYSTEM目录里面,在2000里面它在WINNTsystem32目录里面。如果找到了这个文件,就可以用图2的click on the "Add..." button,然后选择mshtml.tlb,就可以了,如果没有找到它,那说明你没有安装IE或你的IE版本太低,请升级IE。
最后,当我们选择了倒入的库后,会等待一段时间,因为它实在是太长了,不过请千万不要因为是死机了。它会给自动查找提供很多帮助。
3. 工程实现。
界面设计如下图:


使用以下组件:
控件 命名 TEXT
TLabel lblURL 资料网址
TEdit edtURL http://www.huihu.com
TButton btnFindLinks 查询连接
TListBox lstbxLinks null

4. 程序设计
1. 在Form1的interface部分,在uses后面加入,OleCtrls, SHDocVw, and OleServer.这些所应用的类,都是基于我们所要创建的TinternetExplorer的,它是IE的ActiveX的对象。但是这里还有其它的方式(TinternetExplorer)进行,我们采用TwebBrowser 控制在我们的form1。
2. 我们在private里面加入如下代码:
FInternetExplorer: TInternetExplorer;
procedure WebBrowserDocumentComplete(Sender: TObject; var pDisp: OleVariant;
var URL: OleVariant);    
最后用Ctrl-Shift-C完成类的声明。
3. 在impelmentation后面加入如下声明:
uses MSHTML_TLB, ComObj;
要使用的类。
4. 在form1的OnCreate事件中加入如下:
   FInternetExplorer := TInternetExplorer.Create(Self);
  FInternetExplorer.OnDocumentComplete := WebBrowserDocumentComplete;
5. 最后在form1的TForm1.WebBrowserDocumentComplete里面加入如下代码:
1. procedure TForm1.WebBrowserDocumentComplete(Sender: TObject;
2.   var pDisp: OleVariant; var URL: OleVariant);
3. var
4.  Doc: IHTMLDocument2;
5.  ElementCollection: IHTMLElementCollection;
6.  HtmlElement: IHTMLElement;
7.  I: Integer;
8.  AnchorString: string;
9. begin
10.  lstbxLinks.Clear;
11.  // 在处理网页的时候发现它没有完全下载,将不会进行处理连接
12.   Doc := FInternetExplorer.Document as IHTMLDocument2;
13.  if Doc = nil then
14.   raise Exception.Create('Couldn''t convert the ' +
15.    'FInternetExplorer.Document to an IHTMLDocument2');
16.  // 夺取web上的所有元素。
17.  ElementCollection := Doc.all;
18.  for I := 0 to ElementCollection.length - 1 do
19.  begin
20.   file://得到当前的元素
21.   HtmlElement := ElementCollection.item(I, '') as IHTMLElement;
22.   // 查找网页原代码中的LINK标记。
23.   // 发现其它的html标记 (例如: TABLE, FONT, etc.)
24.   if HTMLElement.tagName = 'A' then
25.   begin
26.    // 在详细的link里面抓取innerText,innertext就是标记中<href=后面的东西>例如:

27.    // 我们在web里面看见"西南民族学院"
28.    // <a href="http://www.swun.edu.cn"><b>西南民族学院</b></a>.
29.      AnchorString := HtmlElement.innerText;
30.    if AnchorString = '' then
31.     AnchorString := '(Empty Name)';
32.    AnchorString := AnchorString + ' -  ' +
33.     (HtmlElement as IHTMLAnchorElement).href;
34.    lstbxLinks.Items.Add(AnchorString);
35.   end;
36.  end;
37. end;

  最后我们在button(btnFindLinks)加入Onclick 事件:
1. // 在被浏览的web里面进行查询连接。
2.  FInternetExplorer.Navigate(edtURL.Text, EmptyParam, EmptyParam,
   EmptyParam, EmptyParam);
从以上的程序里面我们可以看出它的原理了,实际上是很简单的,看过html原代码的人都知道,使网页产生连接的代码就是:<a href="http://www.swun.edu.cn"><b>西南民族学院</b></a>.
我程序的原理就是通过截取href后面的字符串,并在"""号后面截止。
然后把它保存为另外的字符串。然后通过TwebBrowser显示出来。
最后让我们来编译这个程序,的却,编译它很费时间,因为编译多达241,899l行的MSHTML_TLB.pas,是一件很麻烦的事情。其中还包括多达20多个的warning错误,但请放心这是MSHTML_TLB.pas的问题,与其它程序无关。这样一个小型的查找Web页面上所有链接的简单网络浏览器就出现在我们面前。本程序在IE5.0和Delphi6下编译通过。

  webbrowser 常用方法示例

var   Form   :     IHTMLFormElement     ;
        D:IHTMLDocument2     ;
begin
    with   WebBrowser1   do   begin
          D   :=   Document   as   IHTMLDocument2;
          Form   :=   D.Forms.item( 'form1 ',0)   as   IHTMLFormElement;   //form1为表单名
          //title为表单中的文本框
        (form.item( 'title ',0)   as   IHTMLElement).setAttribute( 'value ',s_title,0);  
        (form.item( 'content ',0)   as   IHTMLElement).setAttribute( 'value ',edit1.text,0);
        (form.item( 'add ',0)   as   IHTMLElement).click;//add为按钮名称
    end;

在delphi的WebBrowser中获取和设置Input表单值
var
    i:Integer;
    myole:oleVariant;
begin
    myole := wb1.Document;
    for i := 0 to myole.all.length - 1 do
    begin
        if myole.all.item(i).tagName = 'INPUT' then
        begin

            mmo1.Lines.Add(myole.all.item(i).name);

            mmo1.Lines.Add(myole.all.item(i).value);
        end;
    end;

end;


WebBrowser1.GoHome; //到浏览器默认主页
WebBrowser1.Refresh; //刷新
WebBrowser1.GoBack; //后退
WebBrowser1.GoForward; //前进
WebBrowser1.Navigate('...'); //打开指定页面
WebBrowser1.Navigate('about:blank'); //打开空页面
--------------------------------------------------------------------------------
//打开空页面, 并写入...

WebBrowser1.Navigate('about:<head><title>标题></title><body>页面内容</body>');
--------------------------------------------------------------------------------
//读取网页脚本中的变量:

procedure TForm1.Button1Click(Sender: TObject);
var
s: string;
i: Integer;
begin
s := WebBrowser1.OleObject.document.Script.str;
i := WebBrowser1.OleObject.document.Script.num;
ShowMessage(s); //Hello
ShowMessage(IntToStr(i)); //99

//也可以这样读:
s := WebBrowser1.OleObject.document.parentWindow.str;
i := WebBrowser1.OleObject.document.parentWindow.num;
ShowMessage(s); //Hello
ShowMessage(IntToStr(i)); //99
end;
假如网页中有这样的语句:
<script>
var
str = "Hello";
i = 99;
</script>

--------------------------------------------------------------------------------

//调用网页脚本中的函数:

procedure TForm1.Button1Click(Sender: TObject);
begin
WebBrowser1.OleObject.document.parentWindow.MB(); //HTML-Js

//如需指定脚本语言, 需要:
WebBrowser1.OleObject.document.parentWindow.execScript('MB()','JavaScript'); //HTML-Js
end;
假如有这样的脚本:
<script>
function MB(){
alert('HTML-Js');
}
</script>

--------------------------------------------------------------------------------

//判断网页及内部框架网页是否全部下载完毕

procedure TForm1.WebBrowser1DocumentComplete(ASender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
begin
if WebBrowser1.Application = pDisp then
begin
Text := '网页下载完毕!';
end;
end;

--------------------------------------------------------------------------------

//改变背景色或背景图片:
WebBrowser1.OleObject.document.body.bgcolor := '#FF0000';
WebBrowser1.OleObject.document.body.background := '...图片地址';
--------------------------------------------------------------------------------
//操作有 ID 标签的对象:
var
s: string;
begin
s := WebBrowser1.OleObject.document.getElementByID('span1').innerText;
ShowMessage(s); //这是 span1 标签中的内容

//或者:
s := WebBrowser1.OleObject.document.parentWindow.span1.innerText;
ShowMessage(s); //这是 span1 标签中的内容

//隐藏它:
WebBrowser1.OleObject.document.parentWindow.span1.style.display := 'none';
end;
假如网页中有这样的内容:
<span id=span1>这是 span1 标签中的内容</span>


--------------------------------------------------------------------------------

//获取网页源代码
var
s: string;
begin
s := WebBrowser1.OleObject.document.body.innerHTML; //body内的所有代码
s := WebBrowser1.OleObject.document.body.outerHTML; //body内的所有代码, 包含body标签
s := WebBrowser1.OleObject.document.documentElement.innerHTML; //html内的所有代码
end;
--------------------------------------------------------------------------------
//WebBrowser 中的右键菜单

//先要添加ApplicationEvents1,指定其Message事件

//屏蔽右键菜单
procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean);
begin
with Msg do
begin
if not IsChild(WebBrowser1.Handle, hWnd) then Exit;
Handled := (message = WM_RBUTTONDOWN) or (message = WM_RBUTTONUP) or (message = WM_CONTEXTMENU);
end;
end;

//替换右键菜单
procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean);
var mPoint: TPoint;
begin
if IsChild(WebBrowser1.Handle, Msg.Hwnd) and
((Msg.Message = WM_RBUTTONDOWN) or (Msg.Message = WM_RBUTTONUP)) then
begin
GetCursorPos(mPoint); //得到光标位置
PopupMenu1.Popup(mPoint.X, mPoint.Y); //弹出popupmenu1的菜单
Handled:=True;
end;
end;
--------------------------------------------------------------------------------
//新页面写入
begin
WebBrowser1.Navigate('about:blank');
WebBrowser1.OleObject.Document.Writeln('ok');
end; 

  Delphi实现网页采集

说到网页采集,通常大家以为到网上偷数据,然后把到收集到的数据挂到自己网上去。其实也可以将采集到的数据做为公司的参考,或把收集的数据跟自己公司的业务做对比等。 
目前网页采集多为3P代码为多(3P即ASP、PHP 、JSP)。用得最有代表的就动易科技公司BBS中新闻采集系统,和网上流传的新浪新闻采集系统等都是用ASP程序来使用,但速度从理论上来说不是很好。如果尝试用其它软件的多线程采集是不是更快?答案是肯定的。用DELPHI、VC、VB、JB都可以,PB似乎比较不好做。以下用DELPHI来解释采集网页数据。 
一、 简单的新闻采集 
新闻采集是最简单的,只要识别标题、副题、作者、出处、日期、新闻主体、分页就可以了。在采集之前肯定要取得网页的内容,所以在DELPHI里加入idHTTP控件(在indy Clients面板),然后用idHTTP1.GET 方法取得网页的内容,声明如下: 
function Get(AURL: string): string; overload; 
AURL参数,是string类型,指定一个URL地址字符串。函数返回也是string类型,返回网页的HTML源文件。比如我们可以这样调用: 
tmpStr:= idHTTP1.Get(‘http://www.163.com’); 
调用成功后,tmpstr变量里存储的就是网易主页的代码了。 
接下来,讲一下数据的截取,这里,我定义了这么一个函数: 
function TForm1.GetStr(StrSource,StrBegin,StrEnd:string):string; 
var 
in_star,in_end:integer; 
begin 
in_star:=AnsiPos(strbegin,strsource)+length(strbegin); 
in_end:=AnsiPos(strend,strsource); 
result:=copy(strsource,in_sta,in_end-in_star); 
end; 
StrSource:string类型,表示HTML源文件。 
StrBegin:string类型,表示截取开始的标记。 
StrEnd:string,表示截取结束的标记。 
函数返回字符串StrSource中从StrSource到StrBegin之间的一段文本。 
比如: 
strtmp:=TForm1.GetStr(‘A123BCD’,‘A’,‘BC’); 
运行后,strtmp的值为:’123’。 
关于函数里用到的AnsiPos和copy,都是系统定义的,可以从delphi的帮助文件里找到相关说明,我在这里也简单罗嗦一下: 
function AnsiPos(const Substr, S: string): Integer 
返回Substr在S中第一次出现的位置。 
function copy(strsource,in_sta,in_end-in_star): string; 
返回字符串strsource中,从in_sta(整型数据)开始到in_end-in_star(整型数据)结束的字符串。 
有了以上函数,我们就可以通过设置各种标记,来截取想要的文章内容了。在程序中,比较麻烦的是我们需要设置许多标记,要定位某一项内容,必须设置它的开始和结束标志。比如要取得网页上的文章标题,必须事先查看网页代码,查看出文章标题前边和后边的一些特征代码,通过这些特征代码,来截取文章的标题。 
下面我们来实际演示一下,假设要采集的文章地址为http://www.xxx.com/test.htm 
代码为: 
<html> 
<head> 
<meta http-equiv="Content-Language" content="zh-cn"> 
<meta name="GENERATOR" content="Microsoft FrontPage 5.0"> 
<meta name="ProgId" content="FrontPage.Editor.Document"> 
<meta http-equiv="Content-Type" content="text/html; charset=gb2312"> 
<title>新建网页 1</title> 
</head> 
<body> 
<p align="center"><b>文章标题</b></p> 
<table border="1" cellpadding="0" cellspacing="0" style="border-collapse: collapse" bordercolor="#111111" width="100%" id="AutoNumber1"> 
<tr><td width="60%">作者</td> 
<td width="40%">出处</td></tr> 
</table> 
<p><font size="2">这里是文章内容正文。</font></p> 
<a href='..new_pr.asp'>上一页</a>  <a href='new_ne.asp'>下一页</a> 
</body> 
</html> 
第一步,我们用StrSource:= idHTTP1.Get(‘http://www.xxx.com/test.htm ’);将网页代码保存在strsource变量中。 
然后定义strTitle、strAuthor、strCopyFrom、strContent: 
strTitle:= GetStr(StrSource,’ <p align="center"><b>’,’ </b></p>’): 
strAuthor:= GetStr(StrSource,’ <tr><td width="60%">’,’ </td>’): 
strCopyFrom:= GetStr(StrSource,’ <td width="40%">’,’ </td></tr>’): 
strContent:= GetStr(StrSource,’ <p><font size="2">,’ </font></p>’): 
这样,就能把文章的标题、副题、作者、出处、日期、内容和分页分别存储在以上变量中。 
第二步,用循环的办法,打开下一页,并取得内容,加到strContent变量中。 
StrSource:= idHTTP1.Get(‘new_ne.asp’); 
strContent:= strContent +GetStr(StrSource,’ <p><font size="2">,’ </font></p>’): 
然后再判断有没有下一页,如果还有就接着取得下一页的内容。 
这样就完成了一个简单的截取过程。从以上的程序代码可以看到,我们使用的截取办法都是找截取内容的头部和尾部的,如果遇到这个头部和尾部有多个怎么办?似乎没办法,只会找到第一个,所以在找之前应该验证一下是不是只有一处有这个截取的内容的前后部。 
以上内容没有程序验证,仅供参考,如果认为有用可以试试。
View Code
原文地址:https://www.cnblogs.com/blogpro/p/11453206.html