使用IHTMLDocument2分析Html

http://hi.baidu.com/ahstudy2010/blog/item/102d7df3468f3ca4a40f52be.html

利用CWebBrowser2可以获取IHTMLDocument2接口。IHTMLDocument2接口是mshtml中最重要的接口之一,取得这个接口之后,就可以开始分析html代码。

下面的代码从当前文档的选中区域出发,逐级向上查找并获取指定名称的html标签元素:


// 从ctlBrowser当前html文档选中区域中获取名为strTagName的标签,存入*ppElement中
BOOL GetCrntHtmlElement(CWebBrowser2  &ctlBrowser, const CString &strTagName, IHTMLElement **ppElement)
{
    
*ppElement = NULL; // initial as NULL
     CString strText;
     CComBSTR cbsText, cbsTagName;
     CComPtr
<IHTMLTxtRange> spTxtRange = NULL;

    
static CString SELECTTION_TYPE_TEXT = _T("text");
    
static CString SELECTTION_TYPE_CONTROL = _T("control");
    
static CString SELECTTION_TYPE_NONE = _T("none");

    
try
     {
        
// calculate the first control based on the user selection
         HRESULT hr = E_FAIL;
         CComPtr
<IDispatch> spDispDoc = ctlBrowser.get_Document();
         CComPtr
<IHTMLDocument2> spHtmlDoc = NULL;
         hr
= spDispDoc->QueryInterface(IID_IHTMLDocument2, (void**)&spHtmlDoc);
        
if (FAILED(hr) || NULL == spHtmlDoc)
         {
            
return FALSE;
         }
         CComPtr
<IHTMLSelectionObject> spSelection = NULL;
         hr
= spHtmlDoc->get_selection(&spSelection);
        
if(SUCCEEDED(hr) && spSelection)
         {
             hr
= spSelection->get_type(&cbsText);
             strText
= cbsText;
            
if (0 == strText.CompareNoCase(SELECTTION_TYPE_TEXT)
                
|| 0 == strText.CompareNoCase(SELECTTION_TYPE_NONE) )
             {
                 CComPtr
<IDispatch> spTextRangeDisp = NULL;
                 hr
= spSelection->createRange(&spTextRangeDisp);
                
if(SUCCEEDED(hr) && spTextRangeDisp)
                 {
                     hr
= spTextRangeDisp->QueryInterface(IID_IHTMLTxtRange, (void**)&spTxtRange);
                    
if(SUCCEEDED(hr) && spTxtRange) // get a text range here
                     {                
                         CComPtr
<IHTMLElement> spElement=NULL;
                        
// get self element first for tag: such as "h6"
                         hr = spTxtRange->QueryInterface(IID_IHTMLElement, (void**)&spElement);
                        
if (SUCCEEDED(hr) && spElement)
                         {
                             hr
= spElement->get_tagName(&cbsTagName);
                            
if (SUCCEEDED(hr) && 0 == strTagName.CompareNoCase(CString(cbsTagName)) )
                             {    
// found   tag now
                                return TRUE;
                             }
                         }
                        
// loop search in parent for tag: such as "h6"
                         spElement=NULL;
                         CComPtr
<IHTMLElement> spParent = NULL;
                         hr  
=spTxtRange->parentElement (&spParent);
                        
while (SUCCEEDED(hr) && spParent)
                         {
                             hr
= spParent->get_tagName(&cbsTagName);
                            
if (SUCCEEDED(hr) && 0 == strTagName.CompareNoCase(CString(cbsTagName)) )
                             {    
// found tag now
                                return TRUE;
                             }
                             spElement
= spParent;
                             spParent
= NULL;
                             hr
= spElement->get_parentElement(&spParent);
                         }
                     }
                 }
             }
         }
     }
    
catch(CException* e)
     {
         e
->ReportError();
         e
->Delete();        
     }

    
return FALSE;
}
原文地址:https://www.cnblogs.com/carl2380/p/2093377.html