获取电驴首页推荐信息和指定栏目信息

标 题: 获取电驴首页推荐信息和指定栏目信息
作 者: itdef
链 接: http://www.cnblogs.com/itdef/p/4081963.html 

欢迎转帖 请保持文本完整并注明出处

/*******************************************************************************
*  @file        
*  @author      def< qq group: 324164944 >
*  @blog        http://www.cnblogs.com/itdef/
*  @brief       Fetch the VeryCD home-page recommendations and the titles of given entry pages.
*******************************************************************************/
 
#include "stdafx.h"
 
#include <afxinet.h>
#include <atlsimpstr.h>
#include <fstream>  
#include <iostream>
#include <sstream>
 
#include <set>
 
using namespace std;
 
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
 
// Downloads strUrl and writes the received text to the file named by
// DownloadHtmFileName. Returns 0 on success, -1 on failure.
int GetHttpFileData(CString strUrl,char* DownloadHtmFileName);
// Reads the downloaded home page from szfileName, converts it with
// UTF8Str2GBK and prints the recommendation entries. Returns 0 on success,
// -1 on failure.
int ParseHomePageDownloadFile(char* szfileName);
// Converts the UTF-8 text in strUTF8 to the system ANSI code page (CP_ACP) and
// stores it in strGBK. Returns 0 on success, -1 on failure.
int UTF8Str2GBK(const string& strUTF8,string& strGBK);
// Prints every distinct value that follows the marker szName in strGbk.
void GetHomePageRecommend(char* szName,const string& strGbk);
 
// The one and only application object
 
CWinApp theApp;
 
using namespace std;
 
 
 
// Prints (to cout, followed by a newline) the text located between the first
// occurrence of strOpenTag in strText and the next strCloseTag after it.
// Prints nothing and returns false when either tag is absent.
static bool PrintTextBetweenTags(const std::string& strText,
                                 const std::string& strOpenTag,
                                 const std::string& strCloseTag)
{
    const std::string::size_type openPos = strText.find(strOpenTag);
    if (openPos == std::string::npos)
        return false;

    // The payload offset is derived from the tag itself instead of a
    // hand-counted constant, so it cannot drift out of sync with the tag text.
    const std::string::size_type textPos = openPos + strOpenTag.size();
    const std::string::size_type closePos = strText.find(strCloseTag, textPos);
    if (closePos == std::string::npos)
        return false;

    std::cout << strText.substr(textPos, closePos - textPos) << std::endl;
    return true;
}

// Reads the downloaded entry page stored in szfileName, converts it with
// UTF8Str2GBK and prints the content of its <title> element and of its
// <div class="cv-title"> element (each only when present).
//
// @param szfileName  path of the previously downloaded page; may be NULL.
// @return 0 when the file was read and converted (even if neither element was
//         found), -1 when szfileName is NULL, the file cannot be opened, or
//         the conversion fails / yields an empty string.
int ParseUpdateFile(char* szfileName)
{
    if (NULL == szfileName)
        return -1;

    // The file is only read, so open it read-only and report an open failure
    // explicitly instead of letting it surface later as a conversion error.
    std::ifstream fs(szfileName);
    if (!fs.is_open())
    {
        std::cerr << "open file error: " << szfileName << std::endl;
        return -1;
    }

    // Slurp the whole file into memory.
    std::stringstream ss;
    ss << fs.rdbuf();
    fs.close();

    std::string strGbk;
    const int iConvert = UTF8Str2GBK(ss.str(), strGbk);
    if (iConvert != 0 || strGbk.empty())
    {
        std::cerr << "transfer utf8 to gbk error" << std::endl;
        return -1;
    }

    // A missing element is not treated as an error.
    PrintTextBetweenTags(strGbk, "<title>", "</title>");
    PrintTextBetweenTags(strGbk, "<div class=\"cv-title\">", "</div>");

    return 0;
}
 
// Downloads the entry page at szHtmAddress into the scratch file
// "HtmDownloadFile" and prints the information ParseUpdateFile extracts from
// it. Failures are reported on cerr; nothing is returned to the caller.
//
// @param szHtmAddress  URL of the entry page to show.
void ShowUpdateInfo(char* szHtmAddress)
{
    // Both callees take a non-const char*, so hand them a writable buffer
    // rather than a string literal.
    char szDownloadFile[] = "HtmDownloadFile";

    if (0 != GetHttpFileData(szHtmAddress, szDownloadFile))
    {
        cerr << "GetHttpFileData error once" << endl;
        // Nothing trustworthy was downloaded, so do not parse the scratch file.
        return;
    }

    if (0 != ParseUpdateFile(szDownloadFile))
    {
        cerr << "ParseUpdateFile error once" << endl;
    }
}
 
 
// Downloads the home page at szHomePageAddress into the scratch file
// "HtmDownloadFile" and prints the recommendations that
// ParseHomePageDownloadFile extracts from it. Failures are reported on cerr;
// nothing is returned to the caller.
//
// @param szHomePageAddress  URL of the home page to show.
void ShowHomePageElement(char* szHomePageAddress)
{
    // Both callees take a non-const char*, so hand them a writable buffer
    // rather than a string literal.
    char szDownloadFile[] = "HtmDownloadFile";

    if (0 != GetHttpFileData(szHomePageAddress, szDownloadFile))
    {
        cerr << "GetHttpFileData error once" << endl;
        // Nothing trustworthy was downloaded, so do not parse the scratch file.
        return;
    }

    if (0 != ParseHomePageDownloadFile(szDownloadFile))
    {
        // Name the step that actually failed (this used to repeat the
        // download error message).
        cerr << "ParseHomePageDownloadFile error once" << endl;
    }
}
 
 
 
// Program entry point: initialises MFC, shows the home-page recommendations,
// then shows the information of a fixed list of entry pages, printing a
// separator line after each step. Waits for a key press before exiting.
//
// @return 0 normally, 1 when MFC initialisation fails.
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
    int nExitCode = 0;

    if (AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
    {
        const char* const szSeparator = "****************************************************";

        // Writable buffers, because the Show* functions take non-const char*.
        char szHomePage[] = "http://www.verycd.com/";
        char szEntryPages[][64] =
        {
            "http://www.verycd.com/entries/790244/",
            "http://www.verycd.com/entries/519062/",
            "http://www.verycd.com/entries/780306/",
            "http://www.verycd.com/entries/522227/",
            "http://www.verycd.com/entries/507338/",
            "http://www.verycd.com/entries/515005/",
            "http://www.verycd.com/entries/794197/",
            "http://www.verycd.com/entries/511135/"
        };
        const size_t nEntryPages = sizeof(szEntryPages) / sizeof(szEntryPages[0]);

        ShowHomePageElement(szHomePage);
        cout << szSeparator << endl;

        for (size_t idx = 0; idx < nEntryPages; ++idx)
        {
            ShowUpdateInfo(szEntryPages[idx]);
            cout << szSeparator << endl;
        }
    }
    else
    {
        // MFC could not be initialised: report it and signal failure.
        _tprintf(_T("错误: MFC 初始化失败\n"));
        nExitCode = 1;
    }

    // Keep the console window open until the user presses a key.
    system("pause");

    return nExitCode;
}
 
 
 
 
// Converts UTF-8 text to the system ANSI code page by going through UTF-16.
//
// NOTE(review): the target is CP_ACP, i.e. whatever ANSI code page the system
// uses; the result is GBK only when that code page is GBK - confirm this is
// the intended deployment.
//
// The input is treated as a NUL-terminated string, so conversion stops at the
// first embedded NUL character.
//
// @param strUTF8  UTF-8 encoded input.
// @param strGBK   receives the converted text; left untouched on failure.
// @return 0 on success, -1 when either Win32 conversion call fails.
int UTF8Str2GBK(const std::string& strUTF8, std::string& strGBK)
{
    // First call: ask how many wide characters (terminator included) are needed.
    const int cchWide = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
    if (cchWide <= 0)
        return -1;

    // String objects own the scratch buffers, so every return path frees them.
    std::wstring wideBuf(static_cast<std::wstring::size_type>(cchWide), L'\0');
    if (0 == MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, &wideBuf[0], cchWide))
        return -1;

    // Same two-step pattern for UTF-16 -> ANSI. The output buffer is made of
    // char (not TCHAR), which is what WideCharToMultiByte writes.
    const int cbAnsi = WideCharToMultiByte(CP_ACP, 0, wideBuf.c_str(), -1, NULL, 0, NULL, NULL);
    if (cbAnsi <= 0)
        return -1;

    std::string ansiBuf(static_cast<std::string::size_type>(cbAnsi), '\0');
    if (0 == WideCharToMultiByte(CP_ACP, 0, wideBuf.c_str(), -1, &ansiBuf[0], cbAnsi, NULL, NULL))
        return -1;

    // The buffer includes the terminating NUL; copy only the text before it.
    strGBK.assign(ansiBuf.c_str());
    return 0;
}
 
 
// Reads the downloaded home page stored in szfileName, converts it with
// UTF8Str2GBK and prints two groups of recommendations, one per
// VeryCD.TrackEvent marker, each preceded by its heading.
//
// @param szfileName  path of the previously downloaded page; may be NULL.
// @return 0 when the file was read and converted, -1 when szfileName is NULL,
//         the file cannot be opened, or the conversion fails / yields an
//         empty string.
int ParseHomePageDownloadFile(char* szfileName)
{
    if (NULL == szfileName)
        return -1;

    // The file is only read, so open it read-only and report an open failure
    // explicitly instead of letting it surface later as a conversion error.
    std::ifstream fs(szfileName);
    if (!fs.is_open())
    {
        std::cerr << "open file error: " << szfileName << std::endl;
        return -1;
    }

    // Slurp the whole file into memory.
    std::stringstream ss;
    ss << fs.rdbuf();
    fs.close();

    std::string strGbk;
    const int iConvert = UTF8Str2GBK(ss.str(), strGbk);
    if (iConvert != 0 || strGbk.empty())
    {
        std::cerr << "transfer utf8 to gbk error" << std::endl;
        return -1;
    }

    // GetHomePageRecommend takes a non-const char*, so the markers live in
    // writable buffers rather than being passed as string literals.
    char szBigMarker[] = "VeryCD.TrackEvent('base','首页大推',";
    char szSmallMarker[] = "VeryCD.TrackEvent('base','首页小推',";

    std::cout << "首页大推" << std::endl;
    GetHomePageRecommend(szBigMarker, strGbk);
    std::cout << "首页小推" << std::endl;
    GetHomePageRecommend(szSmallMarker, strGbk);

    return 0;
}
 
 
// Collects every distinct value that follows the marker szName in strGbk and
// prints them to cout in sorted order, one per line, each prefixed with a
// fixed label.
//
// A value starts one character after the end of the marker (that character is
// skipped; presumably it is the value's opening quote - verify against the
// page markup) and runs up to the next "')". Empty values are skipped.
//
// The skip distance used to be the constant 37. That matches "marker length
// plus one" for the markers passed by ParseHomePageDownloadFile only if their
// Chinese characters occupy two bytes each (NOTE(review): confirm the build's
// execution character set). It is now computed from szName so any marker works.
//
// @param szName  NUL-terminated marker text; NULL or empty prints nothing.
// @param strGbk  page text to search, in the same encoding as szName.
void GetHomePageRecommend(char* szName,const std::string& strGbk)
{
    if (NULL == szName)
        return;

    const std::string strMarker(szName);
    if (strMarker.empty())
        return;

    // Distance from the start of a marker to the first character of its value.
    const std::string::size_type valueOffset = strMarker.size() + 1;

    // A set both removes duplicates and yields sorted output.
    std::set<std::string> setKeyWord;

    std::string::size_type searchFrom = 0;
    for (;;)
    {
        const std::string::size_type markerPos = strGbk.find(strMarker, searchFrom);
        if (markerPos == std::string::npos)
            break;

        const std::string::size_type endPos = strGbk.find("')", markerPos + 1);
        if (endPos == std::string::npos)
            break;

        // Only keep non-empty values; a terminator at or before the value
        // start means there is nothing to extract for this marker.
        if (endPos > markerPos + valueOffset)
        {
            setKeyWord.insert(strGbk.substr(markerPos + valueOffset,
                                            endPos - markerPos - valueOffset));
        }

        searchFrom = endPos + 1;
    }

    for (std::set<std::string>::const_iterator pos = setKeyWord.begin();
         pos != setKeyWord.end(); ++pos)
    {
        std::cout << "电驴首页推荐  " << *pos << std::endl;
    }
}
 
 
 
// Downloads the resource at strUrl and writes the text it reads, string by
// string, to the file named by szDownloadHtmFileName (the file is created or
// truncated).
//
// NOTE(review): each string returned by ReadString is written without adding a
// line terminator; if ReadString strips terminators, the saved file has all
// lines joined - confirm this is acceptable for the parsers.
//
// @param strUrl                  URL to download.
// @param szDownloadHtmFileName   output file path; must not be NULL.
// @return 0 on success, -1 when the file name is NULL, the output file cannot
//         be opened, OpenURL yields no file, or a CInternetException is raised.
int GetHttpFileData(CString strUrl,char* szDownloadHtmFileName)
{
    CInternetSession Session("Internet Explorer", 0);
    int iRet = -1;

    if (szDownloadHtmFileName == NULL)
    {
        cerr << "DownloadHtmFileName is NULL" << endl;
        Session.Close();
        return iRet;
    }

    ofstream of(szDownloadHtmFileName);
    // A failed open sets failbit, not badbit, so test is_open() rather than bad().
    if (!of.is_open())
    {
        cerr << "of create file error" << endl;
        Session.Close();
        return iRet;
    }

    // Only the CStdioFile interface (ReadString/Close) is needed, so no
    // downcast of OpenURL's result is required.
    CStdioFile *pFile = NULL;

    try
    {
        pFile = Session.OpenURL(strUrl);
        if (pFile != NULL)
        {
            CString strClip;
            while (pFile->ReadString(strClip))
            {
                of << strClip;
            }
            iRet = 0;
        }
        else
        {
            cerr << __FUNCTION__ << " OpenURL returned no file" << endl;
        }
    }
    catch (CInternetException* pEx)
    {
        TCHAR pszError[64];
        pEx->GetErrorMessage(pszError, 64);
        cerr << __FUNCTION__ << pszError << endl;
        // MFC exceptions caught by pointer must be released by the handler.
        pEx->Delete();
    }

    // The caller owns the object returned by OpenURL: close and free it.
    if (pFile != NULL)
    {
        pFile->Close();
        delete pFile;
    }

    Session.Close();
    of.close();

    return iRet;
}

  关于字符集转换的文章:

C++11与Unicode及使用标准库进行UTF-8、UTF-16、UCS2、UCS4/UTF-32编码转换

原文地址:https://www.cnblogs.com/itdef/p/4081963.html