00001
00002
00003
00004
00005
00006 #if !defined(AFX_FSWEBPAGEDOWNLOADER_H__A6C75579_FDC3_46CB_A771_85AB6875B099__INCLUDED_)
00007 #define AFX_FSWEBPAGEDOWNLOADER_H__A6C75579_FDC3_46CB_A771_85AB6875B099__INCLUDED_
00008
00009 #if _MSC_VER > 1000
00010 #pragma once
00011 #endif
00012
00013 #include "fsScheduleMgr.h"
00014 #include "tree.h"
00015
// Mode selector for an extension list. fsWPDSettings uses it twice:
// enExtsType (declared next to strExts) and enImgsExtsType (declared next to
// strImgsExts).
// NOTE(review): the per-enumerator meanings below are inferred from the names
// only - confirm against the implementation.
00016 enum fsWPDExtsType
00017 {
00018 WPDET_DOWNLOAD, // presumably: download only files whose extension is listed
00019 WPDET_NOTDOWNLOAD, // presumably: skip files whose extension is listed
00020 WPDET_OFF // presumably: extension filtering is disabled
00021 };
00022
// WPDF_* flags. The values 1, 2 and 4 occupy distinct bits, so they can be
// combined with bitwise OR.
// NOTE(review): presumably stored in fsWPDSettings::dwFlags, and the meanings
// noted below are inferred from the names - confirm both.

// presumably: keep the site's folder structure when saving files
00023 #define WPDF_KEEPFOLDERSTRUCTURE 1
00024
// presumably: do not keep downloaded pages
00025 #define WPDF_DONTSTOREPAGES 2
00026
// presumably: delete downloads once they are completed
00027 #define WPDF_DELCOMPLETEDDLDS 4
00028
// Settings of a web-page download (fsWebPageDownloader keeps one instance in
// m_wpds and exposes it through GetWDPS()).
// The member order matches fsWPDSettings_v1 up to iReserved; do not reorder.
// NOTE(review): the per-member remarks are inferred from names and types only;
// confirm them against the code that reads these fields.
00029 struct fsWPDSettings
00030 {
00031 fsString strHTMLExts; // presumably: extensions treated as HTML pages
00032 BOOL bNotAllPages; // NOTE(review): exact meaning not visible here
00033 BOOL bNotAllFiles; // NOTE(review): exact meaning not visible here
00034 BOOL bNotAllImages; // NOTE(review): exact meaning not visible here
00035 BOOL bDownloadImages; // presumably: download linked images
00036 BOOL bDownloadFiles; // presumably: download linked files
00037 BOOL bDownloadStyles; // presumably: download linked style sheets
00038 int iDepth; // presumably: link-following depth; units/limits not visible here
00039 fsString strExts; // extension list; see enExtsType
00040 fsWPDExtsType enExtsType; // mode for strExts
00041 fsString strImgsExts; // image extension list; see enImgsExtsType
00042 fsWPDExtsType enImgsExtsType; // mode for strImgsExts
00043 vmsDownloadsGroupSmartPtr pDLGroup; // downloads group (an fsString name in fsWPDSettings_v1)
00044 fsString strFolderSaveTo; // presumably: destination folder
00045 int iReserved; // reserved; no use visible in this header
00046 fsString strUserName; // presumably: login used for the downloads
00047 fsString strPassword; // presumably: password used for the downloads
00048 BOOL bSavePagesUnderHTM; // NOTE(review): exact meaning not visible here
00049
00050 DWORD dwFlags; // presumably: combination of WPDF_* flags - confirm
00051
// List of raw pointers to ignore-list entries. `struct fsWPDIgnoreListItem`
// is named with an elaborated specifier because the struct is defined after
// this one. Ownership of the pointed-to items is not visible in this header.
00052 fs::list <struct fsWPDIgnoreListItem*> vIgnoreList;
00053 };
00054
// One entry of fsWPDSettings::vIgnoreList (that list stores pointers to this
// type).
00055 struct fsWPDIgnoreListItem
00056 {
00057 fsString strURL; // URL of the entry
00058 DWORD dwFlags; // presumably: combination of WPD_ILITEM_* flags - confirm
00059 };
00060
// WPD_ILITEM_* flags. The values 1 and 2 occupy distinct bits.
// NOTE(review): presumably used in fsWPDIgnoreListItem::dwFlags, and the
// meanings noted below are inferred from the names - confirm both.

// presumably: the entry also covers sub-folders of strURL
00061 #define WPD_ILITEM_SUBFOLDERSALSO 1
00062
// presumably: the entry covers only the exact path of strURL
00063 #define WPD_ILITEM_THISPATHONLY 2
00064
// Earlier ("v1") variant of fsWPDSettings. It has the same members in the same
// order up to iReserved, with two differences visible in this header:
//  - the group is an fsString (strDLGroup) instead of the
//    vmsDownloadsGroupSmartPtr pDLGroup;
//  - strUserName, strPassword, bSavePagesUnderHTM, dwFlags and vIgnoreList
//    are absent.
// NOTE(review): presumably kept for reading data written by older versions
// (cf. fsWebPageDownloader::Load_OLD) - confirm. Do not change this layout.
00065 struct fsWPDSettings_v1
00066 {
00067 fsString strHTMLExts;
00068 BOOL bNotAllPages;
00069 BOOL bNotAllFiles;
00070 BOOL bNotAllImages;
00071 BOOL bDownloadImages;
00072 BOOL bDownloadFiles;
00073 BOOL bDownloadStyles;
00074 int iDepth;
00075 fsString strExts;
00076 fsWPDExtsType enExtsType;
00077 fsString strImgsExts;
00078 fsWPDExtsType enImgsExtsType;
00079 fsString strDLGroup; // group as a string (fsWPDSettings uses a smart pointer here)
00080 fsString strFolderSaveTo;
00081 int iReserved;
00082 };
00083
// WPSTATE_* flags. The values 1 and 2 occupy distinct bits.
// NOTE(review): presumably stored in fsDLWebPage::bState, and the meanings
// noted below are inferred from the names - confirm both.

// presumably: the download of this page was deleted
00084 #define WPSTATE_DLDWASDELETED 1
00085
// presumably: the page has already been parsed/processed
00086 #define WPSTATE_PAGEPROCESSED 2
00087
// Kind of link; stored in _WP_UnprocessedLinks::lt.
// NOTE(review): the mapping to HTML elements below is inferred from the
// enumerator names - confirm.
// NOTE(review): names starting with an underscore followed by an upper-case
// letter are reserved to the implementation in C++; renaming would change the
// public interface, so it is left as is.
00088 enum _WP_LinkType
00089 {
00090 WPLT_A, // presumably: <a> hyperlink
00091 WPLT_IMG, // presumably: <img> image reference
00092 WPLT_STYLESHEET // presumably: style-sheet reference
00093 };
00094
// Record describing a link that has not been processed yet; fsDLWebPage keeps
// a list of these in pvUnpLinks.
// NOTE(review): field meanings are inferred from the names - confirm.
00095 struct _WP_UnprocessedLinks
00096 {
00097 UINT nWPIDWhere; // presumably: fsDLWebPage::nID of a related page
00098 UINT nParserUrl; // presumably: index of the URL inside the HTML parser
00099 _WP_LinkType lt; // kind of link
00100 };
00101
// Node payload of the page tree (see t_wptree and
// fsWebPageDownloader::m_pages): one page/file together with its download.
// NOTE(review): ownership and lifetime of the two list pointers are not
// visible in this header - confirm who allocates and frees them.
00102 struct fsDLWebPage
00103 {
00104 vmsDownloadSmartPtr dld; // download object for this item
00105 UINT uDldId; // presumably: identifier of `dld` - confirm
00106 fsString strURL; // URL of the item
00107 fsString strFile; // presumably: local file the item is saved to
00108 BYTE bState; // presumably: combination of WPSTATE_* flags - confirm
00109 UINT nID; // identifier of this page (cf. FindWebPage (UINT nID))
00110
00111 fs::list <_WP_UnprocessedLinks> *pvUnpLinks; // pointer to a list of not-yet-processed links
00112
00113 fs::list <fsString> *pvUrls; // pointer to a list of URL strings
00114 };
00115
// Shorthand for a pointer to a tree (or sub-tree) of fsDLWebPage nodes.
// Note that t_wptree is itself a pointer type.
00116 typedef fs::ListTree <fsDLWebPage>* t_wptree;
00117
// Event codes passed to the fntWPDEvents callback.
// The enumeration is defined BEFORE the callback typedef on purpose: ISO C++
// does not allow `enum fsWPDEvent` to be named in an elaborated type specifier
// before the enumeration has been declared (that form compiled only as a
// compiler extension). Enumerator order, and therefore the values, are unchanged.
// NOTE(review): the per-event remarks are inferred from the names - confirm.
00118 enum fsWPDEvent
00119 {
00120 WPDE_DLDEVENTRECEIVED, // presumably: an event of an underlying download was received
00121 WPDE_FILEADDED, // presumably: a file/page was added to the tree
00122 WPDE_DLDWILLBEDELETED, // presumably: a download is about to be deleted
00123 WPDE_DLDRESTORED, // presumably: a download was restored
00124 WPDE_WEBPAGEWILLBEDELETED, // presumably: a web page is about to be deleted
00125 WPDE_STOPPED, // presumably: downloading was stopped
00126 WPDE_DONE, // presumably: downloading finished
00127 };
00128
// Callback type installed with fsWebPageDownloader::SetEventFunc.
//   dldr - the downloader the event refers to
//   ev   - event code
//   info - extra integer argument (fsWebPageDownloader::Event defaults it to 0)
//   lp   - presumably the user pointer given to SetEventFunc - confirm
00129 typedef void (*fntWPDEvents) (class fsWebPageDownloader* dldr, enum fsWPDEvent ev, int info, LPVOID lp);
00130
// Kind of item being added; passed as enPageType to
// fsWebPageDownloader::AddPage.
// NOTE(review): the per-enumerator remarks are inferred from the names.
00131 enum vmsWPDPageType
00132 {
00133 WPDPT_PAGE, // presumably: an HTML page
00134 WPDPT_FILE, // presumably: a regular file
00135 WPDPT_IMAGE, // presumably: an image
00136 WPDPT_CSS, // presumably: a style sheet
00137 };
00138
// Web-page downloader. From the members declared below it holds its settings
// (m_wpds), a tree of fsDLWebPage nodes (m_pages) and an optional event
// callback (m_pfnEvents / m_lpEventsParam).
// Only declarations are visible in this header: every remark marked
// "presumably" is inferred from names/signatures and must be confirmed
// against the implementation.
// NOTE(review): the class has a CRITICAL_SECTION member and declares no copy
// constructor or assignment operator, so it is implicitly copyable; copying is
// presumably not intended - confirm.
00139 class fsWebPageDownloader
00140 {
// fsWebPageDownloadsMgr may access the protected members below.
00141 friend class fsWebPageDownloadsMgr;
00142 public:
00143
// Presumably writes the name of the site being downloaded into psz.
// NOTE(review): no buffer size is passed - confirm the capacity callers
// must provide.
00144 void GetDownloadingSiteName (LPSTR psz);
00145
// Sets the downloader up for pszPageURL. bAutoStart defaults to TRUE and
// task to NULL (presumably "no schedule"). Returns BOOL; presumably
// TRUE on success.
00146 BOOL Create (LPCSTR pszPageURL, BOOL bAutoStart = TRUE, fsSchedule *task = NULL);
00147
// Returns a pointer to the root of the page tree (presumably &m_pages).
00148 t_wptree GetRootPage();
00149
// Reads state from hFile; wVer is presumably the file-format version.
00150 BOOL Load (HANDLE hFile, WORD wVer);
00151
// Reads state from hFile in an older format; bOldVer presumably selects
// between two legacy layouts - confirm.
00152 BOOL Load_OLD (HANDLE hFile, BOOL bOldVer);
00153
// Writes state to hFile.
00154 BOOL Save (HANDLE hFile);
00155
// Presumably deletes every download owned by this object; bByUser
// presumably tells whether the user requested it.
00156 void DeleteAllDownloads(BOOL bByUser);
00157
// NOTE(review): exact effect not visible here (name suggests releasing the
// link to the downloads without deleting them).
00158 void DetachFromDownloads();
00159
// Installs the event callback and its user pointer (cf. m_pfnEvents and
// m_lpEventsParam).
00160 void SetEventFunc (fntWPDEvents pfn, LPVOID lp);
00161
00162 void SetAutoStartDownloading (BOOL b);
00163
00164 void StopDownloading();
00165
00166 void StartDownloading();
00167
// Indexed access to the downloads; valid indices are presumably
// 0 .. GetDownloadCount()-1 - confirm.
00168 vmsDownloadSmartPtr GetDownload (int iIndex);
00169
00170 int GetDownloadCount();
00171
// Static: fills *wpds, presumably with the application's default
// settings.
00172 static void ReadDefaultWPDS (fsWPDSettings* wpds);
00173
// Returns a pointer to settings (presumably &m_wpds). The name is spelled
// "WDPS" (not "WPDS"); it is part of the public interface.
00174 fsWPDSettings* GetWDPS ();
00175
// State queries; the precise conditions are defined in the implementation.
00176 BOOL IsDownloading();
00177
00178 BOOL IsOnAutoStart();
00179
00180 BOOL IsScheduled ();
00181
00182 BOOL IsDone();
00183
00184 BOOL IsRunning();
00185
// Progress queries.
00186 int GetDoneFileCount();
00187
00188 int GetFileCount();
00189
// NOTE(review): value range (0..1 or 0..100) is not visible here.
00190 float GetPercentDone();
00191
// NOTE(review): lifetime of the returned string is not visible here.
00192 LPCSTR GetStartURL ();
00193
// Looks up the tree node for dld; presumably returns NULL when not found.
00194 fs::ListTree <fsDLWebPage>* FindWebPageTree (vmsDownloadSmartPtr dld);
00195
00196 fsWebPageDownloader();
00197 virtual ~fsWebPageDownloader();
00198
00199 protected:
00200
// Presumably checks url against fsWPDSettings::vIgnoreList.
00201 BOOL IsURLShouldBeIgnored (fsURL& url);
00202
// One of four ParseHTML*Urls/Images helpers sharing the same parameters
// (parser, target tree node, bFixUrlsOnly, base URL). The meaning of the
// int result is not visible here.
00203 int ParseHTMLFrameUrls(fsHTMLParser &parser, t_wptree wptree, BOOL bFixUrlsOnly, LPCSTR pszBaseURL);
00204 BOOL m_bWasShutdownMsg;
00205
00206 void CorrectUnpUrls (fsDLWebPage* wpfrom, fsDLWebPage* wpto);
00207
00208 DWORD OnCheckFileExtIsOK (vmsDownloadSmartPtr dld);
00209
// Presumably undoes a partially completed Load.
00210 void Load_PerformRollback();
00211 BOOL m_bIsDeleting;
00212 BOOL m_bStopped;
00213
// NOTE(review): pszFile is an output buffer with no size parameter.
00214 void GetFileForReplace (fsDLWebPage* wpwhere, fsDLWebPage *wpwhat, LPSTR pszFile);
00215
// NOTE(review): pszPtr is an output buffer with no size parameter;
// presumably receives a path from pszFromFile to pszToFile.
00216 void GetPtrToFile (LPCSTR pszToFile, LPCSTR pszFromFile, LPSTR pszPtr);
00217
00218 void DeleteWebPage (fsDLWebPage *wp);
00219
// Lookup overloads: by page id, by download, by full URL. Presumably
// return NULL when nothing matches.
00220 fsDLWebPage* FindWebPage (UINT nID);
00221 fsDLWebPage* FindWebPage (vmsDownloadSmartPtr dld);
00222 fsDLWebPage* FindWebPage (LPCSTR pszFullUrl);
00223
00224 void OnDldRedirected (vmsDownloadSmartPtr dld);
00225
00226 BOOL IsUrlsEqual (fsURL& url1, LPCSTR pszUrl2);
// Presumably the largest fsDLWebPage::nID handed out so far.
00227 UINT m_nMaxID;
// Presumably serialises handling of "done" and "redirected" download events
// (inferred from the name).
00228 CRITICAL_SECTION m_cs_Done_Redir_Events;
00229
// Tree-node overloads of the public Load/Save; they take the tree node to
// read into / write from.
00230 BOOL Load (HANDLE hFile, t_wptree root, WORD wVer);
00231 BOOL Save (HANDLE hFile, t_wptree root);
00232
// Presumably forwards ev/info to m_pfnEvents; info defaults to 0.
00233 void Event (fsWPDEvent ev, int info = 0);
00234 fntWPDEvents m_pfnEvents;
00235 LPVOID m_lpEventsParam;
00236
00237 int ParseHTMLLinkUrls(fsHTMLParser &parser, t_wptree wptree, BOOL bFixUrlsOnly, LPCSTR pszBaseURL);
00238
00239 int ParseHTMLUrls (fsHTMLParser& parser, t_wptree wptree, BOOL bFixUrlsOnly, LPCSTR pszBaseURL);
00240
00241 int ParseHTMLImages(fsHTMLParser &parser, t_wptree wptree, BOOL bFixUrlsOnly, LPCSTR pszBaseURL);
00242
// Splits pszFullUrl; results are returned through ppszUrlWA and the optional
// ppszFA (defaults to NULL).
// NOTE(review): who frees the returned strings is not visible here.
00243 BOOL CrackUrl (LPCSTR pszFullUrl, LPSTR* ppszUrlWA, LPSTR* ppszFA = NULL);
00244
00245 void ParseHTMLFile (t_wptree wptree, BOOL bFixUrlsOnly);
00246
00247 UINT ParseHTML (LPCSTR pszHTML, t_wptree wptree, BOOL bFixUrlsOnly);
00248
// Presumably returns the index into m_vConfs for dld; the "not found" value
// is not visible here.
00249 int FindConfIndex (vmsDownloadSmartPtr dld);
00250
00251 void OnWPDownloadDone (vmsDownloadSmartPtr dld);
00252
00253 void WebPage_FindDownload (fsDLWebPage *wp);
00254
// Adds an item of kind enPageType for pszPageURL under root. Defaults:
// bSetCTReq = FALSE, bAutoStart = TRUE, task = NULL.
00255 fsDLWebPage* AddPage (fs::ListTree <fsDLWebPage> *root, LPCSTR pszPageURL, vmsWPDPageType enPageType, BOOL bSetCTReq = FALSE, BOOL bAutoStart = TRUE, fsSchedule *task = NULL);
00256
00257 fsDLWebPage* AddWebPage (fsDLWebPage *wp, fs::ListTree <fsDLWebPage> *root, fsSchedule *task = NULL);
00258
// Static callback with a user pointer lp (presumably the
// fsWebPageDownloader instance - confirm).
00259 static DWORD _DldEvents (fsDownload* dld, enum fsDownloadsMgrEvent ev, LPVOID lp);
00260
00261 void ApplySettingsToDld (vmsDownloadSmartPtr dld);
00262
00263 fsWPDSettings m_wpds; // settings of this downloader
00264 fs::ListTree <fsDLWebPage> m_pages; // tree of pages/files
00265 fsString m_strStartServer;
00266
// Pairs a page with its tree node; stored in m_vConfs.
00267 struct _Conformity
00268 {
00269 fsDLWebPage* wp;
00270 fs::ListTree <fsDLWebPage>* wptree;
00271 };
00272
// NOTE(review): std::vector is used here but <vector> is not included by this
// header; it presumably arrives through an earlier include - confirm.
00273 std::vector <_Conformity> m_vConfs;
00274 };
00275
00276 #endif