00001
00002
00003
00004
00005
00006 #include "stdafx.h"
00007 #include "FdmApp.h"
00008 #include "vmsVideoSiteHtmlCodeParser.h"
00009 #include "inetutil.h"
00010
00011 _COM_SMARTPTR_TYPEDEF (IXMLDOMNamedNodeMap, __uuidof (IXMLDOMNamedNodeMap));
00012
00013 #ifdef _DEBUG
00014 #undef THIS_FILE
00015 static char THIS_FILE[]=__FILE__;
00016 #define new DEBUG_NEW
00017 #endif
00018
// Non-zero when c is a printable 7-bit ASCII code (32..126).
// The argument is parenthesized so that compound expressions such as
// `x & 0x7f` expand with the intended precedence. It is still evaluated
// twice, so never pass an expression with side effects (e.g. `*psz++`).
#define is_valid_char(c) ((c) >= 32 && (c) < 127)
00020
00021 vmsVideoSiteHtmlCodeParser::vmsVideoSiteHtmlCodeParser()
00022 {
00023 CoInitialize (NULL);
00024 }
00025
00026 vmsVideoSiteHtmlCodeParser::~vmsVideoSiteHtmlCodeParser()
00027 {
00028 CoUninitialize ();
00029 }
00030
00031 BOOL vmsVideoSiteHtmlCodeParser::Parse(LPCSTR pszSite, LPCSTR pszHtml)
00032 {
00033 switch (GetSupportedSiteIndex (pszSite))
00034 {
00035 case 0:
00036 return Parse_Youtube (pszHtml);
00037
00038 case 1:
00039 return Parse_GoogleVideo (pszHtml);
00040
00041 case 2:
00042 return Parse_LiveDigital (pszHtml);
00043
00044 case 3:
00045 return Parse_MySpace (pszHtml);
00046
00047 case 4:
00048 return Parse_Sharkle (pszHtml);
00049
00050 case 5:
00051 return Parse_Blennus (pszHtml);
00052
00053 case 6:
00054 return Parse_DailyMotion (pszHtml);
00055
00056 case 7:
00057 return Parse_Grouper (pszHtml);
00058
00059 default:
00060 return FALSE;
00061 }
00062 }
00063
00064 BOOL vmsVideoSiteHtmlCodeParser::Parse_Youtube(LPCSTR pszHtml)
00065 {
00066 fsString strTitle, strUrl;
00067
00068 LPSTR psz = strstr (pszHtml, "<h1 id=\"video_title\"");
00069
00070 if (psz)
00071 {
00072 psz = strchr (psz, '>');
00073 if (psz)
00074 {
00075 psz++;
00076 while (*psz && *psz != '<')
00077 {
00078 if (is_valid_char (*psz))
00079 strTitle += *psz++;
00080 else
00081 psz++;
00082 }
00083 }
00084 }
00085
00086 psz = strstr (pszHtml, "swfArgs =");
00087 if (psz == NULL)
00088 return FALSE;
00089 psz = strchr (psz, '{');
00090 if (psz == NULL)
00091 return FALSE;
00092 psz++;
00093
00094 fsString strBase = "http://youtube.com/";
00095 fsString strParams;
00096
00097 while (*psz != '}')
00098 {
00099 while (*psz == ' ' || *psz == ',')
00100 psz++;
00101
00102 fsString str;
00103
00104 while (*psz != ':')
00105 {
00106 if (*psz == ' ')
00107 {
00108 str = "";
00109 break;
00110 }
00111 str += *psz++;
00112 }
00113
00114 if (str.IsEmpty ())
00115 break;
00116
00117 if (lstrcmpi (str, "BASE_YT_URL") == 0)
00118 {
00119 strBase = ExtractValue (psz);
00120 }
00121 else
00122 {
00123 if (strParams.IsEmpty () == FALSE)
00124 strParams += '&';
00125
00126 strParams += str; strParams += "="; strParams += ExtractValue (psz);
00127 }
00128 }
00129
00130 strUrl = strBase;
00131 strUrl += "get_video?";
00132 strUrl += strParams;
00133
00134 fsDecodeHtmlText (strTitle);
00135
00136 m_strVideoTitle = strTitle;
00137 m_strVideoUrl = strUrl;
00138 m_strVideoType = "flv";
00139 m_bDirectLink = TRUE;
00140
00141 return TRUE;
00142 }
00143
00144 LPCSTR vmsVideoSiteHtmlCodeParser::get_VideoTitle()
00145 {
00146 return m_strVideoTitle;
00147 }
00148
00149 LPCSTR vmsVideoSiteHtmlCodeParser::get_VideoUrl()
00150 {
00151 return m_strVideoUrl;
00152 }
00153
00154 LPCSTR vmsVideoSiteHtmlCodeParser::get_VideoType()
00155 {
00156 return m_strVideoType;
00157 }
00158
00159 BOOL vmsVideoSiteHtmlCodeParser::IsSiteSupported(LPCSTR pszHost)
00160 {
00161 return GetSupportedSiteIndex (pszHost) != -1;
00162 }
00163
00164 BOOL vmsVideoSiteHtmlCodeParser::Parse_GoogleVideo(LPCSTR pszHtml)
00165 {
00166 LPCSTR psz = strstr (pszHtml, "/googleplayer.swf?");
00167 if (psz == NULL)
00168 return FALSE;
00169 psz = strstr (psz, "videoUrl");
00170 if (psz == NULL)
00171 return FALSE;
00172 psz = strstr (psz, "http");
00173 if (psz == NULL)
00174 return FALSE;
00175
00176 fsString strUrl;
00177 while (*psz && *psz != '"')
00178 strUrl += *psz++;
00179 if (strUrl [strUrl.GetLength () - 1] == '\\')
00180 strUrl [strUrl.GetLength () - 1] = 0;
00181 fsDecodeHtmlUrl (strUrl);
00182
00183 fsString strTitle;
00184 psz = strstr (pszHtml, "pvprogtitle");
00185 if (psz)
00186 {
00187 while (*psz && *psz != '>')
00188 psz++;
00189 if (*psz)
00190 {
00191 psz++;
00192 while (*psz == ' ')
00193 psz++;
00194 while (*psz && *psz != '<')
00195 {
00196 if (is_valid_char (*psz))
00197 strTitle += *psz++;
00198 else
00199 psz++;
00200 }
00201 while (strTitle.GetLength () && strTitle [strTitle.GetLength () - 1] == ' ')
00202 strTitle [strTitle.GetLength () - 1] = 0;
00203 fsDecodeHtmlText (strTitle);
00204 }
00205 }
00206
00207 m_strVideoTitle = strTitle;
00208 m_strVideoUrl = strUrl;
00209 m_strVideoType = "flv";
00210 m_bDirectLink = TRUE;
00211
00212 return TRUE;
00213 }
00214
00215 BOOL vmsVideoSiteHtmlCodeParser::Parse_Youtube_RootPage(LPCSTR pszHtml)
00216 {
00217 fsString strUrl;
00218
00219 LPCSTR psz = strstr (pszHtml, "/admp.swf");
00220 if (psz == NULL)
00221 return FALSE;
00222
00223 while (*psz && *psz != '=')
00224 psz++;
00225 if (*psz == 0)
00226 return FALSE;
00227 psz++;
00228
00229 fsString strId;
00230
00231 while (*psz && *psz != '&')
00232 strId += *psz++;
00233
00234 strUrl = "http://www.youtube.com/watch?v=";
00235 strUrl += strId;
00236
00237 m_strVideoTitle = "";
00238 m_strVideoUrl = strUrl;
00239 m_strVideoType = "";
00240 m_bDirectLink = FALSE;
00241
00242 return TRUE;
00243 }
00244
00245 BOOL vmsVideoSiteHtmlCodeParser::get_IsVideoUrlDirectLink()
00246 {
00247 return m_bDirectLink;
00248 }
00249
00250 BOOL vmsVideoSiteHtmlCodeParser::Parse_LiveDigital(LPCSTR pszHtml)
00251 {
00252 LPCSTR psz = strstr (pszHtml, "flashvars");
00253 if (psz == NULL)
00254 psz = strstr (pszHtml, "flashVars");
00255
00256 fsString strC, strH;
00257
00258 if (psz)
00259 {
00260 LPCSTR pszC = strstr (psz, "c="),
00261 pszH = strstr (psz, "h=");
00262
00263 if (pszC == NULL || pszH == NULL)
00264 return FALSE;
00265
00266 pszC += 2;
00267 pszH += 2;
00268
00269 while (*pszC && *pszC != '&' && *pszC != '"')
00270 strC += *pszC++;
00271
00272 while (*pszH && *pszH != '&' && *pszH != '"')
00273 strH += *pszH++;
00274 }
00275 else
00276 {
00277
00278 psz = strstr (pszHtml, "content_id/");
00279 if (psz)
00280 {
00281 psz += lstrlen ("content_id/");
00282 while (isdigit (*psz))
00283 strC += *psz++;
00284 }
00285
00286 if (strC.IsEmpty ())
00287 {
00288 psz = strstr (pszHtml, "/content/");
00289 if (psz)
00290 {
00291 psz += lstrlen ("/content/");
00292 while (isdigit (*psz))
00293 strC += *psz++;
00294 if (strC.IsEmpty ())
00295 return FALSE;
00296 }
00297 }
00298
00299 strH = "livedigital.com";
00300 }
00301
00302 fsString strUrl = "http://";
00303 strUrl += strH;
00304 strUrl += "/content/flash_load_content/";
00305 strUrl += strC;
00306
00307 m_strVideoTitle = "";
00308 m_strVideoUrl = strUrl;
00309 m_strVideoType = "";
00310 m_bDirectLink = FALSE;
00311
00312 return TRUE;
00313 }
00314
00315 BOOL vmsVideoSiteHtmlCodeParser::Parse_Further(LPCSTR pszSite, LPCSTR pszHtml)
00316 {
00317 switch (GetSupportedSiteIndex (pszSite))
00318 {
00319 case 2:
00320 return Parse_Further_LiveDigital (pszHtml);
00321
00322 case 3:
00323 return Parse_Further_MySpace (pszHtml);
00324
00325 default:
00326 return Parse (pszSite, pszHtml);
00327 }
00328 }
00329
00330 BOOL vmsVideoSiteHtmlCodeParser::Parse_Further_LiveDigital(LPCSTR pszTxt)
00331 {
00332 LPCSTR psz = strstr (pszTxt, "content_url=");
00333 if (psz == NULL)
00334 return FALSE;
00335
00336 fsString strUrl;
00337 psz += lstrlen ("content_url=");
00338 while (*psz && *psz != '&')
00339 strUrl += *psz++;
00340
00341 fsDecodeHtmlUrl (strUrl);
00342
00343 fsString strTitle;
00344
00345 psz = strstr (pszTxt, "title=");
00346 if (psz)
00347 {
00348 psz += lstrlen ("title=");
00349 while (*psz && *psz != '&')
00350 {
00351 if (is_valid_char (*psz))
00352 strTitle += *psz++;
00353 else
00354 psz++;
00355 }
00356 fsDecodeHtmlText (strTitle);
00357 }
00358
00359 m_strVideoTitle = strTitle;
00360 m_strVideoUrl = strUrl;
00361 m_strVideoType = (LPCSTR)strUrl + strUrl.GetLength () - 3;
00362 m_bDirectLink = TRUE;
00363
00364 return TRUE;
00365 }
00366
00367 BOOL vmsVideoSiteHtmlCodeParser::Parse_MySpace(LPCSTR pszHtml)
00368 {
00369 LPCSTR psz = strstr (pszHtml, "flashvars=");
00370 if (psz == NULL)
00371 psz = strstr (pszHtml, "flashVars=");
00372 if (psz == NULL)
00373 return FALSE;
00374 psz += 10;
00375
00376 if (*psz == '"')
00377 psz++;
00378 else if (strnicmp (psz, """, 6) == 0)
00379 psz += 6;
00380 else
00381 return FALSE;
00382
00383 CString str;
00384 while (*psz && *psz != '"' && strnicmp (psz, """, 6))
00385 str += *psz++;
00386 if (str.IsEmpty ())
00387 return FALSE;
00388 str.Replace ("m=", "mediaID=");
00389
00390 fsString strUrl;
00391 strUrl = "http://"; strUrl += "mediaservices.myspace.com/services/rss.ashx?";
00392 strUrl += str;
00393
00394 m_strVideoTitle = "";
00395 m_strVideoUrl = strUrl;
00396 m_strVideoType = "";
00397 m_bDirectLink = FALSE;
00398
00399 return TRUE;
00400 }
00401
00402 BOOL vmsVideoSiteHtmlCodeParser::Parse_Further_MySpace(LPCSTR pszHtml)
00403 {
00404 USES_CONVERSION;
00405 IXMLDOMDocumentPtr spXML;
00406 IXMLDOMNodePtr spNode, spNode2;
00407
00408 spXML.CreateInstance (__uuidof (DOMDocument));
00409
00410 if (spXML == NULL)
00411 return FALSE;
00412
00413 spXML->put_async (FALSE);
00414
00415 VARIANT_BOOL bRes;
00416 spXML->loadXML (A2W (pszHtml), &bRes);
00417 if (bRes == FALSE)
00418 return FALSE;
00419
00420 spXML->selectSingleNode (L"rss", &spNode);
00421 if (spNode == NULL)
00422 return FALSE;
00423
00424 spNode->selectSingleNode (L"channel", &spNode2);
00425 if (spNode2 == NULL)
00426 return FALSE;
00427
00428 spNode = NULL;
00429 spNode2->selectSingleNode (L"item", &spNode);
00430 if (spNode == NULL)
00431 return FALSE;
00432
00433 spNode2 = NULL;
00434 spNode->selectSingleNode (L"title", &spNode2);
00435 if (spNode2 == NULL)
00436 return FALSE;
00437
00438 CComBSTR bstrTitle;
00439 spNode2->get_text (&bstrTitle);
00440
00441 spNode2 = NULL;
00442 spNode->selectSingleNode (L"media:content", &spNode2);
00443 if (spNode2 == NULL)
00444 return FALSE;
00445 IXMLDOMNamedNodeMapPtr spAttrs;
00446 spNode2->get_attributes (&spAttrs);
00447 if (spAttrs == NULL)
00448 return FALSE;
00449 IXMLDOMNodePtr spUrlValue;
00450 spAttrs->getNamedItem (L"url", &spUrlValue);
00451 if (spUrlValue == NULL)
00452 return FALSE;
00453 COleVariant vtUrl;
00454 spUrlValue->get_nodeValue (&vtUrl);
00455 ASSERT (vtUrl.vt == VT_BSTR);
00456 if (vtUrl.vt != VT_BSTR)
00457 return FALSE;
00458
00459 m_strVideoTitle = W2A (bstrTitle);
00460 fsDecodeHtmlText (m_strVideoTitle);
00461 m_strVideoUrl = W2A (vtUrl.bstrVal);
00462 m_strVideoType = (LPCSTR)m_strVideoUrl + m_strVideoUrl.GetLength () - 3;
00463 m_bDirectLink = TRUE;
00464
00465 return TRUE;
00466 }
00467
00468 BOOL vmsVideoSiteHtmlCodeParser::Parse_Sharkle(LPCSTR pszHtml)
00469 {
00470 LPCSTR psz = strstr (pszHtml, "splayer.swf?");
00471 if (psz == NULL)
00472 return FALSE;
00473
00474 psz = strstr (psz, "rnd=");
00475 if (psz == NULL)
00476 return FALSE;
00477
00478 fsString strRnd;
00479
00480 psz += 4;
00481 while (*psz && *psz != '&')
00482 strRnd += *psz++;
00483 if (strRnd.IsEmpty ())
00484 return FALSE;
00485
00486 fsString strUrl;
00487 strUrl = "http://sharkle.com/inc/misc/about.php?rnd=";
00488 strUrl += strRnd;
00489 strUrl += "&ssd=ZeleninGalaburda";
00490
00491 fsString strTitle;
00492
00493 psz = strstr (pszHtml, "blog_header");
00494 if (psz != NULL)
00495 {
00496 while (*psz && *psz != '>')
00497 psz++;
00498 if (*psz != 0)
00499 {
00500 psz++;
00501 while (*psz == ' ' || is_valid_char (*psz) == FALSE)
00502 psz++;
00503 while (*psz && *psz != '<')
00504 {
00505 if (is_valid_char (*psz))
00506 strTitle += *psz++;
00507 else
00508 psz++;
00509 }
00510 while (strTitle.GetLength () && strTitle [strTitle.GetLength () - 1] == ' ')
00511 strTitle [strTitle.GetLength () - 1] = 0;
00512 }
00513 }
00514
00515 fsDecodeHtmlText (strTitle);
00516
00517 m_strVideoTitle = strTitle;
00518 m_strVideoUrl = strUrl;
00519 m_strVideoType = "flv";
00520 m_bDirectLink = TRUE;
00521
00522 return TRUE;
00523 }
00524
00525 BOOL vmsVideoSiteHtmlCodeParser::Parse_Blennus(LPCSTR pszHtml)
00526 {
00527 LPCSTR psz = strstr (pszHtml, "embed");
00528 if (psz == NULL)
00529 psz = strstr (pszHtml, "EMBED");
00530 if (psz == NULL)
00531 return FALSE;
00532
00533 psz = strstr (psz, "src=");
00534 if (psz == NULL)
00535 return FALSE;
00536 psz += 4;
00537 if (*psz++ != '"')
00538 return FALSE;
00539 fsString strUrl;
00540 while (*psz && *psz != '"')
00541 strUrl += *psz++;
00542 if (strUrl.IsEmpty ())
00543 return FALSE;
00544
00545 fsString strTitle;
00546 psz = strstr (pszHtml, "contentheading");
00547 if (psz != NULL)
00548 {
00549 while (*psz && *psz != '>')
00550 psz++;
00551 if (*psz != 0)
00552 {
00553 psz++;
00554 while (*psz == ' ' || is_valid_char (*psz) == FALSE)
00555 psz++;
00556 while (*psz && *psz != '<')
00557 {
00558 if (is_valid_char (*psz))
00559 strTitle += *psz++;
00560 else
00561 psz++;
00562 }
00563 fsDecodeHtmlText (strTitle);
00564 while (strTitle.GetLength () && strTitle [strTitle.GetLength () - 1] == ' ')
00565 strTitle [strTitle.GetLength () - 1] = 0;
00566 }
00567 }
00568
00569 fsDecodeHtmlText (strTitle);
00570
00571 m_strVideoTitle = strTitle;
00572 m_strVideoUrl = strUrl;
00573 m_strVideoType = "wmv";
00574 m_bDirectLink = TRUE;
00575
00576 return TRUE;
00577 }
00578
00579 BOOL vmsVideoSiteHtmlCodeParser::Parse_DailyMotion(LPCSTR pszHtml)
00580 {
00581 LPCSTR psz = strstr (pszHtml, "flashvars=");
00582 if (psz == NULL)
00583 psz = strstr (pszHtml, "flashVars=");
00584 if (psz == NULL)
00585 return FALSE;
00586
00587 _lSearchUrl:
00588 psz = strstr (psz, "url=");
00589 if (psz == NULL)
00590 return FALSE;
00591 psz += 4;
00592 if (strncmp (psz, "rev=", 4) == 0)
00593 goto _lSearchUrl;
00594
00595 fsString strUrl;
00596 while (*psz && *psz != '&')
00597 strUrl += *psz++;
00598 fsDecodeHtmlUrl (strUrl);
00599
00600 fsString strTitle;
00601 psz = strstr (pszHtml, "<h1");
00602 if (psz == NULL)
00603 psz = strstr (pszHtml, "<H1");
00604 if (psz != NULL)
00605 {
00606 while (*psz && *psz != '>')
00607 psz++;
00608 if (*psz != 0)
00609 {
00610 psz++;
00611 while (*psz == ' ' || is_valid_char (*psz) == FALSE)
00612 psz++;
00613 while (*psz && *psz != '<')
00614 {
00615 if (is_valid_char (*psz))
00616 strTitle += *psz++;
00617 else
00618 psz++;
00619 }
00620 fsDecodeHtmlText (strTitle);
00621 while (strTitle.GetLength () && strTitle [strTitle.GetLength () - 1] == ' ')
00622 strTitle [strTitle.GetLength () - 1] = 0;
00623 }
00624 }
00625
00626 m_strVideoTitle = strTitle;
00627 m_strVideoUrl = strUrl;
00628 m_strVideoType = "flv";
00629 m_bDirectLink = TRUE;
00630
00631 return TRUE;
00632 }
00633
00634 BOOL vmsVideoSiteHtmlCodeParser::Parse_Grouper(LPCSTR pszHtml)
00635 {
00636 LPCSTR psz = strstr (pszHtml, "flvURL=");
00637 if (psz == NULL)
00638 return FALSE;
00639 psz += 7;
00640
00641 fsString strUrl;
00642 while (*psz && *psz != '&')
00643 strUrl += *psz++;
00644
00645 fsString strTitle;
00646 psz = strstr (pszHtml, "<h1");
00647 if (psz == NULL)
00648 psz = strstr (pszHtml, "<H1");
00649 if (psz != NULL)
00650 {
00651 while (*psz && *psz != '>')
00652 psz++;
00653 if (*psz != 0)
00654 {
00655 psz++;
00656 while (*psz == ' ' || is_valid_char (*psz) == FALSE)
00657 psz++;
00658 while (*psz && *psz != '<')
00659 {
00660 if (is_valid_char (*psz))
00661 strTitle += *psz++;
00662 else
00663 psz++;
00664 }
00665 fsDecodeHtmlText (strTitle);
00666 while (strTitle.GetLength () && strTitle [strTitle.GetLength () - 1] == ' ')
00667 strTitle [strTitle.GetLength () - 1] = 0;
00668 }
00669 }
00670
00671 m_strVideoTitle = strTitle;
00672 m_strVideoUrl = strUrl;
00673 m_strVideoType = "flv";
00674 m_bDirectLink = TRUE;
00675
00676 return TRUE;
00677 }
00678
00679 fsString vmsVideoSiteHtmlCodeParser::ExtractValue(LPSTR &psz)
00680 {
00681 char c;
00682 while (*psz && *psz != '"' && *psz != '\'')
00683 psz++;
00684 if (*psz == 0)
00685 return "";
00686 c = *psz++;
00687 fsString strRes;
00688 while (*psz && *psz != c)
00689 strRes += *psz++;
00690 if (*psz)
00691 psz++;
00692 return strRes;
00693 }