[extractor/common] Move workaround for applying first Set-Cookie header into a separate method
This commit is contained in:
		| @@ -2817,6 +2817,29 @@ class InfoExtractor(object): | |||||||
|         self._downloader.cookiejar.add_cookie_header(req) |         self._downloader.cookiejar.add_cookie_header(req) | ||||||
|         return compat_cookies.SimpleCookie(req.get_header('Cookie')) |         return compat_cookies.SimpleCookie(req.get_header('Cookie')) | ||||||
|  |  | ||||||
|  |     def _apply_first_set_cookie_header(self, url_handle, cookie): | ||||||
|  |         # Some sites (e.g. [1-3]) may serve two cookies under the same name | ||||||
|  |         # in the Set-Cookie header and expect the first (old) one to be set | ||||||
|  |         # rather than the second (new) one. However, per RFC 6265 the newer | ||||||
|  |         # cookie replaces the older one in the cookie store, which is what | ||||||
|  |         # actually happens. We work around this issue by manually resetting | ||||||
|  |         # the cookie to the first one. | ||||||
|  |         # 1. https://new.vk.com/ | ||||||
|  |         # 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201 | ||||||
|  |         # 3. https://learning.oreilly.com/ | ||||||
|  |         for header, cookies in url_handle.headers.items(): | ||||||
|  |             if header.lower() != 'set-cookie': | ||||||
|  |                 continue | ||||||
|  |             if sys.version_info[0] >= 3: | ||||||
|  |                 cookies = cookies.encode('iso-8859-1') | ||||||
|  |             cookies = cookies.decode('utf-8') | ||||||
|  |             cookie_value = re.search( | ||||||
|  |                 r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies) | ||||||
|  |             if cookie_value: | ||||||
|  |                 value, domain = cookie_value.groups() | ||||||
|  |                 self._set_cookie(domain, cookie, value) | ||||||
|  |                 break | ||||||
|  |  | ||||||
|     def get_testcases(self, include_onlymatching=False): |     def get_testcases(self, include_onlymatching=False): | ||||||
|         t = getattr(self, '_TEST', None) |         t = getattr(self, '_TEST', None) | ||||||
|         if t: |         if t: | ||||||
|   | |||||||
| @@ -3,7 +3,6 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import collections | import collections | ||||||
| import re | import re | ||||||
| import sys |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_urlparse | from ..compat import compat_urlparse | ||||||
| @@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor): | |||||||
|             'pass': password.encode('cp1251'), |             'pass': password.encode('cp1251'), | ||||||
|         }) |         }) | ||||||
|  |  | ||||||
|         # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header |         # VK serves two identical remixlhk cookies in the Set-Cookie header and | ||||||
|         # and expects the first one to be set rather than second (see |         # expects the first one to be actually set | ||||||
|         # https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201). |         self._apply_first_set_cookie_header(url_handle, 'remixlhk') | ||||||
|         # As of RFC6265 the newer one cookie should be set into cookie store |  | ||||||
|         # what actually happens. |  | ||||||
|         # We will workaround this VK issue by resetting the remixlhk cookie to |  | ||||||
|         # the first one manually. |  | ||||||
|         for header, cookies in url_handle.headers.items(): |  | ||||||
|             if header.lower() != 'set-cookie': |  | ||||||
|                 continue |  | ||||||
|             if sys.version_info[0] >= 3: |  | ||||||
|                 cookies = cookies.encode('iso-8859-1') |  | ||||||
|             cookies = cookies.decode('utf-8') |  | ||||||
|             remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) |  | ||||||
|             if remixlhk: |  | ||||||
|                 value, domain = remixlhk.groups() |  | ||||||
|                 self._set_cookie(domain, 'remixlhk', value) |  | ||||||
|                 break |  | ||||||
|  |  | ||||||
|         login_page = self._download_webpage( |         login_page = self._download_webpage( | ||||||
|             'https://login.vk.com/?act=login', None, |             'https://login.vk.com/?act=login', None, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user