Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header
This commit is contained in:
		| @@ -56,6 +56,8 @@ from .utils import ( | ||||
|     ExtractorError, | ||||
|     format_bytes, | ||||
|     formatSeconds, | ||||
|     GeoRestrictedError, | ||||
|     ISO3166Utils, | ||||
|     locked_file, | ||||
|     make_HTTPS_handler, | ||||
|     MaxDownloadsReached, | ||||
| @@ -272,6 +274,13 @@ class YoutubeDL(object): | ||||
|                        If it returns None, the video is downloaded. | ||||
|                        match_filter_func in utils.py is one example for this. | ||||
|     no_color:          Do not emit color codes in output. | ||||
|     bypass_geo_restriction: | ||||
|                        Bypass geographic restriction via faking X-Forwarded-For | ||||
|                        HTTP header (experimental) | ||||
|     bypass_geo_restriction_as_country: | ||||
|                        Two-letter ISO 3166-1 alpha-2 country code that will be | ||||
|                        used for explicit geographic restriction bypassing via | ||||
|                        faking X-Forwarded-For HTTP header (experimental) | ||||
|  | ||||
|     The following options determine which downloader is picked: | ||||
|     external_downloader: Executable of the external downloader to call. | ||||
| @@ -707,6 +716,14 @@ class YoutubeDL(object): | ||||
|                     return self.process_ie_result(ie_result, download, extra_info) | ||||
|                 else: | ||||
|                     return ie_result | ||||
|             except GeoRestrictedError as e: | ||||
|                 msg = e.msg | ||||
|                 if e.countries: | ||||
|                     msg += '\nThis video is available in %s.' % ', '.join( | ||||
|                         map(ISO3166Utils.short2full, e.countries)) | ||||
|                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' | ||||
|                 self.report_error(msg) | ||||
|                 break | ||||
|             except ExtractorError as e:  # An error we somewhat expected | ||||
|                 self.report_error(compat_str(e), e.format_traceback()) | ||||
|                 break | ||||
|   | ||||
| @@ -414,6 +414,8 @@ def _real_main(argv=None): | ||||
|         'cn_verification_proxy': opts.cn_verification_proxy, | ||||
|         'geo_verification_proxy': opts.geo_verification_proxy, | ||||
|         'config_location': opts.config_location, | ||||
|         'bypass_geo_restriction': opts.bypass_geo_restriction, | ||||
|         'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import hashlib | ||||
| import json | ||||
| import netrc | ||||
| import os | ||||
| import random | ||||
| import re | ||||
| import socket | ||||
| import sys | ||||
| @@ -39,6 +40,8 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     fix_xml_ampersands, | ||||
|     float_or_none, | ||||
|     GeoRestrictedError, | ||||
|     GeoUtils, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     parse_iso8601, | ||||
| @@ -320,17 +323,25 @@ class InfoExtractor(object): | ||||
|     _real_extract() methods and define a _VALID_URL regexp. | ||||
|     Probably, they should also be added to the list of extractors. | ||||
|  | ||||
|     The _BYPASS_GEO attribute may be set to False in order to disable | ||||
|     geo restriction bypass mechanisms for a particular extractor. | ||||
|     Note that this does not disable the explicit geo restriction bypass | ||||
|     based on the country code provided with bypass_geo_restriction_as_country. | ||||
|  | ||||
|     Finally, the _WORKING attribute should be set to False for broken IEs | ||||
|     in order to warn the users and skip the tests. | ||||
|     """ | ||||
|  | ||||
|     _ready = False | ||||
|     _downloader = None | ||||
|     _x_forwarded_for_ip = None | ||||
|     _BYPASS_GEO = True | ||||
|     _WORKING = True | ||||
|  | ||||
|     def __init__(self, downloader=None): | ||||
|         """Constructor. Receives an optional downloader.""" | ||||
|         self._ready = False | ||||
|         # A fresh instance has no fake X-Forwarded-For IP; initialize() and | ||||
|         # extract() assign one when a geo restriction bypass applies. | ||||
|         self._x_forwarded_for_ip = None | ||||
|         self.set_downloader(downloader) | ||||
|  | ||||
|     @classmethod | ||||
| @@ -359,6 +370,10 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def initialize(self): | ||||
|         """Initializes an instance (authentication, etc). | ||||
|  | ||||
|         If no fake X-Forwarded-For IP has been chosen yet and the | ||||
|         bypass_geo_restriction_as_country param is set, a random IPv4 address | ||||
|         for that country code is picked first. _real_initialize() is then | ||||
|         called only while the _ready flag is unset. | ||||
|         """ | ||||
|         if not self._x_forwarded_for_ip: | ||||
|             country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) | ||||
|             if country_code: | ||||
|                 # GeoUtils.random_ipv4 returns None for a code it has no | ||||
|                 # address block for, so the IP may stay unset here. | ||||
|                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) | ||||
|         if not self._ready: | ||||
|             self._real_initialize() | ||||
|             self._ready = True | ||||
| @@ -366,8 +381,22 @@ class InfoExtractor(object): | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|         try: | ||||
|             self.initialize() | ||||
|             return self._real_extract(url) | ||||
|             for _ in range(2): | ||||
|                 try: | ||||
|                     self.initialize() | ||||
|                     return self._real_extract(url) | ||||
|                 except GeoRestrictedError as e: | ||||
|                     if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and | ||||
|                             self._BYPASS_GEO and | ||||
|                             self._downloader.params.get('bypass_geo_restriction', True) and | ||||
|                             not self._x_forwarded_for_ip and | ||||
|                             e.countries): | ||||
|                         self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) | ||||
|                         if self._x_forwarded_for_ip: | ||||
|                             self.report_warning( | ||||
|                                 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) | ||||
|                             continue | ||||
|                     raise | ||||
|         except ExtractorError: | ||||
|             raise | ||||
|         except compat_http_client.IncompleteRead as e: | ||||
| @@ -434,6 +463,15 @@ class InfoExtractor(object): | ||||
|         if isinstance(url_or_request, (compat_str, str)): | ||||
|             url_or_request = url_or_request.partition('#')[0] | ||||
|  | ||||
|         # Some sites check the X-Forwarded-For HTTP header in order to figure | ||||
|         # out the origin of a client behind a proxy. This allows bypassing geo | ||||
|         # restriction by faking this header's value with an IP that belongs to | ||||
|         # a country that is not geo restricted. We do so after a geo | ||||
|         # restriction error has been encountered, or up front when a country | ||||
|         # code is provided explicitly. | ||||
|         if self._x_forwarded_for_ip: | ||||
|             if 'X-Forwarded-For' not in headers: | ||||
|                 headers['X-Forwarded-For'] = self._x_forwarded_for_ip | ||||
|  | ||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) | ||||
|         if urlh is False: | ||||
|             assert not fatal | ||||
| @@ -609,10 +647,8 @@ class InfoExtractor(object): | ||||
|             expected=True) | ||||
|  | ||||
|     @staticmethod | ||||
|     def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): | ||||
|         raise ExtractorError( | ||||
|             '%s. You might want to use --proxy to workaround.' % msg, | ||||
|             expected=True) | ||||
|     def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None): | ||||
|         raise GeoRestrictedError(msg, countries=countries) | ||||
|  | ||||
|     # Methods for following #608 | ||||
|     @staticmethod | ||||
|   | ||||
| @@ -549,6 +549,18 @@ def parseOpts(overrideArguments=None): | ||||
|             'Upper bound of a range for randomized sleep before each download ' | ||||
|             '(maximum possible number of seconds to sleep). Must only be used ' | ||||
|             'along with --min-sleep-interval.')) | ||||
|     workarounds.add_option( | ||||
|         '--bypass-geo', | ||||
|         action='store_true', dest='bypass_geo_restriction', default=True, | ||||
|         help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') | ||||
|     workarounds.add_option( | ||||
|         '--no-bypass-geo', | ||||
|         action='store_false', dest='bypass_geo_restriction', default=True, | ||||
|         help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') | ||||
|     workarounds.add_option( | ||||
|         '--bypass-geo-as-country', metavar='CODE', | ||||
|         dest='bypass_geo_restriction_as_country', default=None, | ||||
|         help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') | ||||
|  | ||||
|     verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') | ||||
|     verbosity.add_option( | ||||
|   | ||||
| @@ -23,6 +23,7 @@ import operator | ||||
| import os | ||||
| import pipes | ||||
| import platform | ||||
| import random | ||||
| import re | ||||
| import socket | ||||
| import ssl | ||||
| @@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class GeoRestrictedError(ExtractorError): | ||||
|     """Geographic restriction Error exception. | ||||
|  | ||||
|     This exception may be thrown when a video is not available from your | ||||
|     geographic location due to geographic restrictions imposed by a website. | ||||
|  | ||||
|     msg is the error message and countries is an optional sequence of | ||||
|     country codes; both are stored on the instance as attributes. | ||||
|     """ | ||||
|     def __init__(self, msg, countries=None): | ||||
|         # Always passed on to ExtractorError with expected=True. | ||||
|         super(GeoRestrictedError, self).__init__(msg, expected=True) | ||||
|         # Store the message as given; the YoutubeDL handler reads e.msg | ||||
|         # and appends its own hints to it. | ||||
|         self.msg = msg | ||||
|         # Codes of the countries the video is available in (None if not | ||||
|         # known); used for the user hint and for picking a fake IP on retry. | ||||
|         self.countries = countries | ||||
|  | ||||
|  | ||||
| class DownloadError(YoutubeDLError): | ||||
|     """Download Error exception. | ||||
|  | ||||
| @@ -3027,6 +3040,260 @@ class ISO3166Utils(object): | ||||
|         return cls._country_map.get(code.upper()) | ||||
|  | ||||
|  | ||||
| class GeoUtils(object): | ||||
|     # Major IPv4 address blocks per country | ||||
|     _country_ip_map = { | ||||
|         'AD': '85.94.160.0/19', | ||||
|         'AE': '94.200.0.0/13', | ||||
|         'AF': '149.54.0.0/17', | ||||
|         'AG': '209.59.64.0/18', | ||||
|         'AI': '204.14.248.0/21', | ||||
|         'AL': '46.99.0.0/16', | ||||
|         'AM': '46.70.0.0/15', | ||||
|         'AO': '105.168.0.0/13', | ||||
|         'AP': '159.117.192.0/21', | ||||
|         'AR': '181.0.0.0/12', | ||||
|         'AS': '202.70.112.0/20', | ||||
|         'AT': '84.112.0.0/13', | ||||
|         'AU': '1.128.0.0/11', | ||||
|         'AW': '181.41.0.0/18', | ||||
|         'AZ': '5.191.0.0/16', | ||||
|         'BA': '31.176.128.0/17', | ||||
|         'BB': '65.48.128.0/17', | ||||
|         'BD': '114.130.0.0/16', | ||||
|         'BE': '57.0.0.0/8', | ||||
|         'BF': '129.45.128.0/17', | ||||
|         'BG': '95.42.0.0/15', | ||||
|         'BH': '37.131.0.0/17', | ||||
|         'BI': '154.117.192.0/18', | ||||
|         'BJ': '137.255.0.0/16', | ||||
|         'BL': '192.131.134.0/24', | ||||
|         'BM': '196.12.64.0/18', | ||||
|         'BN': '156.31.0.0/16', | ||||
|         'BO': '161.56.0.0/16', | ||||
|         'BQ': '161.0.80.0/20', | ||||
|         'BR': '152.240.0.0/12', | ||||
|         'BS': '24.51.64.0/18', | ||||
|         'BT': '119.2.96.0/19', | ||||
|         'BW': '168.167.0.0/16', | ||||
|         'BY': '178.120.0.0/13', | ||||
|         'BZ': '179.42.192.0/18', | ||||
|         'CA': '99.224.0.0/11', | ||||
|         'CD': '41.243.0.0/16', | ||||
|         'CF': '196.32.200.0/21', | ||||
|         'CG': '197.214.128.0/17', | ||||
|         'CH': '85.0.0.0/13', | ||||
|         'CI': '154.232.0.0/14', | ||||
|         'CK': '202.65.32.0/19', | ||||
|         'CL': '152.172.0.0/14', | ||||
|         'CM': '165.210.0.0/15', | ||||
|         'CN': '36.128.0.0/10', | ||||
|         'CO': '181.240.0.0/12', | ||||
|         'CR': '201.192.0.0/12', | ||||
|         'CU': '152.206.0.0/15', | ||||
|         'CV': '165.90.96.0/19', | ||||
|         'CW': '190.88.128.0/17', | ||||
|         'CY': '46.198.0.0/15', | ||||
|         'CZ': '88.100.0.0/14', | ||||
|         'DE': '53.0.0.0/8', | ||||
|         'DJ': '197.241.0.0/17', | ||||
|         'DK': '87.48.0.0/12', | ||||
|         'DM': '192.243.48.0/20', | ||||
|         'DO': '152.166.0.0/15', | ||||
|         'DZ': '41.96.0.0/12', | ||||
|         'EC': '186.68.0.0/15', | ||||
|         'EE': '90.190.0.0/15', | ||||
|         'EG': '156.160.0.0/11', | ||||
|         'ER': '196.200.96.0/20', | ||||
|         'ES': '88.0.0.0/11', | ||||
|         'ET': '196.188.0.0/14', | ||||
|         'EU': '2.16.0.0/13', | ||||
|         'FI': '91.152.0.0/13', | ||||
|         'FJ': '144.120.0.0/16', | ||||
|         'FM': '119.252.112.0/20', | ||||
|         'FO': '88.85.32.0/19', | ||||
|         'FR': '90.0.0.0/9', | ||||
|         'GA': '41.158.0.0/15', | ||||
|         'GB': '25.0.0.0/8', | ||||
|         'GD': '74.122.88.0/21', | ||||
|         'GE': '31.146.0.0/16', | ||||
|         'GF': '161.22.64.0/18', | ||||
|         'GG': '62.68.160.0/19', | ||||
|         'GH': '45.208.0.0/14', | ||||
|         'GI': '85.115.128.0/19', | ||||
|         'GL': '88.83.0.0/19', | ||||
|         'GM': '160.182.0.0/15', | ||||
|         'GN': '197.149.192.0/18', | ||||
|         'GP': '104.250.0.0/19', | ||||
|         'GQ': '105.235.224.0/20', | ||||
|         'GR': '94.64.0.0/13', | ||||
|         'GT': '168.234.0.0/16', | ||||
|         'GU': '168.123.0.0/16', | ||||
|         'GW': '197.214.80.0/20', | ||||
|         'GY': '181.41.64.0/18', | ||||
|         'HK': '113.252.0.0/14', | ||||
|         'HN': '181.210.0.0/16', | ||||
|         'HR': '93.136.0.0/13', | ||||
|         'HT': '148.102.128.0/17', | ||||
|         'HU': '84.0.0.0/14', | ||||
|         'ID': '39.192.0.0/10', | ||||
|         'IE': '87.32.0.0/12', | ||||
|         'IL': '79.176.0.0/13', | ||||
|         'IM': '5.62.80.0/20', | ||||
|         'IN': '117.192.0.0/10', | ||||
|         'IO': '203.83.48.0/21', | ||||
|         'IQ': '37.236.0.0/14', | ||||
|         'IR': '2.176.0.0/12', | ||||
|         'IS': '82.221.0.0/16', | ||||
|         'IT': '79.0.0.0/10', | ||||
|         'JE': '87.244.64.0/18', | ||||
|         'JM': '72.27.0.0/17', | ||||
|         'JO': '176.29.0.0/16', | ||||
|         'JP': '126.0.0.0/8', | ||||
|         'KE': '105.48.0.0/12', | ||||
|         'KG': '158.181.128.0/17', | ||||
|         'KH': '36.37.128.0/17', | ||||
|         'KI': '103.25.140.0/22', | ||||
|         'KM': '197.255.224.0/20', | ||||
|         'KN': '198.32.32.0/19', | ||||
|         'KP': '175.45.176.0/22', | ||||
|         'KR': '175.192.0.0/10', | ||||
|         'KW': '37.36.0.0/14', | ||||
|         'KY': '64.96.0.0/15', | ||||
|         'KZ': '2.72.0.0/13', | ||||
|         'LA': '115.84.64.0/18', | ||||
|         'LB': '178.135.0.0/16', | ||||
|         'LC': '192.147.231.0/24', | ||||
|         'LI': '82.117.0.0/19', | ||||
|         'LK': '112.134.0.0/15', | ||||
|         'LR': '41.86.0.0/19', | ||||
|         'LS': '129.232.0.0/17', | ||||
|         'LT': '78.56.0.0/13', | ||||
|         'LU': '188.42.0.0/16', | ||||
|         'LV': '46.109.0.0/16', | ||||
|         'LY': '41.252.0.0/14', | ||||
|         'MA': '105.128.0.0/11', | ||||
|         'MC': '88.209.64.0/18', | ||||
|         'MD': '37.246.0.0/16', | ||||
|         'ME': '178.175.0.0/17', | ||||
|         'MF': '74.112.232.0/21', | ||||
|         'MG': '154.126.0.0/17', | ||||
|         'MH': '117.103.88.0/21', | ||||
|         'MK': '77.28.0.0/15', | ||||
|         'ML': '154.118.128.0/18', | ||||
|         'MM': '37.111.0.0/17', | ||||
|         'MN': '49.0.128.0/17', | ||||
|         'MO': '60.246.0.0/16', | ||||
|         'MP': '202.88.64.0/20', | ||||
|         'MQ': '109.203.224.0/19', | ||||
|         'MR': '41.188.64.0/18', | ||||
|         'MS': '208.90.112.0/22', | ||||
|         'MT': '46.11.0.0/16', | ||||
|         'MU': '105.16.0.0/12', | ||||
|         'MV': '27.114.128.0/18', | ||||
|         'MW': '105.234.0.0/16', | ||||
|         'MX': '187.192.0.0/11', | ||||
|         'MY': '175.136.0.0/13', | ||||
|         'MZ': '197.218.0.0/15', | ||||
|         'NA': '41.182.0.0/16', | ||||
|         'NC': '101.101.0.0/18', | ||||
|         'NE': '197.214.0.0/18', | ||||
|         'NF': '203.17.240.0/22', | ||||
|         'NG': '105.112.0.0/12', | ||||
|         'NI': '186.76.0.0/15', | ||||
|         'NL': '145.96.0.0/11', | ||||
|         'NO': '84.208.0.0/13', | ||||
|         'NP': '36.252.0.0/15', | ||||
|         'NR': '203.98.224.0/19', | ||||
|         'NU': '49.156.48.0/22', | ||||
|         'NZ': '49.224.0.0/14', | ||||
|         'OM': '5.36.0.0/15', | ||||
|         'PA': '186.72.0.0/15', | ||||
|         'PE': '186.160.0.0/14', | ||||
|         'PF': '123.50.64.0/18', | ||||
|         'PG': '124.240.192.0/19', | ||||
|         'PH': '49.144.0.0/13', | ||||
|         'PK': '39.32.0.0/11', | ||||
|         'PL': '83.0.0.0/11', | ||||
|         'PM': '70.36.0.0/20', | ||||
|         'PR': '66.50.0.0/16', | ||||
|         'PS': '188.161.0.0/16', | ||||
|         'PT': '85.240.0.0/13', | ||||
|         'PW': '202.124.224.0/20', | ||||
|         'PY': '181.120.0.0/14', | ||||
|         'QA': '37.210.0.0/15', | ||||
|         'RE': '139.26.0.0/16', | ||||
|         'RO': '79.112.0.0/13', | ||||
|         'RS': '178.220.0.0/14', | ||||
|         'RU': '5.136.0.0/13', | ||||
|         'RW': '105.178.0.0/15', | ||||
|         'SA': '188.48.0.0/13', | ||||
|         'SB': '202.1.160.0/19', | ||||
|         'SC': '154.192.0.0/11', | ||||
|         'SD': '154.96.0.0/13', | ||||
|         'SE': '78.64.0.0/12', | ||||
|         'SG': '152.56.0.0/14', | ||||
|         'SI': '188.196.0.0/14', | ||||
|         'SK': '78.98.0.0/15', | ||||
|         'SL': '197.215.0.0/17', | ||||
|         'SM': '89.186.32.0/19', | ||||
|         'SN': '41.82.0.0/15', | ||||
|         'SO': '197.220.64.0/19', | ||||
|         'SR': '186.179.128.0/17', | ||||
|         'SS': '105.235.208.0/21', | ||||
|         'ST': '197.159.160.0/19', | ||||
|         'SV': '168.243.0.0/16', | ||||
|         'SX': '190.102.0.0/20', | ||||
|         'SY': '5.0.0.0/16', | ||||
|         'SZ': '41.84.224.0/19', | ||||
|         'TC': '65.255.48.0/20', | ||||
|         'TD': '154.68.128.0/19', | ||||
|         'TG': '196.168.0.0/14', | ||||
|         'TH': '171.96.0.0/13', | ||||
|         'TJ': '85.9.128.0/18', | ||||
|         'TK': '27.96.24.0/21', | ||||
|         'TL': '180.189.160.0/20', | ||||
|         'TM': '95.85.96.0/19', | ||||
|         'TN': '197.0.0.0/11', | ||||
|         'TO': '175.176.144.0/21', | ||||
|         'TR': '78.160.0.0/11', | ||||
|         'TT': '186.44.0.0/15', | ||||
|         'TV': '202.2.96.0/19', | ||||
|         'TW': '120.96.0.0/11', | ||||
|         'TZ': '156.156.0.0/14', | ||||
|         'UA': '93.72.0.0/13', | ||||
|         'UG': '154.224.0.0/13', | ||||
|         'US': '3.0.0.0/8', | ||||
|         'UY': '167.56.0.0/13', | ||||
|         'UZ': '82.215.64.0/18', | ||||
|         'VA': '212.77.0.0/19', | ||||
|         'VC': '24.92.144.0/20', | ||||
|         'VE': '186.88.0.0/13', | ||||
|         'VG': '172.103.64.0/18', | ||||
|         'VI': '146.226.0.0/16', | ||||
|         'VN': '14.160.0.0/11', | ||||
|         'VU': '202.80.32.0/20', | ||||
|         'WF': '117.20.32.0/21', | ||||
|         'WS': '202.4.32.0/19', | ||||
|         'YE': '134.35.0.0/16', | ||||
|         'YT': '41.242.116.0/22', | ||||
|         'ZA': '41.0.0.0/11', | ||||
|         'ZM': '165.56.0.0/13', | ||||
|         'ZW': '41.85.192.0/19', | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def random_ipv4(cls, code): | ||||
|         """Return a random IPv4 address from the block mapped to a country. | ||||
|  | ||||
|         code is looked up case-insensitively (it is upper-cased) in | ||||
|         _country_ip_map. Returns the address as a dotted-quad string, or | ||||
|         None when the code has no entry in the map. | ||||
|         """ | ||||
|         block = cls._country_ip_map.get(code.upper()) | ||||
|         if not block: | ||||
|             return None | ||||
|         # Blocks are stored in CIDR notation: '<base address>/<prefix length>' | ||||
|         addr, preflen = block.split('/') | ||||
|         # Base address as an unsigned 32-bit integer in network byte order | ||||
|         # (presumably compat_struct_unpack mirrors struct.unpack - confirm) | ||||
|         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] | ||||
|         # Setting every bit below the prefix gives the last address of the block | ||||
|         addr_max = addr_min | (0xffffffff >> int(preflen)) | ||||
|         # random.randint is inclusive on both ends, so the first and the last | ||||
|         # address of the block may be returned as well | ||||
|         return socket.inet_ntoa( | ||||
|             compat_struct_pack('!I', random.randint(addr_min, addr_max))) | ||||
|  | ||||
|  | ||||
| class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): | ||||
|     def __init__(self, proxies=None): | ||||
|         # Set default handlers | ||||
|   | ||||
		Reference in New Issue
	
	Block a user