[youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
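
The switch works by downloading the ordinary playlist web pages and scraping video ids out of them instead of querying the gdata feed. As a rough standalone sketch of that loop (illustration only: the URL template and the two regular expressions are copied from the diff below, while the plain urllib fetch stands in for the extractor's _download_webpage helper and the expected markup is the 2013-era playlist page):

import itertools
import re
import urllib.request

TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
MORE_PAGES_INDICATOR = r'data-link-type="next"'
VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'

def fetch_playlist_ids(playlist_id):
    """Walk the playlist pages and collect video ids until no 'next' link is left."""
    ids = []
    for page_num in itertools.count(1):
        url = TEMPLATE_URL % (playlist_id, page_num)
        page = urllib.request.urlopen(url).read().decode('utf-8')
        # A page lists each id several times, so de-duplicate while keeping order
        # (the extractor uses its orderedSet() utility for this).
        for vid in re.findall(VIDEO_RE, page):
            if vid not in ids:
                ids.append(vid)
        # Stop once the page no longer advertises a next page.
        if re.search(MORE_PAGES_INDICATOR, page) is None:
            break
    return ids
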
@@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'ytdl test PL')
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
     def test_issue_673(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLBB231211A4F62143')[0]
+        result = ie.extract('PLBB231211A4F62143')
         self.assertTrue(len(result['entries']) > 25)
 
     def test_youtube_playlist_long(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         self.assertIsPlaylist(result)
         self.assertTrue(len(result['entries']) >= 799)
 
@@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
         #651
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist_empty(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
         self.assertIsPlaylist(result)
         self.assertEqual(len(result['entries']), 0)
 
@@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
         # TODO find a > 100 (paginating?) videos course
-        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
+        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         entries = result['entries']
         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
         self.assertEqual(len(entries), 25)
@@ -99,7 +99,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_safe_search(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
+        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
         self.assertEqual(len(result['entries']), 2)
 
     def test_youtube_show(self):
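
The only change in the tests is dropping the trailing [0]: the extractor's return statement (last hunk below) now hands back the playlist_result dict directly rather than a one-element list. A rough, assumed sketch of the value those assertions operate on (field names inferred from the assertions and from url_result(vid, 'Youtube'); the entry urls are the bare scraped video ids, and the concrete video id shown is illustrative):

# Illustrative only -- approximate shape of what YoutubePlaylistIE.extract()
# returns after this change (previously this dict was wrapped in a list):
result = {
    '_type': 'playlist',          # what assertIsPlaylist is expected to look for
    'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
    'title': 'ytdl test PL',      # compared by assertEqual(result['title'], ...)
    'entries': [                  # one url_result per scraped video id
        {'_type': 'url', 'url': 'BaW_jenozKc', 'ie_key': 'Youtube'},
        # ...
    ],
}
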
@@ -1506,8 +1506,9 @@ class YoutubePlaylistIE(InfoExtractor):
                      |
                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
-    _MAX_RESULTS = 50
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
     IE_NAME = u'youtube:playlist'
 
     @classmethod
@@ -1532,41 +1533,23 @@ class YoutubePlaylistIE(InfoExtractor):
             else:
                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        # Download playlist videos from API
-        videos = []
+        # Extract the video ids from the playlist pages
+        ids = []
 
         for page_num in itertools.count(1):
-            start_index = self._MAX_RESULTS * (page_num - 1) + 1
-            if start_index >= 1000:
-                self._downloader.report_warning(u'Max number of results reached')
-                break
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+            url = self._TEMPLATE_URL % (playlist_id, page_num)
             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+            # The ids are duplicated
+            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            ids.extend(new_ids)
 
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
-            if 'feed' not in response:
-                raise ExtractorError(u'Got a malformed response from YouTube API')
-            playlist_title = response['feed']['title']['$t']
-            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
+            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                 break
 
-            for entry in response['feed']['entry']:
-                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
-                    videos.append((
-                        index,
-                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
-                    ))
+        playlist_title = self._og_search_title(page)
 
-        videos = [v[1] for v in sorted(videos)]
-
-        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
-        return [self.playlist_result(url_results, playlist_id, playlist_title)]
+        url_results = [self.url_result(vid, 'Youtube') for vid in ids]
+        return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
 class YoutubeChannelIE(InfoExtractor):
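
The two new class attributes carry most of the change: _VIDEO_RE pulls 11-character video ids out of watch links, and _MORE_PAGES_INDICATOR simply tests whether the page still advertises a next page. A quick illustration against a made-up page fragment (the HTML below is not real YouTube markup, only shaped to match the regexes):

import re

VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
MORE_PAGES_INDICATOR = r'data-link-type="next"'

# Invented fragment of a playlist page, for illustration only.
page = ('<a href="/watch?v=BaW_jenozKc&amp;list=PL123" class="yt-uix-tile-link">title</a>'
        '<a href="/watch?v=BaW_jenozKc&amp;list=PL123" class="thumb-link">thumbnail</a>'
        '<a href="/playlist?list=PL123&amp;page=2" data-link-type="next">Next</a>')

print(re.findall(VIDEO_RE, page))                         # ['BaW_jenozKc', 'BaW_jenozKc'] -- duplicated, hence orderedSet()
print(re.search(MORE_PAGES_INDICATOR, page) is not None)  # True -- keep paginating
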