Match --download-archive during playlist processing (Fixes #1745)
This commit is contained in:
		| @@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubeChannelIE(dl) |         ie = YoutubeChannelIE(dl) | ||||||
|         #test paginated channel |         #test paginated channel | ||||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] |         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') | ||||||
|         self.assertTrue(len(result['entries']) > 90) |         self.assertTrue(len(result['entries']) > 90) | ||||||
|         #test autogenerated channel |         #test autogenerated channel | ||||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] |         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||||
|         self.assertTrue(len(result['entries']) >= 18) |         self.assertTrue(len(result['entries']) >= 18) | ||||||
|  |  | ||||||
|     def test_youtube_user(self): |     def test_youtube_user(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubeUserIE(dl) |         ie = YoutubeUserIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] |         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') | ||||||
|         self.assertTrue(len(result['entries']) >= 320) |         self.assertTrue(len(result['entries']) >= 320) | ||||||
|  |  | ||||||
|     def test_youtube_safe_search(self): |     def test_youtube_safe_search(self): | ||||||
|   | |||||||
| @@ -355,6 +355,8 @@ class YoutubeDL(object): | |||||||
|     def _match_entry(self, info_dict): |     def _match_entry(self, info_dict): | ||||||
|         """ Returns None iff the file should be downloaded """ |         """ Returns None iff the file should be downloaded """ | ||||||
|  |  | ||||||
|  |         if 'title' in info_dict: | ||||||
|  |             # The title may be missing when we're just evaluating the playlist | ||||||
|             title = info_dict['title'] |             title = info_dict['title'] | ||||||
|             matchtitle = self.params.get('matchtitle', False) |             matchtitle = self.params.get('matchtitle', False) | ||||||
|             if matchtitle: |             if matchtitle: | ||||||
| @@ -374,8 +376,8 @@ class YoutubeDL(object): | |||||||
|             if age_limit < info_dict.get('age_limit', 0): |             if age_limit < info_dict.get('age_limit', 0): | ||||||
|                 return u'Skipping "' + title + '" because it is age restricted' |                 return u'Skipping "' + title + '" because it is age restricted' | ||||||
|         if self.in_download_archive(info_dict): |         if self.in_download_archive(info_dict): | ||||||
|             return (u'%(title)s has already been recorded in archive' |             return (u'%s has already been recorded in archive' | ||||||
|                     % info_dict) |                     % info_dict.get('title', info_dict.get('id', u'video'))) | ||||||
|         return None |         return None | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @@ -454,7 +456,7 @@ class YoutubeDL(object): | |||||||
|                                      ie_key=ie_result.get('ie_key'), |                                      ie_key=ie_result.get('ie_key'), | ||||||
|                                      extra_info=extra_info) |                                      extra_info=extra_info) | ||||||
|         elif result_type == 'playlist': |         elif result_type == 'playlist': | ||||||
|             self.add_extra_info(ie_result, extra_info) |  | ||||||
|             # We process each entry in the playlist |             # We process each entry in the playlist | ||||||
|             playlist = ie_result.get('title', None) or ie_result.get('id', None) |             playlist = ie_result.get('title', None) or ie_result.get('id', None) | ||||||
|             self.to_screen(u'[download] Downloading playlist: %s' % playlist) |             self.to_screen(u'[download] Downloading playlist: %s' % playlist) | ||||||
| @@ -484,6 +486,12 @@ class YoutubeDL(object): | |||||||
|                     'webpage_url': ie_result['webpage_url'], |                     'webpage_url': ie_result['webpage_url'], | ||||||
|                     'extractor_key': ie_result['extractor_key'], |                     'extractor_key': ie_result['extractor_key'], | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|  |                 reason = self._match_entry(entry) | ||||||
|  |                 if reason is not None: | ||||||
|  |                     self.to_screen(u'[download] ' + reason) | ||||||
|  |                     continue | ||||||
|  |  | ||||||
|                 entry_result = self.process_ie_result(entry, |                 entry_result = self.process_ie_result(entry, | ||||||
|                                                       download=download, |                                                       download=download, | ||||||
|                                                       extra_info=extra) |                                                       extra_info=extra) | ||||||
| @@ -810,7 +818,16 @@ class YoutubeDL(object): | |||||||
|         fn = self.params.get('download_archive') |         fn = self.params.get('download_archive') | ||||||
|         if fn is None: |         if fn is None: | ||||||
|             return False |             return False | ||||||
|         vid_id = info_dict['extractor'] + u' ' + info_dict['id'] |         extractor = info_dict.get('extractor_id') | ||||||
|  |         if extractor is None: | ||||||
|  |             if 'id' in info_dict: | ||||||
|  |                 extractor = info_dict.get('ie_key')  # key in a playlist | ||||||
|  |         if extractor is None: | ||||||
|  |             return False  # Incomplete video information | ||||||
|  |         # Lowercase the key to be future-proof against any change in case | ||||||
|  |         # and to stay backwards compatible with prior versions | ||||||
|  |         extractor = extractor.lower() | ||||||
|  |         vid_id = extractor + u' ' + info_dict['id'] | ||||||
|         try: |         try: | ||||||
|             with locked_file(fn, 'r', encoding='utf-8') as archive_file: |             with locked_file(fn, 'r', encoding='utf-8') as archive_file: | ||||||
|                 for line in archive_file: |                 for line in archive_file: | ||||||
|   | |||||||
| @@ -229,12 +229,14 @@ class InfoExtractor(object): | |||||||
|         self.to_screen(u'Logging in') |         self.to_screen(u'Logging in') | ||||||
|  |  | ||||||
|     #Methods for following #608 |     #Methods for following #608 | ||||||
|     def url_result(self, url, ie=None): |     def url_result(self, url, ie=None, video_id=None): | ||||||
|         """Returns a url that points to a page that should be processed""" |         """Returns a url that points to a page that should be processed""" | ||||||
|         #TODO: ie should be the class used for getting the info |         #TODO: ie should be the class used for getting the info | ||||||
|         video_info = {'_type': 'url', |         video_info = {'_type': 'url', | ||||||
|                       'url': url, |                       'url': url, | ||||||
|                       'ie_key': ie} |                       'ie_key': ie} | ||||||
|  |         if video_id is not None: | ||||||
|  |             video_info['id'] = video_id | ||||||
|         return video_info |         return video_info | ||||||
|     def playlist_result(self, entries, playlist_id=None, playlist_title=None): |     def playlist_result(self, entries, playlist_id=None, playlist_title=None): | ||||||
|         """Returns a playlist""" |         """Returns a playlist""" | ||||||
|   | |||||||
| @@ -1552,7 +1552,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|             video_id = query_dict['v'][0] |             video_id = query_dict['v'][0] | ||||||
|             if self._downloader.params.get('noplaylist'): |             if self._downloader.params.get('noplaylist'): | ||||||
|                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) |                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) | ||||||
|                 return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') |                 return self.url_result(video_id, 'Youtube', video_id=video_id) | ||||||
|             else: |             else: | ||||||
|                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) |                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||||
|  |  | ||||||
| @@ -1571,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|  |  | ||||||
|         playlist_title = self._og_search_title(page) |         playlist_title = self._og_search_title(page) | ||||||
|  |  | ||||||
|         url_results = [self.url_result(vid, 'Youtube') for vid in ids] |         url_results = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||||
|  |                        for video_id in ids] | ||||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) |         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1626,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor): | |||||||
|  |  | ||||||
|         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) |         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||||
|  |  | ||||||
|         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] |         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||||
|         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] |                        for video_id in video_ids] | ||||||
|         return [self.playlist_result(url_entries, channel_id)] |         return self.playlist_result(url_entries, channel_id) | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeUserIE(InfoExtractor): | class YoutubeUserIE(InfoExtractor): | ||||||
| @@ -1692,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor): | |||||||
|             if len(ids_in_page) < self._GDATA_PAGE_SIZE: |             if len(ids_in_page) < self._GDATA_PAGE_SIZE: | ||||||
|                 break |                 break | ||||||
|  |  | ||||||
|         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] |         url_results = [ | ||||||
|         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] |             self.url_result(video_id, 'Youtube', video_id=video_id) | ||||||
|         return [self.playlist_result(url_results, playlist_title = username)] |             for video_id in video_ids] | ||||||
|  |         return self.playlist_result(url_results, playlist_title=username) | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeSearchIE(SearchInfoExtractor): | class YoutubeSearchIE(SearchInfoExtractor): | ||||||
|     IE_DESC = u'YouTube.com searches' |     IE_DESC = u'YouTube.com searches' | ||||||
| @@ -1735,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor): | |||||||
|  |  | ||||||
|         if len(video_ids) > n: |         if len(video_ids) > n: | ||||||
|             video_ids = video_ids[:n] |             video_ids = video_ids[:n] | ||||||
|         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] |         videos = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||||
|  |                   for video_id in video_ids] | ||||||
|         return self.playlist_result(videos, query) |         return self.playlist_result(videos, query) | ||||||
|  |  | ||||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | class YoutubeSearchDateIE(YoutubeSearchIE): | ||||||
| @@ -1795,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | |||||||
|             feed_html = info['feed_html'] |             feed_html = info['feed_html'] | ||||||
|             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) |             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) | ||||||
|             ids = orderedSet(m.group(1) for m in m_ids) |             ids = orderedSet(m.group(1) for m in m_ids) | ||||||
|             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) |             feed_entries.extend( | ||||||
|  |                 self.url_result(video_id, 'Youtube', video_id=video_id) | ||||||
|  |                 for video_id in ids) | ||||||
|             if info['paging'] is None: |             if info['paging'] is None: | ||||||
|                 break |                 break | ||||||
|         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) |         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user