[youtube] Extract video titles for channel playlist if possible (Closes #4971)
This commit is contained in:
		@@ -1370,10 +1370,18 @@ class YoutubeChannelIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def extract_videos_from_page(self, page):
 | 
					    def extract_videos_from_page(self, page):
 | 
				
			||||||
        ids_in_page = []
 | 
					        ids_in_page = []
 | 
				
			||||||
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
 | 
					        titles_in_page = []
 | 
				
			||||||
            if mobj.group(1) not in ids_in_page:
 | 
					        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
 | 
				
			||||||
                ids_in_page.append(mobj.group(1))
 | 
					            video_id = mobj.group('id')
 | 
				
			||||||
        return ids_in_page
 | 
					            video_title = unescapeHTML(mobj.group('title'))
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                idx = ids_in_page.index(video_id)
 | 
				
			||||||
 | 
					                if video_title and not titles_in_page[idx]:
 | 
				
			||||||
 | 
					                    titles_in_page[idx] = video_title
 | 
				
			||||||
 | 
					            except ValueError:
 | 
				
			||||||
 | 
					                ids_in_page.append(video_id)
 | 
				
			||||||
 | 
					                titles_in_page.append(video_title)
 | 
				
			||||||
 | 
					        return zip(ids_in_page, titles_in_page)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        channel_id = self._match_id(url)
 | 
					        channel_id = self._match_id(url)
 | 
				
			||||||
@@ -1390,10 +1398,12 @@ class YoutubeChannelIE(InfoExtractor):
 | 
				
			|||||||
        if autogenerated:
 | 
					        if autogenerated:
 | 
				
			||||||
            # The videos are contained in a single page
 | 
					            # The videos are contained in a single page
 | 
				
			||||||
            # the ajax pages can't be used, they are empty
 | 
					            # the ajax pages can't be used, they are empty
 | 
				
			||||||
            video_ids = self.extract_videos_from_page(channel_page)
 | 
					            videos = self.extract_videos_from_page(channel_page)
 | 
				
			||||||
            entries = [
 | 
					            entries = [
 | 
				
			||||||
                self.url_result(video_id, 'Youtube', video_id=video_id)
 | 
					                self.url_result(
 | 
				
			||||||
                for video_id in video_ids]
 | 
					                    video_id, 'Youtube', video_id=video_id,
 | 
				
			||||||
 | 
					                    video_title=video_title)
 | 
				
			||||||
 | 
					                for video_id, video_title in videos]
 | 
				
			||||||
            return self.playlist_result(entries, channel_id)
 | 
					            return self.playlist_result(entries, channel_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def _entries():
 | 
					        def _entries():
 | 
				
			||||||
@@ -1401,9 +1411,10 @@ class YoutubeChannelIE(InfoExtractor):
 | 
				
			|||||||
            for pagenum in itertools.count(1):
 | 
					            for pagenum in itertools.count(1):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                ids_in_page = self.extract_videos_from_page(content_html)
 | 
					                ids_in_page = self.extract_videos_from_page(content_html)
 | 
				
			||||||
                for video_id in ids_in_page:
 | 
					                for video_id, video_title in ids_in_page:
 | 
				
			||||||
                    yield self.url_result(
 | 
					                    yield self.url_result(
 | 
				
			||||||
                        video_id, 'Youtube', video_id=video_id)
 | 
					                        video_id, 'Youtube', video_id=video_id,
 | 
				
			||||||
 | 
					                        video_title=video_title)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                mobj = re.search(
 | 
					                mobj = re.search(
 | 
				
			||||||
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
 | 
					                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user