[9gag] Add extractor
This commit is contained in:
		| @@ -405,7 +405,8 @@ class YoutubeDL(object): | ||||
|         for key, value in extra_info.items(): | ||||
|             info_dict.setdefault(key, value) | ||||
|  | ||||
|     def extract_info(self, url, download=True, ie_key=None, extra_info={}): | ||||
|     def extract_info(self, url, download=True, ie_key=None, extra_info={}, | ||||
|                      process=True): | ||||
|         ''' | ||||
|         Returns a list with a dictionary for each video we find. | ||||
|         If 'download', also downloads the videos. | ||||
| @@ -441,7 +442,10 @@ class YoutubeDL(object): | ||||
|                         'webpage_url': url, | ||||
|                         'extractor_key': ie.ie_key(), | ||||
|                     }) | ||||
|                 return self.process_ie_result(ie_result, download, extra_info) | ||||
|                 if process: | ||||
|                     return self.process_ie_result(ie_result, download, extra_info) | ||||
|                 else: | ||||
|                     return ie_result | ||||
|             except ExtractorError as de: # An error we somewhat expected | ||||
|                 self.report_error(compat_str(de), de.format_traceback()) | ||||
|                 break | ||||
| @@ -474,8 +478,32 @@ class YoutubeDL(object): | ||||
|                                      download, | ||||
|                                      ie_key=ie_result.get('ie_key'), | ||||
|                                      extra_info=extra_info) | ||||
|         elif result_type == 'playlist': | ||||
|         elif result_type == 'url_transparent': | ||||
|             # Use the information from the embedding page | ||||
|             info = self.extract_info( | ||||
|                 ie_result['url'], ie_key=ie_result.get('ie_key'), | ||||
|                 extra_info=extra_info, download=False, process=False) | ||||
|  | ||||
|             def make_result(embedded_info): | ||||
|                 new_result = ie_result.copy() | ||||
|                 for f in ('_type', 'url', 'ext', 'player_url', 'formats', | ||||
|                           'entries', 'urlhandle', 'ie_key', 'duration', | ||||
|                           'subtitles', 'annotations', 'format'): | ||||
|                     if f in new_result: | ||||
|                         del new_result[f] | ||||
|                     if f in embedded_info: | ||||
|                         new_result[f] = embedded_info[f] | ||||
|                 return new_result | ||||
|             new_result = make_result(info) | ||||
|  | ||||
|             assert new_result.get('_type') != 'url_transparent' | ||||
|             if new_result.get('_type') == 'compat_list': | ||||
|                 new_result['entries'] = [ | ||||
|                     make_result(e) for e in new_result['entries']] | ||||
|  | ||||
|             return self.process_ie_result( | ||||
|                 new_result, download=download, extra_info=extra_info) | ||||
|         elif result_type == 'playlist': | ||||
|             # We process each entry in the playlist | ||||
|             playlist = ie_result.get('title', None) or ie_result.get('id', None) | ||||
|             self.to_screen(u'[download] Downloading playlist: %s' % playlist) | ||||
|   | ||||
| @@ -102,6 +102,7 @@ from .nbc import NBCNewsIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
|   | ||||
							
								
								
									
										41
									
								
								youtube_dl/extractor/ninegag.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								youtube_dl/extractor/ninegag.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class NineGagIE(InfoExtractor):
    """Information extractor for 9gag.tv video pages.

    The page embeds a YouTube video and carries its own metadata as JSON
    in the ``data-video-meta`` attribute of the ``tv-video`` element.  The
    extractor returns a ``url_transparent`` result: the embedded video is
    handed to the Youtube extractor, while the id, title and other
    descriptive fields are taken from the 9gag page itself.
    """
    IE_NAME = '9gag'
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'

    _TEST = {
        u"url": u"http://9gag.tv/v/1912",
        u"file": u"1912.mp4",
        u"info_dict": {
            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
        },
        u'add_ie': [u'Youtube']
    }

    def _real_extract(self, url):
        """Build the ``url_transparent`` result for a 9gag.tv video URL.

        ``youtubeVideoId`` and ``title`` are required in the page metadata
        (a missing key raises ``KeyError``).  ``description``,
        ``view_count`` and ``thumbnail_url`` are optional: when absent or
        unusable they are reported as ``None`` instead of aborting the
        extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        data_json = self._html_search_regex(r'''(?x)
            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
                data-video-meta="([^"]+)"''', webpage, u'video metadata')

        data = json.loads(data_json)

        # The view count is purely informational; a missing, null or
        # non-numeric value must not prevent the video from being extracted.
        try:
            view_count = int(data.get('view_count'))
        except (TypeError, ValueError):
            view_count = None

        return {
            '_type': 'url_transparent',
            'url': data['youtubeVideoId'],
            'ie_key': 'Youtube',
            'id': video_id,
            'title': data['title'],
            'description': data.get('description'),
            'view_count': view_count,
            'thumbnail': data.get('thumbnail_url'),
        }
		Reference in New Issue
	
	Block a user