Compare commits
	
		
			54 Commits
		
	
	
		
			2014.02.22
			...
			2014.02.26
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | fdb7ca3b8d | ||
|  | 0d7caf5cdf | ||
|  | a339d7ba91 | ||
|  | 7216de55d6 | ||
|  | 2437fbca64 | ||
|  | 7d75d06b78 | ||
|  | 13ef5648c4 | ||
|  | 5b2478e2ba | ||
|  | 8b286571c3 | ||
|  | f3ac523794 | ||
|  | 020cf5ebfd | ||
|  | 54ab193970 | ||
|  | 8f563f32ab | ||
|  | 151bae3566 | ||
|  | 76df418cba | ||
|  | d0a72674c6 | ||
|  | 1d430674c7 | ||
|  | 70cb73922b | ||
|  | 344400951c | ||
|  | ea5a0be811 | ||
|  | 3c7fd0bdb2 | ||
|  | 6cadf8c858 | ||
|  | 27579b9e4c | ||
|  | 4d756a9cc0 | ||
|  | 3e668e05be | ||
|  | 60d3a2e0f8 | ||
|  | cc3a3b6b47 | ||
|  | eda1d49a62 | ||
|  | 62e609ab77 | ||
|  | 2bfe4ead4b | ||
|  | b1c6c32f78 | ||
|  | f6acbdecf4 | ||
|  | f1c9dfcc01 | ||
|  | ce78943ae1 | ||
|  | d6f0d86649 | ||
|  | 5bb67dbfea | ||
|  | 47610c4d3e | ||
|  | b732f3581f | ||
|  | 9e57ce716f | ||
|  | cd7ee7aa44 | ||
|  | 3cfe791473 | ||
|  | 973f2532f5 | ||
|  | bc3be21d59 | ||
|  | 0bf5cf9886 | ||
|  | 919052d094 | ||
|  | a2dafe2887 | ||
|  | 92661c994b | ||
|  | ffe8fe356a | ||
|  | bc2f773b4f | ||
|  | f919201ecc | ||
|  | 7ff5d5c2e2 | ||
|  | 9b77f951c7 | ||
|  | a25f2f990a | ||
|  | 78b373975d | 
| @@ -73,9 +73,7 @@ def generator(test_case): | ||||
|         if 'playlist' not in test_case: | ||||
|             info_dict = test_case.get('info_dict', {}) | ||||
|             if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): | ||||
|                 print_skipping('The output file cannot be know, the "file" ' | ||||
|                     'key is missing or the info_dict is incomplete') | ||||
|                 return | ||||
|                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') | ||||
|         if 'skip' in test_case: | ||||
|             print_skipping(test_case['skip']) | ||||
|             return | ||||
|   | ||||
| @@ -170,12 +170,12 @@ class TestPlaylists(unittest.TestCase): | ||||
|     def test_AcademicEarthCourse(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = AcademicEarthCourseIE(dl) | ||||
|         result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/') | ||||
|         result = ie.extract('http://academicearth.org/playlists/laws-of-nature/') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'building-dynamic-websites') | ||||
|         self.assertEqual(result['title'], 'Building Dynamic Websites') | ||||
|         self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") | ||||
|         self.assertEqual(len(result['entries']), 10) | ||||
|         self.assertEqual(result['id'], 'laws-of-nature') | ||||
|         self.assertEqual(result['title'], 'Laws of Nature') | ||||
|         self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") | ||||
|         self.assertEqual(len(result['entries']), 4) | ||||
|          | ||||
|     def test_ivi_compilation(self): | ||||
|         dl = FakeYDL() | ||||
|   | ||||
| @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| # Various small unit tests | ||||
| import io | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| #from youtube_dl.utils import htmlentity_transform | ||||
| @@ -21,6 +22,7 @@ from youtube_dl.utils import ( | ||||
|     orderedSet, | ||||
|     PagedList, | ||||
|     parse_duration, | ||||
|     read_batch_urls, | ||||
|     sanitize_filename, | ||||
|     shell_quote, | ||||
|     smuggle_url, | ||||
| @@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase): | ||||
|     def test_struct_unpack(self): | ||||
|         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) | ||||
|  | ||||
|     def test_read_batch_urls(self): | ||||
|         f = io.StringIO(u'''\xef\xbb\xbf foo | ||||
|             bar\r | ||||
|             baz | ||||
|             # More after this line\r | ||||
|             ; or after this | ||||
|             bam''') | ||||
|         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -118,6 +118,8 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         self.assertEqual(original_video['id'], 'rjFaenf1T-Y') | ||||
|  | ||||
|     def test_youtube_toptracks(self): | ||||
|         print('Skipping: The playlist page gives error 500') | ||||
|         return | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=MCUS') | ||||
|   | ||||
| @@ -47,12 +47,14 @@ __authors__  = ( | ||||
|     'Michael Kaiser', | ||||
|     'Niklas Laxström', | ||||
|     'David Triendl', | ||||
|     'Anthony Weems', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|  | ||||
| import codecs | ||||
| import getpass | ||||
| import io | ||||
| import locale | ||||
| import optparse | ||||
| import os | ||||
| @@ -71,6 +73,7 @@ from .utils import ( | ||||
|     get_cachedir, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
|     SameFileError, | ||||
|     setproctitle, | ||||
|     std_headers, | ||||
| @@ -552,21 +555,19 @@ def _real_main(argv=None): | ||||
|         sys.exit(0) | ||||
|  | ||||
|     # Batch file verification | ||||
|     batchurls = [] | ||||
|     batch_urls = [] | ||||
|     if opts.batchfile is not None: | ||||
|         try: | ||||
|             if opts.batchfile == '-': | ||||
|                 batchfd = sys.stdin | ||||
|             else: | ||||
|                 batchfd = open(opts.batchfile, 'r') | ||||
|             batchurls = batchfd.readlines() | ||||
|             batchurls = [x.strip() for x in batchurls] | ||||
|             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] | ||||
|                 batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') | ||||
|             batch_urls = read_batch_urls(batchfd) | ||||
|             if opts.verbose: | ||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') | ||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n') | ||||
|         except IOError: | ||||
|             sys.exit(u'ERROR: batch file could not be read') | ||||
|     all_urls = batchurls + args | ||||
|     all_urls = batch_urls + args | ||||
|     all_urls = [url.strip() for url in all_urls] | ||||
|     _enc = preferredencoding() | ||||
|     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] | ||||
|   | ||||
| @@ -137,7 +137,7 @@ from .malemotion import MalemotionIE | ||||
| from .mdr import MDRIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mit import TechTVMITIE, MITIE | ||||
| from .mit import TechTVMITIE, MITIE, OCWMITIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mpora import MporaIE | ||||
| from .mofosex import MofosexIE | ||||
| @@ -152,7 +152,10 @@ from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import NBCNewsIE | ||||
| from .nbc import ( | ||||
|     NBCIE, | ||||
|     NBCNewsIE, | ||||
| ) | ||||
| from .ndr import NDRIE | ||||
| from .ndtv import NDTVIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| @@ -161,7 +164,7 @@ from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| from .normalboots import NormalbootsIE | ||||
| from .novamov import NovamovIE | ||||
| from .novamov import NovaMovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .ooyala import OoyalaIE | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class AcademicEarthCourseIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' | ||||
|     IE_NAME = 'AcademicEarth:Course' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title') | ||||
|             r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<p class="excerpt">(.*?)</p>', | ||||
|             r'<p class="excerpt"[^>]*?>(.*?)</p>', | ||||
|             webpage, u'description', fatal=False) | ||||
|         urls = re.findall( | ||||
|             r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">', | ||||
|             r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">', | ||||
|             webpage) | ||||
|         entries = [self.url_result(u) for u in urls] | ||||
|  | ||||
|   | ||||
| @@ -23,8 +23,8 @@ class BreakIE(InfoExtractor): | ||||
|         video_id = mobj.group(1).split("-")[-1] | ||||
|         embed_url = 'http://www.break.com/embed/%s' % video_id | ||||
|         webpage = self._download_webpage(embed_url, video_id) | ||||
|         info_json = self._search_regex(r'var embedVars = ({.*?});', webpage, | ||||
|                                        'info json', flags=re.DOTALL) | ||||
|         info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>', | ||||
|             webpage, 'info json', flags=re.DOTALL) | ||||
|         info = json.loads(info_json) | ||||
|         video_url = info['videoUri'] | ||||
|         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -8,73 +9,63 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CinemassacreIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|         u'file': u'19911.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20121110', | ||||
|             u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', | ||||
|             u'description': u'md5:fb87405fcb42a331742a0dce2708560b', | ||||
|     _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|             'file': '19911.mp4', | ||||
|             'md5': 'fde81fbafaee331785f58cd6c0d46190', | ||||
|             'info_dict': { | ||||
|                 'upload_date': '20121110', | ||||
|                 'title': '“Angry Video Game Nerd: The Movie” – Trailer', | ||||
|                 'description': 'md5:fb87405fcb42a331742a0dce2708560b', | ||||
|             }, | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp download | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|         u'file': u'521be8ef82b16.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20131002', | ||||
|             u'title': u'The Mummy’s Hand (1940)', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp download | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|             'file': '521be8ef82b16.mp4', | ||||
|             'md5': 'd72f10cd39eac4215048f62ab477a511', | ||||
|             'info_dict': { | ||||
|                 'upload_date': '20131002', | ||||
|                 'title': 'The Mummy’s Hand (1940)', | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         webpage_url = u'http://' + mobj.group('url') | ||||
|         webpage = self._download_webpage(webpage_url, None) # Don't know video id yet | ||||
|         webpage = self._download_webpage(url, None)  # Don't know video id yet | ||||
|         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') | ||||
|         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) | ||||
|         if not mobj: | ||||
|             raise ExtractorError(u'Can\'t extract embed url and video id') | ||||
|         playerdata_url = mobj.group(u'embed_url') | ||||
|         video_id = mobj.group(u'video_id') | ||||
|             raise ExtractorError('Can\'t extract embed url and video id') | ||||
|         playerdata_url = mobj.group('embed_url') | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', | ||||
|             webpage, u'title') | ||||
|             webpage, 'title') | ||||
|         video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', | ||||
|             webpage, u'description', flags=re.DOTALL, fatal=False) | ||||
|             webpage, 'description', flags=re.DOTALL, fatal=False) | ||||
|         if len(video_description) == 0: | ||||
|             video_description = None | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id) | ||||
|         url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') | ||||
|  | ||||
|         sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') | ||||
|         hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') | ||||
|         video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) | ||||
|         sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file') | ||||
|         hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file') | ||||
|         video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': url, | ||||
|                 'play_path': 'mp4:' + sd_file, | ||||
|                 'rtmp_live': True, # workaround | ||||
|                 'ext': 'flv', | ||||
|                 'url': sd_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'format': 'sd', | ||||
|                 'format_id': 'sd', | ||||
|             }, | ||||
|             { | ||||
|                 'url': url, | ||||
|                 'play_path': 'mp4:' + hd_file, | ||||
|                 'rtmp_live': True, # workaround | ||||
|                 'ext': 'flv', | ||||
|                 'url': hd_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'format': 'hd', | ||||
|                 'format_id': 'hd', | ||||
|             }, | ||||
|   | ||||
| @@ -1,7 +1,11 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re, base64, zlib | ||||
| import re | ||||
| import json | ||||
| import base64 | ||||
| import zlib | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| from .common import InfoExtractor | ||||
| @@ -19,13 +23,15 @@ from ..aes import ( | ||||
|     inc, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CrunchyrollIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _TESTS = [{ | ||||
|     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||
|         'file': '645513.flv', | ||||
|         #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', | ||||
|         'info_dict': { | ||||
|             'id': '645513', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', | ||||
|             'description': 'md5:2d17137920c64f2f49981a7797d275ef', | ||||
|             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', | ||||
| @@ -36,7 +42,7 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             # rtmp | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|     } | ||||
|  | ||||
|     _FORMAT_IDS = { | ||||
|         '360': ('60', '106'), | ||||
| @@ -68,7 +74,7 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) | ||||
|             # Extend 160 Bit hash to 256 Bit | ||||
|             return shaHash + [0] * 12 | ||||
|          | ||||
|  | ||||
|         key = obfuscate_key(id) | ||||
|         class Counter: | ||||
|             __value = iv | ||||
| @@ -80,9 +86,8 @@ class CrunchyrollIE(InfoExtractor): | ||||
|         return zlib.decompress(decrypted_data) | ||||
|  | ||||
|     def _convert_subtitles_to_srt(self, subtitles): | ||||
|         i=1 | ||||
|         output = '' | ||||
|         for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): | ||||
|         for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1): | ||||
|             start = start.replace('.', ',') | ||||
|             end = end.replace('.', ',') | ||||
|             text = clean_html(text) | ||||
| @@ -90,7 +95,6 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             if not text: | ||||
|                 continue | ||||
|             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) | ||||
|             i+=1 | ||||
|         return output | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
| @@ -108,6 +112,12 @@ class CrunchyrollIE(InfoExtractor): | ||||
|         if note_m: | ||||
|             raise ExtractorError(note_m) | ||||
|  | ||||
|         mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage) | ||||
|         if mobj: | ||||
|             msg = json.loads(mobj.group('msg')) | ||||
|             if msg.get('type') == 'error': | ||||
|                 raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) | ||||
|         video_title = re.sub(r' {2,}', ' ', video_title) | ||||
|         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') | ||||
| @@ -123,7 +133,7 @@ class CrunchyrollIE(InfoExtractor): | ||||
|         playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) | ||||
|         playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') | ||||
|          | ||||
|  | ||||
|         stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') | ||||
|         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) | ||||
|  | ||||
| @@ -161,7 +171,7 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             data = base64.b64decode(data) | ||||
|  | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||
|   | ||||
| @@ -363,11 +363,17 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group(1), 'Mpora') | ||||
|  | ||||
|         # Look for embedded Novamov player | ||||
|         # Look for embedded NovaMov player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Novamov') | ||||
|             return self.url_result(mobj.group('url'), 'NovaMov') | ||||
|  | ||||
|         # Look for embedded NowVideo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'NowVideo') | ||||
|  | ||||
|         # Look for embedded Facebook player | ||||
|         mobj = re.search( | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from ..utils import compat_urllib_request | ||||
|  | ||||
|  | ||||
| class IPrimaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)' | ||||
|     _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://play.iprima.cz/particka/particka-92', | ||||
| @@ -22,20 +22,32 @@ class IPrimaIE(InfoExtractor): | ||||
|             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|             'skip_download': True,  # requires rtmpdump | ||||
|         }, | ||||
|     }, | ||||
|     ] | ||||
|     }, { | ||||
|         'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda', | ||||
|         'info_dict': { | ||||
|             'id': '9718337', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Tchibo Partička - Jarní móda', | ||||
|             'description': 'md5:589f8f59f414220621ff8882eb3ce7be', | ||||
|             'thumbnail': 're:^http:.*\.jpg$', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires rtmpdump | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % ( | ||||
|                          floor(random()*1073741824), | ||||
|                          floor(random()*1073741824)) | ||||
|         player_url = ( | ||||
|             'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % | ||||
|             (floor(random()*1073741824), floor(random()*1073741824)) | ||||
|         ) | ||||
|  | ||||
|         req = compat_urllib_request.Request(player_url) | ||||
|         req.add_header('Referer', url) | ||||
| @@ -44,18 +56,20 @@ class IPrimaIE(InfoExtractor): | ||||
|         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1]) | ||||
|  | ||||
|         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO') | ||||
|  | ||||
|         if zoneGEO != '0': | ||||
|             base_url = base_url.replace('token', 'token_'+zoneGEO) | ||||
|             base_url = base_url.replace('token', 'token_' + zoneGEO) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['lq', 'hq', 'hd']: | ||||
|             filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename') | ||||
|             filename = self._html_search_regex( | ||||
|                 r'"%s_id":(.+?),' % format_id, webpage, 'filename') | ||||
|  | ||||
|             if filename == 'null': | ||||
|                 continue | ||||
|  | ||||
|             real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id') | ||||
|             real_id = self._search_regex( | ||||
|                 r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]', | ||||
|                 filename, 'real video id') | ||||
|  | ||||
|             if format_id == 'lq': | ||||
|                 quality = 0 | ||||
| @@ -63,13 +77,13 @@ class IPrimaIE(InfoExtractor): | ||||
|                 quality = 1 | ||||
|             elif format_id == 'hd': | ||||
|                 quality = 2 | ||||
|                 filename = 'hq/'+filename | ||||
|                 filename = 'hq/' + filename | ||||
|  | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'url': base_url, | ||||
|                 'quality': quality, | ||||
|                 'play_path': 'mp4:'+filename.replace('"', '')[:-4], | ||||
|                 'play_path': 'mp4:' + filename.replace('"', '')[:-4], | ||||
|                 'rtmp_live': True, | ||||
|                 'ext': 'flv', | ||||
|             }) | ||||
|   | ||||
| @@ -166,6 +166,7 @@ class MetacafeIE(InfoExtractor): | ||||
|  | ||||
|         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title') | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         video_uploader = self._html_search_regex( | ||||
|                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', | ||||
|                 webpage, u'uploader nickname', fatal=False) | ||||
| @@ -183,6 +184,7 @@ class MetacafeIE(InfoExtractor): | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'thumbnail':thumbnail, | ||||
|             'ext':      video_ext, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
| @@ -1,24 +1,29 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     clean_html, | ||||
|     get_element_by_id, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TechTVMITIE(InfoExtractor): | ||||
|     IE_NAME = u'techtv.mit.edu' | ||||
|     IE_NAME = 'techtv.mit.edu' | ||||
|     _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', | ||||
|         u'file': u'25418.mp4', | ||||
|         u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f', | ||||
|         u'info_dict': { | ||||
|             u'title': u'MIT DNA Learning Center Set', | ||||
|             u'description': u'md5:82313335e8a8a3f243351ba55bc1b474', | ||||
|         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', | ||||
|         'md5': '1f8cb3e170d41fd74add04d3c9330e5f', | ||||
|         'info_dict': { | ||||
|             'id': '25418', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'MIT DNA Learning Center Set', | ||||
|             'description': 'md5:82313335e8a8a3f243351ba55bc1b474', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -27,12 +32,12 @@ class TechTVMITIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         raw_page = self._download_webpage( | ||||
|             'http://techtv.mit.edu/videos/%s' % video_id, video_id) | ||||
|         clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page) | ||||
|         clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) | ||||
|  | ||||
|         base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', | ||||
|             raw_page, u'base url') | ||||
|         formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page, | ||||
|             u'video formats') | ||||
|         base_url = self._search_regex( | ||||
|             r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url') | ||||
|         formats_json = self._search_regex( | ||||
|             r'bitrates: (\[.+?\])', raw_page, 'video formats') | ||||
|         formats_mit = json.loads(formats_json) | ||||
|         formats = [ | ||||
|             { | ||||
| @@ -48,28 +53,32 @@ class TechTVMITIE(InfoExtractor): | ||||
|  | ||||
|         title = get_element_by_id('edit-title', clean_page) | ||||
|         description = clean_html(get_element_by_id('edit-description', clean_page)) | ||||
|         thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', | ||||
|             raw_page, u'thumbnail', flags=re.DOTALL) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'playlist:.*?url: \'(.+?)\'', | ||||
|             raw_page, 'thumbnail', flags=re.DOTALL) | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|                 'formats': formats, | ||||
|                 'description': description, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 } | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class MITIE(TechTVMITIE): | ||||
|     IE_NAME = u'video.mit.edu' | ||||
|     IE_NAME = 'video.mit.edu' | ||||
|     _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', | ||||
|         u'file': u'21783.mp4', | ||||
|         u'md5': u'7db01d5ccc1895fc5010e9c9e13648da', | ||||
|         u'info_dict': { | ||||
|             u'title': u'The Government is Profiling You', | ||||
|             u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd', | ||||
|         'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', | ||||
|         'file': '.mp4', | ||||
|         'md5': '7db01d5ccc1895fc5010e9c9e13648da', | ||||
|         'info_dict': { | ||||
|             'id': '21783', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Government is Profiling You', | ||||
|             'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -77,7 +86,73 @@ class MITIE(TechTVMITIE): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)) | ||||
|         embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage, | ||||
|             u'embed url') | ||||
|         embed_url = self._search_regex( | ||||
|             r'<iframe .*?src="(.+?)"', webpage, 'embed url') | ||||
|         return self.url_result(embed_url, ie='TechTVMIT') | ||||
|  | ||||
|  | ||||
class OCWMITIE(InfoExtractor):
    """Extractor for MIT OpenCourseWare course pages (ocw.mit.edu).

    The page is scanned for a call to one of two JavaScript helpers,
    ``ocw_embed_chapter_media`` or ``ocw_embed_media``.  The call's argument
    list supplies the media URL and the captions file.  The media URL is
    passed to ``YoutubeIE.extract_id`` and returned in a ``url_transparent``
    result whose ``ie_key`` is ``'Youtube'``.
    """
    IE_NAME = 'ocw.mit.edu'
    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
    # Base against which the (possibly relative) captions path is resolved.
    _BASE_URL = 'http://ocw.mit.edu/'

    _TESTS = [
        {
            'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
            'info_dict': {
                'id': 'EObHWIEKGjA',
                'ext': 'mp4',
                'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
                #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
            }
        },
        {
            'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
            'info_dict': {
                'id': '7K1sB05pE0A',
                'ext': 'mp4',
                'title': 'Session 1: Introduction to Derivatives',
                'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
                #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
            }
        }
    ]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the video embedded in *url*.

        Raises ExtractorError when neither embed helper call is found in the
        downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        topic = mobj.group('topic')

        webpage = self._download_webpage(url, topic)
        title = self._html_search_meta('WT.cg_s', webpage)
        description = self._html_search_meta('Description', webpage)

        # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
        embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
        if embed_chapter_media:
            # Strip the quotes, then split the argument list on commas.
            metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
            metadata = re.split(r', ?', metadata)
            yt = metadata[1]
            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
        else:
            # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file)
            embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
            if embed_media:
                metadata = re.sub(r'[\'"]', '', embed_media.group(1))
                metadata = re.split(r', ?', metadata)
                yt = metadata[1]
                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
            else:
                raise ExtractorError('Unable to find embedded YouTube video.')
        video_id = YoutubeIE.extract_id(yt)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'description': description,
            'url': yt,
            # A stray 'url_transparent' literal used to precede this key and
            # was implicitly concatenated with it, yielding the bogus key
            # 'url_transparentsubtitles'.
            # NOTE(review): subs is a single captions URL string; confirm the
            # consumer of 'subtitles' accepts that form.
            'subtitles': subs,
            'ie_key': 'Youtube',
        }
|   | ||||
| @@ -10,7 +10,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class MixcloudIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' | ||||
|     IE_NAME = 'mixcloud' | ||||
|  | ||||
|     _TEST = { | ||||
|   | ||||
| @@ -1,19 +1,46 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import find_xpath_attr, compat_str | ||||
|  | ||||
|  | ||||
class NBCIE(InfoExtractor):
    """Extractor for nbc.com video pages.

    Reads the ``data-mpx-url`` attribute of the full-size video player
    element and passes that URL on via ``url_result``.
    """
    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)'

    _TEST = {
        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
        'info_dict': {
            'id': 'u1RInQZRN7QJ',
            'ext': 'flv',
            'title': 'I Am a Firefighter',
            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
        },
    }

    def _real_extract(self, url):
        """Return a ``url_result`` for the player URL found on the page at *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)
        mpx_url = self._search_regex(
            'class="video-player video-player-full" data-mpx-url="(.*?)"',
            page, 'theplatform url')
        # A scheme-relative URL ('//host/...') is given an explicit http: scheme.
        if mpx_url.startswith('//'):
            return self.url_result('http:' + mpx_url)
        return self.url_result(mpx_url)
|  | ||||
|  | ||||
| class NBCNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.nbcnews.com/video/nbc-news/52753292', | ||||
|         u'file': u'52753292.flv', | ||||
|         u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Crew emerges after four-month Mars food study', | ||||
|             u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1', | ||||
|         'url': 'http://www.nbcnews.com/video/nbc-news/52753292', | ||||
|         'md5': '47abaac93c6eaf9ad37ee6c4463a5179', | ||||
|         'info_dict': { | ||||
|             'id': '52753292', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Crew emerges after four-month Mars food study', | ||||
|             'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -23,10 +50,11 @@ class NBCNewsIE(InfoExtractor): | ||||
|         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) | ||||
|         info = all_info.find('video') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': info.find('headline').text, | ||||
|                 'ext': 'flv', | ||||
|                 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, | ||||
|                 'description': compat_str(info.find('caption').text), | ||||
|                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|                 } | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info.find('headline').text, | ||||
|             'ext': 'flv', | ||||
|             'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, | ||||
|             'description': compat_str(info.find('caption').text), | ||||
|             'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|         } | ||||
|   | ||||
| @@ -1,61 +1,51 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NormalbootsIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$' | ||||
|     _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://normalboots.com/video/home-alone-games-jontron/', | ||||
|         u'file': u'home-alone-games-jontron.mp4', | ||||
|         u'md5': u'8bf6de238915dd501105b44ef5f1e0f6', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Home Alone Games - JonTron - NormalBoots', | ||||
|             u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/', | ||||
|             u'uploader': u'JonTron', | ||||
|             u'upload_date': u'20140125', | ||||
|         'url': 'http://normalboots.com/video/home-alone-games-jontron/', | ||||
|         'md5': '8bf6de238915dd501105b44ef5f1e0f6', | ||||
|         'info_dict': { | ||||
|             'id': 'home-alone-games-jontron', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Home Alone Games - JonTron - NormalBoots', | ||||
|             'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', | ||||
|             'uploader': 'JonTron', | ||||
|             'upload_date': '20140125', | ||||
|         } | ||||
|     } | ||||
|      | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('videoid') | ||||
|          | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'uploader': None, | ||||
|             'upload_date': None, | ||||
|         } | ||||
|          | ||||
|         if url[:4] != 'http': | ||||
|             url = 'http://' + url | ||||
|          | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_title = self._og_search_title(webpage) | ||||
|         video_description = self._og_search_description(webpage) | ||||
|         video_thumbnail = self._og_search_thumbnail(webpage) | ||||
|         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', | ||||
|             webpage, 'uploader') | ||||
|         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',  | ||||
|         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', | ||||
|             webpage, 'date') | ||||
|         video_upload_date = unified_strdate(raw_upload_date) | ||||
|         video_upload_date = unified_strdate(raw_upload_date) | ||||
|              | ||||
|  | ||||
|         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') | ||||
|         player_page = self._download_webpage(player_url, video_id) | ||||
|         video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file') | ||||
|          | ||||
|         info['url'] = video_url | ||||
|         info['title'] = video_title | ||||
|         info['description'] = video_description | ||||
|         info['thumbnail'] = video_thumbnail | ||||
|         info['uploader'] = video_uploader | ||||
|         info['upload_date'] = video_upload_date | ||||
|          | ||||
|         return info | ||||
|         video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|         } | ||||
|   | ||||
| @@ -9,14 +9,25 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NovamovIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})' | ||||
| class NovaMovIE(InfoExtractor): | ||||
|     IE_NAME = 'novamov' | ||||
|     IE_DESC = 'NovaMov' | ||||
|  | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'} | ||||
|  | ||||
|     _HOST = 'www.novamov.com' | ||||
|  | ||||
|     _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>' | ||||
|     _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";' | ||||
|     _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>' | ||||
|     _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.novamov.com/video/4rurhn9x446jj', | ||||
|         'file': '4rurhn9x446jj.flv', | ||||
|         'md5': '7205f346a52bbeba427603ba10d4b935', | ||||
|         'info_dict': { | ||||
|             'id': '4rurhn9x446jj', | ||||
|             'ext': 'flv', | ||||
|             'title': 'search engine optimization', | ||||
|             'description': 'search engine optimization is used to rank the web page in the google search engine' | ||||
|         }, | ||||
| @@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|  | ||||
|         page = self._download_webpage('http://www.novamov.com/video/%s' % video_id, | ||||
|                                       video_id, 'Downloading video page') | ||||
|         page = self._download_webpage( | ||||
|             'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page') | ||||
|  | ||||
|         if re.search(r'This file no longer exists on our servers!</h2>', page) is not None: | ||||
|         if re.search(self._FILE_DELETED_REGEX, page) is not None: | ||||
|             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         filekey = self._search_regex( | ||||
|             r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey') | ||||
|         filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>', | ||||
|             page, 'title', fatal=False) | ||||
|         title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) | ||||
|  | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>', | ||||
|             page, 'description', fatal=False) | ||||
|         description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False) | ||||
|  | ||||
|         api_response = self._download_webpage( | ||||
|             'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id), | ||||
|             video_id, 'Downloading video api response') | ||||
|             'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, | ||||
|             'Downloading video api response') | ||||
|  | ||||
|         response = compat_urlparse.parse_qs(api_response) | ||||
|  | ||||
|         if 'error_msg' in response: | ||||
|             raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True) | ||||
|             raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) | ||||
|  | ||||
|         video_url = response['url'][0] | ||||
|  | ||||
| @@ -60,4 +66,4 @@ class NovamovIE(InfoExtractor): | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description | ||||
|         } | ||||
|         } | ||||
| @@ -1,46 +1,28 @@ | ||||
| import re | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urlparse | ||||
| from .novamov import NovaMovIE | ||||
|  | ||||
|  | ||||
| class NowVideoIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P<id>\w+)' | ||||
| class NowVideoIE(NovaMovIE): | ||||
|     IE_NAME = 'nowvideo' | ||||
|     IE_DESC = 'NowVideo' | ||||
|  | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'} | ||||
|  | ||||
|     _HOST = 'www.nowvideo.ch' | ||||
|  | ||||
|     _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' | ||||
|     _FILEKEY_REGEX = r'var fkzd="([^"]+)";' | ||||
|     _TITLE_REGEX = r'<h4>([^<]+)</h4>' | ||||
|     _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa', | ||||
|         u'file': u'0mw0yow7b6dxa.flv', | ||||
|         u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817', | ||||
|         u'info_dict': { | ||||
|             u"title": u"youtubedl test video _BaW_jenozKc.mp4" | ||||
|         'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa', | ||||
|         'md5': 'f8fbbc8add72bd95b7850c6a02fc8817', | ||||
|         'info_dict': { | ||||
|             'id': '0mw0yow7b6dxa', | ||||
|             'ext': 'flv', | ||||
|             'title': 'youtubedl test video _BaW_jenozKc.mp4', | ||||
|             'description': 'Description', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.nowvideo.ch/video/' + video_id | ||||
|         embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         embed_page = self._download_webpage(embed_url, video_id, | ||||
|             u'Downloading embed page') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h4>(.*)</h4>', | ||||
|             webpage, u'video title') | ||||
|  | ||||
|         video_key = self._search_regex(r'var fkzd="(.*)";', | ||||
|             embed_page, u'video key') | ||||
|  | ||||
|         api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) | ||||
|         api_response = self._download_webpage(api_call, video_id, | ||||
|             u'Downloading API page') | ||||
|         video_url = compat_urlparse.parse_qs(api_response)[u'url'][0] | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       video_url, | ||||
|             'ext':       'flv', | ||||
|             'title':     video_title, | ||||
|         }] | ||||
|     } | ||||
| @@ -1,7 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class PodomaticIE(InfoExtractor): | ||||
| @@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", | ||||
|         u"file": u"2009-01-02T16_03_35-08_00.mp3", | ||||
|         u"md5": u"84bb855fcf3429e6bf72460e1eed782d", | ||||
|         u"info_dict": { | ||||
|             u"uploader": u"Science Teaching Tips", | ||||
|             u"uploader_id": u"scienceteachingtips", | ||||
|             u"title": u"64.  When the Moon Hits Your Eye", | ||||
|             u"duration": 446, | ||||
|         "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", | ||||
|         "file": "2009-01-02T16_03_35-08_00.mp3", | ||||
|         "md5": "84bb855fcf3429e6bf72460e1eed782d", | ||||
|         "info_dict": { | ||||
|             "uploader": "Science Teaching Tips", | ||||
|             "uploader_id": "scienceteachingtips", | ||||
|             "title": "64.  When the Moon Hits Your Eye", | ||||
|             "duration": 446, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor): | ||||
|         uploader = data['podcast'] | ||||
|         title = data['title'] | ||||
|         thumbnail = data['imageLocation'] | ||||
|         duration = int(data['length'] / 1000.0) | ||||
|         duration = int_or_none(data.get('length'), 1000) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -39,6 +39,8 @@ class TestURLIE(InfoExtractor): | ||||
|                     ('Found multiple matching extractors: %s' % | ||||
|                         ' '.join(ie.IE_NAME for ie in matching_extractors)), | ||||
|                     expected=True) | ||||
|         else: | ||||
|             extractor = matching_extractors[0] | ||||
|  | ||||
|         num_str = mobj.group('num') | ||||
|         num = int(num_str) if num_str else 0 | ||||
|   | ||||
| @@ -13,7 +13,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language | ||||
| class ThePlatformIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ | ||||
|            (?P<config>[^/\?]+/(?:swf|config)/select/)? | ||||
|            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? | ||||
|          |theplatform:)(?P<id>[^/\?&]+)''' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -54,10 +54,15 @@ class ThePlatformIE(InfoExtractor): | ||||
|  | ||||
|         f4m_node = body.find(_x('smil:seq/smil:video')) | ||||
|         if f4m_node is not None: | ||||
|             f4m_url = f4m_node.attrib['src'] | ||||
|             if 'manifest.f4m?' not in f4m_url: | ||||
|                 f4m_url += '?' | ||||
|             # the parameters are from syfy.com, other sites may use others, | ||||
|             # they also work for nbc.com | ||||
|             f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' | ||||
|             formats = [{ | ||||
|                 'ext': 'flv', | ||||
|                 # the parameters are from syfy.com, other sites may use others | ||||
|                 'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3', | ||||
|                 'url': f4m_url, | ||||
|             }] | ||||
|         else: | ||||
|             base_url = head.find(_x('smil:meta')).attrib['base'] | ||||
| @@ -95,9 +100,10 @@ class ThePlatformIE(InfoExtractor): | ||||
|         if mobj.group('config'): | ||||
|             config_url = url+ '&form=json' | ||||
|             config_url = config_url.replace('swf/', 'config/') | ||||
|             config_url = config_url.replace('onsite/', 'onsite/config/') | ||||
|             config_json = self._download_webpage(config_url, video_id, u'Downloading config') | ||||
|             config = json.loads(config_json) | ||||
|             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4' | ||||
|             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' | ||||
|         else: | ||||
|             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||
|                 'format=smil&mbr=true'.format(video_id)) | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_request | ||||
|  | ||||
|  | ||||
| class VeohIE(InfoExtractor): | ||||
| @@ -24,6 +25,13 @@ class VeohIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         age_limit = 0 | ||||
|         if 'class="adultwarning-container"' in webpage: | ||||
|             self.report_age_confirmation() | ||||
|             age_limit = 18 | ||||
|             request = compat_urllib_request.Request(url) | ||||
|             request.add_header('Cookie', 'confirmedAdult=true') | ||||
|             webpage = self._download_webpage(request, video_id) | ||||
|  | ||||
|         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage) | ||||
|         if m_youtube is not None: | ||||
| @@ -44,4 +52,5 @@ class VeohIE(InfoExtractor): | ||||
|             'thumbnail': info.get('highResImage') or info.get('medResImage'), | ||||
|             'description': info['description'], | ||||
|             'view_count': info['views'], | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
| @@ -24,9 +24,10 @@ class VevoIE(InfoExtractor): | ||||
|         (?P<id>[^&?#]+)''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         'file': 'GB1101300280.mp4', | ||||
|         "md5": "06bea460acb744eab74a9d7dcb4bfd61", | ||||
|         'info_dict': { | ||||
|             'id': 'GB1101300280', | ||||
|             'ext': 'mp4', | ||||
|             "upload_date": "20130624", | ||||
|             "uploader": "Hurts", | ||||
|             "title": "Somebody to Die For", | ||||
| @@ -34,6 +35,33 @@ class VevoIE(InfoExtractor): | ||||
|             "width": 1920, | ||||
|             "height": 1080, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'v3 SMIL format', | ||||
|         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', | ||||
|         'md5': '893ec0e0d4426a1d96c01de8f2bdff58', | ||||
|         'info_dict': { | ||||
|             'id': 'USUV71302923', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20140219', | ||||
|             'uploader': 'Cassadee Pope', | ||||
|             'title': 'I Wish I Could Break Your Heart', | ||||
|             'duration': 226.101, | ||||
|             'age_limit': 0, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Age-limited video', | ||||
|         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', | ||||
|         'info_dict': { | ||||
|             'id': 'USRV81300282', | ||||
|             'ext': 'mp4', | ||||
|             'age_limit': 18, | ||||
|             'title': 'Tunnel Vision (Explicit)', | ||||
|             'uploader': 'Justin Timberlake', | ||||
|             'upload_date': '20130704', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'true', | ||||
|         } | ||||
|     }] | ||||
|     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' | ||||
|  | ||||
| @@ -105,9 +133,31 @@ class VevoIE(InfoExtractor): | ||||
|         video_info = self._download_json(json_url, video_id)['video'] | ||||
|  | ||||
|         formats = self._formats_from_json(video_info) | ||||
|  | ||||
|         is_explicit = video_info.get('isExplicit') | ||||
|         if is_explicit is True: | ||||
|             age_limit = 18 | ||||
|         elif is_explicit is False: | ||||
|             age_limit = 0 | ||||
|         else: | ||||
|             age_limit = None | ||||
|  | ||||
|         # Download SMIL | ||||
|         smil_blocks = sorted(( | ||||
|             f for f in video_info['videoVersions'] | ||||
|             if f['sourceType'] == 13), | ||||
|             key=lambda f: f['version']) | ||||
|  | ||||
|         smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||
|             self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||
|         if smil_blocks: | ||||
|             smil_url_m = self._search_regex( | ||||
|                 r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', | ||||
|                 fatal=False) | ||||
|             if smil_url_m is not None: | ||||
|                 smil_url = smil_url_m | ||||
|  | ||||
|         try: | ||||
|             smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||
|                 self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||
|             smil_xml = self._download_webpage(smil_url, video_id, | ||||
|                                               'Downloading SMIL info') | ||||
|             formats.extend(self._formats_from_smil(smil_xml)) | ||||
| @@ -128,4 +178,5 @@ class VevoIE(InfoExtractor): | ||||
|             'upload_date': upload_date.strftime('%Y%m%d'), | ||||
|             'uploader': video_info['mainArtists'][0]['artistName'], | ||||
|             'duration': video_info['duration'], | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
| @@ -1,8 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
| class VineIE(InfoExtractor): | ||||
| @@ -13,31 +15,46 @@ class VineIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'b9KOOWX7HUx', | ||||
|             'ext': 'mp4', | ||||
|             'uploader': 'Jack Dorsey', | ||||
|             'title': 'Chicken.', | ||||
|             'description': 'Chicken.', | ||||
|             'upload_date': '20130519', | ||||
|             'uploader': 'Jack Dorsey', | ||||
|             'uploader_id': '76', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'https://vine.co/v/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) | ||||
|  | ||||
|         video_url = self._html_search_meta('twitter:player:stream', webpage, | ||||
|             'video URL') | ||||
|         data = json.loads(self._html_search_regex( | ||||
|             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) | ||||
|  | ||||
|         uploader = self._html_search_regex(r'<p class="username">(.*?)</p>', | ||||
|             webpage, 'uploader', fatal=False, flags=re.DOTALL) | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': data['videoLowURL'], | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'low', | ||||
|             }, | ||||
|             { | ||||
|                 'url': data['videoUrl'], | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'standard', | ||||
|             } | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader': uploader, | ||||
|         } | ||||
|             'description': data['description'], | ||||
|             'thumbnail': data['thumbnailUrl'], | ||||
|             'upload_date': unified_strdate(data['created']), | ||||
|             'uploader': data['username'], | ||||
|             'uploader_id': data['userIdStr'], | ||||
|             'like_count': data['likes']['count'], | ||||
|             'comment_count': data['comments']['count'], | ||||
|             'repost_count': data['reposts']['count'], | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -199,9 +199,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|         '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40}, | ||||
|  | ||||
|         # Dash mp4 audio | ||||
|         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, | ||||
| @@ -1488,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         # the id of the playlist is just 'RD' + video_id | ||||
|         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) | ||||
|         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') | ||||
|         title_span = (get_element_by_attribute('class', 'title long-title', webpage) or | ||||
|             get_element_by_attribute('class', 'title ', webpage)) | ||||
|         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) | ||||
|         title_span = (search_title('playlist-title') or | ||||
|             search_title('title long-title') or search_title('title')) | ||||
|         title = clean_html(title_span) | ||||
|         video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id) | ||||
|         ids = orderedSet(re.findall(video_re, webpage)) | ||||
|         video_re = r'''(?x)data-video-username="(.*?)".*? | ||||
|                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) | ||||
|         matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) | ||||
|         # Some of the videos may have been deleted, their username field is empty | ||||
|         ids = [video_id for (username, video_id) in matches if username] | ||||
|         url_results = self._ids_to_results(ids) | ||||
|  | ||||
|         return self.playlist_result(url_results, playlist_id, title) | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| @@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' | ||||
|  | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt", | ||||
|         u"file": u"2037704.webm", | ||||
|         u"info_dict": { | ||||
|             u"upload_date": u"20131127", | ||||
|             u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".", | ||||
|             u"uploader": u"spezial", | ||||
|             u"title": u"ZDFspezial - Ende des Machtpokers" | ||||
|         'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt', | ||||
|         'info_dict': { | ||||
|             'id': '2037704', | ||||
|             'ext': 'webm', | ||||
|             'title': 'ZDFspezial - Ende des Machtpokers', | ||||
|             'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".', | ||||
|             'duration': 1022, | ||||
|             'uploader': 'spezial', | ||||
|             'uploader_id': '225948', | ||||
|             'upload_date': '20131127', | ||||
|         }, | ||||
|         u"skip": u"Videos on ZDF.de are depublicised in short order", | ||||
|         'skip': 'Videos on ZDF.de are depublicised in short order', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id | ||||
|         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id | ||||
|         doc = self._download_xml( | ||||
|             xml_url, video_id, | ||||
|             note=u'Downloading video info', | ||||
|             errnote=u'Failed to download video info') | ||||
|             note='Downloading video info', | ||||
|             errnote='Failed to download video info') | ||||
|  | ||||
|         title = doc.find('.//information/title').text | ||||
|         description = doc.find('.//information/detail').text | ||||
|         duration = int(doc.find('.//details/lengthSec').text) | ||||
|         uploader_node = doc.find('.//details/originChannelTitle') | ||||
|         uploader = None if uploader_node is None else uploader_node.text | ||||
|         duration_str = doc.find('.//details/length').text | ||||
|         duration_m = re.match(r'''(?x)^ | ||||
|             (?P<hours>[0-9]{2}) | ||||
|             :(?P<minutes>[0-9]{2}) | ||||
|             :(?P<seconds>[0-9]{2}) | ||||
|             (?:\.(?P<ms>[0-9]+)?) | ||||
|             ''', duration_str) | ||||
|         duration = ( | ||||
|             ( | ||||
|                 (int(duration_m.group('hours')) * 60 * 60) + | ||||
|                 (int(duration_m.group('minutes')) * 60) + | ||||
|                 int(duration_m.group('seconds')) | ||||
|             ) | ||||
|             if duration_m | ||||
|             else None | ||||
|         ) | ||||
|         uploader_id_node = doc.find('.//details/originChannelId') | ||||
|         uploader_id = None if uploader_id_node is None else uploader_id_node.text | ||||
|         upload_date = unified_strdate(doc.find('.//details/airtime').text) | ||||
|  | ||||
|         def xml_to_format(fnode): | ||||
|             video_url = fnode.find('url').text | ||||
|             is_available = u'http://www.metafilegenerator' not in video_url | ||||
|             is_available = 'http://www.metafilegenerator' not in video_url | ||||
|  | ||||
|             format_id = fnode.attrib['basetype'] | ||||
|             format_m = re.match(r'''(?x) | ||||
| @@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor): | ||||
|  | ||||
|             quality = fnode.find('./quality').text | ||||
|             abr = int(fnode.find('./audioBitrate').text) // 1000 | ||||
|             vbr = int(fnode.find('./videoBitrate').text) // 1000 | ||||
|             vbr_node = fnode.find('./videoBitrate') | ||||
|             vbr = None if vbr_node is None else int(vbr_node.text) // 1000 | ||||
|  | ||||
|             format_note = u'' | ||||
|             width_node = fnode.find('./width') | ||||
|             width = None if width_node is None else int_or_none(width_node.text) | ||||
|             height_node = fnode.find('./height') | ||||
|             height = None if height_node is None else int_or_none(height_node.text) | ||||
|  | ||||
|             format_note = '' | ||||
|             if not format_note: | ||||
|                 format_note = None | ||||
|  | ||||
|             return { | ||||
|                 'format_id': format_id + u'-' + quality, | ||||
|                 'format_id': format_id + '-' + quality, | ||||
|                 'url': video_url, | ||||
|                 'ext': ext, | ||||
|                 'acodec': format_m.group('acodec'), | ||||
|                 'vcodec': format_m.group('vcodec'), | ||||
|                 'abr': abr, | ||||
|                 'vbr': vbr, | ||||
|                 'width': int_or_none(fnode.find('./width').text), | ||||
|                 'height': int_or_none(fnode.find('./height').text), | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'filesize': int_or_none(fnode.find('./filesize').text), | ||||
|                 'format_note': format_note, | ||||
|                 'protocol': proto, | ||||
| @@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,6 +1,7 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import contextlib | ||||
| import ctypes | ||||
| import datetime | ||||
| import email.utils | ||||
| @@ -779,6 +780,7 @@ def unified_strdate(date_str): | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f', | ||||
|         '%Y-%m-%dT%H:%M', | ||||
|     ] | ||||
|     for expression in format_expressions: | ||||
| @@ -1244,3 +1246,19 @@ except TypeError: | ||||
| else: | ||||
|     struct_pack = struct.pack | ||||
|     struct_unpack = struct.unpack | ||||
|  | ||||
|  | ||||
| def read_batch_urls(batch_fd): | ||||
|     def fixup(url): | ||||
|         if not isinstance(url, compat_str): | ||||
|             url = url.decode('utf-8', 'replace') | ||||
|         BOM_UTF8 = u'\xef\xbb\xbf' | ||||
|         if url.startswith(BOM_UTF8): | ||||
|             url = url[len(BOM_UTF8):] | ||||
|         url = url.strip() | ||||
|         if url.startswith(('#', ';', ']')): | ||||
|             return False | ||||
|         return url | ||||
|  | ||||
|     with contextlib.closing(batch_fd) as fd: | ||||
|         return [url for url in map(fixup, fd) if url] | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.02.22.1' | ||||
| __version__ = '2014.02.26' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user