Compare commits

170 Commits

2014.11.26 ... 2014.12.12
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 3b0bec8d11 | ||
|  | 412c617d0f | ||
|  | 751536f5c8 | ||
|  | 025f30ba38 | ||
|  | 0d2fb1d193 | ||
|  | 82b34105d3 | ||
|  | 73aeb2dc56 | ||
|  | c6973bd412 | ||
|  | f8780e6d11 | ||
|  | e2f89ec7aa | ||
|  | 62651c556a | ||
|  | bf94e38d3d | ||
|  | 4f97852316 | ||
|  | 16040f46d6 | ||
|  | d068ba24f3 | ||
|  | f5e43bc695 | ||
|  | 6a5308ab49 | ||
|  | 63e0f29564 | ||
|  | 42bdd9d051 | ||
|  | 4e40de6e2a | ||
|  | 0fa2b899d1 | ||
|  | f17e4c9c28 | ||
|  | 807962f4a1 | ||
|  | 9c1aa1d668 | ||
|  | 69f491f14e | ||
|  | cb007f47c1 | ||
|  | 9abd500a74 | ||
|  | cf68bcaeff | ||
|  | cbe2bd914d | ||
|  | 75111274ed | ||
|  | 624dcebff6 | ||
|  | 9684f17cde | ||
|  | e52a40abf7 | ||
|  | 0daa05961b | ||
|  | 158731f83e | ||
|  | 24270b0301 | ||
|  | 3c1b81b957 | ||
|  | 45c24df512 | ||
|  | bf671b605e | ||
|  | 09c82fbc9a | ||
|  | 3bca0409fe | ||
|  | d6f78a354d | ||
|  | e0b9d47387 | ||
|  | f8795e102b | ||
|  | 4bb4a18876 | ||
|  | 8560c61842 | ||
|  | a81bbebf44 | ||
|  | 72e3ffeb74 | ||
|  | 2fc9f2b41d | ||
|  | 5f3544baa3 | ||
|  | da27660014 | ||
|  | b8a6114309 | ||
|  | 774e208f94 | ||
|  | f20b52778b | ||
|  | 83e865a370 | ||
|  | b89a938687 | ||
|  | e89a2aabed | ||
|  | f58766ce5c | ||
|  | 15644a40df | ||
|  | d4800f3c3f | ||
|  | 09a5dd2d3b | ||
|  | 819039ee63 | ||
|  | ce36339575 | ||
|  | 684712076f | ||
|  | 603c92080f | ||
|  | 16ae61f655 | ||
|  | 0ef4d4ab7e | ||
|  | 4542535f94 | ||
|  | 6a52eed80e | ||
|  | acf5cbfe93 | ||
|  | 8d1c8cae9c | ||
|  | c84890f708 | ||
|  | 6d0886204a | ||
|  | 04d02a9d57 | ||
|  | 6ac4e8065a | ||
|  | b82f815f37 | ||
|  | 158f8cadc0 | ||
|  | 7d70cf4157 | ||
|  | 6591fdf51f | ||
|  | 47d7c64274 | ||
|  | db175341c7 | ||
|  | 9ff6772790 | ||
|  | 5f9b83944d | ||
|  | f6735be4da | ||
|  | 6a3e0103bb | ||
|  | 0b5cc1983e | ||
|  | 1a9f8b1ad4 | ||
|  | 7115599121 | ||
|  | 0df23ba9f9 | ||
|  | 58daf5ebed | ||
|  | 1a7c6c69d3 | ||
|  | 045c48847a | ||
|  | e638e83662 | ||
|  | 90644a6843 | ||
|  | d958fa9ff9 | ||
|  | ebb6419960 | ||
|  | 122c2f87c1 | ||
|  | a154eb3d15 | ||
|  | 81028ff9eb | ||
|  | e8df5cee12 | ||
|  | ab07963b5c | ||
|  | 7e26084d09 | ||
|  | 4349c07dd7 | ||
|  | 1139a54d9b | ||
|  | b128c9ed68 | ||
|  | 9776bc7f57 | ||
|  | e703fc66c2 | ||
|  | 39c52bbd32 | ||
|  | 6219802165 | ||
|  | 8b97115358 | ||
|  | 810fb84d5e | ||
|  | 5f5e993dc6 | ||
|  | 191cc41ba4 | ||
|  | abe70fa044 | ||
|  | 7f142293df | ||
|  | d4e06d4a83 | ||
|  | ecd7ea1e6b | ||
|  | b92c548693 | ||
|  | eecd6a467d | ||
|  | dce2a3cf9e | ||
|  | 9095aa38ac | ||
|  | 0403b06985 | ||
|  | de9bd74bc2 | ||
|  | 233d37fb6b | ||
|  | c627f7d48c | ||
|  | 163c8babaa | ||
|  | 6708542099 | ||
|  | ea2ee40357 | ||
|  | 62d8b56655 | ||
|  | c492970b4b | ||
|  | ac5633592a | ||
|  | 706d7d4ee7 | ||
|  | 752c8c9b76 | ||
|  | b1399a144d | ||
|  | 05177b34a6 | ||
|  | c41a9650c3 | ||
|  | df015c69ea | ||
|  | 1434bffa1f | ||
|  | 94aa25b995 | ||
|  | d128cfe393 | ||
|  | 954f36f890 | ||
|  | 19e92770c9 | ||
|  | 95c673a148 | ||
|  | a196a53265 | ||
|  | 3266f0c68e | ||
|  | 1940fadd53 | ||
|  | 03fd72d996 | ||
|  | f2b44a2513 | ||
|  | c522adb1f0 | ||
|  | 7160532d41 | ||
|  | 4e62ebe250 | ||
|  | 4472f84f0c | ||
|  | b766eb2707 | ||
|  | 10a404c335 | ||
|  | c056efa2e3 | ||
|  | 283ac8d592 | ||
|  | 313d4572ce | ||
|  | 42939b6129 | ||
|  | 37ea8164d3 | ||
|  | 8c810a7db3 | ||
|  | 248a0b890f | ||
|  | 96b7c7fe3f | ||
|  | e987e91fcc | ||
|  | cb6444e197 | ||
|  | 93b8a10e3b | ||
|  | 4207558e8b | ||
|  | ad0d800fc3 | ||
|  | e232f787f6 | ||
|  | 155f9550c0 | ||
|  | 72476fcc42 | ||
AUTHORS (4 additions)
| @@ -88,3 +88,7 @@ Dao Hoang Son | ||||
| Oskar Jauch | ||||
| Matthew Rayfield | ||||
| t0mm0 | ||||
| Tithen-Firion | ||||
| Zack Fernandes | ||||
| cryptonaut | ||||
| Adrian Kretz | ||||
|   | ||||
Makefile (2 changed lines)
| @@ -1,7 +1,7 @@ | ||||
| all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json | ||||
|  | ||||
| cleanall: clean | ||||
| 	rm -f youtube-dl youtube-dl.exe | ||||
|   | ||||
| @@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      this is not possible instead of searching. | ||||
|     --ignore-config                  Do not read configuration files. When given | ||||
|                                      in the global configuration file /etc | ||||
|                                      /youtube-dl.conf: do not read the user | ||||
|                                      configuration in ~/.config/youtube-dl.conf | ||||
|                                      (%APPDATA%/youtube-dl/config.txt on | ||||
|                                      Windows) | ||||
|                                      /youtube-dl.conf: Do not read the user | ||||
|                                      configuration in ~/.config/youtube- | ||||
|                                      dl/config (%APPDATA%/youtube-dl/config.txt | ||||
|                                      on Windows) | ||||
|     --flat-playlist                  Do not extract the videos of a playlist, | ||||
|                                      only list them. | ||||
|  | ||||
|   | ||||
| @@ -141,7 +141,7 @@ def expect_info_dict(self, expected_dict, got_dict): | ||||
|     if missing_keys: | ||||
|         def _repr(v): | ||||
|             if isinstance(v, compat_str): | ||||
|                 return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'") | ||||
|                 return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') | ||||
|             else: | ||||
|                 return repr(v) | ||||
|         info_dict_str = ''.join( | ||||
|   | ||||
| @@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|         self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|   | ||||
| @@ -48,6 +48,7 @@ from youtube_dl.utils import ( | ||||
|     intlist_to_bytes, | ||||
|     args_to_str, | ||||
|     parse_filesize, | ||||
|     version_tuple, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -143,6 +144,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') | ||||
|         self.assertEqual(unified_strdate('1968-12-10'), '19681210') | ||||
|         self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') | ||||
|         self.assertEqual( | ||||
|             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             '20141126') | ||||
|  | ||||
|     def test_find_xpath_attr(self): | ||||
|         testxml = '''<root> | ||||
| @@ -220,6 +224,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_duration('0s'), 0) | ||||
|         self.assertEqual(parse_duration('01:02:03.05'), 3723.05) | ||||
|         self.assertEqual(parse_duration('T30M38S'), 1838) | ||||
|         self.assertEqual(parse_duration('5 s'), 5) | ||||
|         self.assertEqual(parse_duration('3 min'), 180) | ||||
|         self.assertEqual(parse_duration('2.5 hours'), 9000) | ||||
|  | ||||
|     def test_fix_xml_ampersands(self): | ||||
|         self.assertEqual( | ||||
| @@ -376,6 +383,12 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_filesize('2 MiB'), 2097152) | ||||
|         self.assertEqual(parse_filesize('5 GB'), 5000000000) | ||||
|         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) | ||||
|         self.assertEqual(parse_filesize('1,24 KB'), 1240) | ||||
|  | ||||
|     def test_version_tuple(self): | ||||
|         self.assertEqual(version_tuple('1'), (1,)) | ||||
|         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) | ||||
|         self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
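The new `test_version_tuple` assertions above pin down the expected parsing behaviour, including avconv-style versions such as `'10.1-6'`. As a rough sketch consistent with those assertions (not necessarily the exact implementation added to `youtube_dl/utils.py`), the helper only needs to split on both `.` and `-`:

```python
import re


def version_tuple(version_str):
    # Split on '.' and '-' so plain versions ('10.23.344') and
    # avconv-style versions ('10.1-6') both become integer tuples.
    return tuple(int(piece) for piece in re.split(r'[-.]', version_str))


assert version_tuple('1') == (1,)
assert version_tuple('10.23.344') == (10, 23, 344)
assert version_tuple('10.1-6') == (10, 1, 6)  # avconv style
```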
| @@ -7,6 +7,7 @@ import collections | ||||
| import datetime | ||||
| import errno | ||||
| import io | ||||
| import itertools | ||||
| import json | ||||
| import locale | ||||
| import os | ||||
| @@ -621,23 +622,15 @@ class YoutubeDL(object): | ||||
|                 ie_result['url'], ie_key=ie_result.get('ie_key'), | ||||
|                 extra_info=extra_info, download=False, process=False) | ||||
|  | ||||
|             def make_result(embedded_info): | ||||
|                 new_result = ie_result.copy() | ||||
|                 for f in ('_type', 'url', 'ext', 'player_url', 'formats', | ||||
|                           'entries', 'ie_key', 'duration', | ||||
|                           'subtitles', 'annotations', 'format', | ||||
|                           'thumbnail', 'thumbnails'): | ||||
|                     if f in new_result: | ||||
|                         del new_result[f] | ||||
|                     if f in embedded_info: | ||||
|                         new_result[f] = embedded_info[f] | ||||
|                 return new_result | ||||
|             new_result = make_result(info) | ||||
|             force_properties = dict( | ||||
|                 (k, v) for k, v in ie_result.items() if v is not None) | ||||
|             for f in ('_type', 'url'): | ||||
|                 if f in force_properties: | ||||
|                     del force_properties[f] | ||||
|             new_result = info.copy() | ||||
|             new_result.update(force_properties) | ||||
|  | ||||
|             assert new_result.get('_type') != 'url_transparent' | ||||
|             if new_result.get('_type') == 'compat_list': | ||||
|                 new_result['entries'] = [ | ||||
|                     make_result(e) for e in new_result['entries']] | ||||
|  | ||||
|             return self.process_ie_result( | ||||
|                 new_result, download=download, extra_info=extra_info) | ||||
| @@ -654,21 +647,28 @@ class YoutubeDL(object): | ||||
|             if playlistend == -1: | ||||
|                 playlistend = None | ||||
|  | ||||
|             if isinstance(ie_result['entries'], list): | ||||
|                 n_all_entries = len(ie_result['entries']) | ||||
|                 entries = ie_result['entries'][playliststart:playlistend] | ||||
|             ie_entries = ie_result['entries'] | ||||
|             if isinstance(ie_entries, list): | ||||
|                 n_all_entries = len(ie_entries) | ||||
|                 entries = ie_entries[playliststart:playlistend] | ||||
|                 n_entries = len(entries) | ||||
|                 self.to_screen( | ||||
|                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % | ||||
|                     (ie_result['extractor'], playlist, n_all_entries, n_entries)) | ||||
|             else: | ||||
|                 assert isinstance(ie_result['entries'], PagedList) | ||||
|                 entries = ie_result['entries'].getslice( | ||||
|             elif isinstance(ie_entries, PagedList): | ||||
|                 entries = ie_entries.getslice( | ||||
|                     playliststart, playlistend) | ||||
|                 n_entries = len(entries) | ||||
|                 self.to_screen( | ||||
|                     "[%s] playlist %s: Downloading %d videos" % | ||||
|                     (ie_result['extractor'], playlist, n_entries)) | ||||
|             else:  # iterable | ||||
|                 entries = list(itertools.islice( | ||||
|                     ie_entries, playliststart, playlistend)) | ||||
|                 n_entries = len(entries) | ||||
|                 self.to_screen( | ||||
|                     "[%s] playlist %s: Downloading %d videos" % | ||||
|                     (ie_result['extractor'], playlist, n_entries)) | ||||
|  | ||||
|             for i, entry in enumerate(entries, 1): | ||||
|                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) | ||||
| @@ -787,6 +787,10 @@ class YoutubeDL(object): | ||||
|             info_dict['display_id'] = info_dict['id'] | ||||
|  | ||||
|         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: | ||||
|             # Working around negative timestamps in Windows | ||||
|             # (see http://bugs.python.org/issue1646728) | ||||
|             if info_dict['timestamp'] < 0 and os.name == 'nt': | ||||
|                 info_dict['timestamp'] = 0 | ||||
|             upload_date = datetime.datetime.utcfromtimestamp( | ||||
|                 info_dict['timestamp']) | ||||
|             info_dict['upload_date'] = upload_date.strftime('%Y%m%d') | ||||
| @@ -930,8 +934,12 @@ class YoutubeDL(object): | ||||
|         if self.params.get('forceid', False): | ||||
|             self.to_stdout(info_dict['id']) | ||||
|         if self.params.get('forceurl', False): | ||||
|             # For RTMP URLs, also include the playpath | ||||
|             self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) | ||||
|             if info_dict.get('requested_formats') is not None: | ||||
|                 for f in info_dict['requested_formats']: | ||||
|                     self.to_stdout(f['url'] + f.get('play_path', '')) | ||||
|             else: | ||||
|                 # For RTMP URLs, also include the playpath | ||||
|                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) | ||||
|         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: | ||||
|             self.to_stdout(info_dict['thumbnail']) | ||||
|         if self.params.get('forcedescription', False) and info_dict.get('description') is not None: | ||||
|   | ||||
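The playlist hunk above drops the hard `assert isinstance(..., PagedList)` and adds a third branch, so an extractor may now return `entries` as any iterable (for example a generator) and `--playlist-start`/`--playlist-end` still work. A hedged sketch of that slicing logic, with a hypothetical `slice_playlist_entries` helper and assuming `playliststart`/`playlistend` are already normalised to Python slice indices as in `process_ie_result`:

```python
import itertools


def slice_playlist_entries(ie_entries, playliststart, playlistend):
    # Lists can be sliced directly; PagedList objects expose getslice();
    # any other iterable is consumed lazily with islice, so a generator
    # does not have to be fully realised just to honour the bounds.
    if isinstance(ie_entries, list):
        return ie_entries[playliststart:playlistend]
    if hasattr(ie_entries, 'getslice'):
        return ie_entries.getslice(playliststart, playlistend)
    return list(itertools.islice(ie_entries, playliststart, playlistend))
```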
| @@ -247,7 +247,7 @@ else: | ||||
|                 userhome = compat_getenv('HOME') | ||||
|             elif 'USERPROFILE' in os.environ: | ||||
|                 userhome = compat_getenv('USERPROFILE') | ||||
|             elif not 'HOMEPATH' in os.environ: | ||||
|             elif 'HOMEPATH' not in os.environ: | ||||
|                 return path | ||||
|             else: | ||||
|                 try: | ||||
| @@ -297,7 +297,9 @@ else: | ||||
|  | ||||
| # Old 2.6 and 2.7 releases require kwargs to be bytes | ||||
| try: | ||||
|     (lambda x: x)(**{'x': 0}) | ||||
|     def _testfunc(x): | ||||
|         pass | ||||
|     _testfunc(**{'x': 0}) | ||||
| except TypeError: | ||||
|     def compat_kwargs(kwargs): | ||||
|         return dict((bytes(k), v) for k, v in kwargs.items()) | ||||
|   | ||||
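The probe above only checks whether the running interpreter accepts unicode keyword names in `**` expansion; the diff's comment notes that old 2.6 and 2.7 releases require them to be bytes, and with file-wide `unicode_literals` the `'x'` key is unicode. A hedged usage sketch of the resulting helper, assuming it is importable as `youtube_dl.compat.compat_kwargs` and that it returns the dict unchanged on interpreters where the probe succeeds:

```python
from __future__ import unicode_literals

from youtube_dl.compat import compat_kwargs  # assumed import path


def fmt(template='%s', value='ok'):
    return template % value


# On affected 2.6/2.7 builds compat_kwargs converts the unicode keys to
# bytes; on other interpreters the dict is passed through unchanged.
print(fmt(**compat_kwargs({'template': '[%s]', 'value': 'youtube-dl'})))
```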
| @@ -4,6 +4,7 @@ import os | ||||
| import re | ||||
| import subprocess | ||||
|  | ||||
| from ..postprocessor.ffmpeg import FFmpegPostProcessor | ||||
| from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
| @@ -32,6 +33,9 @@ class HlsFD(FileDownloader): | ||||
|             return False | ||||
|         cmd = [program] + args | ||||
|  | ||||
|         ffpp = FFmpegPostProcessor(downloader=self) | ||||
|         ffpp.check_version() | ||||
|  | ||||
|         retval = subprocess.call(cmd) | ||||
|         if retval == 0: | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|   | ||||
| @@ -24,11 +24,13 @@ from .arte import ( | ||||
| ) | ||||
| from .audiomack import AudiomackIE | ||||
| from .auengine import AUEngineIE | ||||
| from .azubu import AzubuIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bbccouk import BBCCoUkIE | ||||
| from .beeg import BeegIE | ||||
| from .behindkink import BehindKinkIE | ||||
| from .bet import BetIE | ||||
| from .bild import BildIE | ||||
| from .bilibili import BiliBiliIE | ||||
| from .blinkx import BlinkxIE | ||||
| @@ -38,6 +40,7 @@ from .bpb import BpbIE | ||||
| from .br import BRIE | ||||
| from .breakcom import BreakIE | ||||
| from .brightcove import BrightcoveIE | ||||
| from .buzzfeed import BuzzFeedIE | ||||
| from .byutv import BYUtvIE | ||||
| from .c56 import C56IE | ||||
| from .canal13cl import Canal13clIE | ||||
| @@ -48,7 +51,7 @@ from .cbsnews import CBSNewsIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .cinemassacre import CinemassacreIE | ||||
| from .cinchcast import CinchcastIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| from .clipsyndicate import ClipsyndicateIE | ||||
| @@ -120,6 +123,8 @@ from .fktv import ( | ||||
| from .flickr import FlickrIE | ||||
| from .folketinget import FolketingetIE | ||||
| from .fourtube import FourTubeIE | ||||
| from .foxgay import FoxgayIE | ||||
| from .foxnews import FoxNewsIE | ||||
| from .franceculture import FranceCultureIE | ||||
| from .franceinter import FranceInterIE | ||||
| from .francetv import ( | ||||
| @@ -215,6 +220,7 @@ from .mdr import MDRIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mgoon import MgoonIE | ||||
| from .minhateca import MinhatecaIE | ||||
| from .ministrygrid import MinistryGridIE | ||||
| from .mit import TechTVMITIE, MITIE, OCWMITIE | ||||
| from .mitele import MiTeleIE | ||||
| @@ -241,9 +247,10 @@ from .muenchentv import MuenchenTVIE | ||||
| from .musicplayon import MusicPlayOnIE | ||||
| from .musicvault import MusicVaultIE | ||||
| from .muzu import MuzuTVIE | ||||
| from .myspace import MySpaceIE | ||||
| from .myspace import MySpaceIE, MySpaceAlbumIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import ( | ||||
| @@ -301,6 +308,7 @@ from .promptfile import PromptFileIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .quickvid import QuickVidIE | ||||
| from .radiode import RadioDeIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| from .rai import RaiIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| @@ -328,6 +336,7 @@ from .savefrom import SaveFromIE | ||||
| from .sbs import SBSIE | ||||
| from .scivee import SciVeeIE | ||||
| from .screencast import ScreencastIE | ||||
| from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .sexu import SexuIE | ||||
| from .sexykarma import SexyKarmaIE | ||||
| @@ -416,6 +425,7 @@ from .tutv import TutvIE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE | ||||
| from .tvplay import TVPlayIE | ||||
| from .twentyfourvideo import TwentyFourVideoIE | ||||
| from .twitch import TwitchIE | ||||
| from .ubu import UbuIE | ||||
| from .udemy import ( | ||||
| @@ -517,7 +527,7 @@ from .youtube import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zdf import ZDFIE | ||||
| from .zdf import ZDFIE, ZDFChannelIE | ||||
| from .zingmp3 import ( | ||||
|     ZingMp3SongIE, | ||||
|     ZingMp3AlbumIE, | ||||
|   | ||||
| @@ -2,123 +2,147 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AdultSwimIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', | ||||
|     _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://adultswim.com/videos/rick-and-morty/pilot', | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'md5': '4da359ec73b58df4575cd01a610ba5dc', | ||||
|                 'md5': '247572debc75c7652f253c8daa51a14d', | ||||
|                 'info_dict': { | ||||
|                     'id': '8a250ba1450996e901453d7f02ca02f5', | ||||
|                     'id': 'rQxZvXQ4ROaSOqq-or2Mow-0', | ||||
|                     'ext': 'flv', | ||||
|                     'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1', | ||||
|                     'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', | ||||
|                     'uploader': 'Rick and Morty', | ||||
|                     'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' | ||||
|                 } | ||||
|                     'title': 'Rick and Morty - Pilot Part 1', | ||||
|                     'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 'md5': 'ffbdf55af9331c509d95350bd0cc1819', | ||||
|                 'md5': '77b0e037a4b20ec6b98671c4c379f48d', | ||||
|                 'info_dict': { | ||||
|                     'id': '8a250ba1450996e901453d7f4bd102f6', | ||||
|                     'id': 'rQxZvXQ4ROaSOqq-or2Mow-3', | ||||
|                     'ext': 'flv', | ||||
|                     'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2', | ||||
|                     'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', | ||||
|                     'uploader': 'Rick and Morty', | ||||
|                     'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' | ||||
|                 } | ||||
|                     'title': 'Rick and Morty - Pilot Part 4', | ||||
|                     'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " | ||||
|                 }, | ||||
|             }, | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'title': 'Rick and Morty - Pilot', | ||||
|             'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/', | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'md5': 'b92409635540304280b4b6c36bd14a0a', | ||||
|                 'md5': '2eb5c06d0f9a1539da3718d897f13ec5', | ||||
|                 'info_dict': { | ||||
|                     'id': '8a250ba1450996e901453d7fa73c02f7', | ||||
|                     'id': '-t8CamQlQ2aYZ49ItZCFog-0', | ||||
|                     'ext': 'flv', | ||||
|                     'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3', | ||||
|                     'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', | ||||
|                     'uploader': 'Rick and Morty', | ||||
|                     'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 'md5': 'e8818891d60e47b29cd89d7b0278156d', | ||||
|                 'info_dict': { | ||||
|                     'id': '8a250ba1450996e901453d7fc8ba02f8', | ||||
|                     'ext': 'flv', | ||||
|                     'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4', | ||||
|                     'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', | ||||
|                     'uploader': 'Rick and Morty', | ||||
|                     'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' | ||||
|                 } | ||||
|                     'title': 'American Dad - Putting Francine Out of Business', | ||||
|                     'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' | ||||
|                 }, | ||||
|             } | ||||
|         ] | ||||
|     } | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'title': 'American Dad - Putting Francine Out of Business', | ||||
|             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _video_extensions = { | ||||
|         '3500': 'flv', | ||||
|         '640': 'mp4', | ||||
|         '150': 'mp4', | ||||
|         'ipad': 'm3u8', | ||||
|         'iphone': 'm3u8' | ||||
|     } | ||||
|     _video_dimensions = { | ||||
|         '3500': (1280, 720), | ||||
|         '640': (480, 270), | ||||
|         '150': (320, 180) | ||||
|     } | ||||
|     @staticmethod | ||||
|     def find_video_info(collection, slug): | ||||
|         for video in collection.get('videos'): | ||||
|             if video.get('slug') == slug: | ||||
|                 return video | ||||
|  | ||||
|     @staticmethod | ||||
|     def find_collection_by_linkURL(collections, linkURL): | ||||
|         for collection in collections: | ||||
|             if collection.get('linkURL') == linkURL: | ||||
|                 return collection | ||||
|  | ||||
|     @staticmethod | ||||
|     def find_collection_containing_video(collections, slug): | ||||
|         for collection in collections: | ||||
|             for video in collection.get('videos'): | ||||
|                 if video.get('slug') == slug: | ||||
|                     return collection, video | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_path = mobj.group('path') | ||||
|         show_path = mobj.group('show_path') | ||||
|         episode_path = mobj.group('episode_path') | ||||
|         is_playlist = True if mobj.group('is_playlist') else False | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_path) | ||||
|         episode_id = self._html_search_regex( | ||||
|             r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', | ||||
|             webpage, 'episode_id') | ||||
|         title = self._og_search_title(webpage) | ||||
|         webpage = self._download_webpage(url, episode_path) | ||||
|  | ||||
|         index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id | ||||
|         idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index') | ||||
|         # Extract the value of `bootstrappedData` from the Javascript in the page. | ||||
|         bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path) | ||||
|  | ||||
|         episode_el = idoc.find('.//episode') | ||||
|         show_title = episode_el.attrib.get('collectionTitle') | ||||
|         episode_title = episode_el.attrib.get('title') | ||||
|         thumbnail = episode_el.attrib.get('thumbnailUrl') | ||||
|         description = episode_el.find('./description').text.strip() | ||||
|         try: | ||||
|             bootstrappedData = json.loads(bootstrappedDataJS) | ||||
|         except ValueError as ve: | ||||
|             errmsg = '%s: Failed to parse JSON ' % episode_path | ||||
|             raise ExtractorError(errmsg, cause=ve) | ||||
|  | ||||
|         # Downloading videos from a /videos/playlist/ URL needs to be handled differently. | ||||
|         # NOTE: We are only downloading one video (the current one) not the playlist | ||||
|         if is_playlist: | ||||
|             collections = bootstrappedData['playlists']['collections'] | ||||
|             collection = self.find_collection_by_linkURL(collections, show_path) | ||||
|             video_info = self.find_video_info(collection, episode_path) | ||||
|  | ||||
|             show_title = video_info['showTitle'] | ||||
|             segment_ids = [video_info['videoPlaybackID']] | ||||
|         else: | ||||
|             collections = bootstrappedData['show']['collections'] | ||||
|             collection, video_info = self.find_collection_containing_video(collections, episode_path) | ||||
|  | ||||
|             show = bootstrappedData['show'] | ||||
|             show_title = show['title'] | ||||
|             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] | ||||
|  | ||||
|         episode_id = video_info['id'] | ||||
|         episode_title = video_info['title'] | ||||
|         episode_description = video_info['description'] | ||||
|         episode_duration = video_info.get('duration') | ||||
|  | ||||
|         entries = [] | ||||
|         segment_els = episode_el.findall('./segments/segment') | ||||
|         for part_num, segment_id in enumerate(segment_ids): | ||||
|             segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id | ||||
|  | ||||
|         for part_num, segment_el in enumerate(segment_els): | ||||
|             segment_id = segment_el.attrib.get('id') | ||||
|             segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1) | ||||
|             thumbnail = segment_el.attrib.get('thumbnailUrl') | ||||
|             duration = segment_el.attrib.get('duration') | ||||
|             segment_title = '%s - %s' % (show_title, episode_title) | ||||
|             if len(segment_ids) > 1: | ||||
|                 segment_title += ' Part %d' % (part_num + 1) | ||||
|  | ||||
|             segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id | ||||
|             idoc = self._download_xml( | ||||
|                 segment_url, segment_title, | ||||
|                 'Downloading segment information', 'Unable to download segment information') | ||||
|  | ||||
|             segment_duration = idoc.find('.//trt').text.strip() | ||||
|  | ||||
|             formats = [] | ||||
|             file_els = idoc.findall('.//files/file') | ||||
|  | ||||
|             for file_el in file_els: | ||||
|                 bitrate = file_el.attrib.get('bitrate') | ||||
|                 type = file_el.attrib.get('type') | ||||
|                 width, height = self._video_dimensions.get(bitrate, (None, None)) | ||||
|                 ftype = file_el.attrib.get('type') | ||||
|  | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s-%s' % (bitrate, type), | ||||
|                     'url': file_el.text, | ||||
|                     'ext': self._video_extensions.get(bitrate, 'mp4'), | ||||
|                     'format_id': '%s_%s' % (bitrate, ftype), | ||||
|                     'url': file_el.text.strip(), | ||||
|                     # The bitrate may not be a number (for example: 'iphone') | ||||
|                     'tbr': int(bitrate) if bitrate.isdigit() else None, | ||||
|                     'height': height, | ||||
|                     'width': width | ||||
|                     'quality': 1 if ftype == 'hd' else -1 | ||||
|                 }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
| @@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor): | ||||
|                 'id': segment_id, | ||||
|                 'title': segment_title, | ||||
|                 'formats': formats, | ||||
|                 'uploader': show_title, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'duration': duration, | ||||
|                 'description': description | ||||
|                 'duration': segment_duration, | ||||
|                 'description': episode_description | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': episode_id, | ||||
|             'display_id': video_path, | ||||
|             'display_id': episode_path, | ||||
|             'entries': entries, | ||||
|             'title': '%s %s' % (show_title, episode_title), | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail | ||||
|             'title': '%s - %s' % (show_title, episode_title), | ||||
|             'description': episode_description, | ||||
|             'duration': episode_duration | ||||
|         } | ||||
|   | ||||
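The rewritten AdultSwim extractor above no longer walks an XML episode index; it pulls the page's `bootstrappedData` JavaScript variable and works from that JSON. A minimal sketch of that extraction step, using the same regex as the `_search_regex` call in the diff (the `extract_bootstrapped_data` helper name and the error handling are illustrative only):

```python
import json
import re


def extract_bootstrapped_data(webpage):
    # Grab the object literal assigned to `var bootstrappedData = {...};`
    # and parse it as JSON.
    mobj = re.search(r'var bootstrappedData = ({.*});', webpage)
    if mobj is None:
        raise ValueError('bootstrappedData not found in page')
    return json.loads(mobj.group(1))
```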
| @@ -24,17 +24,17 @@ class AudiomackIE(InfoExtractor): | ||||
|         }, | ||||
|         # hosted on soundcloud via audiomack | ||||
|         { | ||||
|             'add_ie': ['Soundcloud'], | ||||
|             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', | ||||
|             'file': '172419696.mp3', | ||||
|             'info_dict': | ||||
|             { | ||||
|             'info_dict': { | ||||
|                 'id': '172419696', | ||||
|                 'ext': 'mp3', | ||||
|                 'description': 'md5:1fc3272ed7a635cce5be1568c2822997', | ||||
|                 'title': 'Young Thug ft Lil Wayne - Take Kare', | ||||
|                 "upload_date": "20141016", | ||||
|                 "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n", | ||||
|                 "uploader": "Young Thug World" | ||||
|                 'uploader': 'Young Thug World', | ||||
|                 'upload_date': '20141016', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
youtube_dl/extractor/azubu.py (new file, 93 additions)
| @@ -0,0 +1,93 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import float_or_none | ||||
|  | ||||
|  | ||||
| class AzubuIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1', | ||||
|             'md5': 'a88b42fcf844f29ad6035054bd9ecaf4', | ||||
|             'info_dict': { | ||||
|                 'id': '15575', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1', | ||||
|                 'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01', | ||||
|                 'thumbnail': 're:^https?://.*\.jpe?g', | ||||
|                 'timestamp': 1417523507.334, | ||||
|                 'upload_date': '20141202', | ||||
|                 'duration': 9988.7, | ||||
|                 'uploader': 'GSL', | ||||
|                 'uploader_id': 414310, | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-', | ||||
|             'md5': 'b72a871fe1d9f70bd7673769cdb3b925', | ||||
|             'info_dict': { | ||||
|                 'id': '9344', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"', | ||||
|                 'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af', | ||||
|                 'thumbnail': 're:^https?://.*\.jpe?g', | ||||
|                 'timestamp': 1410530893.320, | ||||
|                 'upload_date': '20140912', | ||||
|                 'duration': 172.385, | ||||
|                 'uploader': 'FnaticTV', | ||||
|                 'uploader_id': 272749, | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data'] | ||||
|  | ||||
|         title = data['title'].strip() | ||||
|         description = data['description'] | ||||
|         thumbnail = data['thumbnail'] | ||||
|         view_count = data['view_count'] | ||||
|         uploader = data['user']['username'] | ||||
|         uploader_id = data['user']['id'] | ||||
|  | ||||
|         stream_params = json.loads(data['stream_params']) | ||||
|  | ||||
|         timestamp = float_or_none(stream_params['creationDate'], 1000) | ||||
|         duration = float_or_none(stream_params['length'], 1000) | ||||
|  | ||||
|         renditions = stream_params.get('renditions') or [] | ||||
|         video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength') | ||||
|         if video: | ||||
|             renditions.append(video) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': fmt['url'], | ||||
|             'width': fmt['frameWidth'], | ||||
|             'height': fmt['frameHeight'], | ||||
|             'vbr': float_or_none(fmt['encodingRate'], 1000), | ||||
|             'filesize': fmt['size'], | ||||
|             'vcodec': fmt['videoCodec'], | ||||
|             'container': fmt['videoContainer'], | ||||
|         } for fmt in renditions if fmt['url']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
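Since `AzubuIE` is also registered in `youtube_dl/extractor/__init__.py` above, Azubu URLs resolve through the normal embedding API. A small usage sketch against the first test URL from the new file (the option dict and the printed field are illustrative only):

```python
import youtube_dl

ydl = youtube_dl.YoutubeDL({'quiet': True})
info = ydl.extract_info(
    'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
    download=False)
print(info['title'])  # e.g. '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1'
```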
| @@ -1,9 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..compat import compat_HTTPError | ||||
|  | ||||
|  | ||||
| class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
| @@ -55,7 +56,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', | ||||
|             'info_dict': { | ||||
|                 'id': 'b03k3pb7', | ||||
|                 'ext': 'flv', | ||||
|                 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction", | ||||
|                 'description': '2. Invasion', | ||||
|                 'duration': 3600, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _extract_asx_playlist(self, connection, programme_id): | ||||
| @@ -102,6 +118,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|         return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item') | ||||
|  | ||||
|     def _extract_medias(self, media_selection): | ||||
|         error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error') | ||||
|         if error is not None: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True) | ||||
|         return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media') | ||||
|  | ||||
|     def _extract_connections(self, media): | ||||
| @@ -158,54 +178,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|             subtitles[lang] = srt | ||||
|         return subtitles | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         group_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, group_id, 'Downloading video page') | ||||
|         if re.search(r'id="emp-error" class="notinuk">', webpage): | ||||
|             raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only', | ||||
|                                  expected=True) | ||||
|  | ||||
|         playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, | ||||
|                                       'Downloading playlist XML') | ||||
|  | ||||
|         no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') | ||||
|         if no_items is not None: | ||||
|             reason = no_items.get('reason') | ||||
|             if reason == 'preAvailability': | ||||
|                 msg = 'Episode %s is not yet available' % group_id | ||||
|             elif reason == 'postAvailability': | ||||
|                 msg = 'Episode %s is no longer available' % group_id | ||||
|     def _download_media_selector(self, programme_id): | ||||
|         try: | ||||
|             media_selection = self._download_xml( | ||||
|                 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, | ||||
|                 programme_id, 'Downloading media selection XML') | ||||
|         except ExtractorError as ee: | ||||
|             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: | ||||
|                 media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8')) | ||||
|             else: | ||||
|                 msg = 'Episode %s is not available: %s' % (group_id, reason) | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|                 raise | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = None | ||||
|  | ||||
|         for item in self._extract_items(playlist): | ||||
|             kind = item.get('kind') | ||||
|             if kind != 'programme' and kind != 'radioProgramme': | ||||
|                 continue | ||||
|             title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text | ||||
|             description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text | ||||
|         for media in self._extract_medias(media_selection): | ||||
|             kind = media.get('kind') | ||||
|             if kind == 'audio': | ||||
|                 formats.extend(self._extract_audio(media, programme_id)) | ||||
|             elif kind == 'video': | ||||
|                 formats.extend(self._extract_video(media, programme_id)) | ||||
|             elif kind == 'captions': | ||||
|                 subtitles = self._extract_captions(media, programme_id) | ||||
|  | ||||
|             programme_id = item.get('identifier') | ||||
|             duration = int(item.get('duration')) | ||||
|         return formats, subtitles | ||||
|  | ||||
|             media_selection = self._download_xml( | ||||
|                 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, | ||||
|                 programme_id, 'Downloading media selection XML') | ||||
|     def _real_extract(self, url): | ||||
|         group_id = self._match_id(url) | ||||
|  | ||||
|             for media in self._extract_medias(media_selection): | ||||
|                 kind = media.get('kind') | ||||
|                 if kind == 'audio': | ||||
|                     formats.extend(self._extract_audio(media, programme_id)) | ||||
|                 elif kind == 'video': | ||||
|                     formats.extend(self._extract_video(media, programme_id)) | ||||
|                 elif kind == 'captions': | ||||
|                     subtitles = self._extract_captions(media, programme_id) | ||||
|         webpage = self._download_webpage(url, group_id, 'Downloading video page') | ||||
|  | ||||
|         programme_id = self._search_regex( | ||||
|             r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False) | ||||
|         if programme_id: | ||||
|             player = self._download_json( | ||||
|                 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id, | ||||
|                 group_id)['jsConf']['player'] | ||||
|             title = player['title'] | ||||
|             description = player['subtitle'] | ||||
|             duration = player['duration'] | ||||
|             formats, subtitles = self._download_media_selector(programme_id) | ||||
|         else: | ||||
|             playlist = self._download_xml( | ||||
|                 'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, | ||||
|                 group_id, 'Downloading playlist XML') | ||||
|  | ||||
|             no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') | ||||
|             if no_items is not None: | ||||
|                 reason = no_items.get('reason') | ||||
|                 if reason == 'preAvailability': | ||||
|                     msg = 'Episode %s is not yet available' % group_id | ||||
|                 elif reason == 'postAvailability': | ||||
|                     msg = 'Episode %s is no longer available' % group_id | ||||
|                 elif reason == 'noMedia': | ||||
|                     msg = 'Episode %s is not currently available' % group_id | ||||
|                 else: | ||||
|                     msg = 'Episode %s is not available: %s' % (group_id, reason) | ||||
|                 raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|             for item in self._extract_items(playlist): | ||||
|                 kind = item.get('kind') | ||||
|                 if kind != 'programme' and kind != 'radioProgramme': | ||||
|                     continue | ||||
|                 title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text | ||||
|                 description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text | ||||
|                 programme_id = item.get('identifier') | ||||
|                 duration = int(item.get('duration')) | ||||
|                 formats, subtitles = self._download_media_selector(programme_id) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(programme_id, subtitles) | ||||
|   | ||||
| @@ -10,15 +10,15 @@ from ..utils import url_basename | ||||
| class BehindKinkIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', | ||||
|         'md5': '41ad01222b8442089a55528fec43ec01', | ||||
|         'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', | ||||
|         'md5': '507b57d8fdcd75a41a9a7bdb7989c762', | ||||
|         'info_dict': { | ||||
|             'id': '36370', | ||||
|             'id': '37127', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!', | ||||
|             'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...', | ||||
|             'upload_date': '20140814', | ||||
|             'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg', | ||||
|             'title': 'What are you passionate about – Marley Blaze', | ||||
|             'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', | ||||
|             'upload_date': '20141205', | ||||
|             'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
| @@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|         year = mobj.group('year') | ||||
|         month = mobj.group('month') | ||||
|         day = mobj.group('day') | ||||
|         upload_date = year + month + day | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r"'file':\s*'([^']+)'", | ||||
|             webpage, 'URL base') | ||||
|  | ||||
|         video_id = url_basename(video_url) | ||||
|         video_id = video_id.split('_')[0] | ||||
|             r'<source src="([^"]+)"', webpage, 'video URL') | ||||
|         video_id = url_basename(video_url).split('_')[0] | ||||
|         upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'upload_date': upload_date, | ||||
|   | ||||
youtube_dl/extractor/bet.py (new file, 108 additions)
| @@ -0,0 +1,108 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BetIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', | ||||
|             'info_dict': { | ||||
|                 'id': '417cd61c-c793-4e8e-b006-e445ecc45add', | ||||
|                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'BET News Presents: A Conversation With President Obama', | ||||
|                 'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6', | ||||
|                 'duration': 1534, | ||||
|                 'timestamp': 1418075340, | ||||
|                 'upload_date': '20141208', | ||||
|                 'uploader': 'admin', | ||||
|                 'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', | ||||
|             'info_dict': { | ||||
|                 'id': '4160e53b-ad41-43b1-980f-8d85f63121f4', | ||||
|                 'display_id': 'justice-for-ferguson-a-community-reacts', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Justice for Ferguson: A Community Reacts', | ||||
|                 'description': 'A BET News special.', | ||||
|                 'duration': 1696, | ||||
|                 'timestamp': 1416942360, | ||||
|                 'upload_date': '20141125', | ||||
|                 'uploader': 'admin', | ||||
|                 'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         media_url = compat_urllib_parse.unquote(self._search_regex( | ||||
|             [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], | ||||
|             webpage, 'media URL')) | ||||
|  | ||||
|         mrss = self._download_xml(media_url, display_id) | ||||
|  | ||||
|         item = mrss.find('./channel/item') | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'dc': 'http://purl.org/dc/elements/1.1/', | ||||
|             'media': 'http://search.yahoo.com/mrss/', | ||||
|             'ka': 'http://kickapps.com/karss', | ||||
|         } | ||||
|  | ||||
|         title = xpath_text(item, './title', 'title') | ||||
|         description = xpath_text( | ||||
|             item, './description', 'description', fatal=False) | ||||
|  | ||||
|         video_id = xpath_text(item, './guid', 'video id', fatal=False) | ||||
|  | ||||
|         timestamp = parse_iso8601(xpath_text( | ||||
|             item, xpath_with_ns('./dc:date', NS_MAP), | ||||
|             'upload date', fatal=False)) | ||||
|         uploader = xpath_text( | ||||
|             item, xpath_with_ns('./dc:creator', NS_MAP), | ||||
|             'uploader', fatal=False) | ||||
|  | ||||
|         media_content = item.find( | ||||
|             xpath_with_ns('./media:content', NS_MAP)) | ||||
|         duration = int_or_none(media_content.get('duration')) | ||||
|         smil_url = media_content.get('url') | ||||
|  | ||||
|         thumbnail = media_content.find( | ||||
|             xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') | ||||
|  | ||||
|         formats = self._extract_smil_formats(smil_url, display_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
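The BET extractor above resolves namespaced MRSS elements through xpath_with_ns and NS_MAP. A minimal sketch of what that expansion does (not the actual youtube_dl.utils implementation):

    # Sketch only: expand 'prefix:tag' components into ElementTree's
    # '{namespace-uri}tag' (Clark) notation using a prefix map.
    def expand_ns(path, ns_map):
        parts = []
        for comp in path.split('/'):
            if ':' in comp:
                prefix, tag = comp.split(':', 1)
                parts.append('{%s}%s' % (ns_map[prefix], tag))
            else:
                parts.append(comp)
        return '/'.join(parts)

    # expand_ns('./media:content', NS_MAP)
    # -> './{http://search.yahoo.com/mrss/}content'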
| @@ -4,13 +4,17 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     unescapeHTML, | ||||
|     parse_iso8601, | ||||
|     compat_urlparse, | ||||
|     clean_html, | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -64,7 +68,39 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|                 'uploader': 'redvsblue', | ||||
|                 'uploader_id': '792887', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://blip.tv/play/gbk766dkj4Yn', | ||||
|             'md5': 'fe0a33f022d49399a241e84a8ea8b8e3', | ||||
|             'info_dict': { | ||||
|                 'id': '1749452', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20090208', | ||||
|                 'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.', | ||||
|                 'title': 'Nostalgia Critic: Transformers', | ||||
|                 'timestamp': 1234068723, | ||||
|                 'uploader': 'NostalgiaCritic', | ||||
|                 'uploader_id': '246467', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # https://github.com/rg3/youtube-dl/pull/4404 | ||||
|             'note': 'Audio only', | ||||
|             'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982', | ||||
|             'md5': '76c0a56f24e769ceaab21fbb6416a351', | ||||
|             'info_dict': { | ||||
|                 'id': '7103299', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Weekly Manga Recap: Kingdom', | ||||
|                 'description': "And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?", | ||||
|                 'timestamp': 1417660321, | ||||
|                 'upload_date': '20141204', | ||||
|                 'uploader': 'The Rollo T', | ||||
|                 'uploader_id': '407429', | ||||
|                 'duration': 7251, | ||||
|                 'vcodec': 'none', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -74,11 +110,13 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|         # See https://github.com/rg3/youtube-dl/issues/857 and | ||||
|         # https://github.com/rg3/youtube-dl/issues/4197 | ||||
|         if lookup_id: | ||||
|             info_page = self._download_webpage( | ||||
|                 'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id') | ||||
|             video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id') | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|             urlh = self._request_webpage( | ||||
|                 'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id') | ||||
|             url = compat_urlparse.urlparse(urlh.geturl()) | ||||
|             qs = compat_urlparse.parse_qs(url.query) | ||||
|             mobj = re.match(self._VALID_URL, qs['file'][0]) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS') | ||||
|  | ||||
| @@ -114,7 +152,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|             msg = self._download_webpage( | ||||
|                 url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url', | ||||
|                 video_id, 'Resolving URL for %s' % role) | ||||
|             real_url = compat_urlparse.parse_qs(msg)['message'][0] | ||||
|             real_url = compat_urlparse.parse_qs(msg.strip())['message'][0] | ||||
|  | ||||
|             media_type = media_content.get('type') | ||||
|             if media_type == 'text/srt' or url.endswith('.srt'): | ||||
| @@ -129,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|                     'url': real_url, | ||||
|                     'format_id': role, | ||||
|                     'format_note': media_type, | ||||
|                     'vcodec': media_content.get(blip('vcodec')), | ||||
|                     'vcodec': media_content.get(blip('vcodec')) or 'none', | ||||
|                     'acodec': media_content.get(blip('acodec')), | ||||
|                     'filesize': media_content.get('filesize'), | ||||
|                     'width': int(media_content.get('width')), | ||||
|                     'height': int(media_content.get('height')), | ||||
|                     'width': int_or_none(media_content.get('width')), | ||||
|                     'height': int_or_none(media_content.get('height')), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,6 @@ class BreakIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', | ||||
|         'md5': '33aa4ff477ecd124d18d7b5d23b87ce5', | ||||
|         'info_dict': { | ||||
|             'id': '2468056', | ||||
|             'ext': 'mp4', | ||||
|   | ||||
| @@ -265,6 +265,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|                 url = rend['defaultURL'] | ||||
|                 if not url: | ||||
|                     continue | ||||
|                 ext = None | ||||
|                 if rend['remote']: | ||||
|                     url_comp = compat_urllib_parse_urlparse(url) | ||||
|                     if url_comp.path.endswith('.m3u8'): | ||||
| @@ -276,7 +277,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|                         # akamaihd.net, but they don't use f4m manifests | ||||
|                         url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' | ||||
|                         ext = 'flv' | ||||
|                 else: | ||||
|                 if ext is None: | ||||
|                     ext = determine_ext(url) | ||||
|                 size = rend.get('size') | ||||
|                 formats.append({ | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/buzzfeed.py (new file, 74 lines)
							| @@ -0,0 +1,74 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class BuzzFeedIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia', | ||||
|         'info_dict': { | ||||
|             'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss', | ||||
|             'title': 'This Angry Ram Destroys A Punching Bag Like A Boss', | ||||
|             'description': 'Rambro!', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': 'aVCR29aE_OQ', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20141024', | ||||
|                 'uploader_id': 'Buddhanz1', | ||||
|                 'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl', | ||||
|                 'uploader': 'Buddhanz', | ||||
|                 'title': 'Angry Ram destroys a punching bag', | ||||
|             } | ||||
|         }] | ||||
|     }, { | ||||
|         'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', | ||||
|         'params': { | ||||
|             'skip_download': True,  # Got enough YouTube download tests | ||||
|         }, | ||||
|         'info_dict': { | ||||
|             'description': 'Munchkin the Teddy Bear is back !', | ||||
|             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': 'mVmBL8B-In0', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20141124', | ||||
|                 'uploader_id': 'CindysMunchkin', | ||||
|                 'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu', | ||||
|                 'uploader': 'Munchkin the Shih Tzu', | ||||
|                 'title': 'Munchkin the Teddy Bear gets her exercise', | ||||
|             }, | ||||
|         }] | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         all_buckets = re.findall( | ||||
|             r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'', | ||||
|             webpage) | ||||
|  | ||||
|         entries = [] | ||||
|         for bd_json in all_buckets: | ||||
|             bd = json.loads(bd_json) | ||||
|             video = bd.get('video') or bd.get('progload_video') | ||||
|             if not video: | ||||
|                 continue | ||||
|             entries.append(self.url_result(video['url'])) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'entries': entries, | ||||
|         } | ||||
| @@ -236,16 +236,17 @@ class Channel9IE(InfoExtractor): | ||||
|         if contents is None: | ||||
|             return contents | ||||
|  | ||||
|         session_meta = {'session_code': self._extract_session_code(html), | ||||
|                         'session_day': self._extract_session_day(html), | ||||
|                         'session_room': self._extract_session_room(html), | ||||
|                         'session_speakers': self._extract_session_speakers(html), | ||||
|                         } | ||||
|         session_meta = { | ||||
|             'session_code': self._extract_session_code(html), | ||||
|             'session_day': self._extract_session_day(html), | ||||
|             'session_room': self._extract_session_room(html), | ||||
|             'session_speakers': self._extract_session_speakers(html), | ||||
|         } | ||||
|  | ||||
|         for content in contents: | ||||
|             content.update(session_meta) | ||||
|  | ||||
|         return contents | ||||
|         return self.playlist_result(contents) | ||||
|  | ||||
|     def _extract_list(self, content_path): | ||||
|         rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/cinchcast.py (new file, 52 lines)
							| @@ -0,0 +1,52 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CinchcastIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         # Actual test is run in generic, look for undergroundwellness | ||||
|         'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', | ||||
|         'only_matching': True, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         doc = self._download_xml( | ||||
|             'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, | ||||
|             video_id) | ||||
|  | ||||
|         item = doc.find('.//item') | ||||
|         title = xpath_text(item, './title', fatal=True) | ||||
|         date_str = xpath_text( | ||||
|             item, './{http://developer.longtailvideo.com/trac/}date') | ||||
|         upload_date = unified_strdate(date_str, day_first=False) | ||||
|         # duration is present but wrong | ||||
|         formats = [] | ||||
|         formats.append({ | ||||
|             'format_id': 'main', | ||||
|             'url': item.find( | ||||
|                 './{http://search.yahoo.com/mrss/}content').attrib['url'], | ||||
|         }) | ||||
|         backup_url = xpath_text( | ||||
|             item, './{http://developer.longtailvideo.com/trac/}backupContent') | ||||
|         if backup_url: | ||||
|             formats.append({ | ||||
|                 'preference': 2,  # seems to be more reliable | ||||
|                 'format_id': 'backup', | ||||
|                 'url': backup_url, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -15,23 +15,24 @@ class CNETIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', | ||||
|         'md5': '041233212a0d06b179c87cbcca1577b8', | ||||
|         'info_dict': { | ||||
|             'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Hands-on with Microsoft Windows 8.1 Update', | ||||
|             'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', | ||||
|             'thumbnail': 're:^http://.*/flmswindows8.jpg$', | ||||
|             'uploader_id': 'sarah.mitroff@cbsinteractive.com', | ||||
|             'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861', | ||||
|             'uploader': 'Sarah Mitroff', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'requires rtmpdump', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|  | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         data_json = self._html_search_regex( | ||||
|             r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'", | ||||
|             webpage, 'data json') | ||||
| @@ -42,37 +43,31 @@ class CNETIE(InfoExtractor): | ||||
|         if not vdata: | ||||
|             raise ExtractorError('Cannot find video data') | ||||
|  | ||||
|         mpx_account = data['config']['players']['default']['mpx_account'] | ||||
|         vid = vdata['files']['rtmp'] | ||||
|         tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid) | ||||
|  | ||||
|         video_id = vdata['id'] | ||||
|         title = vdata.get('headline') | ||||
|         if title is None: | ||||
|             title = vdata.get('title') | ||||
|         if title is None: | ||||
|             raise ExtractorError('Cannot find title!') | ||||
|         description = vdata.get('dek') | ||||
|         thumbnail = vdata.get('image', {}).get('path') | ||||
|         author = vdata.get('author') | ||||
|         if author: | ||||
|             uploader = '%s %s' % (author['firstName'], author['lastName']) | ||||
|             uploader_id = author.get('email') | ||||
|             uploader_id = author.get('id') | ||||
|         else: | ||||
|             uploader = None | ||||
|             uploader_id = None | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s-%s' % ( | ||||
|                 f['type'], f['format'], | ||||
|                 int_or_none(f.get('bitrate'), 1000, default='')), | ||||
|             'url': f['uri'], | ||||
|             'tbr': int_or_none(f.get('bitrate'), 1000), | ||||
|         } for f in vdata['files']['data']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': tp_link, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'thumbnail': thumbnail, | ||||
|   | ||||
| @@ -13,6 +13,7 @@ import time | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_cookiejar, | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
| @@ -117,6 +118,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|     The following fields are optional: | ||||
|  | ||||
|     alt_title:      A secondary title of the video. | ||||
|     display_id      An alternative identifier for the video, not necessarily | ||||
|                     unique, but available before title. Typically, id is | ||||
|                     something like "4234987", title "Dancing naked mole rats", | ||||
| @@ -128,7 +130,7 @@ class InfoExtractor(object): | ||||
|                         * "resolution" (optional, string "{width}x{height}", | ||||
|                                         deprecated) | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    One-line video description. | ||||
|     description:    Full video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
| @@ -157,8 +159,8 @@ class InfoExtractor(object): | ||||
|  | ||||
|  | ||||
|     _type "playlist" indicates multiple videos. | ||||
|     There must be a key "entries", which is a list or a PagedList object, each | ||||
|     element of which is a valid dictionary under this specfication. | ||||
|     There must be a key "entries", which is a list, an iterable, or a PagedList | ||||
|     object, each element of which is a valid dictionary by this specification. | ||||
|  | ||||
|     Additionally, playlists can have "title" and "id" attributes with the same | ||||
|     semantics as videos (see above). | ||||
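For reference, a minimal playlist result matching the description above could look like this (all values are illustrative):

    # Illustrative only: a minimal "playlist" info dict.
    {
        '_type': 'playlist',
        'id': 'example-playlist',        # optional, same semantics as for videos
        'title': 'Example playlist',     # optional
        'entries': [                     # list, iterable, or PagedList
            {'_type': 'url', 'url': 'http://example.com/video1'},
            {'_type': 'url', 'url': 'http://example.com/video2'},
        ],
    }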
| @@ -173,9 +175,10 @@ class InfoExtractor(object): | ||||
|     _type "url" indicates that the video must be extracted from another | ||||
|     location, possibly by a different extractor. Its only required key is: | ||||
|     "url" - the next URL to extract. | ||||
|  | ||||
|     Additionally, it may have properties believed to be identical to the | ||||
|     resolved entity, for example "title" if the title of the referred video is | ||||
|     The key "ie_key" can be set to the class name (minus the trailing "IE", | ||||
|     e.g. "Youtube") if the extractor class is known in advance. | ||||
|     Additionally, the dictionary may have any properties of the resolved entity | ||||
|     known in advance, for example "title" if the title of the referred video is | ||||
|     known ahead of time. | ||||
|  | ||||
|  | ||||
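A concrete "url" result following the convention above might look like this (URL and title are placeholders):

    # Illustrative only: defer extraction to the named extractor.
    # self.url_result(url, 'Youtube') builds an equivalent dictionary.
    {
        '_type': 'url',
        'url': 'https://www.youtube.com/watch?v=XXXXXXXXXXX',  # placeholder
        'ie_key': 'Youtube',   # class name minus the trailing "IE"
        'title': 'Title known ahead of time, if any',
    }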
| @@ -389,6 +392,10 @@ class InfoExtractor(object): | ||||
|             url_or_request, video_id, note, errnote, fatal=fatal) | ||||
|         if (not fatal) and json_string is False: | ||||
|             return None | ||||
|         return self._parse_json( | ||||
|             json_string, video_id, transform_source=transform_source, fatal=fatal) | ||||
|  | ||||
|     def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): | ||||
|         if transform_source: | ||||
|             json_string = transform_source(json_string) | ||||
|         try: | ||||
| @@ -438,7 +445,7 @@ class InfoExtractor(object): | ||||
|         return video_info | ||||
|  | ||||
|     @staticmethod | ||||
|     def playlist_result(entries, playlist_id=None, playlist_title=None): | ||||
|     def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): | ||||
|         """Returns a playlist""" | ||||
|         video_info = {'_type': 'playlist', | ||||
|                       'entries': entries} | ||||
| @@ -446,6 +453,8 @@ class InfoExtractor(object): | ||||
|             video_info['id'] = playlist_id | ||||
|         if playlist_title: | ||||
|             video_info['title'] = playlist_title | ||||
|         if playlist_description: | ||||
|             video_info['description'] = playlist_description | ||||
|         return video_info | ||||
|  | ||||
|     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||
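With the new playlist_description parameter, an extractor can hand back a described playlist in one call; a usage sketch (assuming entries, playlist_id and webpage are already in scope, not taken from this changeset):

    return self.playlist_result(
        entries, playlist_id,
        playlist_title=self._og_search_title(webpage),
        playlist_description=self._og_search_description(webpage))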
| @@ -789,6 +798,49 @@ class InfoExtractor(object): | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     # TODO: improve extraction | ||||
|     def _extract_smil_formats(self, smil_url, video_id): | ||||
|         smil = self._download_xml( | ||||
|             smil_url, video_id, 'Downloading SMIL file', | ||||
|             'Unable to download SMIL file') | ||||
|  | ||||
|         base = smil.find('./head/meta').get('base') | ||||
|  | ||||
|         formats = [] | ||||
|         rtmp_count = 0 | ||||
|         for video in smil.findall('./body/switch/video'): | ||||
|             src = video.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) | ||||
|             width = int_or_none(video.get('width')) | ||||
|             height = int_or_none(video.get('height')) | ||||
|             proto = video.get('proto') | ||||
|             if not proto: | ||||
|                 if base: | ||||
|                     if base.startswith('rtmp'): | ||||
|                         proto = 'rtmp' | ||||
|                     elif base.startswith('http'): | ||||
|                         proto = 'http' | ||||
|             ext = video.get('ext') | ||||
|             if proto == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, ext)) | ||||
|             elif proto == 'rtmp': | ||||
|                 rtmp_count += 1 | ||||
|                 streamer = video.get('streamer') or base | ||||
|                 formats.append({ | ||||
|                     'url': streamer, | ||||
|                     'play_path': src, | ||||
|                     'ext': 'flv', | ||||
|                     'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), | ||||
|                     'tbr': bitrate, | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _live_title(self, name): | ||||
|         """ Generate the title for a live video """ | ||||
|         now = datetime.datetime.now() | ||||
| @@ -817,6 +869,12 @@ class InfoExtractor(object): | ||||
|                 self._downloader.report_warning(msg) | ||||
|         return res | ||||
|  | ||||
|     def _set_cookie(self, domain, name, value, expire_time=None): | ||||
|         cookie = compat_cookiejar.Cookie( | ||||
|             0, name, value, None, None, domain, None, | ||||
|             None, '/', True, False, expire_time, '', None, None, None) | ||||
|         self._downloader.cookiejar.set_cookie(cookie) | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
|   | ||||
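The new _extract_smil_formats helper above consumes a SMIL document along these lines: it takes the streaming base from head/meta@base and builds one format per body/switch/video element from src, system-bitrate, width and height. A made-up example of the input it parses:

    # Made-up SMIL input for illustration; with an rtmp base, each <video>
    # becomes an RTMP format (url=base, play_path=src, tbr=bitrate/1000).
    SMIL_EXAMPLE = '''<smil>
      <head>
        <meta base="rtmp://cdn.example.com/ondemand"/>
      </head>
      <body>
        <switch>
          <video src="mp4:clips/clip_750.mp4" system-bitrate="750000" width="640" height="360"/>
          <video src="mp4:clips/clip_1500.mp4" system-bitrate="1500000" width="1280" height="720"/>
        </switch>
      </body>
    </smil>'''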
| @@ -13,9 +13,10 @@ from ..compat import ( | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     urlencode_postdata, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     limit_length, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': '637842556329505', | ||||
|             'ext': 'mp4', | ||||
|             'duration': 38, | ||||
|             'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', | ||||
|         } | ||||
|     }, { | ||||
| @@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor): | ||||
|         self._login() | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
| @@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'duration': int(video_data['video_duration']), | ||||
|             'thumbnail': video_data['thumbnail_src'], | ||||
|             'duration': int_or_none(video_data.get('video_duration')), | ||||
|             'thumbnail': video_data.get('thumbnail_src'), | ||||
|         } | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/foxgay.py (new file, 48 lines)
							| @@ -0,0 +1,48 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FoxgayIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' | ||||
|     _TEST = { | ||||
|         'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', | ||||
|         'md5': '80d72beab5d04e1655a56ad37afe6841', | ||||
|         'info_dict': { | ||||
|             'id': '2582', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a', | ||||
|             'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf', | ||||
|             'age_limit': 18, | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(?P<title>.*?)</title>', | ||||
|             webpage, 'title', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         # Find the URL for the iFrame which contains the actual video. | ||||
|         iframe = self._download_webpage( | ||||
|             self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'), | ||||
|             video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r"v_path = '(?P<vid>http://.*?)'", iframe, 'url') | ||||
|         thumb_url = self._html_search_regex( | ||||
|             r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'description': description, | ||||
|             'thumbnail': thumb_url, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
							
								
								
									
youtube_dl/extractor/foxnews.py (new file, 94 lines)
							| @@ -0,0 +1,94 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FoxNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', | ||||
|             'md5': '32aaded6ba3ef0d1c04e238d01031e5e', | ||||
|             'info_dict': { | ||||
|                 'id': '3937480', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Frozen in Time', | ||||
|                 'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', | ||||
|                 'duration': 265, | ||||
|                 'timestamp': 1304411491, | ||||
|                 'upload_date': '20110503', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips', | ||||
|             'md5': '5846c64a1ea05ec78175421b8323e2df', | ||||
|             'info_dict': { | ||||
|                 'id': '3922535568001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", | ||||
|                 'description': "Congressman discusses the president's executive action", | ||||
|                 'duration': 292, | ||||
|                 'timestamp': 1417662047, | ||||
|                 'upload_date': '20141204', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id) | ||||
|  | ||||
|         item = video['channel']['item'] | ||||
|         title = item['title'] | ||||
|         description = item['description'] | ||||
|         timestamp = parse_iso8601(item['dc-date']) | ||||
|  | ||||
|         media_group = item['media-group'] | ||||
|         duration = None | ||||
|         formats = [] | ||||
|         for media in media_group['media-content']: | ||||
|             attributes = media['@attributes'] | ||||
|             video_url = attributes['url'] | ||||
|             if video_url.endswith('.f4m'): | ||||
|                 formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id)) | ||||
|             elif video_url.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv')) | ||||
|             elif not video_url.endswith('.smil'): | ||||
|                 duration = int_or_none(attributes.get('duration')) | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'format_id': media['media-category']['@attributes']['label'], | ||||
|                     'preference': 1, | ||||
|                     'vbr': int_or_none(attributes.get('bitrate')), | ||||
|                     'filesize': int_or_none(attributes.get('fileSize')) | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         media_thumbnail = media_group['media-thumbnail']['@attributes'] | ||||
|         thumbnails = [{ | ||||
|             'url': media_thumbnail['url'], | ||||
|             'width': int_or_none(media_thumbnail.get('width')), | ||||
|             'height': int_or_none(media_thumbnail.get('height')), | ||||
|         }] if media_thumbnail else [] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
| @@ -467,8 +467,17 @@ class GenericIE(InfoExtractor): | ||||
|             'expected_warnings': [ | ||||
|                 'URL could be a direct video link, returning it as such.' | ||||
|             ] | ||||
|         } | ||||
|  | ||||
|         }, | ||||
|         # Cinchcast embed | ||||
|         { | ||||
|             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', | ||||
|             'info_dict': { | ||||
|                 'id': '7141703', | ||||
|                 'ext': 'mp3', | ||||
|                 'upload_date': '20141126', | ||||
|                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -962,6 +971,13 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'SBS') | ||||
|  | ||||
|         # Look for embedded Cinchcast player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Cinchcast') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', | ||||
|             webpage) | ||||
|   | ||||
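For context, the Cinchcast detection added to the generic extractor matches embed markup of roughly this shape (the assetId is the one from the Cinchcast test above; everything else is illustrative):

    # Illustrative markup that the new Cinchcast iframe regex would match.
    sample_embed = ('<iframe src="http://player.cinchcast.com/?platformId=1'
                    '&assetType=single&assetId=7141703" width="100%" height="75"></iframe>')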
| @@ -17,7 +17,6 @@ class GoldenMoustacheIE(InfoExtractor): | ||||
|             'title': 'Suricate - Le Poker', | ||||
|             'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/', | ||||
| @@ -28,7 +27,6 @@ class GoldenMoustacheIE(InfoExtractor): | ||||
|             'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)', | ||||
|             'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a', | ||||
|             'thumbnail': 're:^https?://.*\.(?:png|jpg)$', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
| @@ -42,9 +40,6 @@ class GoldenMoustacheIE(InfoExtractor): | ||||
|             r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title') | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'<strong>([0-9]+)</strong>\s*VUES</span>', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -53,5 +48,4 @@ class GoldenMoustacheIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|   | ||||
| @@ -2,57 +2,52 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GoshgayIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)' | ||||
|     _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.goshgay.com/video4116282', | ||||
|         'md5': '268b9f3c3229105c57859e166dd72b03', | ||||
|         'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', | ||||
|         'md5': '027fcc54459dff0feb0bc06a7aeda680', | ||||
|         'info_dict': { | ||||
|             'id': '4116282', | ||||
|             'id': '299069', | ||||
|             'ext': 'flv', | ||||
|             'title': 'md5:089833a4790b5e103285a07337f245bf', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|             'title': 'DIESEL SFW XXX Video', | ||||
|             'thumbnail': 're:^http://.*\.jpg$', | ||||
|             'duration': 79, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h2>(.*?)<', webpage, 'title') | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<span class="duration">\s*-?\s*(.*?)</span>', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, default='false') | ||||
|         config_url = self._search_regex( | ||||
|             r"'config'\s*:\s*'([^']+)'", webpage, 'config URL') | ||||
|  | ||||
|         config = self._download_xml( | ||||
|             config_url, video_id, 'Downloading player config XML') | ||||
|  | ||||
|         if config is None: | ||||
|             raise ExtractorError('Missing config XML') | ||||
|         if config.tag != 'config': | ||||
|             raise ExtractorError('Missing config attribute') | ||||
|         fns = config.findall('file') | ||||
|         if len(fns) < 1: | ||||
|             raise ExtractorError('Missing media URI') | ||||
|         video_url = fns[0].text | ||||
|  | ||||
|         url_comp = compat_urlparse.urlparse(url) | ||||
|         ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) | ||||
|         flashvars = compat_parse_qs(self._html_search_regex( | ||||
|             r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', | ||||
|             webpage, 'flashvars')) | ||||
|         thumbnail = flashvars.get('url_bigthumb', [None])[0] | ||||
|         video_url = flashvars['flv_url'][0] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'http_referer': ref, | ||||
|             'duration': duration, | ||||
|             'age_limit': 0 if family_friendly == 'true' else 18, | ||||
|         } | ||||
|   | ||||
| @@ -2,9 +2,8 @@ | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import js_to_json | ||||
|  | ||||
|  | ||||
| class HelsinkiIE(InfoExtractor): | ||||
| @@ -24,39 +23,21 @@ class HelsinkiIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         formats = [] | ||||
|  | ||||
|         mobj = re.search(r'file=((\w+):[^&]+)', webpage) | ||||
|         if mobj: | ||||
|             formats.append({ | ||||
|                 'ext': mobj.group(2), | ||||
|                 'play_path': mobj.group(1), | ||||
|                 'url': 'rtmp://flashvideo.it.helsinki.fi/vod/', | ||||
|                 'player_url': 'http://video.helsinki.fi/player.swf', | ||||
|                 'format_note': 'sd', | ||||
|                 'quality': 0, | ||||
|             }) | ||||
|  | ||||
|         mobj = re.search(r'hd\.file=((\w+):[^&]+)', webpage) | ||||
|         if mobj: | ||||
|             formats.append({ | ||||
|                 'ext': mobj.group(2), | ||||
|                 'play_path': mobj.group(1), | ||||
|                 'url': 'rtmp://flashvideo.it.helsinki.fi/vod/', | ||||
|                 'player_url': 'http://video.helsinki.fi/player.swf', | ||||
|                 'format_note': 'hd', | ||||
|                 'quality': 1, | ||||
|             }) | ||||
|  | ||||
|         params = self._parse_json(self._html_search_regex( | ||||
|             r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);', | ||||
|             webpage, 'player code'), video_id, transform_source=js_to_json) | ||||
|         formats = [{ | ||||
|             'url': s['file'], | ||||
|             'ext': 'mp4', | ||||
|         } for s in params['sources']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage).replace('Video: ', ''), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -1,12 +1,12 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import random | ||||
| import string | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import find_xpath_attr | ||||
| from ..utils import ( | ||||
|     find_xpath_attr, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HowStuffWorksIE(InfoExtractor): | ||||
| @@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor): | ||||
|             'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '450221', | ||||
|                 'display_id': 'cool-jobs-iditarod-musher', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Cool Jobs - Iditarod Musher', | ||||
|                 'description': 'md5:82bb58438a88027b8186a1fccb365f90', | ||||
|                 'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.', | ||||
|                 'display_id': 'cool-jobs-iditarod-musher', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 161, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # md5 is not consistent | ||||
|                 'skip_download': True | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '453464', | ||||
|                 'display_id': 'survival-zone-food-and-water-in-the-savanna', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Survival Zone: Food and Water In the Savanna', | ||||
|                 'description': 'md5:7e1c89f6411434970c15fa094170c371', | ||||
|                 'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.', | ||||
|                 'display_id': 'survival-zone-food-and-water-in-the-savanna', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # md5 is not consistent | ||||
|                 'skip_download': True | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '440011', | ||||
|                 'display_id': 'sword-swallowing-1-by-dan-meyer', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Sword Swallowing #1 by Dan Meyer', | ||||
|                 'description': 'md5:b2409e88172913e2e7d3d1159b0ef735', | ||||
|                 'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>', | ||||
|                 'display_id': 'sword-swallowing-1-by-dan-meyer', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # md5 is not consistent | ||||
|                 'skip_download': True | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         clip_js = self._search_regex( | ||||
|             r'(?s)var clip = ({.*?});', webpage, 'clip info') | ||||
|         clip_info = self._parse_json( | ||||
|             clip_js, display_id, transform_source=js_to_json) | ||||
|  | ||||
|         content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id') | ||||
|  | ||||
|         mp4 = self._search_regex( | ||||
|             r'''(?xs)var\s+clip\s*=\s*{\s* | ||||
|                 .+?\s* | ||||
|                 content_id\s*:\s*%s\s*,\s* | ||||
|                 .+?\s* | ||||
|                 mp4\s*:\s*\[(.*?),?\]\s* | ||||
|                 };\s* | ||||
|                 videoData\.push\(clip\);''' % content_id, | ||||
|             webpage, 'mp4', fatal=False, default=None) | ||||
|  | ||||
|         smil = self._download_xml( | ||||
|             'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id, | ||||
|             content_id, 'Downloading video SMIL') | ||||
|  | ||||
|         http_base = find_xpath_attr( | ||||
|             smil, | ||||
|             './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), | ||||
|             'name', | ||||
|             'httpBase').get('content') | ||||
|  | ||||
|         def random_string(str_len=0): | ||||
|             return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)]) | ||||
|  | ||||
|         URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12)) | ||||
|  | ||||
|         video_id = clip_info['content_id'] | ||||
|         formats = [] | ||||
|         m3u8_url = clip_info.get('m3u8') | ||||
|         if m3u8_url: | ||||
|             formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') | ||||
|         for video in clip_info.get('mp4', []): | ||||
|             formats.append({ | ||||
|                 'url': video['src'], | ||||
|                 'format_id': video['bitrate'], | ||||
|                 'vbr': int(video['bitrate'].rstrip('k')), | ||||
|             }) | ||||
|  | ||||
|         if not formats: | ||||
|             smil = self._download_xml( | ||||
|                 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id, | ||||
|                 video_id, 'Downloading video SMIL') | ||||
|  | ||||
|             http_base = find_xpath_attr( | ||||
|                 smil, | ||||
|                 './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), | ||||
|                 'name', | ||||
|                 'httpBase').get('content') | ||||
|  | ||||
|             URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A' | ||||
|  | ||||
|         if mp4: | ||||
|             for video in json.loads('[%s]' % mp4): | ||||
|                 bitrate = video['bitrate'] | ||||
|                 fmt = { | ||||
|                     'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX, | ||||
|                     'format_id': bitrate, | ||||
|                 } | ||||
|                 m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate) | ||||
|                 if m: | ||||
|                     fmt['vbr'] = int(m.group('vbr')) | ||||
|                 formats.append(fmt) | ||||
|         else: | ||||
|             for video in smil.findall( | ||||
|                     './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): | ||||
|                 vbr = int(video.attrib['system-bitrate']) / 1000 | ||||
|                     './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): | ||||
|                 vbr = int_or_none(video.attrib['system-bitrate'], scale=1000) | ||||
|                 formats.append({ | ||||
|                     'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX), | ||||
|                     'format_id': '%dk' % vbr, | ||||
| @@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' : HowStuffWorks' | ||||
|         if title.endswith(TITLE_SUFFIX): | ||||
|             title = title[:-len(TITLE_SUFFIX)] | ||||
|  | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': content_id, | ||||
|             'id': '%s' % video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'title': unescapeHTML(clip_info['clip_title']), | ||||
|             'description': unescapeHTML(clip_info.get('caption')), | ||||
|             'thumbnail': clip_info.get('video_still_url'), | ||||
|             'duration': clip_info.get('duration'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/minhateca.py (new file, 72 lines)
							| @@ -0,0 +1,72 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_filesize, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MinhatecaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.' | ||||
|     _TEST = { | ||||
|         'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)', | ||||
|         'info_dict': { | ||||
|             'id': '125848331', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'youtube-dl test video', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'filesize_approx': 1530000, | ||||
|             'duration': 9, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         token = self._html_search_regex( | ||||
|             r'<input name="__RequestVerificationToken".*?value="([^"]+)"', | ||||
|             webpage, 'request token') | ||||
|         token_data = [ | ||||
|             ('fileId', video_id), | ||||
|             ('__RequestVerificationToken', token), | ||||
|         ] | ||||
|         req = compat_urllib_request.Request( | ||||
|             'http://minhateca.com.br/action/License/Download', | ||||
|             data=compat_urllib_parse.urlencode(token_data)) | ||||
|         req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         data = self._download_json( | ||||
|             req, video_id, note='Downloading metadata') | ||||
|  | ||||
|         video_url = data['redirectUrl'] | ||||
|         title_str = self._html_search_regex( | ||||
|             r'<h1.*?>(.*?)</h1>', webpage, 'title') | ||||
|         title, _, ext = title_str.rpartition('.') | ||||
|         filesize_approx = parse_filesize(self._html_search_regex( | ||||
|             r'<p class="fileSize">(.*?)</p>', | ||||
|             webpage, 'file size approximation', fatal=False)) | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'<p class="downloadsCounter">([0-9]+)</p>', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'ext': ext, | ||||
|             'filesize_approx': filesize_approx, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
| @@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor): | ||||
|             raise ExtractorError('Unable to extract track url') | ||||
|  | ||||
|         PREFIX = ( | ||||
|             r'<div class="cloudcast-play-button-container[^"]*?"' | ||||
|             r'<span class="play-button[^"]*?"' | ||||
|             r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') | ||||
|         title = self._html_search_regex( | ||||
|             PREFIX + r'm-title="([^"]+)"', webpage, 'title') | ||||
|   | ||||
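
The play-button anchor the Mixcloud title regex hangs off changed from a div to a span; a quick check of the new PREFIX against a made-up snippet (the attribute values below are invented, only the class name and the m-title pattern come from the patch):

import re

webpage = ('<span class="play-button circle-button" m-preview="//thumb.example/x.jpg" '
           'm-title="Cryptic Mondays" m-url="/someuser/some-cloudcast/">')

PREFIX = (
    r'<span class="play-button[^"]*?"'
    r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')

print(re.search(PREFIX + r'm-title="([^"]+)"', webpage).group(1))  # Cryptic Mondays
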
| @@ -1,3 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -7,6 +8,7 @@ from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class MySpaceIE(InfoExtractor): | ||||
| @@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689', | ||||
|             'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919', | ||||
|             'info_dict': { | ||||
|                 'id': '100008689', | ||||
|                 'id': '109594919', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Viva La Vida', | ||||
|                 'description': 'The official Viva La Vida video, directed by Hype Williams', | ||||
|                 'uploader': 'Coldplay', | ||||
|                 'uploader_id': 'coldplay', | ||||
|                 'title': 'Little Big Town', | ||||
|                 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', | ||||
|                 'uploader': 'Five Minutes to the Stage', | ||||
|                 'uploader_id': 'fiveminutestothestage', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # song | ||||
|         # songs | ||||
|         { | ||||
|             'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242', | ||||
|             'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681', | ||||
|             'info_dict': { | ||||
|                 'id': '39008454', | ||||
|                 'id': '93388656', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Darkness In My Heart', | ||||
|                 'uploader_id': 'spiderbags', | ||||
|                 'title': 'Of weakened soul...', | ||||
|                 'uploader': 'Killsorrow', | ||||
|                 'uploader_id': 'killsorrow', | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, { | ||||
|             'add_ie': ['Vevo'], | ||||
|             'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041', | ||||
|             'info_dict': { | ||||
|                 'id': 'USZM20600099', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Animal I Have Become', | ||||
|                 'uploader': 'Three Days Grace', | ||||
|                 'timestamp': int, | ||||
|                 'upload_date': '20060502', | ||||
|             }, | ||||
|             'skip': 'VEVO is only available in some countries', | ||||
|         }, { | ||||
|             'add_ie': ['Youtube'], | ||||
|             'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426', | ||||
|             'info_dict': { | ||||
|                 'id': 'ypWvQgnJrSU', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Starset - First Light', | ||||
|                 'description': 'md5:2d5db6c9d11d527683bcda818d332414', | ||||
|                 'uploader': 'Jacob Soren', | ||||
|                 'uploader_id': 'SorenPromotions', | ||||
|                 'upload_date': '20140725', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| @@ -48,16 +75,41 @@ class MySpaceIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         player_url = self._search_regex( | ||||
|             r'playerSwf":"([^"?]*)', webpage, 'player URL') | ||||
|  | ||||
|         if mobj.group('mediatype').startswith('music/song'): | ||||
|             # songs don't store any useful info in the 'context' variable | ||||
|             song_data = self._search_regex( | ||||
|                 r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id, | ||||
|                 webpage, 'song_data', default=None, group=0) | ||||
|             if song_data is None: | ||||
|                 # some songs in an album are not playable | ||||
|                 self.report_warning( | ||||
|                     '%s: No downloadable song on this page' % video_id) | ||||
|                 return | ||||
|  | ||||
|             def search_data(name): | ||||
|                 return self._search_regex( | ||||
|                     r'data-%s="(.*?)"' % name, webpage, name) | ||||
|                     r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, | ||||
|                     song_data, name, default='', group='data') | ||||
|             streamUrl = search_data('stream-url') | ||||
|             if not streamUrl: | ||||
|                 vevo_id = search_data('vevo-id') | ||||
|                 youtube_id = search_data('youtube-id') | ||||
|                 if vevo_id: | ||||
|                     self.to_screen('Vevo video detected: %s' % vevo_id) | ||||
|                     return self.url_result('vevo:%s' % vevo_id, ie='Vevo') | ||||
|                 elif youtube_id: | ||||
|                     self.to_screen('Youtube video detected: %s' % youtube_id) | ||||
|                     return self.url_result(youtube_id, ie='Youtube') | ||||
|                 else: | ||||
|                     raise ExtractorError( | ||||
|                         'Found song but don\'t know how to download it') | ||||
|             info = { | ||||
|                 'id': video_id, | ||||
|                 'title': self._og_search_title(webpage), | ||||
|                 'uploader': search_data('artist-name'), | ||||
|                 'uploader_id': search_data('artist-username'), | ||||
|                 'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             } | ||||
| @@ -79,6 +131,50 @@ class MySpaceIE(InfoExtractor): | ||||
|         info.update({ | ||||
|             'url': rtmp_url, | ||||
|             'play_path': play_path, | ||||
|             'player_url': player_url, | ||||
|             'ext': 'flv', | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class MySpaceAlbumIE(InfoExtractor): | ||||
|     IE_NAME = 'MySpace:album' | ||||
|     _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://myspace.com/starset2/music/album/transmissions-19455773', | ||||
|         'info_dict': { | ||||
|             'title': 'Transmissions', | ||||
|             'id': '19455773', | ||||
|         }, | ||||
|         'playlist_count': 14, | ||||
|         'skip': 'this album is only available in some countries', | ||||
|     }, { | ||||
|         'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029', | ||||
|         'info_dict': { | ||||
|             'title': 'The Demo', | ||||
|             'id': '18596029', | ||||
|         }, | ||||
|         'playlist_count': 5, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         display_id = mobj.group('title') + playlist_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage) | ||||
|         if not tracks_paths: | ||||
|             raise ExtractorError( | ||||
|                 '%s: No songs found, try using proxy' % display_id, | ||||
|                 expected=True) | ||||
|         entries = [ | ||||
|             self.url_result(t_path, ie=MySpaceIE.ie_key()) | ||||
|             for t_path in tracks_paths] | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'display_id': display_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
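
How the rewritten search_data() behaves on a MySpace song page: the search is now scoped to the <button data-song-id=...> markup, either quote style is accepted, and Vevo or YouTube ids are used as a fallback when there is no stream URL. The button snippet below is hypothetical; the attribute names and the regex are the ones from the patch.

import re

song_data = ('<button class="playBtn" data-song-id="93388656" data-stream-url="" '
             "data-vevo-id='' data-youtube-id='ypWvQgnJrSU'>")

def search_data(name):
    m = re.search(r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, song_data)
    return m.group('data') if m else ''

stream_url = search_data('stream-url')
if not stream_url:
    vevo_id = search_data('vevo-id')
    youtube_id = search_data('youtube-id')
    # The extractor returns a url_result() pointing at the Vevo or Youtube IE here.
    print('delegate to', 'Vevo:' + vevo_id if vevo_id else 'Youtube:' + youtube_id)
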
							
								
								
									
29  youtube_dl/extractor/myvidster.py  Normal file
							| @@ -0,0 +1,29 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class MyVidsterIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making', | ||||
|         'md5': '95296d0231c1363222c3441af62dc4ca', | ||||
|         'info_dict': { | ||||
|             'id': '3685814', | ||||
|             'title': 'md5:7d8427d6d02c4fbcef50fe269980c749', | ||||
|             'upload_date': '20141027', | ||||
|             'uploader_id': 'utkualp', | ||||
|             'ext': 'mp4', | ||||
|             'age_limit': 18, | ||||
|         }, | ||||
|         'add_ie': ['XHamster'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         return self.url_result(self._html_search_regex( | ||||
|             r'rel="videolink" href="(?P<real_url>.*)">', | ||||
|             webpage, 'real video url')) | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     remove_end, | ||||
| @@ -10,8 +8,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class NBAIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', | ||||
|         'md5': 'c0edcfc37607344e2ff8f13c378c88a4', | ||||
|         'info_dict': { | ||||
| @@ -21,12 +19,13 @@ class NBAIE(InfoExtractor): | ||||
|             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', | ||||
|             'duration': 181, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' | ||||
| @@ -37,7 +36,7 @@ class NBAIE(InfoExtractor): | ||||
|  | ||||
|         description = self._og_search_description(webpage) | ||||
|         duration = parse_duration( | ||||
|             self._html_search_meta('duration', webpage, 'duration', fatal=False)) | ||||
|             self._html_search_meta('duration', webpage, 'duration')) | ||||
|  | ||||
|         return { | ||||
|             'id': shortened_video_id, | ||||
|   | ||||
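
The only functional change to the NBA pattern is the optional trailing slash; both the old index.html test URL and the newly added one resolve to a path id that is pasted straight into the CDN URL:

import re

_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'

for url in (
    'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
    'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
):
    video_id = re.match(_VALID_URL, url).group('id')
    print('http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4')
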
| @@ -2,11 +2,13 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import os | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse | ||||
| ) | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
| @@ -24,9 +26,12 @@ class NHLBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
|         initial_video_url = info['publishPoint'] | ||||
|         if info['formats'] == '1': | ||||
|             parsed_url = compat_urllib_parse_urlparse(initial_video_url) | ||||
|             filename, ext = os.path.splitext(parsed_url.path) | ||||
|             path = '%s_sd%s' % (filename, ext) | ||||
|             data = compat_urllib_parse.urlencode({ | ||||
|                 'type': 'fvod', | ||||
|                 'path': initial_video_url.replace('.mp4', '_sd.mp4'), | ||||
|                 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]) | ||||
|             }) | ||||
|             path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data | ||||
|             path_doc = self._download_xml( | ||||
| @@ -73,6 +78,17 @@ class NHLIE(NHLBaseInfoExtractor): | ||||
|             'duration': 0, | ||||
|             'upload_date': '20141011', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802', | ||||
|         'md5': 'c78fc64ea01777e426cfc202b746c825', | ||||
|         'info_dict': { | ||||
|             'id': '58665', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Classic Game In Six - April 22, 1979', | ||||
|             'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.', | ||||
|             'duration': 400, | ||||
|             'upload_date': '20100129' | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616', | ||||
|         'only_matching': True, | ||||
| @@ -90,7 +106,7 @@ class NHLIE(NHLBaseInfoExtractor): | ||||
| class NHLVideocenterIE(NHLBaseInfoExtractor): | ||||
|     IE_NAME = 'nhl.com:videocenter' | ||||
|     IE_DESC = 'NHL videocenter category' | ||||
|     _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$' | ||||
|     _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999', | ||||
|         'info_dict': { | ||||
|   | ||||
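
The NHL SD path is now derived with urlparse and splitext instead of a blind '.mp4' replace, so publish points with query strings or other extensions survive. A sketch with a made-up publishPoint URL, using the Python 3 stdlib names rather than the compat wrappers:

import os.path
from urllib.parse import urlencode, urlparse, urlunparse

# Hypothetical publishPoint; note the .flv extension and the query string that
# the old initial_video_url.replace('.mp4', '_sd.mp4') could not handle.
initial_video_url = 'http://video.nhl.com/videocenter/media/clip-58665.flv?ref=console'

parsed_url = urlparse(initial_video_url)
filename, ext = os.path.splitext(parsed_url.path)
path = '%s_sd%s' % (filename, ext)

data = urlencode({
    'type': 'fvod',
    'path': urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]),
})
print('http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data)
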
| @@ -20,6 +20,7 @@ class NocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' | ||||
|     _LOGIN_URL = 'http://noco.tv/do.php' | ||||
|     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' | ||||
|     _SUB_LANG_TEMPLATE = '&sub_lang=%s' | ||||
|     _NETRC_MACHINE = 'noco' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -60,10 +61,12 @@ class NocoIE(InfoExtractor): | ||||
|         if 'erreur' in login: | ||||
|             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) | ||||
|  | ||||
|     def _call_api(self, path, video_id, note): | ||||
|     def _call_api(self, path, video_id, note, sub_lang=None): | ||||
|         ts = compat_str(int(time.time() * 1000)) | ||||
|         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() | ||||
|         url = self._API_URL_TEMPLATE % (path, ts, tk) | ||||
|         if sub_lang: | ||||
|             url += self._SUB_LANG_TEMPLATE % sub_lang | ||||
|  | ||||
|         resp = self._download_json(url, video_id, note) | ||||
|  | ||||
| @@ -91,31 +94,34 @@ class NocoIE(InfoExtractor): | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items(): | ||||
|         for lang, lang_dict in medias['fr']['video_list'].items(): | ||||
|             for format_id, fmt in lang_dict['quality_list'].items(): | ||||
|                 format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id | ||||
|  | ||||
|             video = self._call_api( | ||||
|                 'shows/%s/video/%s/fr' % (video_id, format_id.lower()), | ||||
|                 video_id, 'Downloading %s video JSON' % format_id) | ||||
|                 video = self._call_api( | ||||
|                     'shows/%s/video/%s/fr' % (video_id, format_id.lower()), | ||||
|                     video_id, 'Downloading %s video JSON' % format_id_extended, | ||||
|                     lang if lang != 'none' else None) | ||||
|  | ||||
|             file_url = video['file'] | ||||
|             if not file_url: | ||||
|                 continue | ||||
|                 file_url = video['file'] | ||||
|                 if not file_url: | ||||
|                     continue | ||||
|  | ||||
|             if file_url in ['forbidden', 'not found']: | ||||
|                 popmessage = video['popmessage'] | ||||
|                 self._raise_error(popmessage['title'], popmessage['message']) | ||||
|                 if file_url in ['forbidden', 'not found']: | ||||
|                     popmessage = video['popmessage'] | ||||
|                     self._raise_error(popmessage['title'], popmessage['message']) | ||||
|  | ||||
|             formats.append({ | ||||
|                 'url': file_url, | ||||
|                 'format_id': format_id, | ||||
|                 'width': fmt['res_width'], | ||||
|                 'height': fmt['res_lines'], | ||||
|                 'abr': fmt['audiobitrate'], | ||||
|                 'vbr': fmt['videobitrate'], | ||||
|                 'filesize': fmt['filesize'], | ||||
|                 'format_note': qualities[format_id]['quality_name'], | ||||
|                 'preference': qualities[format_id]['priority'], | ||||
|             }) | ||||
|                 formats.append({ | ||||
|                     'url': file_url, | ||||
|                     'format_id': format_id_extended, | ||||
|                     'width': fmt['res_width'], | ||||
|                     'height': fmt['res_lines'], | ||||
|                     'abr': fmt['audiobitrate'], | ||||
|                     'vbr': fmt['videobitrate'], | ||||
|                     'filesize': fmt['filesize'], | ||||
|                     'format_note': qualities[format_id]['quality_name'], | ||||
|                     'preference': qualities[format_id]['priority'], | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
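
Noco request signing is unchanged; the patch only threads an optional &sub_lang=... suffix through _call_api so every audio language in video_list gets its own set of formats. A sketch of the URL construction (the show path and language are placeholders, the secret and the templates are the ones above):

import hashlib
import time

API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
SUB_LANG_TEMPLATE = '&sub_lang=%s'

def build_api_url(path, sub_lang=None):
    ts = str(int(time.time() * 1000))
    tk = hashlib.md5(
        (hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')
    ).hexdigest()
    url = API_URL_TEMPLATE % (path, ts, tk)
    if sub_lang:
        url += SUB_LANG_TEMPLATE % sub_lang
    return url

# One call per (language, quality) pair; 'hq' and 'en' are placeholder values.
print(build_api_url('shows/123456/video/hq/fr', sub_lang='en'))
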
| @@ -130,7 +130,7 @@ class NTVIE(InfoExtractor): | ||||
|                 'rtmp_conn': 'B:1', | ||||
|                 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128', | ||||
|                 'page_url': 'http://www.ntv.ru', | ||||
|                 'flash_ver': 'LNX 11,2,202,341', | ||||
|                 'flash_version': 'LNX 11,2,202,341', | ||||
|                 'rtmp_live': True, | ||||
|                 'ext': 'flv', | ||||
|                 'filesize': int(size.text), | ||||
|   | ||||
| @@ -4,6 +4,8 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
| @@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         m_error = re.search( | ||||
|             r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage) | ||||
|         if m_error: | ||||
|             raise ExtractorError(clean_html(m_error.group('msg')), expected=True) | ||||
|  | ||||
|         video_title = None | ||||
|         duration = None | ||||
|         video_thumbnail = None | ||||
|   | ||||
| @@ -8,7 +8,6 @@ from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     qualities, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor): | ||||
|         thumbnail = self._search_regex( | ||||
|             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         quality = qualities(['SD', 'HD']) | ||||
|         formats = [{ | ||||
|             'url': source['file'], | ||||
|             'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])), | ||||
|             'quality': quality(source['label']), | ||||
|         } for source in json.loads(js_to_json(self._search_regex( | ||||
|             r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))] | ||||
|         quality = qualities(['sd', 'hd']) | ||||
|         sources = json.loads(js_to_json(self._search_regex( | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) | ||||
|         formats = [] | ||||
|         for container, s in sources.items(): | ||||
|             for qname, video_url in s.items(): | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'container': container, | ||||
|                     'format_id': '%s-%s' % (container, qname), | ||||
|                     'quality': quality(qname), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
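
The PornHd sources block is now a mapping of container to quality to URL rather than a flat list, so the loop is doubled and the quality labels are lowercase. A self-contained version of that loop on placeholder data, with a plain list index standing in for youtube-dl's qualities() helper:

sources = {  # placeholder for what json.loads(js_to_json(...)) returns
    'mp4': {'sd': 'https://cdn.example/video_sd.mp4',
            'hd': 'https://cdn.example/video_hd.mp4'},
    'flv': {'sd': 'https://cdn.example/video_sd.flv'},
}

order = ['sd', 'hd']  # stands in for qualities(['sd', 'hd'])

formats = []
for container, s in sources.items():
    for qname, video_url in s.items():
        formats.append({
            'url': video_url,
            'container': container,
            'format_id': '%s-%s' % (container, qname),
            'quality': order.index(qname) if qname in order else -1,
        })

# _sort_formats() puts the best format last; emulate that ordering here.
formats.sort(key=lambda f: f['quality'])
print([f['format_id'] for f in formats])
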
| @@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Im Interview: Kai Wiesinger', | ||||
|                 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', | ||||
|                 'upload_date': '20140225', | ||||
|                 'upload_date': '20140203', | ||||
|                 'duration': 522.56, | ||||
|             }, | ||||
|             'params': { | ||||
| @@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', | ||||
|                 'description': 'md5:2669cde3febe9bce13904f701e774eb6', | ||||
|                 'upload_date': '20140225', | ||||
|                 'upload_date': '20141014', | ||||
|                 'duration': 2410.44, | ||||
|             }, | ||||
|             'params': { | ||||
| @@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', | ||||
|             'info_dict': { | ||||
|                 'id': '439664', | ||||
|                 'title': 'Episode 8 - Ganze Folge - Playlist', | ||||
|                 'description': 'md5:63b8963e71f481782aeea877658dec84', | ||||
|             }, | ||||
|             'playlist_count': 2, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     _CLIPID_REGEXES = [ | ||||
|         r'"clip_id"\s*:\s+"(\d+)"', | ||||
|         r'clipid: "(\d+)"', | ||||
|         r'clip[iI]d=(\d+)', | ||||
|         r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", | ||||
|     ] | ||||
|     _TITLE_REGEXES = [ | ||||
|         r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', | ||||
| @@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|         r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', | ||||
|         r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', | ||||
|     ] | ||||
|     _PAGE_TYPE_REGEXES = [ | ||||
|         r'<meta name="page_type" content="([^"]+)">', | ||||
|         r"'itemType'\s*:\s*'([^']*)'", | ||||
|     ] | ||||
|     _PLAYLIST_ID_REGEXES = [ | ||||
|         r'content[iI]d=(\d+)', | ||||
|         r"'itemId'\s*:\s*'([^']*)'", | ||||
|     ] | ||||
|     _PLAYLIST_CLIP_REGEXES = [ | ||||
|         r'(?s)data-qvt=.+?<a href="([^"]+)"', | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|     def _extract_clip(self, url, webpage): | ||||
|         clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') | ||||
|  | ||||
|         access_token = 'testclient' | ||||
| @@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|     def _extract_playlist(self, url, webpage): | ||||
|         playlist_id = self._html_search_regex( | ||||
|             self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') | ||||
|         for regex in self._PLAYLIST_CLIP_REGEXES: | ||||
|             playlist_clips = re.findall(regex, webpage) | ||||
|             if playlist_clips: | ||||
|                 title = self._html_search_regex( | ||||
|                     self._TITLE_REGEXES, webpage, 'title') | ||||
|                 description = self._html_search_regex( | ||||
|                     self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) | ||||
|                 entries = [ | ||||
|                     self.url_result( | ||||
|                         re.match('(.+?//.+?)/', url).group(1) + clip_path, | ||||
|                         'ProSiebenSat1') | ||||
|                     for clip_path in playlist_clips] | ||||
|                 return self.playlist_result(entries, playlist_id, title, description) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         page_type = self._search_regex( | ||||
|             self._PAGE_TYPE_REGEXES, webpage, | ||||
|             'page type', default='clip').lower() | ||||
|         if page_type == 'clip': | ||||
|             return self._extract_clip(url, webpage) | ||||
|         elif page_type == 'playlist': | ||||
|             return self._extract_playlist(url, webpage) | ||||
|   | ||||
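
ProSiebenSat1's _real_extract now only decides whether the page is a single clip or a playlist and dispatches; a reduced sketch of that decision on invented page snippets (only the two regexes and the 'clip' default come from the patch):

import re

PAGE_TYPE_REGEXES = [
    r'<meta name="page_type" content="([^"]+)">',
    r"'itemType'\s*:\s*'([^']*)'",
]

def page_type_of(webpage):
    for regex in PAGE_TYPE_REGEXES:
        m = re.search(regex, webpage)
        if m:
            return m.group(1).lower()
    return 'clip'  # default used by the extractor

print(page_type_of('<meta name="page_type" content="CLIP">'))    # clip -> _extract_clip
print(page_type_of("player.config = {'itemType': 'PLAYLIST'}"))  # playlist -> _extract_playlist
print(page_type_of('<html>no marker at all</html>'))             # clip (default)
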
							
								
								
									
55  youtube_dl/extractor/radiode.py  Normal file
							| @@ -0,0 +1,55 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class RadioDeIE(InfoExtractor): | ||||
|     IE_NAME = 'radio.de' | ||||
|     _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' | ||||
|     _TEST = { | ||||
|         'url': 'http://ndr2.radio.de/', | ||||
|         'md5': '3b4cdd011bc59174596b6145cda474a4', | ||||
|         'info_dict': { | ||||
|             'id': 'ndr2', | ||||
|             'ext': 'mp3', | ||||
|             'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': 'md5:591c49c702db1a33751625ebfb67f273', | ||||
|             'thumbnail': 're:^https?://.*\.png', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         radio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, radio_id) | ||||
|  | ||||
|         broadcast = json.loads(self._search_regex( | ||||
|             r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', | ||||
|             webpage, 'broadcast')) | ||||
|  | ||||
|         title = self._live_title(broadcast['name']) | ||||
|         description = broadcast.get('description') or broadcast.get('shortDescription') | ||||
|         thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': stream['streamUrl'], | ||||
|             'ext': stream['streamContentFormat'].lower(), | ||||
|             'acodec': stream['streamContentFormat'], | ||||
|             'abr': stream['bitRate'], | ||||
|             'asr': stream['sampleRate'] | ||||
|         } for stream in broadcast['streamUrls']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': radio_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'is_live': True, | ||||
|             'formats': formats, | ||||
|         } | ||||
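
The new radio.de extractor reads everything from an inline _getBroadcast function. The script fragment below is invented (and wrapped across lines, hence re.DOTALL, which _search_regex does not need on the real single-line page), but the regex and the field names are the ones used above:

import json
import re

webpage = '''<script>
station._getBroadcast = function() { return {"name": "NDR 2",
  "description": "Der beste Mix", "streamUrls": [
    {"streamUrl": "http://edge.example/ndr2.mp3", "streamContentFormat": "MP3",
     "bitRate": 128, "sampleRate": 44100}]} ; }
</script>'''

broadcast = json.loads(re.search(
    r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
    webpage, re.DOTALL).group(1))

formats = [{
    'url': stream['streamUrl'],
    'ext': stream['streamContentFormat'].lower(),
    'acodec': stream['streamContentFormat'],
    'abr': stream['bitRate'],
    'asr': stream['sampleRate'],
} for stream in broadcast['streamUrls']]
print(broadcast['name'], formats[0]['ext'], formats[0]['abr'])
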
| @@ -15,7 +15,7 @@ from ..utils import ( | ||||
|  | ||||
| class RTSIE(InfoExtractor): | ||||
|     IE_DESC = 'RTS.ch' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -23,6 +23,7 @@ class RTSIE(InfoExtractor): | ||||
|             'md5': '753b877968ad8afaeddccc374d4256a5', | ||||
|             'info_dict': { | ||||
|                 'id': '3449373', | ||||
|                 'display_id': 'les-enfants-terribles', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 1488, | ||||
|                 'title': 'Les Enfants Terribles', | ||||
| @@ -30,7 +31,8 @@ class RTSIE(InfoExtractor): | ||||
|                 'uploader': 'Divers', | ||||
|                 'upload_date': '19680921', | ||||
|                 'timestamp': -40280400, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|                 'thumbnail': 're:^https?://.*\.image', | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -38,6 +40,7 @@ class RTSIE(InfoExtractor): | ||||
|             'md5': 'c148457a27bdc9e5b1ffe081a7a8337b', | ||||
|             'info_dict': { | ||||
|                 'id': '5624067', | ||||
|                 'display_id': 'entre-ciel-et-mer', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 3720, | ||||
|                 'title': 'Les yeux dans les cieux - Mon homard au Canada', | ||||
| @@ -45,7 +48,8 @@ class RTSIE(InfoExtractor): | ||||
|                 'uploader': 'Passe-moi les jumelles', | ||||
|                 'upload_date': '20140404', | ||||
|                 'timestamp': 1396635300, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|                 'thumbnail': 're:^https?://.*\.image', | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -53,6 +57,7 @@ class RTSIE(InfoExtractor): | ||||
|             'md5': 'b4326fecd3eb64a458ba73c73e91299d', | ||||
|             'info_dict': { | ||||
|                 'id': '5745975', | ||||
|                 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 48, | ||||
|                 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski', | ||||
| @@ -60,7 +65,8 @@ class RTSIE(InfoExtractor): | ||||
|                 'uploader': 'Hockey', | ||||
|                 'upload_date': '20140403', | ||||
|                 'timestamp': 1396556882, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|                 'thumbnail': 're:^https?://.*\.image', | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|             'skip': 'Blocked outside Switzerland', | ||||
|         }, | ||||
| @@ -69,6 +75,7 @@ class RTSIE(InfoExtractor): | ||||
|             'md5': '9bb06503773c07ce83d3cbd793cebb91', | ||||
|             'info_dict': { | ||||
|                 'id': '5745356', | ||||
|                 'display_id': 'londres-cachee-par-un-epais-smog', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 33, | ||||
|                 'title': 'Londres cachée par un épais smog', | ||||
| @@ -76,7 +83,8 @@ class RTSIE(InfoExtractor): | ||||
|                 'uploader': 'Le Journal en continu', | ||||
|                 'upload_date': '20140403', | ||||
|                 'timestamp': 1396537322, | ||||
|                 'thumbnail': 're:^https?://.*\.image' | ||||
|                 'thumbnail': 're:^https?://.*\.image', | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -84,6 +92,7 @@ class RTSIE(InfoExtractor): | ||||
|             'md5': 'dd8ef6a22dff163d063e2a52bc8adcae', | ||||
|             'info_dict': { | ||||
|                 'id': '5706148', | ||||
|                 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014', | ||||
|                 'ext': 'mp3', | ||||
|                 'duration': 123, | ||||
|                 'title': '"Urban Hippie", de Damien Krisl', | ||||
| @@ -92,22 +101,44 @@ class RTSIE(InfoExtractor): | ||||
|                 'timestamp': 1396551600, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260', | ||||
|             'md5': '968777c8779e5aa2434be96c54e19743', | ||||
|             'info_dict': { | ||||
|                 'id': '6348260', | ||||
|                 'display_id': 'le-19h30', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 1796, | ||||
|                 'title': 'Le 19h30', | ||||
|                 'description': '', | ||||
|                 'uploader': 'Le 19h30', | ||||
|                 'upload_date': '20141201', | ||||
|                 'timestamp': 1417458600, | ||||
|                 'thumbnail': 're:^https?://.*\.image', | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|         video_id = m.group('id') or m.group('id_new') | ||||
|         display_id = m.group('display_id') or m.group('display_id_new') | ||||
|  | ||||
|         def download_json(internal_id): | ||||
|             return self._download_json( | ||||
|                 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, | ||||
|                 video_id) | ||||
|                 display_id) | ||||
|  | ||||
|         all_info = download_json(video_id) | ||||
|  | ||||
|         # video_id extracted out of URL is not always a real id | ||||
|         if 'video' not in all_info and 'audio' not in all_info: | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             page = self._download_webpage(url, display_id) | ||||
|             internal_id = self._html_search_regex( | ||||
|                 r'<(?:video|audio) data-id="([0-9]+)"', page, | ||||
|                 'internal video id') | ||||
| @@ -143,6 +174,7 @@ class RTSIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'formats': formats, | ||||
|             'title': info['title'], | ||||
|             'description': info.get('intro'), | ||||
|   | ||||
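
The widened RTS pattern accepts the old article URLs and the new play/tv URLs, where the numeric id moves into the query string; picking whichever group matched is all _real_extract needs. The first URL below is reconstructed from the id/display_id pair of the first test and is only illustrative; the second is the new test URL:

import re

_VALID_URL = (r'https?://(?:www\.)?rts\.ch/'
              r'(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
              r'|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))')

for url in (
    'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',  # reconstructed
    'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
):
    m = re.match(_VALID_URL, url)
    video_id = m.group('id') or m.group('id_new')
    display_id = m.group('display_id') or m.group('display_id_new')
    print(video_id, display_id)
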
| @@ -53,6 +53,7 @@ class RutubeIE(InfoExtractor): | ||||
|         m3u8_url = options['video_balancer'].get('m3u8') | ||||
|         if m3u8_url is None: | ||||
|             raise ExtractorError('Couldn\'t find m3u8 manifest url') | ||||
|         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') | ||||
|  | ||||
|         return { | ||||
|             'id': video['id'], | ||||
| @@ -60,8 +61,7 @@ class RutubeIE(InfoExtractor): | ||||
|             'description': video['description'], | ||||
|             'duration': video['duration'], | ||||
|             'view_count': video['hits'], | ||||
|             'url': m3u8_url, | ||||
|             'ext': 'mp4', | ||||
|             'formats': formats, | ||||
|             'thumbnail': video['thumbnail_url'], | ||||
|             'uploader': author.get('name'), | ||||
|             'uploader_id': compat_str(author['id']) if author else None, | ||||
|   | ||||
| @@ -5,61 +5,27 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CinemassacreIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|             'md5': 'fde81fbafaee331785f58cd6c0d46190', | ||||
|             'info_dict': { | ||||
|                 'id': '19911', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20121110', | ||||
|                 'title': '“Angry Video Game Nerd: The Movie” – Trailer', | ||||
|                 'description': 'md5:fb87405fcb42a331742a0dce2708560b', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|             'md5': 'd72f10cd39eac4215048f62ab477a511', | ||||
|             'info_dict': { | ||||
|                 'id': '521be8ef82b16', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20131002', | ||||
|                 'title': 'The Mummy’s Hand (1940)', | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
| class ScreenwaveMediaIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') | ||||
|         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage) | ||||
|         if not mobj: | ||||
|             raise ExtractorError('Can\'t extract embed url and video id') | ||||
|         playerdata_url = mobj.group('embed_url') | ||||
|         video_id = mobj.group('video_id') | ||||
|         full_video_id = mobj.group('full_video_id') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<title>(?P<title>.+?)\|', webpage, 'title') | ||||
|         video_description = self._html_search_regex( | ||||
|             r'<div class="entry-content">(?P<description>.+?)</div>', | ||||
|             webpage, 'description', flags=re.DOTALL, fatal=False) | ||||
|         video_thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') | ||||
|         video_id = self._match_id(url) | ||||
|         playerdata = self._download_webpage(url, video_id, 'Downloading player webpage') | ||||
|  | ||||
|         vidtitle = self._search_regex( | ||||
|             r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') | ||||
|         vidurl = self._search_regex( | ||||
|             r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') | ||||
|             r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/') | ||||
|  | ||||
|         videolist_url = None | ||||
|  | ||||
| @@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor): | ||||
|         if mobj: | ||||
|             videoserver = mobj.group('videoserver') | ||||
|             mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) | ||||
|             vidid = mobj.group('vidid') if mobj else full_video_id | ||||
|             vidid = mobj.group('vidid') if mobj else video_id | ||||
|             videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) | ||||
|         else: | ||||
|             mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) | ||||
| @@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor): | ||||
|                 file_ = src.partition(':')[-1] | ||||
|                 width = int_or_none(video.get('width')) | ||||
|                 height = int_or_none(video.get('height')) | ||||
|                 bitrate = int_or_none(video.get('system-bitrate')) | ||||
|                 bitrate = int_or_none(video.get('system-bitrate'), scale=1000) | ||||
|                 format = { | ||||
|                     'url': baseurl + file_, | ||||
|                     'format_id': src.rpartition('.')[0].rpartition('_')[-1], | ||||
|                 } | ||||
|                 if width or height: | ||||
|                     format.update({ | ||||
|                         'tbr': bitrate // 1000 if bitrate else None, | ||||
|                         'tbr': bitrate, | ||||
|                         'width': width, | ||||
|                         'height': height, | ||||
|                     }) | ||||
|                 else: | ||||
|                     format.update({ | ||||
|                         'abr': bitrate // 1000 if bitrate else None, | ||||
|                         'abr': bitrate, | ||||
|                         'vcodec': 'none', | ||||
|                     }) | ||||
|                 formats.append(format) | ||||
|             self._sort_formats(formats) | ||||
|         else: | ||||
|             formats = [{ | ||||
|                 'url': vidurl, | ||||
|             }] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'title': vidtitle, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CinemassacreIE(InfoExtractor): | ||||
|     _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|             'md5': 'fde81fbafaee331785f58cd6c0d46190', | ||||
|             'info_dict': { | ||||
|                 'id': 'Cinemassacre-19911', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20121110', | ||||
|                 'title': '“Angry Video Game Nerd: The Movie” – Trailer', | ||||
|                 'description': 'md5:fb87405fcb42a331742a0dce2708560b', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|             'md5': 'd72f10cd39eac4215048f62ab477a511', | ||||
|             'info_dict': { | ||||
|                 'id': 'Cinemassacre-521be8ef82b16', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20131002', | ||||
|                 'title': 'The Mummy’s Hand (1940)', | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|         video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         playerdata_url = self._search_regex( | ||||
|             r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', | ||||
|             webpage, 'player data URL') | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<title>(?P<title>.+?)\|', webpage, 'title') | ||||
|         video_description = self._html_search_regex( | ||||
|             r'<div class="entry-content">(?P<description>.+?)</div>', | ||||
|             webpage, 'description', flags=re.DOTALL, fatal=False) | ||||
|         video_thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'display_id': display_id, | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'upload_date': video_date, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'url': playerdata_url, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class TeamFourIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' | ||||
|     _TEST = { | ||||
|         'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', | ||||
|         'info_dict': { | ||||
|             'id': 'TeamFourStar-5292a02f20bfa', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20130401', | ||||
|             'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', | ||||
|             'title': 'A Moment With TFS Episode 4', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         playerdata_url = self._search_regex( | ||||
|             r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', | ||||
|             webpage, 'player data URL') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<div class="heroheadingtitle">(?P<title>.+?)</div>', | ||||
|             webpage, 'title') | ||||
|         video_date = unified_strdate(self._html_search_regex( | ||||
|             r'<div class="heroheadingdate">(?P<date>.+?)</div>', | ||||
|             webpage, 'date', fatal=False)) | ||||
|         video_description = self._html_search_regex( | ||||
|             r'(?s)<div class="postcontent">(?P<description>.+?)</div>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         video_thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'display_id': display_id, | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'upload_date': video_date, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'url': playerdata_url, | ||||
|         } | ||||
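
After this split, CinemassacreIE and the new TeamFourIE only locate the ScreenwaveMedia player URL and return it as a url_transparent result, so ScreenwaveMediaIE does the actual format extraction. The page snippet below is invented around the embed URL from the test above:

import re

webpage = ('<iframe src="http://player.screenwavemedia.com/play/play.php'
           '?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911"></iframe>')

playerdata_url = re.search(
    r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
    webpage).group(1)

# What the site extractor hands back; ScreenwaveMediaIE resolves the formats.
print({
    '_type': 'url_transparent',
    'url': playerdata_url,
    'display_id': 'avgn-the-movie-trailer',
})
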
| @@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor): | ||||
|         ext = info['jsplayer']['video_extension'] | ||||
|         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) | ||||
|         description = self._html_search_regex( | ||||
|             r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage, | ||||
|             r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage, | ||||
|             'description', fatal=False) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') | ||||
|  | ||||
|         if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: | ||||
|             raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) | ||||
|             raise ExtractorError( | ||||
|                 'Broadcast %s does not exist' % broadcast_id, expected=True) | ||||
|  | ||||
|         # Adult content | ||||
|         if re.search('EroConfirmText">', broadcast_page) is not None: | ||||
|  | ||||
|             (username, password) = self._get_login_info() | ||||
|             if username is None: | ||||
|                 raise ExtractorError('Erotic broadcasts allowed only for registered users, ' | ||||
|                                      'use --username and --password options to provide account credentials.', expected=True) | ||||
|                 raise ExtractorError( | ||||
|                     'Erotic broadcasts allowed only for registered users, ' | ||||
|                     'use --username and --password options to provide account credentials.', | ||||
|                     expected=True) | ||||
|  | ||||
|             login_form = { | ||||
|                 'login-hint53': '1', | ||||
| @@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|                 'password': password, | ||||
|             } | ||||
|  | ||||
|             request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) | ||||
|             request = compat_urllib_request.Request( | ||||
|                 broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) | ||||
|             request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') | ||||
|             broadcast_page = self._download_webpage( | ||||
|                 request, broadcast_id, 'Logging in and confirming age') | ||||
|  | ||||
|             if re.search('>Неверный логин или пароль<', broadcast_page) is not None: | ||||
|                 raise ExtractorError('Unable to log in: bad username or password', expected=True) | ||||
| @@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|             adult_content = False | ||||
|  | ||||
|         ticket = self._html_search_regex( | ||||
|             'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', | ||||
|             r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)", | ||||
|             broadcast_page, 'broadcast ticket') | ||||
|  | ||||
|         url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket | ||||
| @@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|         if broadcast_password: | ||||
|             url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') | ||||
|         broadcast_json_page = self._download_webpage( | ||||
|             url, broadcast_id, 'Downloading broadcast JSON') | ||||
|  | ||||
|         try: | ||||
|             broadcast_json = json.loads(broadcast_json_page) | ||||
|  | ||||
|             protected_broadcast = broadcast_json['_pass_protected'] == 1 | ||||
|             if protected_broadcast and not broadcast_password: | ||||
|                 raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) | ||||
|                 raise ExtractorError( | ||||
|                     'This broadcast is protected by a password, use the --video-password option', | ||||
|                     expected=True) | ||||
|  | ||||
|             broadcast_offline = broadcast_json['is_play'] == 0 | ||||
|             if broadcast_offline: | ||||
|                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) | ||||
|  | ||||
|             rtmp_url = broadcast_json['_server'] | ||||
|             if not rtmp_url.startswith('rtmp://'): | ||||
|             mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url) | ||||
|             if not mobj: | ||||
|                 raise ExtractorError('Unexpected broadcast rtmp URL') | ||||
|  | ||||
|             broadcast_playpath = broadcast_json['_streamName'] | ||||
|             broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']) | ||||
|             broadcast_thumbnail = broadcast_json['_imgURL'] | ||||
|             broadcast_title = broadcast_json['title'] | ||||
|             broadcast_title = self._live_title(broadcast_json['title']) | ||||
|             broadcast_description = broadcast_json['description'] | ||||
|             broadcaster_nick = broadcast_json['nick'] | ||||
|             broadcaster_login = broadcast_json['login'] | ||||
| @@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|             'age_limit': 18 if adult_content else 0, | ||||
|             'ext': 'flv', | ||||
|             'play_path': broadcast_playpath, | ||||
|             'player_url': 'http://pics.smotri.com/broadcast_play.swf', | ||||
|             'app': broadcast_app, | ||||
|             'rtmp_live': True, | ||||
|             'rtmp_conn': rtmp_conn | ||||
|             'rtmp_conn': rtmp_conn, | ||||
|             'is_live': True, | ||||
|         } | ||||
|   | ||||
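
The Smotri broadcast RTMP URL is no longer passed through whole: the app is parsed out of it and combined with _vidURL, and the result is marked as live and given the broadcast player. A sketch on a placeholder JSON reply (field names from the patch, values invented):

import re

broadcast_json = {  # placeholder values
    '_server': 'rtmp://broadcast.example.smotri.com/live',
    '_streamName': 'somebroadcaster_12345',
    '_vidURL': 'somebroadcaster',
}

rtmp_url = broadcast_json['_server']
mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
if not mobj:
    raise ValueError('Unexpected broadcast rtmp URL')

print({
    'url': rtmp_url,
    'app': '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']),
    'play_path': broadcast_json['_streamName'],
    'player_url': 'http://pics.smotri.com/broadcast_play.swf',
    'rtmp_live': True,
    'is_live': True,
})
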
| @@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|                             (?P<title>[\w\d-]+)/? | ||||
|                             (?P<token>[^?]+?)?(?:[?].*)?$) | ||||
|                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) | ||||
|                           (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$) | ||||
|                           (?:/?\?secret_token=(?P<secret_token>[^&]+))?) | ||||
|                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) | ||||
|                     ) | ||||
|                     ''' | ||||
|   | ||||
| @@ -4,10 +4,11 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_filesize | ||||
|  | ||||
|  | ||||
| class TagesschauIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html', | ||||
| @@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor): | ||||
|             'description': 'md5:69da3c61275b426426d711bde96463ab', | ||||
|             'thumbnail': 're:^http:.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', | ||||
|         'md5': '3c54c1f6243d279b706bde660ceec633', | ||||
|         'info_dict': { | ||||
|             'id': '5727', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'md5:695c01bfd98b7e313c501386327aea59', | ||||
|             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', | ||||
|             'thumbnail': 're:^http:.*\.jpg$', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     _FORMATS = { | ||||
| @@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         if video_id.startswith('-'): | ||||
|             display_id = video_id.strip('-') | ||||
|         else: | ||||
|             display_id = video_id | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = video_id.lstrip('-') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         playerpage = self._download_webpage( | ||||
|             'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, | ||||
|             display_id, 'Downloading player page') | ||||
|         player_url = self._html_search_meta( | ||||
|             'twitter:player', webpage, 'player URL', default=None) | ||||
|         if player_url: | ||||
|             playerpage = self._download_webpage( | ||||
|                 player_url, display_id, 'Downloading player page') | ||||
|  | ||||
|         medias = re.findall( | ||||
|             r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', | ||||
|             playerpage) | ||||
|  | ||||
|         formats = [] | ||||
|         for url, ext, res in medias: | ||||
|             f = { | ||||
|                 'format_id': res + '_' + ext, | ||||
|                 'url': url, | ||||
|                 'ext': ext, | ||||
|             } | ||||
|             f.update(self._FORMATS.get(res, {})) | ||||
|             formats.append(f) | ||||
|             medias = re.findall( | ||||
|                 r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', | ||||
|                 playerpage) | ||||
|             formats = [] | ||||
|             for url, ext, res in medias: | ||||
|                 f = { | ||||
|                     'format_id': res + '_' + ext, | ||||
|                     'url': url, | ||||
|                     'ext': ext, | ||||
|                 } | ||||
|                 f.update(self._FORMATS.get(res, {})) | ||||
|                 formats.append(f) | ||||
|             thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1] | ||||
|             title = self._og_search_title(webpage).strip() | ||||
|             description = self._og_search_description(webpage).strip() | ||||
|         else: | ||||
|             download_text = self._search_regex( | ||||
|                 r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>', | ||||
|                 webpage, 'download links') | ||||
|             links = re.finditer( | ||||
|                 r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', | ||||
|                 download_text) | ||||
|             formats = [] | ||||
|             for l in links: | ||||
|                 format_id = self._search_regex( | ||||
|                     r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID') | ||||
|                 format = { | ||||
|                     'format_id': format_id, | ||||
|                     'url': l.group('url'), | ||||
|                     'format_name': l.group('name'), | ||||
|                 } | ||||
|                 m = re.match( | ||||
|                     r'''(?x) | ||||
|                         Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10; | ||||
|                         (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10; | ||||
|                         (?P<vbr>[0-9]+)kbps&\#10; | ||||
|                         Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10; | ||||
|                         Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''', | ||||
|                     l.group('title')) | ||||
|                 if m: | ||||
|                     format.update({ | ||||
|                         'format_note': m.group('audio_desc'), | ||||
|                         'vcodec': m.group('vcodec'), | ||||
|                         'width': int(m.group('width')), | ||||
|                         'height': int(m.group('height')), | ||||
|                         'abr': int(m.group('abr')), | ||||
|                         'vbr': int(m.group('vbr')), | ||||
|                         'filesize_approx': parse_filesize(m.group('filesize_approx')), | ||||
|                     }) | ||||
|                 formats.append(format) | ||||
|             thumbnail_fn = self._search_regex( | ||||
|                 r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"', | ||||
|                 webpage, 'thumbnail', fatal=False) | ||||
|             description = self._html_search_regex( | ||||
|                 r'(?s)<p class="teasertext">(.*?)</p>', | ||||
|                 webpage, 'description', fatal=False) | ||||
|             title = self._html_search_regex( | ||||
|                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1] | ||||
|         thumbnail = 'http://www.tagesschau.de' + thumbnail_fn | ||||
|  | ||||
|         return { | ||||
|             'id': display_id, | ||||
|             'title': self._og_search_title(webpage).strip(), | ||||
|             'thumbnail': 'http://www.tagesschau.de' + thumbnail, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage).strip(), | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
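In the new Tagesschau fallback branch the format id comes from the download link itself rather than the player page. An illustration with an assumed download URL (the real links are scraped from the "Wir bieten dieses Video..." block):

```python
import re

# Assumed shape of a tagesschau download link, for illustration only.
url = 'http://download.media.tagesschau.de/video/2014/1204/TV-20141204-2021-5401.webl.h264.mp4'

# Same pattern the extractor uses to turn the file name into a format_id.
format_id = re.search(r'.*/[^/.]+\.([^/]+)\.[^/.]+', url).group(1)
print(format_id)  # -> webl.h264
```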
| @@ -199,8 +199,9 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|         webpage = self._download_webpage(url, name) | ||||
|  | ||||
|         config_json = self._html_search_regex( | ||||
|             r"data-config='([^']+)", webpage, 'config') | ||||
|         config = json.loads(config_json) | ||||
|             r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>', | ||||
|             webpage, 'config') | ||||
|         config = json.loads(config_json)['config'] | ||||
|         video_url = config['video']['url'] | ||||
|         thumbnail = config.get('image', {}).get('url') | ||||
|  | ||||
|   | ||||
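The TED change reads the JSON object that the page passes to its `pages.jwplayer` call instead of a `data-config` attribute. A self-contained sketch against a hypothetical page fragment:

```python
import json
import re

# Hypothetical fragment shaped like the markup the new pattern targets.
webpage = ('<script>q("pages.jwplayer", {"config": {"video": '
           '{"url": "http://example.com/talk.mp4"}}} )</script>')

config_json = re.search(
    r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>', webpage).group(1)
config = json.loads(config_json)['config']
print(config['video']['url'])  # -> http://example.com/talk.mp4
```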
| @@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor): | ||||
|         |tun\.in/(?P<redirect_id>[A-Za-z0-9]+) | ||||
|     ) | ||||
|     ''' | ||||
|     _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station' | ||||
|  | ||||
|     _INFO_DICT = { | ||||
|         'id': '34682', | ||||
| @@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor): | ||||
|             mobj = re.match(self._VALID_URL, url) | ||||
|         station_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, station_id, note='Downloading station webpage') | ||||
|         station_info = self._download_json( | ||||
|             self._API_URL_TEMPLATE.format(station_id), | ||||
|             station_id, note='Downloading station JSON') | ||||
|  | ||||
|         payload = self._html_search_regex( | ||||
|             r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data') | ||||
|         json_data = json.loads(payload) | ||||
|         station_info = json_data['Station']['broadcast'] | ||||
|         title = station_info['Title'] | ||||
|         thumbnail = station_info.get('Logo') | ||||
|         location = station_info.get('Location') | ||||
|   | ||||
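TuneIn now asks the tuner API for station JSON instead of scraping `TuneIn.payload` out of the page; the request URL is just the template above filled with the station id:

```python
# Station id taken from the existing _INFO_DICT test data.
_API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
print(_API_URL_TEMPLATE.format('34682'))
# -> http://tunein.com/tuner/tune/?stationId=34682&tuneType=Station
```

The returned JSON is then read directly (`Title`, `Logo`, `Location`), so the HTML download step disappears.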
| @@ -1,32 +1,30 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     str_to_int, | ||||
|     parse_age_limit, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TvigleIE(InfoExtractor): | ||||
|     IE_NAME = 'tvigle' | ||||
|     IE_DESC = 'Интернет-телевидение Tvigle.ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$' | ||||
|     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tvigle.ru/video/brat/', | ||||
|             'md5': 'ff4344a4894b0524441fb6f8218dc716', | ||||
|             'url': 'http://www.tvigle.ru/video/sokrat/', | ||||
|             'md5': '36514aed3657d4f70b4b2cef8eb520cd', | ||||
|             'info_dict': { | ||||
|                 'id': '5118490', | ||||
|                 'display_id': 'brat', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Брат', | ||||
|                 'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb', | ||||
|                 'duration': 5722.6, | ||||
|                 'age_limit': 16, | ||||
|                 'id': '1848932', | ||||
|                 'display_id': 'sokrat', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Сократ', | ||||
|                 'description': 'md5:a05bd01be310074d5833efc6743be95e', | ||||
|                 'duration': 6586, | ||||
|                 'age_limit': 0, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor): | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
| @@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor): | ||||
|         title = item['title'] | ||||
|         description = item['description'] | ||||
|         thumbnail = item['thumbnail'] | ||||
|         duration = float_or_none(item['durationMilliseconds'], 1000) | ||||
|         age_limit = str_to_int(item['ageRestrictions']) | ||||
|         duration = float_or_none(item.get('durationMilliseconds'), 1000) | ||||
|         age_limit = parse_age_limit(item.get('ageRestrictions')) | ||||
|  | ||||
|         formats = [] | ||||
|         for vcodec, fmts in item['videos'].items(): | ||||
|   | ||||
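The Tvigle rewrite swaps the `re.match` boilerplate for `_match_id`, which just applies `_VALID_URL` and returns the `id` group, and uses `.get()` plus `parse_age_limit` so a missing or oddly formatted `ageRestrictions` value no longer crashes extraction. A standalone equivalent of the id matching for the URL from the updated test:

```python
import re

_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'

# What _match_id boils down to for this pattern.
display_id = re.match(_VALID_URL, 'http://www.tvigle.ru/video/sokrat/').group('id')
print(display_id)  # -> sokrat
```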
| @@ -6,7 +6,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_iso8601, | ||||
|     qualities, | ||||
| ) | ||||
| @@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor): | ||||
|             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         if video['is_geo_blocked']: | ||||
|             raise ExtractorError( | ||||
|                 'This content is not available in your country due to copyright reasons', expected=True) | ||||
|             self.report_warning( | ||||
|                 'This content might not be available in your country due to copyright reasons') | ||||
|  | ||||
|         streams = self._download_json( | ||||
|             'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') | ||||
|   | ||||
							
								
								
									
109 youtube_dl/extractor/twentyfourvideo.py (new file)
							| @@ -0,0 +1,109 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TwentyFourVideoIE(InfoExtractor): | ||||
|     IE_NAME = '24video' | ||||
|     _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.24video.net/video/view/1044982', | ||||
|             'md5': '48dd7646775690a80447a8dca6a2df76', | ||||
|             'info_dict': { | ||||
|                 'id': '1044982', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Эротика каменного века', | ||||
|                 'description': 'Как смотрели порно в каменном веке.', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'uploader': 'SUPERTELO', | ||||
|                 'duration': 31, | ||||
|                 'timestamp': 1275937857, | ||||
|                 'upload_date': '20100607', | ||||
|                 'age_limit': 18, | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.24video.net/player/new24_play.swf?id=1044982', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://www.24video.net/video/view/%s' % video_id, video_id) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._html_search_regex( | ||||
|             r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         duration = int_or_none(self._og_search_property( | ||||
|             'duration', webpage, 'duration', fatal=False)) | ||||
|         timestamp = parse_iso8601(self._search_regex( | ||||
|             r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">', | ||||
|             webpage, 'upload date')) | ||||
|  | ||||
|         uploader = self._html_search_regex( | ||||
|             r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|  | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'<span class="video-views">(\d+) просмотр', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|         comment_count = int_or_none(self._html_search_regex( | ||||
|             r'<div class="comments-title" id="comments-count">(\d+) комментари', | ||||
|             webpage, 'comment count', fatal=False)) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         pc_video = self._download_xml( | ||||
|             'http://www.24video.net/video/xml/%s?mode=play' % video_id, | ||||
|             video_id, 'Downloading PC video URL').find('.//video') | ||||
|  | ||||
|         formats.append({ | ||||
|             'url': pc_video.attrib['url'], | ||||
|             'format_id': 'pc', | ||||
|             'quality': 1, | ||||
|         }) | ||||
|  | ||||
|         like_count = int_or_none(pc_video.get('ratingPlus')) | ||||
|         dislike_count = int_or_none(pc_video.get('ratingMinus')) | ||||
|         age_limit = 18 if pc_video.get('adult') == 'true' else 0 | ||||
|  | ||||
|         mobile_video = self._download_xml( | ||||
|             'http://www.24video.net/video/xml/%s' % video_id, | ||||
|             video_id, 'Downloading mobile video URL').find('.//video') | ||||
|  | ||||
|         formats.append({ | ||||
|             'url': mobile_video.attrib['url'], | ||||
|             'format_id': 'mobile', | ||||
|             'quality': 0, | ||||
|         }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|         } | ||||
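The new 24video extractor reads the PC and mobile stream URLs from two small XML documents whose `<video>` element carries the URL, rating and adult-flag attributes. A toy example of that attribute access; the XML shape here is inferred from the code above, not copied from the site:

```python
import xml.etree.ElementTree as ET

# Invented XML mimicking what the code expects from /video/xml/<id>?mode=play.
doc = ET.fromstring(
    '<response><video url="http://cdn.example/1044982.mp4" '
    'ratingPlus="10" ratingMinus="2" adult="true"/></response>')

video = doc.find('.//video')
print(video.attrib['url'])            # -> http://cdn.example/1044982.mp4
print(video.get('ratingPlus'))        # -> 10
print(video.get('adult') == 'true')   # -> True
```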
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| @@ -5,6 +6,8 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     ExtractorError, | ||||
|     parse_iso8601, | ||||
| ) | ||||
| @@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor): | ||||
|         """ | ||||
|     _PAGE_LIMIT = 100 | ||||
|     _API_BASE = 'https://api.twitch.tv' | ||||
|     _LOGIN_URL = 'https://secure.twitch.tv/user/login' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.twitch.tv/riotgames/b/577357806', | ||||
|         'info_dict': { | ||||
| @@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor): | ||||
|             'view_count': info['views'], | ||||
|         } | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, 'Downloading login page') | ||||
|  | ||||
|         authenticity_token = self._search_regex( | ||||
|             r'<input name="authenticity_token" type="hidden" value="([^"]+)"', | ||||
|             login_page, 'authenticity token') | ||||
|  | ||||
|         login_form = { | ||||
|             'utf8': '✓'.encode('utf-8'), | ||||
|             'authenticity_token': authenticity_token, | ||||
|             'redirect_on_login': '', | ||||
|             'embed_form': 'false', | ||||
|             'mp_source_action': '', | ||||
|             'follow': '', | ||||
|             'user[login]': username, | ||||
|             'user[password]': password, | ||||
|         } | ||||
|  | ||||
|         request = compat_urllib_request.Request( | ||||
|             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) | ||||
|         request.add_header('Referer', self._LOGIN_URL) | ||||
|         response = self._download_webpage( | ||||
|             request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         m = re.search( | ||||
|             r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response) | ||||
|         if m: | ||||
|             raise ExtractorError( | ||||
|                 'Unable to login: %s' % m.group('msg').strip(), expected=True) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj.group('chapterid'): | ||||
|   | ||||
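The Twitch login step posts a plain urlencoded form containing the authenticity token scraped from the login page, with a Referer header pointing back at the login URL. What the body roughly looks like with placeholder credentials (the codebase goes through `compat_urllib_parse`; field order may differ):

```python
from urllib.parse import urlencode  # compat_urllib_parse.urlencode in youtube-dl

login_form = {
    'authenticity_token': 'TOKEN_FROM_LOGIN_PAGE',  # placeholder value
    'user[login]': 'someuser',
    'user[password]': 'hunter2',
}
print(urlencode(login_form))
# e.g. authenticity_token=TOKEN_FROM_LOGIN_PAGE&user%5Blogin%5D=someuser&user%5Bpassword%5D=hunter2
```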
| @@ -97,11 +97,8 @@ class UdemyIE(InfoExtractor): | ||||
|         if 'returnUrl' not in response: | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|  | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         lecture_id = mobj.group('id') | ||||
|         lecture_id = self._match_id(url) | ||||
|  | ||||
|         lecture = self._download_json( | ||||
|             'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, | ||||
|   | ||||
| @@ -13,7 +13,7 @@ from ..utils import ( | ||||
| class VevoIE(InfoExtractor): | ||||
|     """ | ||||
|     Accepts urls from vevo.com or in the format 'vevo:{id}' | ||||
|     (currently used by MTVIE) | ||||
|     (currently used by MTVIE and MySpaceIE) | ||||
|     """ | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| | ||||
|   | ||||
| @@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': '84196', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Hevnen er søt episode 1:10 - Abu', | ||||
|                 'title': 'Hevnen er søt: Episode 10 - Abu', | ||||
|                 'description': 'md5:e25e4badb5f544b04341e14abdc72234', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'duration': 648.000, | ||||
| @@ -35,7 +35,7 @@ class VGTVIE(InfoExtractor): | ||||
|                 'title': 'OPPTAK: VGTV følger EM-kvalifiseringen', | ||||
|                 'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'duration': 9056.000, | ||||
|                 'duration': 9103.0, | ||||
|                 'timestamp': 1410113864, | ||||
|                 'upload_date': '20140907', | ||||
|                 'view_count': int, | ||||
|   | ||||
| @@ -17,6 +17,7 @@ class VineIE(InfoExtractor): | ||||
|             'id': 'b9KOOWX7HUx', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Chicken.', | ||||
|             'alt_title': 'Vine by Jack Dorsey', | ||||
|             'description': 'Chicken.', | ||||
|             'upload_date': '20130519', | ||||
|             'uploader': 'Jack Dorsey', | ||||
| @@ -25,30 +26,26 @@ class VineIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) | ||||
|  | ||||
|         data = json.loads(self._html_search_regex( | ||||
|             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': data['videoLowURL'], | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'low', | ||||
|             }, | ||||
|             { | ||||
|                 'url': data['videoUrl'], | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'standard', | ||||
|             } | ||||
|         ] | ||||
|         formats = [{ | ||||
|             'url': data['videoLowURL'], | ||||
|             'ext': 'mp4', | ||||
|             'format_id': 'low', | ||||
|         }, { | ||||
|             'url': data['videoUrl'], | ||||
|             'ext': 'mp4', | ||||
|             'format_id': 'standard', | ||||
|         }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'alt_title': self._og_search_description(webpage), | ||||
|             'description': data['description'], | ||||
|             'thumbnail': data['thumbnailUrl'], | ||||
|             'upload_date': unified_strdate(data['created']), | ||||
| @@ -63,29 +60,36 @@ class VineIE(InfoExtractor): | ||||
|  | ||||
| class VineUserIE(InfoExtractor): | ||||
|     IE_NAME = 'vine:user' | ||||
|     _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$' | ||||
|     _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$' | ||||
|     _VINE_BASE_URL = "https://vine.co/" | ||||
|     _TEST = { | ||||
|         'url': 'https://vine.co/Visa', | ||||
|         'info_dict': { | ||||
|             'id': 'Visa', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'https://vine.co/Visa', | ||||
|             'info_dict': { | ||||
|                 'id': 'Visa', | ||||
|             }, | ||||
|             'playlist_mincount': 46, | ||||
|         }, | ||||
|         'playlist_mincount': 46, | ||||
|     } | ||||
|         { | ||||
|             'url': 'https://vine.co/u/941705360593584128', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         user = mobj.group('user') | ||||
|         u = mobj.group('u') | ||||
|  | ||||
|         profile_url = "%sapi/users/profiles/vanity/%s" % ( | ||||
|             self._VINE_BASE_URL, user) | ||||
|         profile_url = "%sapi/users/profiles/%s%s" % ( | ||||
|             self._VINE_BASE_URL, 'vanity/' if not u else '', user) | ||||
|         profile_data = self._download_json( | ||||
|             profile_url, user, note='Downloading user profile data') | ||||
|  | ||||
|         user_id = profile_data['data']['userId'] | ||||
|         timeline_data = [] | ||||
|         for pagenum in itertools.count(1): | ||||
|             timeline_url = "%sapi/timelines/users/%s?page=%s" % ( | ||||
|             timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % ( | ||||
|                 self._VINE_BASE_URL, user_id, pagenum) | ||||
|             timeline_page = self._download_json( | ||||
|                 timeline_url, user, note='Downloading page %d' % pagenum) | ||||
|   | ||||
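With the new `u/` support the Vine profile endpoint differs only in whether a `vanity/` path segment is inserted before the user value. A small sketch of that string building:

```python
_VINE_BASE_URL = 'https://vine.co/'

def profile_url(user, is_numeric_id):
    # Numeric ids from /u/<id> URLs go straight after profiles/;
    # vanity names keep the old vanity/ prefix.
    return '%sapi/users/profiles/%s%s' % (
        _VINE_BASE_URL, '' if is_numeric_id else 'vanity/', user)

print(profile_url('Visa', False))
# -> https://vine.co/api/users/profiles/vanity/Visa
print(profile_url('941705360593584128', True))
# -> https://vine.co/api/users/profiles/941705360593584128
```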
| @@ -1,6 +1,8 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_chr, | ||||
| @@ -25,6 +27,7 @@ class XMinusIE(InfoExtractor): | ||||
|             'tbr': 320, | ||||
|             'filesize_approx': 5900000, | ||||
|             'view_count': int, | ||||
|             'description': 'md5:03238c5b663810bc79cf42ef3c03e371', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -48,9 +51,14 @@ class XMinusIE(InfoExtractor): | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'<div class="quality.*?► ([0-9]+)', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div id="song_texts">(.*?)</div><br', | ||||
|             webpage, 'song lyrics', fatal=False) | ||||
|         if description: | ||||
|             description = re.sub(' *\r *', '\n', description) | ||||
|  | ||||
|         enc_token = self._html_search_regex( | ||||
|             r'data-mt="(.*?)"', webpage, 'enc_token') | ||||
|             r'minus_track\.tkn="(.+?)"', webpage, 'enc_token') | ||||
|         token = ''.join( | ||||
|             c if pos == 3 else compat_chr(compat_ord(c) - 1) | ||||
|             for pos, c in enumerate(reversed(enc_token))) | ||||
| @@ -64,4 +72,5 @@ class XMinusIE(InfoExtractor): | ||||
|             'filesize_approx': filesize_approx, | ||||
|             'tbr': tbr, | ||||
|             'view_count': view_count, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
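The X-Minus change only swaps the regex that locates `enc_token`; the decode step is unchanged: reverse the value, then shift every character back by one code point except the one at index 3 of the reversed string. A worked example with a made-up token:

```python
# Made-up encrypted token, purely to show the mechanics of the transform.
enc_token = 'bcde2fgh'

token = ''.join(
    c if pos == 3 else chr(ord(c) - 1)   # compat_chr/compat_ord in the extractor
    for pos, c in enumerate(reversed(enc_token)))
print(token)  # -> gfe2dcba
```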
| @@ -45,7 +45,9 @@ class YouPornIE(InfoExtractor): | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         # Get JSON parameters | ||||
|         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters') | ||||
|         json_params = self._search_regex( | ||||
|             r'var currentVideo = new Video\((.*)\)[,;]', | ||||
|             webpage, 'JSON parameters') | ||||
|         try: | ||||
|             params = json.loads(json_params) | ||||
|         except: | ||||
|   | ||||
| @@ -7,29 +7,31 @@ import itertools | ||||
| import json | ||||
| import os.path | ||||
| import re | ||||
| import time | ||||
| import traceback | ||||
|  | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..jsinterp import JSInterpreter | ||||
| from ..swfinterp import SWFInterpreter | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_chr, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
|  | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     get_element_by_id, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|     int_or_none, | ||||
|     OnDemandPagedList, | ||||
|     orderedSet, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     orderedSet, | ||||
|     uppercase_escape, | ||||
| ) | ||||
|  | ||||
| @@ -38,17 +40,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     """Provide base functions for Youtube extractors""" | ||||
|     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' | ||||
|     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor' | ||||
|     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | ||||
|     _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||
|     _NETRC_MACHINE = 'youtube' | ||||
|     # If True it will raise an error if no login info is provided | ||||
|     _LOGIN_REQUIRED = False | ||||
|  | ||||
|     def _set_language(self): | ||||
|         return bool(self._download_webpage( | ||||
|             self._LANG_URL, None, | ||||
|             note='Setting language', errnote='unable to set language', | ||||
|             fatal=False)) | ||||
|         self._set_cookie( | ||||
|             '.youtube.com', 'PREF', 'f1=50000000&hl=en', | ||||
|             # YouTube sets the expire time to about two months | ||||
|             expire_time=time.time() + 2 * 30 * 24 * 3600) | ||||
|  | ||||
|     def _login(self): | ||||
|         """ | ||||
| @@ -176,30 +176,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
|     def _confirm_age(self): | ||||
|         age_form = { | ||||
|             'next_url': '/', | ||||
|             'action_confirm': 'Confirm', | ||||
|         } | ||||
|         req = compat_urllib_request.Request( | ||||
|             self._AGE_URL, | ||||
|             compat_urllib_parse.urlencode(age_form).encode('ascii') | ||||
|         ) | ||||
|  | ||||
|         self._download_webpage( | ||||
|             req, None, | ||||
|             note='Confirming age', errnote='Unable to confirm age', | ||||
|             fatal=False) | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if self._downloader is None: | ||||
|             return | ||||
|         if self._get_login_info()[0] is not None: | ||||
|             if not self._set_language(): | ||||
|                 return | ||||
|         self._set_language() | ||||
|         if not self._login(): | ||||
|             return | ||||
|         self._confirm_age() | ||||
|  | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| @@ -305,6 +287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, | ||||
|         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, | ||||
|         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, | ||||
|         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'}, | ||||
|  | ||||
|         # Dash webm audio | ||||
|         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, | ||||
| @@ -398,8 +381,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': 'IB3lcPjvWLA', | ||||
|                 'ext': 'm4a', | ||||
|                 'title': 'Afrojack - The Spark ft. Spree Wilson', | ||||
|                 'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8', | ||||
|                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson', | ||||
|                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d', | ||||
|                 'uploader': 'AfrojackVEVO', | ||||
|                 'uploader_id': 'AfrojackVEVO', | ||||
|                 'upload_date': '20131011', | ||||
| @@ -421,7 +404,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'title': 'Burning Everyone\'s Koran', | ||||
|                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         # Normal age-gate video (No vevo, embed allowed) | ||||
|         { | ||||
|             'url': 'http://youtube.com/watch?v=HtVdAasjOgU', | ||||
|             'info_dict': { | ||||
|                 'id': 'HtVdAasjOgU', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', | ||||
|                 'description': 'md5:eca57043abae25130f58f655ad9a7771', | ||||
|                 'uploader': 'The Witcher', | ||||
|                 'uploader_id': 'WitcherGame', | ||||
|                 'upload_date': '20140605', | ||||
|             }, | ||||
|         }, | ||||
|         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) | ||||
|         { | ||||
|             'url': '__2ABJjxzNo', | ||||
|             'info_dict': { | ||||
|                 'id': '__2ABJjxzNo', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20100430', | ||||
|                 'uploader_id': 'deadmau5', | ||||
|                 'description': 'md5:12c56784b8032162bb936a5f76d55360', | ||||
|                 'uploader': 'deadmau5', | ||||
|                 'title': 'Deadmau5 - Some Chords (HD)', | ||||
|             }, | ||||
|             'expected_warnings': [ | ||||
|                 'DASH manifest missing', | ||||
|             ] | ||||
|         }, | ||||
|         # Olympics (https://github.com/rg3/youtube-dl/issues/4431) | ||||
|         { | ||||
|             'url': 'lqQg6PlCWgI', | ||||
|             'info_dict': { | ||||
|                 'id': 'lqQg6PlCWgI', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20120731', | ||||
|                 'uploader_id': 'olympic', | ||||
|                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', | ||||
|                 'uploader': 'Olympics', | ||||
|                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': 'requires avconv', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
| @@ -671,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id | ||||
|         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') | ||||
|  | ||||
|     def _parse_dash_manifest( | ||||
|             self, video_id, dash_manifest_url, player_url, age_gate): | ||||
|         def decrypt_sig(mobj): | ||||
|             s = mobj.group(1) | ||||
|             dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) | ||||
|             return '/signature/%s' % dec_s | ||||
|         dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) | ||||
|         dash_doc = self._download_xml( | ||||
|             dash_manifest_url, video_id, | ||||
|             note='Downloading DASH manifest', | ||||
|             errnote='Could not download DASH manifest') | ||||
|  | ||||
|         formats = [] | ||||
|         for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||
|             url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') | ||||
|             if url_el is None: | ||||
|                 continue | ||||
|             format_id = r.attrib['id'] | ||||
|             video_url = url_el.text | ||||
|             filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) | ||||
|             f = { | ||||
|                 'format_id': format_id, | ||||
|                 'url': video_url, | ||||
|                 'width': int_or_none(r.attrib.get('width')), | ||||
|                 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), | ||||
|                 'asr': int_or_none(r.attrib.get('audioSamplingRate')), | ||||
|                 'filesize': filesize, | ||||
|                 'fps': int_or_none(r.attrib.get('frameRate')), | ||||
|             } | ||||
|             try: | ||||
|                 existing_format = next( | ||||
|                     fo for fo in formats | ||||
|                     if fo['format_id'] == format_id) | ||||
|             except StopIteration: | ||||
|                 f.update(self._formats.get(format_id, {})) | ||||
|                 formats.append(f) | ||||
|             else: | ||||
|                 existing_format.update(f) | ||||
|         return formats | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         proto = ( | ||||
|             'http' if self._downloader.params.get('prefer_insecure', False) | ||||
| @@ -684,16 +752,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Get video webpage | ||||
|         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id | ||||
|         pref_cookies = [ | ||||
|             c for c in self._downloader.cookiejar | ||||
|             if c.domain == '.youtube.com' and c.name == 'PREF'] | ||||
|         for pc in pref_cookies: | ||||
|             if 'hl=' in pc.value: | ||||
|                 pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value) | ||||
|             else: | ||||
|                 if pc.value: | ||||
|                     pc.value += '&' | ||||
|                 pc.value += 'hl=en' | ||||
|         video_webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Attempt to extract SWF player URL | ||||
| @@ -704,7 +762,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             player_url = None | ||||
|  | ||||
|         # Get video info | ||||
|         self.report_video_info_webpage_download(video_id) | ||||
|         if re.search(r'player-age-gate-content">', video_webpage) is not None: | ||||
|             age_gate = True | ||||
|             # We simulate the access to the video from www.youtube.com/v/{video_id} | ||||
| @@ -723,15 +780,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             video_info = compat_parse_qs(video_info_webpage) | ||||
|         else: | ||||
|             age_gate = False | ||||
|             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: | ||||
|                 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' | ||||
|                                   % (video_id, el_type)) | ||||
|                 video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||
|                                                             note=False, | ||||
|                                                             errnote='unable to download video info webpage') | ||||
|                 video_info = compat_parse_qs(video_info_webpage) | ||||
|                 if 'token' in video_info: | ||||
|                     break | ||||
|             try: | ||||
|                 # Try looking directly into the video webpage | ||||
|                 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) | ||||
|                 if not mobj: | ||||
|                     raise ValueError('Could not find ytplayer.config')  # caught below | ||||
|                 json_code = uppercase_escape(mobj.group(1)) | ||||
|                 ytplayer_config = json.loads(json_code) | ||||
|                 args = ytplayer_config['args'] | ||||
|                 # Convert to the same format returned by compat_parse_qs | ||||
|                 video_info = dict((k, [v]) for k, v in args.items()) | ||||
|                 if 'url_encoded_fmt_stream_map' not in args: | ||||
|                     raise ValueError('No stream_map present')  # caught below | ||||
|             except ValueError: | ||||
|                 # We fallback to the get_video_info pages (used by the embed page) | ||||
|                 self.report_video_info_webpage_download(video_id) | ||||
|                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: | ||||
|                     video_info_url = ( | ||||
|                         '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' | ||||
|                         % (proto, video_id, el_type)) | ||||
|                     video_info_webpage = self._download_webpage( | ||||
|                         video_info_url, | ||||
|                         video_id, note=False, | ||||
|                         errnote='unable to download video info webpage') | ||||
|                     video_info = compat_parse_qs(video_info_webpage) | ||||
|                     if 'token' in video_info: | ||||
|                         break | ||||
|         if 'token' not in video_info: | ||||
|             if 'reason' in video_info: | ||||
|                 raise ExtractorError( | ||||
| @@ -799,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         m_cat_container = self._search_regex( | ||||
|             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', | ||||
|             video_webpage, 'categories', fatal=False) | ||||
|             video_webpage, 'categories', default=None) | ||||
|         if m_cat_container: | ||||
|             category = self._html_search_regex( | ||||
|                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', | ||||
| @@ -856,32 +930,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if self._downloader.params.get('writeannotations', False): | ||||
|             video_annotations = self._extract_annotations(video_id) | ||||
|  | ||||
|         # Decide which formats to download | ||||
|         try: | ||||
|             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) | ||||
|             if not mobj: | ||||
|                 raise ValueError('Could not find vevo ID') | ||||
|             json_code = uppercase_escape(mobj.group(1)) | ||||
|             ytplayer_config = json.loads(json_code) | ||||
|             args = ytplayer_config['args'] | ||||
|             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map | ||||
|             # this signatures are encrypted | ||||
|             if 'url_encoded_fmt_stream_map' not in args: | ||||
|                 raise ValueError('No stream_map present')  # caught below | ||||
|             re_signature = re.compile(r'[&,]s=') | ||||
|             m_s = re_signature.search(args['url_encoded_fmt_stream_map']) | ||||
|             if m_s is not None: | ||||
|                 self.to_screen('%s: Encrypted signatures detected.' % video_id) | ||||
|                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] | ||||
|             m_s = re_signature.search(args.get('adaptive_fmts', '')) | ||||
|             if m_s is not None: | ||||
|                 if 'adaptive_fmts' in video_info: | ||||
|                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts'] | ||||
|                 else: | ||||
|                     video_info['adaptive_fmts'] = [args['adaptive_fmts']] | ||||
|         except ValueError: | ||||
|             pass | ||||
|  | ||||
|         def _map_to_format_list(urlmap): | ||||
|             formats = [] | ||||
|             for itag, video_real_url in urlmap.items(): | ||||
| @@ -903,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'url': video_info['conn'][0], | ||||
|                 'player_url': player_url, | ||||
|             }] | ||||
|         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: | ||||
|         elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1: | ||||
|             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] | ||||
|             if 'rtmpe%3Dyes' in encoded_url_map: | ||||
|                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) | ||||
| @@ -968,53 +1016,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # Look for the DASH manifest | ||||
|         if self._downloader.params.get('youtube_include_dash_manifest', True): | ||||
|             try: | ||||
|                 # The DASH manifest used needs to be the one from the original video_webpage. | ||||
|                 # The one found in get_video_info seems to be using different signatures. | ||||
|                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. | ||||
|                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the | ||||
|                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. | ||||
|                 if age_gate: | ||||
|                     dash_manifest_url = video_info.get('dashmpd')[0] | ||||
|             dash_mpd = video_info.get('dashmpd') | ||||
|             if dash_mpd: | ||||
|                 dash_manifest_url = dash_mpd[0] | ||||
|                 try: | ||||
|                     dash_formats = self._parse_dash_manifest( | ||||
|                         video_id, dash_manifest_url, player_url, age_gate) | ||||
|                 except (ExtractorError, KeyError) as e: | ||||
|                     self.report_warning( | ||||
|                         'Skipping DASH manifest: %r' % e, video_id) | ||||
|                 else: | ||||
|                     dash_manifest_url = ytplayer_config['args']['dashmpd'] | ||||
|  | ||||
|                 def decrypt_sig(mobj): | ||||
|                     s = mobj.group(1) | ||||
|                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) | ||||
|                     return '/signature/%s' % dec_s | ||||
|                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) | ||||
|                 dash_doc = self._download_xml( | ||||
|                     dash_manifest_url, video_id, | ||||
|                     note='Downloading DASH manifest', | ||||
|                     errnote='Could not download DASH manifest') | ||||
|                 for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||
|                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') | ||||
|                     if url_el is None: | ||||
|                         continue | ||||
|                     format_id = r.attrib['id'] | ||||
|                     video_url = url_el.text | ||||
|                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) | ||||
|                     f = { | ||||
|                         'format_id': format_id, | ||||
|                         'url': video_url, | ||||
|                         'width': int_or_none(r.attrib.get('width')), | ||||
|                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), | ||||
|                         'asr': int_or_none(r.attrib.get('audioSamplingRate')), | ||||
|                         'filesize': filesize, | ||||
|                     } | ||||
|                     try: | ||||
|                         existing_format = next( | ||||
|                             fo for fo in formats | ||||
|                             if fo['format_id'] == format_id) | ||||
|                     except StopIteration: | ||||
|                         f.update(self._formats.get(format_id, {})) | ||||
|                         formats.append(f) | ||||
|                     else: | ||||
|                         existing_format.update(f) | ||||
|  | ||||
|             except (ExtractorError, KeyError) as e: | ||||
|                 self.report_warning('Skipping DASH manifest: %r' % e, video_id) | ||||
|                     formats.extend(dash_formats) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -1256,7 +1268,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): | ||||
|  | ||||
| class YoutubeChannelIE(InfoExtractor): | ||||
|     IE_DESC = 'YouTube.com channels' | ||||
|     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" | ||||
|     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' | ||||
|     _MORE_PAGES_INDICATOR = 'yt-uix-load-more' | ||||
|     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' | ||||
|     IE_NAME = 'youtube:channel' | ||||
| @@ -1274,13 +1286,8 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|         return ids_in_page | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract channel id | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         channel_id = self._match_id(url) | ||||
|  | ||||
|         # Download channel page | ||||
|         channel_id = mobj.group(1) | ||||
|         video_ids = [] | ||||
|         url = 'https://www.youtube.com/channel/%s/videos' % channel_id | ||||
|         channel_page = self._download_webpage(url, channel_id) | ||||
| @@ -1294,8 +1301,12 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|             # The videos are contained in a single page | ||||
|             # the ajax pages can't be used, they are empty | ||||
|             video_ids = self.extract_videos_from_page(channel_page) | ||||
|         else: | ||||
|             # Download all channel pages using the json-based channel_ajax query | ||||
|             entries = [ | ||||
|                 self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                 for video_id in video_ids] | ||||
|             return self.playlist_result(entries, channel_id) | ||||
|  | ||||
|         def _entries(): | ||||
|             for pagenum in itertools.count(1): | ||||
|                 url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|                 page = self._download_json( | ||||
| @@ -1303,21 +1314,19 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|                     transform_source=uppercase_escape) | ||||
|  | ||||
|                 ids_in_page = self.extract_videos_from_page(page['content_html']) | ||||
|                 video_ids.extend(ids_in_page) | ||||
|                 for video_id in ids_in_page: | ||||
|                     yield self.url_result( | ||||
|                         video_id, 'Youtube', video_id=video_id) | ||||
|  | ||||
|                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: | ||||
|                     break | ||||
|  | ||||
|         self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|  | ||||
|         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                        for video_id in video_ids] | ||||
|         return self.playlist_result(url_entries, channel_id) | ||||
|         return self.playlist_result(_entries(), channel_id) | ||||
|  | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
|     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' | ||||
|     _GDATA_PAGE_SIZE = 50 | ||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||
| @@ -1345,12 +1354,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|             return super(YoutubeUserIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract username | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|  | ||||
|         username = mobj.group(1) | ||||
|         username = self._match_id(url) | ||||
|  | ||||
|         # Download video ids using YouTube Data API. Result size per | ||||
|         # query is limited (currently to 50 videos) so we need to query | ||||
|   | ||||
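The largest YouTube change makes the extractor try to read `ytplayer.config` straight off the watch page and only fall back to the `get_video_info` endpoints when that fails, and it folds the old inline DASH handling into `_parse_dash_manifest`. A stripped-down sketch of the first path, with a made-up page fragment and with `uppercase_escape` and error handling omitted; note how `args` is wrapped so it has the same list-valued shape as `compat_parse_qs` output:

```python
import json
import re

# Invented watch-page fragment; the real page embeds a much larger object.
video_webpage = (';ytplayer.config = {"args": {"title": "Some video", '
                 '"url_encoded_fmt_stream_map": "itag=22&url=..."}};')

mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
args = json.loads(mobj.group(1))['args']

# compat_parse_qs returns {key: [value, ...]}, so args is wrapped the same way.
video_info = dict((k, [v]) for k, v in args.items())
print(video_info['title'])  # -> ['Some video']
```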
| @@ -1,12 +1,14 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import functools | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
|     OnDemandPagedList, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url): | ||||
|  | ||||
|  | ||||
| class ZDFIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' | ||||
|     _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt', | ||||
| @@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id | ||||
|         return extract_from_xml_url(self, video_id, xml_url) | ||||
|  | ||||
|  | ||||
| class ZDFChannelIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', | ||||
|         'info_dict': { | ||||
|             'id': '1586442', | ||||
|         }, | ||||
|         'playlist_count': 4, | ||||
|     } | ||||
|     _PAGE_SIZE = 50 | ||||
|  | ||||
|     def _fetch_page(self, channel_id, page): | ||||
|         offset = page * self._PAGE_SIZE | ||||
|         xml_url = ( | ||||
|             'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s' | ||||
|             % (offset, self._PAGE_SIZE, channel_id)) | ||||
|         doc = self._download_xml( | ||||
|             xml_url, channel_id, | ||||
|             note='Downloading channel info', | ||||
|             errnote='Failed to download channel info') | ||||
|  | ||||
|         title = doc.find('.//information/title').text | ||||
|         description = doc.find('.//information/detail').text | ||||
|         for asset in doc.findall('.//teasers/teaser'): | ||||
|             a_type = asset.find('./type').text | ||||
|             a_id = asset.find('./details/assetId').text | ||||
|             if a_type not in ('video', 'topic'): | ||||
|                 continue | ||||
|             yield { | ||||
|                 '_type': 'url', | ||||
|                 'playlist_title': title, | ||||
|                 'playlist_description': description, | ||||
|                 'url': 'zdf:%s:%s' % (a_type, a_id), | ||||
|             } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_id = self._match_id(url) | ||||
|         entries = OnDemandPagedList( | ||||
|             functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': channel_id, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
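The new channel extractor is built on OnDemandPagedList, whose constructor (as used above) takes a page callback plus a page size and only invokes the callback for pages that are actually consumed, e.g. under --playlist-items. A minimal sketch of the pattern with a stubbed page function; the channel id and entry data below are made up:

    import functools

    PAGE_SIZE = 50

    def fetch_page(channel_id, page):
        # Stand-in for the XML service request above: yield one entry dict per
        # teaser on the requested page.
        offset = page * PAGE_SIZE
        for n in range(offset, offset + PAGE_SIZE):
            yield {'_type': 'url', 'url': 'zdf:video:%d' % n}

    # functools.partial pre-binds the channel id, so the paged list only has to
    # supply the page number when (and if) a page is needed.
    page_func = functools.partial(fetch_page, '1586442')
    first_page = list(page_func(0))
    print(len(first_page), first_page[0]['url'])  # 50 zdf:video:0
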
| @@ -163,7 +163,10 @@ def parseOpts(overrideArguments=None): | ||||
|     general.add_option( | ||||
|         '--ignore-config', | ||||
|         action='store_true', | ||||
|         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') | ||||
|         help='Do not read configuration files. ' | ||||
|         'When given in the global configuration file /etc/youtube-dl.conf: ' | ||||
|         'Do not read the user configuration in ~/.config/youtube-dl/config ' | ||||
|         '(%APPDATA%/youtube-dl/config.txt on Windows)') | ||||
|     general.add_option( | ||||
|         '--flat-playlist', | ||||
|         action='store_const', dest='extract_flat', const='in_playlist', | ||||
|   | ||||
| @@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor): | ||||
|  | ||||
|     def run(self, information): | ||||
|         cmd = self.exec_cmd | ||||
|         if not '{}' in cmd: | ||||
|         if '{}' not in cmd: | ||||
|             cmd += ' {}' | ||||
|  | ||||
|         cmd = cmd.replace('{}', shlex_quote(information['filepath'])) | ||||
|   | ||||
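Behaviourally the postprocessor change is a pure style fix ('not in'), but the surrounding placeholder logic is worth spelling out. A small sketch of the same idea, using the stdlib shlex.quote in place of the project's shlex_quote compat shim:

    import shlex

    def build_exec_cmd(template, filepath):
        # If the user-supplied command has no '{}' placeholder, the file path is
        # appended; otherwise every '{}' is replaced with the quoted path.
        if '{}' not in template:
            template += ' {}'
        return template.replace('{}', shlex.quote(filepath))

    print(build_exec_cmd('touch', "/tmp/It's done.mp4"))
    # touch '/tmp/It'"'"'s done.mp4'
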
| @@ -37,11 +37,11 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|         if not self._executable: | ||||
|             raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') | ||||
|  | ||||
|         REQUIRED_VERSION = '1.0' | ||||
|         required_version = '10-0' if self._uses_avconv() else '1.0' | ||||
|         if is_outdated_version( | ||||
|                 self._versions[self._executable], REQUIRED_VERSION): | ||||
|                 self._versions[self._executable], required_version): | ||||
|             warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( | ||||
|                 self._executable, self._executable, REQUIRED_VERSION) | ||||
|                 self._executable, self._executable, required_version) | ||||
|             if self._downloader: | ||||
|                 self._downloader.report_warning(warning) | ||||
|  | ||||
|   | ||||
| @@ -79,7 +79,7 @@ def update_self(to_screen, verbose): | ||||
|             to_screen(compat_str(traceback.format_exc())) | ||||
|         to_screen('ERROR: can\'t obtain versions info. Please try again later.') | ||||
|         return | ||||
|     if not 'signature' in versions_info: | ||||
|     if 'signature' not in versions_info: | ||||
|         to_screen('ERROR: the versions file is not signed or corrupted. Aborting.') | ||||
|         return | ||||
|     signature = versions_info['signature'] | ||||
|   | ||||
| @@ -166,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False): | ||||
|         xpath = xpath.encode('ascii') | ||||
|  | ||||
|     n = node.find(xpath) | ||||
|     if n is None: | ||||
|     if n is None or n.text is None: | ||||
|         if fatal: | ||||
|             name = xpath if name is None else name | ||||
|             raise ExtractorError('Could not find XML element %s' % name) | ||||
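The extra "n.text is None" guard makes xpath_text treat an empty element like a missing one. An illustration of the case it covers, using plain ElementTree (no youtube-dl code needed):

    import xml.etree.ElementTree as ET

    doc = ET.fromstring('<video><title/></video>')
    n = doc.find('.//title')
    print(n is None)       # False: the element exists
    print(n.text is None)  # True: it has no text, so with the new guard the
                           # helper applies its fatal/default handling here too
                           # instead of silently returning None.
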
| @@ -644,17 +644,19 @@ def parse_iso8601(date_str, delimiter='T'): | ||||
|     return calendar.timegm(dt.timetuple()) | ||||
|  | ||||
|  | ||||
| def unified_strdate(date_str): | ||||
| def unified_strdate(date_str, day_first=True): | ||||
|     """Return a string with the date in the format YYYYMMDD""" | ||||
|  | ||||
|     if date_str is None: | ||||
|         return None | ||||
|  | ||||
|     upload_date = None | ||||
|     # Replace commas | ||||
|     date_str = date_str.replace(',', ' ') | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) | ||||
|     # Remove AM/PM + timezone | ||||
|     date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) | ||||
|  | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%d %b %Y', | ||||
| @@ -669,7 +671,6 @@ def unified_strdate(date_str): | ||||
|         '%d/%m/%Y', | ||||
|         '%d/%m/%y', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%d/%m/%Y %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S.%f', | ||||
|         '%d.%m.%Y %H:%M', | ||||
| @@ -681,6 +682,14 @@ def unified_strdate(date_str): | ||||
|         '%Y-%m-%dT%H:%M:%S.%f', | ||||
|         '%Y-%m-%dT%H:%M', | ||||
|     ] | ||||
|     if day_first: | ||||
|         format_expressions.extend([ | ||||
|             '%d/%m/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     else: | ||||
|         format_expressions.extend([ | ||||
|             '%m/%d/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     for expression in format_expressions: | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
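The new day_first flag only decides which of the two ambiguous slash formats gets tried. A stripped-down illustration of that ambiguity using the two strptime patterns added above; this is not the full helper, just the branch that differs:

    import datetime

    def slash_date(date_str, day_first=True):
        fmt = '%d/%m/%Y %H:%M:%S' if day_first else '%m/%d/%Y %H:%M:%S'
        return datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')

    print(slash_date('02/10/2014 15:30:00'))         # 20141002 (day first, the default)
    print(slash_date('02/10/2014 15:30:00', False))  # 20140210 (month first)
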
| @@ -712,8 +721,10 @@ def date_from_str(date_str): | ||||
|     Return a datetime object from a string in the format YYYYMMDD or | ||||
|     (now|today)[+-][0-9](day|week|month|year)(s)?""" | ||||
|     today = datetime.date.today() | ||||
|     if date_str == 'now'or date_str == 'today': | ||||
|     if date_str in ('now', 'today'): | ||||
|         return today | ||||
|     if date_str == 'yesterday': | ||||
|         return today - datetime.timedelta(days=1) | ||||
|     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str) | ||||
|     if match is not None: | ||||
|         sign = match.group('sign') | ||||
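Date strings such as those given to --date / --datebefore / --dateafter now also accept "yesterday". A compact sketch of just the branches shown above; the relative now+N(day|week|...) arithmetic is omitted:

    import datetime

    def date_from_str_sketch(date_str):
        today = datetime.date.today()
        if date_str in ('now', 'today'):
            return today
        if date_str == 'yesterday':
            return today - datetime.timedelta(days=1)
        # Fallback for plain YYYYMMDD strings.
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    print(date_from_str_sketch('yesterday'))
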
| @@ -808,22 +819,22 @@ def _windows_write_string(s, out): | ||||
|  | ||||
|     GetStdHandle = ctypes.WINFUNCTYPE( | ||||
|         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( | ||||
|         ("GetStdHandle", ctypes.windll.kernel32)) | ||||
|         (b"GetStdHandle", ctypes.windll.kernel32)) | ||||
|     h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) | ||||
|  | ||||
|     WriteConsoleW = ctypes.WINFUNCTYPE( | ||||
|         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, | ||||
|         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), | ||||
|         ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32)) | ||||
|         ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32)) | ||||
|     written = ctypes.wintypes.DWORD(0) | ||||
|  | ||||
|     GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32)) | ||||
|     GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32)) | ||||
|     FILE_TYPE_CHAR = 0x0002 | ||||
|     FILE_TYPE_REMOTE = 0x8000 | ||||
|     GetConsoleMode = ctypes.WINFUNCTYPE( | ||||
|         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, | ||||
|         ctypes.POINTER(ctypes.wintypes.DWORD))( | ||||
|         ("GetConsoleMode", ctypes.windll.kernel32)) | ||||
|         (b"GetConsoleMode", ctypes.windll.kernel32)) | ||||
|     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value | ||||
|  | ||||
|     def not_a_console(handle): | ||||
| @@ -1024,7 +1035,7 @@ def smuggle_url(url, data): | ||||
|  | ||||
|  | ||||
| def unsmuggle_url(smug_url, default=None): | ||||
|     if not '#__youtubedl_smuggle' in smug_url: | ||||
|     if '#__youtubedl_smuggle' not in smug_url: | ||||
|         return smug_url, default | ||||
|     url, _, sdata = smug_url.rpartition('#') | ||||
|     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0] | ||||
| @@ -1090,11 +1101,14 @@ def parse_filesize(s): | ||||
|     } | ||||
|  | ||||
|     units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE) | ||||
|     m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s) | ||||
|     m = re.match( | ||||
|         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s) | ||||
|     if not m: | ||||
|         return None | ||||
|  | ||||
|     return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) | ||||
|     num_str = m.group('num').replace(',', '.') | ||||
|     mult = _UNIT_TABLE[m.group('unit')] | ||||
|     return int(float(num_str) * mult) | ||||
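The widened number pattern plus the replace(',', '.') normalisation lets parse_filesize handle decimal commas, as found on German-locale pages. A self-contained mini version with an assumed subset of the unit table:

    import re

    _UNIT_TABLE = {'B': 1, 'KiB': 1024, 'MiB': 1024 ** 2}  # assumed subset

    def parse_filesize_sketch(s):
        units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
        m = re.match(r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
        if not m:
            return None
        # ',' is accepted as a decimal separator and normalised before float().
        return int(float(m.group('num').replace(',', '.')) * _UNIT_TABLE[m.group('unit')])

    print(parse_filesize_sketch('5,5 MiB'))  # 5767168
    print(parse_filesize_sketch('300 KiB'))  # 307200
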
|  | ||||
|  | ||||
| def get_term_width(): | ||||
| @@ -1203,18 +1217,29 @@ def parse_duration(s): | ||||
|  | ||||
|     m = re.match( | ||||
|         r'''(?ix)T? | ||||
|         (?: | ||||
|             (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*| | ||||
|             (?P<only_hours>[0-9.]+)\s*(?:hours?)| | ||||
|  | ||||
|             (?: | ||||
|                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)? | ||||
|                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s* | ||||
|             )? | ||||
|             (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s) | ||||
|             (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)? | ||||
|         )$''', s) | ||||
|     if not m: | ||||
|         return None | ||||
|     res = int(m.group('secs')) | ||||
|     res = 0 | ||||
|     if m.group('only_mins'): | ||||
|         return float_or_none(m.group('only_mins'), invscale=60) | ||||
|     if m.group('only_hours'): | ||||
|         return float_or_none(m.group('only_hours'), invscale=60 * 60) | ||||
|     if m.group('secs'): | ||||
|         res += int(m.group('secs')) | ||||
|     if m.group('mins'): | ||||
|         res += int(m.group('mins')) * 60 | ||||
|         if m.group('hours'): | ||||
|             res += int(m.group('hours')) * 60 * 60 | ||||
|     if m.group('hours'): | ||||
|         res += int(m.group('hours')) * 60 * 60 | ||||
|     if m.group('ms'): | ||||
|         res += float(m.group('ms')) | ||||
|     return res | ||||
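The rewritten parse_duration regex adds bare "N min" / "N hours" branches alongside the existing h:m:s form. A condensed, runnable restatement of just the colon-separated branch (the new only_mins / only_hours word forms are left out):

    import re

    def parse_duration_sketch(s):
        m = re.match(
            r'(?:(?:(?P<hours>\d+):)?(?P<mins>\d+):)?(?P<secs>\d+)(?P<ms>\.\d+)?$', s)
        if not m:
            return None
        res = int(m.group('secs'))
        if m.group('mins'):
            res += int(m.group('mins')) * 60
        if m.group('hours'):
            res += int(m.group('hours')) * 60 * 60
        if m.group('ms'):
            res += float(m.group('ms'))
        return res

    print(parse_duration_sketch('1:02:03'))  # 3723
    print(parse_duration_sketch('02:03'))    # 123
    print(parse_duration_sketch('47.5'))     # 47.5
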
| @@ -1488,7 +1513,7 @@ def limit_length(s, length): | ||||
|  | ||||
|  | ||||
| def version_tuple(v): | ||||
|     return [int(e) for e in v.split('.')] | ||||
|     return tuple(int(e) for e in re.split(r'[-.]', v)) | ||||
|  | ||||
|  | ||||
| def is_outdated_version(version, limit, assume_new=True): | ||||
|   | ||||
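version_tuple now splits on both '.' and '-', which is what lets the avconv requirement of '10-0' from the ffmpeg postprocessor hunk earlier be compared at all; previously int('10-0') would have raised ValueError. A quick check of the changed function as it stands in this diff:

    import re

    def version_tuple(v):
        return tuple(int(e) for e in re.split(r'[-.]', v))

    print(version_tuple('10-0'))                           # (10, 0)
    print(version_tuple('9.18') < version_tuple('10-0'))   # True: avconv 9.18 is outdated
    print(version_tuple('2.4.3') >= version_tuple('1.0'))  # True: ffmpeg 2.4.3 is fine
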
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2014.11.26.2' | ||||
| __version__ = '2014.12.12.2' | ||||
|   | ||||