Compare commits

..

54 Commits

Author SHA1 Message Date
Philipp Hagemeister
a5f1e12a02 release 2014.01.05 2014-01-05 04:30:29 +01:00
Philipp Hagemeister
ca9e792253 [cspan] Use HTTP download (Fixes #2098) 2014-01-05 04:30:19 +01:00
Philipp Hagemeister
aff24732b9 Merge remote-tracking branch 'rzhxeo/blip'
Conflicts:
	youtube_dl/extractor/bliptv.py
2014-01-05 03:48:45 +01:00
Philipp Hagemeister
455fa214b6 Ignore more downloaded files 2014-01-05 03:44:38 +01:00
Philipp Hagemeister
a9c5e5ca6e Set required properties for format merging 2014-01-05 03:44:08 +01:00
Philipp Hagemeister
cefcb9fde3 [bliptv] Use centralized format sorting
This also makes youtube-dl use the better "Source" format by default.
2014-01-05 03:21:23 +01:00
Philipp Hagemeister
bca4e93076 [bliptv] Simplify 2014-01-05 03:18:45 +01:00
Philipp Hagemeister
67c20aebb7 Merge remote-tracking branch 'rzhxeo/blip2' 2014-01-05 03:16:19 +01:00
Philipp Hagemeister
448711e39f [pornhd] Add support for ISO-3166 subpages (Fixes #2088) 2014-01-05 03:13:10 +01:00
Philipp Hagemeister
8bf48f237d Fix/work around Windows encoding issues (Fixes #2095) 2014-01-05 03:07:55 +01:00
Philipp Hagemeister
7c0578dc86 [collegehumor] Use character strings by default 2014-01-05 03:07:15 +01:00
Philipp Hagemeister
55033ffb0a [collegehumor] Add support for age_limit 2014-01-05 03:03:15 +01:00
Philipp Hagemeister
b4a9bf701a [collegehumor] Support multiple formats (Fixes #2092)
Unfortunately, we lose a part of the description in the new JSON format, but that's still better than a non-functioning URL.
2014-01-05 02:50:10 +01:00
Philipp Hagemeister
a015dce0e2 Merge remote-tracking branch 'jaimeMF/merge-formats' 2014-01-05 02:06:48 +01:00
Philipp Hagemeister
28ab2e48ae fix typo 2014-01-05 02:04:21 +01:00
Philipp Hagemeister
6febd1c1df Prepare widespread unicode literal use 2014-01-05 01:52:03 +01:00
Jaime Marquínez Ferrándiz
6350728be2 Allow merging formats (closes #1612)
Multiple formats can be requested using `-f 137+139`, each one is downloaded and then the two are merged with ffmpeg.
2014-01-04 13:13:51 +01:00
Philipp Hagemeister
a7c26e7338 [lynda] minor changes 2014-01-03 13:24:29 +01:00
Philipp Hagemeister
c880557666 Merge remote-tracking branch 'origin/master' 2014-01-03 13:10:00 +01:00
Philipp Hagemeister
85689a531f [macgamestore] Minor fixes (#2044) 2014-01-03 13:09:39 +01:00
Philipp Hagemeister
cc14dfb8ec Merge remote-tracking branch 'dstftw/macgamestore' 2014-01-03 13:06:22 +01:00
Jaime Marquínez Ferrándiz
91d7d0b333 FFmpegMetadataPP; Write temporary file to something.temp.{ext} (fixes #2079)
ffmpeg correctly recognizes the formats of extensions like m4a, but it doesn’t work if it’s passed with the `--format` option.
2014-01-03 12:54:19 +01:00
Philipp Hagemeister
9887c9b2d6 [jpopsuki] Simplify 2014-01-03 12:51:37 +01:00
Philipp Hagemeister
d2fee313ec Merge remote-tracking branch 'diffycat/jpopsuki' 2014-01-03 12:20:18 +01:00
Philipp Hagemeister
fa7f58e433 release 2014.01.03 2014-01-03 12:12:17 +01:00
Jaime Marquínez Ferrándiz
71cd2a571e [dreisat] Make ‘index.php’ optional in the url (fixes #2080) 2014-01-03 12:02:08 +01:00
Philipp Hagemeister
7c094bfe2f Reveal a little bit more detail about what we cache (#858) 2014-01-03 10:57:31 +01:00
Philipp Hagemeister
0f30658329 Clarify --cache-dir (#858) 2014-01-02 23:27:47 +01:00
Jaime Marquínez Ferrándiz
31c1cf5a9d [soundcloud] recognize more players’ urls (fixes #2078) 2014-01-02 16:18:51 +01:00
Jaime Marquínez Ferrándiz
efa1739b74 [comedycentral] Recognize ‘video-collections’ urls (#2072) 2014-01-01 21:11:35 +01:00
Jaime Marquínez Ferrándiz
5ffecde73f [mixcloud] Fix track url transformation (fixes #2068)
‘/previews/’ must be replaced with ‘/c/originals/’ now.
2014-01-01 21:07:55 +01:00
Philipp Hagemeister
08d13955dd [wistia] Prefer original video format above all others
We could also set up a formula which would weigh filesize/bitrate and vcodec/acodec (say, 1GB h264 < 3 GB MPEG2 < 2 GB h264), but that would get really messy real soon.
2014-01-01 20:23:49 +01:00
rzhxeo
531147dd5e [BlipTVIE] Extract all formats 2014-01-01 19:45:45 +01:00
Philipp Hagemeister
a17c95f5e4 [README] Bug reporting: Add an item for unrelated questions 2014-01-01 19:18:20 +01:00
Philipp Hagemeister
eadaf08c16 Merge remote-tracking branch 'origin/master' 2014-01-01 15:30:46 +01:00
Anton Larionov
4a9c9b6fdb [jpopsuki] Add script encoding definition for python2 2014-01-01 18:27:02 +04:00
Anton Larionov
b969ab48d9 Add support for jpopsuki.tv 2014-01-01 17:59:54 +04:00
Jaime Marquínez Ferrándiz
8fa8a6299b [youtube] Add itag 264 (closes #2063)
It has a better bitrate than 137 but the same resolution
2014-01-01 13:45:33 +01:00
Jaime Marquínez Ferrándiz
b2b0870b3a [dreisat] Update test filename and checksum 2014-01-01 13:30:58 +01:00
Jaime Marquínez Ferrándiz
4fb757d1e0 Merge pull request #2041 from dstftw/imdb-list
[imdb] Add support for IMDb list (#2033)
2014-01-01 12:45:09 +01:00
Jaime Marquínez Ferrándiz
241bce7aaf Merge pull request #2061 from rzhxeo/var
Correct variable name in YoutubeDL.list_formats
2014-01-01 03:33:34 -08:00
Philipp Hagemeister
33ec2ae8d9 Merge remote-tracking branch 'origin/master' 2014-01-01 10:43:58 +01:00
Jaime Marquínez Ferrándiz
c801b2051a Add an extractor for cmt.com (closes #2049)
It just inherits from MTVIE.
Some videos also come from vevo.com
2013-12-31 17:21:44 +01:00
Jaime Marquínez Ferrándiz
7976fcac55 [http] Fix ‘err’ variable not being assigned in an except block (#2045) 2013-12-31 13:44:57 +01:00
Jaime Marquínez Ferrándiz
e9f9a10fba Fix initialization of YoutubeDL with params set to None
Set it to an empty dictionary because it’s directly accessed when setting some properties
2013-12-31 13:34:52 +01:00
rzhxeo
1cdfc31e1f Correct variable name in YoutubeDL 2013-12-30 06:50:12 +01:00
rzhxeo
19dab5e6cc [GenericIE] Outsource embedded blip.tv player video id extraction to BlipTVIE and fix minor errors in RegEx 2013-12-30 06:15:02 +01:00
rzhxeo
c0f9969b9e [BlipTVIE] Fix and simplify extraction of embedded videos 2013-12-30 06:14:10 +01:00
Philipp Hagemeister
a0ddb8a2fa Add new --print-traffic option 2013-12-29 15:28:32 +01:00
Philipp Hagemeister
c1d1facd06 [generic] Output something before making network requests 2013-12-27 08:38:42 +01:00
dst
c7f8537dd9 [lynda] Add support for lynda.com (#1966) 2013-12-26 15:48:24 +07:00
dst
1e923b0d29 [macgamestore] Add extractor (#2043) 2013-12-25 16:07:34 +07:00
dst
41cc67c542 [imdb] Add playlist test 2013-12-25 08:40:09 +07:00
dst
c645c7658d [imdb] Extractor for lists (#2033) 2013-12-25 08:34:41 +07:00
30 changed files with 709 additions and 308 deletions

2
.gitignore vendored
View File

@@ -23,6 +23,8 @@ updates_key.pem
*.vtt
*.flv
*.mp4
*.m4a
*.m4v
*.part
test/testdata
.tox

View File

@@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.
empty string (--proxy "") for direct connection
--no-check-certificate Suppress HTTPS certificate validation.
--cache-dir DIR Location in the filesystem where youtube-dl can
store downloaded information permanently. By
store some downloaded information permanently. By
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
/youtube-dl .
/youtube-dl . At the moment, only YouTube player
files (for videos with obfuscated signatures) are
cached, but that may change.
--no-cache-dir Disable filesystem caching
--bidi-workaround Work around terminals that lack bidirectional
text support. Requires bidiv or fribidi
@@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services
### Is anyone going to need the feature?
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
### Is your question about youtube-dl?
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import print_function, unicode_literals
import pkg_resources
import sys

View File

@@ -28,7 +28,8 @@ from youtube_dl.extractor import (
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE,
IviCompilationIE
IviCompilationIE,
ImdbListIE,
)
@@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20)
def test_imdb_list(self):
    """Extract an IMDb list URL and check it comes back as a playlist.

    Asserts the playlist id, its title, and that at least 48 entries
    were collected.
    """
    extractor = ImdbListIE(FakeYDL())
    playlist = extractor.extract('http://www.imdb.com/list/sMjedvGDd8U')

    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'sMjedvGDd8U')
    self.assertEqual(playlist['title'], u'Animated and Family Films')

    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 48)
if __name__ == '__main__':

View File

@@ -0,0 +1,40 @@
from __future__ import unicode_literals
import io
import os
import re
import unittest
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
class TestUnicodeLiterals(unittest.TestCase):
    """Checks that the project's Python sources use ``unicode_literals``."""

    def test_all_files(self):
        """Scan every ``.py`` file below ``rootDir`` for unicode-literal hygiene.

        A file that contains any quote character must import
        ``unicode_literals`` from ``__future__`` and must not contain a
        ``u``-prefixed literal matched by the regex below.

        NOTE: the check is currently switched off -- the method prints a
        notice and returns before scanning anything.
        """
        print('Skipping this test (not yet fully implemented)')
        return

        imps = 'from __future__ import unicode_literals'
        for dirpath, _, filenames in os.walk(rootDir):
            for basename in filenames:
                if not basename.endswith('.py'):
                    continue

                fn = os.path.join(dirpath, basename)
                with io.open(fn, encoding='utf-8') as inf:
                    code = inf.read()

                # Files without any quote character have no string literals
                # to check.
                if "'" not in code and '"' not in code:
                    continue

                self.assertTrue(
                    imps in code,
                    ' %s missing in %s' % (imps, fn))

                m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
                if m is not None:
                    # Clamp the start of the excerpt: a negative slice start
                    # would count from the end of the file and produce an
                    # empty or unrelated snippet for matches near the top.
                    context = code[max(0, m.start() - 10):m.end() + 10]
                    self.fail('u present in %s, around %s' % (fn, context))
if __name__ == '__main__':
unittest.main()

View File

@@ -10,6 +10,7 @@ from .utils import (
PostProcessingError,
shell_quote,
subtitles_filename,
prepend_extension,
)
@@ -84,10 +85,10 @@ class FFmpegPostProcessor(PostProcessor):
files_cmd = []
for path in input_paths:
files_cmd.extend(['-i', encodeFilename(path)])
files_cmd.extend(['-i', encodeFilename(path, True)])
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
+ opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
if self._downloader.params.get('verbose', False):
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
@@ -120,7 +121,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
if not self._exes['ffprobe'] and not self._exes['avprobe']:
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
cmd = [
self._exes['avprobe'] or self._exes['ffprobe'],
'-show_streams',
encodeFilename(self._ffmpeg_filename_argument(path), True)]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
@@ -496,16 +500,22 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
return True, info
filename = info['filepath']
ext = os.path.splitext(filename)[1][1:]
temp_filename = filename + u'.temp'
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy']
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
options.extend(['-f', ext])
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info
class FFmpegMergerPP(FFmpegPostProcessor):
    """Post-processor that merges several input files into one output file."""

    def run(self, info):
        """Merge ``info['__files_to_merge']`` into ``info['filepath']``.

        The inputs are handed to ``run_ffmpeg_multiple_files`` together with
        the options ``-c copy``.

        Returns the tuple ``(True, info)``.
        """
        merge_opts = ['-c', 'copy']
        target = info['filepath']
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], target, merge_opts)
        return True, info

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals
import collections
import errno
@@ -51,9 +51,11 @@ from .utils import (
write_json_file,
write_string,
YoutubeDLHandler,
prepend_extension,
)
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
from .PostProcessor import FFmpegMergerPP
from .version import __version__
@@ -148,6 +150,7 @@ class YoutubeDL(object):
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
debug_printtraffic:Print out sent and received HTTP traffic
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -164,6 +167,8 @@ class YoutubeDL(object):
def __init__(self, params=None):
"""Create a FileDownloader object with the given options."""
if params is None:
params = {}
self._ies = []
self._ies_instances = {}
self._pps = []
@@ -172,7 +177,7 @@ class YoutubeDL(object):
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
self.params = {} if params is None else params
self.params = params
if params.get('bidi_workaround', False):
try:
@@ -197,7 +202,7 @@ class YoutubeDL(object):
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
else:
raise
@@ -206,13 +211,13 @@ class YoutubeDL(object):
and not params['restrictfilenames']):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
u'Assuming --restrict-filenames since file system encoding '
u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.')
'Assuming --restrict-filenames since file system encoding '
'cannot encode all charactes. '
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
self._setup_opener()
@@ -255,13 +260,13 @@ class YoutubeDL(object):
return message
assert hasattr(self, '_output_process')
assert type(message) == type(u'')
line_count = message.count(u'\n') + 1
self._output_process.stdin.write((message + u'\n').encode('utf-8'))
assert type(message) == type('')
line_count = message.count('\n') + 1
self._output_process.stdin.write((message + '\n').encode('utf-8'))
self._output_process.stdin.flush()
res = u''.join(self._output_channel.readline().decode('utf-8')
res = ''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len(u'\n')]
return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
@@ -273,19 +278,19 @@ class YoutubeDL(object):
self.params['logger'].debug(message)
elif not check_quiet or not self.params.get('quiet', False):
message = self._bidi_workaround(message)
terminator = [u'\n', u''][skip_eol]
terminator = ['\n', ''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
def to_stderr(self, message):
"""Print message to stderr."""
assert type(message) == type(u'')
assert type(message) == type('')
if self.params.get('logger'):
self.params['logger'].error(message)
else:
message = self._bidi_workaround(message)
output = message + u'\n'
output = message + '\n'
write_string(output, self._err_file)
def to_console_title(self, message):
@@ -296,21 +301,21 @@ class YoutubeDL(object):
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
write_string(u'\033]0;%s\007' % message, self._screen_file)
write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Save the title on stack
write_string(u'\033[22;0t', self._screen_file)
write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Restore the title from stack
write_string(u'\033[23;0t', self._screen_file)
write_string('\033[23;0t', self._screen_file)
def __enter__(self):
self.save_console_title()
@@ -336,13 +341,13 @@ class YoutubeDL(object):
if self.params.get('verbose'):
if tb is None:
if sys.exc_info()[0]: # if .trouble has been called from an except block
tb = u''
tb = ''
if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
tb += compat_str(traceback.format_exc())
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = u''.join(tb_data)
tb = ''.join(tb_data)
self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
@@ -358,10 +363,10 @@ class YoutubeDL(object):
If stderr is a tty file the 'WARNING:' will be colored
'''
if self._err_file.isatty() and os.name != 'nt':
_msg_header = u'\033[0;33mWARNING:\033[0m'
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = u'WARNING:'
warning_message = u'%s %s' % (_msg_header, message)
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
def report_error(self, message, tb=None):
@@ -370,18 +375,18 @@ class YoutubeDL(object):
in red if stderr is a tty file.
'''
if self._err_file.isatty() and os.name != 'nt':
_msg_header = u'\033[0;31mERROR:\033[0m'
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = u'ERROR:'
error_message = u'%s %s' % (_msg_header, message)
_msg_header = 'ERROR:'
error_message = '%s %s' % (_msg_header, message)
self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
self.to_screen('[download] %s has already been downloaded' % file_name)
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
self.to_screen('[download] The file has already been downloaded')
def increment_downloads(self):
"""Increment the ordinal that assigns a number to each file."""
@@ -396,61 +401,61 @@ class YoutubeDL(object):
autonumber_size = self.params.get('autonumber_size')
if autonumber_size is None:
autonumber_size = 5
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
autonumber_templ = '%0' + str(autonumber_size) + 'd'
template_dict['autonumber'] = autonumber_templ % self._num_downloads
if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
restricted=self.params.get('restrictfilenames'),
is_id=(k == u'id'))
is_id=(k == 'id'))
template_dict = dict((k, sanitize(k, v))
for k, v in template_dict.items()
if v is not None)
template_dict = collections.defaultdict(lambda: u'NA', template_dict)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
tmpl = os.path.expanduser(self.params['outtmpl'])
filename = tmpl % template_dict
return filename
except ValueError as err:
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', u'video'))
video_title = info_dict.get('title', info_dict.get('id', 'video'))
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
return '"' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
view_count = info_dict.get('view_count', None)
if view_count is not None:
min_views = self.params.get('min_views')
if min_views is not None and view_count < min_views:
return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
max_views = self.params.get('max_views')
if max_views is not None and view_count > max_views:
return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
age_limit = self.params.get('age_limit')
if age_limit is not None:
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
return 'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return u'%s has already been recorded in archive' % video_title
return '%s has already been recorded in archive' % video_title
return None
@staticmethod
@@ -477,8 +482,8 @@ class YoutubeDL(object):
continue
if not ie.working():
self.report_warning(u'The program functionality for this site has been marked as broken, '
u'and will probably not work.')
self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.')
try:
ie_result = ie.extract(url)
@@ -511,7 +516,7 @@ class YoutubeDL(object):
else:
raise
else:
self.report_error(u'no suitable InfoExtractor: %s' % url)
self.report_error('no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
@@ -562,7 +567,7 @@ class YoutubeDL(object):
elif result_type == 'playlist':
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
self.to_screen('[download] Downloading playlist: %s' % playlist)
playlist_results = []
@@ -577,11 +582,11 @@ class YoutubeDL(object):
n_entries = len(entries)
self.to_screen(
u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
for i, entry in enumerate(entries, 1):
self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
extra = {
'playlist': playlist,
'playlist_index': i + playliststart,
@@ -593,7 +598,7 @@ class YoutubeDL(object):
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
self.to_screen('[download] ' + reason)
continue
entry_result = self.process_ie_result(entry,
@@ -626,7 +631,7 @@ class YoutubeDL(object):
elif format_spec == 'worst':
return available_formats[0]
else:
extensions = [u'mp4', u'flv', u'webm', u'3gp']
extensions = ['mp4', 'flv', 'webm', '3gp']
if format_spec in extensions:
filter_f = lambda f: f['ext'] == format_spec
else:
@@ -645,7 +650,7 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
# This extractors handle format selection themselves
if info_dict['extractor'] in [u'Youku']:
if info_dict['extractor'] in ['Youku']:
if download:
self.process_info(info_dict)
return info_dict
@@ -662,10 +667,10 @@ class YoutubeDL(object):
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
if format.get('format') is None:
format['format'] = u'{id} - {res}{note}'.format(
format['format'] = '{id} - {res}{note}'.format(
id=format['format_id'],
res=self.format_resolution(format),
note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
)
# Automatically determine file extension if missing
if 'ext' not in format:
@@ -697,21 +702,35 @@ class YoutubeDL(object):
if req_format in ('-1', 'all'):
formats_to_download = formats
else:
# We can accept formats requestd in the format: 34/5/best, we pick
# We can accept formats requested in the format: 34/5/best, we pick
# the first that is available, starting from left
req_formats = req_format.split('/')
for rf in req_formats:
selected_format = self.select_format(rf, formats)
if re.match(r'.+?\+.+?', rf) is not None:
# Two formats have been requested like '137+139'
format_1, format_2 = rf.split('+')
formats_info = (self.select_format(format_1, formats),
self.select_format(format_2, formats))
if all(formats_info):
selected_format = {
'requested_formats': formats_info,
'format': rf,
'ext': formats_info[0]['ext'],
}
else:
selected_format = None
else:
selected_format = self.select_format(rf, formats)
if selected_format is not None:
formats_to_download = [selected_format]
break
if not formats_to_download:
raise ExtractorError(u'requested format not available',
raise ExtractorError('requested format not available',
expected=True)
if download:
if len(formats_to_download) > 1:
self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
for format in formats_to_download:
new_info = dict(info_dict)
new_info.update(format)
@@ -729,7 +748,7 @@ class YoutubeDL(object):
info_dict['fulltitle'] = info_dict['title']
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + u'...'
info_dict['title'] = info_dict['title'][:197] + '...'
# Keep for backwards compatibility
info_dict['stitle'] = info_dict['title']
@@ -739,7 +758,7 @@ class YoutubeDL(object):
reason = self._match_entry(info_dict)
if reason is not None:
self.to_screen(u'[download] ' + reason)
self.to_screen('[download] ' + reason)
return
max_downloads = self.params.get('max_downloads')
@@ -756,7 +775,7 @@ class YoutubeDL(object):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
@@ -783,37 +802,37 @@ class YoutubeDL(object):
if dn != '' and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error(u'unable to create directory ' + compat_str(err))
self.report_error('unable to create directory ' + compat_str(err))
return
if self.params.get('writedescription', False):
descfn = filename + u'.description'
descfn = filename + '.description'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen(u'[info] Video description is already present')
self.to_screen('[info] Video description is already present')
else:
try:
self.to_screen(u'[info] Writing video description to: ' + descfn)
self.to_screen('[info] Writing video description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (KeyError, TypeError):
self.report_warning(u'There\'s no description to write.')
self.report_warning('There\'s no description to write.')
except (OSError, IOError):
self.report_error(u'Cannot write description file ' + descfn)
self.report_error('Cannot write description file ' + descfn)
return
if self.params.get('writeannotations', False):
annofn = filename + u'.annotations.xml'
annofn = filename + '.annotations.xml'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen(u'[info] Video annotations are already present')
self.to_screen('[info] Video annotations are already present')
else:
try:
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
self.to_screen('[info] Writing video annotations to: ' + annofn)
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning(u'There are no annotations to write.')
self.report_warning('There are no annotations to write.')
except (OSError, IOError):
self.report_error(u'Cannot write annotations file: ' + annofn)
self.report_error('Cannot write annotations file: ' + annofn)
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
@@ -831,45 +850,45 @@ class YoutubeDL(object):
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
else:
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn)
self.report_error('Cannot write subtitles file ' + descfn)
return
if self.params.get('writeinfojson', False):
infofn = os.path.splitext(filename)[0] + u'.info.json'
infofn = os.path.splitext(filename)[0] + '.info.json'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen(u'[info] Video description metadata is already present')
self.to_screen('[info] Video description metadata is already present')
else:
self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
try:
write_json_file(info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
if self.params.get('writethumbnail', False):
if info_dict.get('thumbnail') is not None:
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
self.to_screen(u'[%s] %s: Thumbnail is already present' %
self.to_screen('[%s] %s: Thumbnail is already present' %
(info_dict['extractor'], info_dict['id']))
else:
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
self.to_screen('[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning(u'Unable to download thumbnail "%s": %s' %
self.report_warning('Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
@@ -877,24 +896,41 @@ class YoutubeDL(object):
success = True
else:
try:
fd = get_suitable_downloader(info_dict)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
success = fd.download(filename, info_dict)
def dl(name, info):
fd = get_suitable_downloader(info)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
return fd.download(name, info)
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
for f in info_dict['requested_formats']:
new_info = dict(info_dict)
new_info.update(f)
fname = self.prepare_filename(new_info)
fname = prepend_extension(fname, 'f%s' % f['format_id'])
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
info_dict['__files_to_merge'] = downloaded
else:
# Just a single file
success = dl(filename, info_dict)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
self.report_error('unable to download video data: %s' % str(err))
return
except (OSError, IOError) as err:
raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
if success:
try:
self.post_process(filename, info_dict)
except (PostProcessingError) as err:
self.report_error(u'postprocessing: %s' % str(err))
self.report_error('postprocessing: %s' % str(err))
return
self.record_download_archive(info_dict)
@@ -911,9 +947,9 @@ class YoutubeDL(object):
#It also downloads the videos
self.extract_info(url)
except UnavailableVideoError:
self.report_error(u'unable to download video')
self.report_error('unable to download video')
except MaxDownloadsReached:
self.to_screen(u'[info] Maximum number of downloaded files reached.')
self.to_screen('[info] Maximum number of downloaded files reached.')
raise
return self._download_retcode
@@ -926,7 +962,7 @@ class YoutubeDL(object):
except DownloadError:
webpage_url = info.get('webpage_url')
if webpage_url is not None:
self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
return self.download([webpage_url])
else:
raise
@@ -937,7 +973,11 @@ class YoutubeDL(object):
info = dict(ie_info)
info['filepath'] = filename
keep_video = None
for pp in self._pps:
pps_chain = []
if ie_info.get('__postprocessors') is not None:
pps_chain.extend(ie_info['__postprocessors'])
pps_chain.extend(self._pps)
for pp in pps_chain:
try:
keep_video_wish, new_info = pp.run(info)
if keep_video_wish is not None:
@@ -950,10 +990,10 @@ class YoutubeDL(object):
self.report_error(e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
os.remove(encodeFilename(filename))
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
self.report_warning('Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
@@ -964,7 +1004,7 @@ class YoutubeDL(object):
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return None # Incomplete video information
return extractor.lower() + u' ' + info_dict['id']
return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
@@ -992,7 +1032,7 @@ class YoutubeDL(object):
vid_id = self._make_archive_id(info_dict)
assert vid_id
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + u'\n')
archive_file.write(vid_id + '\n')
@staticmethod
def format_resolution(format, default='unknown'):
@@ -1002,49 +1042,49 @@ class YoutubeDL(object):
return format['resolution']
if format.get('height') is not None:
if format.get('width') is not None:
res = u'%sx%s' % (format['width'], format['height'])
res = '%sx%s' % (format['width'], format['height'])
else:
res = u'%sp' % format['height']
res = '%sp' % format['height']
elif format.get('width') is not None:
res = u'?x%d' % format['width']
res = '?x%d' % format['width']
else:
res = default
return res
def list_formats(self, info_dict):
def format_note(fdict):
res = u''
if f.get('ext') in ['f4f', 'f4m']:
res += u'(unsupported) '
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
res += '(unsupported) '
if fdict.get('format_note') is not None:
res += fdict['format_note'] + u' '
res += fdict['format_note'] + ' '
if fdict.get('tbr') is not None:
res += u'%4dk ' % fdict['tbr']
res += '%4dk ' % fdict['tbr']
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
res += u'%-5s@' % fdict['vcodec']
res += '%-5s@' % fdict['vcodec']
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
res += u'video@'
res += 'video@'
if fdict.get('vbr') is not None:
res += u'%4dk' % fdict['vbr']
res += '%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += u', '
res += u'%-5s' % fdict['acodec']
res += ', '
res += '%-5s' % fdict['acodec']
elif fdict.get('abr') is not None:
if res:
res += u', '
res += ', '
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
res += '@%3dk' % fdict['abr']
if fdict.get('filesize') is not None:
if res:
res += u', '
res += ', '
res += format_bytes(fdict['filesize'])
return res
def line(format, idlen=20):
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
@@ -1052,7 +1092,7 @@ class YoutubeDL(object):
))
formats = info_dict.get('formats', [info_dict])
idlen = max(len(u'format code'),
idlen = max(len('format code'),
max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1:
@@ -1060,10 +1100,10 @@ class YoutubeDL(object):
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))
'format_id': 'format code', 'ext': 'extension',
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
self.to_screen('[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
""" Start an HTTP download """
@@ -1072,7 +1112,7 @@ class YoutubeDL(object):
def print_debug_header(self):
if not self.params.get('verbose'):
return
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
write_string('[debug] youtube-dl version ' + __version__ + '\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
@@ -1081,20 +1121,20 @@ class YoutubeDL(object):
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
write_string('[debug] Git HEAD: ' + out + '\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
write_string('[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + '\n')
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
@@ -1124,10 +1164,13 @@ class YoutubeDL(object):
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(
self.params.get('nocheckcertificate', False))
self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
ydlh = YoutubeDLHandler(debuglevel=debuglevel)
opener = compat_urllib_request.build_opener(
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
https_handler, proxy_handler, cookie_processor, ydlh)
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)

View File

@@ -186,7 +186,7 @@ def parseOpts(overrideArguments=None):
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching')
@@ -334,7 +334,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
verbosity.add_option('--print-traffic',
dest='debug_printtraffic', action='store_true', default=False,
help=optparse.SUPPRESS_HELP)
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@@ -696,6 +698,7 @@ def _real_main(argv=None):
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround,
'debug_printtraffic': opts.debug_printtraffic,
}
with YoutubeDL(ydl_opts) as ydl:

View File

@@ -133,7 +133,7 @@ class HttpFD(FileDownloader):
return False
try:
stream.write(data_block)
except (IOError, OSError):
except (IOError, OSError) as err:
self.to_stderr(u"\n")
self.report_error(u'unable to write data: %s' % str(err))
return False

View File

@@ -28,6 +28,7 @@ from .channel9 import Channel9IE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .imdb import (
ImdbIE,
ImdbListIE
)
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
@@ -91,12 +95,18 @@ from .ivi import (
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
from .kankan import KankanIE
from .keezmovies import KeezMoviesIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import (
LyndaIE,
LyndaCourseIE
)
from .macgamestore import MacGameStoreIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import datetime
import json
import re
@@ -20,42 +22,36 @@ from ..utils import (
class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv"""
_VALID_URL = r'^(?:https?://)?(?:www\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_URL_EXT = r'^.*\.([a-z0-9]+)$'
IE_NAME = u'blip.tv'
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_TEST = {
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
u'file': u'5779306.m4v',
u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
u'info_dict': {
u"upload_date": u"20111205",
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
u"uploader": u"Comic Book Resources - CBR TV",
u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
'file': '5779306.mov',
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
'info_dict': {
'upload_date': '20111205',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'uploader': 'Comic Book Resources - CBR TV',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
}
}
def report_direct_download(self, title):
"""Report information extraction."""
self.to_screen(u'%s: Direct download detected' % title)
self.to_screen('%s: Direct download detected' % title)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
raise ExtractorError('Invalid URL: %s' % url)
# See https://github.com/rg3/youtube-dl/issues/857
api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
if api_mobj is not None:
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
urlp = compat_urllib_parse_urlparse(url)
if urlp.path.startswith('/play/'):
response = self._request_webpage(url, None, False)
redirecturl = response.geturl()
rurlp = compat_urllib_parse_urlparse(redirecturl)
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
url = 'http://blip.tv/a/a-' + file_id
return self._real_extract(url)
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
if embed_mobj:
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
info_page = self._download_webpage(info_url, embed_mobj.group(1))
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
if '?' in url:
cchar = '&'
@@ -66,13 +62,13 @@ class BlipTVIE(InfoExtractor):
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
urlh = self._request_webpage(request, None, False,
u'unable to download video info webpage')
'unable to download video info webpage')
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
try:
json_data = json.loads(json_code)
@@ -82,32 +78,38 @@ class BlipTVIE(InfoExtractor):
data = json_data
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
formats = []
if 'additionalMedia' in data:
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
best_format = formats[-1]
video_url = best_format['url']
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
if not int(f['media_width']): # filter m3u8
continue
formats.append({
'url': f['url'],
'format_id': f['role'],
'width': int(f['media_width']),
'height': int(f['media_height']),
})
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url)
if umobj is None:
raise ValueError('Can not determine filename extension')
ext = umobj.group(1)
formats.append({
'url': data['media']['url'],
'width': int(data['media']['width']),
'height': int(data['media']['height']),
})
self._sort_formats(formats)
return {
'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'ext': ext,
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
'formats': formats,
}
except (ValueError, KeyError) as err:
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
raise ExtractorError('Unable to parse video information: %s' % repr(err))
class BlipTVUserIE(InfoExtractor):
@@ -115,19 +117,19 @@ class BlipTVUserIE(InfoExtractor):
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = u'blip.tv:user'
IE_NAME = 'blip.tv:user'
def _real_extract(self, url):
# Extract username
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
raise ExtractorError('Invalid URL: %s' % url)
username = mobj.group(1)
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
page = self._download_webpage(url, username, u'Downloading user page')
page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
@@ -143,7 +145,7 @@ class BlipTVUserIE(InfoExtractor):
while True:
url = page_base + "&page=" + str(pagenum)
page = self._download_webpage(url, username,
u'Downloading video ids from page %d' % pagenum)
'Downloading video ids from page %d' % pagenum)
# Extract video identifiers
ids_in_page = []
@@ -165,6 +167,6 @@ class BlipTVUserIE(InfoExtractor):
pagenum += 1
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return [self.playlist_result(url_entries, playlist_title = username)]

View File

@@ -0,0 +1,19 @@
from .mtv import MTVIE
class CMTIE(MTVIE):
    """Extractor for cmt.com video pages.

    No methods are overridden: the class only supplies its own name, URL
    pattern and feed URL on top of what MTVIE provides.
    """

    IE_NAME = u'cmt.com'

    # The named group "videoid" captures the last path segment in front of
    # the ".jhtml" suffix.
    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'

    _TESTS = [{
        u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
        u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
        u'info_dict': {
            u'id': u'989124',
            u'ext': u'mp4',
            u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
            u'description': u'Blame It All On My Roots',
        },
    }]

View File

@@ -1,82 +1,68 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
determine_ext,
ExtractorError,
)
class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
_TESTS = [{
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
u'file': u'6902724.mp4',
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
u'info_dict': {
u'title': u'Comic-Con Cosplay Catastrophe',
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
'file': '6902724.mp4',
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
'info_dict': {
'title': 'Comic-Con Cosplay Catastrophe',
'description': 'Fans get creative this year at San Diego. Too',
'age_limit': 13,
},
},
{
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
u'file': u'3505939.mp4',
u'md5': u'c51ca16b82bb456a4397987791a835f5',
u'info_dict': {
u'title': u'Font Conference',
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
'file': '3505939.mp4',
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
'info_dict': {
'title': 'Font Conference',
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
'age_limit': 10,
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('videoid')
info = {
'id': video_id,
'uploader': None,
'upload_date': None,
}
jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
data = json.loads(self._download_webpage(
jsonUrl, video_id, 'Downloading info JSON'))
vdata = data['video']
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
mdoc = self._download_xml(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
if youtubeIdNode is not None:
return self.url_result(youtubeIdNode.text, 'Youtube')
info['description'] = videoNode.findall('./description')[0].text
info['title'] = videoNode.findall('./caption')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
next_url = videoNode.findall('./file')[0].text
except IndexError:
raise ExtractorError(u'Invalid metadata XML file')
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
adoc = self._download_xml(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
try:
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
info['ext'] = 'mp4'
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
rating = vdata.get('rating')
if rating:
age_limit = AGE_LIMITS.get(rating.lower())
else:
# Old-style direct links
info['url'] = next_url
info['ext'] = determine_ext(info['url'])
age_limit = None # None = No idea
return info
PREFS = {'high_quality': 2, 'low_quality': 0}
formats = []
for format_key in ('mp4', 'webm'):
for qname, qurl in vdata[format_key].items():
formats.append({
'format_id': format_key + '_' + qname,
'url': qurl,
'format': format_key,
'preference': PREFS.get(qname),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': vdata['title'],
'description': vdata.get('description'),
'thumbnail': vdata.get('thumbnail'),
'formats': formats,
'age_limit': age_limit,
}

View File

@@ -12,7 +12,9 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {

View File

@@ -69,7 +69,8 @@ class InfoExtractor(object):
download, lower-case.
"http", "https", "rtsp", "rtmp" or so.
* preference Order number of this format. If this field is
present, the formats get sorted by this field.
present and not None, the formats get sorted
by this field.
-1 for default (order by other properties),
-2 or smaller for less than default.
url: Final video URL.
@@ -377,7 +378,7 @@ class InfoExtractor(object):
@staticmethod
def _og_regexes(prop):
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
template % (property_re, content_re),

View File

@@ -1,20 +1,25 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
unescapeHTML,
)
class CSpanIE(InfoExtractor):
_VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
IE_DESC = 'C-SPAN'
_TEST = {
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
u'file': u'315139.flv',
u'md5': u'74a623266956f69e4df0068ab6c80fe4',
u'info_dict': {
u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
'url': 'http://www.c-spanvideo.org/program/HolderonV',
'file': '315139.mp4',
'md5': '8e44ce11f0f725527daccc453f553eb0',
'info_dict': {
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
},
u'skip': u'Requires rtmpdump'
}
def _real_extract(self, url):
@@ -22,30 +27,22 @@ class CSpanIE(InfoExtractor):
prog_name = mobj.group(1)
webpage = self._download_webpage(url, prog_name)
video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
data = compat_urllib_parse.urlencode({'programid': video_id,
'dynamic':'1'})
info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
self.report_extraction(video_id)
title = self._html_search_regex(
r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
description = self._og_search_description(webpage)
title = self._html_search_regex(r'<string name="title">(.*?)</string>',
video_info, 'title')
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
webpage, 'description',
flags=re.MULTILINE|re.DOTALL)
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
data_json = self._download_webpage(
info_url, video_id, 'Downloading video info')
data = json.loads(data_json)
url = self._search_regex(r'<string name="URL">(.*?)</string>',
video_info, 'video url')
url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
path = self._search_regex(r'<string name="path">(.*?)</string>',
video_info, 'rtmp play path')
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
return {'id': video_id,
'title': title,
'ext': 'flv',
'url': url,
'play_path': path,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
}
return {
'id': video_id,
'title': title,
'url': url,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -10,11 +10,11 @@ from ..utils import (
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.webm',
u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
u'file': u'36983.mp4',
u'md5': u'9dcfe344732808dbfcc901537973c922',
u'info_dict': {
u"title": u"Kaffeeland Schweiz",
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",

View File

@@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):
return self.url_result('http://' + url)
video_id = os.path.splitext(url.split('/')[-1])[0]
self.to_screen(u'%s: Requesting header' % video_id)
try:
response = self._send_head(url)
@@ -271,16 +273,12 @@ class GenericIE(InfoExtractor):
}
# Look for embedded blip.tv player
mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
if mobj:
player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
player_page = self._download_webpage(player_url, mobj.group(1))
blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
if blip_video_id:
return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
return self.url_result(mobj.group(1), 'BlipTV')
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)

View File

@@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
'description': descr,
'thumbnail': format_info['slate'],
}
class ImdbListIE(InfoExtractor):
    """Extractor that turns an IMDb list into a playlist of its video items."""

    IE_NAME = u'imdb:list'
    IE_DESC = u'Internet Movie Database lists'
    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'

    def _real_extract(self, url):
        """Return a playlist result holding every 'vi…' item of the list."""
        list_id = re.match(self._VALID_URL, url).group('id')

        # The RSS XML is sometimes malformed, so the title is pulled out with
        # a regular expression instead of an XML parser.
        rss_url = 'http://rss.imdb.com/list/%s' % list_id
        rss_page = self._download_webpage(
            rss_url, list_id, u'Downloading list RSS')
        playlist_title = self._html_search_regex(
            r'<title>(.*?)</title>', rss_page, u'list title')

        # The export does not depend on the real author_id, but it answers
        # with a 404 when none is given; a dummy author_id seems sufficient.
        export_url = (
            'http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000'
            % list_id)
        export_page = self._download_webpage(
            export_url, list_id, u'Downloading list CSV')

        # Drop the header row, then split every remaining row into columns.
        rows = (row.split(',') for row in export_page.split('\n')[1:])
        # The second column carries the item id wrapped in one character on
        # each side, which is stripped; rows with fewer columns are ignored.
        item_ids = (cols[1][1:-1] for cols in rows if len(cols) >= 2)

        playlist_entries = [
            self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')
            for item_id in item_ids
            if item_id.startswith('vi')
        ]
        return self.playlist_result(playlist_entries, list_id, playlist_title)

View File

@@ -0,0 +1,73 @@
# coding=utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
)
class JpopsukiIE(InfoExtractor):
    """Extractor for single video pages on jpopsuki.tv."""

    IE_NAME = 'jpopsuki.tv'
    _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)'

    _TEST = {
        'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
        'md5': '88018c0c1a9b1387940e90ec9e7e198e',
        'file': '00be659d23b0b40508169cdee4545771.mp4',
        'info_dict': {
            'id': '00be659d23b0b40508169cdee4545771',
            'title': 'ayumi hamasaki - evolution',
            'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
            'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
            'uploader': 'plama_chan',
            'uploader_id': '404',
            'upload_date': '20121101'
        }
    }

    def _real_extract(self, url):
        """Download the video page and scrape the media URL plus metadata."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # The <source> tag's src is appended to the site root to form the
        # final media URL.
        source_path = self._html_search_regex(
            r'<source src="(.*?)" type', page, 'video url')
        media_url = 'http://www.jpopsuki.tv' + source_path

        og_title = self._og_search_title(page)
        og_description = self._og_search_description(page)
        og_thumbnail = self._og_search_thumbnail(page)

        # Everything below is searched with fatal=False.
        uploader_name = self._html_search_regex(
            r'<li>from: <a href="/user/view/user/(.*?)/uid/',
            page, 'video uploader', fatal=False)
        uploader_uid = self._html_search_regex(
            r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
            page, 'video uploader_id', fatal=False)

        raw_date = self._html_search_regex(
            r'<li>uploaded: (.*?)</li>', page, 'video upload_date',
            fatal=False)
        normalized_date = (
            unified_strdate(raw_date) if raw_date is not None else raw_date)

        hits_text = self._html_search_regex(
            r'<li>Hits: ([0-9]+?)</li>', page, 'video view_count',
            fatal=False)
        comments_text = self._html_search_regex(
            r'<h2>([0-9]+?) comments</h2>', page, 'video comment_count',
            fatal=False)

        result = {'id': video_id, 'url': media_url, 'title': og_title}
        result['description'] = og_description
        result['thumbnail'] = og_thumbnail
        result['uploader'] = uploader_name
        result['uploader_id'] = uploader_uid
        result['upload_date'] = normalized_date
        result['view_count'] = int_or_none(hits_text)
        result['comment_count'] = int_or_none(comments_text)
        return result

View File

@@ -0,0 +1,102 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import ExtractorError
class LyndaIE(InfoExtractor):
    """Extractor for a single lynda.com video.

    The numeric video id is taken from the URL and looked up through the
    site's ``ajax/player`` endpoint, whose JSON response supplies the
    title, duration and the list of downloadable formats.
    """

    IE_NAME = 'lynda'
    IE_DESC = 'lynda.com videos'
    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
    _TEST = {
        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
        'file': '114408.mp4',
        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
        u"info_dict": {
            'title': 'Using the exercise files',
            'duration': 68
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, duration, formats) for *url*.

        Raises ExtractorError (expected) when the endpoint reports the
        video as not found or as not accessible.
        """
        video_id = re.match(self._VALID_URL, url).group(1)

        player_url = 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id
        raw_json = self._download_webpage(
            player_url, video_id, 'Downloading video JSON')
        video_json = json.loads(raw_json)

        not_found = 'Status' in video_json and video_json['Status'] == 'NotFound'
        if not_found:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Identity check on purpose: only an explicit ``false`` blocks access.
        if video_json['HasAccess'] is False:
            raise ExtractorError('Video %s is only available for members' % video_id, expected=True)

        # From here on the id reported by the server is used.
        video_id = video_json['ID']
        duration = video_json['DurationInSeconds']
        title = video_json['Title']

        formats = []
        for fmt in video_json['Formats']:
            formats.append({
                'url': fmt['Url'],
                'ext': fmt['Extension'],
                'width': fmt['Width'],
                'height': fmt['Height'],
                'filesize': fmt['FileSize'],
                'format_id': fmt['Resolution'],
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }
class LyndaCourseIE(InfoExtractor):
    """Extractor that turns a lynda.com course into a playlist of videos.

    The course JSON is fetched from the ``ajax/player`` endpoint; every
    video flagged as accessible becomes a playlist entry that is handed
    over to the 'Lynda' extractor.
    """

    IE_NAME = 'lynda:course'
    IE_DESC = 'lynda.com online courses'

    # A course link is the same as the link to that course's
    # welcome/introduction video, so such a link is treated as a course link.
    _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'

    def _real_extract(self, url):
        """Return a playlist result holding the accessible videos of a course.

        Raises ExtractorError (expected) when the endpoint reports the
        course as not found.  Videos without access are skipped and
        reported through a single warning.
        """
        course_path, course_id = re.match(
            self._VALID_URL, url).group('coursepath', 'courseid')

        course_url = 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id
        raw_json = self._download_webpage(
            course_url, course_id, 'Downloading course JSON')
        course_json = json.loads(raw_json)

        not_found = 'Status' in course_json and course_json['Status'] == 'NotFound'
        if not_found:
            raise ExtractorError('Course %s does not exist' % course_id, expected=True)

        accessible_ids = []
        locked_count = 0
        for chapter in course_json['Chapters']:
            for video in chapter['Videos']:
                # Identity check on purpose: anything but ``true`` is locked.
                if video['HasAccess'] is True:
                    accessible_ids.append(video['ID'])
                else:
                    locked_count += 1

        if locked_count:
            self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % locked_count)

        entries = []
        for video_id in accessible_ids:
            video_page = 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id)
            entries.append(self.url_result(video_page, 'Lynda'))

        course_title = course_json['Title']
        return self.playlist_result(entries, course_id, course_title)

View File

@@ -0,0 +1,43 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class MacGameStoreIE(InfoExtractor):
    """Extractor for trailers shown in the MacGameStore media viewer."""

    IE_NAME = 'macgamestore'
    IE_DESC = 'MacGameStore trailers'
    _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
        'file': '2450.m4v',
        'md5': '8649b8ea684b6666b4c5be736ecddc61',
        'info_dict': {
            'title': 'Crow',
        }
    }

    def _real_extract(self, url):
        """Return id, media URL and title of the trailer at *url*.

        Raises ExtractorError (expected) when the page contains the
        "Missing Media" marker.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            url, video_id, 'Downloading trailer page')

        # The site shows a "Missing Media" element for unknown trailer ids.
        missing = re.search(r'>Missing Media<', webpage)
        if missing is not None:
            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)

        title = self._html_search_regex(
            r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
        # The media link is the first href following the player container.
        media_url = self._html_search_regex(
            r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
            webpage, 'video URL')

        result = {
            'id': video_id,
            'url': media_url,
            'title': title,
        }
        return result

View File

@@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):
info = json.loads(json_data)
preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
song_url = preview_url.replace('/previews/', '/c/originals/')
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
final_song_url = self._get_url(template_url)
if final_song_url is None:

View File

@@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
uri = mobj.group('mgid')
uri = mobj.groupdict().get('mgid')
if uri is None:
webpage = self._download_webpage(url, video_id)

View File

@@ -5,7 +5,7 @@ from ..utils import compat_urllib_parse
class PornHdIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
_TEST = {
u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
u'file': u'1962.flv',

View File

@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
(?!sets/)(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
)
'''
IE_NAME = u'soundcloud'
@@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
elif mobj.group('widget'):
elif mobj.group('player'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
return self.url_result(query['url'][0], ie='Soundcloud')
else:

View File

@@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor):
'height': a['height'],
'filesize': a['size'],
'ext': a['ext'],
'preference': 1 if atype == 'original' else None,
})
self._sort_formats(formats)

View File

@@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
'138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},

View File

@@ -500,12 +500,13 @@ def unescapeHTML(s):
result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
return result
def encodeFilename(s):
def encodeFilename(s, for_subprocess=False):
"""
@param s The name of the file
"""
assert type(s) == type(u'')
assert type(s) == compat_str
# Python 3 has a Unicode API
if sys.version_info >= (3, 0):
@@ -515,12 +516,18 @@ def encodeFilename(s):
# Pass u'' directly to use Unicode APIs on Windows 2000 and up
# (Detecting Windows NT 4 is tricky because 'major >= 4' would
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
return s
if not for_subprocess:
return s
else:
# For subprocess calls, encode with locale encoding
# Refer to http://stackoverflow.com/a/9951851/35070
encoding = preferredencoding()
else:
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
if encoding is None:
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
def decodeOption(optval):
if optval is None:
@@ -539,7 +546,8 @@ def formatSeconds(secs):
else:
return '%d' % secs
def make_HTTPS_handler(opts_no_check_certificate):
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
if sys.version_info < (3, 2):
import httplib
@@ -560,7 +568,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
def https_open(self, req):
return self.do_open(HTTPSConnectionV3, req)
return HTTPSHandlerV3()
return HTTPSHandlerV3(**kwargs)
else:
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
context.verify_mode = (ssl.CERT_NONE
@@ -571,7 +579,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
context.load_default_certs()
except AttributeError:
pass # Python < 3.4
return compat_urllib_request.HTTPSHandler(context=context)
return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
class ExtractorError(Exception):
"""Error during info extraction."""
@@ -849,12 +857,22 @@ def platform_name():
def write_string(s, out=None):
if out is None:
out = sys.stderr
assert type(s) == type(u'')
assert type(s) == compat_str
if ('b' in getattr(out, 'mode', '') or
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
s = s.encode(preferredencoding(), 'ignore')
out.write(s)
try:
out.write(s)
except UnicodeEncodeError:
# In Windows shells, this can fail even when the codec is just charmap!?
# See https://wiki.python.org/moin/PrintFails#Issue
if sys.platform == 'win32' and hasattr(out, 'encoding'):
s = s.encode(out.encoding, 'ignore').decode(out.encoding)
out.write(s)
else:
raise
out.flush()
@@ -1070,7 +1088,7 @@ def fix_xml_all_ampersand(xml_str):
def setproctitle(title):
assert isinstance(title, type(u''))
assert isinstance(title, compat_str)
try:
libc = ctypes.cdll.LoadLibrary("libc.so.6")
except OSError:
@@ -1118,3 +1136,8 @@ def parse_duration(s):
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
return res
def prepend_extension(filename, ext):
    """Insert *ext* in front of the existing extension of *filename*.

    The name is split with ``os.path.splitext``; *ext* (given without a
    leading dot) is placed between the stem and the original extension,
    e.g. ``video.mp4`` with ``temp`` becomes ``video.temp.mp4``.
    """
    parts = os.path.splitext(filename)
    stem = parts[0]
    original_ext = parts[1]
    return u'{0}.{1}{2}'.format(stem, ext, original_ext)

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.26'
__version__ = '2014.01.05'