Compare commits

..

26 Commits

Author SHA1 Message Date
Philipp Hagemeister
e02066e7ff Windows build for 2012.11.28 2012-11-27 16:15:15 +01:00
Philipp Hagemeister
c9128b353d Bump version number to a numeric-only one to appease py2exe 2012-11-27 16:12:08 +01:00
Philipp Hagemeister
e7c6f1a2dc Bump version number 2012-11-27 16:08:39 +01:00
Philipp Hagemeister
1a911e60a4 Add test for asian characters (#551) 2012-11-27 16:07:52 +01:00
Philipp Hagemeister
46cbda0be4 Minor filename encoding improvement in a common case 2012-11-27 15:07:10 +01:00
Philipp Hagemeister
fa59f4b6a9 Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 14:55:18 +01:00
Christopher Neugebauer
4a702f3819 Fixes the InfoExtractor for the Colbert Report. 2012-11-27 23:54:43 +11:00
Philipp Hagemeister
6bac102a4d Fix spacing in comedycentral IE 2012-11-27 13:24:10 +01:00
Philipp Hagemeister
958a22b7cf Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 13:19:18 +01:00
Philipp Hagemeister
97cd3afc75 warn if %(stitle)s is being used 2012-11-27 13:11:06 +01:00
Philipp Hagemeister
aa2a94ed81 Encode the entire filename 2012-11-27 13:01:32 +01:00
Philipp Hagemeister
c7032546f1 Clean up test 2012-11-27 12:46:27 +01:00
Philipp Hagemeister
56781d3d2e Switch back to underline for invalid characters, and make restricted ASCII-only 2012-11-27 12:46:09 +01:00
Christopher Neugebauer
feb22fe5fe Fixed indentation error 2012-11-27 22:32:24 +11:00
Christopher Neugebauer
d8dddb7c02 Removes extranous debugging info :) 2012-11-27 22:30:07 +11:00
Christopher Neugebauer
4408d996fb Adds format listing/selection support to the Comedy Central extractor. 2012-11-27 22:28:16 +11:00
Philipp Hagemeister
ed7516c69d Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 12:25:51 +01:00
Christopher Neugebauer
89af8e9d32 Removes extraneous debug message. 2012-11-27 21:51:30 +11:00
Christopher Neugebauer
36a9c0b5ff Points the ComedyCentral extractor at a CDN which works with more RTMPDump versions. 2012-11-27 21:49:27 +11:00
Philipp Hagemeister
9fb3bfb45a Merge remote-tracking branch 'gcmalloc/master' 2012-11-27 00:42:47 +01:00
gcmalloc
a8ac2f8664 adding second vimeo url 2012-10-24 15:57:19 +02:00
gcmalloc
fb0e99b884 skipping vimeo for the moment 2012-10-24 00:32:23 +02:00
gcmalloc
9c6e9a4532 adding xnxx test 2012-10-24 00:13:16 +02:00
gcmalloc
67af74992e adding collegehumor test 2012-10-24 00:05:45 +02:00
gcmalloc
103c508ffa adding stanford open class courses 2012-10-23 23:59:12 +02:00
gcmalloc
2876773381 adding test for vimeo, xvideo and soundcloud 2012-10-23 23:53:33 +02:00
11 changed files with 217 additions and 35 deletions

View File

@@ -1 +1 @@
2012.11.27
2012.11.28

View File

@@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout.
--restrict-filenames Avoid some characters such as "&" and spaces in
filenames
--restrict-filenames Restrict filenames to only ASCII characters, and
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files

View File

@@ -7,6 +7,9 @@ import json
from youtube_dl.FileDownloader import FileDownloader
from youtube_dl.InfoExtractors import YoutubeIE, DailymotionIE
from youtube_dl.InfoExtractors import MetacafeIE, BlipTVIE
from youtube_dl.InfoExtractors import XVideosIE, VimeoIE
from youtube_dl.InfoExtractors import SoundcloudIE, StanfordOpenClassroomIE
from youtube_dl.InfoExtractors import CollegeHumorIE, XNXXIE
class DownloadTest(unittest.TestCase):
@@ -30,10 +33,33 @@ class DownloadTest(unittest.TestCase):
BLIP_URL = "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352"
BLIP_FILE = "5779306.m4v"
XVIDEO_MD5 = ""
XVIDEO_URL = ""
XVIDEO_FILE = ""
XVIDEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
XVIDEO_URL = "http://www.xvideos.com/video939581/funny_porns_by_s_-1"
XVIDEO_FILE = "939581.flv"
VIMEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
VIMEO_URL = "http://vimeo.com/14160053"
VIMEO_FILE = ""
VIMEO2_MD5 = ""
VIMEO2_URL = "http://player.vimeo.com/video/47019590"
VIMEO2_FILE = ""
SOUNDCLOUD_MD5 = "ce3775768ebb6432fa8495d446a078ed"
SOUNDCLOUD_URL = "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy"
SOUNDCLOUD_FILE = "n6FLbx6ZzMiu.mp3"
STANDFORD_MD5 = "22c8206291368c4e2c9c1a307f0ea0f4"
STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100"
STANDFORD_FILE = "PracticalUnix_intro-environment.mp4"
COLLEGEHUMOR_MD5 = ""
COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody"
COLLEGEHUMOR_FILE = ""
XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d"
XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_"
XNXX_FILE = "1135332.flv"
def test_youtube(self):
#let's download a file from youtube
@@ -72,6 +98,73 @@ class DownloadTest(unittest.TestCase):
md5_down_file = md5_for_file(DownloadTest.BLIP_FILE)
self.assertEqual(md5_down_file, DownloadTest.BLIP_MD5)
def test_xvideo(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(XVideosIE())
fd.download([DownloadTest.XVIDEO_URL])
self.assertTrue(os.path.exists(DownloadTest.XVIDEO_FILE))
md5_down_file = md5_for_file(DownloadTest.XVIDEO_FILE)
self.assertEqual(md5_down_file, DownloadTest.XVIDEO_MD5)
def test_vimeo(self):
#skipped for the moment produce an error
return
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(VimeoIE())
fd.download([DownloadTest.VIMEO_URL])
self.assertTrue(os.path.exists(DownloadTest.VIMEO_FILE))
md5_down_file = md5_for_file(DownloadTest.VIMEO_FILE)
self.assertEqual(md5_down_file, DownloadTest.VIMEO_MD5)
def test_vimeo2(self):
#skipped for the moment produce an error
return
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(VimeoIE())
fd.download([DownloadTest.VIMEO2_URL])
self.assertTrue(os.path.exists(DownloadTest.VIMEO2_FILE))
md5_down_file = md5_for_file(DownloadTest.VIMEO2_FILE)
self.assertEqual(md5_down_file, DownloadTest.VIMEO2_MD5)
def test_soundcloud(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(SoundcloudIE())
fd.download([DownloadTest.SOUNDCLOUD_URL])
self.assertTrue(os.path.exists(DownloadTest.SOUNDCLOUD_FILE))
md5_down_file = md5_for_file(DownloadTest.SOUNDCLOUD_FILE)
self.assertEqual(md5_down_file, DownloadTest.SOUNDCLOUD_MD5)
def test_standford(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(StanfordOpenClassroomIE())
fd.download([DownloadTest.STANDFORD_URL])
self.assertTrue(os.path.exists(DownloadTest.STANDFORD_FILE))
md5_down_file = md5_for_file(DownloadTest.STANDFORD_FILE)
self.assertEqual(md5_down_file, DownloadTest.STANDFORD_MD5)
def test_collegehumor(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(CollegeHumorIE())
fd.download([DownloadTest.COLLEGEHUMOR_URL])
self.assertTrue(os.path.exists(DownloadTest.COLLEGEHUMOR_FILE))
md5_down_file = md5_for_file(DownloadTest.COLLEGEHUMOR_FILE)
self.assertEqual(md5_down_file, DownloadTest.COLLEGEHUMOR_MD5)
def test_xnxx(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(XNXXIE())
fd.download([DownloadTest.XNXX_URL])
self.assertTrue(os.path.exists(DownloadTest.XNXX_FILE))
md5_down_file = md5_for_file(DownloadTest.XNXX_FILE)
self.assertEqual(md5_down_file, DownloadTest.XNXX_MD5)
def tearDown(self):
if os.path.exists(DownloadTest.YOUTUBE_FILE):
os.remove(DownloadTest.YOUTUBE_FILE)
@@ -81,13 +174,25 @@ class DownloadTest(unittest.TestCase):
os.remove(DownloadTest.METACAFE_FILE)
if os.path.exists(DownloadTest.BLIP_FILE):
os.remove(DownloadTest.BLIP_FILE)
if os.path.exists(DownloadTest.XVIDEO_FILE):
os.remove(DownloadTest.XVIDEO_FILE)
if os.path.exists(DownloadTest.VIMEO_FILE):
os.remove(DownloadTest.VIMEO_FILE)
if os.path.exists(DownloadTest.SOUNDCLOUD_FILE):
os.remove(DownloadTest.SOUNDCLOUD_FILE)
if os.path.exists(DownloadTest.STANDFORD_FILE):
os.remove(DownloadTest.STANDFORD_FILE)
if os.path.exists(DownloadTest.COLLEGEHUMOR_FILE):
os.remove(DownloadTest.COLLEGEHUMOR_FILE)
if os.path.exists(DownloadTest.XNXX_FILE):
os.remove(DownloadTest.XNXX_FILE)
def md5_for_file(filename, block_size=2**20):
with open(filename) as f:
md5 = hashlib.md5()
while True:
data = f.read(block_size)
if not data:
break
md5.update(data)
return md5.hexdigest()
with open(filename) as f:
md5 = hashlib.md5()
while True:
data = f.read(block_size)
if not data:
break
md5.update(data)
return md5.hexdigest()

View File

@@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(u'123'), u'123')
self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de'))
self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de'))
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
@@ -45,20 +45,29 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c')
self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
forbidden = u'"\0\\/&: \'\t\n'
for fc in forbidden:
print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
# Handle a common case more neatly
self.assertEqual(sanitize_filename(u'大声带 - Song', restricted=True), u'Song')
self.assertEqual(sanitize_filename(u'总统: Speech', restricted=True), u'Speech')
# .. but make sure the file name is never empty
self.assertTrue(sanitize_filename(u'-', restricted=True) != u'')
self.assertTrue(sanitize_filename(u':', restricted=True) != u'')
def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])
self.assertEqual(orderedSet([]), [])

Binary file not shown.

View File

@@ -59,8 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files

BIN
youtube-dl.exe Executable file → Normal file

Binary file not shown.

View File

@@ -94,6 +94,9 @@ class FileDownloader(object):
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = params
if '%(stitle)s' in self.params['outtmpl']:
self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@staticmethod
def format_bytes(bytes):
if bytes is None:
@@ -322,9 +325,8 @@ class FileDownloader(object):
"""Generate the output filename."""
try:
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
template_dict['epoch'] = unicode(int(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility
filename = self.params['outtmpl'] % template_dict
return filename
except (ValueError, KeyError), err:
@@ -350,7 +352,8 @@ class FileDownloader(object):
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
# Keep for backwards compatibility
info_dict['stitle'] = info_dict['title']
reason = self._match_entry(info_dict)
if reason is not None:
@@ -363,6 +366,7 @@ class FileDownloader(object):
raise MaxDownloadsReached()
filename = self.prepare_filename(info_dict)
filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
# Forced printings
if self.params.get('forcetitle', False):

View File

@@ -2253,6 +2253,25 @@ class ComedyCentralIE(InfoExtractor):
_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
IE_NAME = u'comedycentral'
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
_video_extensions = {
'3500': 'mp4',
'2200': 'mp4',
'1700': 'mp4',
'1200': 'mp4',
'750': 'mp4',
'400': 'mp4',
}
_video_dimensions = {
'3500': '1280x720',
'2200': '960x540',
'1700': '768x432',
'1200': '640x360',
'750': '512x288',
'400': '384x216',
}
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
@@ -2265,6 +2284,13 @@ class ComedyCentralIE(InfoExtractor):
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@ -2305,10 +2331,19 @@ class ComedyCentralIE(InfoExtractor):
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
if len(mMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
@@ -2357,10 +2392,31 @@ class ComedyCentralIE(InfoExtractor):
if len(turls) == 0:
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
continue
if self._downloader.params.get('listformats', None):
self._print_formats([i[0] for i in turls])
return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, video_url = f, v
break
# Patch to download from alternative CDN, which does not
# break on current RTMPDump builds
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
if video_url.startswith(broken_cdn):
video_url = video_url.replace(broken_cdn, better_cdn)
effTitle = showId + u'-' + epTitle
info = {
'id': shortMediaId,
@@ -2372,7 +2428,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format,
'thumbnail': None,
'description': officialTitle,
'player_url': playerUrl
'player_url': None #playerUrl
}
results.append(info)

View File

@@ -21,7 +21,7 @@ __authors__ = (
)
__license__ = 'Public Domain'
__version__ = '2012.11.27'
__version__ = '2012.11.28'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@@ -274,7 +274,7 @@ def parseOpts():
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Avoid some characters such as "&" and spaces in filenames', default=False)
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
@@ -532,7 +532,7 @@ def _real_main():
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = fd.download(all_urls)
except MaxDownloadsReached:

View File

@@ -207,15 +207,23 @@ def sanitize_filename(s, restricted=False):
elif char == ':':
return '_-' if restricted else ' -'
elif char in '\\/|*<>':
return '-'
return '_'
if restricted and (char in '&\'' or char.isspace()):
return '_'
if restricted and ord(char) > 127:
return '_'
return char
result = u''.join(map(replace_insane, s))
while '--' in result:
result = result.replace('--', '-')
return result.strip('-')
while '__' in result:
result = result.replace('__', '_')
result = result.strip('_')
# Common case of "Foreign band name - English song title"
if restricted and result.startswith('-_'):
result = result[2:]
if not result:
result = '_'
return result
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """