Compare commits

93 Commits

Author SHA1 Message Date
Philipp Hagemeister
e02066e7ff Windows build for 2012.11.28 2012-11-27 16:15:15 +01:00
Philipp Hagemeister
c9128b353d Bump version number to a numeric-only one to appease py2exe 2012-11-27 16:12:08 +01:00
Philipp Hagemeister
e7c6f1a2dc Bump version number 2012-11-27 16:08:39 +01:00
Philipp Hagemeister
1a911e60a4 Add test for asian characters (#551) 2012-11-27 16:07:52 +01:00
Philipp Hagemeister
46cbda0be4 Minor filename encoding improvement in a common case 2012-11-27 15:07:10 +01:00
Philipp Hagemeister
fa59f4b6a9 Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 14:55:18 +01:00
Christopher Neugebauer
4a702f3819 Fixes the InfoExtractor for the Colbert Report. 2012-11-27 23:54:43 +11:00
Philipp Hagemeister
6bac102a4d Fix spacing in comedycentral IE 2012-11-27 13:24:10 +01:00
Philipp Hagemeister
958a22b7cf Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 13:19:18 +01:00
Philipp Hagemeister
97cd3afc75 warn if %(stitle)s is being used 2012-11-27 13:11:06 +01:00
Philipp Hagemeister
aa2a94ed81 Encode the entire filename 2012-11-27 13:01:32 +01:00
Philipp Hagemeister
c7032546f1 Clean up test 2012-11-27 12:46:27 +01:00
Philipp Hagemeister
56781d3d2e Switch back to underline for invalid characters, and make restricted ASCII-only 2012-11-27 12:46:09 +01:00
Christopher Neugebauer
feb22fe5fe Fixed indentation error 2012-11-27 22:32:24 +11:00
Christopher Neugebauer
d8dddb7c02 Removes extranous debugging info :) 2012-11-27 22:30:07 +11:00
Christopher Neugebauer
4408d996fb Adds format listing/selection support to the Comedy Central extractor. 2012-11-27 22:28:16 +11:00
Philipp Hagemeister
ed7516c69d Merge remote-tracking branch 'chrisjrn/master' 2012-11-27 12:25:51 +01:00
Christopher Neugebauer
89af8e9d32 Removes extraneous debug message. 2012-11-27 21:51:30 +11:00
Christopher Neugebauer
36a9c0b5ff Points the ComedyCentral extractor at a CDN which works with more RTMPDump versions. 2012-11-27 21:49:27 +11:00
Philipp Hagemeister
9fb3bfb45a Merge remote-tracking branch 'gcmalloc/master' 2012-11-27 00:42:47 +01:00
Filippo Valsorda
d479e34043 release 2012.11.27 2012-11-27 00:22:39 +01:00
Philipp Hagemeister
240089e5df remove accidental remnants 2012-11-27 00:14:12 +01:00
Philipp Hagemeister
1c469a9480 New optoin --restrict-filenames 2012-11-26 23:58:46 +01:00
Philipp Hagemeister
71f36332dd Remove redundancy in instructions 2012-11-26 23:40:51 +01:00
Philipp Hagemeister
8179d2ba74 Merge branch 'master' of github.com:rg3/youtube-dl 2012-11-26 23:25:04 +01:00
Philipp Hagemeister
df4bad3245 Document configuration 2012-11-26 23:24:55 +01:00
Filippo Valsorda
a7b5c8d6a8 fix FAQ on how to compile (also, starnge fix in the Makefile) 2012-11-26 22:35:12 +01:00
Philipp Hagemeister
92b91c1878 Use character instead of byte strings 2012-11-26 04:23:20 +01:00
Philipp Hagemeister
7ec1a206ea Remove longs (int does the right thing since Python 2.2, see PEP 237) 2012-11-26 04:13:43 +01:00
Philipp Hagemeister
51937c0869 Add some parentheses around print for #180 2012-11-26 04:05:54 +01:00
Philipp Hagemeister
6b50761222 Merge pull request #538 from zejn/patch-1
Also enable album URLs on Vimeo.
2012-11-25 18:04:11 -08:00
Philipp Hagemeister
6571408dc6 Merge pull request #545 from FiloSottile/alias
Kill (alias) --literal and %(title)
2012-11-25 15:57:57 -08:00
Filippo Valsorda
b6fab35b9f alias %(title)s to %(stitle)s 2012-11-25 20:39:42 +01:00
Filippo Valsorda
baec15387c aliased --literal to --title 2012-11-25 20:28:49 +01:00
zejn
297d7fd9c0 Also enable album URLs on Vimeo. 2012-11-21 13:24:14 +01:00
Filippo Valsorda
5002aea371 release 2012.11.17 2012-11-17 14:02:31 +01:00
Filippo Valsorda
74033a662d Reworked Vimeo file selection logic (quality, codec) - closes #530 2012-11-13 21:53:18 +01:00
Filippo Valsorda
0526e4f55a Merge pull request #522 from art-zhitnik/master
--(match|reject)-title utf8 fix
2012-11-11 06:22:10 -08:00
Art Zhitnik
39973a0236 Solve the bug of parsing titles with unicode (cyrillic) 2012-11-11 14:09:12 +10:00
Filippo Valsorda
5d40a470a2 quiet the HTMLParser debug info - closes #517 2012-11-09 12:32:07 +01:00
Filippo Valsorda
4cc391461a fix DailyMotion official users videos - closes #281 - by @yvestan 2012-11-07 14:44:10 +01:00
Filippo Valsorda
bf95333e5e fixed MetacafeIE (uploader nickname regex) - closes #515 2012-11-06 23:08:10 +01:00
Philipp Hagemeister
b7a34316d2 -x for --extract-audio, one of the most popular options 2012-10-30 17:41:38 +01:00
Philipp Hagemeister
74e453bdea New --id option for the old default filename pattern 2012-10-30 17:37:53 +01:00
Philipp Hagemeister
156a59e7a9 Additional tests in file name sanitation 2012-10-29 08:19:54 +01:00
Philipp Hagemeister
aeca861f22 Merge pull request #502 from FiloSottile/new_sanitize_filename
My sanitize_filename proposal
2012-10-28 15:33:59 -07:00
Filippo Valsorda
42cb53fcfa modified filename escaping to a "smarter" one 2012-10-28 22:47:02 +01:00
Filippo Valsorda
fe4d68e196 slight change to Dailymotion uploader regex (fix) 2012-10-28 21:43:43 +01:00
Philipp Hagemeister
25b7fd9c01 Merge pull request #491 from tyll/master
Update install target
2012-10-26 01:10:25 -07:00
Till Maas
e79e8b7dc4 Update install target
- Allow to configure destination directories to fulfill the needs of
  different distributions
- Support DESTDIR variable for staging installation when packaging
- Do not set user/group to root. It requires 'make install' to run as
  root, but then this is the default behaviour anyways.
2012-10-25 21:19:13 +02:00
Filippo Valsorda
965a8b2bc4 Merge pull request #488 from Tailszefox/local
Fix audio bitrate quality for ffmpeg/avconv (closes #487)
2012-10-24 11:42:31 -07:00
gcmalloc
a8ac2f8664 adding second vimeo url 2012-10-24 15:57:19 +02:00
gcmalloc
fb0e99b884 skipping vimeo for the moment 2012-10-24 00:32:23 +02:00
gcmalloc
9c6e9a4532 adding xnxx test 2012-10-24 00:13:16 +02:00
gcmalloc
67af74992e adding collegehumor test 2012-10-24 00:05:45 +02:00
gcmalloc
103c508ffa adding stanford open class courses 2012-10-23 23:59:12 +02:00
gcmalloc
2876773381 adding test for vimeo, xvideo and soundcloud 2012-10-23 23:53:33 +02:00
Tailszefox
f06eaa873e Fix audio bitrate quality for ffmpeg/avconv 2012-10-23 16:37:12 +02:00
Philipp Hagemeister
ece34e8951 Merge pull request #486 from Tailszefox/local
Added duration for YouTube videos
2012-10-23 05:53:28 -07:00
Tailszefox
2262a32dd7 Added duration for YouTube videos 2012-10-22 18:32:42 +02:00
Philipp Hagemeister
c6c0e23a32 Support raw playlist parameters (Closes #482) 2012-10-22 13:01:36 +02:00
Philipp Hagemeister
02b324a23d Restore 2.5 compat by activating with_statement future 2012-10-22 12:51:20 +02:00
Filippo Valsorda
b8005afc20 handle YT urls with #/ redirects (closes #484) 2012-10-22 09:15:27 +02:00
Philipp Hagemeister
073522bc6c Don't use 2.7+ check_output 2012-10-19 23:28:37 +02:00
Philipp Hagemeister
9248cb0549 Merge pull request #472 from gcmalloc/master
Test proposal
2012-10-19 05:48:12 -07:00
gcmalloc
6b41b61119 correcting travis 2012-10-19 12:53:20 +02:00
gcmalloc
591bbe9c90 changing test from md5 to filesize, the file changed between download 2012-10-19 12:53:20 +02:00
gcmalloc
fc7376016c cleaning the test that doesn't work with the api for the moment 2012-10-19 12:53:20 +02:00
gcmalloc
97a37c2319 some assertion on the file downloaded 2012-10-19 12:53:20 +02:00
gcmalloc
3afed78a6a removing testing video 2012-10-19 12:53:20 +02:00
gcmalloc
4279a0ca98 correcting test to be compatible with python2.6 2012-10-19 12:53:20 +02:00
gcmalloc
edcc7d2dd3 StringIO used by nosetests do not merge with the way youtube-dl handle sys.stdout and sys.stderr 2012-10-19 12:53:19 +02:00
gcmalloc
7f60b5aa40 correction on the test 2012-10-19 12:53:19 +02:00
gcmalloc
aeeb29a356 adding travis support 2012-10-15 10:58:35 +02:00
Filippo Valsorda
902b2a0a45 New IE: YouTube channels (closes #396) 2012-10-14 13:48:18 +02:00
gcmalloc
6d9c22cd26 correcting the makefile according to the new one 2012-10-12 20:30:01 +02:00
gcmalloc
729baf58b2 removing extended globbing for the find utility 2012-10-12 20:25:22 +02:00
gcmalloc
4c9afeca34 adding xvideo 2012-10-12 20:25:22 +02:00
gcmalloc
6da7877bf5 adding facebook test 2012-10-12 20:25:22 +02:00
gcmalloc
b4e5de51ec adding photobucket test 2012-10-12 20:25:22 +02:00
gcmalloc
a4b5f22554 adding metacafe test 2012-10-12 20:25:22 +02:00
gcmalloc
ff08984246 adding dailymotion test 2012-10-12 20:25:22 +02:00
gcmalloc
137c5803c3 some changes to keep the same standard 2012-10-12 20:25:22 +02:00
gcmalloc
3eec021a1f removing unused global modifier 2012-10-12 20:25:22 +02:00
gcmalloc
5a33b73309 correcting the makefile 2012-10-12 20:25:22 +02:00
gcmalloc
0b4e98490b changing test video 2012-10-12 20:24:58 +02:00
gcmalloc
80a846e119 correction on the test for the utils.py 2012-10-12 20:24:58 +02:00
gcmalloc
434d60cd95 adding clean rule in the makefile 2012-10-12 20:24:58 +02:00
gcmalloc
efe8902f0b adding download test with md5 check 2012-10-12 20:24:58 +02:00
gcmalloc
44fb345437 adding TestCase class and corresponding test 2012-10-12 20:24:58 +02:00
gcmalloc
9993976ae4 correction on the sanitize title method, change in title resulting 2012-10-12 20:24:58 +02:00
gcmalloc
b387fb0385 adding test rule in the Makefile 2012-10-12 20:24:58 +02:00
Filippo Valsorda
10daa766a1 support EDU YouTube playlists (closes #407) 2012-10-11 08:27:19 +02:00
18 changed files with 659 additions and 204 deletions

.travis.yml (new file, 9 lines)

@@ -0,0 +1,9 @@
language: python
#specify the python version
python:
- "2.6"
- "2.7"
#command to install the setup
install:
# command to run tests
script: nosetests test --nocapture
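
The Travis configuration above simply shells out to nosetests. As a rough local equivalent, the same run can be started from Python; a minimal sketch, assuming nose is installed and the command is executed from the repository root so that test/ and youtube_dl/ resolve:

    # Equivalent of the Travis "script" step above (assumption: nose is
    # installed; run from the repository root).
    import nose

    nose.run(argv=['nosetests', 'test', '--nocapture'])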

LATEST_VERSION

@@ -1 +1 @@
2012.10.09
2012.11.28

Makefile

@@ -5,12 +5,22 @@ clean:
rm -f youtube-dl youtube-dl.exe youtube-dl.1 LATEST_VERSION
PREFIX=/usr/local
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -m 755 --owner root --group root youtube-dl $(PREFIX)/bin/
install -m 644 --owner root --group root youtube-dl.1 $(PREFIX)/man/man1
install -m 644 --owner root --group root youtube-dl.bash-completion /etc/bash_completion.d/youtube-dl
BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc
.PHONY: all clean install README.md youtube-dl.bash-completion
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR)
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
install -d $(DESTDIR)$(MANDIR)/man1
install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
test:
nosetests2 --nocapture test
.PHONY: all clean install test README.md youtube-dl.bash-completion
# TODO un-phony README.md and youtube-dl.bash_completion by reading from .in files and generating from them
youtube-dl: youtube_dl/*.py
@@ -26,13 +36,13 @@ youtube-dl.exe: youtube_dl/*.py
README.md: youtube_dl/*.py
@options=$$(COLUMNS=80 python -m youtube_dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/## \1/') && \
header=$$(sed -e '/.*# OPTIONS/,$$ d' README.md) && \
footer=$$(sed -e '1,/.*# FAQ/ d' README.md) && \
footer=$$(sed -e '1,/.*# CONFIGURATION/ d' README.md) && \
echo "$${header}" > README.md && \
echo >> README.md && \
echo '# OPTIONS' >> README.md && \
echo "$${options}" >> README.md&& \
echo >> README.md && \
echo '# FAQ' >> README.md && \
echo '# CONFIGURATION' >> README.md && \
echo "$${footer}" >> README.md
youtube-dl.1: README.md

README.md

@@ -36,9 +36,10 @@ which means you can modify it, redistribute it or use it however you like.
## Filesystem Options:
-t, --title use title in file name
-l, --literal use literal title in file name
--id use video ID in file name
-l, --literal [deprecated] alias of --title
-A, --auto-number number downloaded files starting from 00000
-o, --output TEMPLATE output filename template. Use %(stitle)s to get the
-o, --output TEMPLATE output filename template. Use %(title)s to get the
title, %(uploader)s for the uploader name,
%(autonumber)s to get an automatically incremented
number, %(ext)s for the filename extension,
@@ -46,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout.
--restrict-filenames Restrict filenames to only ASCII characters, and
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files
@@ -91,7 +94,7 @@ which means you can modify it, redistribute it or use it however you like.
-n, --netrc use .netrc authentication data
## Post-processing Options:
--extract-audio convert video files to audio-only files (requires
-x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
best by default
@@ -101,6 +104,10 @@ which means you can modify it, redistribute it or use it however you like.
-k, --keep-video keeps the video file on disk after the post-
processing; the video is erased by default
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
# FAQ
### Can you please put the -b option back?
@@ -146,7 +153,7 @@ Please note that Python 2.5 is not supported anymore.
### What is this binary file? Where has the code gone?
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repo to see the code. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make compile`.
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws a *Runtime error from Visual C++*
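
The -o TEMPLATE placeholders documented above are ordinary Python %-formatting keys applied to the video's info dictionary (the FileDownloader diff further down does exactly outtmpl % template_dict). A minimal sketch with invented example values; only the key names come from the README text:

    # Illustration of output template expansion. The key names are the
    # ones listed in the README above; the values are made up.
    info = {
        'title': u'Some Video',
        'uploader': u'someuser',
        'autonumber': u'00001',
        'ext': u'mp4',
        'extractor': u'youtube',
        'id': u'BaW_jenozKc',
    }
    outtmpl = u'%(uploader)s - %(title)s-%(id)s.%(ext)s'
    print(outtmpl % info)  # someuser - Some Video-BaW_jenozKc.mp4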

test/parameters.json (new file, 1 line)

@@ -0,0 +1 @@
{"username": null, "listformats": null, "skip_download": false, "usenetrc": false, "max_downloads": null, "noprogress": false, "forcethumbnail": false, "forceformat": false, "format_limit": null, "ratelimit": null, "nooverwrites": false, "forceurl": false, "writeinfojson": false, "simulate": false, "playliststart": 1, "continuedl": true, "password": null, "prefer_free_formats": false, "nopart": false, "retries": 10, "updatetime": true, "consoletitle": false, "verbose": true, "forcefilename": false, "ignoreerrors": false, "logtostderr": false, "format": null, "subtitleslang": null, "quiet": false, "outtmpl": "%(id)s.%(ext)s", "rejecttitle": null, "playlistend": -1, "writedescription": false, "forcetitle": false, "forcedescription": false, "writesubtitles": false, "matchtitle": null}

(deleted file)

@@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
# Various small unit tests
import os,sys
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
import youtube_dl
def test_simplify_title():
assert youtube_dl._simplify_title(u'abc') == u'abc'
assert youtube_dl._simplify_title(u'abc_d-e') == u'abc_d-e'
assert youtube_dl._simplify_title(u'123') == u'123'
assert u'/' not in youtube_dl._simplify_title(u'abc/de')
assert u'abc' in youtube_dl._simplify_title(u'abc/de')
assert u'de' in youtube_dl._simplify_title(u'abc/de')
assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
assert u'de' in youtube_dl._simplify_title(u'abc\\de')
assert youtube_dl._simplify_title(u'ä') == u'ä'
assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
# Strip underlines
assert youtube_dl._simplify_title(u'\'a_') == u'a'

test/test_download.py (new file, 198 lines)

@@ -0,0 +1,198 @@
#!/usr/bin/env python2
import unittest
import hashlib
import os
import json
from youtube_dl.FileDownloader import FileDownloader
from youtube_dl.InfoExtractors import YoutubeIE, DailymotionIE
from youtube_dl.InfoExtractors import MetacafeIE, BlipTVIE
from youtube_dl.InfoExtractors import XVideosIE, VimeoIE
from youtube_dl.InfoExtractors import SoundcloudIE, StanfordOpenClassroomIE
from youtube_dl.InfoExtractors import CollegeHumorIE, XNXXIE
class DownloadTest(unittest.TestCase):
PARAMETERS_FILE = "test/parameters.json"
#calculated with md5sum:
#md5sum (GNU coreutils) 8.19
YOUTUBE_SIZE = 1993883
YOUTUBE_URL = "http://www.youtube.com/watch?v=BaW_jenozKc"
YOUTUBE_FILE = "BaW_jenozKc.mp4"
DAILYMOTION_MD5 = "d363a50e9eb4f22ce90d08d15695bb47"
DAILYMOTION_URL = "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech"
DAILYMOTION_FILE = "x33vw9.mp4"
METACAFE_SIZE = 5754305
METACAFE_URL = "http://www.metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/"
METACAFE_FILE = "_aUehQsCQtM.flv"
BLIP_MD5 = "93c24d2f4e0782af13b8a7606ea97ba7"
BLIP_URL = "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352"
BLIP_FILE = "5779306.m4v"
XVIDEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
XVIDEO_URL = "http://www.xvideos.com/video939581/funny_porns_by_s_-1"
XVIDEO_FILE = "939581.flv"
VIMEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
VIMEO_URL = "http://vimeo.com/14160053"
VIMEO_FILE = ""
VIMEO2_MD5 = ""
VIMEO2_URL = "http://player.vimeo.com/video/47019590"
VIMEO2_FILE = ""
SOUNDCLOUD_MD5 = "ce3775768ebb6432fa8495d446a078ed"
SOUNDCLOUD_URL = "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy"
SOUNDCLOUD_FILE = "n6FLbx6ZzMiu.mp3"
STANDFORD_MD5 = "22c8206291368c4e2c9c1a307f0ea0f4"
STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100"
STANDFORD_FILE = "PracticalUnix_intro-environment.mp4"
COLLEGEHUMOR_MD5 = ""
COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody"
COLLEGEHUMOR_FILE = ""
XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d"
XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_"
XNXX_FILE = "1135332.flv"
def test_youtube(self):
#let's download a file from youtube
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(YoutubeIE())
fd.download([DownloadTest.YOUTUBE_URL])
self.assertTrue(os.path.exists(DownloadTest.YOUTUBE_FILE))
self.assertEqual(os.path.getsize(DownloadTest.YOUTUBE_FILE), DownloadTest.YOUTUBE_SIZE)
def test_dailymotion(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(DailymotionIE())
fd.download([DownloadTest.DAILYMOTION_URL])
self.assertTrue(os.path.exists(DownloadTest.DAILYMOTION_FILE))
md5_down_file = md5_for_file(DownloadTest.DAILYMOTION_FILE)
self.assertEqual(md5_down_file, DownloadTest.DAILYMOTION_MD5)
def test_metacafe(self):
#this emulate a skip,to be 2.6 compatible
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(MetacafeIE())
fd.add_info_extractor(YoutubeIE())
fd.download([DownloadTest.METACAFE_URL])
self.assertTrue(os.path.exists(DownloadTest.METACAFE_FILE))
self.assertEqual(os.path.getsize(DownloadTest.METACAFE_FILE), DownloadTest.METACAFE_SIZE)
def test_blip(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(BlipTVIE())
fd.download([DownloadTest.BLIP_URL])
self.assertTrue(os.path.exists(DownloadTest.BLIP_FILE))
md5_down_file = md5_for_file(DownloadTest.BLIP_FILE)
self.assertEqual(md5_down_file, DownloadTest.BLIP_MD5)
def test_xvideo(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(XVideosIE())
fd.download([DownloadTest.XVIDEO_URL])
self.assertTrue(os.path.exists(DownloadTest.XVIDEO_FILE))
md5_down_file = md5_for_file(DownloadTest.XVIDEO_FILE)
self.assertEqual(md5_down_file, DownloadTest.XVIDEO_MD5)
def test_vimeo(self):
#skipped for the moment produce an error
return
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(VimeoIE())
fd.download([DownloadTest.VIMEO_URL])
self.assertTrue(os.path.exists(DownloadTest.VIMEO_FILE))
md5_down_file = md5_for_file(DownloadTest.VIMEO_FILE)
self.assertEqual(md5_down_file, DownloadTest.VIMEO_MD5)
def test_vimeo2(self):
#skipped for the moment produce an error
return
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(VimeoIE())
fd.download([DownloadTest.VIMEO2_URL])
self.assertTrue(os.path.exists(DownloadTest.VIMEO2_FILE))
md5_down_file = md5_for_file(DownloadTest.VIMEO2_FILE)
self.assertEqual(md5_down_file, DownloadTest.VIMEO2_MD5)
def test_soundcloud(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(SoundcloudIE())
fd.download([DownloadTest.SOUNDCLOUD_URL])
self.assertTrue(os.path.exists(DownloadTest.SOUNDCLOUD_FILE))
md5_down_file = md5_for_file(DownloadTest.SOUNDCLOUD_FILE)
self.assertEqual(md5_down_file, DownloadTest.SOUNDCLOUD_MD5)
def test_standford(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(StanfordOpenClassroomIE())
fd.download([DownloadTest.STANDFORD_URL])
self.assertTrue(os.path.exists(DownloadTest.STANDFORD_FILE))
md5_down_file = md5_for_file(DownloadTest.STANDFORD_FILE)
self.assertEqual(md5_down_file, DownloadTest.STANDFORD_MD5)
def test_collegehumor(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(CollegeHumorIE())
fd.download([DownloadTest.COLLEGEHUMOR_URL])
self.assertTrue(os.path.exists(DownloadTest.COLLEGEHUMOR_FILE))
md5_down_file = md5_for_file(DownloadTest.COLLEGEHUMOR_FILE)
self.assertEqual(md5_down_file, DownloadTest.COLLEGEHUMOR_MD5)
def test_xnxx(self):
with open(DownloadTest.PARAMETERS_FILE) as f:
fd = FileDownloader(json.load(f))
fd.add_info_extractor(XNXXIE())
fd.download([DownloadTest.XNXX_URL])
self.assertTrue(os.path.exists(DownloadTest.XNXX_FILE))
md5_down_file = md5_for_file(DownloadTest.XNXX_FILE)
self.assertEqual(md5_down_file, DownloadTest.XNXX_MD5)
def tearDown(self):
if os.path.exists(DownloadTest.YOUTUBE_FILE):
os.remove(DownloadTest.YOUTUBE_FILE)
if os.path.exists(DownloadTest.DAILYMOTION_FILE):
os.remove(DownloadTest.DAILYMOTION_FILE)
if os.path.exists(DownloadTest.METACAFE_FILE):
os.remove(DownloadTest.METACAFE_FILE)
if os.path.exists(DownloadTest.BLIP_FILE):
os.remove(DownloadTest.BLIP_FILE)
if os.path.exists(DownloadTest.XVIDEO_FILE):
os.remove(DownloadTest.XVIDEO_FILE)
if os.path.exists(DownloadTest.VIMEO_FILE):
os.remove(DownloadTest.VIMEO_FILE)
if os.path.exists(DownloadTest.SOUNDCLOUD_FILE):
os.remove(DownloadTest.SOUNDCLOUD_FILE)
if os.path.exists(DownloadTest.STANDFORD_FILE):
os.remove(DownloadTest.STANDFORD_FILE)
if os.path.exists(DownloadTest.COLLEGEHUMOR_FILE):
os.remove(DownloadTest.COLLEGEHUMOR_FILE)
if os.path.exists(DownloadTest.XNXX_FILE):
os.remove(DownloadTest.XNXX_FILE)
def md5_for_file(filename, block_size=2**20):
with open(filename) as f:
md5 = hashlib.md5()
while True:
data = f.read(block_size)
if not data:
break
md5.update(data)
return md5.hexdigest()
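
Because these are plain unittest.TestCase methods, a single download test can also be run without nose; a minimal sketch, assuming it is executed from the repository root so the youtube_dl package and test/parameters.json are found:

    # Run only the YouTube test from test/test_download.py (assumption:
    # working directory is the repository root).
    import sys
    import unittest

    sys.path.insert(0, 'test')
    from test_download import DownloadTest

    suite = unittest.TestSuite([DownloadTest('test_youtube')])
    unittest.TextTestRunner(verbosity=2).run(suite)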

test/test_utils.py (new file, 79 lines)

@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
# Various small unit tests
import unittest
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import timeconvert
from youtube_dl.utils import sanitize_filename
from youtube_dl.utils import unescapeHTML
from youtube_dl.utils import orderedSet
class TestUtil(unittest.TestCase):
def test_timeconvert(self):
self.assertTrue(timeconvert('') is None)
self.assertTrue(timeconvert('bougrg') is None)
def test_sanitize_filename(self):
self.assertEqual(sanitize_filename(u'abc'), u'abc')
self.assertEqual(sanitize_filename(u'abc_d-e'), u'abc_d-e')
self.assertEqual(sanitize_filename(u'123'), u'123')
self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de'))
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
forbidden = u'"\0\\/'
for fc in forbidden:
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc))
def test_sanitize_filename_restricted(self):
self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c')
self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
forbidden = u'"\0\\/&: \'\t\n'
for fc in forbidden:
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
# Handle a common case more neatly
self.assertEqual(sanitize_filename(u'大声带 - Song', restricted=True), u'Song')
self.assertEqual(sanitize_filename(u'总统: Speech', restricted=True), u'Speech')
# .. but make sure the file name is never empty
self.assertTrue(sanitize_filename(u'-', restricted=True) != u'')
self.assertTrue(sanitize_filename(u':', restricted=True) != u'')
def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])
self.assertEqual(orderedSet([]), [])
self.assertEqual(orderedSet([1]), [1])
#keep the list ordered
self.assertEqual(orderedSet([135,1,1,1]), [135,1])
def test_unescape_html(self):
self.assertEqual(unescapeHTML(u"%20;"), u"%20;")
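
For reference, the behaviour these assertions pin down, exercised directly; the expected results in the comments are copied from the tests above, not re-derived:

    # Quick check of sanitize_filename, mirroring the assertions above.
    from youtube_dl.utils import sanitize_filename

    print(sanitize_filename(u'this: that'))                     # this - that
    print(sanitize_filename(u'yes? no', restricted=True))       # yes_no
    print(sanitize_filename(u'aäb中国的c', restricted=True))     # a_b_c
    print(sanitize_filename(u'大声带 - Song', restricted=True))  # Song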

Binary file not shown.

Binary file not shown.

youtube-dl.1

@@ -48,9 +48,10 @@ redistribute it or use it however you like.
.nf
\f[C]
-t,\ --title\ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name
-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ use\ literal\ title\ in\ file\ name
--id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ video\ ID\ in\ file\ name
-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ --title
-A,\ --auto-number\ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000
-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(stitle)s\ to\ get\ the
-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get\ the
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ title,\ %(uploader)s\ for\ the\ uploader\ name,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically\ incremented
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ number,\ %(ext)s\ for\ the\ filename\ extension,
@@ -58,6 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@@ -119,7 +122,7 @@ redistribute it or use it however you like.
.IP
.nf
\f[C]
--extract-audio\ \ \ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires
-x,\ --extract-audio\ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
--audio-format\ FORMAT\ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ or\ "wav";
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ best\ by\ default
@@ -130,6 +133,12 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
\f[]
.fi
.SH CONFIGURATION
.PP
You can configure youtube-dl by placing default arguments (such as
\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
\f[C]~/.local/config/youtube-dl.conf\f[].
.SH FAQ
.SS Can you please put the -b option back?
.PP
@@ -203,10 +212,10 @@ Please note that Python 2.5 is not supported anymore.
.PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
some systems) or clone the git repo to see the code.
some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ compile\f[].
To recompile the executable, run \f[C]make\ youtube-dl\f[].
.SS The exe throws a \f[I]Runtime error from Visual C++\f[]
.PP
To run the exe you need to install first the Microsoft Visual C++ 2008

youtube-dl.bash-completion

@@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

youtube-dl.exe (binary, mode changed from Executable file to Normal file; content not shown)

youtube_dl/FileDownloader.py

@@ -13,7 +13,7 @@ import urllib2
if os.name == 'nt':
import ctypes
from utils import *
@@ -44,37 +44,38 @@ class FileDownloader(object):
Available options:
username: Username for authentication purposes.
password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
username: Username for authentication purposes.
password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
"""
params = None
@@ -93,6 +94,9 @@ class FileDownloader(object):
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = params
if '%(stitle)s' in self.params['outtmpl']:
self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@staticmethod
def format_bytes(bytes):
if bytes is None:
@@ -139,23 +143,23 @@ class FileDownloader(object):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return long(new_max)
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return long(new_max)
return int(new_max)
if rate < new_min:
return long(new_min)
return long(rate)
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into a long integer."""
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return long(round(number * multiplier))
return int(round(number * multiplier))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -173,7 +177,6 @@ class FileDownloader(object):
if not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding(), 'ignore')
self._screen_file.write(output)
@@ -181,7 +184,8 @@ class FileDownloader(object):
def to_stderr(self, message):
"""Print message to stderr."""
print >>sys.stderr, message.encode(preferredencoding())
assert type(message) == type(u'')
sys.stderr.write((message + u'\n').encode(preferredencoding()))
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
@@ -321,7 +325,7 @@ class FileDownloader(object):
"""Generate the output filename."""
try:
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
template_dict['epoch'] = unicode(int(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
filename = self.params['outtmpl'] % template_dict
return filename
@@ -334,17 +338,22 @@ class FileDownloader(object):
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
if matchtitle:
matchtitle = matchtitle.decode('utf8')
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
if rejecttitle:
rejecttitle = rejecttitle.decode('utf8')
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
info_dict['stitle'] = sanitize_filename(info_dict['title'])
# Keep for backwards compatibility
info_dict['stitle'] = info_dict['title']
reason = self._match_entry(info_dict)
if reason is not None:
@@ -357,20 +366,21 @@ class FileDownloader(object):
raise MaxDownloadsReached()
filename = self.prepare_filename(info_dict)
filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
# Forced printings
if self.params.get('forcetitle', False):
print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forceurl', False):
print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcedescription', False) and 'description' in info_dict:
print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcefilename', False) and filename is not None:
print filename.encode(preferredencoding(), 'xmlcharrefreplace')
print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forceformat', False):
print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -399,10 +409,10 @@ class FileDownloader(object):
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write description file ' + descfn)
return
if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
# that way it will silently go on when used with unsupporting IE
try:
srtfn = filename.rsplit('.', 1)[0] + u'.srt'
self.report_writesubtitles(srtfn)
@@ -448,7 +458,7 @@ class FileDownloader(object):
except (ContentTooShortError, ), err:
self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
if success:
try:
self.post_process(filename, info_dict)
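
Taken together, the FileDownloader hunks above change the filename flow: the output template is expanded first, the whole result is then passed through sanitize_filename (restricted or not), and %(stitle)s survives only as a deprecated alias of %(title)s. A minimal sketch with invented values; the non-restricted result follows the sanitize_filename tests above:

    # Sketch of the new prepare_filename -> sanitize_filename flow
    # (values invented for illustration).
    from youtube_dl.utils import sanitize_filename

    info_dict = {'title': u'abc/de: f', 'id': u'xyz', 'ext': u'mp4'}
    outtmpl = u'%(title)s-%(id)s.%(ext)s'
    filename = outtmpl % info_dict                        # u'abc/de: f-xyz.mp4'
    print(sanitize_filename(filename))                    # e.g. abc_de - f-xyz.mp4
    print(sanitize_filename(filename, restricted=True))   # ASCII-only, no spaces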

youtube_dl/InfoExtractors.py

@@ -102,6 +102,7 @@ class YoutubeIE(InfoExtractor):
(?:https?://)? # http(s):// (optional)
(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
@@ -212,9 +213,9 @@ class YoutubeIE(InfoExtractor):
return srt
def _print_formats(self, formats):
print 'Available formats:'
print('Available formats:')
for x in formats:
print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
def _real_initialize(self):
if self._downloader is None:
@@ -237,7 +238,7 @@ class YoutubeIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
# Set language
@@ -246,7 +247,7 @@ class YoutubeIE(InfoExtractor):
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
return
# No authentication to be performed
@@ -269,7 +270,7 @@ class YoutubeIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
# Confirm age
@@ -282,7 +283,7 @@ class YoutubeIE(InfoExtractor):
self.report_age_confirmation()
age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -304,7 +305,7 @@ class YoutubeIE(InfoExtractor):
try:
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Attempt to extract SWF player URL
@@ -326,7 +327,7 @@ class YoutubeIE(InfoExtractor):
if 'token' in video_info:
break
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if 'token' not in video_info:
if 'reason' in video_info:
@@ -389,7 +390,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list:
@@ -406,13 +407,19 @@ class YoutubeIE(InfoExtractor):
try:
srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
except Trouble as trouble:
self._downloader.trouble(trouble[0])
if 'length_seconds' not in video_info:
self._downloader.trouble(u'WARNING: unable to extract video duration')
video_duration = ''
else:
video_duration = urllib.unquote_plus(video_info['length_seconds'][0])
# token
video_token = urllib.unquote_plus(video_info['token'][0])
@@ -479,7 +486,8 @@ class YoutubeIE(InfoExtractor):
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles
'subtitles': video_subtitles,
'duration': video_duration
})
return results
@@ -518,7 +526,7 @@ class MetacafeIE(InfoExtractor):
self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
return
# Confirm age
@@ -531,7 +539,7 @@ class MetacafeIE(InfoExtractor):
self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -555,7 +563,7 @@ class MetacafeIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -595,7 +603,7 @@ class MetacafeIE(InfoExtractor):
return
video_title = mobj.group(1).decode('utf-8')
mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
mobj = re.search(r'submitter=(.*?);', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
@@ -648,7 +656,7 @@ class DailymotionIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -684,9 +692,14 @@ class DailymotionIE(InfoExtractor):
video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
video_uploader = u'NA'
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
if mobj is None:
self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
# lookin for official user
mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
if mobj_official is None:
self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
else:
video_uploader = mobj_official.group(1)
else:
video_uploader = mobj.group(1)
@@ -741,7 +754,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -780,7 +793,7 @@ class GoogleIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
if mobj is None:
@@ -836,7 +849,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -906,7 +919,7 @@ class YahooIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -930,7 +943,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract uploader and title from webpage
@@ -988,7 +1001,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract media URL from playlist XML
@@ -1017,7 +1030,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@@ -1046,7 +1059,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Now we begin extracting as much information as we can from what we
@@ -1087,21 +1100,32 @@ class VimeoIE(InfoExtractor):
timestamp = config['request']['timestamp']
# Vimeo specific: extract video codec and quality information
# First consider quality, then codecs, then take everything
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
for codec in codecs:
if codec[0] in config["video"]["files"]:
video_codec = codec[0]
video_extension = codec[1]
if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
else: quality = 'sd'
files = { 'hd': [], 'sd': [], 'other': []}
for codec_name, codec_extension in codecs:
if codec_name in config["video"]["files"]:
if 'hd' in config["video"]["files"][codec_name]:
files['hd'].append((codec_name, codec_extension, 'hd'))
elif 'sd' in config["video"]["files"][codec_name]:
files['sd'].append((codec_name, codec_extension, 'sd'))
else:
files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
for quality in ('hd', 'sd', 'other'):
if len(files[quality]) > 0:
video_quality = files[quality][0][2]
video_codec = files[quality][0][0]
video_extension = files[quality][0][1]
self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
break
else:
self._downloader.trouble(u'ERROR: no known codec found')
return
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
%(video_id, sig, timestamp, quality, video_codec.upper())
%(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
@@ -1201,7 +1225,7 @@ class GenericIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
except ValueError, err:
# since this is the last-resort InfoExtractor, if
@@ -1322,7 +1346,7 @@ class YoutubeSearchIE(InfoExtractor):
try:
data = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
return
api_response = json.loads(data)['data']
@@ -1399,7 +1423,7 @@ class GoogleSearchIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1482,7 +1506,7 @@ class YahooSearchIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1508,9 +1532,9 @@ class YahooSearchIE(InfoExtractor):
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
_VALID_URL = r'(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?|PL|EC)([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;list=.*?%s'
_VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;([^&"]+&amp;)*list=.*?%s'
_MORE_PAGES_INDICATOR = r'yt-uix-pager-next'
IE_NAME = u'youtube:playlist'
@@ -1552,7 +1576,7 @@ class YoutubePlaylistIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1578,6 +1602,56 @@ class YoutubePlaylistIE(InfoExtractor):
return
class YoutubeChannelIE(InfoExtractor):
"""Information Extractor for YouTube channels."""
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)(?:/.*)?$"
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
_MORE_PAGES_INDICATOR = r'yt-uix-button-content">Next' # TODO
IE_NAME = u'youtube:channel'
def report_download_page(self, channel_id, pagenum):
"""Report attempt to download channel page with given number."""
self._downloader.to_screen(u'[youtube] Channel %s: Downloading page #%s' % (channel_id, pagenum))
def _real_extract(self, url):
# Extract channel id
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid url: %s' % url)
return
# Download channel pages
channel_id = mobj.group(1)
video_ids = []
pagenum = 1
while True:
self.report_download_page(channel_id, pagenum)
url = self._TEMPLATE_URL % (channel_id, pagenum)
request = urllib2.Request(url)
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
ids_in_page = []
for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&', page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(mobj.group(1))
video_ids.extend(ids_in_page)
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
break
pagenum = pagenum + 1
for id in video_ids:
self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
return
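
# Illustrative sketch (not from the patch): the channel extractor above boils down
# to "collect /watch?v= ids per page until the next-page button disappears". The
# sample markup below is an assumption standing in for a real channel page.
import re

_MORE_PAGES_INDICATOR = r'yt-uix-button-content">Next'
sample_page = (u'<a href="/watch?v=abc123DEF45&feature=ch">one</a>'
               u'<a href="/watch?v=abc123DEF45&foo=bar">duplicate</a>'
               u'<a href="/watch?v=xyz789GHI01&feature=ch">two</a>'
               u'<span class="yt-uix-button-content">Next</span>')

ids_in_page = []
for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&', sample_page):
    if mobj.group(1) not in ids_in_page:       # de-duplicate, keep page order
        ids_in_page.append(mobj.group(1))
print(ids_in_page)                             # ['abc123DEF45', 'xyz789GHI01']
has_next = re.search(_MORE_PAGES_INDICATOR, sample_page) is not None
print(has_next)                                # True -> the loop would fetch page 2
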
class YoutubeUserIE(InfoExtractor):
"""Information Extractor for YouTube users."""
@@ -1622,7 +1696,7 @@ class YoutubeUserIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1694,7 +1768,7 @@ class BlipTVUserIE(InfoExtractor):
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
@@ -1782,7 +1856,7 @@ class DepositFilesIE(InfoExtractor):
self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
return
# Search for the real file URL
@@ -1899,7 +1973,7 @@ class FacebookIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
if useremail is None:
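
# Illustrative sketch (not from the patch): the stdlib lookup the .netrc branch a
# few lines above relies on. The machine name 'facebook' is an assumption for
# illustration; the extractor reads its own _NETRC_MACHINE constant.
import netrc

def netrc_credentials(machine='facebook'):
    try:
        info = netrc.netrc().authenticators(machine)   # (login, account, password) or None
        if info is None:
            raise netrc.NetrcParseError('No authenticators for %s' % machine)
        return info[0], info[2]                        # (useremail, password)
    except (IOError, netrc.NetrcParseError) as err:
        print(u'WARNING: parsing .netrc: %s' % err)
        return None, None
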
@@ -1919,7 +1993,7 @@ class FacebookIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -1936,7 +2010,7 @@ class FacebookIE(InfoExtractor):
page = urllib2.urlopen(request)
video_webpage = page.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Start extracting information
@@ -2070,13 +2144,13 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh
}
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if info is None: # Regular URL
try:
json_code = urlh.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
return
try:
@@ -2144,7 +2218,7 @@ class MyVideoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -2179,6 +2253,25 @@ class ComedyCentralIE(InfoExtractor):
_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
IE_NAME = u'comedycentral'
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
_video_extensions = {
'3500': 'mp4',
'2200': 'mp4',
'1700': 'mp4',
'1200': 'mp4',
'750': 'mp4',
'400': 'mp4',
}
_video_dimensions = {
'3500': '1280x720',
'2200': '960x540',
'1700': '768x432',
'1200': '640x360',
'750': '512x288',
'400': '384x216',
}
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
@@ -2191,6 +2284,13 @@ class ComedyCentralIE(InfoExtractor):
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
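
# Illustrative sketch (not from the patch): what _print_formats above prints for
# the format table added in this hunk (bitrate id, container, resolution).
_video_extensions = dict.fromkeys(['3500', '2200', '1700', '1200', '750', '400'], 'mp4')
_video_dimensions = {'3500': '1280x720', '2200': '960x540', '1700': '768x432',
                     '1200': '640x360', '750': '512x288', '400': '384x216'}
print('Available formats:')
for x in ['3500', '2200', '1700', '1200', '750', '400']:
    print('%s\t:\t%s\t[%s]' % (x, _video_extensions.get(x, 'mp4'),
                               _video_dimensions.get(x, '???')))
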
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@ -2231,10 +2331,19 @@ class ComedyCentralIE(InfoExtractor):
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
if len(mMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a data-mgid
# attribute without a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
@@ -2283,10 +2392,31 @@ class ComedyCentralIE(InfoExtractor):
if len(turls) == 0:
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
continue
if self._downloader.params.get('listformats', None):
self._print_formats([i[0] for i in turls])
return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, video_url = f, v
break
# Patch to download from alternative CDN, which does not
# break on current RTMPDump builds
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
if video_url.startswith(broken_cdn):
video_url = video_url.replace(broken_cdn, better_cdn)
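
# Illustrative sketch (not from the patch): the two steps above in isolation --
# honour the requested format if it names one of the available bitrates (otherwise
# keep the highest one), then swap the CDN host that current RTMPDump builds choke
# on. turls is made-up sample data in the same (format_id, rtmp_url) shape as above.
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"

def pick_url(turls, req_format=None):
    fmt, video_url = turls[-1]                 # default: highest listed bitrate
    for f, v in turls:
        if f == req_format:                    # explicit format wins if present
            fmt, video_url = f, v
            break
    if video_url.startswith(broken_cdn):       # reroute to the working CDN
        video_url = video_url.replace(broken_cdn, better_cdn)
    return fmt, video_url

turls = [('750', broken_cdn + 'clip_750'), ('3500', broken_cdn + 'clip_3500')]
print(pick_url(turls, '750'))                  # ('750', better_cdn + 'clip_750')
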
effTitle = showId + u'-' + epTitle
info = {
'id': shortMediaId,
@@ -2298,7 +2428,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format,
'thumbnail': None,
'description': officialTitle,
'player_url': playerUrl
'player_url': None #playerUrl
}
results.append(info)
@@ -2406,7 +2536,7 @@ class CollegeHumorIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@@ -2425,7 +2555,7 @@ class CollegeHumorIE(InfoExtractor):
try:
metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metaXml)
@@ -2471,7 +2601,7 @@ class XVideosIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -2557,7 +2687,7 @@ class SoundcloudIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction('%s/%s' % (uploader, slug_title))
@@ -2584,7 +2714,7 @@ class SoundcloudIE(InfoExtractor):
mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
if mobj:
description = mobj.group(1)
# upload date
upload_date = None
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
@@ -2592,7 +2722,7 @@ class SoundcloudIE(InfoExtractor):
try:
upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
except Exception, e:
self._downloader.to_stderr(str(e))
self._downloader.to_stderr(compat_str(e))
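
# Illustrative sketch (not from the patch): the date normalisation performed just
# above -- Soundcloud's human-readable timestamp becomes the YYYYMMDD upload_date.
# The sample string is an assumption shaped like the pretty-date regex in this hunk.
import datetime

pretty = 'November 25, 2012 20:39'
upload_date = datetime.datetime.strptime(pretty, '%B %d, %Y %H:%M').strftime('%Y%m%d')
print(upload_date)    # 20121125
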
# for soundcloud, a request to a cross domain is required for cookies
request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@@ -2636,7 +2766,7 @@ class InfoQIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction(url)
@@ -2722,15 +2852,15 @@ class MixcloudIE(InfoExtractor):
return None
def _print_formats(self, formats):
print 'Available formats:'
print('Available formats:')
for fmt in formats.keys():
for b in formats[fmt]:
try:
ext = formats[fmt][b][0]
print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
except TypeError: # we have no bitrate info
ext = formats[fmt][0]
print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
break
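
# Illustrative sketch (not from the patch): why the prints above gained parentheses.
# With a single argument, print(...) parses as a statement on Python 2 and as a
# function call on Python 3; pre-formatting with % keeps it to one argument. For
# true function semantics on Python 2 the __future__ import below is also needed.
from __future__ import print_function

fmt, bitrate, ext = 'mp3', '320', 'some_file.mp3'
print('%s\t%s\t[%s]' % (fmt, bitrate, ext.split('.')[-1]))   # mp3  320  [mp3]
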
def _real_extract(self, url):
@@ -2750,7 +2880,7 @@ class MixcloudIE(InfoExtractor):
self.report_download_json(file_url)
jsonData = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
return
# parse JSON
@@ -2934,7 +3064,7 @@ class MTVIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@@ -2967,7 +3097,7 @@ class MTVIE(InfoExtractor):
try:
metadataXml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@@ -3054,7 +3184,7 @@ class YoukuIE(InfoExtractor):
self.report_download_webpage(video_id)
jsondata = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -3230,7 +3360,7 @@ class GooglePlusIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
return
# Extract update date
@@ -3272,7 +3402,7 @@ class GooglePlusIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extract_vid_page(video_page)

View File

@@ -73,7 +73,7 @@ class FFmpegExtractAudioPP(PostProcessor):
def detect_executables():
def executable(exe):
try:
subprocess.check_output([exe, '-version'])
subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
except OSError:
return False
return exe
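
# Illustrative sketch (not from the patch): the probe above on its own. Popen plus
# communicate() replaces subprocess.check_output(), which only exists on
# Python >= 2.7; an OSError means the binary is not on PATH.
import subprocess

def executable(exe):
    try:
        subprocess.Popen([exe, '-version'],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe

print([exe for exe in ('avconv', 'ffmpeg', 'ffprobe') if executable(exe)])
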
@@ -146,7 +146,7 @@ class FFmpegExtractAudioPP(PostProcessor):
if int(self._preferredquality) < 10:
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
@@ -156,7 +156,7 @@ class FFmpegExtractAudioPP(PostProcessor):
if int(self._preferredquality) < 10:
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
if self._preferredcodec == 'm4a':
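
# Illustrative sketch (not from the patch): the quality-option rule patched above.
# Values below 10 are passed through as VBR quality levels; anything else is a
# bitrate and now gets the 'k' suffix so ffmpeg/avconv reads kbit/s, not bit/s.
def audio_quality_opts(preferredquality, use_avconv=True):
    if int(preferredquality) < 10:
        return [use_avconv and '-q:a' or '-aq', preferredquality]
    return [use_avconv and '-b:a' or '-ab', preferredquality + 'k']

print(audio_quality_opts('5'))      # ['-q:a', '5']
print(audio_quality_opts('128'))    # ['-b:a', '128k']
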

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import with_statement
__authors__ = (
'Ricardo Garcia Gonzalez',
'Danny Colligan',
@@ -19,7 +21,7 @@ __authors__ = (
)
__license__ = 'Public Domain'
__version__ = '2012.10.09'
__version__ = '2012.11.28'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@@ -46,7 +48,7 @@ from PostProcessor import *
def updateSelf(downloader, filename):
''' Update the program file with the latest version from the repository '''
# Note: downloader only used for options
if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename)
@@ -64,7 +66,7 @@ def updateSelf(downloader, filename):
directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % directory)
try:
urlh = urllib2.urlopen(UPDATE_URL_EXE)
newcontent = urlh.read()
@@ -73,20 +75,18 @@ def updateSelf(downloader, filename):
outf.write(newcontent)
except (IOError, OSError), err:
sys.exit('ERROR: unable to download latest version')
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
b = open(bat, 'w')
print >> b, """
b.write("""
echo Updating youtube-dl...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
""" %(exe, exe, bat)
\n""" %(exe, exe, bat))
b.close()
os.startfile(bat)
except (IOError, OSError), err:
sys.exit('ERROR: unable to overwrite current version')
@@ -263,13 +263,18 @@ def parseOpts():
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('--id',
action='store_true', dest='useid', help='use video ID in file name', default=False)
filesystem.add_option('-l', '--literal',
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
filesystem.add_option('-A', '--auto-number',
action='store_true', dest='autonumber',
help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
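
# Illustrative sketch (not from the patch): how an output TEMPLATE like the one
# documented above is expanded -- ordinary %-formatting against the video's info
# dict (the field values here are made-up examples).
outtmpl = u'%(title)s-%(id)s.%(ext)s'
info_dict = {'title': u'Some Video', 'id': u'abc123DEF45', 'ext': u'mp4'}
print(outtmpl % info_dict)          # Some Video-abc123DEF45.mp4
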
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
@@ -294,7 +299,7 @@ def parseOpts():
help='write video metadata to a .info.json file', default=False)
postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
@@ -328,6 +333,7 @@ def gen_extractors():
"""
return [
YoutubePlaylistIE(),
YoutubeChannelIE(),
YoutubeUserIE(),
YoutubeSearchIE(),
YoutubeIE(),
@@ -421,10 +427,10 @@ def _real_main():
parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
parser.error(u'account username missing')
if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
parser.error(u'using output template conflicts with using title, literal title or auto number')
if opts.usetitle and opts.useliteral:
parser.error(u'using title conflicts with using literal title')
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
parser.error(u'using output template conflicts with using title, video ID or auto number')
if opts.usetitle and opts.useid:
parser.error(u'using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
opts.password = getpass.getpass(u'Type account password and press return:')
if opts.ratelimit is not None:
@@ -475,15 +481,14 @@ def _real_main():
'format_limit': opts.format_limit,
'listformats': opts.listformats,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'),
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
@@ -527,7 +532,7 @@ def _real_main():
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = fd.download(all_urls)
except MaxDownloadsReached:

View File

@@ -26,6 +26,11 @@ std_headers = {
'Accept-Language': 'en-us,en;q=0.5',
}
try:
compat_str = unicode # Python 2
except NameError:
compat_str = str
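
# Illustrative sketch (not from the patch): why str(err) becomes compat_str(err)
# throughout this changeset. On Python 2, str() of an exception whose message
# contains non-ASCII text can raise UnicodeEncodeError inside the error handler
# itself; unicode() (aliased above) keeps the text intact, and on Python 3 the
# alias is simply str.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str      # Python 3

err = ValueError(u'caf\u00e9 is not a valid URL')
message = u'ERROR: unable to download webpage: %s' % compat_str(err)
print(message == u'ERROR: unable to download webpage: caf\u00e9 is not a valid URL')  # True
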
def preferredencoding():
"""Get preferred encoding.
@@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser):
HTMLParser.HTMLParser.__init__(self)
def error(self, message):
print >> sys.stderr, self.getpos()
if self.error_count > 10 or self.started:
raise HTMLParser.HTMLParseError(message, self.getpos())
self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -190,14 +194,36 @@ def timeconvert(timestr):
if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
def sanitize_filename(s):
"""Sanitizes a string so it could be used as part of a filename."""
def sanitize_filename(s, restricted=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
"""
def replace_insane(char):
if char in u' .\\/|?*<>:"' or ord(char) < 32:
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
return '' if restricted else '\''
elif char == ':':
return '_-' if restricted else ' -'
elif char in '\\/|*<>':
return '_'
if restricted and (char in '&\'' or char.isspace()):
return '_'
if restricted and ord(char) > 127:
return '_'
return char
return u''.join(map(replace_insane, s)).strip('_')
result = u''.join(map(replace_insane, s))
while '__' in result:
result = result.replace('__', '_')
result = result.strip('_')
# Common case of "Foreign band name - English song title"
if restricted and result.startswith('-_'):
result = result[2:]
if not result:
result = '_'
return result
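
# Illustrative sketch (not from the patch): expected behaviour of the rewritten
# sanitize_filename above for an assumed sample title, with outputs worked out
# from the replacement rules in this hunk (relies on the function defined above).
title = u'Sixty Symbols: "What is time?" / part 1'
print(sanitize_filename(title))                   # Sixty Symbols - 'What is time' _ part 1
print(sanitize_filename(title, restricted=True))  # Sixty_Symbols_-_What_is_time_part_1
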
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """