From 469d4c89686afca46333d85442bb770e6010518c Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Mon, 17 Nov 2014 17:52:00 -0500
Subject: [PATCH 01/11] [vk] Added a new information extractor for pages that
 are a list of a user's videos on vk.com. It works in the same way as the
 playlist-style pages for the YT information extractors.

---
 youtube_dl/extractor/__init__.py |  5 ++++-
 youtube_dl/extractor/vk.py       | 37 +++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f45ce05ab..b687a56b4 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -452,7 +452,10 @@ from .vine import (
     VineUserIE,
 )
 from .viki import VikiIE
-from .vk import VKIE
+from .vk import (
+    VKIE,
+    VKUserVideosIE,
+)
 from .vodlocker import VodlockerIE
 from .vporn import VpornIE
 from .vrt import VRTIE
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 36cd7e52e..5223e5e2c 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -16,7 +16,7 @@ from ..utils import (
 
 class VKIE(InfoExtractor):
     IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
     _NETRC_MACHINE = 'vk'
 
     _TESTS = [
@@ -185,3 +185,38 @@ class VKIE(InfoExtractor):
             'uploader': data.get('md_author'),
             'duration': data.get('duration')
         }
+
+
+class VKUserVideosIE(InfoExtractor):
+    IE_NAME = 'vk.com:user-videos'
+    IE_DESC = 'All of a user\'s videos'
+    _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)'
+    _TEMPLATE_URL = 'https://vk.com/videos'
+    _TEST = {
+        'url': 'http://vk.com/videos205387401',
+        'playlist_mincount': 4,
+    }
+
+    def extract_videos_from_page(self, page):
+        ids_in_page = []
+        for mobj in re.finditer(r'href="/video([0-9_]+)"', page):
+            if mobj.group(1) not in ids_in_page:
+                ids_in_page.append(mobj.group(1))
+        return ids_in_page
+
+    def _real_extract(self, url):
+        # Extract page id
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError('Invalid URL: %s' % url)
+
+        # Download page and get video ids
+        page_id = mobj.group(1)
+        page = self._download_webpage(url, page_id)
+        video_ids = self.extract_videos_from_page(page)
+
+        self._downloader.to_screen('[vk] User videos %s: Found %i videos' % (page_id, len(video_ids)))
+
+        url_entries = [self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)
+                       for video_id in video_ids]
+        return self.playlist_result(url_entries, page_id)
\ No newline at end of file

From 6fcd6e0e21839ae4b1753995dc44d2a93f72ac1f Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Tue, 18 Nov 2014 19:34:12 +0000
Subject: [PATCH 02/11] [vk] Updated the regex for matching user video pages.
 It now matches optional URL parameters too.

---
 youtube_dl/extractor/vk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 5223e5e2c..3bcf50e28 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -190,7 +190,7 @@ class VKIE(InfoExtractor):
 class VKUserVideosIE(InfoExtractor):
     IE_NAME = 'vk.com:user-videos'
     IE_DESC = 'All of a user\'s videos'
-    _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)'
+    _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)(?:m\?.*)?'
     _TEMPLATE_URL = 'https://vk.com/videos'
     _TEST = {
         'url': 'http://vk.com/videos205387401',

From 02a12f9fe69508525c9cad06782151f5cf950671 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Tue, 18 Nov 2014 20:19:56 +0000
Subject: [PATCH 03/11] [vk] The date a video was added is now extracted from
 the video page as upload_date.

---
 youtube_dl/extractor/vk.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 3bcf50e28..deaad6c3e 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -11,7 +11,7 @@ from ..utils import (
     compat_urllib_parse,
     compat_str,
     unescapeHTML,
-)
+    unified_strdate)
 
 
 class VKIE(InfoExtractor):
@@ -169,6 +169,12 @@ class VKIE(InfoExtractor):
         data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
         data = json.loads(data_json)
 
+        # Extract upload date
+        upload_date = None
+        mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+        if mobj is not None:
+            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+
         formats = [{
             'format_id': k,
             'url': v,
@@ -183,7 +189,8 @@ class VKIE(InfoExtractor):
             'title': unescapeHTML(data['md_title']),
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
-            'duration': data.get('duration')
+            'duration': data.get('duration'),
+            'upload_date': upload_date,
         }
 
 

From 42e1ff8665ceb6eddb6b2067f57239be3a8ab209 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 22:23:39 +0000
Subject: [PATCH 04/11] [vk.com] Added upload_date variable to the test cases
 that still work.

---
 youtube_dl/extractor/vk.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index deaad6c3e..de87bee1f 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -29,6 +29,7 @@ class VKIE(InfoExtractor):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:Noize MC.*',
                 'duration': 195,
+                'upload_date': '20120212',
             },
         },
         {
@@ -52,6 +53,7 @@ class VKIE(InfoExtractor):
                 'uploader': 'Vladimir Gavrin',
                 'title': 'Lin Dan',
                 'duration': 101,
+                'upload_date': '20120730',
             }
         },
         {
@@ -88,6 +90,7 @@ class VKIE(InfoExtractor):
                 'uploader': 'Киномания - лучшее из мира кино',
                 'title': ' ',
                 'duration': 7291,
+                'upload_date': '20140328',
             },
             'skip': 'Requires vk account credentials',
         },
@@ -100,6 +103,7 @@ class VKIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Книга Илая',
                 'duration': 6771,
+                'upload_date': '20140626',
             },
             'skip': 'Only works from Russia',
         },

From c52331f30c15ff715431cf1ca5fceec505efe599 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 22:52:01 +0000
Subject: [PATCH 05/11] [vk.com] Replaced a test video that has been removed,
 and added comments asking others to update two other test videos that have
 also been removed.

---
 youtube_dl/extractor/vk.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index de87bee1f..d6632cbb7 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -33,14 +33,15 @@ class VKIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://vk.com/video4643923_163339118',
-            'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
+            'url': 'http://vk.com/video205387401_165548505',
+            'md5': '6c0aeb2e90396ba97035b9cbde548700',
             'info_dict': {
-                'id': '163339118',
+                'id': '165548505',
                 'ext': 'mp4',
-                'uploader': 'Elya Iskhakova',
-                'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
-                'duration': 558,
+                'uploader': 'Tom Cruise',
+                'title': 'No name',
+                'duration': 9,
+                'upload_date': '20130721'
             }
         },
         {
@@ -57,6 +58,8 @@ class VKIE(InfoExtractor):
             }
         },
         {
+            # VIDEO NOW REMOVED
+            # please update if you find a video whose URL follows the same pattern
             'url': 'http://vk.com/video-8871596_164049491',
             'md5': 'a590bcaf3d543576c9bd162812387666',
             'note': 'Only available for registered users',
@@ -66,10 +69,13 @@ class VKIE(InfoExtractor):
                 'uploader': 'Триллеры',
                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                 'duration': 8352,
+                'upload_date': '20121218'
             },
             'skip': 'Requires vk account credentials',
         },
         {
+            # VIDEO NOW REMOVED
+            # please update if you find a video whose URL follows the same pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
             'md5': 'd82c22e449f036282d1d3f7f4d276869',
             'info_dict': {
@@ -78,6 +84,7 @@ class VKIE(InfoExtractor):
                 'uploader': 'Киномания - лучшее из мира кино',
                 'title': 'Запах женщины (1992)',
                 'duration': 9392,
+                'upload_date': '20130914'
             },
             'skip': 'Requires vk account credentials',
         },
@@ -177,6 +184,7 @@ class VKIE(InfoExtractor):
         upload_date = None
         mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
         if mobj is not None:
+            x = mobj.group(1) + ' ' + mobj.group(2)
             upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
 
         formats = [{

From cad985ab4d78ef00a77932e87cee5aba8a1e3320 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 22:54:11 +0000
Subject: [PATCH 06/11] [vk.com] Updated the description to include vk.com.

---
 youtube_dl/extractor/vk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index d6632cbb7..d96abd16e 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -208,7 +208,7 @@ class VKIE(InfoExtractor):
 
 class VKUserVideosIE(InfoExtractor):
     IE_NAME = 'vk.com:user-videos'
-    IE_DESC = 'All of a user\'s videos'
+    IE_DESC = 'vk.com:All of a user\'s videos'
     _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)(?:m\?.*)?'
     _TEMPLATE_URL = 'https://vk.com/videos'
     _TEST = {

From 53d1cd1f779201af426548fc77e1724b6c70abd9 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 23:03:31 +0000
Subject: [PATCH 07/11] [vk.com] Updated the _VALID_URL regex for the playlist
 IE. Removed the optional m. subdomain prefix and named the id group.

---
 youtube_dl/extractor/vk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index d96abd16e..7136a0cb2 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -209,7 +209,7 @@ class VKIE(InfoExtractor):
 class VKUserVideosIE(InfoExtractor):
     IE_NAME = 'vk.com:user-videos'
     IE_DESC = 'vk.com:All of a user\'s videos'
-    _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)(?:m\?.*)?'
+    _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
     _TEMPLATE_URL = 'https://vk.com/videos'
     _TEST = {
         'url': 'http://vk.com/videos205387401',

From e1e8b6897b2e2610c45eb53fe44e1e07c3c39e82 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 23:16:12 +0000
Subject: [PATCH 08/11] [vk.com] Replaced the extract_videos_from_page function
 with a much simpler one-liner.

---
 youtube_dl/extractor/vk.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 7136a0cb2..e8d7cdeae 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -11,7 +11,8 @@ from ..utils import (
     compat_urllib_parse,
     compat_str,
     unescapeHTML,
-    unified_strdate)
+    unified_strdate,
+    orderedSet)
 
 
 class VKIE(InfoExtractor):
@@ -216,13 +217,6 @@ class VKUserVideosIE(InfoExtractor):
         'playlist_mincount': 4,
     }
 
-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        for mobj in re.finditer(r'href="/video([0-9_]+)"', page):
-            if mobj.group(1) not in ids_in_page:
-                ids_in_page.append(mobj.group(1))
-        return ids_in_page
-
     def _real_extract(self, url):
         # Extract page id
         mobj = re.match(self._VALID_URL, url)
@@ -232,7 +226,7 @@ class VKUserVideosIE(InfoExtractor):
         # Download page and get video ids
         page_id = mobj.group(1)
         page = self._download_webpage(url, page_id)
-        video_ids = self.extract_videos_from_page(page)
+        video_ids = orderedSet(m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
 
         self._downloader.to_screen('[vk] User videos %s: Found %i videos' % (page_id, len(video_ids)))
 

From 021a0db8f787521757919fde5a2303205312abff Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 23:21:03 +0000
Subject: [PATCH 09/11] [vk.com] Simplified the page_id acquisition by using
 the id matched in the URL earlier on.

---
 youtube_dl/extractor/vk.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index e8d7cdeae..b4e261fad 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -218,13 +218,7 @@ class VKUserVideosIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        # Extract page id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-
-        # Download page and get video ids
-        page_id = mobj.group(1)
+        page_id = self._match_id(url)
         page = self._download_webpage(url, page_id)
         video_ids = orderedSet(m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
 

From b9272e8f8f785fee4d77258bdb1cf1d85f947ecf Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 23:22:52 +0000
Subject: [PATCH 10/11] [vk.com] Removed redundant log message -- this
 information is already being logged.

---
 youtube_dl/extractor/vk.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index b4e261fad..248b7d54d 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -221,9 +221,6 @@ class VKUserVideosIE(InfoExtractor):
         page_id = self._match_id(url)
         page = self._download_webpage(url, page_id)
         video_ids = orderedSet(m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
-
-        self._downloader.to_screen('[vk] User videos %s: Found %i videos' % (page_id, len(video_ids)))
-
         url_entries = [self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)
                        for video_id in video_ids]
         return self.playlist_result(url_entries, page_id)
\ No newline at end of file

From 9262867e86cb415ed1994f9f956536eba39fc457 Mon Sep 17 00:00:00 2001
From: Will Sewell <me@willsewell.name>
Date: Fri, 21 Nov 2014 23:25:05 +0000
Subject: [PATCH 11/11] [vk.com] Added newline at the end of the file.

---
 youtube_dl/extractor/vk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 248b7d54d..daf615af9 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -223,4 +223,4 @@ class VKUserVideosIE(InfoExtractor):
         video_ids = orderedSet(m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
         url_entries = [self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)
                        for video_id in video_ids]
-        return self.playlist_result(url_entries, page_id)
\ No newline at end of file
+        return self.playlist_result(url_entries, page_id)