[imdb:list] Switch to loading the webpage
The RSS method seems to be defunct.
This commit is contained in:
		| @@ -68,22 +68,15 @@ class ImdbListIE(InfoExtractor): | |||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         list_id = mobj.group('id') |         list_id = mobj.group('id') | ||||||
|  |  | ||||||
|         # RSS XML is sometimes malformed |         webpage = self._download_webpage(url, list_id) | ||||||
|         rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, 'Downloading list RSS') |         list_code = self._search_regex( | ||||||
|         list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, 'list title') |             r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"', | ||||||
|  |             webpage, 'list code') | ||||||
|  |         entries = [ | ||||||
|  |             self.url_result('http://www.imdb.com' + m, 'Imdb') | ||||||
|  |             for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)] | ||||||
|  |  | ||||||
|         # Export is independent of actual author_id, but returns 404 if no author_id is provided. |         list_title = self._html_search_regex( | ||||||
|         # However, passing dummy author_id seems to be enough. |             r'<h1 class="header">(.*?)</h1>', webpage, 'list title') | ||||||
|         csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id, |  | ||||||
|                                      list_id, 'Downloading list CSV') |  | ||||||
|          |  | ||||||
|         entries = [] |  | ||||||
|         for item in csv.split('\n')[1:]: |  | ||||||
|             cols = item.split(',') |  | ||||||
|             if len(cols) < 2: |  | ||||||
|                 continue |  | ||||||
|             item_id = cols[1][1:-1] |  | ||||||
|             if item_id.startswith('vi'): |  | ||||||
|                 entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')) |  | ||||||
|  |  | ||||||
|         return self.playlist_result(entries, list_id, list_title) |         return self.playlist_result(entries, list_id, list_title) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user