[extractor/common] Support multiple properties in _og_search_property
This commit is contained in:
		| @@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase): | |||||||
|         self.assertEqual(ie._og_search_property('foobar', html), 'Foo') |         self.assertEqual(ie._og_search_property('foobar', html), 'Foo') | ||||||
|         self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') |         self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') | ||||||
|         self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') |         self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') | ||||||
|  |         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') | ||||||
|  |         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) | ||||||
|  |         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) | ||||||
|  |  | ||||||
|     def test_html_search_meta(self): |     def test_html_search_meta(self): | ||||||
|         ie = self.ie |         ie = self.ie | ||||||
|   | |||||||
| @@ -727,9 +727,14 @@ class InfoExtractor(object): | |||||||
|                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) |                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) | ||||||
|  |  | ||||||
|     def _og_search_property(self, prop, html, name=None, **kargs): |     def _og_search_property(self, prop, html, name=None, **kargs): | ||||||
|  |         if not isinstance(prop, (list, tuple)): | ||||||
|  |             prop = [prop] | ||||||
|         if name is None: |         if name is None: | ||||||
|             name = 'OpenGraph %s' % prop |             name = 'OpenGraph %s' % prop[0] | ||||||
|         escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) |         og_regexes = [] | ||||||
|  |         for p in prop: | ||||||
|  |             og_regexes.extend(self._og_regexes(p)) | ||||||
|  |         escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs) | ||||||
|         if escaped is None: |         if escaped is None: | ||||||
|             return None |             return None | ||||||
|         return unescapeHTML(escaped) |         return unescapeHTML(escaped) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user