Use relative paths for DASH fragments (closes #12990)
Reduces JSON size 10x (refs #13810)
This commit is contained in:
		| @@ -2,6 +2,7 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| from .fragment import FragmentFD | from .fragment import FragmentFD | ||||||
| from ..compat import compat_urllib_error | from ..compat import compat_urllib_error | ||||||
|  | from ..utils import urljoin | ||||||
|  |  | ||||||
|  |  | ||||||
| class DashSegmentsFD(FragmentFD): | class DashSegmentsFD(FragmentFD): | ||||||
| @@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD): | |||||||
|     FD_NAME = 'dashsegments' |     FD_NAME = 'dashsegments' | ||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         segments = info_dict['fragments'][:1] if self.params.get( |         fragment_base_url = info_dict.get('fragment_base_url') | ||||||
|  |         fragments = info_dict['fragments'][:1] if self.params.get( | ||||||
|             'test', False) else info_dict['fragments'] |             'test', False) else info_dict['fragments'] | ||||||
|  |  | ||||||
|         ctx = { |         ctx = { | ||||||
|             'filename': filename, |             'filename': filename, | ||||||
|             'total_frags': len(segments), |             'total_frags': len(fragments), | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         self._prepare_and_start_frag_download(ctx) |         self._prepare_and_start_frag_download(ctx) | ||||||
| @@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD): | |||||||
|         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) |         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) | ||||||
|  |  | ||||||
|         frag_index = 0 |         frag_index = 0 | ||||||
|         for i, segment in enumerate(segments): |         for i, fragment in enumerate(fragments): | ||||||
|             frag_index += 1 |             frag_index += 1 | ||||||
|             if frag_index <= ctx['fragment_index']: |             if frag_index <= ctx['fragment_index']: | ||||||
|                 continue |                 continue | ||||||
| @@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD): | |||||||
|             count = 0 |             count = 0 | ||||||
|             while count <= fragment_retries: |             while count <= fragment_retries: | ||||||
|                 try: |                 try: | ||||||
|                     success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) |                     fragment_url = fragment.get('url') | ||||||
|  |                     if not fragment_url: | ||||||
|  |                         assert fragment_base_url | ||||||
|  |                         fragment_url = urljoin(fragment_base_url, fragment['path']) | ||||||
|  |                     success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) | ||||||
|                     if not success: |                     if not success: | ||||||
|                         return False |                         return False | ||||||
|                     self._append_fragment(ctx, frag_content) |                     self._append_fragment(ctx, frag_content) | ||||||
|   | |||||||
| @@ -1892,9 +1892,13 @@ class InfoExtractor(object): | |||||||
|                                 'Bandwidth': bandwidth, |                                 'Bandwidth': bandwidth, | ||||||
|                             } |                             } | ||||||
|  |  | ||||||
|  |                         def location_key(location): | ||||||
|  |                             return 'url' if re.match(r'^https?://', location) else 'path' | ||||||
|  |  | ||||||
|                         if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: |                         if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: | ||||||
|  |  | ||||||
|                             media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) |                             media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) | ||||||
|  |                             media_location_key = location_key(media_template) | ||||||
|  |  | ||||||
|                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ |                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ | ||||||
|                             # can't be used at the same time |                             # can't be used at the same time | ||||||
| @@ -1904,7 +1908,7 @@ class InfoExtractor(object): | |||||||
|                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) |                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) | ||||||
|                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) |                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) | ||||||
|                                 representation_ms_info['fragments'] = [{ |                                 representation_ms_info['fragments'] = [{ | ||||||
|                                     'url': media_template % { |                                     media_location_key: media_template % { | ||||||
|                                         'Number': segment_number, |                                         'Number': segment_number, | ||||||
|                                         'Bandwidth': bandwidth, |                                         'Bandwidth': bandwidth, | ||||||
|                                     }, |                                     }, | ||||||
| @@ -1928,7 +1932,7 @@ class InfoExtractor(object): | |||||||
|                                         'Number': segment_number, |                                         'Number': segment_number, | ||||||
|                                     } |                                     } | ||||||
|                                     representation_ms_info['fragments'].append({ |                                     representation_ms_info['fragments'].append({ | ||||||
|                                         'url': segment_url, |                                         media_location_key: segment_url, | ||||||
|                                         'duration': float_or_none(segment_d, representation_ms_info['timescale']), |                                         'duration': float_or_none(segment_d, representation_ms_info['timescale']), | ||||||
|                                     }) |                                     }) | ||||||
|  |  | ||||||
| @@ -1952,8 +1956,9 @@ class InfoExtractor(object): | |||||||
|                             for s in representation_ms_info['s']: |                             for s in representation_ms_info['s']: | ||||||
|                                 duration = float_or_none(s['d'], timescale) |                                 duration = float_or_none(s['d'], timescale) | ||||||
|                                 for r in range(s.get('r', 0) + 1): |                                 for r in range(s.get('r', 0) + 1): | ||||||
|  |                                     segment_uri = representation_ms_info['segment_urls'][segment_index] | ||||||
|                                     fragments.append({ |                                     fragments.append({ | ||||||
|                                         'url': representation_ms_info['segment_urls'][segment_index], |                                         location_key(segment_uri): segment_uri, | ||||||
|                                         'duration': duration, |                                         'duration': duration, | ||||||
|                                     }) |                                     }) | ||||||
|                                     segment_index += 1 |                                     segment_index += 1 | ||||||
| @@ -1962,6 +1967,7 @@ class InfoExtractor(object): | |||||||
|                         # No fragments key is present in this case. |                         # No fragments key is present in this case. | ||||||
|                         if 'fragments' in representation_ms_info: |                         if 'fragments' in representation_ms_info: | ||||||
|                             f.update({ |                             f.update({ | ||||||
|  |                                 'fragment_base_url': base_url, | ||||||
|                                 'fragments': [], |                                 'fragments': [], | ||||||
|                                 'protocol': 'http_dash_segments', |                                 'protocol': 'http_dash_segments', | ||||||
|                             }) |                             }) | ||||||
| @@ -1969,10 +1975,8 @@ class InfoExtractor(object): | |||||||
|                                 initialization_url = representation_ms_info['initialization_url'] |                                 initialization_url = representation_ms_info['initialization_url'] | ||||||
|                                 if not f.get('url'): |                                 if not f.get('url'): | ||||||
|                                     f['url'] = initialization_url |                                     f['url'] = initialization_url | ||||||
|                                 f['fragments'].append({'url': initialization_url}) |                                 f['fragments'].append({location_key(initialization_url): initialization_url}) | ||||||
|                             f['fragments'].extend(representation_ms_info['fragments']) |                             f['fragments'].extend(representation_ms_info['fragments']) | ||||||
|                             for fragment in f['fragments']: |  | ||||||
|                                 fragment['url'] = urljoin(base_url, fragment['url']) |  | ||||||
|                         try: |                         try: | ||||||
|                             existing_format = next( |                             existing_format = next( | ||||||
|                                 fo for fo in formats |                                 fo for fo in formats | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user