| @@ -58,6 +58,8 @@ from youtube_dl.utils import ( | |||||||
|     xpath_text, |     xpath_text, | ||||||
|     render_table, |     render_table, | ||||||
|     match_str, |     match_str, | ||||||
|  |     parse_dfxp_time_expr, | ||||||
|  |     dfxp2srt, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -581,6 +583,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') | |||||||
|             'like_count > 100 & dislike_count <? 50 & description', |             'like_count > 100 & dislike_count <? 50 & description', | ||||||
|             {'like_count': 190, 'dislike_count': 10})) |             {'like_count': 190, 'dislike_count': 10})) | ||||||
|  |  | ||||||
|  |     def test_parse_dfxp_time_expr(self): | ||||||
|  |         self.assertEqual(parse_dfxp_time_expr(None), 0.0) | ||||||
|  |         self.assertEqual(parse_dfxp_time_expr(''), 0.0) | ||||||
|  |         self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) | ||||||
|  |         self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) | ||||||
|  |         self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) | ||||||
|  |         self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) | ||||||
|  |  | ||||||
|  |     def test_dfxp2srt(self): | ||||||
|  |         dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  |             <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> | ||||||
|  |             <body> | ||||||
|  |                 <div xml:lang="en"> | ||||||
|  |                     <p begin="0" end="1">The following line contains Chinese characters and special symbols</p> | ||||||
|  |                     <p begin="1" end="2">第二行<br/>♪♪</p> | ||||||
|  |                     <p begin="2" end="3"><span>Third<br/>Line</span></p> | ||||||
|  |                 </div> | ||||||
|  |             </body> | ||||||
|  |             </tt>''' | ||||||
|  |         srt_data = '''1 | ||||||
|  | 00:00:00,000 --> 00:00:01,000 | ||||||
|  | The following line contains Chinese characters and special symbols | ||||||
|  |  | ||||||
|  | 2 | ||||||
|  | 00:00:01,000 --> 00:00:02,000 | ||||||
|  | 第二行 | ||||||
|  | ♪♪ | ||||||
|  |  | ||||||
|  | 3 | ||||||
|  | 00:00:02,000 --> 00:00:03,000 | ||||||
|  | Third | ||||||
|  | Line | ||||||
|  |  | ||||||
|  | ''' | ||||||
|  |         self.assertEqual(dfxp2srt(dfxp_data), srt_data) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ from ..utils import ( | |||||||
|     prepend_extension, |     prepend_extension, | ||||||
|     shell_quote, |     shell_quote, | ||||||
|     subtitles_filename, |     subtitles_filename, | ||||||
|  |     dfxp2srt, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -651,6 +652,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): | |||||||
|                     'format' % new_ext) |                     'format' % new_ext) | ||||||
|                 continue |                 continue | ||||||
|             new_file = subtitles_filename(filename, lang, new_ext) |             new_file = subtitles_filename(filename, lang, new_ext) | ||||||
|  |  | ||||||
|  |             if ext == 'dfxp' or ext == 'ttml': | ||||||
|  |                 self._downloader.report_warning( | ||||||
|  |                     'You have requested to convert dfxp (TTML) subtitles into another format, ' | ||||||
|  |                     'which results in style information loss') | ||||||
|  |  | ||||||
|  |                 dfxp_file = subtitles_filename(filename, lang, ext) | ||||||
|  |                 srt_file = subtitles_filename(filename, lang, 'srt') | ||||||
|  |  | ||||||
|  |                 with io.open(dfxp_file, 'rt', encoding='utf-8') as f: | ||||||
|  |                     srt_data = dfxp2srt(f.read()) | ||||||
|  |  | ||||||
|  |                 with io.open(srt_file, 'wt', encoding='utf-8') as f: | ||||||
|  |                     f.write(srt_data) | ||||||
|  |  | ||||||
|  |                 ext = 'srt' | ||||||
|  |                 subs[lang] = { | ||||||
|  |                     'ext': 'srt', | ||||||
|  |                     'data': srt_data | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 if new_ext == 'srt': | ||||||
|  |                     continue | ||||||
|  |  | ||||||
|             self.run_ffmpeg( |             self.run_ffmpeg( | ||||||
|                 subtitles_filename(filename, lang, ext), |                 subtitles_filename(filename, lang, ext), | ||||||
|                 new_file, ['-f', new_format]) |                 new_file, ['-f', new_format]) | ||||||
|   | |||||||
| @@ -1800,6 +1800,59 @@ def match_filter_func(filter_str): | |||||||
|     return _match_func |     return _match_func | ||||||
|  |  | ||||||
|  |  | ||||||
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP (TTML) time expression into a number of seconds.

    Two syntaxes are understood:

    * offset time: a decimal number optionally followed by one of the
      metrics ``h``, ``m``, ``s`` or ``ms``; a bare number is interpreted
      as seconds
    * clock time: ``H:MM:SS`` with an optional fractional part on the
      seconds field

    Whitespace around the expression is ignored.

    Returns a float, 0.0 when the expression is empty or None, and None
    when the expression matches neither syntax (e.g. frame or tick based
    expressions, which cannot be resolved without the document's frame or
    tick rate).
    """
    if not time_expr:
        return 0.0

    time_expr = time_expr.strip()

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)(?P<metric>h|ms|m|s)?$', time_expr)
    if mobj:
        offset = float(mobj.group('time_offset'))
        metric = mobj.group('metric')
        if metric == 'ms':
            # Divide instead of multiplying by 0.001, which is not exactly
            # representable and would introduce avoidable float error
            return offset / 1000.0
        # No metric (or 's') means the value is already in seconds
        return offset * {'h': 3600.0, 'm': 60.0}.get(metric, 1.0)

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))

    # Unsupported syntax: make the "no result" outcome explicit
    return None
|  |  | ||||||
|  |  | ||||||
def format_srt_time(seconds):
    """Format a number of seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split
    into fields. Rounding the whole value once (instead of truncating the
    fractional part of the seconds) avoids losing a millisecond to binary
    floating point error, e.g. 2.3 yields ``00:00:02,300`` rather than
    ``00:00:02,299``, and lets a rounded-up millisecond carry correctly
    into the seconds/minutes/hours fields.
    """
    total_millisecs = int(round(seconds * 1000))
    (total_secs, millisecs) = divmod(total_millisecs, 1000)
    (total_mins, secs) = divmod(total_secs, 60)
    (hours, mins) = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
|  |  | ||||||
|  |  | ||||||
def dfxp2srt(dfxp_data):
    """Convert a DFXP (TTML) subtitle document into SRT text.

    dfxp_data is the XML document as a text string. Every ``<p>`` element
    in the TTML namespace becomes one SRT cue, numbered from 1 in document
    order, with its ``begin``/``end`` attributes used as the cue times.
    ``<br/>`` elements become newlines, ``<span>`` elements are flattened
    to their text, and any other child element is emitted as serialized
    XML markup. Styling information is not carried over.

    Returns the SRT document as a single string.
    """
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
    str_or_empty = functools.partial(str_or_none, default='')

    def parse_node(node):
        # Flatten the mixed content of node into plain text
        out = str_or_empty(node.text)

        for child in node:
            if child.tag == _x('ttml:br'):
                out += '\n' + str_or_empty(child.tail)
            elif child.tag == _x('ttml:span'):
                # The text that follows the closing </span> is stored in
                # child.tail and must be kept as well
                out += str_or_empty(parse_node(child)) + str_or_empty(child.tail)
            else:
                # tostring() serializes the element together with its tail
                # as ASCII bytes; decode them so that text (and not the
                # repr of a bytes object) is appended
                out += xml.etree.ElementTree.tostring(child).decode('ascii')

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p'))

    for index, para in enumerate(paras, 1):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
            parse_node(para)))

    return ''.join(out)
|  |  | ||||||
|  |  | ||||||
| class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): | class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): | ||||||
|     def __init__(self, proxies=None): |     def __init__(self, proxies=None): | ||||||
|         # Set default handlers |         # Set default handlers | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user