Skip to content

Commit

Permalink
load intermediate subtitle files with known encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
baxtree committed Jun 21, 2024
1 parent 65f7271 commit 21ceaa7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 31 deletions.
14 changes: 8 additions & 6 deletions subaligner/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,10 @@ def predict_plain_text(self, video_file_path: str, subtitle_file_path: str, stre
task.output_sync_map_file()

# Load the above subtitle segment
adjusted_subs = Subtitle.load(
task.sync_map_file_path_absolute
).subs
adjusted_subs = Subtitle._get_srt_subs(
subrip_file_path=task.sync_map_file_path_absolute,
encoding="utf-8"
)

frame_rate = None
try:
Expand Down Expand Up @@ -686,9 +687,10 @@ def __adjust_durations(self, subs: List[SubRipItem], audio_file_path: str, stret
task.output_sync_map_file()

# Load the above subtitle segment
adjusted_subs = Subtitle.load(
task.sync_map_file_path_absolute
).subs
adjusted_subs = Subtitle._get_srt_subs(
subrip_file_path=task.sync_map_file_path_absolute,
encoding="utf-8"
)
for index, sub_new_loaded in enumerate(adjusted_subs):
sub_new_loaded.index = subs[index].index

Expand Down
50 changes: 25 additions & 25 deletions subaligner/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ def __load_subrip(subrip_file_path: str) -> SubRipFile:
Returns:
SubRipFile: A list of SubRipItems.
"""
return Subtitle.__get_srt_subs(subrip_file_path)
return Subtitle._get_srt_subs(subrip_file_path)

@staticmethod
def __convert_ttml_to_subs(ttml_file_path: str) -> SubRipFile:
Expand All @@ -529,7 +529,7 @@ def __convert_ttml_to_subs(ttml_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.ttml2srt(ttml_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_vtt_to_subs(vtt_file_path: str) -> SubRipFile:
Expand All @@ -545,7 +545,7 @@ def __convert_vtt_to_subs(vtt_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.vtt2srt(vtt_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_ssa_to_subs(ssa_file_path: str) -> SubRipFile:
Expand All @@ -562,7 +562,7 @@ def __convert_ssa_to_subs(ssa_file_path: str) -> SubRipFile:
path = "%s.srt" % path
Utils.ssa2srt(ssa_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_ass_to_subs(ass_file_path: str) -> SubRipFile:
Expand All @@ -579,7 +579,7 @@ def __convert_ass_to_subs(ass_file_path: str) -> SubRipFile:
path = "%s.srt" % path
Utils.ass2srt(ass_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_microdvd_to_subs(microdvd_file_path: str) -> SubRipFile:
Expand All @@ -596,7 +596,7 @@ def __convert_microdvd_to_subs(microdvd_file_path: str) -> SubRipFile:
path = "%s.srt" % path
Utils.microdvd2srt(microdvd_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_mpl2_to_subs(mpl2_file_path: str) -> SubRipFile:
Expand All @@ -613,7 +613,7 @@ def __convert_mpl2_to_subs(mpl2_file_path: str) -> SubRipFile:
path = "%s.srt" % path
Utils.mpl22srt(mpl2_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_tmp_to_subs(tmp_file_path: str) -> SubRipFile:
Expand All @@ -630,7 +630,7 @@ def __convert_tmp_to_subs(tmp_file_path: str) -> SubRipFile:
path = "%s.srt" % path
Utils.tmp2srt(tmp_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_sami_to_subs(sami_file_path: str) -> SubRipFile:
Expand All @@ -646,7 +646,7 @@ def __convert_sami_to_subs(sami_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.sami2srt(sami_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_stl_to_subs(stl_file_path: str) -> SubRipFile:
Expand All @@ -662,7 +662,7 @@ def __convert_stl_to_subs(stl_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.stl2srt(stl_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_scc_to_subs(scc_file_path: str) -> SubRipFile:
Expand All @@ -678,7 +678,7 @@ def __convert_scc_to_subs(scc_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.scc2srt(scc_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_sbv_to_subs(sbv_file_path: str) -> SubRipFile:
Expand All @@ -694,7 +694,7 @@ def __convert_sbv_to_subs(sbv_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.sbv2srt(sbv_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __convert_ytt_to_subs(ytt_file_path: str) -> SubRipFile:
Expand All @@ -710,7 +710,7 @@ def __convert_ytt_to_subs(ytt_file_path: str) -> SubRipFile:
_, path = tempfile.mkstemp()
Utils.ytt2srt(ytt_file_path, path)

return Subtitle.__get_srt_subs(path, housekeep=True)
return Subtitle._get_srt_subs(path, housekeep=True)

@staticmethod
def __export_with_format(subs: List[SubRipItem], source_file_path: str, target_file_path: Optional[str], file_extension: str, suffix: str) -> None:
Expand All @@ -720,18 +720,6 @@ def __export_with_format(subs: List[SubRipItem], source_file_path: str, target_f
)
Subtitle.export_subtitle(source_file_path, subs, target_file_path)

@staticmethod
def __get_srt_subs(subrip_file_path: str, housekeep: bool = False) -> SubRipFile:
    """Load a SubRip file with pysrt using an automatically detected encoding.

    Arguments:
        subrip_file_path {string} -- The path to the SubRip file to load.
        housekeep {bool} -- When True, the file at subrip_file_path is deleted
            after the load attempt, whether or not loading succeeded.

    Returns:
        SubRipFile: The object returned by pysrt.open for the file.

    Raises:
        UnsupportedFormatException: If pysrt.open raises any exception.
    """
    # The encoding is whatever Utils.detect_encoding reports for this path.
    detected_encoding = Utils.detect_encoding(subrip_file_path)
    try:
        loaded_subs = pysrt.open(subrip_file_path, encoding=detected_encoding)
    except Exception as e:
        raise UnsupportedFormatException("Error occurred when loading subtitle from %s" % subrip_file_path) from e
    else:
        return loaded_subs
    finally:
        # Runs on both the success and failure paths before control leaves.
        if housekeep:
            os.remove(subrip_file_path)

@staticmethod
def __save_subtitle_by_extension(file_extension: str,
subs: List[SubRipItem],
Expand Down Expand Up @@ -849,3 +837,15 @@ def __save_subtitle_by_extension(file_extension: str,
raise UnsupportedFormatException(
"Unknown subtitle format for file: {}".format(source_file_path)
)

@staticmethod
def _get_srt_subs(subrip_file_path: str, housekeep: bool = False, encoding: Optional[str] = None) -> SubRipFile:
    """Load a SubRip file with pysrt, optionally with a caller-supplied encoding.

    Arguments:
        subrip_file_path {string} -- The path to the SubRip file to load.
        housekeep {bool} -- When True, the file at subrip_file_path is deleted
            after the load attempt, whether or not loading succeeded.
        encoding {string} -- The encoding to open the file with. When None
            (or empty), the value returned by Utils.detect_encoding is used.

    Returns:
        SubRipFile: The object returned by pysrt.open for the file.

    Raises:
        UnsupportedFormatException: If pysrt.open raises any exception.
    """
    try:
        # Detection happens inside the cleanup scope so that a failure here
        # cannot leave a temporary file behind when housekeep is requested.
        encoding = encoding or Utils.detect_encoding(subrip_file_path)
        try:
            return pysrt.open(subrip_file_path, encoding=encoding)
        except Exception as e:
            raise UnsupportedFormatException("Error occurred when loading subtitle from %s" % subrip_file_path) from e
    finally:
        # Only remove a file that is still there; an unconditional remove
        # could raise FileNotFoundError and hide the original error.
        if housekeep and os.path.exists(subrip_file_path):
            os.remove(subrip_file_path)

0 comments on commit 21ceaa7

Please sign in to comment.