Coverage for srtools/transliteration.py : 0%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2# srtools
3# Copyright (C) 2019-2021 Andrej Radović <r.andrej@gmail.com>
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17import re
19from .character_dictionaries import CYR_TO_LAT_TTABLE
20from .character_dictionaries import LAT_TO_CYR_DIGRAPHS_DICT
21from .character_dictionaries import LAT_TO_CYR_TTABLE
# Regex source: one capturing group holding an alternation of every key of
# LAT_TO_CYR_DIGRAPHS_DICT. Keys are passed through re.escape so they are
# always matched literally.
_LAT_TO_CYR_DIGRAPH_RX = "({})".format(
    "|".join(re.escape(digraph) for digraph in LAT_TO_CYR_DIGRAPHS_DICT)
)
# Compiled once at import time so that callers do not rebuild the pattern on
# every transliteration.
_LAT_TO_CYR_DIGRAPH_COMP_RX = re.compile(
    _LAT_TO_CYR_DIGRAPH_RX, flags=re.UNICODE | re.MULTILINE
)
def _cyr_sub_string_from_lat_match(match: re.Match) -> str:
    """Return the replacement text for a single digraph regex match.

    Suitable as the ``repl`` callable of ``re.sub``: the full text of the
    match is used as a key into ``LAT_TO_CYR_DIGRAPHS_DICT`` and the value
    stored under that key is returned.

    Args:
        match: match object whose matched text is expected to be a key of
            ``LAT_TO_CYR_DIGRAPHS_DICT``.

    Returns:
        str: The value ``LAT_TO_CYR_DIGRAPHS_DICT`` holds for the matched
        text.
    """
    matched_latin = match.group()
    return LAT_TO_CYR_DIGRAPHS_DICT[matched_latin]
def latin_to_cyrillic(text: str) -> str:
    """Convert a Serbian Latin string to its Cyrillic form.

    The conversion runs in two passes. First, every match of the
    precompiled digraph pattern is replaced through
    ``_cyr_sub_string_from_lat_match``. Then the result is run through
    ``str.translate`` with ``LAT_TO_CYR_TTABLE``.

    A special separator ``!`` may be placed inside the digraphs `lj`, `nj`,
    `dž` (like so: `l!j`) to prevent their conversion to a single Cyrillic
    letter.

    Args:
        text: Latin string to transliterate.

    Returns:
        str: The input transliterated to Cyrillic.

    Examples:
        >>> from srtools import latin_to_cyrillic
        >>> in_str = "Đače, uštedu plaćaj žaljenjem zbog džinovskih cifara."
        >>> latin_to_cyrillic(in_str)
        'Ђаче, уштеду плаћај жаљењем због џиновских цифара.'
        >>> latin_to_cyrillic('N!J je skraćenica za Nju Džersi')
        'НЈ је скраћеница за Њу Џерси'
    """
    # Digraphs go first so they are consumed as units before the
    # per-character table is applied to whatever remains.
    digraphs_replaced = _LAT_TO_CYR_DIGRAPH_COMP_RX.sub(
        _cyr_sub_string_from_lat_match, text
    )
    return digraphs_replaced.translate(LAT_TO_CYR_TTABLE)
def cyrillic_to_latin(text: str) -> str:
    """Convert a Serbian Cyrillic string to its Latin form.

    The whole conversion is a single ``str.translate`` call using
    ``CYR_TO_LAT_TTABLE``.

    Args:
        text: Cyrillic string to transliterate.

    Returns:
        str: The input transliterated to Latin.

    Examples:
        >>> from srtools import cyrillic_to_latin
        >>> in_str = "Ђаче, уштеду плаћај жаљењем због џиновских цифара."
        >>> cyrillic_to_latin(in_str)
        'Đače, uštedu plaćaj žaljenjem zbog džinovskih cifara.'
    """
    latin_text = text.translate(CYR_TO_LAT_TTABLE)
    return latin_text