Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# srtools 

3# Copyright (C) 2019-2021 Andrej Radović <r.andrej@gmail.com> 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# 

10# This program is distributed in the hope that it will be useful, 

11# but WITHOUT ANY WARRANTY; without even the implied warranty of 

12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

13# GNU General Public License for more details. 

14# 

15# You should have received a copy of the GNU General Public License 

16# along with this program. If not, see <http://www.gnu.org/licenses/>. 

17import re 

18 

19from .character_dictionaries import CYR_TO_LAT_TTABLE 

20from .character_dictionaries import LAT_TO_CYR_DIGRAPHS_DICT 

21from .character_dictionaries import LAT_TO_CYR_TTABLE 

22 

# Source of the digraph pattern: one capturing group holding an alternation
# of every key of LAT_TO_CYR_DIGRAPHS_DICT. Keys are escaped so that they are
# always matched literally.
_LAT_TO_CYR_DIGRAPH_RX = "({})".format(
    "|".join(re.escape(key) for key in LAT_TO_CYR_DIGRAPHS_DICT)
)
# Compiled once at import time and reused by every transliteration call.
_LAT_TO_CYR_DIGRAPH_COMP_RX = re.compile(
    _LAT_TO_CYR_DIGRAPH_RX, flags=re.UNICODE | re.MULTILINE
)

29 

30 

def _cyr_sub_string_from_lat_match(match: re.Match) -> str:
    """Return the replacement text for a single digraph match.

    Intended to be passed as the replacement callable to ``re.sub``.

    Args:
        match: regex match whose whole matched text is a key of
            ``LAT_TO_CYR_DIGRAPHS_DICT``.

    Returns:
        str: The value stored in ``LAT_TO_CYR_DIGRAPHS_DICT`` under the
        matched text.

    Raises:
        KeyError: If the matched text is not a key of the dictionary.
    """
    # The matched text is the lookup key (the Latin side of the mapping);
    # the stored value is what replaces it.
    return LAT_TO_CYR_DIGRAPHS_DICT[match.group()]

35 

36 

def latin_to_cyrillic(text: str) -> str:
    """Convert a Serbian string written in Latin script to Cyrillic.

    The conversion runs in two passes: every match of the digraph pattern is
    replaced first, and the result is then translated character by character
    with ``LAT_TO_CYR_TTABLE``.

    The digraphs `lj`, `nj` and `dž` can be kept from collapsing into a
    single Cyrillic letter by splitting them with the special separator
    ``!``, e.g. `l!j`.

    Args:
        text: input Latin string to be transliterated.

    Returns:
        str: Input string transliterated to Cyrillic.

    Examples:
        >>> from srtools import latin_to_cyrillic
        >>> in_str = "Đače, uštedu plaćaj žaljenjem zbog džinovskih cifara."
        >>> latin_to_cyrillic(in_str)
        'Ђаче, уштеду плаћај жаљењем због џиновских цифара.'
        >>> latin_to_cyrillic('N!J je skraćenica za Nju Džersi')
        'НЈ је скраћеница за Њу Џерси'
    """
    # Pass 1: multi-character sequences, which a per-character translation
    # table cannot express.
    digraphs_done = _LAT_TO_CYR_DIGRAPH_COMP_RX.sub(
        _cyr_sub_string_from_lat_match, text
    )
    # Pass 2: the remaining characters, one by one.
    return digraphs_done.translate(LAT_TO_CYR_TTABLE)

64 

65 

def cyrillic_to_latin(text: str) -> str:
    """Convert a Serbian string written in Cyrillic script to Latin.

    The whole conversion is a single ``str.translate`` pass using
    ``CYR_TO_LAT_TTABLE``.

    Args:
        text: input Cyrillic string to be transliterated.

    Returns:
        str: Input string transliterated to Latin.

    Examples:
        >>> from srtools import cyrillic_to_latin
        >>> in_str = "Ђаче, уштеду плаћај жаљењем због џиновских цифара."
        >>> cyrillic_to_latin(in_str)
        'Đače, uštedu plaćaj žaljenjem zbog džinovskih cifara.'
    """
    latin_text = text.translate(CYR_TO_LAT_TTABLE)
    return latin_text