# Source code for transliterate.tests.test_transliterate

# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import unittest

from .. import (
    defaults,
    detect_language,
    get_available_language_codes,
    get_available_language_packs,
    get_translit_function,
    slugify,
    translit,
)
from ..base import TranslitLanguagePack, registry
from ..conf import (
    get_setting,
    reset_to_defaults_settings,
    set_setting,
)
from ..contrib.apps.translipsum import TranslipsumGenerator
from ..decorators import (
    transliterate_function,
    transliterate_method,
)
from ..discover import autodiscover

from . import data
from .helpers import log_info


# Module metadata.
__title__ = 'transliterate.tests.test_transliterate'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2018 Artur Barseghyan'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
# Public names of this module: only the test case class is exported.
__all__ = ('TransliterateTest',)


# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)


class TransliterateTest(unittest.TestCase):
    """Test ``transliterate.utils.translit``.

    Covers transliteration in both directions, the function and method
    decorators, registration of custom language packs, the translipsum
    generator, language detection, ``slugify`` and settings overrides.

    Notes:

    - Methods named ``__test_*`` are name-mangled by Python, so their
      final names no longer start with ``test`` and the default unittest
      loader does not collect them. They are effectively disabled tests.
    - Most tests return their result; presumably the ``log_info``
      decorator logs it (confirm in ``helpers``).
    - ``test_15`` and ``test_33`` modify the shared ``registry`` and
      ``test_31`` changes a setting; none of this is restored afterwards.
      unittest runs test methods in alphabetical order of their names,
      so the numeric prefixes determine which tests see those changes
      (e.g. ``test_01`` checks the exact list of language codes before
      ``test_15`` adds ``example``).
    """

    def setUp(self):
        """Set up.

        Copy the sample texts from the ``data`` module onto the test case.
        """
        self.latin_text = data.latin_text
        self.armenian_text = data.armenian_text
        self.cyrillic_text = data.cyrillic_text
        self.ukrainian_cyrillic_text = data.ukrainian_cyrillic_text
        self.bulgarian_cyrillic_text = data.bulgarian_cyrillic_text
        self.georgian_text = data.georgian_text
        self.greek_text = data.greek_text
        self.hebrew_text = data.hebrew_text
        self.mongolian_cyrillic_text = data.mongolian_cyrillic_text
        self.serbian_cyrillic_text = data.serbian_cyrillic_text
        self.pangram_serbian_cyrillic_text = \
            data.pangram_serbian_cyrillic_text
        self.pangram_serbian_latin_text = data.pangram_serbian_latin_text

        # NOTE(review): resetting the settings before each test is
        # currently disabled; confirm whether that is still intended.
        # reset_to_defaults_settings()

    @log_info
    def test_01_get_available_language_codes(self):
        """Test ``autodiscover`` and ``get_available_language_codes``."""
        res = get_available_language_codes()
        res.sort()

        expected = [
            'bg',
            'el',
            'hy',
            'ka',
            'l1',
            'mk',
            'mn',
            'ru',
            'sr',
            'uk',
        ]
        expected.sort()

        self.assertEqual(res, expected)
        return res

    @log_info
    def test_02_translit_latin_to_armenian(self):
        """Test transliteration from Latin to Armenian."""
        res = translit(self.latin_text, 'hy')
        self.assertEqual(res, self.armenian_text)
        return res

    @log_info
    def test_03_translit_latin_to_georgian(self):
        """Test transliteration from Latin to Georgian."""
        res = translit(self.latin_text, 'ka')
        self.assertEqual(res, self.georgian_text)
        return res

    @log_info
    def test_04_translit_latin_to_greek(self):
        """Test transliteration from Latin to Greek."""
        res = translit(self.latin_text, 'el')
        self.assertEqual(res, self.greek_text)
        return res

    @log_info
    def __test_05_translit_latin_to_hebrew(self):
        """Test transliteration from Latin to Hebrew."""
        res = translit(self.latin_text, 'he')
        self.assertEqual(res, self.hebrew_text)
        return res

    @log_info
    def test_06_translit_latin_to_cyrillic(self):
        """Test transliteration from Latin to Cyrillic."""
        res = translit(self.latin_text, 'ru')
        self.assertEqual(res, self.cyrillic_text)
        return res

    @log_info
    def test_06_translit_latin_to_ukrainian_cyrillic(self):
        """Test transliteration from Latin to Ukrainian Cyrillic."""
        res = translit(self.latin_text, 'uk')
        self.assertEqual(res, self.ukrainian_cyrillic_text)
        return res

    @log_info
    def test_06_translit_latin_to_bulgarian_cyrillic(self):
        """Test transliteration from Latin to Bulgarian Cyrillic."""
        res = translit(self.latin_text, 'bg')
        self.assertEqual(res, self.bulgarian_cyrillic_text)
        return res

    @log_info
    def test_06_translit_latin_to_mongolian_cyrillic(self):
        """Test transliteration from Latin to Mongolian Cyrillic."""
        res = translit(self.latin_text, 'mn')
        self.assertEqual(res, self.mongolian_cyrillic_text)
        return res

    @log_info
    def test_06_translit_latin_to_serbian_cyrillic(self):
        """Test transliteration from Latin to Serbian Cyrillic."""
        res = translit(self.latin_text, 'sr')
        self.assertEqual(res, self.serbian_cyrillic_text)
        return res

    @log_info
    def test_07_translit_armenian_to_latin(self):
        """Test transliteration from Armenian to Latin."""
        res = translit(self.armenian_text, 'hy', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_08_translit_georgian_to_latin(self):
        """Test transliteration from Georgian to Latin."""
        res = translit(self.georgian_text, 'ka', reversed=True)
        # The result is capitalised before it is compared with the
        # Latin sample text.
        self.assertEqual(res.capitalize(), self.latin_text)
        return res

    @log_info
    def test_09_translit_greek_to_latin(self):
        """Test transliteration from Greek to Latin."""
        res = translit(self.greek_text, 'el', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def __test_10_translit_hebrew_to_latin(self):
        """Test transliteration from Hebrew to Latin."""
        res = translit(self.hebrew_text, 'he', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_11_translit_cyrillic_to_latin(self):
        """Test transliteration from Cyrillic to Latin."""
        res = translit(self.cyrillic_text, 'ru', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_11_translit_ukrainian_cyrillic_to_latin(self):
        """Test transliteration from Ukrainian Cyrillic to Latin."""
        res = translit(self.ukrainian_cyrillic_text, 'uk', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_11_translit_bulgarian_cyrillic_to_latin(self):
        """Test transliteration from Bulgarian Cyrillic to Latin."""
        res = translit(self.bulgarian_cyrillic_text, 'bg', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_11_translit_mongolian_cyrillic_to_latin(self):
        """Test transliteration from Mongolian Cyrillic to Latin."""
        res = translit(self.mongolian_cyrillic_text, 'mn', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_11_translit_serbian_cyrillic_to_latin(self):
        """Test transliteration from Serbian Cyrillic to Latin."""
        res = translit(self.serbian_cyrillic_text, 'sr', reversed=True)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_12_function_decorator(self):
        """Test the function decorator from Latin to Armenian."""
        @transliterate_function(language_code='hy')
        def decorator_test_armenian(text):
            return text

        res = decorator_test_armenian(self.latin_text)
        self.assertEqual(res, self.armenian_text)

    @log_info
    def test_13_method_decorator(self):
        """Test the method decorator from Latin to Cyrillic."""
        class DecoratorTest(object):
            @transliterate_method(language_code='ru')
            def decorator_test_russian(self, text):
                return text

        res = DecoratorTest().decorator_test_russian(self.latin_text)
        self.assertEqual(res, self.cyrillic_text)
        return res

    @log_info
    def test_14_function_decorator(self):
        """Test the function decorator (reversed) from Armenian to Latin."""
        @transliterate_function(language_code='hy', reversed=True)
        def decorator_test_armenian_reversed(text):
            return text

        res = decorator_test_armenian_reversed(self.armenian_text)
        self.assertEqual(res, self.latin_text)
        return res

    @log_info
    def test_15_register_custom_language_pack(self):
        """Test registering of a custom language pack."""
        class ExampleLanguagePack(TranslitLanguagePack):
            """Example language pack."""

            language_code = "example"
            language_name = "Example"
            mapping = data.test_15_register_custom_language_pack_mapping

        registry.register(ExampleLanguagePack)

        # Use a unittest assertion rather than a bare ``assert`` so the
        # check is not stripped when Python runs with ``-O``.
        self.assertIn('example', get_available_language_codes())
        res = translit(self.latin_text, 'example')
        self.assertEqual(res, 'Lor5m 9psum 4olor s9t 1m5t')
        return res

    @log_info
    def test_16_translipsum_generator_armenian(self):
        """Test the translipsum generator.

        Generating lorem ipsum paragraphs in Armenian.
        """
        generator = TranslipsumGenerator(language_code='hy')
        res = generator.generate_paragraph()
        self.assertTrue(res)
        return res

    @log_info
    def test_17_translipsum_generator_georgian(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Georgian.
        """
        generator = TranslipsumGenerator(language_code='ka')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_18_translipsum_generator_greek(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Greek.
        """
        generator = TranslipsumGenerator(language_code='el')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def __test_19_translipsum_generator_hebrew(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Hebrew.
        """
        generator = TranslipsumGenerator(language_code='he')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_20_translipsum_generator_cyrillic(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Cyrillic.
        """
        generator = TranslipsumGenerator(language_code='ru')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_20_translipsum_generator_ukrainian_cyrillic(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Ukrainian Cyrillic.
        """
        generator = TranslipsumGenerator(language_code='uk')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_20_translipsum_generator_bulgarian_cyrillic(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Bulgarian Cyrillic.
        """
        generator = TranslipsumGenerator(language_code='bg')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_20_translipsum_generator_mongolian_cyrillic(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Mongolian Cyrillic.
        """
        generator = TranslipsumGenerator(language_code='mn')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_20_translipsum_generator_serbian_cyrillic(self):
        """Test the translipsum generator.

        Generating lorem ipsum sentence in Serbian Cyrillic.
        """
        generator = TranslipsumGenerator(language_code='sr')
        res = generator.generate_sentence()
        self.assertTrue(res)
        return res

    @log_info
    def test_21_language_detection_armenian(self):
        """Test language detection.

        Detecting Armenian.
        """
        res = detect_language(self.armenian_text)
        self.assertEqual(res, 'hy')
        return res

    @log_info
    def test_22_language_detection_georgian(self):
        """Test language detection.

        Detecting Georgian.
        """
        res = detect_language(self.georgian_text)
        self.assertEqual(res, 'ka')
        return res

    @log_info
    def test_23_language_detection_greek(self):
        """Test language detection.

        Detecting Greek.
        """
        res = detect_language(self.greek_text)
        self.assertEqual(res, 'el')
        return res

    @log_info
    def __test_24_language_detection_hebrew(self):
        """Test language detection.

        Detecting Hebrew.
        """
        res = detect_language(self.hebrew_text)
        self.assertEqual(res, 'he')
        return res

    @log_info
    def test_25_language_detection_cyrillic(self):
        """Test language detection.

        Detecting Russian (Cyrillic).
        """
        res = detect_language(self.cyrillic_text)
        self.assertEqual(res, 'ru')
        return res

    @log_info
    def test_25_false_language_detection_cyrillic(self):
        """Test language detection.

        Detecting is not Russian (Cyrillic).
        """
        res = detect_language(self.latin_text)
        self.assertNotEqual(res, 'ru')
        return res

    @log_info
    def __test_25_language_detection_ukrainian_cyrillic(self):
        """Test language detection.

        Detecting Ukrainian (Cyrillic).
        """
        res = detect_language(self.ukrainian_cyrillic_text)
        self.assertEqual(res, 'uk')
        return res

    @log_info
    def __test_25_language_detection_bulgarian_cyrillic(self):
        """Test language detection.

        Detecting Bulgarian (Cyrillic).
        """
        res = detect_language(self.bulgarian_cyrillic_text)
        self.assertEqual(res, 'bg')
        return res

    @log_info
    def __test_25_language_detection_mongolian_cyrillic(self):
        """Test language detection.

        Detecting Mongolian (Cyrillic).
        """
        res = detect_language(self.mongolian_cyrillic_text)
        self.assertEqual(res, 'mn')
        return res

    @log_info
    def __test_25_language_detection_serbian_cyrillic(self):
        """Test language detection.

        Detecting Serbian (Cyrillic).
        """
        res = detect_language(self.serbian_cyrillic_text)
        self.assertEqual(res, 'sr')
        return res

    @log_info
    def test_26_slugify_armenian(self):
        """Test slugify from Armenian."""
        res = slugify(self.armenian_text)
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_27_slugify_georgian(self):
        """Test slugify from Georgian."""
        res = slugify(self.georgian_text)
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_28_slugify_greek(self):
        """Test slugify from Greek."""
        res = slugify(self.greek_text)
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def __test_29_slugify_hebrew(self):
        """Test slugify from Hebrew."""
        res = slugify(self.hebrew_text)
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_30_slugify_cyrillic(self):
        """Test slugify from Cyrillic."""
        res = slugify(self.cyrillic_text)
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_30_slugify_ukrainian_cyrillic(self):
        """Test slugify from Ukrainian Cyrillic."""
        # The language code is passed explicitly here, unlike in the
        # Armenian/Georgian/Greek/Russian slugify tests above.
        res = slugify(self.ukrainian_cyrillic_text, language_code='uk')
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_30_slugify_bulgarian_cyrillic(self):
        """Test slugify from Bulgarian Cyrillic."""
        res = slugify(self.bulgarian_cyrillic_text, language_code='bg')
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_30_slugify_mongolian_cyrillic(self):
        """Test slugify from Mongolian Cyrillic."""
        res = slugify(self.mongolian_cyrillic_text, language_code='mn')
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_30_slugify_serbian_cyrillic(self):
        """Test slugify from Serbian Cyrillic."""
        res = slugify(self.serbian_cyrillic_text, language_code='sr')
        self.assertEqual(res, 'lorem-ipsum-dolor-sit-amet')
        return res

    @log_info
    def test_31b_get_translit_function(self):
        """Test transliteration using get_translit_function."""
        translit_function = get_translit_function('hy')

        # Test Latin to Armenian
        res = translit_function(self.latin_text)
        self.assertEqual(res, self.armenian_text)

        # Test Armenian to Latin
        res = translit_function(self.armenian_text, reversed=True)
        self.assertEqual(res, self.latin_text)

    @log_info
    def test_31_override_settings(self):
        """Test settings override.

        NOTE(review): the overridden setting is not restored at the end
        of this test, so later tests run with the modified value.
        """
        def override_settings():
            return get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

        # Before any override the setting equals the packaged default.
        self.assertEqual(defaults.LANGUAGE_DETECTION_MAX_NUM_KEYWORDS,
                         override_settings())

        set_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS', 10)

        self.assertEqual(10, override_settings())

        return override_settings()

    @log_info
    def test_32_auto_translit_reversed(self):
        """Test automatic reversed translit.

        Test automatic reversed translit (from target script to source
        script) for Armenian, Georgian, Greek, Russian (Cyrillic) and
        Serbian (Cyrillic).
        """
        res = []
        texts = [
            self.armenian_text,
            self.georgian_text,
            self.greek_text,
            self.cyrillic_text,
            self.serbian_cyrillic_text
        ]

        for text in texts:
            # No language code is given to ``translit`` here.
            transliterated = translit(text, reversed=True)
            self.assertEqual(transliterated.capitalize(), self.latin_text)
            res.append(transliterated)

        return res

    @log_info
    def test_33_register_unregister(self):
        """Test register/un-register.

        This test leaves the shared registry modified: the Armenian pack
        is unregistered and ``ru`` is replaced by a force-registered pack.
        """
        from transliterate.contrib.languages.hy.translit_language_pack import (
            ArmenianLanguagePack
        )

        class A(TranslitLanguagePack):
            """Test class."""

            language_code = "ru"
            language_name = "Example"
            mapping = data.test_33_register_unregister_mapping

        # Since key `ru` already exists in the registry it can't be replaced
        # (without force-register).
        res = registry.register(A)
        self.assertFalse(res)

        # Now with force-register it can.
        res = registry.register(A, force=True)
        self.assertTrue(res)

        # Once we have it there and it's forced, we can't register another.
        res = registry.register(A, force=True)
        self.assertFalse(res)

        # Un-register non-forced language pack.
        res = registry.unregister(ArmenianLanguagePack)
        self.assertTrue(res)
        self.assertNotIn(ArmenianLanguagePack.language_code,
                         get_available_language_codes())

        # Un-registering the force-registered pack is expected to fail
        # and leave its language code available.
        res = registry.unregister(A)
        self.assertFalse(res)
        self.assertIn(A.language_code, get_available_language_codes())

    @log_info
    def __test_34_latin_to_latin(self):
        """Test latin to latin."""
        class LatinToLatinLanguagePack(TranslitLanguagePack):
            """Custom language pack.

            Gets rid of accented characters in Greek but leaves other
            characters intact.
            """

            language_code = "l2l"
            language_name = "Latin to Latin"
            mapping = data.test_34_latin_to_latin_mapping
            characters = data.test_34_latin_to_latin_characters
            reversed_characters = \
                data.test_34_latin_to_latin_reversed_characters

        res = registry.register(LatinToLatinLanguagePack)
        self.assertTrue(res)

        text = data.test_34_latin_to_latin_text

        pack = LatinToLatinLanguagePack()
        # The output is not asserted on; presumably this only checks that
        # the strict, non-silent call does not raise (confirm).
        pack.translit(text, strict=True, fail_silently=False)

    @log_info
    def __test_29_mappings(self):
        """Test mappings.

        Log the transliteration of every letter of both mapping halves
        of each available language pack. Nothing is asserted.
        """
        for language_pack in get_available_language_packs():
            LOGGER.debug(
                'Testing language pack %s %s',
                language_pack.language_code,
                language_pack.language_name
            )
            LOGGER.debug('Reversed test:')
            for letter in language_pack.mapping[1]:
                LOGGER.debug(
                    (
                        letter,
                        ' --> ',
                        translit(
                            letter,
                            language_pack.language_code,
                            reversed=True
                        )
                    )
                )
            LOGGER.debug('Normal test:')
            for letter in language_pack.mapping[0]:
                LOGGER.debug(
                    (
                        letter,
                        ' --> ',
                        translit(letter, language_pack.language_code)
                    )
                )

    @log_info
    def test_35_translit_serbian_latin_to_serbian_cyrillic(self):
        """Test transliteration from Serbian Latin to Serbian Cyrillic."""
        res = translit(self.pangram_serbian_latin_text, 'sr')
        self.assertEqual(res, self.pangram_serbian_cyrillic_text)
        return res

    @log_info
    def test_35_translit_serbian_cyrillic_to_serbian_latin(self):
        """Test transliteration from Serbian Cyrillic to Serbian Latin."""
        res = translit(self.pangram_serbian_cyrillic_text, 'sr',
                       reversed=True)
        self.assertEqual(res, self.pangram_serbian_latin_text)
        return res

# Run the tests when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()