Created
March 1, 2022 22:33
-
-
Save armaandh/82edde58f8d6bee440a3e4814c02eee3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Direction of consonant assimilation to apply when a final consonant meets a following {@code ㄹ}.
 * The romanization rules consult this value to choose between the "n" and "l" renderings.
 */
enum ConsonantAssimilation {
    /** The nasal reading is kept (e.g. the liquid is rendered as "n"). */
    Progressive,
    /** The liquid reading wins (e.g. the preceding consonant is rendered as "l"). */
    Regressive
}
/**
 * The kind of word being romanized. The pronunciation rules and the string
 * normalization step branch on this value.
 */
enum Type {
    /** Substantive (noun-like) words. */
    Substantives,
    /** Compound words. */
    Compound,
    /** Administrative district and street names. */
    District,
    /** Personal names. */
    Name,
    /** Personal names, using the customary surname spellings. */
    NameTypical,
    /** Ordinary text; used as the default. */
    Typical
}
| enum Chosung { | |
| ㄱ("g") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄺ: | |
| case ㄻ: | |
| case ㄼ: | |
| case ㄽ: | |
| case ㄾ: | |
| case ㄿ: | |
| case ㅀ: | |
| return "kk"; | |
| case ㅎ: | |
| return "k"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) { | |
| return prevCharacterPronunciation.endsWith("n"); | |
| } | |
| }, | |
| ㄲ("kk"), | |
| ㄴ("n") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄹ: | |
| case ㅀ: | |
| return "l"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄷ("d") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄾ: | |
| return "tt"; | |
| case ㄶ: | |
| case ㅎ: | |
| return "t"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄸ("tt"), | |
| ㄹ("r") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄱ: | |
| case ㄲ: | |
| case ㄳ: | |
| case ㄺ: | |
| case ㄼ: | |
| case ㄿ: | |
| case ㅁ: | |
| case ㅂ: | |
| case ㅄ: | |
| case ㅇ: | |
| case ㅋ: | |
| case ㅍ: | |
| return "n"; | |
| case ㄴ: | |
| case ㄷ: | |
| case ㄵ: | |
| case ㄶ: | |
| case ㅅ: | |
| case ㅆ: | |
| case ㅈ: | |
| case ㅊ: | |
| case ㅎ: | |
| switch (consonantAssimilation) { | |
| case Progressive: | |
| return "n"; | |
| default: | |
| return "l"; | |
| } | |
| case ㄹ: | |
| case ㄻ: | |
| case ㄽ: | |
| case ㄾ: | |
| case ㅀ: | |
| case ㅌ: | |
| return "l"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅁ("m"), | |
| ㅂ("b") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄾ: | |
| return "pp"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅃ("pp"), | |
| ㅅ("s"), | |
| ㅆ("ss"), | |
| ㅇ("") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄱ: | |
| if (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) { | |
| return "n"; | |
| } else { | |
| return "g"; | |
| } | |
| case ㄺ: | |
| return "g"; | |
| case ㄲ: | |
| return "kk"; | |
| case ㄳ: | |
| case ㄽ: | |
| case ㅄ: | |
| case ㅅ: | |
| return "s"; | |
| case ㅇ: | |
| if (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) { | |
| return "n"; | |
| } else { | |
| return defaultPronunciation; | |
| } | |
| case ㄴ: | |
| case ㄶ: | |
| return "n"; | |
| case ㄵ: | |
| case ㅈ: | |
| return "j"; | |
| case ㄷ: | |
| return currentCharacter.getJungsung().isInducePalatalization() ? "j" : "d"; | |
| case ㄹ: | |
| case ㅀ: | |
| if (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) { | |
| return "l"; | |
| } else { | |
| return "r"; | |
| } | |
| case ㄻ: | |
| case ㅁ: | |
| return "m"; | |
| case ㄼ: | |
| case ㅂ: | |
| return "b"; | |
| case ㄾ: | |
| case ㅌ: | |
| return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t"; | |
| case ㄿ: | |
| case ㅍ: | |
| return "p"; | |
| case ㅆ: | |
| return "ss"; | |
| case ㅊ: | |
| return "ch"; | |
| case ㅋ: | |
| return "k"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) { | |
| return prevCharacterPronunciation.endsWith("ng") && currentCharacterPronunciation.isEmpty(); | |
| } | |
| }, | |
| ㅈ("j") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㅎ: | |
| return "ch"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅉ("jj"), | |
| ㅊ("ch"), | |
| ㅋ("k"), | |
| ㅌ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㅈ: | |
| case ㅊ: | |
| return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) { | |
| return prevCharacterPronunciation.endsWith("t"); | |
| } | |
| }, | |
| ㅍ("p") { | |
| protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) { | |
| return prevCharacterPronunciation.endsWith("p"); | |
| } | |
| }, | |
| ㅎ("h") { | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (prevCharacter.getJongsung()) { | |
| case ㄱ: | |
| if (type == Type.Substantives) { | |
| return defaultPronunciation; | |
| } else { | |
| return ""; | |
| } | |
| case ㄲ: | |
| return "kk"; | |
| case ㄷ: | |
| if (type == Type.Substantives) { | |
| return defaultPronunciation; | |
| } else { | |
| return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t"; | |
| } | |
| case ㄾ: | |
| case ㅅ: | |
| case ㅆ: | |
| case ㅈ: | |
| case ㅊ: | |
| case ㅌ: | |
| return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t"; | |
| case ㄺ: | |
| return "k"; | |
| case ㄼ: | |
| return "p"; | |
| case ㄽ: | |
| return "s"; | |
| case ㅀ: | |
| return "r"; | |
| case ㅂ: | |
| if (type == Type.Substantives) { | |
| return defaultPronunciation; | |
| } else { | |
| return "p"; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) { | |
| return !currentCharacterPronunciation.isEmpty() && prevCharacterPronunciation.endsWith(String.valueOf(currentCharacterPronunciation.charAt(0))); | |
| } | |
| }; | |
| protected final String defaultPronunciation; | |
| Chosung(String defaultPronunciation) { | |
| this.defaultPronunciation = defaultPronunciation; | |
| } | |
| public String getPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| if (prevCharacter == null || !prevCharacter.isKoreanCharacter()) { | |
| return defaultPronunciation; | |
| } else { | |
| String complexPronunciation = getComplexPronunciation(prevCharacter, currentCharacter, consonantAssimilation, type); | |
| return isNeedHyphen(prevCharacter.getRomanizedString(null, currentCharacter, consonantAssimilation, type), complexPronunciation) ? "-" + complexPronunciation : complexPronunciation; | |
| } | |
| } | |
| protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return defaultPronunciation; | |
| } | |
| protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) { | |
| return false; | |
| } | |
| } | |
| /** | |
| * The vowel used as the middle syllable of Hangul, which is called "Jungsung". | |
| */ | |
| public enum Jungsung { | |
| ㅏ("a", false), | |
| ㅐ("ae", false), | |
| ㅑ("ya", true), | |
| ㅒ("yae", true), | |
| ㅓ("eo", false), | |
| ㅔ("e", false), | |
| ㅕ("yeo", true), | |
| ㅖ("ye", true), | |
| ㅗ("o", false), | |
| ㅘ("wa", false), | |
| ㅙ("wae", false), | |
| ㅚ("oe", false), | |
| ㅛ("yo", true), | |
| ㅜ("u", false), | |
| ㅝ("wo", false), | |
| ㅞ("we", false), | |
| ㅟ("wi", false), | |
| ㅠ("yu", true), | |
| ㅡ("eu", false), | |
| ㅢ("ui", false), | |
| ㅣ("i", true); | |
| private final String defaultPronunciation; | |
| private final boolean inducePalatalization; | |
| Jungsung(String defaultPronunciation, boolean inducePalatalization) { | |
| this.defaultPronunciation = defaultPronunciation; | |
| this.inducePalatalization = inducePalatalization; | |
| } | |
| public String getPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter) { | |
| boolean insertHyphen = false; | |
| if (prevCharacter != null && prevCharacter.isKoreanCharacter() && prevCharacter.getJongsung() == Jongsung.NONE && currentCharacter.getChosung() == Chosung.ㅇ) { | |
| switch (prevCharacter.getJungsung().defaultPronunciation.charAt(prevCharacter.getJungsung().defaultPronunciation.length() - 1)) { | |
| case 'a': | |
| switch (defaultPronunciation.charAt(0)) { | |
| case 'a': | |
| case 'e': | |
| insertHyphen = true; | |
| } | |
| break; | |
| case 'e': | |
| switch (defaultPronunciation.charAt(0)) { | |
| case 'a': | |
| case 'e': | |
| case 'o': | |
| case 'u': | |
| insertHyphen = true; | |
| } | |
| break; | |
| } | |
| } | |
| return insertHyphen ? "-" + defaultPronunciation : defaultPronunciation; | |
| } | |
| public boolean isInducePalatalization() { | |
| return inducePalatalization; | |
| } | |
| } | |
| /** | |
| * The consonant used as the final syllable of Hangul, which is called "Jongsung". | |
| */ | |
| public enum Jongsung { | |
| NONE(""), | |
| ㄱ("k") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄲ: | |
| case ㅋ: | |
| return ""; | |
| case ㅇ: | |
| if (type == Type.Compound && nextCharacter.jungsung.isInducePalatalization()) { | |
| return "ng"; | |
| } else { | |
| return ""; | |
| } | |
| case ㄴ: | |
| case ㅁ: | |
| case ㄹ: | |
| return "ng"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄲ("k") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄲ: | |
| case ㅋ: | |
| case ㅇ: | |
| case ㅎ: | |
| return ""; | |
| case ㄴ: | |
| case ㅁ: | |
| case ㄹ: | |
| return "ng"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄳ("k") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄲ: | |
| case ㅋ: | |
| return ""; | |
| case ㄴ: | |
| case ㅁ: | |
| case ㄹ: | |
| return "ng"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄴ("n") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄹ: | |
| switch (consonantAssimilation) { | |
| case Regressive: | |
| return "l"; | |
| default: | |
| return "n"; | |
| } | |
| case ㅇ: | |
| return ""; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄵ("n") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄹ: | |
| switch (consonantAssimilation) { | |
| case Regressive: | |
| return "l"; | |
| default: | |
| return "n"; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄶ("n") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return ㄴ.getComplexPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| }, | |
| ㄷ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄴ: | |
| case ㅁ: | |
| return "n"; | |
| case ㄸ: | |
| case ㅇ: | |
| case ㅌ: | |
| case ㅎ: | |
| if (type == Type.Substantives) { | |
| return defaultPronunciation; | |
| } else { | |
| return ""; | |
| } | |
| case ㄹ: | |
| switch (consonantAssimilation) { | |
| case Regressive: | |
| return "l"; | |
| default: | |
| return "n"; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄹ("l") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㅇ: | |
| if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) { | |
| return defaultPronunciation; | |
| } else { | |
| return ""; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄺ("k") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄱ: | |
| case ㄲ: | |
| case ㅇ: | |
| case ㅎ: | |
| return "l"; | |
| case ㄴ: | |
| case ㄹ: | |
| case ㅁ: | |
| return "ng"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄻ("m") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄹ: | |
| case ㅁ: | |
| case ㅇ: | |
| return "l"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄼ("l") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄴ: | |
| case ㄹ: | |
| return "m"; | |
| case ㄷ: | |
| case ㄸ: | |
| case ㅂ: | |
| case ㅅ: | |
| case ㅆ: | |
| case ㅈ: | |
| case ㅉ: | |
| case ㅊ: | |
| case ㅋ: | |
| case ㅌ: | |
| case ㅎ: | |
| return "p"; | |
| case ㅃ: | |
| return ""; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㄽ("l"), | |
| ㄾ("l"), | |
| ㄿ("l") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄴ: | |
| case ㄹ: | |
| return "m"; | |
| case ㄷ: | |
| case ㄸ: | |
| case ㅂ: | |
| case ㅅ: | |
| case ㅆ: | |
| case ㅈ: | |
| case ㅉ: | |
| case ㅊ: | |
| case ㅋ: | |
| case ㅌ: | |
| case ㅎ: | |
| return "p"; | |
| case ㅃ: | |
| case ㅍ: | |
| return ""; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅀ("l") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㅎ: | |
| return ""; | |
| case ㅇ: | |
| if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) { | |
| return defaultPronunciation; | |
| } else { | |
| return ""; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅁ("m") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㅇ: | |
| return ""; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅂ("p") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄴ: | |
| case ㄹ: | |
| case ㅁ: | |
| return "m"; | |
| case ㅃ: | |
| case ㅇ: | |
| return ""; | |
| case ㅎ: | |
| if (type == Type.Substantives) { | |
| return defaultPronunciation; | |
| } else { | |
| return ""; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅄ("p") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄴ: | |
| case ㄹ: | |
| case ㅁ: | |
| return "m"; | |
| case ㅃ: | |
| return ""; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅅ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| }, | |
| ㅆ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| }, | |
| ㅇ("ng"), | |
| ㅈ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| }, | |
| ㅊ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| }, | |
| ㅋ("k") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄲ: | |
| case ㅇ: | |
| return ""; | |
| case ㄴ: | |
| case ㅁ: | |
| case ㄹ: | |
| return "ng"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅌ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄴ: | |
| case ㅁ: | |
| return "n"; | |
| case ㄸ: | |
| case ㅇ: | |
| case ㅎ: | |
| return ""; | |
| case ㄹ: | |
| return "l"; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅍ("p") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㅃ: | |
| case ㅇ: | |
| return ""; | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }, | |
| ㅎ("t") { | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| switch (nextCharacter.getChosung()) { | |
| case ㄱ: | |
| case ㄲ: | |
| case ㄷ: | |
| case ㄸ: | |
| case ㅇ: | |
| case ㅈ: | |
| case ㅉ: | |
| case ㅊ: | |
| case ㅋ: | |
| case ㅌ: | |
| case ㅍ: | |
| case ㅎ: | |
| return ""; | |
| case ㄴ: | |
| case ㅁ: | |
| return "n"; | |
| case ㄹ: | |
| switch (consonantAssimilation) { | |
| case Regressive: | |
| return "l"; | |
| default: | |
| return "n"; | |
| } | |
| default: | |
| return defaultPronunciation; | |
| } | |
| } | |
| }; | |
| protected final String defaultPronunciation; | |
| Jongsung(String defaultPronunciation) { | |
| this.defaultPronunciation = defaultPronunciation; | |
| } | |
| public String getPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return (nextCharacter == null || !nextCharacter.isKoreanCharacter()) ? defaultPronunciation : getComplexPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| return defaultPronunciation; | |
| } | |
| } | |
| class KoreanCharacter { | |
| /** | |
| * First character code point in Hangul Syllables in Unicode table ({@code 가}). | |
| */ | |
| public final static int KoreanLowerValue = 0xAC00; | |
| /** | |
| * Last character code point in Hangul Syllables in Unicode table ({@code 힣}). | |
| */ | |
| public final static int KoreanUpperValue = 0xD7A3; | |
| /** | |
| * The original character from constructor's argument. | |
| */ | |
| private final char character; | |
| /** | |
| * Disassembled initial syllable of Hangul. | |
| */ | |
| private Chosung chosung; | |
| /** | |
| * Disassembled middle syllable of Hangul. | |
| */ | |
| private Jungsung jungsung; | |
| /** | |
| * Disassembled final syllable of Hangul. | |
| */ | |
| private Jongsung jongsung; | |
| /** | |
| * Constructor | |
| * | |
| * @param koreanCharacter | |
| * the Hangul or other character | |
| */ | |
| public KoreanCharacter(char koreanCharacter) { | |
| character = koreanCharacter; | |
| if (isKoreanCharacter(character)) { | |
| int value = character - KoreanLowerValue; | |
| chosung = Chosung.values()[value / (21 * 28)]; | |
| jungsung = Jungsung.values()[value % (21 * 28) / 28]; | |
| jongsung = Jongsung.values()[value % 28]; | |
| } | |
| } | |
| /** | |
| * Constructor with Hangul object with each syllables. | |
| * | |
| * @param chosung | |
| * the consonant used as the initial syllable of Hangul. | |
| * @param jungsung | |
| * the vowel used as the middle syllable of Hangul. | |
| * @param jongsung | |
| * the consonant used as the final syllable of Hangul. | |
| * @throws NullPointerException | |
| * if any arguments is null. | |
| */ | |
| public KoreanCharacter(Chosung chosung, Jungsung jungsung, Jongsung jongsung) { | |
| Objects.requireNonNull(chosung, "All parameters must not be null."); | |
| Objects.requireNonNull(jungsung, "All parameters must not be null."); | |
| Objects.requireNonNull(jongsung, "All parameters must not be null."); | |
| this.chosung = chosung; | |
| this.jungsung = jungsung; | |
| this.jongsung = jongsung; | |
| this.character = (char) ((chosung.ordinal() * 21 * 28 + jungsung.ordinal() * 28 + jongsung.ordinal()) + KoreanLowerValue); | |
| } | |
| /** | |
| * Whether or not the character of this object is Hangul. | |
| * | |
| * @return Whether all syllables exist to complete Hangul character. | |
| */ | |
| public boolean isKoreanCharacter() { | |
| return chosung != null && jungsung != null && jongsung != null; | |
| } | |
| /** | |
| * @return the initial syllable if object has Hangul character, and null if not. | |
| */ | |
| public Chosung getChosung() { | |
| return chosung; | |
| } | |
| /** | |
| * @return the middle syllable if object has Hangul character, and null if not. | |
| */ | |
| public Jungsung getJungsung() { | |
| return jungsung; | |
| } | |
| /** | |
| * @return the final syllable if object has Hangul character, and null if not. | |
| */ | |
| public Jongsung getJongsung() { | |
| return jongsung; | |
| } | |
| /** | |
| * @return the character that this object has. | |
| */ | |
| public char getCharacter() { | |
| return character; | |
| } | |
| /** | |
| * @return the romanized string of the character this object has. | |
| */ | |
| public String getRomanizedString() { | |
| return getRomanizedString(null, null, ConsonantAssimilation.Progressive, Type.Typical); | |
| } | |
| /** | |
| * @param prevCharacter | |
| * the character preceding this character in the sentence. | |
| * @param nextCharacter | |
| * the character after this character in the sentence. | |
| * @param consonantAssimilation | |
| * the consonant assimilation type. | |
| * @param type | |
| * the type of word | |
| * @return the romanized string of the character this object has. | |
| */ | |
| public String getRomanizedString(KoreanCharacter prevCharacter, KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) { | |
| if (!isKoreanCharacter()) { | |
| return toString(); | |
| } | |
| if (type == Type.Name || type == Type.NameTypical) { | |
| prevCharacter = null; | |
| nextCharacter = null; | |
| } | |
| return chosung.getPronunciation(prevCharacter, this, consonantAssimilation, type) | |
| + jungsung.getPronunciation(prevCharacter, this) | |
| + jongsung.getPronunciation(nextCharacter, consonantAssimilation, type); | |
| } | |
| /** | |
| * To check if character is in the Hangul Syllable of Unicode table. | |
| * | |
| * @param character | |
| * the character to check. | |
| * @return true if the character is Hangul | |
| */ | |
| public static boolean isKoreanCharacter(char character) { | |
| return (KoreanLowerValue <= character && character <= KoreanUpperValue); | |
| } | |
| /** | |
| * Compares this object to another in ascending order. | |
| * | |
| * @param other | |
| * the other object to compare to. | |
| * @return the value of {@link Character#compareTo}. | |
| */ | |
| @Override | |
| public int compareTo(KoreanCharacter other) { | |
| return Character.compare(character, other.character); | |
| } | |
| /** | |
| * Compares this object to another to test if they are equal. | |
| * | |
| * @param other | |
| * the other object to compare to. | |
| * @return true if this object is equal. | |
| */ | |
| @Override | |
| public boolean equals(Object other) { | |
| if (this == other) { | |
| return true; | |
| } | |
| if (other == null || getClass() != other.getClass()) { | |
| return false; | |
| } | |
| return character == ((KoreanCharacter) other).character; | |
| } | |
| /** | |
| * Return the hash code for this character. | |
| * | |
| * @return the value of {@link Character#hashCode()} | |
| */ | |
| @Override | |
| public int hashCode() { | |
| return Character.hashCode(character); | |
| } | |
| /** | |
| * Returns a {@link String} object representing this character's value. | |
| * | |
| * @return a string representation of this character. | |
| */ | |
| @Override | |
| public String toString() { | |
| return String.valueOf(character); | |
| } | |
| } | |
| /** | |
| * A Java library that converts Korean into Roman characters. | |
| * It is implemented based on the National Korean Language Romanization and can be covered a lot, | |
| * but it is not perfect because it is difficult to implement 100% if there is no word dictionary data due to the nature of Korean. | |
| */ | |
| public class KoreanRomanizer { | |
| private static final Pattern doubleSurnames = Pattern.compile("^(\\s*)(강전|남궁|독고|동방|등정|망절|무본|사공|서문|선우|소봉|어금|장곡|제갈|황목|황보)(.{1,10})$"); | |
| private static final Pattern districtPostfixes = Pattern.compile("^(.{1,20}?)(특별자치도|특별자치시|특별시|광역시|대로|구|군|도|동|리|면|시|읍|가|길|로)(\\s*)$"); | |
| private static final Pattern districtPostfixesWithNumbers1 = Pattern.compile("^(.{0,20}?)(\\d+)(\\s*)(가길|가|번길|로|단지|동)(\\s*)$"); | |
| private static final Pattern districtPostfixesWithNumbers2 = Pattern.compile("^(.{0,20}?)(대?로)\\s*(\\d+[가번]?)(길)(\\s*)$"); | |
| private static final Map<String, String> typicalSurenameRules = new HashMap<String, String>() { | |
| { | |
| put("가", "Ka"); | |
| put("간", "Kan"); | |
| put("갈", "Kal"); | |
| put("감", "Kam"); | |
| put("강", "Kang"); | |
| put("강전", "Kangjun"); | |
| put("견", "Kyun"); | |
| put("경", "Kyung"); | |
| put("계", "Kye"); | |
| put("고", "Ko"); | |
| put("공", "Kong"); | |
| put("곽", "Kwak"); | |
| put("구", "Koo"); | |
| put("국", "Kook"); | |
| put("군", "Kun"); | |
| put("궁", "Koong"); | |
| put("궉", "Kwok"); | |
| put("권", "Kwon"); | |
| put("근", "Keun"); | |
| put("금", "Keum"); | |
| put("기", "Ki"); | |
| put("길", "Kil"); | |
| put("김", "Kim"); | |
| put("노", "Noh"); | |
| put("두", "Doo"); | |
| put("란", "Lan"); | |
| put("뢰", "Loi"); | |
| put("루", "Lu"); | |
| put("망절", "Mangjul"); | |
| put("명", "Myung"); | |
| put("문", "Moon"); | |
| put("박", "Park"); | |
| put("변", "Byun"); | |
| put("부", "Boo"); | |
| put("선", "Sun"); | |
| put("선우", "Sunwoo"); | |
| put("성", "Sung"); | |
| put("순", "Soon"); | |
| put("신", "Shin"); | |
| put("심", "Shim"); | |
| put("아", "Ah"); | |
| put("어금", "Eokum"); | |
| put("오", "Oh"); | |
| put("우", "Woo"); | |
| put("운", "Woon"); | |
| put("유", "Yoo"); | |
| put("윤", "Yoon"); | |
| put("이", "Lee"); | |
| put("임", "Lim"); | |
| put("정", "Jung"); | |
| put("조", "Cho"); | |
| put("주", "Joo"); | |
| put("준", "June"); | |
| put("즙", "Chup"); | |
| put("최", "Choi"); | |
| put("편", "Pyun"); | |
| put("평", "Pyung"); | |
| put("풍", "Poong"); | |
| put("현", "Hyun"); | |
| put("형", "Hyung"); | |
| put("흥", "Hong"); | |
| } | |
| }; | |
| /** | |
| * Romanize string. | |
| * | |
| * @param string | |
| * the string to convert to roman string. | |
| * @return the romanized string. | |
| * @throws NullPointerException | |
| * if argument string is null | |
| */ | |
| public static String romanize(String string) { | |
| return romanize(string, null, null); | |
| } | |
| /** | |
| * Romanize string with consonant assimilation option. | |
| * | |
| * @param string | |
| * the string to convert to roman string. | |
| * @param consonantAssimilation | |
| * the consonant assimilation type. | |
| * @return the romanized string. | |
| * @throws NullPointerException | |
| * if argument string is null | |
| */ | |
| public static String romanize(String string, KoreanCharacter.ConsonantAssimilation consonantAssimilation) { | |
| return romanize(string, null, consonantAssimilation); | |
| } | |
| /** | |
| * Romanize string with type option. | |
| * | |
| * @param string | |
| * the string to convert to roman string. | |
| * @param type | |
| * the type of word | |
| * @return the romanized string. | |
| * @throws NullPointerException | |
| * if argument string is null | |
| */ | |
| public static String romanize(String string, KoreanCharacter.Type type) { | |
| return romanize(string, type, null); | |
| } | |
| /** | |
| * Romanize string with Consonant assimilation and type option. | |
| * | |
| * @param string | |
| * the string to convert. | |
| * @param type | |
| * the type of word | |
| * @param consonantAssimilation | |
| * the consonant assimilation type. | |
| * @return Romanized string | |
| * @throws NullPointerException | |
| * if string parameter is null | |
| */ | |
| public static String romanize(String string, KoreanCharacter.Type type, KoreanCharacter.ConsonantAssimilation consonantAssimilation) { | |
| Objects.requireNonNull(string, "String should not be null."); | |
| consonantAssimilation = (consonantAssimilation == null) ? KoreanCharacter.ConsonantAssimilation.Regressive : consonantAssimilation; | |
| type = (type == null) ? KoreanCharacter.Type.Typical : type; | |
| switch (type) { | |
| case Name: | |
| case NameTypical: | |
| string = normalizeName(string, type); | |
| break; | |
| case District: | |
| string = normalizeDistrict(string); | |
| break; | |
| } | |
| StringBuilder buffer = new StringBuilder(string.length() * 3); | |
| KoreanCharacter prevCharacter; | |
| KoreanCharacter currentCharacter = null; | |
| KoreanCharacter nextCharacter = null; | |
| for (int i = 0; i < string.length(); i++) { | |
| prevCharacter = currentCharacter; | |
| currentCharacter = (nextCharacter == null) ? new KoreanCharacter(string.charAt(i)) : nextCharacter; | |
| nextCharacter = (i < string.length() - 1) ? new KoreanCharacter(string.charAt(i + 1)) : null; | |
| if (currentCharacter.isKoreanCharacter()) { | |
| String pronunciation = currentCharacter.getRomanizedString(prevCharacter, nextCharacter, consonantAssimilation, type); | |
| if (prevCharacter == null || !prevCharacter.isKoreanCharacter()) { | |
| if (type == KoreanCharacter.Type.District && prevCharacter != null && (prevCharacter.toString().equals("-") || Character.isDigit(prevCharacter.getCharacter()))) { | |
| buffer.append(pronunciation); | |
| } else { | |
| buffer.append(Character.toUpperCase(pronunciation.charAt(0))); | |
| buffer.append(pronunciation.substring(1)); | |
| } | |
| } else { | |
| buffer.append(pronunciation); | |
| } | |
| } else { | |
| buffer.append(currentCharacter); | |
| } | |
| } | |
| return buffer.toString(); | |
| } | |
| /** | |
| * The {@code main} method to convert string from the standard input. | |
| * | |
| * @param args | |
| * first argument is {@link KoreanCharacter.Type} value, | |
| * second argument is {@link KoreanCharacter.ConsonantAssimilation} value | |
| * (Both arguments must be specified or none specified.) | |
| */ | |
| // public static void main(String... args) { | |
| // get("/romanize", "application/json", (req, res) -> { | |
| // String text = req.queryParams("text"); | |
| // return KoreanRomanizer.romanize(text, null, null); | |
| // }); | |
| // } | |
| public static void main(String... args) { | |
| //return KoreanRomanizer.romanize(text, null, null); | |
| System.out.println(KoreanRomanizer.romanize("바나나", null, null)); // expect banana comes back | |
| } | |
| /** | |
| * @param string | |
| * the name string to normalize. | |
| * @param type | |
| * the type of word | |
| * @return the normalized name string. | |
| */ | |
| private static String normalizeName(String string, KoreanCharacter.Type type) { | |
| Matcher matcher = doubleSurnames.matcher(string); | |
| if (type == KoreanCharacter.Type.NameTypical) { | |
| if (matcher.find()) { | |
| return matcher.group(1) + typicalSurenameRules.getOrDefault(matcher.group(2), matcher.group(2)) + " " + matcher.group(3); | |
| } else { | |
| return typicalSurenameRules.getOrDefault(String.valueOf(string.charAt(0)), String.valueOf(string.charAt(0))) + " " + string.substring(1); | |
| } | |
| } else { | |
| if (matcher.find()) { | |
| return matcher.group(1) + matcher.group(2) + " " + matcher.group(3); | |
| } else { | |
| return string.charAt(0) + " " + string.substring(1); | |
| } | |
| } | |
| } | |
| /** | |
| * @param string | |
| * the district string to normalize. | |
| * @return the normalized district string. | |
| */ | |
| private static String normalizeDistrict(String string) { | |
| Matcher matcher = districtPostfixesWithNumbers2.matcher(string); | |
| if (matcher.find()) { | |
| return matcher.group(1) + "-" + matcher.group(2) + " " + matcher.group(3) + "-" + matcher.group(4) + matcher.group(5); | |
| } else { | |
| matcher = districtPostfixesWithNumbers1.matcher(string); | |
| if (matcher.find()) { | |
| return matcher.group(1) + (matcher.group(1).endsWith(" ") ? "" : " ") + matcher.group(2) + "-" + matcher.group(3) + matcher.group(4); | |
| } else { | |
| matcher = districtPostfixes.matcher(string); | |
| if (matcher.find()) { | |
| return matcher.group(1) + "-" + matcher.group(2) + matcher.group(3); | |
| } | |
| } | |
| } | |
| return string; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment