/**
 * A single character that, when it lies in the Unicode "Hangul Syllables" range,
 * is disassembled into its initial consonant ({@link Chosung}), vowel ({@link Jungsung})
 * and final consonant ({@link Jongsung}). Any other character is carried through untouched.
 *
 * <p>The enums used for romanization are nested here because the public API of
 * {@link KoreanRomanizer} refers to them as {@code KoreanCharacter.Type} and
 * {@code KoreanCharacter.ConsonantAssimilation}.
 */
class KoreanCharacter implements Comparable<KoreanCharacter> {

    /**
     * Direction of consonant assimilation to apply where the rule tables distinguish
     * between the two (for example a final {@code ㄴ} followed by an initial {@code ㄹ}).
     */
    enum ConsonantAssimilation {
        Progressive,
        Regressive
    }

    /**
     * The kind of word being romanized; several rule-table branches depend on it.
     */
    enum Type {
        Substantives,
        Compound,
        District,
        Name,
        NameTypical,
        Typical
    }

    /**
     * The consonant used as the initial syllable of Hangul, which is called "Chosung".
     *
     * <p>The declaration order must not change: {@link KoreanCharacter} maps code points
     * to constants through {@code values()} and {@code ordinal()}.
     */
    enum Chosung {
        ㄱ("g") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㄺ: case ㄻ: case ㄼ: case ㄽ: case ㄾ: case ㄿ: case ㅀ:
                        return "kk";
                    case ㅎ:
                        return "k";
                    default:
                        return defaultPronunciation;
                }
            }

            @Override
            protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
                return prevCharacterPronunciation.endsWith("n");
            }
        },
        ㄲ("kk"),
        ㄴ("n") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㄹ: case ㅀ:
                        return "l";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄷ("d") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㄾ:
                        return "tt";
                    case ㄶ: case ㅎ:
                        return "t";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄸ("tt"),
        ㄹ("r") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㄱ: case ㄲ: case ㄳ: case ㄺ: case ㄼ: case ㄿ:
                    case ㅁ: case ㅂ: case ㅄ: case ㅇ: case ㅋ: case ㅍ:
                        return "n";
                    case ㄴ: case ㄷ: case ㄵ: case ㄶ: case ㅅ: case ㅆ: case ㅈ: case ㅊ: case ㅎ:
                        // Only this group depends on the requested assimilation direction.
                        switch (consonantAssimilation) {
                            case Progressive:
                                return "n";
                            default:
                                return "l";
                        }
                    case ㄹ: case ㄻ: case ㄽ: case ㄾ: case ㅀ: case ㅌ:
                        return "l";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅁ("m"),
        ㅂ("b") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㄾ:
                        return "pp";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅃ("pp"),
        ㅅ("s"),
        ㅆ("ss"),
        ㅇ("") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                // The silent initial takes over the sound of the preceding final consonant.
                switch (prevCharacter.getJongsung()) {
                    case ㄱ:
                        if (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) {
                            return "n";
                        } else {
                            return "g";
                        }
                    case ㄺ:
                        return "g";
                    case ㄲ:
                        return "kk";
                    case ㄳ: case ㄽ: case ㅄ: case ㅅ:
                        return "s";
                    case ㅇ:
                        if (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) {
                            return "n";
                        } else {
                            return defaultPronunciation;
                        }
                    case ㄴ: case ㄶ:
                        return "n";
                    case ㄵ: case ㅈ:
                        return "j";
                    case ㄷ:
                        return currentCharacter.getJungsung().isInducePalatalization() ? "j" : "d";
                    case ㄹ: case ㅀ:
                        if (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) {
                            return "l";
                        } else {
                            return "r";
                        }
                    case ㄻ: case ㅁ:
                        return "m";
                    case ㄼ: case ㅂ:
                        return "b";
                    case ㄾ: case ㅌ:
                        return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                    case ㄿ: case ㅍ:
                        return "p";
                    case ㅆ:
                        return "ss";
                    case ㅊ:
                        return "ch";
                    case ㅋ:
                        return "k";
                    default:
                        return defaultPronunciation;
                }
            }

            @Override
            protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
                return prevCharacterPronunciation.endsWith("ng") && currentCharacterPronunciation.isEmpty();
            }
        },
        ㅈ("j") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㅎ:
                        return "ch";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅉ("jj"),
        ㅊ("ch"),
        ㅋ("k"),
        ㅌ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㅈ: case ㅊ:
                        return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                    default:
                        return defaultPronunciation;
                }
            }

            @Override
            protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
                return prevCharacterPronunciation.endsWith("t");
            }
        },
        ㅍ("p") {
            @Override
            protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
                return prevCharacterPronunciation.endsWith("p");
            }
        },
        ㅎ("h") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (prevCharacter.getJongsung()) {
                    case ㄱ:
                        if (type == Type.Substantives) {
                            return defaultPronunciation;
                        } else {
                            return "";
                        }
                    case ㄲ:
                        return "kk";
                    case ㄷ:
                        if (type == Type.Substantives) {
                            return defaultPronunciation;
                        } else {
                            return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                        }
                    case ㄾ: case ㅅ: case ㅆ: case ㅈ: case ㅊ: case ㅌ:
                        return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                    case ㄺ:
                        return "k";
                    case ㄼ:
                        return "p";
                    case ㄽ:
                        return "s";
                    case ㅀ:
                        return "r";
                    case ㅂ:
                        if (type == Type.Substantives) {
                            return defaultPronunciation;
                        } else {
                            return "p";
                        }
                    default:
                        return defaultPronunciation;
                }
            }

            @Override
            protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
                return !currentCharacterPronunciation.isEmpty()
                        && prevCharacterPronunciation.endsWith(String.valueOf(currentCharacterPronunciation.charAt(0)));
            }
        };

        /** Romanization used when no preceding Hangul character influences this consonant. */
        protected final String defaultPronunciation;

        Chosung(String defaultPronunciation) {
            this.defaultPronunciation = defaultPronunciation;
        }

        /**
         * Returns the romanization of this initial consonant in context.
         *
         * @param prevCharacter
         *            the preceding character, or null; when it is null or not Hangul the default
         *            pronunciation is returned.
         * @param currentCharacter
         *            the character this consonant belongs to.
         * @param consonantAssimilation
         *            the consonant assimilation type.
         * @param type
         *            the type of word
         * @return the romanized consonant, prefixed with {@code "-"} when {@link #isNeedHyphen} says so.
         */
        public String getPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                ConsonantAssimilation consonantAssimilation, Type type) {
            if (prevCharacter == null || !prevCharacter.isKoreanCharacter()) {
                return defaultPronunciation;
            }
            String complexPronunciation =
                    getComplexPronunciation(prevCharacter, currentCharacter, consonantAssimilation, type);
            // The previous character is romanized with this character as its successor so the
            // hyphen decision sees the final consonant exactly as it will be written.
            String prevPronunciation =
                    prevCharacter.getRomanizedString(null, currentCharacter, consonantAssimilation, type);
            return isNeedHyphen(prevPronunciation, complexPronunciation)
                    ? "-" + complexPronunciation
                    : complexPronunciation;
        }

        /**
         * Context-dependent romanization; constants override this where the preceding final
         * consonant changes the sound. The base implementation returns the default pronunciation.
         */
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter,
                ConsonantAssimilation consonantAssimilation, Type type) {
            return defaultPronunciation;
        }

        /**
         * Whether a hyphen must separate the previous character's romanization from this one.
         * The base implementation never requests one.
         */
        protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
            return false;
        }
    }

    /**
     * The vowel used as the middle syllable of Hangul, which is called "Jungsung".
     *
     * <p>The declaration order must not change: {@link KoreanCharacter} maps code points
     * to constants through {@code values()} and {@code ordinal()}.
     */
    public enum Jungsung {
        ㅏ("a", false),
        ㅐ("ae", false),
        ㅑ("ya", true),
        ㅒ("yae", true),
        ㅓ("eo", false),
        ㅔ("e", false),
        ㅕ("yeo", true),
        ㅖ("ye", true),
        ㅗ("o", false),
        ㅘ("wa", false),
        ㅙ("wae", false),
        ㅚ("oe", false),
        ㅛ("yo", true),
        ㅜ("u", false),
        ㅝ("wo", false),
        ㅞ("we", false),
        ㅟ("wi", false),
        ㅠ("yu", true),
        ㅡ("eu", false),
        ㅢ("ui", false),
        ㅣ("i", true);

        private final String defaultPronunciation;
        private final boolean inducePalatalization;

        Jungsung(String defaultPronunciation, boolean inducePalatalization) {
            this.defaultPronunciation = defaultPronunciation;
            this.inducePalatalization = inducePalatalization;
        }

        /**
         * Returns the romanization of this vowel, prefixed with {@code "-"} when the previous
         * character is Hangul without a final consonant, the current character starts with the
         * silent {@code ㅇ}, and joining the two vowels directly would be ambiguous
         * (previous ends in {@code a} followed by {@code a/e}, or ends in {@code e} followed by
         * {@code a/e/o/u}).
         *
         * @param prevCharacter
         *            the preceding character, or null.
         * @param currentCharacter
         *            the character this vowel belongs to.
         * @return the romanized vowel, possibly hyphen-prefixed.
         */
        public String getPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter) {
            boolean insertHyphen = false;
            if (prevCharacter != null && prevCharacter.isKoreanCharacter()
                    && prevCharacter.getJongsung() == Jongsung.NONE
                    && currentCharacter.getChosung() == Chosung.ㅇ) {
                String prevVowel = prevCharacter.getJungsung().defaultPronunciation;
                char first = defaultPronunciation.charAt(0);
                switch (prevVowel.charAt(prevVowel.length() - 1)) {
                    case 'a':
                        insertHyphen = first == 'a' || first == 'e';
                        break;
                    case 'e':
                        insertHyphen = first == 'a' || first == 'e' || first == 'o' || first == 'u';
                        break;
                    default:
                        break;
                }
            }
            return insertHyphen ? "-" + defaultPronunciation : defaultPronunciation;
        }

        /**
         * @return the flag supplied for this vowel at declaration; the consonant rule tables
         *         consult it when choosing palatalized variants.
         */
        public boolean isInducePalatalization() {
            return inducePalatalization;
        }
    }

    /**
     * The consonant used as the final syllable of Hangul, which is called "Jongsung".
     *
     * <p>The declaration order must not change: {@link KoreanCharacter} maps code points
     * to constants through {@code values()} and {@code ordinal()}.
     */
    public enum Jongsung {
        NONE(""),
        ㄱ("k") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄲ: case ㅋ:
                        return "";
                    case ㅇ:
                        if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) {
                            return "ng";
                        } else {
                            return "";
                        }
                    case ㄴ: case ㅁ: case ㄹ:
                        return "ng";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄲ("k") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄲ: case ㅋ: case ㅇ: case ㅎ:
                        return "";
                    case ㄴ: case ㅁ: case ㄹ:
                        return "ng";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄳ("k") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄲ: case ㅋ:
                        return "";
                    case ㄴ: case ㅁ: case ㄹ:
                        return "ng";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄴ("n") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄹ:
                        switch (consonantAssimilation) {
                            case Regressive:
                                return "l";
                            default:
                                return "n";
                        }
                    case ㅇ:
                        return "";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄵ("n") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄹ:
                        switch (consonantAssimilation) {
                            case Regressive:
                                return "l";
                            default:
                                return "n";
                        }
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄶ("n") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                // Shares the rule table of the final consonant ㄴ.
                return ㄴ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
            }
        },
        ㄷ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄴ: case ㅁ:
                        return "n";
                    case ㄸ: case ㅇ: case ㅌ: case ㅎ:
                        if (type == Type.Substantives) {
                            return defaultPronunciation;
                        } else {
                            return "";
                        }
                    case ㄹ:
                        switch (consonantAssimilation) {
                            case Regressive:
                                return "l";
                            default:
                                return "n";
                        }
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄹ("l") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㅇ:
                        if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) {
                            return defaultPronunciation;
                        } else {
                            return "";
                        }
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄺ("k") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄱ: case ㄲ: case ㅇ: case ㅎ:
                        return "l";
                    case ㄴ: case ㄹ: case ㅁ:
                        return "ng";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄻ("m") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄹ: case ㅁ: case ㅇ:
                        return "l";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄼ("l") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄴ: case ㄹ:
                        return "m";
                    case ㄷ: case ㄸ: case ㅂ: case ㅅ: case ㅆ: case ㅈ:
                    case ㅉ: case ㅊ: case ㅋ: case ㅌ: case ㅎ:
                        return "p";
                    case ㅃ:
                        return "";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㄽ("l"),
        ㄾ("l"),
        ㄿ("l") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄴ: case ㄹ:
                        return "m";
                    case ㄷ: case ㄸ: case ㅂ: case ㅅ: case ㅆ: case ㅈ:
                    case ㅉ: case ㅊ: case ㅋ: case ㅌ: case ㅎ:
                        return "p";
                    case ㅃ: case ㅍ:
                        return "";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅀ("l") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㅎ:
                        return "";
                    case ㅇ:
                        if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) {
                            return defaultPronunciation;
                        } else {
                            return "";
                        }
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅁ("m") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㅇ:
                        return "";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅂ("p") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄴ: case ㄹ: case ㅁ:
                        return "m";
                    case ㅃ: case ㅇ:
                        return "";
                    case ㅎ:
                        if (type == Type.Substantives) {
                            return defaultPronunciation;
                        } else {
                            return "";
                        }
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅄ("p") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄴ: case ㄹ: case ㅁ:
                        return "m";
                    case ㅃ:
                        return "";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅅ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                // Shares the rule table of the final consonant ㄷ.
                return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
            }
        },
        ㅆ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                // Shares the rule table of the final consonant ㄷ.
                return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
            }
        },
        ㅇ("ng"),
        ㅈ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                // Shares the rule table of the final consonant ㄷ.
                return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
            }
        },
        ㅊ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                // Shares the rule table of the final consonant ㄷ.
                return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
            }
        },
        ㅋ("k") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄲ: case ㅇ:
                        return "";
                    case ㄴ: case ㅁ: case ㄹ:
                        return "ng";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅌ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄴ: case ㅁ:
                        return "n";
                    case ㄸ: case ㅇ: case ㅎ:
                        return "";
                    case ㄹ:
                        return "l";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅍ("p") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㅃ: case ㅇ:
                        return "";
                    default:
                        return defaultPronunciation;
                }
            }
        },
        ㅎ("t") {
            @Override
            protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                    ConsonantAssimilation consonantAssimilation, Type type) {
                switch (nextCharacter.getChosung()) {
                    case ㄱ: case ㄲ: case ㄷ: case ㄸ: case ㅇ: case ㅈ:
                    case ㅉ: case ㅊ: case ㅋ: case ㅌ: case ㅍ: case ㅎ:
                        return "";
                    case ㄴ: case ㅁ:
                        return "n";
                    case ㄹ:
                        switch (consonantAssimilation) {
                            case Regressive:
                                return "l";
                            default:
                                return "n";
                        }
                    default:
                        return defaultPronunciation;
                }
            }
        };

        /** Romanization used when no following Hangul character influences this consonant. */
        protected final String defaultPronunciation;

        Jongsung(String defaultPronunciation) {
            this.defaultPronunciation = defaultPronunciation;
        }

        /**
         * Returns the romanization of this final consonant in context.
         *
         * @param nextCharacter
         *            the following character, or null; when it is null or not Hangul the default
         *            pronunciation is returned.
         * @param consonantAssimilation
         *            the consonant assimilation type.
         * @param type
         *            the type of word
         * @return the romanized final consonant (possibly empty).
         */
        public String getPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation,
                Type type) {
            if (nextCharacter == null || !nextCharacter.isKoreanCharacter()) {
                return defaultPronunciation;
            }
            return getComplexPronunciation(nextCharacter, consonantAssimilation, type);
        }

        /**
         * Context-dependent romanization; constants override this where the following initial
         * consonant changes the sound. The base implementation returns the default pronunciation.
         */
        protected String getComplexPronunciation(KoreanCharacter nextCharacter,
                ConsonantAssimilation consonantAssimilation, Type type) {
            return defaultPronunciation;
        }
    }

    /**
     * First character code point in Hangul Syllables in Unicode table ({@code 가}).
     */
    public final static int KoreanLowerValue = 0xAC00;

    /**
     * Last character code point in Hangul Syllables in Unicode table ({@code 힣}).
     */
    public final static int KoreanUpperValue = 0xD7A3;

    /**
     * The original character from constructor's argument.
     */
    private final char character;

    /**
     * Disassembled initial syllable of Hangul.
     */
    private Chosung chosung;

    /**
     * Disassembled middle syllable of Hangul.
     */
    private Jungsung jungsung;

    /**
     * Disassembled final syllable of Hangul.
     */
    private Jongsung jongsung;

    /**
     * Constructor. A character outside the Hangul Syllables range leaves all three
     * parts null.
     *
     * @param koreanCharacter
     *            the Hangul or other character
     */
    public KoreanCharacter(char koreanCharacter) {
        character = koreanCharacter;
        if (isKoreanCharacter(character)) {
            // Offset within the block = (chosung * 21 + jungsung) * 28 + jongsung.
            int value = character - KoreanLowerValue;
            chosung = Chosung.values()[value / (21 * 28)];
            jungsung = Jungsung.values()[value % (21 * 28) / 28];
            jongsung = Jongsung.values()[value % 28];
        }
    }

    /**
     * Constructor with Hangul object with each syllables.
     *
     * @param chosung
     *            the consonant used as the initial syllable of Hangul.
     * @param jungsung
     *            the vowel used as the middle syllable of Hangul.
     * @param jongsung
     *            the consonant used as the final syllable of Hangul.
     * @throws NullPointerException
     *             if any arguments is null.
     */
    public KoreanCharacter(Chosung chosung, Jungsung jungsung, Jongsung jongsung) {
        Objects.requireNonNull(chosung, "All parameters must not be null.");
        Objects.requireNonNull(jungsung, "All parameters must not be null.");
        Objects.requireNonNull(jongsung, "All parameters must not be null.");

        this.chosung = chosung;
        this.jungsung = jungsung;
        this.jongsung = jongsung;
        this.character = (char) ((chosung.ordinal() * 21 * 28 + jungsung.ordinal() * 28 + jongsung.ordinal())
                + KoreanLowerValue);
    }

    /**
     * Whether or not the character of this object is Hangul.
     *
     * @return Whether all syllables exist to complete Hangul character.
     */
    public boolean isKoreanCharacter() {
        return chosung != null && jungsung != null && jongsung != null;
    }

    /**
     * @return the initial syllable if object has Hangul character, and null if not.
     */
    public Chosung getChosung() {
        return chosung;
    }

    /**
     * @return the middle syllable if object has Hangul character, and null if not.
     */
    public Jungsung getJungsung() {
        return jungsung;
    }

    /**
     * @return the final syllable if object has Hangul character, and null if not.
     */
    public Jongsung getJongsung() {
        return jongsung;
    }

    /**
     * @return the character that this object has.
     */
    public char getCharacter() {
        return character;
    }

    /**
     * Romanizes this character without any neighbours, using
     * {@link ConsonantAssimilation#Progressive} and {@link Type#Typical}.
     *
     * @return the romanized string of the character this object has.
     */
    public String getRomanizedString() {
        return getRomanizedString(null, null, ConsonantAssimilation.Progressive, Type.Typical);
    }

    /**
     * @param prevCharacter
     *            the character preceding this character in the sentence.
     * @param nextCharacter
     *            the character after this character in the sentence.
     * @param consonantAssimilation
     *            the consonant assimilation type.
     * @param type
     *            the type of word
     * @return the romanized string of the character this object has; a non-Hangul
     *         character is returned as-is.
     */
    public String getRomanizedString(KoreanCharacter prevCharacter, KoreanCharacter nextCharacter,
            ConsonantAssimilation consonantAssimilation, Type type) {
        if (!isKoreanCharacter()) {
            return toString();
        }

        // Name types ignore both neighbours, so every syllable is romanized on its own.
        if (type == Type.Name || type == Type.NameTypical) {
            prevCharacter = null;
            nextCharacter = null;
        }

        return chosung.getPronunciation(prevCharacter, this, consonantAssimilation, type)
                + jungsung.getPronunciation(prevCharacter, this)
                + jongsung.getPronunciation(nextCharacter, consonantAssimilation, type);
    }

    /**
     * To check if character is in the Hangul Syllable of Unicode table.
     *
     * @param character
     *            the character to check.
     * @return true if the character is Hangul
     */
    public static boolean isKoreanCharacter(char character) {
        return (KoreanLowerValue <= character && character <= KoreanUpperValue);
    }

    /**
     * Compares this object to another in ascending order.
     *
     * @param other
     *            the other object to compare to.
     * @return the value of {@link Character#compare(char, char)} for the two wrapped characters.
     */
    @Override
    public int compareTo(KoreanCharacter other) {
        return Character.compare(character, other.character);
    }

    /**
     * Compares this object to another to test if they are equal.
     *
     * @param other
     *            the other object to compare to.
     * @return true if this object is equal.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }
        return character == ((KoreanCharacter) other).character;
    }

    /**
     * Return the hash code for this character.
     *
     * @return the value of {@link Character#hashCode(char)}
     */
    @Override
    public int hashCode() {
        return Character.hashCode(character);
    }

    /**
     * Returns a {@link String} object representing this character's value.
     *
     * @return a string representation of this character.
     */
    @Override
    public String toString() {
        return String.valueOf(character);
    }
}

/**
 * A Java library that converts Korean into Roman characters.
 * It is implemented based on the National Korean Language Romanization and covers many cases,
 * but it is not perfect: without word dictionary data, the nature of Korean makes a
 * 100% accurate implementation difficult.
 */
public class KoreanRomanizer {
    private static final Pattern doubleSurnames = Pattern.compile("^(\\s*)(강전|남궁|독고|동방|등정|망절|무본|사공|서문|선우|소봉|어금|장곡|제갈|황목|황보)(.{1,10})$");
    private static final Pattern districtPostfixes = Pattern.compile("^(.{1,20}?)(특별자치도|특별자치시|특별시|광역시|대로|구|군|도|동|리|면|시|읍|가|길|로)(\\s*)$");
    private static final Pattern districtPostfixesWithNumbers1 = Pattern.compile("^(.{0,20}?)(\\d+)(\\s*)(가길|가|번길|로|단지|동)(\\s*)$");
    private static final Pattern districtPostfixesWithNumbers2 = Pattern.compile("^(.{0,20}?)(대?로)\\s*(\\d+[가번]?)(길)(\\s*)$");

    /** Conventional (non-standard) spellings of surnames, used for {@code Type.NameTypical}. */
    private static final Map<String, String> typicalSurenameRules = buildTypicalSurnameRules();

    /**
     * Builds the surname table once at class initialization.
     *
     * @return map from Hangul surname to its conventional Latin spelling.
     */
    private static Map<String, String> buildTypicalSurnameRules() {
        Map<String, String> rules = new HashMap<>();
        rules.put("가", "Ka");
        rules.put("간", "Kan");
        rules.put("갈", "Kal");
        rules.put("감", "Kam");
        rules.put("강", "Kang");
        rules.put("강전", "Kangjun");
        rules.put("견", "Kyun");
        rules.put("경", "Kyung");
        rules.put("계", "Kye");
        rules.put("고", "Ko");
        rules.put("공", "Kong");
        rules.put("곽", "Kwak");
        rules.put("구", "Koo");
        rules.put("국", "Kook");
        rules.put("군", "Kun");
        rules.put("궁", "Koong");
        rules.put("궉", "Kwok");
        rules.put("권", "Kwon");
        rules.put("근", "Keun");
        rules.put("금", "Keum");
        rules.put("기", "Ki");
        rules.put("길", "Kil");
        rules.put("김", "Kim");
        rules.put("노", "Noh");
        rules.put("두", "Doo");
        rules.put("란", "Lan");
        rules.put("뢰", "Loi");
        rules.put("루", "Lu");
        rules.put("망절", "Mangjul");
        rules.put("명", "Myung");
        rules.put("문", "Moon");
        rules.put("박", "Park");
        rules.put("변", "Byun");
        rules.put("부", "Boo");
        rules.put("선", "Sun");
        rules.put("선우", "Sunwoo");
        rules.put("성", "Sung");
        rules.put("순", "Soon");
        rules.put("신", "Shin");
        rules.put("심", "Shim");
        rules.put("아", "Ah");
        rules.put("어금", "Eokum");
        rules.put("오", "Oh");
        rules.put("우", "Woo");
        rules.put("운", "Woon");
        rules.put("유", "Yoo");
        rules.put("윤", "Yoon");
        rules.put("이", "Lee");
        rules.put("임", "Lim");
        rules.put("정", "Jung");
        rules.put("조", "Cho");
        rules.put("주", "Joo");
        rules.put("준", "June");
        rules.put("즙", "Chup");
        rules.put("최", "Choi");
        rules.put("편", "Pyun");
        rules.put("평", "Pyung");
        rules.put("풍", "Poong");
        rules.put("현", "Hyun");
        rules.put("형", "Hyung");
        // NOTE(review): the key is "흥" although the value is "Hong"; possibly meant "홍" - confirm.
        rules.put("흥", "Hong");
        return rules;
    }

    /**
     * Romanize string.
     *
     * @param string
     *            the string to convert to roman string.
     * @return the romanized string.
     * @throws NullPointerException
     *             if argument string is null
     */
    public static String romanize(String string) {
        return romanize(string, null, null);
    }

    /**
     * Romanize string with consonant assimilation option.
     *
     * @param string
     *            the string to convert to roman string.
     * @param consonantAssimilation
     *            the consonant assimilation type.
     * @return the romanized string.
     * @throws NullPointerException
     *             if argument string is null
     */
    public static String romanize(String string, KoreanCharacter.ConsonantAssimilation consonantAssimilation) {
        return romanize(string, null, consonantAssimilation);
    }

    /**
     * Romanize string with type option.
     *
     * @param string
     *            the string to convert to roman string.
     * @param type
     *            the type of word
     * @return the romanized string.
     * @throws NullPointerException
     *             if argument string is null
     */
    public static String romanize(String string, KoreanCharacter.Type type) {
        return romanize(string, type, null);
    }

    /**
     * Romanize string with Consonant assimilation and type option.
     *
     * @param string
     *            the string to convert.
     * @param type
     *            the type of word; null means {@code Typical}.
     * @param consonantAssimilation
     *            the consonant assimilation type; null means {@code Regressive}.
     * @return Romanized string
     * @throws NullPointerException
     *             if string parameter is null
     */
    public static String romanize(String string, KoreanCharacter.Type type,
            KoreanCharacter.ConsonantAssimilation consonantAssimilation) {
        Objects.requireNonNull(string, "String should not be null.");

        consonantAssimilation = (consonantAssimilation == null)
                ? KoreanCharacter.ConsonantAssimilation.Regressive
                : consonantAssimilation;
        type = (type == null) ? KoreanCharacter.Type.Typical : type;

        switch (type) {
            case Name:
            case NameTypical:
                string = normalizeName(string, type);
                break;
            case District:
                string = normalizeDistrict(string);
                break;
            default:
                break;
        }

        StringBuilder buffer = new StringBuilder(string.length() * 3);
        KoreanCharacter prevCharacter;
        KoreanCharacter currentCharacter = null;
        KoreanCharacter nextCharacter = null;

        for (int i = 0; i < string.length(); i++) {
            // Slide a three-character window, reusing the look-ahead object from the last step.
            prevCharacter = currentCharacter;
            currentCharacter = (nextCharacter == null) ? new KoreanCharacter(string.charAt(i)) : nextCharacter;
            nextCharacter = (i < string.length() - 1) ? new KoreanCharacter(string.charAt(i + 1)) : null;

            if (!currentCharacter.isKoreanCharacter()) {
                buffer.append(currentCharacter);
                continue;
            }

            String pronunciation =
                    currentCharacter.getRomanizedString(prevCharacter, nextCharacter, consonantAssimilation, type);
            boolean startsWord = prevCharacter == null || !prevCharacter.isKoreanCharacter();
            // In district names, a syllable right after "-" or a digit stays lower-case.
            boolean districtContinuation = type == KoreanCharacter.Type.District && prevCharacter != null
                    && (prevCharacter.toString().equals("-") || Character.isDigit(prevCharacter.getCharacter()));

            if (startsWord && !districtContinuation) {
                buffer.append(Character.toUpperCase(pronunciation.charAt(0)));
                buffer.append(pronunciation.substring(1));
            } else {
                buffer.append(pronunciation);
            }
        }

        return buffer.toString();
    }

    /**
     * Demo entry point: prints the romanization of {@code 바나나} to standard output.
     *
     * @param args
     *            ignored.
     */
    public static void main(String... args) {
        System.out.println(KoreanRomanizer.romanize("바나나", null, null)); // expect banana comes back
    }

    /**
     * Separates the surname from the given name with a space; for {@code NameTypical}
     * the surname is also replaced by its conventional spelling when one is known.
     *
     * @param string
     *            the name string to normalize.
     * @param type
     *            the type of word
     * @return the normalized name string; an empty input is returned unchanged.
     */
    private static String normalizeName(String string, KoreanCharacter.Type type) {
        if (string.isEmpty()) {
            // Nothing to split; charAt(0) below would otherwise throw.
            return string;
        }

        Matcher matcher = doubleSurnames.matcher(string);
        boolean hasDoubleSurname = matcher.find();

        if (type == KoreanCharacter.Type.NameTypical) {
            if (hasDoubleSurname) {
                return matcher.group(1) + typicalSurenameRules.getOrDefault(matcher.group(2), matcher.group(2))
                        + " " + matcher.group(3);
            }
            String surname = String.valueOf(string.charAt(0));
            return typicalSurenameRules.getOrDefault(surname, surname) + " " + string.substring(1);
        }

        if (hasDoubleSurname) {
            return matcher.group(1) + matcher.group(2) + " " + matcher.group(3);
        }
        return string.charAt(0) + " " + string.substring(1);
    }

    /**
     * Inserts hyphens/spaces around administrative suffixes so that they are romanized
     * as separate units. The first matching pattern wins, tried in this order: road + number + "길",
     * number + suffix, plain suffix.
     *
     * @param string
     *            the district string to normalize.
     * @return the normalized district string, or the input when no pattern matches.
     */
    private static String normalizeDistrict(String string) {
        Matcher matcher = districtPostfixesWithNumbers2.matcher(string);
        if (matcher.find()) {
            return matcher.group(1) + "-" + matcher.group(2) + " " + matcher.group(3) + "-" + matcher.group(4)
                    + matcher.group(5);
        }

        matcher = districtPostfixesWithNumbers1.matcher(string);
        if (matcher.find()) {
            return matcher.group(1) + (matcher.group(1).endsWith(" ") ? "" : " ") + matcher.group(2) + "-"
                    + matcher.group(3) + matcher.group(4);
        }

        matcher = districtPostfixes.matcher(string);
        if (matcher.find()) {
            return matcher.group(1) + "-" + matcher.group(2) + matcher.group(3);
        }

        return string;
    }
}