Skip to content

Instantly share code, notes, and snippets.

@armaandh
Created March 1, 2022 22:33
Show Gist options
  • Select an option

  • Save armaandh/82edde58f8d6bee440a3e4814c02eee3 to your computer and use it in GitHub Desktop.

Select an option

Save armaandh/82edde58f8d6bee440a3e4814c02eee3 to your computer and use it in GitHub Desktop.
/**
 * Selects how a final consonant and a following initial {@code ㄹ} influence each other
 * when both pronunciations are possible.
 *
 * <p>In the pronunciation tables of this file, a final consonant such as {@code ㄴ} followed by
 * an initial {@code ㄹ} is written {@code nn} under {@link #Progressive} and {@code ll} under
 * {@link #Regressive}.
 */
enum ConsonantAssimilation {
    /** The preceding consonant wins: the following {@code ㄹ} is written as {@code n}. */
    Progressive,
    /** The following consonant wins: the preceding final consonant is written as {@code l}. */
    Regressive
}
/**
 * The kind of word being romanized; it switches individual rules in the pronunciation tables
 * and the pre-processing applied by the romanizer.
 */
enum Type {
    /** Substantive: an initial {@code ㅎ} after final {@code ㄱ}, {@code ㄷ} or {@code ㅂ} keeps both letters. */
    Substantives,
    /** Compound word: enables the extra {@code n}/{@code l} rules before {@code ㅇ} + y/i vowels. */
    Compound,
    /** Administrative district or street name: the input is hyphenated around known suffixes first. */
    District,
    /** Person name: the surname is split off and every syllable is romanized on its own. */
    Name,
    /** Person name, additionally using the table of customary surname spellings. */
    NameTypical,
    /** Ordinary text; this is the value used when no type is supplied. */
    Typical
}
/**
 * The consonant used as the initial syllable of Hangul, which is called "Chosung".
 *
 * <p>Each constant carries its stand-alone romanization. Constants whose spelling depends on the
 * final consonant of the preceding character override {@link #getComplexPronunciation}; constants
 * that may need a separating hyphen override {@link #isNeedHyphen}.
 *
 * <p>The declaration order matters: {@code KoreanCharacter} maps code points to constants by
 * position in {@code values()}.
 */
enum Chosung {
    ㄱ("g") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㄺ: case ㄻ: case ㄼ: case ㄽ: case ㄾ: case ㄿ: case ㅀ:
                    return "kk";
                case ㅎ:
                    return "k";
                default:
                    return defaultPronunciation;
            }
        }

        @Override
        protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
            // "n" + "g" would otherwise read as the single sound "ng".
            return prevCharacterPronunciation.endsWith("n");
        }
    },
    ㄲ("kk"),
    ㄴ("n") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㄹ: case ㅀ:
                    return "l";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄷ("d") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㄾ:
                    return "tt";
                case ㄶ: case ㅎ:
                    return "t";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄸ("tt"),
    ㄹ("r") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㄱ: case ㄲ: case ㄳ: case ㄺ: case ㄼ: case ㄿ:
                case ㅁ: case ㅂ: case ㅄ: case ㅇ: case ㅋ: case ㅍ:
                    return "n";
                case ㄴ: case ㄷ: case ㄵ: case ㄶ: case ㅅ: case ㅆ: case ㅈ: case ㅊ: case ㅎ:
                    // The only place in this enum where the assimilation direction matters.
                    switch (consonantAssimilation) {
                        case Progressive:
                            return "n";
                        default:
                            return "l";
                    }
                case ㄹ: case ㄻ: case ㄽ: case ㄾ: case ㅀ: case ㅌ:
                    return "l";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅁ("m"),
    ㅂ("b") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㄾ:
                    return "pp";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅃ("pp"),
    ㅅ("s"),
    ㅆ("ss"),
    ㅇ("") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            // A silent initial takes over the sound of the preceding final consonant.
            switch (prevCharacter.getJongsung()) {
                case ㄱ:
                    return (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) ? "n" : "g";
                case ㄺ:
                    return "g";
                case ㄲ:
                    return "kk";
                case ㄳ: case ㄽ: case ㅄ: case ㅅ:
                    return "s";
                case ㅇ:
                    return (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) ? "n" : defaultPronunciation;
                case ㄴ: case ㄶ:
                    return "n";
                case ㄵ: case ㅈ:
                    return "j";
                case ㄷ:
                    return currentCharacter.getJungsung().isInducePalatalization() ? "j" : "d";
                case ㄹ: case ㅀ:
                    return (type == Type.Compound && currentCharacter.getJungsung().isInducePalatalization()) ? "l" : "r";
                case ㄻ: case ㅁ:
                    return "m";
                case ㄼ: case ㅂ:
                    return "b";
                case ㄾ: case ㅌ:
                    return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                case ㄿ: case ㅍ:
                    return "p";
                case ㅆ:
                    return "ss";
                case ㅊ:
                    return "ch";
                case ㅋ:
                    return "k";
                default:
                    return defaultPronunciation;
            }
        }

        @Override
        protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
            // Keep a trailing "ng" visibly apart from the vowel that follows it.
            return prevCharacterPronunciation.endsWith("ng") && currentCharacterPronunciation.isEmpty();
        }
    },
    ㅈ("j") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㅎ:
                    return "ch";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅉ("jj"),
    ㅊ("ch"),
    ㅋ("k"),
    ㅌ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㅈ: case ㅊ:
                    return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                default:
                    return defaultPronunciation;
            }
        }

        @Override
        protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
            return prevCharacterPronunciation.endsWith("t");
        }
    },
    ㅍ("p") {
        @Override
        protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
            return prevCharacterPronunciation.endsWith("p");
        }
    },
    ㅎ("h") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (prevCharacter.getJongsung()) {
                case ㄱ:
                    return type == Type.Substantives ? defaultPronunciation : "";
                case ㄲ:
                    return "kk";
                case ㄷ:
                    if (type == Type.Substantives) {
                        return defaultPronunciation;
                    }
                    return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                case ㄾ: case ㅅ: case ㅆ: case ㅈ: case ㅊ: case ㅌ:
                    return currentCharacter.getJungsung().isInducePalatalization() ? "ch" : "t";
                case ㄺ:
                    return "k";
                case ㄼ:
                    return "p";
                case ㄽ:
                    return "s";
                case ㅀ:
                    return "r";
                case ㅂ:
                    return type == Type.Substantives ? defaultPronunciation : "p";
                default:
                    return defaultPronunciation;
            }
        }

        @Override
        protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
            // Hyphenate when the previous syllable ends with the letter this one starts with.
            if (currentCharacterPronunciation.isEmpty()) {
                return false;
            }
            return prevCharacterPronunciation.endsWith(currentCharacterPronunciation.substring(0, 1));
        }
    };

    /** Romanization used when the neighbouring characters have no influence. */
    protected final String defaultPronunciation;

    Chosung(String defaultPronunciation) {
        this.defaultPronunciation = defaultPronunciation;
    }

    /**
     * Returns the romanization of this initial consonant in context.
     *
     * @param prevCharacter
     * the character preceding the current one; may be null or a non-Hangul character,
     * in which case the default pronunciation is returned.
     * @param currentCharacter
     * the character this initial consonant belongs to.
     * @param consonantAssimilation
     * the consonant assimilation type.
     * @param type
     * the type of word
     * @return the romanized initial consonant, prefixed with {@code "-"} when {@link #isNeedHyphen} asks for it.
     */
    public String getPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
        if (prevCharacter == null || !prevCharacter.isKoreanCharacter()) {
            return defaultPronunciation;
        }
        String pronunciation = getComplexPronunciation(prevCharacter, currentCharacter, consonantAssimilation, type);
        // Romanize the previous character as if the current one follows it, to see how it ends.
        String prevRomanized = prevCharacter.getRomanizedString(null, currentCharacter, consonantAssimilation, type);
        return isNeedHyphen(prevRomanized, pronunciation) ? "-" + pronunciation : pronunciation;
    }

    /**
     * Returns the romanization after a Hangul character; the base implementation ignores the context.
     *
     * @param prevCharacter
     * the preceding Hangul character (never null here).
     * @param currentCharacter
     * the character this initial consonant belongs to.
     * @param consonantAssimilation
     * the consonant assimilation type.
     * @param type
     * the type of word
     * @return the context dependent romanization.
     */
    protected String getComplexPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
        return defaultPronunciation;
    }

    /**
     * Tells whether a hyphen must separate the previous syllable from this consonant.
     *
     * @param prevCharacterPronunciation
     * the romanization of the preceding character.
     * @param currentCharacterPronunciation
     * the romanization chosen for this initial consonant.
     * @return true if a hyphen is required; the base implementation never requires one.
     */
    protected boolean isNeedHyphen(String prevCharacterPronunciation, String currentCharacterPronunciation) {
        return false;
    }
}
/**
 * The vowel used as the middle syllable of Hangul, which is called "Jungsung".
 *
 * <p>Each constant stores its romanization and whether it induces palatalization of a
 * preceding consonant (the vowels romanized with a leading {@code y}, and {@code ㅣ}).
 * The declaration order matters: {@code KoreanCharacter} maps code points to constants by
 * position in {@code values()}.
 */
public enum Jungsung {
    ㅏ("a", false),
    ㅐ("ae", false),
    ㅑ("ya", true),
    ㅒ("yae", true),
    ㅓ("eo", false),
    ㅔ("e", false),
    ㅕ("yeo", true),
    ㅖ("ye", true),
    ㅗ("o", false),
    ㅘ("wa", false),
    ㅙ("wae", false),
    ㅚ("oe", false),
    ㅛ("yo", true),
    ㅜ("u", false),
    ㅝ("wo", false),
    ㅞ("we", false),
    ㅟ("wi", false),
    ㅠ("yu", true),
    ㅡ("eu", false),
    ㅢ("ui", false),
    ㅣ("i", true);

    private final String defaultPronunciation;
    private final boolean inducePalatalization;

    Jungsung(String defaultPronunciation, boolean inducePalatalization) {
        this.defaultPronunciation = defaultPronunciation;
        this.inducePalatalization = inducePalatalization;
    }

    /**
     * Returns the romanization of this vowel, hyphenated when it would otherwise merge
     * with the vowel of the preceding character into a different vowel spelling.
     *
     * @param prevCharacter
     * the character preceding the current one; may be null or non-Hangul.
     * @param currentCharacter
     * the character this vowel belongs to.
     * @return the romanized vowel, prefixed with {@code "-"} when a separator is needed.
     */
    public String getPronunciation(KoreanCharacter prevCharacter, KoreanCharacter currentCharacter) {
        // Two vowels only touch when the previous syllable has no final consonant
        // and this syllable starts with the silent initial.
        boolean vowelsTouch = prevCharacter != null
                && prevCharacter.isKoreanCharacter()
                && prevCharacter.getJongsung() == Jongsung.NONE
                && currentCharacter.getChosung() == Chosung.ㅇ;
        if (!vowelsTouch) {
            return defaultPronunciation;
        }

        String prevVowel = prevCharacter.getJungsung().defaultPronunciation;
        char prevLast = prevVowel.charAt(prevVowel.length() - 1);
        char first = defaultPronunciation.charAt(0);

        boolean ambiguous;
        if (prevLast == 'a') {
            // e.g. "a" + "e" would read as "ae"
            ambiguous = first == 'a' || first == 'e';
        } else if (prevLast == 'e') {
            // e.g. "e" + "o" would read as "eo"
            ambiguous = first == 'a' || first == 'e' || first == 'o' || first == 'u';
        } else {
            ambiguous = false;
        }
        return ambiguous ? "-" + defaultPronunciation : defaultPronunciation;
    }

    /**
     * @return true if this vowel palatalizes certain preceding consonants.
     */
    public boolean isInducePalatalization() {
        return inducePalatalization;
    }
}
/**
 * The consonant used as the final syllable of Hangul, which is called "Jongsung".
 *
 * <p>Each constant carries its stand-alone romanization. Constants whose spelling depends on the
 * initial consonant of the following character override {@link #getComplexPronunciation}; an
 * empty string means the sound is written by the following character instead.
 *
 * <p>The declaration order matters: {@code KoreanCharacter} maps code points to constants by
 * position in {@code values()}, with {@link #NONE} first.
 */
public enum Jongsung {
    /** No final consonant. */
    NONE(""),
    ㄱ("k") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄲ: case ㅋ:
                    return "";
                case ㅇ:
                    // Use the accessor like every other constant; the field itself is private to KoreanCharacter.
                    if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) {
                        return "ng";
                    } else {
                        return "";
                    }
                case ㄴ: case ㅁ: case ㄹ:
                    return "ng";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄲ("k") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄲ: case ㅋ: case ㅇ: case ㅎ:
                    return "";
                case ㄴ: case ㅁ: case ㄹ:
                    return "ng";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄳ("k") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄲ: case ㅋ:
                    return "";
                case ㄴ: case ㅁ: case ㄹ:
                    return "ng";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄴ("n") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄹ:
                    switch (consonantAssimilation) {
                        case Regressive:
                            return "l";
                        default:
                            return "n";
                    }
                case ㅇ:
                    return "";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄵ("n") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄹ:
                    switch (consonantAssimilation) {
                        case Regressive:
                            return "l";
                        default:
                            return "n";
                    }
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄶ("n") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            return ㄴ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
        }
    },
    ㄷ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄴ: case ㅁ:
                    return "n";
                case ㄸ: case ㅇ: case ㅌ: case ㅎ:
                    if (type == Type.Substantives) {
                        return defaultPronunciation;
                    } else {
                        return "";
                    }
                case ㄹ:
                    switch (consonantAssimilation) {
                        case Regressive:
                            return "l";
                        default:
                            return "n";
                    }
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄹ("l") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㅇ:
                    if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) {
                        return defaultPronunciation;
                    } else {
                        return "";
                    }
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄺ("k") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄱ: case ㄲ: case ㅇ: case ㅎ:
                    return "l";
                case ㄴ: case ㄹ: case ㅁ:
                    return "ng";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄻ("m") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄹ: case ㅁ: case ㅇ:
                    return "l";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄼ("l") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄴ: case ㄹ:
                    return "m";
                case ㄷ: case ㄸ: case ㅂ: case ㅅ: case ㅆ: case ㅈ:
                case ㅉ: case ㅊ: case ㅋ: case ㅌ: case ㅎ:
                    return "p";
                case ㅃ:
                    return "";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㄽ("l"),
    ㄾ("l"),
    ㄿ("l") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄴ: case ㄹ:
                    return "m";
                case ㄷ: case ㄸ: case ㅂ: case ㅅ: case ㅆ: case ㅈ:
                case ㅉ: case ㅊ: case ㅋ: case ㅌ: case ㅎ:
                    return "p";
                case ㅃ: case ㅍ:
                    return "";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅀ("l") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㅎ:
                    return "";
                case ㅇ:
                    if (type == Type.Compound && nextCharacter.getJungsung().isInducePalatalization()) {
                        return defaultPronunciation;
                    } else {
                        return "";
                    }
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅁ("m") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㅇ:
                    return "";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅂ("p") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄴ: case ㄹ: case ㅁ:
                    return "m";
                case ㅃ: case ㅇ:
                    return "";
                case ㅎ:
                    if (type == Type.Substantives) {
                        return defaultPronunciation;
                    } else {
                        return "";
                    }
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅄ("p") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄴ: case ㄹ: case ㅁ:
                    return "m";
                case ㅃ:
                    return "";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅅ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
        }
    },
    ㅆ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
        }
    },
    ㅇ("ng"),
    ㅈ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
        }
    },
    ㅊ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            return ㄷ.getComplexPronunciation(nextCharacter, consonantAssimilation, type);
        }
    },
    ㅋ("k") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄲ: case ㅇ:
                    return "";
                case ㄴ: case ㅁ: case ㄹ:
                    return "ng";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅌ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄴ: case ㅁ:
                    return "n";
                case ㄸ: case ㅇ: case ㅎ:
                    return "";
                case ㄹ:
                    return "l";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅍ("p") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㅃ: case ㅇ:
                    return "";
                default:
                    return defaultPronunciation;
            }
        }
    },
    ㅎ("t") {
        @Override
        protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
            switch (nextCharacter.getChosung()) {
                case ㄱ: case ㄲ: case ㄷ: case ㄸ: case ㅇ: case ㅈ:
                case ㅉ: case ㅊ: case ㅋ: case ㅌ: case ㅍ: case ㅎ:
                    return "";
                case ㄴ: case ㅁ:
                    return "n";
                case ㄹ:
                    switch (consonantAssimilation) {
                        case Regressive:
                            return "l";
                        default:
                            return "n";
                    }
                default:
                    return defaultPronunciation;
            }
        }
    };

    /** Romanization used when no Hangul character follows. */
    protected final String defaultPronunciation;

    Jongsung(String defaultPronunciation) {
        this.defaultPronunciation = defaultPronunciation;
    }

    /**
     * Returns the romanization of this final consonant in context.
     *
     * @param nextCharacter
     * the character following the current one; may be null or a non-Hangul character,
     * in which case the default pronunciation is returned.
     * @param consonantAssimilation
     * the consonant assimilation type.
     * @param type
     * the type of word
     * @return the romanized final consonant, possibly empty.
     */
    public String getPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
        return (nextCharacter == null || !nextCharacter.isKoreanCharacter()) ? defaultPronunciation : getComplexPronunciation(nextCharacter, consonantAssimilation, type);
    }

    /**
     * Returns the romanization before a Hangul character; the base implementation ignores the context.
     *
     * @param nextCharacter
     * the following Hangul character (never null here).
     * @param consonantAssimilation
     * the consonant assimilation type.
     * @param type
     * the type of word
     * @return the context dependent romanization.
     */
    protected String getComplexPronunciation(KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
        return defaultPronunciation;
    }
}
/**
 * A single character together with its Hangul decomposition, if it has one.
 *
 * <p>For a code point inside the Hangul Syllables block the initial consonant, vowel and final
 * consonant are derived arithmetically; for any other character the three parts are null and
 * the character is passed through unchanged when romanized. Instances are immutable and are
 * ordered and compared by their {@code char} value.
 */
class KoreanCharacter implements Comparable<KoreanCharacter> {
    /**
     * First character code point in Hangul Syllables in Unicode table ({@code 가}).
     */
    public final static int KoreanLowerValue = 0xAC00;
    /**
     * Last character code point in Hangul Syllables in Unicode table ({@code 힣}).
     */
    public final static int KoreanUpperValue = 0xD7A3;
    /**
     * The original character from constructor's argument.
     */
    private final char character;
    /**
     * Disassembled initial syllable of Hangul; null for non-Hangul characters.
     */
    private final Chosung chosung;
    /**
     * Disassembled middle syllable of Hangul; null for non-Hangul characters.
     */
    private final Jungsung jungsung;
    /**
     * Disassembled final syllable of Hangul; null for non-Hangul characters.
     */
    private final Jongsung jongsung;

    /**
     * Constructor
     *
     * @param koreanCharacter
     * the Hangul or other character
     */
    public KoreanCharacter(char koreanCharacter) {
        character = koreanCharacter;
        if (isKoreanCharacter(koreanCharacter)) {
            // Syllables are laid out as (initial * 21 + vowel) * 28 + final from the block start.
            int value = koreanCharacter - KoreanLowerValue;
            chosung = Chosung.values()[value / (21 * 28)];
            jungsung = Jungsung.values()[value % (21 * 28) / 28];
            jongsung = Jongsung.values()[value % 28];
        } else {
            chosung = null;
            jungsung = null;
            jongsung = null;
        }
    }

    /**
     * Constructor with Hangul object with each syllables.
     *
     * @param chosung
     * the consonant used as the initial syllable of Hangul.
     * @param jungsung
     * the vowel used as the middle syllable of Hangul.
     * @param jongsung
     * the consonant used as the final syllable of Hangul.
     * @throws NullPointerException
     * if any arguments is null.
     */
    public KoreanCharacter(Chosung chosung, Jungsung jungsung, Jongsung jongsung) {
        Objects.requireNonNull(chosung, "All parameters must not be null.");
        Objects.requireNonNull(jungsung, "All parameters must not be null.");
        Objects.requireNonNull(jongsung, "All parameters must not be null.");
        this.chosung = chosung;
        this.jungsung = jungsung;
        this.jongsung = jongsung;
        // Inverse of the decomposition above; relies on the enum declaration order.
        this.character = (char) ((chosung.ordinal() * 21 * 28 + jungsung.ordinal() * 28 + jongsung.ordinal()) + KoreanLowerValue);
    }

    /**
     * Whether or not the character of this object is Hangul.
     *
     * @return Whether all syllables exist to complete Hangul character.
     */
    public boolean isKoreanCharacter() {
        return chosung != null && jungsung != null && jongsung != null;
    }

    /**
     * @return the initial syllable if object has Hangul character, and null if not.
     */
    public Chosung getChosung() {
        return chosung;
    }

    /**
     * @return the middle syllable if object has Hangul character, and null if not.
     */
    public Jungsung getJungsung() {
        return jungsung;
    }

    /**
     * @return the final syllable if object has Hangul character, and null if not.
     */
    public Jongsung getJongsung() {
        return jongsung;
    }

    /**
     * @return the character that this object has.
     */
    public char getCharacter() {
        return character;
    }

    /**
     * Romanizes this character on its own, without neighbours.
     *
     * @return the romanized string of the character this object has.
     */
    public String getRomanizedString() {
        return getRomanizedString(null, null, ConsonantAssimilation.Progressive, Type.Typical);
    }

    /**
     * @param prevCharacter
     * the character preceding this character in the sentence.
     * @param nextCharacter
     * the character after this character in the sentence.
     * @param consonantAssimilation
     * the consonant assimilation type.
     * @param type
     * the type of word
     * @return the romanized string of the character this object has;
     * a non-Hangul character is returned unchanged.
     */
    public String getRomanizedString(KoreanCharacter prevCharacter, KoreanCharacter nextCharacter, ConsonantAssimilation consonantAssimilation, Type type) {
        if (!isKoreanCharacter()) {
            return toString();
        }
        if (type == Type.Name || type == Type.NameTypical) {
            // Names are romanized syllable by syllable, so neighbours are ignored.
            prevCharacter = null;
            nextCharacter = null;
        }
        return chosung.getPronunciation(prevCharacter, this, consonantAssimilation, type)
                + jungsung.getPronunciation(prevCharacter, this)
                + jongsung.getPronunciation(nextCharacter, consonantAssimilation, type);
    }

    /**
     * To check if character is in the Hangul Syllable of Unicode table.
     *
     * @param character
     * the character to check.
     * @return true if the character is Hangul
     */
    public static boolean isKoreanCharacter(char character) {
        return (KoreanLowerValue <= character && character <= KoreanUpperValue);
    }

    /**
     * Compares this object to another in ascending order.
     *
     * @param other
     * the other object to compare to.
     * @return the value of {@link Character#compare(char, char)} for the two characters.
     */
    @Override
    public int compareTo(KoreanCharacter other) {
        return Character.compare(character, other.character);
    }

    /**
     * Compares this object to another to test if they are equal.
     *
     * @param other
     * the other object to compare to.
     * @return true if this object is equal.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }
        return character == ((KoreanCharacter) other).character;
    }

    /**
     * Return the hash code for this character.
     *
     * @return the value of {@link Character#hashCode(char)}
     */
    @Override
    public int hashCode() {
        return Character.hashCode(character);
    }

    /**
     * Returns a {@link String} object representing this character's value.
     *
     * @return a string representation of this character.
     */
    @Override
    public String toString() {
        return String.valueOf(character);
    }
}
/**
 * A Java library that converts Korean into Roman characters.
 * It is implemented based on the Revised Romanization rules of the National Institute of Korean
 * Language and covers most cases, but it cannot be perfect: without word dictionary data some
 * pronunciations of Korean cannot be determined.
 *
 * <p>NOTE(review): this class refers to the option enums as {@code KoreanCharacter.Type} and
 * {@code KoreanCharacter.ConsonantAssimilation}, i.e. it expects them to be nested inside
 * {@code KoreanCharacter}; confirm that against the actual file layout.
 */
public class KoreanRomanizer {
    private static final Pattern doubleSurnames = Pattern.compile("^(\\s*)(강전|남궁|독고|동방|등정|망절|무본|사공|서문|선우|소봉|어금|장곡|제갈|황목|황보)(.{1,10})$");
    private static final Pattern districtPostfixes = Pattern.compile("^(.{1,20}?)(특별자치도|특별자치시|특별시|광역시|대로|구|군|도|동|리|면|시|읍|가|길|로)(\\s*)$");
    private static final Pattern districtPostfixesWithNumbers1 = Pattern.compile("^(.{0,20}?)(\\d+)(\\s*)(가길|가|번길|로|단지|동)(\\s*)$");
    private static final Pattern districtPostfixesWithNumbers2 = Pattern.compile("^(.{0,20}?)(대?로)\\s*(\\d+[가번]?)(길)(\\s*)$");
    /** Customary Latin spellings of surnames, used for {@code NameTypical}. */
    private static final Map<String, String> typicalSurenameRules = createTypicalSurnameRules();

    /**
     * Builds the surname table with a plain {@link HashMap} rather than double-brace
     * initialization, which would create an anonymous subclass.
     *
     * @return the map from Hangul surname to its customary spelling.
     */
    private static Map<String, String> createTypicalSurnameRules() {
        Map<String, String> rules = new HashMap<>();
        rules.put("가", "Ka");
        rules.put("간", "Kan");
        rules.put("갈", "Kal");
        rules.put("감", "Kam");
        rules.put("강", "Kang");
        rules.put("강전", "Kangjun");
        rules.put("견", "Kyun");
        rules.put("경", "Kyung");
        rules.put("계", "Kye");
        rules.put("고", "Ko");
        rules.put("공", "Kong");
        rules.put("곽", "Kwak");
        rules.put("구", "Koo");
        rules.put("국", "Kook");
        rules.put("군", "Kun");
        rules.put("궁", "Koong");
        rules.put("궉", "Kwok");
        rules.put("권", "Kwon");
        rules.put("근", "Keun");
        rules.put("금", "Keum");
        rules.put("기", "Ki");
        rules.put("길", "Kil");
        rules.put("김", "Kim");
        rules.put("노", "Noh");
        rules.put("두", "Doo");
        rules.put("란", "Lan");
        rules.put("뢰", "Loi");
        rules.put("루", "Lu");
        rules.put("망절", "Mangjul");
        rules.put("명", "Myung");
        rules.put("문", "Moon");
        rules.put("박", "Park");
        rules.put("변", "Byun");
        rules.put("부", "Boo");
        rules.put("선", "Sun");
        rules.put("선우", "Sunwoo");
        rules.put("성", "Sung");
        rules.put("순", "Soon");
        rules.put("신", "Shin");
        rules.put("심", "Shim");
        rules.put("아", "Ah");
        rules.put("어금", "Eokum");
        rules.put("오", "Oh");
        rules.put("우", "Woo");
        rules.put("운", "Woon");
        rules.put("유", "Yoo");
        rules.put("윤", "Yoon");
        rules.put("이", "Lee");
        rules.put("임", "Lim");
        rules.put("정", "Jung");
        rules.put("조", "Cho");
        rules.put("주", "Joo");
        rules.put("준", "June");
        rules.put("즙", "Chup");
        rules.put("최", "Choi");
        rules.put("편", "Pyun");
        rules.put("평", "Pyung");
        rules.put("풍", "Poong");
        rules.put("현", "Hyun");
        rules.put("형", "Hyung");
        rules.put("흥", "Hong");
        return rules;
    }

    /**
     * Romanize string.
     *
     * @param string
     * the string to convert to roman string.
     * @return the romanized string.
     * @throws NullPointerException
     * if argument string is null
     */
    public static String romanize(String string) {
        return romanize(string, null, null);
    }

    /**
     * Romanize string with consonant assimilation option.
     *
     * @param string
     * the string to convert to roman string.
     * @param consonantAssimilation
     * the consonant assimilation type.
     * @return the romanized string.
     * @throws NullPointerException
     * if argument string is null
     */
    public static String romanize(String string, KoreanCharacter.ConsonantAssimilation consonantAssimilation) {
        return romanize(string, null, consonantAssimilation);
    }

    /**
     * Romanize string with type option.
     *
     * @param string
     * the string to convert to roman string.
     * @param type
     * the type of word
     * @return the romanized string.
     * @throws NullPointerException
     * if argument string is null
     */
    public static String romanize(String string, KoreanCharacter.Type type) {
        return romanize(string, type, null);
    }

    /**
     * Romanize string with Consonant assimilation and type option.
     *
     * <p>The first letter of every run of Hangul is capitalized, except in district names
     * directly after a hyphen or a digit. Non-Hangul characters are copied unchanged.
     *
     * @param string
     * the string to convert.
     * @param type
     * the type of word; {@code Typical} is used when null.
     * @param consonantAssimilation
     * the consonant assimilation type; {@code Regressive} is used when null.
     * @return Romanized string
     * @throws NullPointerException
     * if string parameter is null
     */
    public static String romanize(String string, KoreanCharacter.Type type, KoreanCharacter.ConsonantAssimilation consonantAssimilation) {
        Objects.requireNonNull(string, "String should not be null.");
        consonantAssimilation = (consonantAssimilation == null) ? KoreanCharacter.ConsonantAssimilation.Regressive : consonantAssimilation;
        type = (type == null) ? KoreanCharacter.Type.Typical : type;
        switch (type) {
            case Name:
            case NameTypical:
                string = normalizeName(string, type);
                break;
            case District:
                string = normalizeDistrict(string);
                break;
            default:
                // other types need no pre-processing
                break;
        }
        StringBuilder buffer = new StringBuilder(string.length() * 3);
        KoreanCharacter prevCharacter;
        KoreanCharacter currentCharacter = null;
        KoreanCharacter nextCharacter = null;
        for (int i = 0; i < string.length(); i++) {
            // Slide a three character window; reuse the look-ahead from the previous round.
            prevCharacter = currentCharacter;
            currentCharacter = (nextCharacter == null) ? new KoreanCharacter(string.charAt(i)) : nextCharacter;
            nextCharacter = (i < string.length() - 1) ? new KoreanCharacter(string.charAt(i + 1)) : null;
            if (currentCharacter.isKoreanCharacter()) {
                String pronunciation = currentCharacter.getRomanizedString(prevCharacter, nextCharacter, consonantAssimilation, type);
                if (prevCharacter == null || !prevCharacter.isKoreanCharacter()) {
                    if (type == KoreanCharacter.Type.District && prevCharacter != null && (prevCharacter.toString().equals("-") || Character.isDigit(prevCharacter.getCharacter()))) {
                        // District suffixes after "-" or a number stay lower case.
                        buffer.append(pronunciation);
                    } else {
                        buffer.append(Character.toUpperCase(pronunciation.charAt(0)));
                        buffer.append(pronunciation.substring(1));
                    }
                } else {
                    buffer.append(pronunciation);
                }
            } else {
                buffer.append(currentCharacter);
            }
        }
        return buffer.toString();
    }

    /**
     * Smoke test: romanizes a fixed sample word and prints the result to standard output.
     *
     * @param args
     * ignored.
     */
    public static void main(String... args) {
        System.out.println(KoreanRomanizer.romanize("바나나", null, null)); // prints "Banana"
    }

    /**
     * Separates the surname from the given name with a space and, for {@code NameTypical},
     * replaces a known surname by its customary spelling.
     *
     * @param string
     * the name string to normalize.
     * @param type
     * the type of word
     * @return the normalized name string; an empty string is returned unchanged.
     */
    private static String normalizeName(String string, KoreanCharacter.Type type) {
        if (string.isEmpty()) {
            // Nothing to split; charAt(0) below would throw.
            return string;
        }
        Matcher matcher = doubleSurnames.matcher(string);
        if (type == KoreanCharacter.Type.NameTypical) {
            if (matcher.find()) {
                return matcher.group(1) + typicalSurenameRules.getOrDefault(matcher.group(2), matcher.group(2)) + " " + matcher.group(3);
            } else {
                String surname = String.valueOf(string.charAt(0));
                return typicalSurenameRules.getOrDefault(surname, surname) + " " + string.substring(1);
            }
        } else {
            if (matcher.find()) {
                return matcher.group(1) + matcher.group(2) + " " + matcher.group(3);
            } else {
                return string.charAt(0) + " " + string.substring(1);
            }
        }
    }

    /**
     * Inserts hyphens (and a space before street numbers) around a recognized district suffix.
     * The numbered patterns are tried before the plain suffix pattern.
     *
     * @param string
     * the district string to normalize.
     * @return the normalized district string, or the input itself if no pattern matches.
     */
    private static String normalizeDistrict(String string) {
        Matcher matcher = districtPostfixesWithNumbers2.matcher(string);
        if (matcher.find()) {
            return matcher.group(1) + "-" + matcher.group(2) + " " + matcher.group(3) + "-" + matcher.group(4) + matcher.group(5);
        }
        matcher = districtPostfixesWithNumbers1.matcher(string);
        if (matcher.find()) {
            // NOTE(review): group(3) is the whitespace between number and suffix, so it ends up
            // after the hyphen, and the trailing whitespace in group(5) is dropped - confirm this is intended.
            return matcher.group(1) + (matcher.group(1).endsWith(" ") ? "" : " ") + matcher.group(2) + "-" + matcher.group(3) + matcher.group(4);
        }
        matcher = districtPostfixes.matcher(string);
        if (matcher.find()) {
            return matcher.group(1) + "-" + matcher.group(2) + matcher.group(3);
        }
        return string;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment