Created
August 22, 2012 03:07
-
-
Save thuandt/3421905 to your computer and use it in GitHub Desktop.
Revisions
-
MrTux revised this gist
Sep 7, 2012. 3 changed files with 12 additions and 9 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -22,6 +22,7 @@ def no_accent_vietnamese(utf8_str): return r.sub(lambda m: replaces_dict[m.group(0)], utf8_str) if __name__ == '__main__': print no_accent_vietnamese("Việt Nam Đất Nước Con Người") print no_accent_vietnamese("Welcome to Vietnam !") print no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI") This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -24,6 +24,7 @@ def no_accent_vietnamese(s): s = re.sub(u'đ', 'd', s) return s.encode('utf-8') if __name__ == '__main__': print no_accent_vietnamese("Việt Nam Đất Nước Con Người") print no_accent_vietnamese("Welcome to Vietnam !") print no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI") This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,6 +13,7 @@ def no_accent_vietnamese(s): s = re.sub(u'đ', 'd', s) return unicodedata.normalize('NFKD', unicode(s)).encode('ASCII', 'ignore') if __name__ == '__main__': print no_accent_vietnamese("Việt Nam Đất Nước Con Người") print no_accent_vietnamese("Welcome to Vietnam !") print no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI") -
Dương Tiến Thuận revised this gist
Sep 7, 2012. 2 changed files with 47 additions and 0 deletions. There are no files selected for viewing.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert accented Vietnamese text to unaccented Vietnamese text."""

import re

# Each rule pairs a character class of accented letters with the plain letter
# that replaces every member of the class.  The square brackets are essential:
# without them a pattern only matches the whole run of letters as one literal
# substring, so no individual accented letter is ever replaced.
_ACCENT_RULES = [
    (re.compile(u'[àáạảãâầấậẩẫăằắặẳẵ]'), u'a'),
    (re.compile(u'[ÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪ]'), u'A'),
    (re.compile(u'[èéẹẻẽêềếệểễ]'), u'e'),
    (re.compile(u'[ÈÉẸẺẼÊỀẾỆỂỄ]'), u'E'),
    (re.compile(u'[òóọỏõôồốộổỗơờớợởỡ]'), u'o'),
    (re.compile(u'[ÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠ]'), u'O'),
    (re.compile(u'[ìíịỉĩ]'), u'i'),
    (re.compile(u'[ÌÍỊỈĨ]'), u'I'),
    (re.compile(u'[ùúụủũưừứựửữ]'), u'u'),
    (re.compile(u'[ƯỪỨỰỬỮÙÚỤỦŨ]'), u'U'),
    (re.compile(u'[ỳýỵỷỹ]'), u'y'),
    (re.compile(u'[ỲÝỴỶỸ]'), u'Y'),
    (re.compile(u'Đ'), u'D'),
    (re.compile(u'đ'), u'd'),
]


def no_accent_vietnamese(s):
    """Return *s* with Vietnamese accented letters replaced by plain ones.

    Args:
        s: The text to convert.  A byte string is decoded as UTF-8 before
            processing; a text (unicode) string is processed as-is.

    Returns:
        The converted string, of the same kind as the input: a UTF-8 encoded
        byte string for byte-string input, a text string for text input.
        Characters not covered by the replacement rules are left unchanged.

    Raises:
        UnicodeDecodeError: If *s* is a byte string that is not valid UTF-8.
    """
    is_bytes = isinstance(s, bytes)
    # Only decode real byte strings; calling .decode() on text would fail.
    text = s.decode('utf-8') if is_bytes else s
    for pattern, plain in _ACCENT_RULES:
        text = pattern.sub(plain, text)
    return text.encode('utf-8') if is_bytes else text


if __name__ == '__main__':
    # Demo output only when run as a script, never on import.
    print(no_accent_vietnamese("Việt Nam Đất Nước Con Người"))
    print(no_accent_vietnamese("Welcome to Vietnam !"))
    print(no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI"))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert accented Vietnamese text to unaccented Vietnamese text."""

import re
import unicodedata


def no_accent_vietnamese(s):
    """Return *s* reduced to plain ASCII letters.

    The text is decomposed with Unicode NFKD normalization and then encoded
    to ASCII with errors ignored, which drops the combining accent marks.
    Any other character that has no ASCII form after normalization is
    dropped as well.

    Args:
        s: The text to convert.  A byte string is decoded as UTF-8 before
            processing; a text (unicode) string is processed as-is.

    Returns:
        The ASCII-only result, of the same kind as the input: a byte string
        for byte-string input, a text string for text input.

    Raises:
        UnicodeDecodeError: If *s* is a byte string that is not valid UTF-8.
    """
    is_bytes = isinstance(s, bytes)
    # Only decode real byte strings; calling .decode() on text would fail.
    text = s.decode('utf-8') if is_bytes else s
    # D-with-stroke is replaced explicitly: NFKD does not split it into a
    # base letter plus a mark, so the ASCII encode below would drop it.
    text = re.sub(u'Đ', u'D', text)
    text = re.sub(u'đ', u'd', text)
    ascii_bytes = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')
    return ascii_bytes if is_bytes else ascii_bytes.decode('ascii')


if __name__ == '__main__':
    # Demo output only when run as a script, never on import.
    print(no_accent_vietnamese("Việt Nam Đất Nước Con Người"))
    print(no_accent_vietnamese("Welcome to Vietnam !"))
    print(no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI"))
Dương Tiến Thuận created this gist
Aug 22, 2012. There are no files selected for viewing.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert accented Vietnamese text to unaccented Vietnamese text.

Adapted from source code by NamNT:
http://www.vithon.org/2009/06/14/x%E1%BB%AD-ly-ti%E1%BA%BFng-vi%E1%BB%87t-trong-python
"""

import re

# Accented letters, grouped by the plain letter they map to.  The group sizes
# must stay in step with the repeat counts used to build OUTTAB below.
INTAB = (
    u"ạảãàáâậầấẩẫăắằặẳẵ"
    u"óòọõỏôộổỗồốơờớợởỡ"
    u"éèẻẹẽêếềệểễ"
    u"úùụủũưựữửừứ"
    u"íìịỉĩ"
    u"ýỳỷỵỹ"
    u"đ"
    u"ẠẢÃÀÁÂẬẦẤẨẪĂẮẰẶẲẴ"
    u"ÓÒỌÕỎÔỘỔỖỒỐƠỜỚỢỞỠ"
    u"ÉÈẺẸẼÊẾỀỆỂỄ"
    u"ÚÙỤỦŨƯỰỮỬỪỨ"
    u"ÍÌỊỈĨ"
    u"ÝỲỶỴỸ"
    u"Đ"
)

# Plain replacement for each position of INTAB.
OUTTAB = u"a" * 17 + u"o" * 17 + u"e" * 11 + u"u" * 11 + u"i" * 5 + u"y" * 5 + u"d" + \
         u"A" * 17 + u"O" * 17 + u"E" * 11 + u"U" * 11 + u"I" * 5 + u"Y" * 5 + u"D"

# The pattern and lookup table work on decoded text, one character per entry,
# rather than on multi-byte UTF-8 sequences.
r = re.compile(u"|".join(INTAB))
replaces_dict = dict(zip(INTAB, OUTTAB))


def no_accent_vietnamese(utf8_str):
    """Return *utf8_str* with Vietnamese accented letters made plain.

    Args:
        utf8_str: The text to convert.  A byte string is decoded as UTF-8
            before processing; a text (unicode) string is processed as-is.

    Returns:
        The converted string, of the same kind as the input: a UTF-8 encoded
        byte string for byte-string input, a text string for text input.
        Characters that are not in INTAB are left unchanged.

    Raises:
        UnicodeDecodeError: If *utf8_str* is a byte string that is not valid
            UTF-8.
    """
    is_bytes = isinstance(utf8_str, bytes)
    text = utf8_str.decode('utf-8') if is_bytes else utf8_str
    plain = r.sub(lambda m: replaces_dict[m.group(0)], text)
    return plain.encode('utf-8') if is_bytes else plain


if __name__ == '__main__':
    # Demo output only when run as a script, never on import.
    print(no_accent_vietnamese("Việt Nam Đất Nước Con Người"))
    print(no_accent_vietnamese("Welcome to Vietnam !"))
    print(no_accent_vietnamese("VIỆT NAM ĐẤT NƯỚC CON NGƯỜI"))