Created
July 7, 2012 16:27
-
-
Save flying19880517/3067078 to your computer and use it in GitHub Desktop.
Revisions
-
Lionheart created this gist
Jul 7, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,58 @@ bool MainWindow::isUtf8File(QIODevice *file) { const int testSize = 1024; char str[testSize];/// int size = file->peek(str, testSize); // char buf[3]; // if (f->peek(buf, sizeof(buf)) == sizeof(buf)) // return (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF); int encodingBytesCount = 0; bool allTextsAreASCIIChars = true; for (int i = 0; i < size; ++i){ char current = str[i]; if ((current & 0x80) == 0x80) allTextsAreASCIIChars = false; // First byte if (encodingBytesCount == 0){ if ((current & 0x80) == 0) continue;// ASCII chars, from 0x00-0x7F if ((current & 0xC0) == 0xC0){ encodingBytesCount = 1; current <<= 2; // More than two bytes used to encoding a unicode char. // Calculate the real length. while ((current & 0x80) == 0x80){ current <<= 1; ++encodingBytesCount; } }else{ // Invalid bits structure for UTF8 encoding rule. return false; } }else{ // Following bytes, must start with 10. if ((current & 0xC0) == 0x80) --encodingBytesCount; else return false; } } // if(encodingBytesCount != 0) // { // // Invalid bits structure for UTF8 encoding rule. // // Wrong following bytes count. // return false; // } // Although UTF8 supports encoding for ASCII chars, we regard as a input stream, whose contents are all ASCII as default encoding. return !allTextsAreASCIIChars; }