/// @brief Heuristically decides whether the start of @p file looks like UTF-8
///        text containing at least one non-ASCII character.
///
/// Up to 1024 bytes are sampled with QIODevice::peek() and checked against the
/// UTF-8 bit layout: a lead byte (110xxxxx, 1110xxxx or 11110xxx) must be
/// followed by the matching number of continuation bytes (10xxxxxx).
/// Only the bit structure is validated; overlong forms and surrogate code
/// points are not rejected.
///
/// @param file Device to sample. A null pointer yields false.
/// @return true when the sample is structurally valid UTF-8 and contains at
///         least one multi-byte sequence; false when the sample is malformed,
///         is pure ASCII, or no bytes could be peeked.
bool MainWindow::isUtf8File(QIODevice *file)
{
    if (file == nullptr)
        return false;

    const int sampleSize = 1024;
    char sample[sampleSize];
    // peek() reports how many bytes were copied; a zero or negative result
    // simply skips the loop below and falls through to "not UTF-8".
    const int bytesRead = static_cast<int>(file->peek(sample, sampleSize));

    int pendingContinuations = 0; // continuation bytes still owed by the current sequence
    bool sawNonAscii = false;

    for (int i = 0; i < bytesRead; ++i) {
        // Work on an unsigned value: masking and comparing a plain (possibly
        // signed) char relies on sign extension and, when shifted, on
        // behaviour that is undefined before C++20.
        const unsigned char byte = static_cast<unsigned char>(sample[i]);

        if (byte < 0x80) {
            // ASCII (0x00-0x7F) is fine on its own, but may not interrupt a
            // multi-byte sequence.
            if (pendingContinuations != 0)
                return false;
            continue;
        }

        sawNonAscii = true;

        if (pendingContinuations > 0) {
            // Inside a sequence: every following byte must be 10xxxxxx.
            if ((byte & 0xC0) != 0x80)
                return false;
            --pendingContinuations;
            continue;
        }

        // Start of a new sequence: derive its length from the lead byte.
        if ((byte & 0xE0) == 0xC0) {
            pendingContinuations = 1; // 110xxxxx -> 2-byte sequence
        } else if ((byte & 0xF0) == 0xE0) {
            pendingContinuations = 2; // 1110xxxx -> 3-byte sequence (a BOM, EF BB BF, lands here)
        } else if ((byte & 0xF8) == 0xF0) {
            pendingContinuations = 3; // 11110xxx -> 4-byte sequence
        } else {
            // Either a stray continuation byte (10xxxxxx) or a lead byte
            // announcing more than four bytes (0xF8-0xFF), which UTF-8 does
            // not allow.
            return false;
        }
    }

    // A sequence that is still incomplete when the sample ends is deliberately
    // not treated as an error: the fixed-size window can cut a multi-byte
    // character in half.

    // UTF-8 is a superset of ASCII, but a sample that is entirely ASCII is
    // reported as "not UTF-8" so that it is handled with the default encoding.
    return sawNonAscii;
}