#include #include #include #include #include QString eliminateBadChars(QByteArray d) { QString r = QString::fromUtf8(QByteArray::fromHex(d)); for(int i = 0; i < r.size(); i++) { QChar c = r.at(i); if(c.isSpace() || !c.isPrint() || c.isMark() || c.category() == QChar::Other_Control || c.category() == QChar::Other_Format || c.category() == QChar::Other_NotAssigned || c.category() == QChar::Other_PrivateUse || // Comment the code line below to stop crashing. // If this particular category of characters is eliminated, the utf8 string can be displayed, if it isn't - Qt crashes /*c.category() == QChar::Symbol_Other ||*/ c.category() == QChar::NoCategory) r.replace(i, 1, ""); } return r; } bool hasBadChar(QByteArray d, QChar::Category *cat) { QString s = QString::fromUtf8(QByteArray::fromHex(d)); for(int i = 0; i < s.size(); i++) { QChar c = s.at(i); *cat = c.category(); if(c.isSpace() || !c.isPrint() || c.isMark() || c.category() == QChar::Other_Control || c.category() == QChar::Other_Format || c.category() == QChar::Other_NotAssigned || c.category() == QChar::Other_PrivateUse || c.category() == QChar::Symbol_Other || c.category() == QChar::NoCategory) return true; } return false; } void testForBadChars(QListWidget *lb, QListWidget *la, QMap *m, QString name) { QFile f(QCoreApplication::applicationDirPath() + "/" + name); if(!f.open(QIODevice::ReadOnly)) { qDebug() << "ERR: " << name << " failed to open"; return; } int c = 0, t = 0; while(!f.atEnd()) { QByteArray l = f.readLine(); l = l.replace('\n', ""); QChar::Category cat; if(hasBadChar(l, &cat)) { c++; lb->insertItem(0, QByteArray::fromHex(l)); la->insertItem(0, eliminateBadChars(l)); m->insert(cat, m->value(cat, 0) + 1); } t++; } qDebug() << name << " -> " << c << "/" << t; } int main(int argc, char *argv[]) { QApplication a(argc, argv); QWidget *w = new QWidget; w->resize(800, 600); w->show(); QListWidget *lb = new QListWidget(w); QListWidget *la = new QListWidget(w); lb->move(0,0); lb->resize(w->width() / 2, w->height()); lb->show(); la->move(lb->width(), 0); la->resize(lb->size()); la->show(); // Map out bad chars QMap m; //testForBadChars(lb, la, &m, "users.txt"); //testForBadChars(lb, la, &m, "users.19.txt"); //testForBadChars(lb, la, &m, "users.20.txt"); testForBadChars(lb, la, &m, "users.50.txt"); //testForBadChars(lb, la, &m, "users.86.txt"); //testForBadChars(lb, la, &m, "users.634.txt"); // List Bad Chars qDebug() << "--- BAD CHAR CATEGORIES ---"; for(QMap :: iterator i = m.begin(); i != m.end(); i++) { qDebug() << i.key() << i.value(); } qDebug()<<111; return a.exec(); }