75 lines
2.1 KiB
Diff
75 lines
2.1 KiB
Diff
Index: htnet/HtHTTP.cc
|
|
===================================================================
|
|
--- htnet/HtHTTP.cc.orig
|
|
+++ htnet/HtHTTP.cc
|
|
@@ -643,6 +643,8 @@
|
|
String line = 0;
|
|
int inHeader = 1;
|
|
|
|
+ _needUTF8Convert = 0;
|
|
+
|
|
if (_response._modification_time)
|
|
{
|
|
delete _response._modification_time;
|
|
@@ -731,8 +733,15 @@
|
|
token = strtok(token, "\n\t");
|
|
|
|
if (token && *token)
|
|
+ {
|
|
_response._content_type = token;
|
|
-
|
|
+ if ((_response._content_type.indexOf("text/html") != -1) && (_response._content_type.indexOf("UTF-8") != -1))
|
|
+ {
|
|
+ if ( debug > 4 )
|
|
+ cout << "needUTF8Convert flagged" << endl;
|
|
+ _needUTF8Convert = 1;
|
|
+ }
|
|
+ }
|
|
}
|
|
else if( ! mystrncasecmp((char*)line, "content-length:", 15))
|
|
{
|
|
@@ -970,6 +979,31 @@
|
|
|
|
}
|
|
|
|
+ if ( _needUTF8Convert )
+ {
+ if ( debug > 4 )
+ cout << "Converting UTF-8 characters" << endl;
+
+ // Collapse the UTF-8 body in place to one byte per character.
+ // Code points U+0000..U+00FF are kept as a single byte holding the
+ // code point value; everything else (including malformed input)
+ // becomes '?'. The output never grows, so dstPtr never passes srcPtr.
+ //
+ // The body comes from the network, so nothing is assumed about its
+ // well-formedness: continuation bytes are consumed only while they
+ // really are continuation bytes (10xxxxxx). That test fails on the
+ // terminating NUL, so a truncated sequence at the end of the buffer
+ // can no longer carry srcPtr past the terminator.
+ //
+ // NOTE(review): only a NUL is written at the new end of the data. If
+ // String::length() reports a stored count rather than scanning for the
+ // terminator, the document length set after this block will still be
+ // the pre-conversion size -- confirm against the String class.
+ unsigned char *srcPtr, *dstPtr;
+ srcPtr = dstPtr = (unsigned char *) _response._contents.get();
+ if ( srcPtr )
+ {
+ while ( *srcPtr )
+ {
+ unsigned char lead = *srcPtr++;
+ if ( lead < 0x80 )
+ {
+ // Plain ASCII: copy through unchanged.
+ *dstPtr++ = lead;
+ }
+ else if ( ( lead == 0xC2 || lead == 0xC3 ) && ( *srcPtr & 0xC0 ) == 0x80 )
+ {
+ // Two-byte sequence encoding U+0080..U+00FF: fits in one byte.
+ // Leads 0xC0/0xC1 are overlong encodings and leads 0xC4..0xDF
+ // encode code points above U+00FF, so both fall through to '?'.
+ *dstPtr++ = ( ( lead & 0x03 ) << 6 ) | ( *srcPtr++ & 0x3F );
+ }
+ else
+ {
+ // Not representable in one byte, or malformed: emit one '?'
+ // and skip at most the number of continuation bytes this lead
+ // byte announces (none for a stray continuation byte).
+ int trail = ( lead >= 0xF0 ) ? 3 : ( lead >= 0xE0 ) ? 2 : ( lead >= 0xC0 ) ? 1 : 0;
+ *dstPtr++ = '?';
+ while ( trail-- > 0 && ( *srcPtr & 0xC0 ) == 0x80 )
+ srcPtr++;
+ }
+ }
+ *dstPtr = 0;
+ }
+ }
|
|
+
|
|
// Set document length
|
|
_response._document_length = _response._contents.length();
|
|
|
|
Index: htnet/HtHTTP.h
|
|
===================================================================
|
|
--- htnet/HtHTTP.h.orig
|
|
+++ htnet/HtHTTP.h
|
|
@@ -316,6 +316,7 @@
|
|
int _bytes_read; // Bytes read
|
|
URL _url; // URL to retrieve
|
|
URL _referer; // Referring URL
|
|
+ int _needUTF8Convert; // Flag for simple UTF-8 convert
|
|
|
|
String _accept_language; // accept-language directive
|
|
|