diff -ur htmldoc-1.8.23.orig/htmldoc/htmldoc.cxx htmldoc-1.8.23/htmldoc/htmldoc.cxx
--- htmldoc-1.8.23.orig/htmldoc/htmldoc.cxx 2002-10-25 16:37:54.000000000 +0300
+++ htmldoc-1.8.23/htmldoc/htmldoc.cxx 2003-08-20 21:14:41.000000000 +0300
@@ -83,6 +83,12 @@
static void term_handler(int signum);
static void usage(void);
+/*
+ * Local variables...
+ */
+
+int autocharset = 0;	/* Set to 1 when the charset option is "auto"; the charset is then taken from the document via htmlSetMetaCharSet() */
+
/*
* 'main()' - Main entry for HTMLDOC.
@@ -247,7 +253,13 @@
{
i ++;
if (i < argc)
- htmlSetCharSet(argv[i]);
+ {
+ if (strcmp(argv[i], "auto") == 0)
+ {
+ autocharset = 1;
+ }
+ else htmlSetCharSet(argv[i]);
+ }
else
usage();
}
@@ -1034,6 +1046,9 @@
while (document->prev != NULL)
document = document->prev;
+
+ if (autocharset)
+ htmlSetMetaCharSet(document);
htmlDebugStats("Document Tree", document);
@@ -1269,7 +1284,13 @@
else if (strncasecmp(line, "XRXCOMMENTS=", 12) == 0)
XRXComments = atoi(line + 12);
else if (strncasecmp(line, "CHARSET=", 8) == 0)
- htmlSetCharSet(line + 8);
+ {
+ if (strcmp(line + 8, "auto") == 0)
+ {
+ autocharset = 1;
+ }
+ else htmlSetCharSet(line + 8);
+ }
else if (strncasecmp(line, "PAGEMODE=", 9) == 0)
PDFPageMode = atoi(line + 9);
else if (strncasecmp(line, "PAGELAYOUT=", 11) == 0)
@@ -1994,7 +2015,13 @@
}
}
else if (strcmp(temp, "--charset") == 0)
- htmlSetCharSet(temp2);
+ {
+ if (strcmp(temp2, "auto") == 0)
+ {
+ autocharset = 1;
+ }
+ else htmlSetCharSet(temp2);
+ }
else if (strcmp(temp, "--pagemode") == 0)
{
for (i = 0; i < (int)(sizeof(PDFModes) / sizeof(PDFModes[0])); i ++)
diff -ur htmldoc-1.8.23.orig/htmldoc/html.h htmldoc-1.8.23/htmldoc/html.h
--- htmldoc-1.8.23.orig/htmldoc/html.h 2002-07-27 06:41:31.000000000 +0300
+++ htmldoc-1.8.23/htmldoc/html.h 2003-08-20 20:55:23.000000000 +0300
@@ -309,6 +309,7 @@
extern void htmlSetBaseSize(float p, float s);
extern void htmlSetCharSet(const char *cs);
+extern int htmlSetMetaCharSet(tree_t *tree);
extern void htmlSetTextColor(uchar *color);
extern void htmlDebugStats(const char *title, tree_t *t);
diff -ur htmldoc-1.8.23.orig/htmldoc/htmllib.cxx htmldoc-1.8.23/htmldoc/htmllib.cxx
--- htmldoc-1.8.23.orig/htmldoc/htmllib.cxx 2002-10-11 17:23:28.000000000 +0300
+++ htmldoc-1.8.23/htmldoc/htmllib.cxx 2003-08-20 21:56:21.000000000 +0300
@@ -1854,6 +1854,73 @@
return (NULL);
}
+/*
+ * 'htmlSetMetaCharSet()' - Set the character set from a document's META data.
+ *
+ * Searches the tree depth-first and stops at the first META element that has
+ * HTTP-EQUIV="Content-Type" with "charset=" in CONTENT, or a CHARSET attribute.
+ * The name found is lowercased and handed to htmlSetCharSet().
+ */
+
+int					/* O - 1 if a charset was set, 0 if not */
+htmlSetMetaCharSet(tree_t *tree)	/* I - Document tree */
+{
+  uchar	*tname,				/* HTTP-EQUIV or CHARSET value from tree entry */
+	*tcontent,			/* CONTENT value from tree entry */
+	*src,				/* Read position in the attribute value */
+	*dst,				/* Write position in charset[] */
+	*end;				/* Slot in charset[] reserved for the nul */
+  uchar	charset[256];			/* Lowercased character set name */
+
+
+  end = charset + sizeof(charset) - 1;
+
+  while (tree != NULL)
+  {
+    if (tree->markup == MARKUP_META)
+    {
+      if ((tname = htmlGetVariable(tree, (uchar *)"HTTP-EQUIV")) != NULL &&
+          (tcontent = htmlGetVariable(tree, (uchar *)"CONTENT")) != NULL &&
+          strcasecmp((char *)tname, "Content-Type") == 0 &&
+          ((src = (uchar *)strstr((char *)tcontent, "charset=")) != NULL ||
+           (src = (uchar *)strstr((char *)tcontent, "CHARSET=")) != NULL))
+      {
+       /*
+        * Copy [A-Za-z0-9_-] characters after "charset=", never past "end" so
+        * an overlong value from the document cannot overflow charset[]...
+        */
+
+        for (src += 8, dst = charset;
+             dst < end &&
+             ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') ||
+              (*src >= '0' && *src <= '9') || *src == '_' || *src == '-'); src ++)
+          *dst++ = (uchar)tolower(*src);
+        *dst = '\0';
+        htmlSetCharSet((char *)charset);
+        return (1);
+      }
+
+      if ((tname = htmlGetVariable(tree, (uchar *)"CHARSET")) != NULL)
+      {
+        for (src = tname, dst = charset; dst < end && *src != '\0'; src ++)
+          *dst++ = (uchar)tolower(*src);	/* uchar promotes to a valid int */
+        *dst = '\0';
+        htmlSetCharSet((char *)charset);
+        return (1);
+      }
+    }
+
+    /* Check child entries first, then continue with the next sibling... */
+
+    if (tree->child != NULL && htmlSetMetaCharSet(tree->child))
+      return (1);
+
+    tree = tree->next;
+  }
+
+  return (0);
+}
+
/*
* 'htmlGetStyle()' - Get a style value from a node's STYLE attribute.