aboutsummaryrefslogtreecommitdiffstats
path: root/indep.c
diff options
context:
space:
mode:
authorFumitoshi UKAI <ukai@debian.or.jp>2001-11-22 14:15:19 +0000
committerFumitoshi UKAI <ukai@debian.or.jp>2001-11-22 14:15:19 +0000
commit6f0999cbc2d23a7efaa616b673cf37f51241db86 (patch)
tree7117ee541c11f578cf6dfd1139381094495bf1c9 /indep.c
parent[w3m-dev 02506] (diff)
downloadw3m-6f0999cbc2d23a7efaa616b673cf37f51241db86.tar.gz
w3m-6f0999cbc2d23a7efaa616b673cf37f51241db86.zip
[w3m-dev 02503]
From: aito@fw.ipsj.or.jp closes: Debian Bug#120540
Diffstat (limited to '')
-rw-r--r--indep.c19
1 files changed, 18 insertions, 1 deletions
diff --git a/indep.c b/indep.c
index 717d9b5..d359253 100644
--- a/indep.c
+++ b/indep.c
@@ -1,4 +1,4 @@
-/* $Id: indep.c,v 1.7 2001/11/22 13:30:02 ukai Exp $ */
+/* $Id: indep.c,v 1.8 2001/11/22 14:15:19 ukai Exp $ */
#include "fm.h"
#include <stdio.h>
#include <pwd.h>
@@ -275,6 +275,7 @@ getescapechar(char **str)
{
int dummy = -1;
char *p = *str, *q;
+ int strict_entity = TRUE;
if (*p == '&')
p++;
@@ -319,8 +320,24 @@ getescapechar(char **str)
for (p++; IS_ALNUM(*p); p++)
;
q = allocStr(q, p - q);
+ if (strcasestr("lt gt amp quot nbsp",q) &&
+ *p != '=') {
+ /* A character entity MUST be terminated with ";". However,
+ there are MANY web pages which use &lt , &gt or something
+ like them in place of &lt;, &gt;, etc. Therefore, we treat the most
+ popular character entities (including &#xxxx;) without
+ the trailing ";" as character entities. If the trailing character
+ is "=", it must be part of a query in a URL. So &lt=, &gt=, etc.
+ are not regarded as character entities.
+ */
+ strict_entity = FALSE;
+ }
if (*p == ';')
p++;
+ else if (strict_entity) {
+ *str = p;
+ return -1;
+ }
*str = p;
return getHash_si(&entity, q, -1);
}