diff options
author | Fumitoshi UKAI <ukai@debian.or.jp> | 2001-11-22 14:15:19 +0000 |
---|---|---|
committer | Fumitoshi UKAI <ukai@debian.or.jp> | 2001-11-22 14:15:19 +0000 |
commit | 6f0999cbc2d23a7efaa616b673cf37f51241db86 (patch) | |
tree | 7117ee541c11f578cf6dfd1139381094495bf1c9 /indep.c | |
parent | [w3m-dev 02506] (diff) | |
download | w3m-6f0999cbc2d23a7efaa616b673cf37f51241db86.tar.gz w3m-6f0999cbc2d23a7efaa616b673cf37f51241db86.zip |
[w3m-dev 02503]
From: aito@fw.ipsj.or.jp
closes: Debian Bug#120540
Diffstat (limited to '')
-rw-r--r-- | indep.c | 19 |
1 files changed, 18 insertions, 1 deletions
@@ -1,4 +1,4 @@ -/* $Id: indep.c,v 1.7 2001/11/22 13:30:02 ukai Exp $ */ +/* $Id: indep.c,v 1.8 2001/11/22 14:15:19 ukai Exp $ */ #include "fm.h" #include <stdio.h> #include <pwd.h> @@ -275,6 +275,7 @@ getescapechar(char **str) { int dummy = -1; char *p = *str, *q; + int strict_entity = TRUE; if (*p == '&') p++; @@ -319,8 +320,24 @@ getescapechar(char **str) for (p++; IS_ALNUM(*p); p++) ; q = allocStr(q, p - q); + if (strcasestr("lt gt amp quot nbsp",q) && + *p != '=') { + /* a character entity MUST be terminated with ";". However, + there's MANY web pages which uses &lt , &gt or something + like them as &lt;, &gt;, etc. Therefore, we treat the most + popular character entities (including &#xxxx;) without + the last ";" as character entities. If the trailing character + is "=", it must be a part of query in an URL. So &lt=, &gt=, etc. + are not regarded as character entities. + */ + strict_entity = FALSE; + } if (*p == ';') p++; + else if (strict_entity) { + *str = p; + return -1; + } *str = p; return getHash_si(&entity, q, -1); } |