aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAzure <azure@fox.blue>2019-06-30 16:15:22 +0000
committerAzure <azure@fox.blue>2019-06-30 16:15:22 +0000
commit83e072d6d5b248c416f89cb144e677b2a3bdaba9 (patch)
treece5478bd6a172d5e32cc3a68e2e5a748407ff05a
parentUpdate ChangeLog (diff)
downloadw3m-83e072d6d5b248c416f89cb144e677b2a3bdaba9.tar.gz
w3m-83e072d6d5b248c416f89cb144e677b2a3bdaba9.zip
Allow setting User Agent in Siteconf
Since Google gives usable search results to Lynx but not to w3m, and many other sites block Lynx but /not/ w3m, we want to be able to set the User Agent string on a per-site basis.
-rw-r--r--doc/README.siteconf7
-rw-r--r--fm.h4
-rw-r--r--rc.c12
-rw-r--r--url.c5
4 files changed, 26 insertions, 2 deletions
diff --git a/doc/README.siteconf b/doc/README.siteconf
index 39b1028..5eb2554 100644
--- a/doc/README.siteconf
+++ b/doc/README.siteconf
@@ -13,6 +13,7 @@ substitute_url "<destination-url>"
url_charset <charset>
no_referer_from on|off
no_referer_to on|off
+user_agent "<string>"
The last match wins.
@@ -40,6 +41,12 @@ url_charset utf-8
When combined with the "decode_url" option turned on, links to
Wikipedia will be human-readable.
+url m@^https?://(.*\.)google\.com/@
+user_agent "Lynx/2.8.8dev.3 libwww-FM/2.14 SSL-MM/1.4.1"
+
+Tell Google we're actually Lynx. (So they send us a text-browser friendly
+results page.)
+
===== Regular expressions notes =====
Following expressions are all equivalent:
diff --git a/fm.h b/fm.h
index 8face3c..4a17ecc 100644
--- a/fm.h
+++ b/fm.h
@@ -271,8 +271,10 @@ extern int REV_LB[];
#define SCONF_URL_CHARSET 2
#define SCONF_NO_REFERER_FROM 3
#define SCONF_NO_REFERER_TO 4
-#define SCONF_N_FIELD 5
+#define SCONF_USER_AGENT 5
+#define SCONF_N_FIELD 6
#define query_SCONF_SUBSTITUTE_URL(pu) ((const char *)querySiteconf(pu, SCONF_SUBSTITUTE_URL))
+#define query_SCONF_USER_AGENT(pu) ((const char *)querySiteconf(pu, SCONF_USER_AGENT))
#define query_SCONF_URL_CHARSET(pu) ((const wc_ces *)querySiteconf(pu, SCONF_URL_CHARSET))
#define query_SCONF_NO_REFERER_FROM(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_FROM))
#define query_SCONF_NO_REFERER_TO(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_TO))
diff --git a/rc.c b/rc.c
index 3fd84ef..25f7a56 100644
--- a/rc.c
+++ b/rc.c
@@ -1602,6 +1602,7 @@ helpFile(char *base)
* url_charset <charset>
* no_referer_from on|off
* no_referer_to on|off
+ * user_agent "<string>"
*
* The last match wins.
*/
@@ -1614,6 +1615,7 @@ struct siteconf_rec {
unsigned char mask[(SCONF_N_FIELD + 7) >> 3];
char *substitute_url;
+ char *user_agent;
#ifdef USE_M17N
wc_ces url_charset;
#endif
@@ -1640,6 +1642,7 @@ newSiteconfRec(void)
memset(ent->mask, 0, sizeof(ent->mask));
ent->substitute_url = NULL;
+ ent->user_agent = NULL;
#ifdef USE_M17N
ent->url_charset = 0;
#endif
@@ -1718,6 +1721,10 @@ loadSiteconf(void)
ent->substitute_url = getQWord(&p);
SCONF_SET(ent, SCONF_SUBSTITUTE_URL);
}
+ if (strcmp(s, "user_agent") == 0) {
+ ent->user_agent = getQWord(&p);
+ SCONF_SET(ent, SCONF_USER_AGENT);
+ }
#ifdef USE_M17N
else if (strcmp(s, "url_charset") == 0) {
char *charset = getWord(&p);
@@ -1797,6 +1804,11 @@ url_found:
return tmp->ptr;
}
return NULL;
+ case SCONF_USER_AGENT:
+ if (ent->user_agent && *ent->user_agent) {
+ return ent->user_agent;
+ }
+ return NULL;
#ifdef USE_M17N
case SCONF_URL_CHARSET:
return &ent->url_charset;
diff --git a/url.c b/url.c
index 31d7c4b..aab7b9d 100644
--- a/url.c
+++ b/url.c
@@ -1323,10 +1323,13 @@ otherinfo(ParsedURL *target, ParsedURL *current, char *referer)
Str s = Strnew();
const int *no_referer_ptr;
int no_referer;
+ const char* url_user_agent = query_SCONF_USER_AGENT(target);
if (!override_user_agent) {
Strcat_charp(s, "User-Agent: ");
- if (UserAgent == NULL || *UserAgent == '\0')
+ if (url_user_agent)
+ Strcat_charp(s, url_user_agent);
+ else if (UserAgent == NULL || *UserAgent == '\0')
Strcat_charp(s, w3m_version);
else
Strcat_charp(s, UserAgent);