diff options
author | Azure <azure@fox.blue> | 2019-06-30 16:15:22 +0000 |
---|---|---|
committer | Azure <azure@fox.blue> | 2019-06-30 16:15:22 +0000 |
commit | 83e072d6d5b248c416f89cb144e677b2a3bdaba9 (patch) | |
tree | ce5478bd6a172d5e32cc3a68e2e5a748407ff05a | |
parent | Update ChangeLog (diff) | |
download | w3m-83e072d6d5b248c416f89cb144e677b2a3bdaba9.tar.gz w3m-83e072d6d5b248c416f89cb144e677b2a3bdaba9.zip |
Allow setting User Agent in Siteconf
Since Google gives usable search results to Lynx but not to w3m, and
many other sites block Lynx but /not/ w3m, we want to be able to set
the User Agent string on a per-site basis.
-rw-r--r-- | doc/README.siteconf | 7 | ||||
-rw-r--r-- | fm.h | 4 | ||||
-rw-r--r-- | rc.c | 12 | ||||
-rw-r--r-- | url.c | 5 |
4 files changed, 26 insertions, 2 deletions
diff --git a/doc/README.siteconf b/doc/README.siteconf
index 39b1028..5eb2554 100644
--- a/doc/README.siteconf
+++ b/doc/README.siteconf
@@ -13,6 +13,7 @@ substitute_url "<destination-url>"
 url_charset <charset>
 no_referer_from on|off
 no_referer_to on|off
+user_agent "string"
 The last match wins.
@@ -40,6 +41,12 @@ url_charset utf-8
 When combinated with "decode_url" option turned on, links to Wikipedia will be human-readable.
+url m@^https?://(.*\.)google\.com/@
+user_agent "Lynx/2.8.8dev.3 libwww-FM/2.14 SSL-MM/1.4.1"
+
+Tell Google we're actually Lynx. (So they send us a text-browser friendly
+results page.)
+
 ===== Regular expressions notes =====
 Following expressions are all equivalent:
@@ -271,8 +271,10 @@ extern int REV_LB[];
 #define SCONF_URL_CHARSET 2
 #define SCONF_NO_REFERER_FROM 3
 #define SCONF_NO_REFERER_TO 4
-#define SCONF_N_FIELD 5
+#define SCONF_USER_AGENT 5
+#define SCONF_N_FIELD 6
 #define query_SCONF_SUBSTITUTE_URL(pu) ((const char *)querySiteconf(pu, SCONF_SUBSTITUTE_URL))
+#define query_SCONF_USER_AGENT(pu) ((const char *)querySiteconf(pu, SCONF_USER_AGENT))
 #define query_SCONF_URL_CHARSET(pu) ((const wc_ces *)querySiteconf(pu, SCONF_URL_CHARSET))
 #define query_SCONF_NO_REFERER_FROM(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_FROM))
 #define query_SCONF_NO_REFERER_TO(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_TO))
@@ -1602,6 +1602,7 @@ helpFile(char *base)
  * url_charset <charset>
  * no_referer_from on|off
  * no_referer_to on|off
+ * user_agent "<string>"
  *
  * The last match wins.
  */
@@ -1614,6 +1615,7 @@ struct siteconf_rec {
     unsigned char mask[(SCONF_N_FIELD + 7) >> 3];
     char *substitute_url;
+    char *user_agent;
 #ifdef USE_M17N
     wc_ces url_charset;
 #endif
@@ -1640,6 +1642,7 @@ newSiteconfRec(void)
     memset(ent->mask, 0, sizeof(ent->mask));
     ent->substitute_url = NULL;
+    ent->user_agent = NULL;
 #ifdef USE_M17N
     ent->url_charset = 0;
 #endif
@@ -1718,6 +1721,10 @@ loadSiteconf(void)
             ent->substitute_url = getQWord(&p);
             SCONF_SET(ent, SCONF_SUBSTITUTE_URL);
         }
+        if (strcmp(s, "user_agent") == 0) {
+            ent->user_agent = getQWord(&p);
+            SCONF_SET(ent, SCONF_USER_AGENT);
+        }
 #ifdef USE_M17N
         else if (strcmp(s, "url_charset") == 0) {
             char *charset = getWord(&p);
@@ -1797,6 +1804,11 @@ url_found:
             return tmp->ptr;
         }
         return NULL;
+    case SCONF_USER_AGENT:
+        if (ent->user_agent && *ent->user_agent) {
+            return ent->user_agent;
+        }
+        return NULL;
 #ifdef USE_M17N
     case SCONF_URL_CHARSET:
         return &ent->url_charset;
@@ -1323,10 +1323,13 @@ otherinfo(ParsedURL *target, ParsedURL *current, char *referer)
     Str s = Strnew();
     const int *no_referer_ptr;
     int no_referer;
+    const char* url_user_agent = query_SCONF_USER_AGENT(target);
     if (!override_user_agent) {
         Strcat_charp(s, "User-Agent: ");
-        if (UserAgent == NULL || *UserAgent == '\0')
+        if (url_user_agent)
+            Strcat_charp(s, url_user_agent);
+        else if (UserAgent == NULL || *UserAgent == '\0')
             Strcat_charp(s, w3m_version);
         else
             Strcat_charp(s, UserAgent);
|