diff options
| author | Azure <azure@fox.blue> | 2019-06-30 16:15:22 +0000 | 
|---|---|---|
| committer | Azure <azure@fox.blue> | 2019-06-30 16:15:22 +0000 | 
| commit | 83e072d6d5b248c416f89cb144e677b2a3bdaba9 (patch) | |
| tree | ce5478bd6a172d5e32cc3a68e2e5a748407ff05a | |
| parent | Update ChangeLog (diff) | |
| download | w3m-83e072d6d5b248c416f89cb144e677b2a3bdaba9.tar.gz w3m-83e072d6d5b248c416f89cb144e677b2a3bdaba9.zip | |
Allow setting User Agent in Siteconf
Since Google gives usable search results to Lynx but not to w3m, and
many other sites block Lynx but /not/ w3m, we want to be able to set
the User Agent string on a per-site basis.
Diffstat (limited to '')
| -rw-r--r-- | doc/README.siteconf | 7 | ||||
| -rw-r--r-- | fm.h | 4 | ||||
| -rw-r--r-- | rc.c | 12 | ||||
| -rw-r--r-- | url.c | 5 | 
4 files changed, 26 insertions, 2 deletions
| diff --git a/doc/README.siteconf b/doc/README.siteconf index 39b1028..5eb2554 100644 --- a/doc/README.siteconf +++ b/doc/README.siteconf @@ -13,6 +13,7 @@ substitute_url "<destination-url>"  url_charset <charset>  no_referer_from on|off  no_referer_to on|off +user_agent "string"  The last match wins. @@ -40,6 +41,12 @@ url_charset utf-8  When combinated with "decode_url" option turned on, links to  Wikipedia will be human-readable. +url m@^https?://(.*\.)google\.com/@ +user_agent "Lynx/2.8.8dev.3 libwww-FM/2.14 SSL-MM/1.4.1" + +Tell Google we're actually Lynx. (So they send us a text-browser friendly +results page.) +  ===== Regular expressions notes =====  Following expressions are all equivalent: @@ -271,8 +271,10 @@ extern int REV_LB[];  #define SCONF_URL_CHARSET	2  #define SCONF_NO_REFERER_FROM	3  #define SCONF_NO_REFERER_TO	4 -#define SCONF_N_FIELD		5 +#define SCONF_USER_AGENT	5 +#define SCONF_N_FIELD		6  #define query_SCONF_SUBSTITUTE_URL(pu) ((const char *)querySiteconf(pu, SCONF_SUBSTITUTE_URL)) +#define query_SCONF_USER_AGENT(pu) ((const char *)querySiteconf(pu, SCONF_USER_AGENT))  #define query_SCONF_URL_CHARSET(pu) ((const wc_ces *)querySiteconf(pu, SCONF_URL_CHARSET))  #define query_SCONF_NO_REFERER_FROM(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_FROM))  #define query_SCONF_NO_REFERER_TO(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_TO)) @@ -1602,6 +1602,7 @@ helpFile(char *base)   * url_charset <charset>   * no_referer_from on|off   * no_referer_to on|off + * user_agent "<string>"   *    * The last match wins.   */ @@ -1614,6 +1615,7 @@ struct siteconf_rec {      unsigned char mask[(SCONF_N_FIELD + 7) >> 3];      char *substitute_url; +    char *user_agent;  #ifdef USE_M17N      wc_ces url_charset;  #endif @@ -1640,6 +1642,7 @@ newSiteconfRec(void)      memset(ent->mask, 0, sizeof(ent->mask));      ent->substitute_url = NULL; +    ent->user_agent = NULL;  #ifdef USE_M17N      ent->url_charset = 0;  #endif @@ -1718,6 +1721,10 @@ loadSiteconf(void)  	    ent->substitute_url = getQWord(&p);  	    SCONF_SET(ent, SCONF_SUBSTITUTE_URL);  	} +	if (strcmp(s, "user_agent") == 0) { +	    ent->user_agent = getQWord(&p); +	    SCONF_SET(ent, SCONF_USER_AGENT); +	}  #ifdef USE_M17N  	else if (strcmp(s, "url_charset") == 0) {  	    char *charset = getWord(&p); @@ -1797,6 +1804,11 @@ url_found:  	    return tmp->ptr;  	}  	return NULL; +    case SCONF_USER_AGENT: +	if (ent->user_agent && *ent->user_agent) { +	    return ent->user_agent; +	} +	return NULL;  #ifdef USE_M17N      case SCONF_URL_CHARSET:  	return &ent->url_charset; @@ -1323,10 +1323,13 @@ otherinfo(ParsedURL *target, ParsedURL *current, char *referer)      Str s = Strnew();      const int *no_referer_ptr;      int no_referer; +    const char* url_user_agent = query_SCONF_USER_AGENT(target);      if (!override_user_agent) {          Strcat_charp(s, "User-Agent: "); -        if (UserAgent == NULL || *UserAgent == '\0') +	if (url_user_agent) +	   Strcat_charp(s, url_user_agent); +	else if (UserAgent == NULL || *UserAgent == '\0')              Strcat_charp(s, w3m_version);          else              Strcat_charp(s, UserAgent); | 
