/* $Id: url.c,v 1.100 2010/12/15 10:50:24 htrb Exp $ */
#include "fm.h"
#ifndef __MINGW32_VERSION
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#else
#include <winsock.h>
#endif /* __MINGW32_VERSION */
#include <signal.h>
#include <setjmp.h>
#include <errno.h>
#include <sys/stat.h>
#ifdef __EMX__
#include <io.h> /* ?? */
#endif /* __EMX__ */
#include "html.h"
#include "Str.h"
#include "myctype.h"
#include "regex.h"
#ifdef USE_SSL
#ifndef SSLEAY_VERSION_NUMBER
#include <openssl/crypto.h> /* SSLEAY_VERSION_NUMBER may be here */
#endif
#include <openssl/err.h>
#endif
#ifdef __WATT32__
#define write(a,b,c) write_s(a,b,c)
#endif /* __WATT32__ */
#ifdef __MINGW32_VERSION
#define write(a,b,c) send(a,b,c, 0)
#define close(fd) closesocket(fd)
#endif
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 64
#endif
#ifdef INET6
/*
 * Address-family preference table for host name resolution.
 * openSocket() picks one row with DNS_order and walks it left to right,
 * passing each value to getaddrinfo() as ai_family; PF_UNSPEC is the last
 * family tried for a row.
 * See rc.c, "dns_order" and dnsorders[].
 */
int ai_family_order_table[7][3] = {
{PF_UNSPEC, PF_UNSPEC, PF_UNSPEC}, /* 0:unspec */
{PF_INET, PF_INET6, PF_UNSPEC}, /* 1:inet inet6 */
{PF_INET6, PF_INET, PF_UNSPEC}, /* 2:inet6 inet */
{PF_UNSPEC, PF_UNSPEC, PF_UNSPEC}, /* 3: --- */
{PF_INET, PF_UNSPEC, PF_UNSPEC}, /* 4:inet */
{PF_UNSPEC, PF_UNSPEC, PF_UNSPEC}, /* 5: --- */
{PF_INET6, PF_UNSPEC, PF_UNSPEC}, /* 6:inet6 */
};
#endif /* INET6 */
/* Jump buffer: openSocket() arms it with SETJMP(), KeyAbort() jumps back
 * to it with LONGJMP(). */
static JMP_BUF AbortLoading;
/*
 * Default TCP port for each scheme, indexed by scheme number
 * (DefaultPort[p_url->scheme]); 0 means no default port is defined.
 * XXX: the order must follow the SCM_ constants in html.h.
 */
static int
DefaultPort[] = {
80, /* http */
70, /* gopher */
21, /* ftp */
21, /* ftpdir */
0, /* local - not defined */
0, /* local-CGI - not defined? */
0, /* exec - not defined? */
119, /* nntp */
119, /* nntp group */
119, /* news */
119, /* news group */
0, /* data - not defined */
0, /* mailto - not defined */
#ifdef USE_SSL
443, /* https */
#endif /* USE_SSL */
};
/*
 * Scheme name -> scheme number table, terminated by a NULL name.
 * getURLScheme() matches names case-insensitively and returns the number
 * of the first matching entry; schemeNumToName() returns the name of the
 * first entry with a given number (so "local" is reported for SCM_LOCAL,
 * not "file").
 */
struct cmdtable schemetable[] = {
{"http", SCM_HTTP},
{"gopher", SCM_GOPHER},
{"ftp", SCM_FTP},
{"local", SCM_LOCAL},
{"file", SCM_LOCAL},
/* {"exec", SCM_EXEC}, */
{"nntp", SCM_NNTP},
/* {"nntp", SCM_NNTP_GROUP}, */
{"news", SCM_NEWS},
/* {"news", SCM_NEWS_GROUP}, */
{"data", SCM_DATA},
#ifndef USE_W3MMAILER
{"mailto", SCM_MAILTO},
#endif
#ifdef USE_SSL
{"https", SCM_HTTPS},
#endif /* USE_SSL */
{NULL, SCM_UNKNOWN},
};
/*
 * Built-in pairs of file name extension (without the dot) and MIME type,
 * terminated by a {NULL, NULL} entry.
 * NOTE(review): the code that searches this table is outside this part of
 * the file; presumably it is the fallback after the user's mime.types
 * tables -- confirm against the lookup routine.
 */
static struct table2 DefaultGuess[] = {
{"html", "text/html"},
{"htm", "text/html"},
{"shtml", "text/html"},
{"xhtml", "application/xhtml+xml"},
{"gif", "image/gif"},
{"jpeg", "image/jpeg"},
{"jpg", "image/jpeg"},
{"png", "image/png"},
{"xbm", "image/xbm"},
{"au", "audio/basic"},
{"gz", "application/x-gzip"},
{"Z", "application/x-compress"},
{"bz2", "application/x-bzip"},
{"tar", "application/x-tar"},
{"zip", "application/x-zip"},
{"lha", "application/x-lha"},
{"lzh", "application/x-lha"},
{"ps", "application/postscript"},
{"pdf", "application/pdf"},
{NULL, NULL}
};
static void add_index_file(ParsedURL *pu, URLFile *uf);
static char * schemeNumToName(int scheme);
/*
 * Path used by DefaultFile() for http (and https) URLs that name no file.
 * May be predefined by the build; the alternative below is kept for
 * reference.
 */
/* #define HTTP_DEFAULT_FILE "/index.html" */
#ifndef HTTP_DEFAULT_FILE
#define HTTP_DEFAULT_FILE "/"
#endif /* not HTTP_DEFAULT_FILE */
#ifdef SOCK_DEBUG
#include <stdarg.h>
/*
 * Debug helper: append a printf-style formatted message to the file
 * "zzzsocklog" in the current directory.
 *
 * message : printf format string, followed by its arguments.
 *
 * The file is opened and closed on every call.  If it cannot be opened
 * the message is silently dropped.
 */
static void
sock_log(char *message, ...)
{
    FILE *f = fopen("zzzsocklog", "a");
    va_list va;

    if (f == NULL)
        return;
    va_start(va, message);
    vfprintf(f, message, va);
    /* every va_start() must be paired with va_end() before returning */
    va_end(va);
    fclose(f);
}
#endif
/* List built from mimetypes_files by initMimeTypes(); NULL when no file
 * is configured. */
static TextList *mimetypes_list;
/* One table per entry of mimetypes_list, in list order, as returned by
 * loadMimeTypes(); an element is NULL when that file could not be opened. */
static struct table2 **UserMimeTypes;
/*
 * Read a mime.types style file and return it as a table of
 * (extension, MIME type) pairs terminated by a {NULL, NULL} entry.
 *
 * Each line that does not start with '#' has the form
 *     type ext1 ext2 ...
 * and yields one entry per extension (item1 = extension, item2 = type).
 * The file is read twice: once to count the entries, once to fill them.
 *
 * Returns NULL when the file cannot be opened.
 */
static struct table2 *
loadMimeTypes(char *filename)
{
    static const char delims[] = " \t\n\r";
    FILE *fp;
    Str line;
    struct table2 *table;
    char *mime, *ext;
    int count = 0;
    int idx = 0;

    fp = fopen(expandPath(filename), "r");
    if (fp == NULL)
        return NULL;

    /* pass 1: count the extensions so the table can be sized */
    for (;;) {
        line = Strfgets(fp);
        if (line->length <= 0)
            break;
        if (line->ptr[0] == '#')
            continue;
        if (strtok(line->ptr, delims) == NULL)
            continue;
        while (strtok(NULL, delims) != NULL)
            count++;
    }

    fseek(fp, 0, SEEK_SET);
    table = New_N(struct table2, count + 1);

    /* pass 2: emit one entry per extension */
    for (;;) {
        line = Strfgets(fp);
        if (line->length <= 0)
            break;
        if (line->ptr[0] == '#')
            continue;
        mime = strtok(line->ptr, delims);
        if (mime == NULL)
            continue;
        for (ext = strtok(NULL, delims); ext != NULL;
             ext = strtok(NULL, delims)) {
            table[idx].item1 = Strnew_charp(ext)->ptr;
            table[idx].item2 = Strnew_charp(mime)->ptr;
            idx++;
        }
    }
    table[idx].item1 = NULL;
    table[idx].item2 = NULL;
    fclose(fp);
    return table;
}
/*
 * Build mimetypes_list from the mimetypes_files setting and load every
 * listed file into UserMimeTypes (one table per file, in list order).
 * Does nothing further when no file list is available.
 */
void
initMimeTypes()
{
    TextListItem *item;
    int slot = 0;

    mimetypes_list = non_null(mimetypes_files)
        ? make_domain_list(mimetypes_files) : NULL;
    if (mimetypes_list == NULL)
        return;

    UserMimeTypes = New_N(struct table2 *, mimetypes_list->nitem);
    for (item = mimetypes_list->first; item != NULL; item = item->next)
        UserMimeTypes[slot++] = loadMimeTypes(item->ptr);
}
/*
 * Return a freshly allocated default path for a URL of the given scheme
 * that names no file, or NULL when the scheme has no default.
 */
static char *
DefaultFile(int scheme)
{
    char *path = NULL;

    switch (scheme) {
    case SCM_HTTP:
#ifdef USE_SSL
    case SCM_HTTPS:
#endif /* USE_SSL */
        path = HTTP_DEFAULT_FILE;
        break;
#ifdef USE_GOPHER
    case SCM_GOPHER:
        path = "1";
        break;
#endif /* USE_GOPHER */
    case SCM_LOCAL:
    case SCM_LOCAL_CGI:
    case SCM_FTP:
    case SCM_FTPDIR:
        path = "/";
        break;
    default:
        break;
    }
    return path != NULL ? allocStr(path, -1) : NULL;
}
/*
 * Signal handler that aborts the operation in progress by jumping back to
 * the SETJMP(AbortLoading) point (armed in openSocket()) with value 1.
 * NOTE(review): presumably installed by the TRAP_ON macro -- confirm in
 * the header that defines it.
 */
static MySignalHandler
KeyAbort(SIGNAL_ARG)
{
LONGJMP(AbortLoading, 1);
SIGNAL_RETURN;
}
#ifdef USE_SSL
/* Shared SSL context; created on demand by openSSLHandle() and released
 * by free_ssl_ctx().  NULL while no context exists. */
SSL_CTX *ssl_ctx = NULL;
/*
 * Release the shared SSL context (if any), reset ssl_ctx to NULL and
 * call ssl_accept_this_site(NULL).
 */
void
free_ssl_ctx()
{
    if (ssl_ctx != NULL) {
        SSL_CTX_free(ssl_ctx);
        ssl_ctx = NULL;
    }
    ssl_accept_this_site(NULL);
}
#if SSLEAY_VERSION_NUMBER >= 0x00905100
#include <openssl/rand.h>
/*
 * Make sure the OpenSSL random number generator is seeded.
 *
 * Order of attempts: already seeded -> nothing to do; otherwise the seed
 * file named by RAND_file_name() (via EGD first when USE_EGD is set);
 * as a last resort, values from lrand48() seeded with the current time
 * are fed in until RAND_status() reports success.  Whenever a seed file
 * name is known it is rewritten at the end.
 *
 * NOTE(review): the lrand48()/time() fallback is predictable; it is kept
 * exactly as it was.
 */
static void
init_PRNG()
{
    char path[256];
    const char *seed_file;
    long noise;

    if (RAND_status())
        return;

    seed_file = RAND_file_name(path, sizeof(path));
    if (seed_file != NULL) {
#ifdef USE_EGD
        if (RAND_egd(seed_file) > 0)
            return;
#endif
        RAND_load_file(seed_file, -1);
    }

    if (!RAND_status()) {
        srand48((long)time(NULL));
        while (!RAND_status()) {
            noise = lrand48();
            RAND_seed((unsigned char *)&noise, sizeof(long));
        }
    }

    if (seed_file != NULL)
        RAND_write_file(seed_file);
}
#endif /* SSLEAY_VERSION_NUMBER >= 0x00905100 */
/*
 * Start an SSL/TLS session on an already connected socket.
 *
 * sock     : connected socket descriptor.
 * hostname : peer host name; used for SNI (when available) and passed to
 *            ssl_get_certificate().
 * p_cert   : on success receives the text returned by
 *            ssl_get_certificate().
 *
 * The shared context ssl_ctx is (re)created when it does not exist or
 * when ssl_forbid_method / the verification settings changed since the
 * previous call.
 *
 * Returns the SSL handle, or NULL on failure.  On every failure path the
 * socket is closed and any handle is freed; except when
 * ssl_get_certificate() rejects the peer, an "SSL error" message is shown.
 */
static SSL *
openSSLHandle(int sock, char *hostname, char **p_cert)
{
    SSL *handle = NULL;
    static char *old_ssl_forbid_method = NULL;
#ifdef USE_SSL_VERIFY
    static int old_ssl_verify_server = -1;
#endif

    /* drop the cached context when the forbidden-method setting changed */
    if (old_ssl_forbid_method != ssl_forbid_method
        && (!old_ssl_forbid_method || !ssl_forbid_method ||
            strcmp(old_ssl_forbid_method, ssl_forbid_method))) {
        old_ssl_forbid_method = ssl_forbid_method;
#ifdef USE_SSL_VERIFY
        ssl_path_modified = 1;
#else
        free_ssl_ctx();
#endif
    }
#ifdef USE_SSL_VERIFY
    if (old_ssl_verify_server != ssl_verify_server) {
        old_ssl_verify_server = ssl_verify_server;
        ssl_path_modified = 1;
    }
    if (ssl_path_modified) {
        free_ssl_ctx();
        ssl_path_modified = 0;
    }
#endif /* defined(USE_SSL_VERIFY) */
    if (ssl_ctx == NULL) {
        int option;
#if OPENSSL_VERSION_NUMBER < 0x0800
        ssl_ctx = SSL_CTX_new();
        X509_set_default_verify_paths(ssl_ctx->cert);
#else /* SSLEAY_VERSION_NUMBER >= 0x0800 */
#if (OPENSSL_VERSION_NUMBER < 0x10100000L) || defined(LIBRESSL_VERSION_NUMBER)
        SSLeay_add_ssl_algorithms();
        SSL_load_error_strings();
#else
        OPENSSL_init_ssl(0, NULL);
#endif
        if (!(ssl_ctx = SSL_CTX_new(SSLv23_client_method())))
            goto eend;
#if (OPENSSL_VERSION_NUMBER < 0x10100000L) || defined(LIBRESSL_VERSION_NUMBER)
        SSL_CTX_set_cipher_list(ssl_ctx, "DEFAULT:!LOW:!RC4:!EXP");
#endif
        /* translate the characters of ssl_forbid_method into option bits */
        option = SSL_OP_ALL;
        if (ssl_forbid_method) {
            if (strchr(ssl_forbid_method, '2'))
                option |= SSL_OP_NO_SSLv2;
            if (strchr(ssl_forbid_method, '3'))
                option |= SSL_OP_NO_SSLv3;
            if (strchr(ssl_forbid_method, 't'))
                option |= SSL_OP_NO_TLSv1;
            if (strchr(ssl_forbid_method, 'T'))
                option |= SSL_OP_NO_TLSv1;
            if (strchr(ssl_forbid_method, '4'))
                option |= SSL_OP_NO_TLSv1;
#ifdef SSL_OP_NO_TLSv1_1
            if (strchr(ssl_forbid_method, '5'))
                option |= SSL_OP_NO_TLSv1_1;
#endif
#ifdef SSL_OP_NO_TLSv1_2
            if (strchr(ssl_forbid_method, '6'))
                option |= SSL_OP_NO_TLSv1_2;
#endif
#ifdef SSL_OP_NO_TLSv1_3
            if (strchr(ssl_forbid_method, '7'))
                option |= SSL_OP_NO_TLSv1_3;
#endif
        }
#ifdef SSL_OP_NO_COMPRESSION
        option |= SSL_OP_NO_COMPRESSION;
#endif
        SSL_CTX_set_options(ssl_ctx, option);
#ifdef SSL_MODE_RELEASE_BUFFERS
        SSL_CTX_set_mode (ssl_ctx, SSL_MODE_RELEASE_BUFFERS);
#endif
#ifdef USE_SSL_VERIFY
        /* derived from openssl-0.9.5/apps/s_{client,cb}.c */
#if 1 /* use SSL_get_verify_result() to verify cert */
        SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_NONE, NULL);
#else
        SSL_CTX_set_verify(ssl_ctx,
                           ssl_verify_server ? SSL_VERIFY_PEER :
                           SSL_VERIFY_NONE, NULL);
#endif
        /* optional client certificate; the key defaults to the same file */
        if (ssl_cert_file != NULL && *ssl_cert_file != '\0') {
            int ng = 1;
            if (SSL_CTX_use_certificate_file
                (ssl_ctx, ssl_cert_file, SSL_FILETYPE_PEM) > 0) {
                char *key_file = (ssl_key_file == NULL
                                  || *ssl_key_file ==
                                  '\0') ? ssl_cert_file : ssl_key_file;
                if (SSL_CTX_use_PrivateKey_file
                    (ssl_ctx, key_file, SSL_FILETYPE_PEM) > 0)
                    if (SSL_CTX_check_private_key(ssl_ctx))
                        ng = 0;
            }
            if (ng) {
                free_ssl_ctx();
                goto eend;
            }
        }
        /* fall back to the default CA locations when none is configured
         * or the configured ones cannot be loaded */
        if ((!ssl_ca_file && !ssl_ca_path)
            || !SSL_CTX_load_verify_locations(ssl_ctx, ssl_ca_file, ssl_ca_path))
#endif /* defined(USE_SSL_VERIFY) */
            SSL_CTX_set_default_verify_paths(ssl_ctx);
#endif /* SSLEAY_VERSION_NUMBER >= 0x0800 */
    }
    handle = SSL_new(ssl_ctx);
    /* SSL_new() returns NULL on failure; do not hand NULL to SSL_set_fd() */
    if (handle == NULL)
        goto eend;
    SSL_set_fd(handle, sock);
#if SSLEAY_VERSION_NUMBER >= 0x00905100
    init_PRNG();
#endif /* SSLEAY_VERSION_NUMBER >= 0x00905100 */
#if (SSLEAY_VERSION_NUMBER >= 0x00908070) && !defined(OPENSSL_NO_TLSEXT)
    SSL_set_tlsext_host_name(handle,hostname);
#endif /* (SSLEAY_VERSION_NUMBER >= 0x00908070) && !defined(OPENSSL_NO_TLSEXT) */
    if (SSL_connect(handle) > 0) {
        Str serv_cert = ssl_get_certificate(handle, hostname);
        if (serv_cert) {
            *p_cert = serv_cert->ptr;
            return handle;
        }
        /* certificate not accepted: tear down without an error message */
        close(sock);
        SSL_free(handle);
        return NULL;
    }
  eend:
    close(sock);
    if (handle)
        SSL_free(handle);
    /* FIXME: gettextize? */
    disp_err_message(Sprintf
                     ("SSL error: %s",
                      ERR_error_string(ERR_get_error(), NULL))->ptr, FALSE);
    return NULL;
}
/*
 * Send the contents of `file' over the SSL connection `ssl'.
 *
 * The file is read in blocks instead of one SSL_write() per byte, and
 * sending stops as soon as SSL_write() reports a failure (<= 0) rather
 * than continuing to write to a broken connection.
 * A file that cannot be opened is silently ignored.
 */
static void
SSL_write_from_file(SSL * ssl, char *file)
{
    char buf[4096];
    size_t len, off;
    int n;
    FILE *fd = fopen(file, "r");

    if (fd == NULL)
        return;
    while ((len = fread(buf, 1, sizeof(buf), fd)) > 0) {
        /* loop in case the connection accepts only part of the block */
        for (off = 0; off < len; off += (size_t)n) {
            n = SSL_write(ssl, buf + off, (int)(len - off));
            if (n <= 0)
                goto done;
        }
    }
  done:
    fclose(fd);
}
#endif /* USE_SSL */
/*
 * Send the contents of `file' to the descriptor `sock'.
 *
 * The file is read in blocks instead of issuing one write() per byte.
 * Short writes are continued, a write interrupted by a signal (EINTR) is
 * retried, and any other failure stops the transfer instead of being
 * ignored.  A file that cannot be opened is silently ignored.
 */
static void
write_from_file(int sock, char *file)
{
    char buf[4096];
    size_t len, off;
    int n;
    FILE *fd = fopen(file, "r");

    if (fd == NULL)
        return;
    while ((len = fread(buf, 1, sizeof(buf), fd)) > 0) {
        for (off = 0; off < len; off += (size_t)n) {
            /* cleared so a stale EINTR cannot be mistaken for a new one */
            errno = 0;
            n = write(sock, buf + off, len - off);
            if (n < 0 && errno == EINTR) {
                n = 0;          /* nothing sent; retry the same range */
                continue;
            }
            if (n <= 0)
                goto done;
        }
    }
  done:
    fclose(fd);
}
/*
 * Return the URL that relative references in `buf' are resolved against:
 * the document's <BASE> URL when set, otherwise the buffer's current URL.
 * Returns NULL when the buffer has no URL (BP_NO_URL) or its current URL
 * is empty.
 */
ParsedURL *
baseURL(Buffer *buf)
{
    /* no URL is defined for the buffer */
    if (buf->bufferprop & BP_NO_URL)
        return NULL;

    /* a <BASE> tag defined in the document takes precedence */
    if (buf->baseURL != NULL)
        return buf->baseURL;

    return IS_EMPTY_PARSED_URL(&buf->currentURL) ? NULL : &buf->currentURL;
}
/*
 * Open a TCP connection to a host.
 *
 * hostname        : host name or numeric address; with INET6 a bracketed
 *                   literal ("[...]") is accepted as well.
 * remoteport_name : service name used as fallback when the numeric port
 *                   is 0 or fails to resolve (INET6 build only).
 * remoteport_num  : port number in host byte order.
 *
 * Returns the connected socket descriptor, or -1 on failure or when the
 * operation is aborted through KeyAbort().
 *
 * Every failure path funnels through the `error' label, which closes a
 * socket that is still open, so no descriptor is leaked.
 */
int
openSocket(char *const hostname,
           char *remoteport_name, unsigned short remoteport_num)
{
    volatile int sock = -1;
#ifdef INET6
    int *af;
    struct addrinfo hints, *res0, *res;
    int error;
    char *hname;
#else /* not INET6 */
    struct sockaddr_in hostaddr;
    struct hostent *entry;
    struct protoent *proto;
    unsigned short s_port;
    int a1, a2, a3, a4;
    unsigned long adr;
#endif /* not INET6 */
    MySignalHandler(*volatile prevtrap) (SIGNAL_ARG) = NULL;

    if (fmInitialized) {
        /* FIXME: gettextize? */
        message(Sprintf("Opening socket...")->ptr, 0, 0);
        refresh();
    }
    if (SETJMP(AbortLoading) != 0) {
#ifdef SOCK_DEBUG
        sock_log("openSocket() failed. reason: user abort\n");
#endif
        /* the error path closes the socket if one is still open */
        goto error;
    }
    TRAP_ON;
    if (hostname == NULL) {
#ifdef SOCK_DEBUG
        sock_log("openSocket() failed. reason: Bad hostname \"%s\"\n",
                 hostname);
#endif
        goto error;
    }
#ifdef INET6
    /* rfc2732 compliance */
    hname = hostname;
    if (hname != NULL && hname[0] == '[' && hname[strlen(hname) - 1] == ']') {
        hname = allocStr(hostname + 1, -1);
        hname[strlen(hname) - 1] = '\0';
        if (strspn(hname, "0123456789abcdefABCDEF:.") != strlen(hname))
            goto error;
    }
    for (af = ai_family_order_table[DNS_order];; af++) {
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = *af;
        hints.ai_socktype = SOCK_STREAM;
        if (remoteport_num != 0) {
            Str portbuf = Sprintf("%d", remoteport_num);
            error = getaddrinfo(hname, portbuf->ptr, &hints, &res0);
        }
        else {
            error = -1;
        }
        if (error && remoteport_name && remoteport_name[0] != '\0') {
            /* try default port */
            error = getaddrinfo(hname, remoteport_name, &hints, &res0);
        }
        if (error) {
            if (*af == PF_UNSPEC) {
                goto error;
            }
            /* try next ai family */
            continue;
        }
        sock = -1;
        for (res = res0; res; res = res->ai_next) {
            sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
            if (sock < 0) {
                continue;
            }
            if (connect(sock, res->ai_addr, res->ai_addrlen) < 0) {
                close(sock);
                sock = -1;
                continue;
            }
            break;
        }
        if (sock < 0) {
            freeaddrinfo(res0);
            if (*af == PF_UNSPEC) {
                goto error;
            }
            /* try next ai family */
            continue;
        }
        freeaddrinfo(res0);
        break;
    }
#else /* not INET6 */
    s_port = htons(remoteport_num);
    bzero((char *)&hostaddr, sizeof(struct sockaddr_in));
    if ((proto = getprotobyname("tcp")) == NULL) {
        /* protocol number of TCP is 6 */
        proto = New(struct protoent);
        proto->p_proto = 6;
    }
    if ((sock = socket(AF_INET, SOCK_STREAM, proto->p_proto)) < 0) {
#ifdef SOCK_DEBUG
        sock_log("openSocket: socket() failed. reason: %s\n", strerror(errno));
#endif
        goto error;
    }
    regexCompile("^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$", 0);
    if (regexMatch(hostname, -1, 1)) {
        /* dotted-quad literal: build the address without a DNS lookup */
        if (sscanf(hostname, "%d.%d.%d.%d", &a1, &a2, &a3, &a4) != 4
            || a1 < 0 || a1 > 255 || a2 < 0 || a2 > 255
            || a3 < 0 || a3 > 255 || a4 < 0 || a4 > 255)
            goto error;
        /* assemble in an unsigned type (no signed overflow on << 24) and
         * store through s_addr so exactly the 4 address bytes are written */
        adr = ((unsigned long)a1 << 24) | ((unsigned long)a2 << 16)
            | ((unsigned long)a3 << 8) | (unsigned long)a4;
        hostaddr.sin_addr.s_addr = htonl(adr);
        hostaddr.sin_family = AF_INET;
        hostaddr.sin_port = s_port;
        if (fmInitialized) {
            message(Sprintf("Connecting to %s", hostname)->ptr, 0, 0);
            refresh();
        }
        if (connect(sock, (struct sockaddr *)&hostaddr,
                    sizeof(struct sockaddr_in)) < 0) {
#ifdef SOCK_DEBUG
            sock_log("openSocket: connect() failed. reason: %s\n",
                     strerror(errno));
#endif
            goto error;
        }
    }
    else {
        char **h_addr_list;
        int result = -1;
        if (fmInitialized) {
            message(Sprintf("Performing hostname lookup on %s", hostname)->ptr,
                    0, 0);
            refresh();
        }
        if ((entry = gethostbyname(hostname)) == NULL) {
#ifdef SOCK_DEBUG
            sock_log("openSocket: gethostbyname() failed. reason: %s\n",
                     strerror(errno));
#endif
            goto error;
        }
        hostaddr.sin_family = AF_INET;
        hostaddr.sin_port = s_port;
        /* try each address of the host until one accepts the connection */
        for (h_addr_list = entry->h_addr_list; *h_addr_list; h_addr_list++) {
            bcopy((void *)h_addr_list[0], (void *)&hostaddr.sin_addr,
                  entry->h_length);
#ifdef SOCK_DEBUG
            adr = ntohl(hostaddr.sin_addr.s_addr);
            sock_log("openSocket: connecting %d.%d.%d.%d\n",
                     (int)((adr >> 24) & 0xff),
                     (int)((adr >> 16) & 0xff),
                     (int)((adr >> 8) & 0xff), (int)(adr & 0xff));
#endif
            if (fmInitialized) {
                message(Sprintf("Connecting to %s", hostname)->ptr, 0, 0);
                refresh();
            }
            if ((result = connect(sock, (struct sockaddr *)&hostaddr,
                                  sizeof(struct sockaddr_in))) == 0) {
                break;
            }
#ifdef SOCK_DEBUG
            else {
                sock_log("openSocket: connect() failed. reason: %s\n",
                         strerror(errno));
            }
#endif
        }
        if (result < 0) {
            goto error;
        }
    }
#endif /* not INET6 */
    TRAP_OFF;
    return sock;
  error:
    /* restore the handler first so the close below cannot be interrupted
     * by another jump back into this function */
    TRAP_OFF;
    if (sock >= 0)
        close(sock);
    return -1;
}
#define COPYPATH_SPC_ALLOW 0
#define COPYPATH_SPC_IGNORE 1
#define COPYPATH_SPC_REPLACE 2
#define COPYPATH_SPC_MASK 3
#define COPYPATH_LOWERCASE 4
/*
 * Copy at most `length' characters of `orgpath' (stopping at its NUL)
 * into a new string.  A negative length never reaches 0 in practice, so
 * -1 means "up to the terminator".
 *
 * option : one COPYPATH_SPC_* value selecting what happens to space
 *          characters (keep, drop, or replace by "%20"), optionally
 *          OR'ed with COPYPATH_LOWERCASE to lower-case every character.
 */
static char *
copyPath(char *orgpath, int length, int option)
{
    Str out = Strnew();
    const int space_mode = option & COPYPATH_SPC_MASK;
    const int fold_case = option & COPYPATH_LOWERCASE;
    char ch;

    for (; (ch = *orgpath) != 0 && length != 0; orgpath++, length--) {
        if (fold_case)
            ch = TOLOWER(ch);
        if (!IS_SPACE(ch)) {
            Strcat_char(out, ch);
            continue;
        }
        if (space_mode == COPYPATH_SPC_ALLOW)
            Strcat_char(out, ch);
        else if (space_mode == COPYPATH_SPC_REPLACE)
            Strcat_charp(out, "%20");
        /* COPYPATH_SPC_IGNORE (or any other value): the space is dropped */
    }
    return out->ptr;
}
/*
 * Split a URL string into its components and store them in *p_url.
 *
 * url     : URL text; it is passed through url_quote() before parsing.
 * p_url   : result; cleared with copyParsedURL(p_url, NULL) first, then
 *           scheme, user, pass, host, port, file, query and label are
 *           filled in as far as they occur in the URL.
 * current : URL the reference is relative to, or NULL.  When `url' is
 *           empty or starts with '#', the whole of `current' is copied;
 *           otherwise only current->scheme is consulted, to choose the
 *           scheme of a URL that has none.
 *
 * The function is a goto-driven state machine with these stages:
 *   analyze_url  - user, password, host and port
 *   analyze_file - path
 *   do_query     - query string after '?'
 *   do_label     - fragment after '#'
 * A URL with an unrecognised scheme is stored verbatim in p_url->file.
 */
void
parseURL(char *url, ParsedURL *p_url, ParsedURL *current)
{
char *p, *q, *qq;
Str tmp;
url = url_quote(url); /* quote 0x01-0x20, 0x7F-0xFF */
p = url;
copyParsedURL(p_url, NULL);
p_url->scheme = SCM_MISSING;
/* RFC1808: Relative Uniform Resource Locators
* 4. Resolving Relative URLs
*/
if (*url == '\0' || *url == '#') {
if (current)
copyParsedURL(p_url, current);
goto do_label;
}
if (!strncasecmp(url, "file://", 7)) {
#if defined( __EMX__ ) || defined( __CYGWIN__ )
if (!strncasecmp(url + 7, "localhost/", 10)) {
p_url->scheme = SCM_LOCAL;
p += 7 + 10 - 1;
url += 7 + 10 - 1;
} else
#endif
{
/* Recognize the machine's host name. This is necessary for URLs
* produced by 'ls --hyperlink' or similar. */
char hostname[HOST_NAME_MAX + 2];
if (gethostname (hostname, HOST_NAME_MAX + 2) == 0) {
size_t hostname_len;
/* Don't use hostname if it is truncated. */
hostname[HOST_NAME_MAX + 1] = '\0';
hostname_len = strlen (hostname);
if (hostname_len <= HOST_NAME_MAX) {
/* "file://<this host>/..." : skip scheme and host so that p and url
* point at the '/' that starts the local path */
if (!strncasecmp(url + 7, hostname, hostname_len)
&& *(url + 7 + hostname_len) == '/') {
p_url->scheme = SCM_LOCAL;
p += 7 + hostname_len;
url += 7 + hostname_len;
}
}
}
}
}
#ifdef SUPPORT_DOS_DRIVE_PREFIX
if (IS_ALPHA(*p) && (p[1] == ':' || p[1] == '|')) {
p_url->scheme = SCM_LOCAL;
goto analyze_file;
}
#endif /* SUPPORT_DOS_DRIVE_PREFIX */
/* search for scheme */
p_url->scheme = getURLScheme(&p);
if (p_url->scheme == SCM_MISSING) {
/* scheme part is not found in the url. This means either
* (a) the url is relative to the current or (b) the url
* denotes a filename (therefore the scheme is SCM_LOCAL).
*/
if (current) {
switch (current->scheme) {
case SCM_LOCAL:
case SCM_LOCAL_CGI:
p_url->scheme = SCM_LOCAL;
break;
case SCM_FTP:
case SCM_FTPDIR:
p_url->scheme = SCM_FTP;
break;
#ifdef USE_NNTP
case SCM_NNTP:
case SCM_NNTP_GROUP:
p_url->scheme = SCM_NNTP;
break;
case SCM_NEWS:
case SCM_NEWS_GROUP:
p_url->scheme = SCM_NEWS;
break;
#endif
default:
p_url->scheme = current->scheme;
break;
}
}
else
p_url->scheme = SCM_LOCAL;
p = url;
if (!strncmp(p, "//", 2)) {
/* URL begins with // */
/* it means that 'scheme:' is abbreviated */
p += 2;
goto analyze_url;
}
/* the url doesn't begin with '//' */
goto analyze_file;
}
/* scheme part has been found */
if (p_url->scheme == SCM_UNKNOWN) {
/* unrecognised scheme: keep the whole URL text as the file */
p_url->file = allocStr(url, -1);
return;
}
/* get host and port */
if (p[0] != '/' || p[1] != '/') { /* scheme:foo or scheme:/foo */
p_url->host = NULL;
if (p_url->scheme != SCM_UNKNOWN)
p_url->port = DefaultPort[p_url->scheme];
else
p_url->port = 0;
goto analyze_file;
}
/* after here, p begins with // */
if (p_url->scheme == SCM_LOCAL) { /* file://foo */
#ifdef __EMX__
p += 2;
goto analyze_file;
#else
if (p[2] == '/' || p[2] == '~'
/* <A HREF="file:///foo">file:///foo</A> or <A HREF="file://~user">file://~user</A> */
#ifdef SUPPORT_DOS_DRIVE_PREFIX
|| (IS_ALPHA(p[2]) && (p[3] == ':' || p[3] == '|'))
/* <A HREF="file://DRIVE/foo">file://DRIVE/foo</A> */
#endif /* SUPPORT_DOS_DRIVE_PREFIX */
) {
p += 2;
goto analyze_file;
}
#endif /* __EMX__ */
}
p += 2; /* scheme://foo */
/* ^p is here */
analyze_url:
/* q marks the start of the authority component being scanned */
q = p;
#ifdef INET6
if (*q == '[') { /* rfc2732,rfc2373 compliance */
p++;
while (IS_XDIGIT(*p) || *p == ':' || *p == '.')
p++;
/* not a well-formed bracketed literal: rescan from the start */
if (*p != ']' || (*(p + 1) && strchr(":/?#", *(p + 1)) == NULL))
p = q;
}
#endif
while (*p && strchr(":/@?#", *p) == NULL)
p++;
switch (*p) {
case ':':
/* scheme://user:pass@host or
* scheme://host:port
*/
qq = q;
q = ++p;
while (*p && strchr("@/?#", *p) == NULL)
p++;
if (*p == '@') {
/* scheme://user:pass@... */
p_url->user = copyPath(qq, q - 1 - qq, COPYPATH_SPC_IGNORE);
p_url->pass = copyPath(q, p - q, COPYPATH_SPC_ALLOW);
p++;
goto analyze_url;
}
/* scheme://host:port/ */
p_url->host = copyPath(qq, q - 1 - qq,
COPYPATH_SPC_IGNORE | COPYPATH_LOWERCASE);
/* NOTE(review): atoi() gives 0 for an empty or non-numeric port and
* the value is not range-checked */
tmp = Strnew_charp_n(q, p - q);
p_url->port = atoi(tmp->ptr);
/* *p is one of ['\0', '/', '?', '#'] */
break;
case '@':
/* scheme://user@... */
p_url->user = copyPath(q, p - q, COPYPATH_SPC_IGNORE);
p++;
goto analyze_url;
case '\0':
/* scheme://host */
case '/':
case '?':
case '#':
p_url->host = copyPath(q, p - q,
COPYPATH_SPC_IGNORE | COPYPATH_LOWERCASE);
if (p_url->scheme != SCM_UNKNOWN)
p_url->port = DefaultPort[p_url->scheme];
else
p_url->port = 0;
break;
}
analyze_file:
#ifndef SUPPORT_NETBIOS_SHARE
if (p_url->scheme == SCM_LOCAL && p_url->user == NULL &&
p_url->host != NULL && *p_url->host != '\0' &&
strcmp(p_url->host, "localhost")) {
/*
* In the environments other than CYGWIN, a URL like
* file://host/file is regarded as ftp://host/file.
* On the other hand, file://host/file on CYGWIN is
* regarded as local access to the file //host/file.
* `host' is a netbios-hostname, drive, or any other
* name; It is CYGWIN system call who interprets that.
*/
p_url->scheme = SCM_FTP; /* ftp://host/... */
if (p_url->port == 0)
p_url->port = DefaultPort[SCM_FTP];
}
#endif
/* no host and nothing before the query/label: the file is empty */
if ((*p == '\0' || *p == '#' || *p == '?') && p_url->host == NULL) {
p_url->file = "";
goto do_query;
}
#ifdef SUPPORT_DOS_DRIVE_PREFIX
if (p_url->scheme == SCM_LOCAL) {
q = p;
if (*q == '/')
q++;
if (IS_ALPHA(q[0]) && (q[1] == ':' || q[1] == '|')) {
if (q[1] == '|') {
/* "C|" is rewritten to "C:" on a private copy */
p = allocStr(q, -1);
p[1] = ':';
}
else
p = q;
}
}
#endif
/* q = start of the path to copy; p scans forward to its end */
q = p;
#ifdef USE_GOPHER
if (p_url->scheme == SCM_GOPHER) {
if (*q == '/')
q++;
if (*q && q[0] != '/' && q[1] != '/' && q[2] == '/')
q++;
}
#endif /* USE_GOPHER */
if (*p == '/')
p++;
if (*p == '\0' || *p == '#' || *p == '?') { /* scheme://host[:port]/ */
p_url->file = DefaultFile(p_url->scheme);
goto do_query;
}
#ifdef USE_GOPHER
if (p_url->scheme == SCM_GOPHER && *p == 'R') {
p++;
tmp = Strnew();
Strcat_char(tmp, *(p++));
while (*p && *p != '/')
p++;
Strcat_charp(tmp, p);
while (*p)
p++;
p_url->file = copyPath(tmp->ptr, -1, COPYPATH_SPC_IGNORE);
}
else
#endif /* USE_GOPHER */
{
/* cgi = position of the first '?', where the path ends */
char *cgi = strchr(p, '?');
again:
while (*p && *p != '#' && p != cgi)
p++;
if (*p == '#' && p_url->scheme == SCM_LOCAL) {
/*
* According to RFC2396, # means the beginning of
* URI-reference, and # should be escaped. But,
* if the scheme is SCM_LOCAL, the special
* treatment will apply to # for convenience.
*/
if (p > q && *(p - 1) == '/' && (cgi == NULL || p < cgi)) {
/*
* # comes as the first character of the file name
* that means, # is not a label but a part of the file
* name.
*/
p++;
goto again;
}
else if (*(p + 1) == '\0') {
/*
* # comes as the last character of the file name that
* means, # is not a label but a part of the file
* name.
*/
p++;
}
}
/* spaces are kept in local file names and dropped otherwise */
if (p_url->scheme == SCM_LOCAL || p_url->scheme == SCM_MISSING)
p_url->file = copyPath(q, p - q, COPYPATH_SPC_ALLOW);
else
p_url->file = copyPath(q, p - q, COPYPATH_SPC_IGNORE);
}
do_query:
if (*p == '?') {
q = ++p;
while (*p && *p != '#')
p++;
p_url->query = copyPath(q, p - q, COPYPATH_SPC_ALLOW);
}
do_label:
/* scheme still missing (empty or '#...' URL without a usable current
* URL): treat the remaining text as a local file name */
if (p_url->scheme == SCM_MISSING) {
p_url->scheme = SCM_LOCAL;
p_url->file = allocStr(p, -1);
p_url->label = NULL;
}
else if (*p == '#')
p_url->label = allocStr(p + 1, -1);
else
p_url->label = NULL;
}
#define ALLOC_STR(s) ((s)==NULL?NULL:allocStr(s,-1))
/*
 * Copy *q into *p, duplicating every string member with allocStr().
 * When q is NULL, *p is zero-filled instead and its scheme is set to
 * SCM_UNKNOWN.
 */
void
copyParsedURL(ParsedURL *p, const ParsedURL *q)
{
    if (q != NULL) {
        /* scalar members */
        p->scheme = q->scheme;
        p->port = q->port;
        p->is_nocache = q->is_nocache;
        /* string members: private copies, NULL stays NULL */
        p->user = ALLOC_STR(q->user);
        p->pass = ALLOC_STR(q->pass);
        p->host = ALLOC_STR(q->host);
        p->file = ALLOC_STR(q->file);
        p->real_file = ALLOC_STR(q->real_file);
        p->label = ALLOC_STR(q->label);
        p->query = ALLOC_STR(q->query);
    }
    else {
        memset(p, 0, sizeof(ParsedURL));
        p->scheme = SCM_UNKNOWN;
    }
}
/*
 * Parse `url' with parseURL() and then complete the result relative to
 * `current' (which may be NULL).
 *
 * After parseURL() the following is done, depending on the scheme:
 *  - mailto (unless USE_W3MMAILER) and data URLs are returned as parsed;
 *  - news/nntp URLs are classified as article or group from the shape of
 *    the file part; nntp URLs without a host take host and port from an
 *    nntp `current';
 *  - for local files the name is unquoted, run through expandName() and
 *    quoted again;
 *  - when the URL has no host and its scheme is compatible with
 *    `current', user, pass, host and port are taken from `current', a
 *    relative path is appended to the directory part of current->file,
 *    and an empty path inherits current->file (plus its query when the
 *    URL has none);
 *  - the path is canonicalised with cleanupName() where applicable, and
 *    real_file is set for local files.
 *
 * Members inherited from `current' are assigned as pointers, not copied.
 */
void
parseURL2(char *url, ParsedURL *pu, ParsedURL *current)
{
char *p;
Str tmp;
int relative_uri = FALSE;
parseURL(url, pu, current);
#ifndef USE_W3MMAILER
if (pu->scheme == SCM_MAILTO)
return;
#endif
if (pu->scheme == SCM_DATA)
return;
if (pu->scheme == SCM_NEWS || pu->scheme == SCM_NEWS_GROUP) {
/* group if the file has no '@' and either no '/', a '-' after the
* first '/', or nothing after the first '/'; otherwise an article */
if (pu->file && !strchr(pu->file, '@') &&
(!(p = strchr(pu->file, '/')) || strchr(p + 1, '-') ||
*(p + 1) == '\0'))
pu->scheme = SCM_NEWS_GROUP;
else
pu->scheme = SCM_NEWS;
return;
}
if (pu->scheme == SCM_NNTP || pu->scheme == SCM_NNTP_GROUP) {
/* drop the leading '/' before applying the same group test */
if (pu->file && *pu->file == '/')
pu->file = allocStr(pu->file + 1, -1);
if (pu->file && !strchr(pu->file, '@') &&
(!(p = strchr(pu->file, '/')) || strchr(p + 1, '-') ||
*(p + 1) == '\0'))
pu->scheme = SCM_NNTP_GROUP;
else
pu->scheme = SCM_NNTP;
if (current && (current->scheme == SCM_NNTP ||
current->scheme == SCM_NNTP_GROUP)) {
if (pu->host == NULL) {
pu->host = current->host;
pu->port = current->port;
}
}
return;
}
if (pu->scheme == SCM_LOCAL) {
char *q = expandName(file_unquote(pu->file));
#ifdef SUPPORT_DOS_DRIVE_PREFIX
Str drive;
/* keep "X:" unquoted, quote only what follows the drive prefix */
if (IS_ALPHA(q[0]) && q[1] == ':') {
drive = Strnew_charp_n(q, 2);
Strcat_charp(drive, file_quote(q+2));
pu->file = drive->ptr;
}
else
#endif
pu->file = file_quote(q);
}
if (current && (pu->scheme == current->scheme ||
(pu->scheme == SCM_FTP && current->scheme == SCM_FTPDIR) ||
(pu->scheme == SCM_LOCAL &&
current->scheme == SCM_LOCAL_CGI))
&& pu->host == NULL) {
/* Copy omitted element from the current URL */
pu->user = current->user;
pu->pass = current->pass;
pu->host = current->host;
pu->port = current->port;
if (pu->file && *pu->file) {
#ifdef USE_EXTERNAL_URI_LOADER
if (pu->scheme == SCM_UNKNOWN
&& strchr(pu->file, ':') == NULL
&& current && (p = strchr(current->file, ':')) != NULL) {
pu->file = Sprintf("%s:%s",
allocStr(current->file,
p - current->file), pu->file)->ptr;
}
else
#endif
if (
#ifdef USE_GOPHER
pu->scheme != SCM_GOPHER &&
#endif /* USE_GOPHER */
pu->file[0] != '/'
#ifdef SUPPORT_DOS_DRIVE_PREFIX
&& !(pu->scheme == SCM_LOCAL && IS_ALPHA(pu->file[0])
&& pu->file[1] == ':')
#endif
) {
/* file is relative [process 1] */
p = pu->file;
if (current->file) {
/* strip current->file back to its last '/' and append the
* relative path */
tmp = Strnew_charp(current->file);
while (tmp->length > 0) {
if (Strlastchar(tmp) == '/')
break;
Strshrink(tmp, 1);
}
Strcat_charp(tmp, p);
pu->file = tmp->ptr;
relative_uri = TRUE;
}
}
#ifdef USE_GOPHER
else if (pu->scheme == SCM_GOPHER && pu->file[0] == '/') {
p = pu->file;
pu->file = allocStr(p + 1, -1);
}
#endif /* USE_GOPHER */
}
else { /* scheme:[?query][#label] */
pu->file = current->file;
if (!pu->query)
pu->query = current->query;
}
/* comment: query part need not to be completed
* from the current URL. */
}
if (pu->file) {
#ifdef __EMX__
if (pu->scheme == SCM_LOCAL) {
if (strncmp(pu->file, "/$LIB/", 6)) {
char abs[_MAX_PATH];
_abspath(abs, file_unquote(pu->file), _MAX_PATH);
pu->file = file_quote(cleanupName(abs));
}
}
#else
if (pu->scheme == SCM_LOCAL && pu->file[0] != '/' &&
#ifdef SUPPORT_DOS_DRIVE_PREFIX /* for 'drive:' */
!(IS_ALPHA(pu->file[0]) && pu->file[1] == ':') &&
#endif
strcmp(pu->file, "-")) {
/* local file, relative path */
tmp = Strnew_charp(CurrentDir);
if (Strlastchar(tmp) != '/')
Strcat_char(tmp, '/');
Strcat_charp(tmp, file_unquote(pu->file));
pu->file = file_quote(cleanupName(tmp->ptr));
}
#endif
/* this `else if' continues whichever `if' the #ifdef above selected */
else if (pu->scheme == SCM_HTTP
#ifdef USE_SSL
|| pu->scheme == SCM_HTTPS
#endif
) {
if (relative_uri) {
/* In this case, pu->file is created by [process 1] above.
* pu->file may contain relative path (for example,
* "/foo/../bar/./baz.html"), cleanupName() must be applied.
* When the entire abs_path is given, it still may contain
* elements like `//', `..' or `.' in the pu->file. It is
* server's responsibility to canonicalize such path.
*/
pu->file = cleanupName(pu->file);
}
}
else if (
#ifdef USE_GOPHER
pu->scheme != SCM_GOPHER &&
#endif /* USE_GOPHER */
pu->file[0] == '/') {
/*
* this happens on the following conditions:
* (1) ftp scheme (2) local, looks like absolute path.
* In both case, there must be no side effect with
* cleanupName(). (I hope so...)
*/
pu->file = cleanupName(pu->file);
}
if (pu->scheme == SCM_LOCAL) {
#ifdef SUPPORT_NETBIOS_SHARE
/* remote share: real_file becomes "//host" + cleaned path */
if (pu->host && strcmp(pu->host, "localhost") != 0) {
Str tmp = Strnew_charp("//");
Strcat_m_charp(tmp, pu->host,
cleanupName(file_unquote(pu->file)), NULL);
pu->real_file = tmp->ptr;
}
else
#endif
pu->real_file = cleanupName(file_unquote(pu->file));
}
}
}
/*
 * Format a parsed URL as text.
 *
 * pu   : URL to format.
 * pass : when non-zero and a user is present, the password is included
 *        as "user:pass@"; otherwise only "user@" is written.
 *
 * Special cases, in the order they are tested:
 *  - SCM_MISSING gives "???";
 *  - SCM_UNKNOWN gives pu->file verbatim;
 *  - a URL with neither host nor file but with a label gives "#label";
 *  - the local file "-" gives "-" (plus "#label" when present);
 *  - mailto (unless USE_W3MMAILER) and data URLs are "scheme:" followed
 *    by the file (and the query for mailto).
 * Everything else is scheme://[user[:pass]@]host[:port]/file[?query][#label],
 * where the port is written only when it differs from the scheme's
 * default and news URLs (with USE_NNTP) omit the "//".
 */
static Str
_parsedURL2Str(ParsedURL *pu, int pass)
{
    Str tmp;
    /* scheme names indexed by scheme number, parallel to DefaultPort[] */
    static char *scheme_str[] = {
        "http", "gopher", "ftp", "ftp", "file", "file", "exec", "nntp", "nntp",
        "news", "news", "data", "mailto",
#ifdef USE_SSL
        "https",
#endif /* USE_SSL */
    };

    if (pu->scheme == SCM_MISSING) {
        return Strnew_charp("???");
    }
    else if (pu->scheme == SCM_UNKNOWN) {
        return Strnew_charp(pu->file);
    }
    if (pu->host == NULL && pu->file == NULL && pu->label != NULL) {
        /* local label */
        return Sprintf("#%s", pu->label);
    }
    /* pu->file may be NULL here (a host without a file), so it is tested
     * before being handed to strcmp() */
    if (pu->scheme == SCM_LOCAL && pu->file != NULL
        && !strcmp(pu->file, "-")) {
        tmp = Strnew_charp("-");
        if (pu->label) {
            Strcat_char(tmp, '#');
            Strcat_charp(tmp, pu->label);
        }
        return tmp;
    }
    tmp = Strnew_charp(scheme_str[pu->scheme]);
    Strcat_char(tmp, ':');
#ifndef USE_W3MMAILER
    if (pu->scheme == SCM_MAILTO) {
        Strcat_charp(tmp, pu->file);
        if (pu->query) {
            Strcat_char(tmp, '?');
            Strcat_charp(tmp, pu->query);
        }
        return tmp;
    }
#endif
    if (pu->scheme == SCM_DATA) {
        Strcat_charp(tmp, pu->file);
        return tmp;
    }
#ifdef USE_NNTP
    if (pu->scheme != SCM_NEWS && pu->scheme != SCM_NEWS_GROUP)
#endif /* USE_NNTP */
    {
        Strcat_charp(tmp, "//");
    }
    if (pu->user) {
        Strcat_charp(tmp, pu->user);
        if (pass && pu->pass) {
            Strcat_char(tmp, ':');
            Strcat_charp(tmp, pu->pass);
        }
        Strcat_char(tmp, '@');
    }
    if (pu->host) {
        Strcat_charp(tmp, pu->host);
        if (pu->port != DefaultPort[pu->scheme]) {
            Strcat_char(tmp, ':');
            Strcat(tmp, Sprintf("%d", pu->port));
        }
    }
    /* insert the '/' separating authority and path when the file does
     * not already start with one (or, for DOS, with a drive prefix) */
    if (
#ifdef USE_NNTP
        pu->scheme != SCM_NEWS && pu->scheme != SCM_NEWS_GROUP &&
#endif /* USE_NNTP */
        (pu->file == NULL || (pu->file[0] != '/'
#ifdef SUPPORT_DOS_DRIVE_PREFIX
                              && !(IS_ALPHA(pu->file[0])
                                   && pu->file[1] == ':'
                                   && pu->host == NULL)
#endif
         )))
        Strcat_char(tmp, '/');
    Strcat_charp(tmp, pu->file);
    if (pu->scheme == SCM_FTPDIR && Strlastchar(tmp) != '/')
        Strcat_char(tmp, '/');
    if (pu->query) {
        Strcat_char(tmp, '?');
        Strcat_charp(tmp, pu->query);
    }
    if (pu->label) {
        Strcat_char(tmp, '#');
        Strcat_charp(tmp, pu->label);
    }
    return tmp;
}
/*
 * Format a parsed URL as text without the password: calls
 * _parsedURL2Str() with pass = FALSE.
 */
Str
parsedURL2Str(ParsedURL *pu)
{
return _parsedURL2Str(pu, FALSE);
}
int
getURLScheme(char **url)
{
char *p = *url, *q;
int i;
int scheme = SCM_MISSING;
while (*p && (IS_ALNUM(*p) || *p == '.' || *p == '+' || *p == '-'))
p++;
if (*p == ':') { /* scheme found */
scheme = SCM_UNKNOWN;
for (i = 0; (q = schemetable[i].cmdname) != NULL; i++) {
int len = strlen(q);
if (!strncasecmp(q, *url, len) && (*url)[len] == ':') {
scheme = schemetable[i].cmd;
*url = p + 1;
break;
}
}
}
return scheme;
}
static char *
schemeNumToName(int scheme)
{
int i;
for (i = 0; schemetable[i].cmdname != NULL; i++) {
if (schemetable[i].cmd == scheme)
return schemetable[i].cmdname;
}
return NULL;
}
/*
 * Build the general HTTP request header lines for a request to `target':
 * User-Agent (unless overridden), Accept, Accept-Encoding,
 * Accept-Language, Host, the no-cache pair and Referer.
 *
 * target  : URL being requested.
 * current : URL of the referring document, or NULL.
 * referer : explicit referer text, NULL to derive it from `current', or
 *           NO_REFERER to send none.
 *
 * No Referer is sent when NoSendReferer is set, when a per-site rule for
 * `current' or `target' forbids it, or (with USE_SSL) when going from an
 * https page to a non-https target.  A derived referer is never built
 * from local/CGI URLs or from ftp URLs carrying a user or password, and
 * any "#label" is left out.
 *
 * Returns the header text, each line terminated by CRLF.
 */
static char *
otherinfo(ParsedURL *target, ParsedURL *current, char *referer)
{
    Str hdr = Strnew();
    const char *site_agent = query_SCONF_USER_AGENT(target);
    const char *agent;
    const int *rule;
    int suppress_referer;
    int downgrade = 0;

    if (!override_user_agent) {
        if (site_agent)
            agent = site_agent;
        else if (UserAgent == NULL || *UserAgent == '\0')
            agent = w3m_version;
        else
            agent = UserAgent;
        Strcat_charp(hdr, "User-Agent: ");
        Strcat_charp(hdr, agent);
        Strcat_charp(hdr, "\r\n");
    }

    Strcat_m_charp(hdr, "Accept: ", AcceptMedia, "\r\n", NULL);
    Strcat_m_charp(hdr, "Accept-Encoding: ", AcceptEncoding, "\r\n", NULL);
    Strcat_m_charp(hdr, "Accept-Language: ", AcceptLang, "\r\n", NULL);

    if (target->host) {
        Strcat_charp(hdr, "Host: ");
        Strcat_charp(hdr, target->host);
        if (target->port != DefaultPort[target->scheme])
            Strcat(hdr, Sprintf(":%d", target->port));
        Strcat_charp(hdr, "\r\n");
    }

    if (target->is_nocache || NoCache) {
        Strcat_charp(hdr, "Pragma: no-cache\r\n");
        Strcat_charp(hdr, "Cache-control: no-cache\r\n");
    }

    /* global switch first, then the per-site rules for both ends */
    suppress_referer = NoSendReferer;
    rule = query_SCONF_NO_REFERER_FROM(current);
    if (rule && *rule)
        suppress_referer = 1;
    rule = query_SCONF_NO_REFERER_TO(target);
    if (rule && *rule)
        suppress_referer = 1;
    if (suppress_referer)
        return hdr->ptr;

#ifdef USE_SSL
    /* Don't send Referer: if https:// -> http:// */
    if (current && current->scheme == SCM_HTTPS
        && target->scheme != SCM_HTTPS)
        downgrade = 1;
#endif
    if (downgrade)
        return hdr->ptr;

    if (referer == NULL) {
        if (current && current->scheme != SCM_LOCAL &&
            current->scheme != SCM_LOCAL_CGI &&
            (current->scheme != SCM_FTP ||
             (current->user == NULL && current->pass == NULL))) {
            /* format the current URL with its label temporarily removed */
            char *saved_label = current->label;

            Strcat_charp(hdr, "Referer: ");
            current->label = NULL;
            Strcat(hdr, parsedURL2Str(current));
            current->label = saved_label;
            Strcat_charp(hdr, "\r\n");
        }
    }
    else if (referer != NO_REFERER) {
        char *frag = strchr(referer, '#');

        Strcat_charp(hdr, "Referer: ");
        if (frag)
            Strcat_charp_n(hdr, referer, frag - referer);
        else
            Strcat_charp(hdr, referer);
        Strcat_charp(hdr, "\r\n");
    }
    return hdr->ptr;
}
/*
 * Return the HTTP method name for a request as a new Str:
 * "CONNECT", "POST" or "HEAD" for the matching command, "GET" for
 * HR_COMMAND_GET and for any other value.
 */
Str
HTTPrequestMethod(HRequest *hr)
{
    char *verb;

    switch (hr->command) {
    case HR_COMMAND_CONNECT:
        verb = "CONNECT";
        break;
    case HR_COMMAND_POST:
        verb = "POST";
        break;
    case HR_COMMAND_HEAD:
        verb = "HEAD";
        break;
    case HR_COMMAND_GET:
    default:
        verb = "GET";
        break;
    }
    return Strnew_charp(verb);
}
/*
 * Build the request target for the HTTP request line.
 *
 *  - CONNECT             : "host:port"
 *  - HR_FLAG_LOCAL set   : the file part, plus "?query" when present
 *  - otherwise           : the full URL including the password, without
 *                          the label
 */
Str
HTTPrequestURI(ParsedURL *pu, HRequest *hr)
{
    Str uri = Strnew();
    char *saved_label;

    if (hr->command == HR_COMMAND_CONNECT) {
        Strcat_charp(uri, pu->host);
        Strcat(uri, Sprintf(":%d", pu->port));
        return uri;
    }

    if (!(hr->flag & HR_FLAG_LOCAL)) {
        /* hide the label while formatting, then put it back */
        saved_label = pu->label;
        pu->label = NULL;
        Strcat(uri, _parsedURL2Str(pu, TRUE));
        pu->label = saved_label;
        return uri;
    }

    Strcat_charp(uri, pu->file);
    if (pu->query) {
        Strcat_char(uri, '?');
        Strcat_charp(uri, pu->query);
    }
    return uri;
}
/*
 * Assemble the text of an HTTP/1.0 request for `pu`.
 *
 *   pu       target URL
 *   current  URL of the referring document; handed to otherinfo()
 *            unless hr->referer is NO_REFERER
 *   hr       request description (command, flag, referer, form data)
 *   extra    optional list of additional header lines, appended verbatim
 *
 * The result contains the request line, the headers produced by
 * otherinfo(), the extra headers, an optional Cookie header and, for
 * POST, the Content-Type/Content-Length headers.  For a non-multipart
 * POST the body is appended as well; for a multipart POST only the
 * headers are produced here.
 */
static Str
HTTPrequest(ParsedURL *pu, ParsedURL *current, HRequest *hr, TextList *extra)
{
    Str req;
    TextListItem *hdr;
#ifdef USE_COOKIE
    Str cookie;
#endif				/* USE_COOKIE */

    /* request line */
    req = HTTPrequestMethod(hr);
    Strcat_charp(req, " ");
    Strcat_charp(req, HTTPrequestURI(pu, hr)->ptr);
    Strcat_charp(req, " HTTP/1.0\r\n");

    /* general headers */
    Strcat_charp(req, (hr->referer == NO_REFERER)
		 ? otherinfo(pu, NULL, NULL)
		 : otherinfo(pu, current, hr->referer));

    /* caller supplied headers */
    for (hdr = (extra != NULL) ? extra->first : NULL; hdr != NULL;
	 hdr = hdr->next) {
#ifdef USE_SSL
	/* Authorization: is withheld from a CONNECT request */
	if (strncasecmp(hdr->ptr, "Authorization:",
			sizeof("Authorization:") - 1) == 0 &&
	    hr->command == HR_COMMAND_CONNECT)
	    continue;
	/* Proxy-Authorization: is withheld from an https request
	 * that is not the CONNECT itself */
	if (strncasecmp(hdr->ptr, "Proxy-Authorization:",
			sizeof("Proxy-Authorization:") - 1) == 0 &&
	    pu->scheme == SCM_HTTPS && hr->command != HR_COMMAND_CONNECT)
	    continue;
#endif
	Strcat_charp(req, hdr->ptr);
    }

#ifdef USE_COOKIE
    if (hr->command != HR_COMMAND_CONNECT &&
	use_cookie && (cookie = find_cookie(pu))) {
	Strcat_charp(req, "Cookie: ");
	Strcat(req, cookie);
	Strcat_charp(req, "\r\n");
	/* [DRAFT 12] s. 10.1 */
	if (cookie->ptr[0] != '$')
	    Strcat_charp(req, "Cookie2: $Version=\"1\"\r\n");
    }
#endif				/* USE_COOKIE */

    if (hr->command != HR_COMMAND_POST) {
	if (header_string)
	    Strcat(req, header_string);
	Strcat_charp(req, "\r\n");
    }
    else if (hr->request->enctype == FORM_ENCTYPE_MULTIPART) {
	Strcat_charp(req, "Content-Type: multipart/form-data; boundary=");
	Strcat_charp(req, hr->request->boundary);
	Strcat_charp(req, "\r\n");
	Strcat(req, Sprintf("Content-Length: %ld\r\n", hr->request->length));
	Strcat_charp(req, "\r\n");
    }
    else {
	if (!override_content_type) {
	    Strcat_charp(req,
			 "Content-Type: application/x-www-form-urlencoded\r\n");
	}
	Strcat(req, Sprintf("Content-Length: %ld\r\n", hr->request->length));
	if (header_string)
	    Strcat(req, header_string);
	Strcat_charp(req, "\r\n");
	Strcat_charp_n(req, hr->request->body, hr->request->length);
	Strcat_charp(req, "\r\n");
    }
#ifdef DEBUG
    fprintf(stderr, "HTTPrequest: [ %s ]\n\n", req->ptr);
#endif				/* DEBUG */
    return req;
}
/*
 * Reset *uf to a clean state and attach `stream` and `scheme` to it.
 * Every byte of the structure is zeroed first; the fields below are
 * then given their explicit initial values.
 */
void
init_stream(URLFile *uf, int scheme, InputStream stream)
{
    memset(uf, 0, sizeof(*uf));

    /* values supplied by the caller */
    uf->scheme = scheme;
    uf->stream = stream;

    /* defaults */
    uf->is_cgi = FALSE;
    uf->encoding = ENC_7BIT;
    uf->compression = CMP_NOCOMPRESS;
    uf->content_encoding = CMP_NOCOMPRESS;
    uf->ext = NULL;
    uf->guess_type = NULL;
    uf->modtime = -1;
}
/*
 * Open the resource named by `url` and return a URLFile describing it.
 *
 *   url           URL text.  When it carries no scheme, `current` is NULL
 *                 and ArgvIsURL is off, it is first converted with
 *                 file_to_url().
 *   pu            receives the parsed URL (filled by parseURL2()).
 *   current       base URL used for parsing; also handed to HTTPrequest().
 *   option        option->flag (RG_NOCACHE) and option->referer are read.
 *   request       form data, or NULL.  POST is used when its method is
 *                 FORM_METHOD_POST and it has a body.
 *   extra_header  additional header lines handed to HTTPrequest().
 *   ouf           when non-NULL, copied as the initial state of the result;
 *                 its stream supplies the socket when an https request
 *                 arrives with *status == HTST_CONNECT.
 *   hr            receives the request description; a local scratch
 *                 HRequest is used when NULL.
 *   status        in/out HTST_* state, read and written on the http/https
 *                 paths.
 *
 * When `ouf` is NULL the result starts with a NULL stream, so the early
 * "return uf" paths report failure through uf.stream == NULL.
 *
 * Writing the request to the file named by w3m_reqlog is best-effort:
 * failing to open the log no longer aborts the request (previously the
 * plain-HTTP path returned without a stream and leaked the socket, and
 * the HTTPS path skipped sending a multipart body).
 */
URLFile
openURL(char *url, ParsedURL *pu, ParsedURL *current,
	URLOption *option, FormList *request, TextList *extra_header,
	URLFile *ouf, HRequest *hr, unsigned char *status)
{
    Str tmp;
    int sock, scheme;
    char *p, *q, *u;
#ifdef USE_GOPHER
    Str gophertmp = NULL;
    char type;
    int n;
#endif
    URLFile uf;
    HRequest hr0;
#ifdef USE_SSL
    SSL *sslh = NULL;
#endif				/* USE_SSL */

    if (hr == NULL)
	hr = &hr0;

    if (ouf) {
	uf = *ouf;
    }
    else {
	init_stream(&uf, SCM_MISSING, NULL);
    }

    u = url;
    scheme = getURLScheme(&u);
    if (current == NULL && scheme == SCM_MISSING && !ArgvIsURL)
	u = file_to_url(url);	/* force to local file */
    else
	u = url;
  retry:
    parseURL2(u, pu, current);
    if (pu->scheme == SCM_LOCAL && pu->file == NULL) {
	if (pu->label != NULL) {
	    /* #hogege is not a label but a filename */
	    Str tmp2 = Strnew_charp("#");
	    Strcat_charp(tmp2, pu->label);
	    pu->file = tmp2->ptr;
	    pu->real_file = cleanupName(file_unquote(pu->file));
	    pu->label = NULL;
	}
	else {
	    /* given URL must be null string */
#ifdef SOCK_DEBUG
	    sock_log("given URL must be null string\n");
#endif
	    return uf;
	}
    }

    uf.scheme = pu->scheme;
    uf.url = parsedURL2Str(pu)->ptr;
    pu->is_nocache = (option->flag & RG_NOCACHE);
    uf.ext = filename_extension(pu->file, 1);

    hr->command = HR_COMMAND_GET;
    hr->flag = 0;
    hr->referer = option->referer;
    hr->request = request;

    switch (pu->scheme) {
    case SCM_LOCAL:
    case SCM_LOCAL_CGI:
	if (request && request->body)
	    /* local CGI: POST */
	    uf.stream = newFileStream(localcgi_post(pu->real_file, pu->query,
						    request, option->referer),
				      (void (*)())fclose);
	else
	    /* local CGI: GET */
	    uf.stream = newFileStream(localcgi_get(pu->real_file, pu->query,
						   option->referer),
				      (void (*)())fclose);
	if (uf.stream) {
	    uf.is_cgi = TRUE;
	    uf.scheme = pu->scheme = SCM_LOCAL_CGI;
	    return uf;
	}
	/* not a CGI: try it as a plain file, a directory with an index
	 * file, and finally relative to document_root */
	examineFile(pu->real_file, &uf);
	if (uf.stream == NULL) {
	    if (dir_exist(pu->real_file)) {
		add_index_file(pu, &uf);
		if (uf.stream == NULL)
		    return uf;
	    }
	    else if (document_root != NULL) {
		tmp = Strnew_charp(document_root);
		if (Strlastchar(tmp) != '/' && pu->file[0] != '/')
		    Strcat_char(tmp, '/');
		Strcat_charp(tmp, pu->file);
		p = cleanupName(tmp->ptr);
		q = cleanupName(file_unquote(p));
		if (dir_exist(q)) {
		    pu->file = p;
		    pu->real_file = q;
		    add_index_file(pu, &uf);
		    if (uf.stream == NULL) {
			return uf;
		    }
		}
		else {
		    examineFile(q, &uf);
		    if (uf.stream) {
			pu->file = p;
			pu->real_file = q;
		    }
		}
	    }
	}
	if (uf.stream == NULL && retryAsHttp && url[0] != '/') {
	    if (scheme == SCM_MISSING || scheme == SCM_UNKNOWN) {
		/* retry it as "http://" */
		u = Strnew_m_charp("http://", url, NULL)->ptr;
		goto retry;
	    }
	}
	return uf;
    case SCM_FTP:
    case SCM_FTPDIR:
	if (pu->file == NULL)
	    pu->file = allocStr("/", -1);
	if (non_null(FTP_proxy) &&
	    !Do_not_use_proxy &&
	    pu->host != NULL && !check_no_proxy(pu->host)) {
	    /* ftp through an HTTP proxy */
	    hr->flag |= HR_FLAG_PROXY;
	    sock = openSocket(FTP_proxy_parsed.host,
			      schemeNumToName(FTP_proxy_parsed.scheme),
			      FTP_proxy_parsed.port);
	    if (sock < 0)
		return uf;
	    uf.scheme = SCM_HTTP;
	    tmp = HTTPrequest(pu, current, hr, extra_header);
	    write(sock, tmp->ptr, tmp->length);
	}
	else {
	    uf.stream = openFTPStream(pu, &uf);
	    uf.scheme = pu->scheme;
	    return uf;
	}
	break;
    case SCM_HTTP:
#ifdef USE_SSL
    case SCM_HTTPS:
#endif				/* USE_SSL */
	if (pu->file == NULL)
	    pu->file = allocStr("/", -1);
	if (request && request->method == FORM_METHOD_POST && request->body)
	    hr->command = HR_COMMAND_POST;
	if (request && request->method == FORM_METHOD_HEAD)
	    hr->command = HR_COMMAND_HEAD;
	if ((
#ifdef USE_SSL
		(pu->scheme == SCM_HTTPS) ? non_null(HTTPS_proxy) :
#endif				/* USE_SSL */
		non_null(HTTP_proxy)) && !Do_not_use_proxy &&
	    pu->host != NULL && !check_no_proxy(pu->host)) {
	    hr->flag |= HR_FLAG_PROXY;
#ifdef USE_SSL
	    if (pu->scheme == SCM_HTTPS && *status == HTST_CONNECT) {
		/* second round: the tunnel exists, start SSL on the
		 * socket of the stream passed in through `ouf` */
		sock = ssl_socket_of(ouf->stream);
		if (!(sslh = openSSLHandle(sock, pu->host,
					   &uf.ssl_certificate))) {
		    *status = HTST_MISSING;
		    return uf;
		}
	    }
	    else if (pu->scheme == SCM_HTTPS) {
		sock = openSocket(HTTPS_proxy_parsed.host,
				  schemeNumToName(HTTPS_proxy_parsed.scheme),
				  HTTPS_proxy_parsed.port);
		sslh = NULL;
	    }
	    else {
#endif				/* USE_SSL */
		sock = openSocket(HTTP_proxy_parsed.host,
				  schemeNumToName(HTTP_proxy_parsed.scheme),
				  HTTP_proxy_parsed.port);
#ifdef USE_SSL
		sslh = NULL;
	    }
#endif				/* USE_SSL */
	    if (sock < 0) {
#ifdef SOCK_DEBUG
		sock_log("Can't open socket\n");
#endif
		return uf;
	    }
#ifdef USE_SSL
	    if (pu->scheme == SCM_HTTPS) {
		if (*status == HTST_NORMAL) {
		    /* first round: ask the proxy for a tunnel */
		    hr->command = HR_COMMAND_CONNECT;
		    tmp = HTTPrequest(pu, current, hr, extra_header);
		    *status = HTST_CONNECT;
		}
		else {
		    hr->flag |= HR_FLAG_LOCAL;
		    tmp = HTTPrequest(pu, current, hr, extra_header);
		    *status = HTST_NORMAL;
		}
	    }
	    else
#endif				/* USE_SSL */
	    {
		tmp = HTTPrequest(pu, current, hr, extra_header);
		*status = HTST_NORMAL;
	    }
	}
	else {
	    /* direct connection to the origin server */
	    sock = openSocket(pu->host, schemeNumToName(pu->scheme), pu->port);
	    if (sock < 0) {
		*status = HTST_MISSING;
		return uf;
	    }
#ifdef USE_SSL
	    if (pu->scheme == SCM_HTTPS) {
		if (!(sslh = openSSLHandle(sock, pu->host,
					   &uf.ssl_certificate))) {
		    *status = HTST_MISSING;
		    return uf;
		}
	    }
#endif				/* USE_SSL */
	    hr->flag |= HR_FLAG_LOCAL;
	    tmp = HTTPrequest(pu, current, hr, extra_header);
	    *status = HTST_NORMAL;
	}
#ifdef USE_SSL
	if (pu->scheme == SCM_HTTPS) {
	    uf.stream = newSSLStream(sslh, sock);
	    if (sslh)
		SSL_write(sslh, tmp->ptr, tmp->length);
	    else
		write(sock, tmp->ptr, tmp->length);
	    if (w3m_reqlog) {
		FILE *ff = fopen(w3m_reqlog, "a");
		/* logging is best-effort: an unwritable log must not
		 * prevent the request body from being sent */
		if (ff != NULL) {
		    if (sslh)
			fputs("HTTPS: request via SSL\n", ff);
		    else
			fputs("HTTPS: request without SSL\n", ff);
		    fwrite(tmp->ptr, sizeof(char), tmp->length, ff);
		    fclose(ff);
		}
	    }
	    if (hr->command == HR_COMMAND_POST &&
		request->enctype == FORM_ENCTYPE_MULTIPART) {
		if (sslh)
		    SSL_write_from_file(sslh, request->body);
		else
		    write_from_file(sock, request->body);
	    }
	    return uf;
	}
	else
#endif				/* USE_SSL */
	{
	    write(sock, tmp->ptr, tmp->length);
	    if (w3m_reqlog) {
		FILE *ff = fopen(w3m_reqlog, "a");
		/* logging is best-effort: returning here would leave
		 * `sock` open without a stream wrapped around it */
		if (ff != NULL) {
		    fwrite(tmp->ptr, sizeof(char), tmp->length, ff);
		    fclose(ff);
		}
	    }
	    if (hr->command == HR_COMMAND_POST &&
		request->enctype == FORM_ENCTYPE_MULTIPART)
		write_from_file(sock, request->body);
	}
	break;
#ifdef USE_GOPHER
    case SCM_GOPHER:
	/* n = number of leading '/'; the character after them is the
	 * gopher item type.  pu->file may be NULL (the code further
	 * down substitutes "1" for it), so guard the scan. */
	p = pu->file;
	n = 0;
	if (p != NULL) {
	    while (*p == '/') {
		++p;
		++n;
	    }
	}
	if (p != NULL && *p != '\0') {
	    type = pu->file[n];
	    switch (type) {
	    case '0':
	    case '1':
	    case 'm':
	    case 's':
	    case 'g':
	    case 'h':
	    case 'I':
	    case '5':
	    case '7':
	    case '9':
		/* strip the type character for the request; the
		 * original path is restored after sending */
		tmp = Strnew_charp(pu->file);
		gophertmp = Strdup(tmp);
		Strdelete(tmp, n, 1);
		pu->file = tmp->ptr;
		break;
	    default:
		type = '\0';
		break;
	    }
	}
	else {
	    type = '\0';
	}
	if (pu->query != NULL) {
	    tmp = Strnew_charp(pu->file);
	    Strcat_char(tmp, '\t');
	    Strcat_charp(tmp, pu->query);
	    pu->file = tmp->ptr;
	}
	if (non_null(GOPHER_proxy) &&
	    !Do_not_use_proxy &&
	    pu->host != NULL && !check_no_proxy(pu->host)) {
	    hr->flag |= HR_FLAG_PROXY;
	    sock = openSocket(GOPHER_proxy_parsed.host,
			      schemeNumToName(GOPHER_proxy_parsed.scheme),
			      GOPHER_proxy_parsed.port);
	    if (sock < 0)
		return uf;
	    uf.scheme = SCM_HTTP;
	    tmp = HTTPrequest(pu, current, hr, extra_header);
	}
	else {
	    sock = openSocket(pu->host, schemeNumToName(pu->scheme), pu->port);
	    if (sock < 0)
		return uf;
	    if (pu->file == NULL)
		pu->file = "1";
	    tmp = Strnew_charp(file_unquote(pu->file));
	    Strcat_char(tmp, '\n');
	}
	write(sock, tmp->ptr, tmp->length);
	if (type != '\0') {
	    pu->file = gophertmp->ptr;
	}
	break;
#endif				/* USE_GOPHER */
#ifdef USE_NNTP
    case SCM_NNTP:
    case SCM_NNTP_GROUP:
    case SCM_NEWS:
    case SCM_NEWS_GROUP:
	if (pu->scheme == SCM_NNTP || pu->scheme == SCM_NEWS)
	    uf.scheme = SCM_NEWS;
	else
	    uf.scheme = SCM_NEWS_GROUP;
	uf.stream = openNewsStream(pu);
	return uf;
#endif				/* USE_NNTP */
    case SCM_DATA:
	/* data URL: "<type>[;base64],<data>" */
	if (pu->file == NULL)
	    return uf;
	p = Strnew_charp(pu->file)->ptr;
	q = strchr(p, ',');
	if (q == NULL)
	    return uf;
	*q++ = '\0';
	tmp = Strnew_charp(q);
	q = strrchr(p, ';');
	if (q != NULL && !strcmp(q, ";base64")) {
	    *q = '\0';
	    uf.encoding = ENC_BASE64;
	}
	else
	    tmp = Str_url_unquote(tmp, FALSE, FALSE);
	uf.stream = newStrStream(tmp);
	uf.guess_type = (*p != '\0') ? p : "text/plain";
	return uf;
    case SCM_UNKNOWN:
    default:
	return uf;
    }
    /* the request went out on `sock`; the reply is read from it */
    uf.stream = newInputStream(sock);
    return uf;
}
/*
 * Try each name listed in `index_file` inside the directory pu->file.
 * The first candidate for which examineFile() yields a stream wins:
 * pu->file / pu->real_file are updated to it and uf->stream is left
 * set.  When `index_file` is unset or yields no names, uf->stream is
 * set to NULL.
 */
static void
add_index_file(ParsedURL *pu, URLFile *uf)
{
    TextList *candidates;
    TextListItem *item;
    char *quoted, *real;

    candidates = non_null(index_file) ? make_domain_list(index_file) : NULL;
    if (candidates == NULL) {
	uf->stream = NULL;
	return;
    }

    for (item = candidates->first; item != NULL; item = item->next) {
	quoted = Strnew_m_charp(pu->file, "/", file_quote(item->ptr),
				NULL)->ptr;
	quoted = cleanupName(quoted);
	real = cleanupName(file_unquote(quoted));
	examineFile(real, uf);
	if (uf->stream == NULL)
	    continue;
	pu->file = quoted;
	pu->real_file = real;
	return;
    }
}
/*
 * Look up the content type for `filename` in `table`, a list of
 * (extension, type) pairs terminated by an entry whose item1 is NULL.
 *
 * The extension is the text after the last '.' in `filename`.  A name
 * with no '.', or whose only '.' is its first character, has no
 * extension.  An exact (case-sensitive) match is preferred; if none
 * exists a case-insensitive match is accepted.
 *
 * Returns the matching item2, or NULL when `table` or `filename` is
 * NULL, there is no extension, or nothing matches.
 *
 * strrchr() replaces the former backwards scan, which computed
 * &filename[strlen(filename) - 1] and therefore formed an out-of-bounds
 * pointer for an empty file name.
 */
static char *
guessContentTypeFromTable(struct table2 *table, char *filename)
{
    struct table2 *t;
    char *ext;

    if (table == NULL || filename == NULL)
	return NULL;
    ext = strrchr(filename, '.');
    if (ext == NULL || ext == filename)
	return NULL;
    ext++;			/* skip the dot itself */

    /* first pass: exact match */
    for (t = table; t->item1; t++) {
	if (!strcmp(ext, t->item1))
	    return t->item2;
    }
    /* second pass: ignore case */
    for (t = table; t->item1; t++) {
	if (!strcasecmp(ext, t->item1))
	    return t->item2;
    }
    return NULL;
}
char *
guessContentType(char *filename)
{
char *ret;
int i;
if (filename == NULL)
return NULL;
if (mimetypes_list == NULL)
goto no_user_mimetypes;
for (i = 0; i < mimetypes_list->nitem; i++) {
if ((ret =
guessContentTypeFromTable(UserMimeTypes[i], filename)) != NULL)
return ret;
}
no_user_mimetypes:
return guessContentTypeFromTable(DefaultGuess, filename);
}
/*
 * Split `domain_list` into its items and return them as a TextList.
 * Items are separated by commas and/or white space; empty items are
 * dropped.  Returns NULL when the string contains no item at all.
 */
TextList *
make_domain_list(char *domain_list)
{
    TextList *result = NULL;
    Str token = Strnew_size(64);
    char *cur = domain_list;

    while (*cur != '\0') {
	/* leading blanks */
	for (; *cur && IS_SPACE(*cur); cur++)
	    ;
	/* one item: everything up to a blank or a comma */
	Strclear(token);
	for (; *cur && !IS_SPACE(*cur) && *cur != ','; cur++)
	    Strcat_char(token, *cur);
	if (token->length > 0) {
	    if (result == NULL)
		result = newTextList();
	    pushText(result, token->ptr);
	}
	/* trailing blanks and at most one separating comma */
	for (; *cur && IS_SPACE(*cur); cur++)
	    ;
	if (*cur == ',')
	    cur++;
    }
    return result;
}
/*
 * Return 1 when `pat` names `domain` itself or one of its parent
 * domains, 0 otherwise.  A leading '.' in `pat` is ignored and the
 * comparison is case-insensitive.  `domain` is compared as a whole
 * first and then again after each of its dots, so only complete
 * labels can match.  A NULL `domain` never matches.
 */
static int
domain_match(char *pat, char *domain)
{
    char *suffix;
    char *dot;

    if (domain == NULL)
	return 0;
    if (pat[0] == '.')
	pat++;

    for (suffix = domain;; suffix = dot + 1) {
	if (strcasecmp(pat, suffix) == 0)
	    return 1;
	dot = strchr(suffix, '.');
	if (dot == NULL)
	    return 0;
    }
}
/*
 * Decide whether `domain` is covered by the NO_proxy_domains list.
 *
 * Returns 1 when an entry of NO_proxy_domains matches, 0 otherwise
 * (also 0 when the list is NULL or empty, or `domain` is NULL).
 *
 * Matching is done in two steps:
 *  1. by name, with domain_match() against every list entry;
 *  2. only when NOproxy_netaddr is set: `domain` is resolved and the
 *     textual form of each resulting address is compared with every
 *     list entry.
 *
 * NOTE(review): step 2 compares only the first strlen(entry) characters
 * of the address, i.e. the entry is treated as a textual prefix (an
 * entry "10.1" also matches "10.10.0.1") -- confirm this is intended.
 */
int
check_no_proxy(char *domain)
{
TextListItem *tl;
/* volatile: the value is read after a possible non-local jump back to
 * the SETJMP() below */
volatile int ret = 0;
/* not referenced by name in this function; presumably used by the
 * TRAP_ON / TRAP_OFF macros -- confirm against their definitions */
MySignalHandler(*volatile prevtrap) (SIGNAL_ARG) = NULL;
if (NO_proxy_domains == NULL || NO_proxy_domains->nitem == 0 ||
domain == NULL)
return 0;
/* step 1: match by name */
for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) {
if (domain_match(tl->ptr, domain))
return 1;
}
if (!NOproxy_netaddr) {
return 0;
}
/*
 * to check noproxy by network addr
 */
/* a jump back to AbortLoading makes the lookup count as "no match" */
if (SETJMP(AbortLoading) != 0) {
ret = 0;
goto end;
}
TRAP_ON;
{
#ifndef INET6
struct hostent *he;
int n;
unsigned char **h_addr_list;
char addr[4 * 16], buf[5];
he = gethostbyname(domain);
if (!he) {
ret = 0;
goto end;
}
for (h_addr_list = (unsigned char **)he->h_addr_list; *h_addr_list;
h_addr_list++) {
/* format the address as decimal bytes joined by '.' */
sprintf(addr, "%d", h_addr_list[0][0]);
for (n = 1; n < he->h_length; n++) {
sprintf(buf, ".%d", h_addr_list[0][n]);
strcat(addr, buf);
}
for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) {
if (strncmp(tl->ptr, addr, strlen(tl->ptr)) == 0) {
ret = 1;
goto end;
}
}
}
#else /* INET6 */
int error;
struct addrinfo hints;
struct addrinfo *res, *res0;
char addr[4 * 16];
int *af;
/* walk the address families of the configured DNS order; every row of
 * ai_family_order_table ends with PF_UNSPEC, which stops the loop */
for (af = ai_family_order_table[DNS_order];; af++) {
memset(&hints, 0, sizeof(hints));
hints.ai_family = *af;
error = getaddrinfo(domain, NULL, &hints, &res0);
if (error) {
if (*af == PF_UNSPEC) {
break;
}
/* try next */
continue;
}
for (res = res0; res != NULL; res = res->ai_next) {
switch (res->ai_family) {
case AF_INET:
inet_ntop(AF_INET,
&((struct sockaddr_in *)res->ai_addr)->sin_addr,
addr, sizeof(addr));
break;
case AF_INET6:
inet_ntop(AF_INET6,
&((struct sockaddr_in6 *)res->ai_addr)->
sin6_addr, addr, sizeof(addr));
break;
default:
/* unknown */
continue;
}
for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) {
if (strncmp(tl->ptr, addr, strlen(tl->ptr)) == 0) {
/* release the result list before leaving the loops */
freeaddrinfo(res0);
ret = 1;
goto end;
}
}
}
freeaddrinfo(res0);
if (*af == PF_UNSPEC) {
break;
}
}
#endif /* INET6 */
}
end:
TRAP_OFF;
return ret;
}
/*
 * Return the extension of `path`, including its leading '.'.
 *
 * The extension starts at the last '.' of `path`; a '.' in the very
 * first position is not considered.  When `is_url` is non-zero the
 * search stops at the first '?', and the extension ends at the first
 * non-alphanumeric character.  At most 8 characters (dot included)
 * are returned, as a newly allocated string.
 *
 * Returns "" when `path` is NULL or has no extension.
 */
char *
filename_extension(char *path, int is_url)
{
    char *dot = "";
    char *cur;
    int len;

    if (path == NULL)
	return dot;

    cur = path;
    if (*cur == '.')
	cur++;
    while (*cur) {
	if (*cur == '.')
	    dot = cur;
	else if (is_url && *cur == '?')
	    break;
	cur++;
    }
    if (*dot != '.')
	return dot;

    len = 1;
    while (len < 8 && dot[len]) {
	if (is_url && !IS_ALNUM(dot[len]))
	    break;
	len++;
    }
    return allocStr(dot, len);
}
#ifdef USE_EXTERNAL_URI_LOADER
/* NULL-terminated array of tables loaded by initURIMethods(); stays
 * NULL when initURIMethods() finds no method map file list */
static struct table2 **urimethods;
/* built-in (scheme, URL template) pairs, consulted after the loaded
 * tables; the list ends with a {NULL, NULL} entry */
static struct table2 default_urimethods[] = {
{"mailto", "file:///$LIB/w3mmail.cgi?%s"},
{NULL, NULL}
};
/*
 * Read a URI method map from `filename` (after expandPath()).
 *
 * Each line that does not start with '#' is expected to look like
 *     scheme: template
 * item1 of an entry receives the text before the first ':', item2 the
 * text after it with leading white space removed.  Lines without a
 * ':' or with nothing after it are ignored.
 *
 * Returns a table terminated by a {NULL, NULL} entry, or NULL when the
 * file cannot be opened.
 *
 * The file is read twice: once to size the table and once to fill it.
 * The second pass is bounded by the size computed in the first one, so
 * a file that grows between the passes cannot overflow the table.
 */
static struct table2 *
loadURIMethods(char *filename)
{
    FILE *f;
    int i, n;
    Str tmp;
    struct table2 *um;
    char *up, *p;

    f = fopen(expandPath(filename), "r");
    if (f == NULL)
	return NULL;

    /* pass 1: count the lines that may become entries */
    n = 0;
    while (tmp = Strfgets(f), tmp->length > 0) {
	if (tmp->ptr[0] != '#')
	    n++;
    }
    fseek(f, 0, SEEK_SET);

    um = New_N(struct table2, n + 1);

    /* pass 2: fill the table, never beyond the n counted slots */
    i = 0;
    while (i < n && (tmp = Strfgets(f), tmp->length > 0)) {
	if (tmp->ptr[0] == '#')
	    continue;
	while (IS_SPACE(Strlastchar(tmp)))
	    Strshrink(tmp, 1);
	up = tmp->ptr;
	p = strchr(up, ':');
	if (p == NULL || p[1] == '\0')
	    continue;		/* no scheme or no template */
	um[i].item1 = Strnew_charp_n(up, p - up)->ptr;
	p++;
	while (*p != '\0' && IS_SPACE(*p))
	    p++;
	um[i].item2 = Strnew_charp(p)->ptr;
	i++;
    }
    um[i].item1 = NULL;
    um[i].item2 = NULL;
    fclose(f);
    return um;
}
/*
 * Load every method map file named in `urimethodmap_files` and store
 * the resulting tables in the NULL-terminated array `urimethods`.
 * Files that cannot be loaded are skipped.  Nothing is allocated when
 * the file list is unset or contains no name.
 */
void
initURIMethods()
{
    TextList *files;
    TextListItem *item;
    struct table2 *table;
    int count = 0;

    if (!non_null(urimethodmap_files))
	return;
    files = make_domain_list(urimethodmap_files);
    if (files == NULL)
	return;

    urimethods = New_N(struct table2 *, (files->nitem + 1));
    for (item = files->first; item; item = item->next) {
	table = loadURIMethods(item->ptr);
	if (table)
	    urimethods[count++] = table;
    }
    urimethods[count] = NULL;
}
/*
 * Find the external handler for a URL whose scheme w3m does not know.
 *
 * Returns NULL when the scheme of `pu` is handled internally (anything
 * but SCM_UNKNOWN), when no method map was loaded, when the URL text
 * has no ':' or when no table lists the scheme.  Otherwise returns the
 * matching template (item2) formatted through Sprintf() with the
 * quoted URL as its argument.  Loaded tables take precedence over
 * default_urimethods.
 */
Str
searchURIMethods(ParsedURL *pu)
{
    struct table2 *entry;
    Str url, scheme;
    char *colon;
    int i;

    if (pu->scheme != SCM_UNKNOWN)
	return NULL;		/* use internal */
    if (urimethods == NULL)
	return NULL;

    url = parsedURL2Str(pu);
    colon = strchr(url->ptr, ':');
    if (colon == NULL)
	return NULL;
    scheme = Strnew_charp_n(url->ptr, colon - url->ptr);

    /*
     * RFC2396 3.1. Scheme Component
     * For resiliency, programs interpreting URI should treat upper case
     * letters as equivalent to lower case in scheme names (e.g., allow
     * "HTTP" as well as "http").
     */
    for (i = 0; (entry = urimethods[i]) != NULL; i++) {
	while (entry->item1 != NULL) {
	    if (strcasecmp(entry->item1, scheme->ptr) == 0)
		return Sprintf(entry->item2, url_quote(url->ptr));
	    entry++;
	}
    }

    for (entry = default_urimethods; entry->item1 != NULL; entry++) {
	if (strcasecmp(entry->item1, scheme->ptr) == 0)
	    return Sprintf(entry->item2, url_quote(url->ptr));
    }
    return NULL;
}
/*
* RFC2396: Uniform Resource Identifiers (URI): Generic Syntax
* Appendix A. Collected BNF for URI
* uric = reserved | unreserved | escaped
* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
* "$" | ","
* unreserved = alphanum | mark
* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
* "(" | ")"
* escaped = "%" hex hex
*
* URI_PATTERN is the regular expression for zero or more `uric`
* characters: one character class holding the reserved and unreserved
* characters, or a '%' followed by two hexadecimal digits.
*/
#define URI_PATTERN "([-;/?:@&=+$,a-zA-Z0-9_.!~*'()]|%[0-9A-Fa-f][0-9A-Fa-f])*"
/*
 * Call reAnchor() on `buf` once for every scheme listed in the loaded
 * method maps and in default_urimethods, using the regular expression
 * "<scheme>:" URI_PATTERN.
 *
 * `urimethods` stays NULL when initURIMethods() found no method map
 * file list; in that case only the built-in schemes are processed
 * (the array was previously dereferenced unconditionally).
 */
void
chkExternalURIBuffer(Buffer *buf)
{
    int i;
    struct table2 *ump;

    if (urimethods != NULL) {
	for (i = 0; (ump = urimethods[i]) != NULL; i++) {
	    for (; ump->item1 != NULL; ump++) {
		reAnchor(buf, Sprintf("%s:%s", ump->item1, URI_PATTERN)->ptr);
	    }
	}
    }

    for (ump = default_urimethods; ump->item1 != NULL; ump++) {
	reAnchor(buf, Sprintf("%s:%s", ump->item1, URI_PATTERN)->ptr);
    }
}
#endif
/*
 * Return the parsed proxy URL configured for `scheme`, or NULL for a
 * scheme that has no proxy setting.  In a DEBUG build an unsupported
 * scheme aborts the program instead.
 */
ParsedURL *
schemeToProxy(int scheme)
{
    switch (scheme) {
    case SCM_HTTP:
	return &HTTP_proxy_parsed;
#ifdef USE_SSL
    case SCM_HTTPS:
	return &HTTPS_proxy_parsed;
#endif
    case SCM_FTP:
	return &FTP_proxy_parsed;
#ifdef USE_GOPHER
    case SCM_GOPHER:
	return &GOPHER_proxy_parsed;
#endif
#ifdef DEBUG
    default:
	abort();
#endif
    }
    return NULL;
}
#ifdef USE_M17N
/*
 * Determine the character set to use for `url`.
 *
 * `url` is resolved against `base` unless it is NULL, empty or begins
 * with '#', in which case `base` itself is examined.  Local files and
 * local CGI use SystemCharset.  Otherwise the value found by
 * query_SCONF_URL_CHARSET() is used when it is set, then `doc_charset`
 * when it is non-zero, and finally DocumentCharset.
 */
wc_ces
url_to_charset(const char *url, const ParsedURL *base, wc_ces doc_charset)
{
    ParsedURL parsed;
    const ParsedURL *target = base;
    const wc_ces *conf;

    if (url && *url && *url != '#') {
	parseURL2((char *)url, &parsed, (ParsedURL *)base);
	target = &parsed;
    }

    if (target &&
	(target->scheme == SCM_LOCAL || target->scheme == SCM_LOCAL_CGI))
	return SystemCharset;

    conf = query_SCONF_URL_CHARSET(target);
    if (conf && *conf)
	return *conf;
    if (doc_charset)
	return doc_charset;
    return DocumentCharset;
}
/*
 * Quote `url` with url_quote_conv(), using the character set that
 * url_to_charset() selects for it.
 */
char *
url_encode(const char *url, const ParsedURL *base, wc_ces doc_charset)
{
    wc_ces charset = url_to_charset(url, base, doc_charset);

    return url_quote_conv((char *)url, charset);
}
#if 0 /* unused */
/*
 * Disabled by the surrounding "#if 0".  Returns `url` unchanged when
 * DecodeURL is off; otherwise unquotes it with url_unquote_conv() using
 * the character set chosen by url_to_charset().
 */
char *
url_decode(const char *url, const ParsedURL *base, wc_ces doc_charset)
{
if (!DecodeURL)
return (char *)url;
return url_unquote_conv((char *)url,
url_to_charset(url, base, doc_charset));
}
#endif
/*
 * Unquote `url` for display.  Returns `url` itself when DecodeURL is
 * off.  The character set is chosen by url_to_charset(), relative to
 * the base URL and document charset of `buf` when a buffer is given,
 * and with no base and charset 0 otherwise.
 */
char *
url_decode2(const char *url, const Buffer *buf)
{
    wc_ces charset;

    if (!DecodeURL)
	return (char *)url;

    if (buf)
	charset = url_to_charset(url, baseURL((Buffer *)buf),
				 buf->document_charset);
    else
	charset = url_to_charset(url, NULL, 0);
    return url_unquote_conv((char *)url, charset);
}
#else /* !defined(USE_M17N) */
/*
 * Unquote `url` with url_unquote_conv() (charset argument 0), or
 * return it unchanged when DecodeURL is off.
 */
char *
url_decode0(const char *url)
{
    return DecodeURL ? url_unquote_conv((char *)url, 0) : (char *)url;
}
#endif /* !defined(USE_M17N) */