/* 
 * openurl.c -- locate and open an url
 *
 * 19.4.97, jw
 * 24.1.98, jw: complete rewrite supporting multiple protocols
 */

#include <unistd.h>		/* read() */
#include <errno.h>		/* errno */
#include "openurl.h"		/* includes jittr/dstring.h */

/*
 * One row of the suffix table: a file name suffix (with its leading dot)
 * and the mime-type reported for it.
 */
struct suff_mime
{
  char *suff;
  char *mime;
};

/*
 * Suffix -> mime-type table used by MimeTypeBySuffix().
 * The first row is the fallback for names that match no suffix;
 * the row with a NULL suff terminates the table.
 */
static struct suff_mime suff_mimes[] =
{
  { "",      "text/plain"      },	/* default mime-type */
  { ".htm",  "text/html"       },
  { ".html", "text/html"       },
  { ".gif",  "image/gif"       },
  { ".jpg",  "image/jpeg"      },
  { ".jpeg", "image/jpeg"      },
  { ".xbm",  "image/x-xbitmap" },
  { NULL,    NULL              }
};

/*
 * One row of the protocol table: protocol name as it appears before ":/"
 * in a url, the port used when the url names none, and the function that
 * opens such a url.
 */
struct url_method
{
  char *proto;
  int def_port;
  int (*open)(struct url_info *u);
};

/*
 * Protocols known to OpenUrl1().  Names are compared case-insensitively.
 * The first row doubles as the default when a url names no protocol;
 * the row with a NULL proto terminates the table.
 */
static struct url_method url_methods[] =
{
  { "",     80, OpenUrlHttp },	/* default proto */
  { "HTTP", 80, OpenUrlHttp },
  { "FTP",  21, OpenUrlFtp  },
  { "FILE", -1, OpenUrlFile },
  { NULL,    0, NULL        }
};

static char *MakeUrl(dstring **url, char *base);
static char *BaseUrl(dstring **base);

/*
 * OpenUrl1 -- resolve the url in *u, select a protocol method and open it.
 *
 * u->url and (if nonzero) u->base define the url string: u->base is cut
 * after its last '/' and u->url is replaced by the combined url.
 * u->port, u->host
 * must be carefully zeroed before entering, or their values override the
 * builtin defaults.
 * NOTE(review): for anything but an existing local file, u->port is reset
 * to the method's default port before the url is parsed, so a preset port
 * only survives in the local-file case -- confirm which one is intended.
 *
 * A url without a "proto:/" prefix is opened with the FILE method if
 * access() finds it, otherwise with the default method (the first
 * url_methods entry).
 *
 * Returns whatever the method's open function returns, or -1 with errno
 * set: EINVAL if there is no url to open, EPROTONOSUPPORT (ENODEV where
 * that is not defined) if the protocol is not listed in url_methods.
 */
int OpenUrl1(struct url_info *u)
{
  char *p;
  struct url_method *m = NULL;

  /* MakeUrl() returns NULL when there is no url to work on */
  if (!u || !MakeUrl(&u->url, BaseUrl(&u->base)))
    {
      errno = EINVAL;
      return -1;
    }

  if ((p = strstr(u->url->buf, ":/")))
    {
      /* cut the url at the ':' just long enough to compare the protocol */
      *p = '\0';
      for (m = url_methods; m->proto; m++)
        if (!strcasecmp(m->proto, u->url->buf))
          break;
      *p = ':';
    }

  /* protocol specified, but unknown value? */
  if (m && !m->proto)
    {
#ifdef EPROTONOSUPPORT
      errno = EPROTONOSUPPORT;
#else
      errno = ENODEV;
#endif
      return -1;
    }

  /* from here on m is either NULL (no protocol given) or a known method */

  if (!m && !access(u->url->buf, F_OK))
    {
      /* no protocol specified and it is a simple file */
      for (m = url_methods; m->proto; m++)
        if (!strcasecmp(m->proto, "FILE"))
          break;
    }
  else
    {
      /* still no protocol? Use default */
      if (!m)
        m = url_methods;

      u->port = m->def_port;
      if (!p || !strncmp(p, "://", 3))
        {
          /* "host[:port]" runs up to the next '/' or the end of the url */
          char *e = p = p ? (p + 3) : u->url->buf;

          while (*e && *e != ':' && *e != '/')
            e++;
          /*
           * NOTE(review): e == p when the host part is empty; confirm that
           * dstring_append() treats a zero length as intended here.
           */
          dstring_append(&u->host, 0, p, e-p);
          if (*e == ':')
            {
              u->port = atoi(++e);
              while (*e && *e != '/')
                e++;
            }
          p = e;
        }
    }

  if (!u->host || !u->host->length)
    dstring_append(&u->host, 0, "localhost", 0);
  /* also catches a port that parsed as 0 */
  if (!u->port)
    u->port = m->def_port;

  /* "proto:/path" form: skip the ':' so that the path starts at the '/' */
  if (p && *p == ':')
    p++;
  dstring_append(&u->path, 0, p ? p : u->url->buf, 0);
  if (!u->path->length) dstring_append(&u->path, 0, "/", 1);
  dstring_append(&u->mime, 0, MimeTypeBySuffix(u->path->buf), 0);
  return m->open(u);		/* shall fill in most of *u */
}

int OpenUrl(char *url, struct url_info *u, int mode)
{
  static struct url_info i;

  if (!u) u = &i;
  bzero((char *)u, sizeof(*u));
  dstring_append(&u->url, 0, url, 0);
  u->mode = mode;
  u->timeout = DEFAULT_TIMEOUT;
  return OpenUrl1(u);
}

/*
 * FreeUrlInfo -- release every string held by *i.
 *
 * The structure itself is not freed.  Each pointer is reset to NULL after
 * it has been freed, so calling this twice or reusing *i afterwards does
 * not touch freed memory.  A NULL i is ignored.
 */
void 
FreeUrlInfo(struct url_info *i)
{
  if (!i)
    return;

  /* free(NULL) is a no-op, so unset fields need no extra test */
#define FREE_FIELD(f) do { free((char *)(i->f)); i->f = NULL; } while (0)
  FREE_FIELD(url);
  FREE_FIELD(base);
  FREE_FIELD(proto);
  FREE_FIELD(host);
  FREE_FIELD(path);
  FREE_FIELD(mime);
  FREE_FIELD(name);
  FREE_FIELD(pass);
  FREE_FIELD(modified);
  FREE_FIELD(date);
  FREE_FIELD(server);
  FREE_FIELD(redirect);
#undef FREE_FIELD
}

/*
 * MimeTypeBySuffix -- look up the mime-type for a file name.
 *
 * The suffix is everything from the last '.' in name onwards; a name
 * without any '.' is compared as a whole.  The comparison against the
 * suff_mimes table ignores case.  Returns the mime-type of the matching
 * row, or that of the first (default) row if nothing matches.
 */
char *MimeTypeBySuffix(char *name)
{
  char *suffix = name;
  char *c;
  struct suff_mime *row;

  /* remember the position of the last dot */
  for (c = name; *c; c++)
    {
      if (*c == '.')
        suffix = c;
    }

  for (row = suff_mimes; row->suff; row++)
    {
      if (strcasecmp(row->suff, suffix) == 0)
        return row->mime;
    }

  /* no suffix identified: use default */
  return suff_mimes[0].mime;
}

/*
 * BaseUrl -- strip the last path component from *d, in place.
 *
 * Everything behind the last '/' is cut off (the '/' itself stays) and the
 * length is adjusted accordingly.  A string without any '/' is left
 * untouched.  Returns the buffer of *d, or NULL if d or *d is NULL.
 */
static char *BaseUrl(dstring **d)
{
  char *buf;
  size_t i;

  if (!d || !*d)
    return NULL;

  buf = (*d)->buf;

  /*
   * Scan backwards from the terminator down to the first character.
   * An index is used so that no pointer below the start of the buffer is
   * ever formed when the string holds no '/'.
   */
  for (i = (size_t)(*d)->length + 1; i-- > 0; )
    {
      if (buf[i] == '/')
        {
          buf[i + 1] = '\0';
          (*d)->length = i + 1;
          break;
        }
    }
  return buf;
}

/*
 * MakeUrl -- concatenate base and url in one of three ways into url:
 * -  url contains :/ before any other /, then it is absolute and base is
 *    not used
 * -  else url starts with /, then base is used without any path components
 * -  otherwise the entire base is prepended before url.
 *
 * *url is replaced by a newly built dstring and the old one is freed.
 * base may be NULL, in which case url is taken as it is.
 * Returns the text of the new url, or NULL if url or *url is NULL.
 */
static char *MakeUrl(dstring **url, char *base)
{
  dstring *d = NULL;
  char *colon, *slash;
  int rel, blen;
  
  if (!url || !*url)
    return NULL;

  colon = strstr((*url)->buf, ":/");
  slash = strchr((*url)->buf, '/');
  /*
   * The url is absolute only if its first '/' is the one right behind the
   * ':' of ":/".  A '/' in front of the colon, or no ":/" at all, makes it
   * relative.  (slash cannot be NULL when colon is set.)
   */
  rel = (!colon || slash < colon) ? 1 : 0;

  if (rel && base)
    {
      blen = strlen(base);
      if (*((*url)->buf) == '/')
        {
          /* keep only "proto://host" of base, i.e. stop at the path */
          char *p = strstr(base, "://");

          if ((p = strchr(p ? p+3 : base, '/')) && blen > p - base)
            blen = p - base;
        }
      /*
       * NOTE(review): blen becomes 0 when base itself starts with '/';
       * confirm how dstring_append() treats a zero length.
       */
      dstring_append(&d, 0, base, blen);
    }
  dstring_append(&d, -1, (*url)->buf, (*url)->length);
  free((char *)(*url));
  *url = d;
  return (*url)->buf;
}

#if 0
/*
 * IsToken -- test whether the text at *pp starts with token t.
 * (Currently compiled out by the enclosing #if 0.)
 *
 * Two characters match if they are equal, or if both are alphabetic and
 * equal after tolower().  On a match *pp is advanced behind the token,
 * *ll is reduced by the token length and 1 is returned.  Otherwise 0 is
 * returned and *pp and *ll are left as they were.
 * The remaining length is only counted down, never checked, so the
 * comparison itself is not bounded by *ll.
 */
static int IsToken   __P((char **bufp, char *token, int *lenp));

static int IsToken(pp, t, ll)
char **pp, *t;
int *ll;
{
  char *p = *pp;
  int l = *ll;

  while (*t)
    {
      if (*t!=*p && (!isalpha(*t) || !isalpha(*p) || tolower(*t)!=tolower(*p)))
        return 0;
      t++;
      p++;
      l--;
    }
  /* whole token matched: publish the advanced position and length */
  *pp = p;
  *ll = l;
  return 1;
}

/*
 * AnchorFindHref -- scan the document on fd for anchors named in nam[].
 * (Currently compiled out by the enclosing #if 0.)
 *
 * url   url of the document, used to derive the initial base (may be NULL)
 * fd    descriptor the document is read from, until read() returns <= 0
 * nam   vlen tokens to look for while inside an <A ...> element
 * val   vlen result buffers; each is emptied first
 * blen  size used to limit what is copied into a val[] buffer
 *
 * The text is searched for "<BASE ", "<A ", "</A>" and "HREF=" with
 * IsToken().  An HREF value replaces the base while inside <BASE, the
 * current href otherwise.  When a nam[i] token is found inside an anchor,
 * href and base are combined and copied to val[i].  Always returns 0.
 *
 * NOTE(review): the calls to BaseUrl() and MakeUrl() below pass two and
 * three arguments, while the definitions in this file take one and two;
 * this code needs adapting before it can be enabled again.
 */
int AnchorFindHref(url, fd, nam, vlen, val, blen)
char *url, **nam, **val;
int fd, vlen, blen;
{
  dstring *ds = NULL;
  dstring *base = NULL;
  dstring *href = NULL;
  dstring *a_url = NULL;
  char *s;
  int r, reftype = 0;

  for (r = 0; r < vlen; r++)
    val[r][0] = '\0';

  if (url)
    {
      dstring_append(&base, 0, NULL, strlen(url));
      BaseUrl(url, base->buf);
      base->length = strlen(base->buf);
    }

  /* slurp the whole document into ds, 1024 bytes at a time */
  dstring_append(&ds, 0, NULL, 1024);
  while ((r = read(fd, ds->buf + ds->length, 1024)) > 0)
    {
      ds->length += r;
      dstring_append(&ds, -1, NULL, 1024);
    }

  s = ds->buf;
  while (s < ds->buf+ds->length)
    {
      /* l: number of bytes left from s to the end of the text */
      int l = ds->buf+ds->length-s;

      /* reftype remembers which element we are in: 'B', 'A' or 0 */
      if (IsToken(&s, "<BASE ", &l))
        reftype = 'B';
      if (IsToken(&s, "<A ", &l))
        reftype = 'A';
      if (IsToken(&s, "</A>", &l) ||
               (reftype == 'B' && IsToken(&s, ">", &l)))
        reftype = 0;
      if (IsToken(&s, "HREF=", &l))
        {
	  /* skip an opening quote, then collect up to whitespace or '"' */
	  if (*s == '"')
	    { 
	      s++; 
	      l--;
	    }
	  dstring_append(reftype == 'B' ? &base : &href, 0, NULL, 0);
	  while (l > 0 && *s != ' ' && *s != '\t' && *s != '"' && *s != '\n' && *s != '\r')
	    {
	      dstring_append(reftype == 'B' ? &base : &href, -1, s++, 1);
	      l--;
	    }
	}
      if (reftype == 'A')
        {
	  int i;

	  for (i = 0; i < vlen; i++)
	    {
	      if (IsToken(&s, nam[i], &l))
	        {
		  int n;

		  /*
		   * NOTE(review): href and base are dereferenced here but
		   * are still NULL if no HREF= (or no url / <BASE HREF=>)
		   * has been seen so far.
		   */
		  dstring_append(&a_url, 0, NULL, href->length + base->length);
		  MakeUrl(href->buf, base->buf, a_url->buf);
		  a_url->length = strlen(a_url->buf);
		  n = a_url->length < blen ? a_url->length : blen - 1;
		  /*
		   * NOTE(review): n never exceeds the string length, so
		   * strncpy() writes no terminating NUL into val[i].
		   */
		  strncpy(val[i], a_url->buf, n);
		}
	    }
	}
      s++;
    }

  if (ds)    free((char *)ds);
  if (base)  free((char *)base);
  if (href)  free((char *)href);
  if (a_url) free((char *)a_url);
  return 0;
}
#endif

#ifdef STANDALONE
/* Stub for the STANDALONE build: takes no part in any I/O, returns -4. */
int TcpPortConnect() { return -4; }

/*
 * Test driver for the STANDALONE build: opens the url given in av[1]
 * (with av[2] as second argument) and prints the anchors named
 * up/down/next/prev/back.
 *
 * NOTE(review): this driver is out of date.  OpenUrl() as defined in this
 * file takes (url, struct url_info *, mode), not the four arguments passed
 * below, and AnchorFindHref() sits inside an #if 0 region.
 */
int main(ac, av)
int ac;
char **av;
{
  char *url;
  int h;
  int fd = OpenUrl(av[1], av[2], &url, &h);

  printf("url=%s, is_http=%d, fd=%d\n", url, h, fd);
  /* NOTE(review): this is also true for a negative (failed) fd */
  if (fd)
    {
      int i;
      char *nam[5], *val[5], buf[5*256];

      /* five result buffers of 256 bytes each, carved out of buf */
      nam[0] = "up";	val[0] = buf+0*256;
      nam[1] = "down";	val[1] = buf+1*256;
      nam[2] = "next";	val[2] = buf+2*256;
      nam[3] = "prev";	val[3] = buf+3*256;
      nam[4] = "back";	val[4] = buf+4*256;
      AnchorFindHref(url, fd, nam, 5, val, 256);
      for (i = 0; i < 5; i++)
	printf("\t%s\t%s\n", nam[i], val[i]);
    }
  return close(fd);
}
#endif
