/*
 * charset.c -- convert character encoding in dstrings
 *
 * from draft-ietf-mmusic-rtsp-03.ps:
 * RTSP is a text-based protocol and uses the ISO 10646 character set in UTF-8
 * encoding (RFC 2044).  The 10646 character set avoids tricky character set
 * switching, but is invisible to the application as long as US-ASCII is being
 * used. This is also the encoding used for RTCP. ISO 8859-1 translates
 * directly into Unicode, with a high-order octet of zero. ISO 8859-1
 * characters with the most-significant bit set are represented as 1100001x
 * 10xxxxxx.
 *
 * 14.2.98, jw.
 */
#include <jittr/dstring.h>

/*
 * Converts from ISO 10646 charset (UTF-8 encoded) into latin1 (ISO 8859-1).
 * Unicode is to be used with HTTP and RTSP. If dp is zero or identical to
 * sp, inplace conversion is done; otherwise the result is written to the
 * start of the buffer of *dp.
 *
 * Only the two-byte sequences 0xc2 10xxxxxx and 0xc3 10xxxxxx map to latin1
 * characters. Any other byte with the high bit set (including a 0xc2/0xc3
 * lead byte that is not followed by a continuation byte inside the string)
 * is copied unchanged and counted as bad.
 *
 * Returns the number of converted characters, or a negative value
 * (-1 - number of converted characters), if *sp contains characters not
 * representable in iso latin1. Returns 0 if *sp is zero.
 */
int
jittr_unicode2latin(sp, dp)
dstring **sp, **dp;
{
  dstring *s = *sp;
  unsigned char *p, *end, *q;
  int inplace = !dp || dp == sp;
  int l, bad = 0, good = 0;

  if (!s)
    return 0;

  l = s->length;

  if (inplace)
    q = (unsigned char *)s->buf;
  else
    {
      /* Every output byte consumes at least one input byte. */
      dstring_append(dp, 0, NULL, l);	/* over estimate */
      q = (unsigned char *)(*dp)->buf;
    }

  p = (unsigned char *)s->buf;
  end = p + l;

  /*
   * Walk by input position, not by iteration count: a two-byte sequence
   * consumes two input bytes, so counting iterations up to the length
   * would read past the end of the string.
   */
  while (p < end)
    {
      unsigned char c = *p++;

      if ((c == 0xc2 || c == 0xc3) && p < end && (*p & 0xc0) == 0x80)
        {
          /* 1100001x 10xxxxxx: low two bits of c become the top two bits. */
          *q++ = (unsigned char)(((c & 0x03) << 6) | (*p++ & 0x3f));
          good++;
        }
      else
        {
          if (c & 0x80)
            bad++;
          *q++ = c;
        }
    }

  *q = '\0';
  if (inplace)
    s->length = (char *)q - s->buf;
  else
    (*dp)->length = (char *)q - (*dp)->buf;
  return bad ? (-1 - good) : good;
}


/*
 * Converts from ISO 8859-1 charset into unicode (ISO 10646, UTF-8 encoded).
 * Unicode is to be used with HTTP and RTSP. If dp is zero or identical to
 * sp, inplace conversion is done; otherwise the result is written to the
 * start of the buffer of *dp.
 * Every character with the high bit set is escaped as the two-byte
 * sequence 1100001x 10xxxxxx; all other characters are copied unchanged.
 * Returns the number of escaped characters (0 if *sp is zero).
 */
int
jittr_latin2unicode(sp, dp)
dstring **sp, **dp;
{
  dstring *s = *sp;
  unsigned char *p, *q;
  int i, l;
  int escs = 0;

  if (!s)
    return 0;

  /* Each character with the high bit set needs one extra output byte. */
  l = s->length;
  for (i = 0; i < l; i++)
    if (s->buf[i] & 0x80)
      escs++;

  if (dp && dp != sp)
    {
      dstring_append(dp, 0, NULL, l + escs);
      p = (unsigned char *)s->buf;
      q = (unsigned char *)(*dp)->buf;
      (*dp)->length = l + escs;
    }
  else
    {
      /* Inplace edit */
      if (!escs)
        return 0;		/* nothing to be done */
      /*
       * Grow through the caller's pointer: dstring_append() takes a
       * dstring ** and may store a new address there. Growing only a
       * local copy would leave *sp pointing at the old string.
       */
      dstring_append(sp, -1, NULL, escs);
      s = *sp;
      p = (unsigned char *)s->buf + escs;
      q = (unsigned char *)s->buf;
      /*
       * Make room for escaping: move the text up by escs bytes, so that
       * the loop below can read at p while writing at q without q ever
       * overtaking p. (Source and destination overlap; xbcopy() is
       * presumably overlap-safe -- verify against its definition.)
       */
      xbcopy(q, p, l);
      s->length += escs;
    }

  for (i = 0; i < l; i++)
    {
      unsigned char c = *p++;

      if (c & 0x80)
        {
          *q++ = 0xc0 | (c >> 6);	/* 0xc2 or 0xc3 */
          *q++ = 0x80 | (c & 0x3f);
        }
      else
        *q++ = c;
    }

  *q = '\0';
  return escs;
}

#ifdef STANDALONE
# include <stdio.h>		/* printf() is needed with or without DEBUG */
# ifdef DEBUG
/* Assigned in main(): stderr need not be a constant expression. */
FILE *debugfp;
# endif

/*
 * Test driver: converts av[1] from latin1 to unicode and back again,
 * printing the return value and the resulting string after each step.
 * Returns 1 if no argument was given, 0 otherwise.
 */
int main(int ac, char **av)
{
  dstring *d = NULL;
  int r;

# ifdef DEBUG
  debugfp = stderr;
# endif

  if (ac < 2)
    {
      fprintf(stderr, "usage: charset string\n");
      return 1;
    }

  dstring_append(&d, 0, av[1], 0);
  r = jittr_latin2unicode(&d, NULL);
  /* d stays NULL if dstring_append() did not create a string. */
  printf("%d: '%s'\n", r, d ? d->buf : "");
  r = jittr_unicode2latin(&d, NULL);
  printf("%d: '%s'\n", r, d ? d->buf : "");
  return 0;
}
#endif
