#include "inc\mc.h"

/*
 * Convert a Windows-1251 byte string to wide characters.
 *
 * ws  - destination; receives at most `len` converted characters followed
 *       by a terminating 0, so it needs room for len + 1 elements.
 * s   - source bytes; conversion stops at the first NUL byte.
 * len - maximum number of source bytes to convert.
 *
 * Bytes 0xC0..0xFF are shifted by 0x350 onto U+0410..U+044F, and
 * 0xA8/0xAA/0xAF/0xB8/0xBA/0xBF map to U+0401/0404/0407/0451/0454/0457.
 * Every other byte is copied through unchanged.
 */
void win12512unicode(wchar_t *ws, const char *s, int len)
{
  /* The length is tested before the read so that s[len] is never touched. */
  while (len-- > 0)
  {
    /* Read through unsigned char: plain char may be signed, which would
       turn every byte >= 0x80 negative and defeat the comparisons below. */
    int c = (unsigned char)*s++;
    if (c == 0) break;

    switch (c)
    {
    case 0xA8: c = 0x401; break;
    case 0xAA: c = 0x404; break;
    case 0xAF: c = 0x407; break;
    case 0xB8: c = 0x451; break;
    case 0xBA: c = 0x454; break;
    case 0xBF: c = 0x457; break;
    default:
      if (c >= 0xC0) c += 0x350;
      break;
    }
    *ws++ = (wchar_t)c;
  }
  *ws = 0;
}

/*
 * Translation table for the upper half (0x80..0xFF) of the "Win" encoding
 * into DOS code page bytes.  char8to16() indexes it with (byte - 128) for
 * type 1 ("Win->Dos") and then feeds the result through dos2unicode[].
 * Entries below 0x80 are returned by char8to16() as plain ASCII; 0x5F ('_')
 * is presumably the placeholder for characters with no DOS equivalent.
 *
 * NOTE(review): the element type is plain char, so on compilers where char
 * is signed the entries >= 0x80 read back as negative values; readers must
 * convert through unsigned char before comparing against 128.
 */
const char wintranslation[128] =
{
	0x5F,0x5F,0x27,0x5F,0x22,0x3A,0xC5,0xD8,0x5F,0x25,0x5F,0x3C,0x5F,0x5F,0x5F,0x5F, /* input 0x80..0x8F */
	0x5F,0x27,0x27,0x22,0x22,0x07,0x2D,0x2D,0x5F,0x54,0x5F,0x3E,0x5F,0x5F,0x5F,0x5F, /* input 0x90..0x9F */
	0xFF,0xF6,0xF7,0x5F,0xFD,0x83,0xB3,0x15,0xF0,0x63,0xF2,0x3C,0xBF,0x2D,0x52,0xF4, /* input 0xA0..0xAF */
	0xF8,0x2B,'I' ,'i' ,0xA3,0xE7,0x14,0xFA,0xF1,0xFC,0xF3,0x3E,0x5F,0x5F,0x5F,0xF5, /* input 0xB0..0xBF */
	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, /* input 0xC0..0xCF */
	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, /* input 0xD0..0xDF */
	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, /* input 0xE0..0xEF */
	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF  /* input 0xF0..0xFF */
};

/*
 * Translation table for the upper half (0x80..0xFF) of the "Koi8" encoding
 * into DOS code page bytes.  char8to16() indexes it with (byte - 128) for
 * type 2 ("Koi8->Dos") and then feeds the result through dos2unicode[].
 * The first two rows are identical to those of wintranslation[].
 *
 * NOTE(review): the element type is plain char, so on compilers where char
 * is signed the entries >= 0x80 read back as negative values; readers must
 * convert through unsigned char before comparing against 128.
 */
const char koi8translation[128] =
{
	0x5F,0x5F,0x27,0x5F,0x22,0x3A,0xC5,0xD8,0x5F,0x25,0x5F,0x3C,0x5F,0x5F,0x5F,0x5F, /* input 0x80..0x8F */
	0x5F,0x27,0x27,0x22,0x22,0x07,0x2D,0x2D,0x5F,0x54,0x5F,0x3E,0x5F,0x5F,0x5F,0x5F, /* input 0x90..0x9F */
	0xFF,0xF6,0xF7,0xF1,0xF3,0x5F,'i' ,0xF5,0xF0,0x63,0xF2,0x3C,0xBF,0xA3,0x52,0xF4, /* input 0xA0..0xAF */
	0xF8,0x2B,0x5F,0xF0,0xF2,0xE7,'I' ,0xF4,0xF1,0xFC,0xF3,0x3E,0x5F,0x83,0x5F,0xF5, /* input 0xB0..0xBF */

	/* Letter area: the KOI8 letter order differs from the DOS one, hence the shuffled values. */
	0xEE,0xA0,0xA1,0xE6,0xA4,0xA5,0xE4,0xA3,0xE5,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE, /* input 0xC0..0xCF */
	0xAF,0xEF,0xE0,0xE1,0xE2,0xE3,0xA6,0xA2,0xEC,0xEB,0xA7,0xE8,0xED,0xE9,0xE7,0xEA, /* input 0xD0..0xDF */
	0x9E,0x80,0x81,0x96,0x84,0x85,0x94,0x83,0x95,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E, /* input 0xE0..0xEF */
	0x8F,0x9F,0x90,0x91,0x92,0x93,0x86,0x82,0x9C,0x9B,0x87,0x98,0x9D,0x99,0x97,0x9A  /* input 0xF0..0xFF */
};

/*
 * Unicode values for the upper half (0x80..0xFF) of the DOS code page.
 * char8to16() indexes it with (byte - 128) as the final translation step.
 * 0x80..0xAF and 0xE0..0xEF map onto the contiguous ranges U+0410..U+043F
 * and U+0440..U+044F.  0xB0..0xDF map to ASCII / Latin-1 characters such
 * as '-', '+', 'L', 'T', '=' and U+00A6 - presumably stand-ins for glyphs
 * the target font cannot draw; confirm before "correcting" them.
 */
const unsigned short dos2unicode[128] =
{
	0x0410,0x0411,0x0412,0x0413,0x0414,0x0415,0x0416,0x0417, /* 0x80..0x87 */
	0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,0x041F, /* 0x88..0x8F */
	0x0420,0x0421,0x0422,0x0423,0x0424,0x0425,0x0426,0x0427, /* 0x90..0x97 */
	0x0428,0x0429,0x042A,0x042B,0x042C,0x042D,0x042E,0x042F, /* 0x98..0x9F */
	0x0430,0x0431,0x0432,0x0433,0x0434,0x0435,0x0436,0x0437, /* 0xA0..0xA7 */
	0x0438,0x0439,0x043A,0x043B,0x043C,0x043D,0x043E,0x043F, /* 0xA8..0xAF */
	0x002D,0x002D,0x002D,0x00A6,0x002B,0x00A6,0x00A6,0x00AC, /* 0xB0..0xB7 */
	0x00AC,0x00A6,0x00A6,0x00AC,0x002D,0x002D,0x002D,0x00AC, /* 0xB8..0xBF */
	0x004C,0x002B,0x0054,0x002B,0x002D,0x002B,0x00A6,0x00A6, /* 0xC0..0xC7 */
	0x004C,0x0433,0x00A6,0x0054,0x00A6,0x003D,0x002B,0x00A6, /* 0xC8..0xCF */
	0x00A6,0x0054,0x0054,0x004C,0x004C,0x002D,0x0433,0x002B, /* 0xD0..0xD7 */
	0x002B,0x002D,0x002D,0x002D,0x002D,0x00A6,0x00A6,0x002D, /* 0xD8..0xDF */
	0x0440,0x0441,0x0442,0x0443,0x0444,0x0445,0x0446,0x0447, /* 0xE0..0xE7 */
	0x0448,0x0449,0x044A,0x044B,0x044C,0x044D,0x044E,0x044F, /* 0xE8..0xEF */
	0x0401,0x0451,0x0404,0x0454,0x0407,0x0457,0x040E,0x045E, /* 0xF0..0xF7 */
	0x00B0,0x2022,0x00B7,0x0076,0x2116,0x00A4,0x00A6,0x00A0  /* 0xF8..0xFF */
};

/*
 * Translate one 8-bit character to its 16-bit Unicode value.
 *
 * c    - character code.  0..127 is returned unchanged.  Values -128..-1
 *        are treated as sign-extended bytes (0x80..0xFF), so callers that
 *        read through a signed char still get a correct result.  Anything
 *        else outside 0..255 is returned unchanged.
 * type - source encoding: 1 = Win, 2 = Koi8, 3 = Dos.  Any other value is
 *        handled like 3, i.e. the byte is taken to be a DOS byte already.
 *
 * Win and Koi8 bytes are first mapped to DOS bytes through their tables;
 * the DOS byte is then looked up in dos2unicode[] unless it is below 128,
 * in which case it is returned as is.
 */
unsigned int char8to16(int c, int type)
{
  /* Undo sign extension of a byte read through a signed char. */
  if (c < 0 && c >= -128) c += 256;

  /* ASCII passes through; values that do not fit a byte must not be used
     as table indices. */
  if (c < 128 || c > 255) return((unsigned int)c);

  switch(type)
  {
  case 1:
    /* Win->Dos.  The table holds plain char, so convert through
       unsigned char to get 0x80..0xFF back even where char is signed. */
    c = (unsigned char)wintranslation[c-128];
    break;
  case 2:
    /* Koi8->Dos, same signedness precaution. */
    c = (unsigned char)koi8translation[c-128];
    break;
  default:
    /* Dos (type 3) and unknown types: nothing to translate. */
    break;
  }

  if (c < 128) return((unsigned int)c);
  return(dos2unicode[c - 128]);
}

/*
 * Convert a NUL-terminated DOS code page string to wide characters.
 *
 * ws - destination; receives one element per source byte plus a
 *      terminating 0, so it needs room for strlen(s) + 1 elements.
 * s  - NUL-terminated source string.
 *
 * Each byte is translated with char8to16(byte, 3).
 */
void dos2utf16(wchar_t *ws, const char* s)
{
  unsigned char c;
  /* Read through unsigned char so bytes >= 0x80 reach char8to16() as
     128..255 rather than as negative values where plain char is signed. */
  while((c=(unsigned char)*s++)!=0)
  {
    *ws++=(wchar_t)char8to16(c, 3);
  }
  *ws=0;
}

// Preparing to drop ANSI altogether. Not used yet, but it probably will be soon...

/* UTF-8 to UTF-16 conversion.  Surrogates are handled properly, e.g.
 * a single 4-byte UTF-8 character is encoded into a surrogate pair.
 * On the other hand, if the UTF-8 string contains surrogate values, this
 * is considered an error and returned as such.
 *
 * utf8      - input bytes; need not be NUL-terminated.
 * cc        - number of input bytes to convert.
 * unicode16 - output buffer, or NULL to only count the units needed.
 *             It must be able to hold as many 16-bit units as there are
 *             bytes in the input (the worst case is all-ASCII input).
 *             No terminating 0 is written.
 *
 * Returns the number of UTF-16 units produced, or -1 on malformed input:
 * a stray continuation byte, a truncated sequence, an overlong encoding,
 * an encoded surrogate, a value above U+10FFFF, or a sequence longer than
 * four bytes.  Units written before the error was detected stay in place.
 *
 * Copyright (c) 2000 Morten Rolland, Screen Media
 */
int utf8_to_utf16(char *utf8, int cc, wchar_t *unicode16)
{
  const unsigned char *src = (const unsigned char *)utf8;
  int count = 0;

  while (--cc >= 0) {
    unsigned char lead = *src++;
    unsigned long scalar;    /* code point being assembled */
    unsigned long smallest;  /* lowest code point this length may encode */
    int trailing;            /* continuation bytes still expected */

    if (lead < 0x80) {
      /* Plain ASCII character, simple translation */
      if (unicode16) *unicode16++ = lead;
      count++;
      continue;
    }

    /* Classify the lead byte and keep only its payload bits, so the
       range checks below see the real code point and not the marker. */
    if ((lead & 0xe0) == 0xc0) {
      trailing = 1; scalar = lead & 0x1f; smallest = 0x80;
    } else if ((lead & 0xf0) == 0xe0) {
      trailing = 2; scalar = lead & 0x0f; smallest = 0x800;
    } else if ((lead & 0xf8) == 0xf0) {
      trailing = 3; scalar = lead & 0x07; smallest = 0x10000;
    } else {
      /* 10xxxxxx cannot start a character; 11111xxx would need more
         than four bytes, which is not supported. */
      return -1;
    }

    /* Check the remaining length before reading, so a truncated
       sequence never makes us read past the end of the input. */
    if (cc < trailing)
      return -1;
    cc -= trailing;

    while (trailing-- > 0) {
      unsigned char cont = *src++;
      if ((cont & 0xc0) != 0x80)
        return -1;	/* Bad continuation byte */
      scalar = (scalar << 6) | (cont & 0x3f);
    }

    if (scalar < smallest)
      return -1;	/* Overlong encoding */
    if (scalar >= 0xd800 && scalar < 0xe000)
      return -1;	/* UTF-16 surrogate halves are not characters */
    if (scalar > 0x10ffff)
      return -1;	/* Beyond what a surrogate pair can express */

    if (scalar < 0x10000) {
      if (unicode16) *unicode16++ = (wchar_t)scalar;
      count++;
    } else {
      /* Needs encoding as a surrogate pair */
      scalar -= 0x10000;
      if (unicode16) {
        *unicode16++ = (wchar_t)(0xd800 + ((scalar >> 10) & 0x3ff));
        *unicode16++ = (wchar_t)(0xdc00 + (scalar & 0x3ff));
      }
      count += 2;
    }
  }
  return count;
}

