#include "stdafx.h"
#include "string_conv.h"
#include "gbk_to_unicode.h"
#include "unicode_to_gbk.h"
/*
 * Debug trace helper. Tracing is compiled out by default: the function
 * ignores its arguments and returns 0.
 *
 * Change the `#if 0` below to `#if 1` to print the formatted message to
 * stdout while debugging.
 *
 * format: printf-style format string, followed by its arguments.
 * Returns 0 in both configurations.
 *
 * NOTE(review): POSIX <stdio.h> declares dprintf(int fd, const char *, ...).
 * This definition will clash wherever that prototype is visible - confirm
 * the build never exposes it, or rename the helper together with its callers.
 */
int dprintf(char *format, ... )
{
#if 0
    char buf[1024];
    va_list marker;

    va_start(marker, format);                   /* start walking the variadic arguments */
    /* vsnprintf truncates long messages instead of overrunning buf */
    vsnprintf(buf, sizeof buf, format, marker);
    va_end(marker);                             /* done with the variadic arguments */
    printf("%s", buf);
    return 0;
#else
    (void)format;   /* tracing disabled: nothing to do */
    return 0;
#endif
}
/*-----------------+-------------------------+------------------------
| endian = 1  big-endian                                              |
| endian = 0  little-endian                                           |
-------------------+-------------------------+-----------------------*/
/*
 * Convert a NUL-terminated GBK byte string into 16-bit Unicode values.
 *
 * Bytes <= 0x80 are copied through unchanged as single units. A byte > 0x80
 * starts a two-byte character: with endian == 1 the first byte is the high
 * byte of the 16-bit GBK code, otherwise the second byte is. The code is then
 * looked up in g_gbk_to_unicode_buf at index (code - g_gbk_first_code).
 * Codes outside [g_gbk_first_code, g_gbk_last_code] or beyond the table size
 * are silently dropped.
 *
 * gbk_buf:              NUL-terminated input bytes.
 * unicode_buf:          output array of 16-bit units (not NUL-terminated here).
 * max_unicode_buf_size: capacity of unicode_buf in 16-bit units; at most this
 *                       many units are written.
 * endian:               byte order of the two-byte GBK codes (see banner).
 *
 * Returns the number of units written, 0 when max_unicode_buf_size <= 0,
 * or -1 when either buffer pointer is NULL.
 */
int gbk_to_unicode(unsigned char *gbk_buf, unsigned short *unicode_buf, int max_unicode_buf_size, int endian)
{
    unsigned int gbk_ind = 0;
    unsigned int uni_num = 0;
    unsigned int limit;

    if (!gbk_buf || !unicode_buf)
        return -1;

    /* A non-positive capacity used to wrap around in the unsigned limit
     * comparison and disable the bound entirely. */
    if (max_unicode_buf_size <= 0)
        return 0;
    limit = (unsigned int)max_unicode_buf_size;

    while (uni_num < limit)
    {
        unsigned char ch = gbk_buf[gbk_ind];

        if (ch == 0x00)
            break;

        if (ch > 0x80)
        {
            /* two-byte (Chinese) character */
            unsigned char next = gbk_buf[gbk_ind + 1];
            unsigned short word;
            int word_pos;

            /* Input ended in the middle of a character: stop here rather
             * than stepping over the terminator and reading past the string. */
            if (next == 0x00)
                break;

            if (endian == 1)
                word = (unsigned short)((ch << 8) | next);
            else
                word = (unsigned short)((next << 8) | ch);
            gbk_ind += 2;

            word_pos = word - g_gbk_first_code;
            if (word >= g_gbk_first_code && word <= g_gbk_last_code && (word_pos < g_gbk_to_unicode_buf_size))
            {
                unicode_buf[uni_num] = g_gbk_to_unicode_buf[word_pos];
                uni_num++;
            }
        }
        else
        {
            /* single byte: copied through without a table lookup */
            unicode_buf[uni_num] = ch;
            uni_num++;
            gbk_ind++;
        }
    }

    return (int)uni_num;
}
/*
 * Convert a 0x0000-terminated array of 16-bit Unicode values into GBK bytes.
 *
 * With endian == 1 the two bytes of every input unit are swapped before use.
 * Values below 0x80 are written as a single byte. Other values are looked up
 * in g_unicode_to_gbk_buf at index (value - g_unicode_first_code) and written
 * high byte first; values outside [g_unicode_first_code, g_unicode_last_code]
 * or beyond the table size are silently dropped.
 *
 * unicode_buf:      0x0000-terminated input units.
 * gbk_buf:          output byte buffer (not NUL-terminated here).
 * max_gbk_buf_size: capacity of gbk_buf in bytes; conversion stops before a
 *                   character that would not fit completely.
 * endian:           1 = swap the bytes of each input unit, otherwise use as-is.
 *
 * Returns the number of characters written (single-byte and double-byte
 * characters each count once), 0 when max_gbk_buf_size <= 0, or -1 when
 * either buffer pointer is NULL.
 */
int unicode_to_gbk(unsigned short *unicode_buf, unsigned char *gbk_buf, int max_gbk_buf_size, int endian)
{
    unsigned int uni_ind = 0;
    unsigned int gbk_ind = 0;   /* bytes written so far */
    unsigned int gbk_num = 0;   /* characters written so far */
    unsigned int limit;

    if (!gbk_buf || !unicode_buf)
        return -1;
    if (max_gbk_buf_size <= 0)
        return 0;
    limit = (unsigned int)max_gbk_buf_size;

    /* The bound is enforced on bytes: the previous character-count check
     * never fired for ASCII and allowed two bytes per counted character. */
    while (gbk_ind < limit)
    {
        unsigned short word = unicode_buf[uni_ind];

        uni_ind++;
        if (word == 0x0000)     /* end of string */
            break;

        if (endian == 1)
        {
            /* swap high and low bytes */
            word = (unsigned short)((word >> 8) | ((word & 0xFF) << 8));
        }

        if (word < 0x80)
        {
            /* ASCII needs no table lookup */
            gbk_buf[gbk_ind] = (unsigned char)word;
            gbk_ind++;
            gbk_num++;
        }
        else
        {
            int word_pos = word - g_unicode_first_code;

            if (word >= g_unicode_first_code && word <= g_unicode_last_code && word_pos < g_unicode_to_gbk_buf_size)
            {
                unsigned short gbk_word;

                /* never emit half of a double-byte character */
                if (limit - gbk_ind < 2)
                    break;

                gbk_word = g_unicode_to_gbk_buf[word_pos];
                gbk_buf[gbk_ind] = (unsigned char)(gbk_word >> 8);
                gbk_ind++;
                gbk_buf[gbk_ind] = (unsigned char)(gbk_word >> 0);
                gbk_ind++;
                gbk_num++;
            }
        }
    }

    return (int)gbk_num;
}
/*
 * Convert a 0x0000-terminated array of 16-bit Unicode units into UTF-8.
 *
 * With endian == 1 the two bytes of every input unit are swapped before use.
 * A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
 * (0xDC00-0xDFFF) is combined into one code point and written as a 4-byte
 * sequence. An unpaired surrogate is written as a 3-byte sequence, exactly
 * like any other 16-bit value.
 *
 * unicode_buf:       0x0000-terminated input units.
 * utf8_buf:          output byte buffer (not NUL-terminated here).
 * max_utf8_buf_size: capacity of utf8_buf in bytes; conversion stops before
 *                    a character whose full encoding would not fit.
 * endian:            1 = swap the bytes of each input unit, otherwise use as-is.
 *
 * Returns the number of characters (code points) written, 0 when
 * max_utf8_buf_size <= 0, or -1 when either buffer pointer is NULL.
 */
int unicode_to_utf8(unsigned short *unicode_buf, unsigned char *utf8_buf, int max_utf8_buf_size, int endian)
{
    unsigned int uni_ind = 0;
    unsigned int utf_ind = 0;   /* bytes written so far */
    unsigned int utf_num = 0;   /* characters written so far */
    unsigned int limit;

    if (!unicode_buf || !utf8_buf)
        return -1;
    if (max_utf8_buf_size <= 0)
        return 0;
    limit = (unsigned int)max_utf8_buf_size;

    for (;;)
    {
        unsigned short word = unicode_buf[uni_ind];
        unsigned long cp;
        unsigned int need;

        if (word == 0x0000)     /* end of string */
            break;
        uni_ind++;

        if (endian == 1)
        {
            /* swap high and low bytes */
            word = (unsigned short)((word >> 8) | ((word & 0xFF) << 8));
        }
        cp = word;

        if (word >= 0xD800 && word <= 0xDBFF)
        {
            /* Peeking is safe: the current unit was not the terminator, so
             * the next slot holds at worst the terminator itself. */
            unsigned short low = unicode_buf[uni_ind];

            if (endian == 1)
                low = (unsigned short)((low >> 8) | ((low & 0xFF) << 8));

            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                cp = 0x10000UL
                   + ((unsigned long)(word - 0xD800) << 10)
                   + (unsigned long)(low - 0xDC00);
                uni_ind++;
            }
        }

        if (cp < 0x80UL)
            need = 1;
        else if (cp < 0x800UL)
            need = 2;
        else if (cp < 0x10000UL)
            need = 3;
        else
            need = 4;

        /* The bound is enforced on bytes, not characters, so a multi-byte
         * sequence can never run past the end of utf8_buf. */
        if (limit - utf_ind < need)
            break;

        switch (need)
        {
        case 1:
            utf8_buf[utf_ind++] = (unsigned char)cp;
            break;
        case 2:
            utf8_buf[utf_ind++] = (unsigned char)(0xC0 | ((cp >> 6) & 0x1F));
            utf8_buf[utf_ind++] = (unsigned char)(0x80 | (cp & 0x3F));
            break;
        case 3:
            utf8_buf[utf_ind++] = (unsigned char)(0xE0 | ((cp >> 12) & 0x0F));
            utf8_buf[utf_ind++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            utf8_buf[utf_ind++] = (unsigned char)(0x80 | (cp & 0x3F));
            break;
        default:
            utf8_buf[utf_ind++] = (unsigned char)(0xF0 | ((cp >> 18) & 0x07));
            utf8_buf[utf_ind++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
            utf8_buf[utf_ind++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            utf8_buf[utf_ind++] = (unsigned char)(0x80 | (cp & 0x3F));
            break;
        }
        utf_num++;
    }

    return (int)utf_num;
}
/*
 * Convert a NUL-terminated UTF-8 byte string into 16-bit Unicode units.
 *
 * 1-, 2- and 3-byte sequences produce one unit each. A 4-byte sequence
 * produces a surrogate pair (two units). A malformed sequence - a stray
 * continuation byte, an unsupported lead byte (0xF8 and above), a sequence
 * cut short by a non-continuation byte or the terminator, or a value above
 * 0x10FFFF - produces one U+FFFD unit and consumes only the bytes that were
 * actually part of it, so the terminator is never skipped.
 * Overlong encodings are not rejected.
 *
 * utf8_buf:             NUL-terminated input bytes.
 * unicode_buf:          output array of 16-bit units (not terminated here).
 * max_unicode_buf_size: capacity of unicode_buf in 16-bit units; conversion
 *                       stops before a character that would not fit.
 * endian:               currently unused - units are stored in host order.
 *                       NOTE(review): unicode_to_utf8 swaps bytes when
 *                       endian == 1; confirm whether this side should too.
 *
 * Returns the number of 16-bit units written, 0 when
 * max_unicode_buf_size <= 0, or -1 when either buffer pointer is NULL.
 */
int utf8_to_unicode(unsigned char *utf8_buf, unsigned short *unicode_buf, int max_unicode_buf_size, int endian)
{
    unsigned int utf_ind = 0;
    unsigned int uni_ind = 0;
    unsigned int limit;

    (void)endian;

    if (!utf8_buf || !unicode_buf)
        return -1;
    if (max_unicode_buf_size <= 0)
        return 0;
    limit = (unsigned int)max_unicode_buf_size;

    while (uni_ind < limit)
    {
        unsigned char lead = utf8_buf[utf_ind];
        unsigned long cp;
        unsigned int need;      /* continuation bytes the lead byte announces */
        unsigned int have;      /* continuation bytes actually present */

        if (lead == 0x00)       /* end of string */
            break;

        if ((lead & 0x80) == 0)             /* 0xxx-xxxx */
        {
            need = 0;
            cp = lead;
        }
        else if ((lead & 0xE0) == 0xC0)     /* 110x-xxxx 10xx-xxxx */
        {
            need = 1;
            cp = lead & 0x1F;
        }
        else if ((lead & 0xF0) == 0xE0)     /* 1110-xxxx 10xx-xxxx 10xx-xxxx */
        {
            need = 2;
            cp = lead & 0x0F;
        }
        else if ((lead & 0xF8) == 0xF0)     /* 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx */
        {
            need = 3;
            cp = lead & 0x07;
        }
        else                                /* stray continuation or invalid lead */
        {
            need = 0;
            cp = 0xFFFDUL;
        }

        /* Stop at the first byte that is not a continuation byte; the NUL
         * terminator is one, so this never reads past the end of the input. */
        for (have = 0; have < need; have++)
        {
            unsigned char cont = utf8_buf[utf_ind + 1 + have];

            if ((cont & 0xC0) != 0x80)
                break;
            cp = (cp << 6) | (unsigned long)(cont & 0x3F);
        }
        utf_ind += 1 + have;

        if (have < need || cp > 0x10FFFFUL)
            cp = 0xFFFDUL;

        if (cp > 0xFFFFUL)
        {
            /* needs a surrogate pair; never write only half of it */
            if (limit - uni_ind < 2)
                break;
            cp -= 0x10000UL;
            unicode_buf[uni_ind++] = (unsigned short)(0xD800UL + (cp >> 10));
            unicode_buf[uni_ind++] = (unsigned short)(0xDC00UL + (cp & 0x3FFUL));
        }
        else
        {
            unicode_buf[uni_ind++] = (unsigned short)cp;
        }
    }

    return (int)uni_ind;
}
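/*
 * Page-navigation residue from the web listing this file was copied from
 * (not part of the program):
 * - 1
 * - 2
 * - 3
 * - 4
 * - 5
 * - 6
 * Go to page
 */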