/***************************************************************************************
*
*
* Filename: test.c
*
* Description:
*
* Version: 1.0
* Created: 2013-05-29 10:02:40
* Revision: none
* Compiler: gcc
*
* Author: lyy
*
***************************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <fcntl.h>
#include <locale.h>
#include <wchar.h>
#include <errno.h>
#include <termios.h>
#define PHONEBOOK_EXPORT_FILE_ADDR "phonebook_export.csv"
#define PHONEBOOK_EXPORT_FILE_NAME "phonebook_export.csv"
#define PHONEBOOK_EXPORT_FILE_ADDR_R "phonebook_export_r.csv"
typedef enum
{
PB_ENCODE_INVALID = 0,
PB_ENCODE_UTF8NOBOM_TO_UTF8,
PB_ENCODE_UTF8NOBOM_TO_UTF16LE,
PB_ENCODE_UTF8_TO_UTF8NOBOM
}pb_encode;
#define PB_RCODE_ERROR -1
#define PB_RCODE_OK 0
/*===========================================================================
FUNCTION short2char
DESCRIPTION
  Serialize an array of 16-bit code units into a byte buffer in
  little-endian order (low byte first, then high byte).
DEPENDENCIES
  src    : source array of unsigned short code units
  srclen : number of elements in src
  des    : destination byte buffer; the caller must provide at least
           2 * srclen bytes. No terminator is appended.
RETURN VALUE
  success: number of bytes written to des (2 * srclen)
  fail   : -1 when src or des is NULL, or when srclen <= 0
SIDE EFFECTS
  None
=========================================================================*/
int short2char(const unsigned short *src, int srclen, char *des)
{
    int i;

    /* des is written through below, so it is validated together with src */
    if (src == NULL || des == NULL || srclen <= 0)
    {
        return -1;
    }
    for (i = 0; i < srclen; i++)
    {
        des[2 * i]     = (char)(src[i] & 0xFF);        /* low byte  */
        des[2 * i + 1] = (char)((src[i] >> 8) & 0xFF); /* high byte */
    }
    return 2 * srclen;
}
/*===========================================================================
FUNCTION utf2uni
DESCRIPTION
  Decode the UTF-8 sequence that starts at utf[sptr] into its code point.
DEPENDENCIES
  utf      : byte buffer holding UTF-8 data
  sptr     : index of the lead byte of the sequence
  cnt_bits : number of leading 1 bits of the lead byte, which equals the
             length of the sequence in bytes (0 = single-byte character)
  Unicode <-> UTF-8
  U-00000000 - U-0000007F: 0xxxxxxx
  U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
  U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
  U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  Continuation bytes are not validated and no bounds check is done here:
  utf[sptr] .. utf[sptr + cnt_bits - 1] must be readable.
RETURN VALUE
  the decoded code point
SIDE EFFECTS
  None
=========================================================================*/
static int utf2uni(unsigned char* utf, int sptr, int cnt_bits)
{
    int code;
    int k;

    /* single-byte character: the byte is the code point */
    if (cnt_bits == 0)
    {
        return (char) utf[sptr];
    }

    /* the lead byte contributes its low (7 - cnt_bits) bits */
    code = utf[sptr] & ((1 << (7 - cnt_bits)) - 1);

    /* every following byte contributes its low 6 bits, most significant first */
    for (k = 1; k < cnt_bits; k++)
    {
        code = (code << 6) | (utf[sptr + k] & 0x3f);
    }
    return code;
}
/*===========================================================================
FUNCTION get_cnt_bits
DESCRIPTION
  Count the leading 1 bits of a byte to classify it as a UTF-8 lead byte.
DEPENDENCIES
  b : byte to inspect
RETURN VALUE
  success: 0 for a single-byte character (0xxxxxxx), or 2..6 for the lead
           byte of a multi-byte sequence
  fail   : -1 for a zero byte, for a byte with exactly one leading 1
           (10xxxxxx) and for a byte with more than six leading 1 bits
SIDE EFFECTS
  None
=========================================================================*/
static int get_cnt_bits(unsigned char b)
{
    unsigned int mask = 0x80;
    int ones = 0;

    if (b == 0)
    {
        return -1;
    }

    /* walk from the top bit down until the first 0 bit */
    while (mask != 0 && (b & mask) != 0)
    {
        ones++;
        mask >>= 1;
    }

    if (ones == 1 || ones > 6)
    {
        return -1;
    }
    return ones;
}
/*
 * Convert UTF-8 text to UTF-16LE bytes.
 *
 * des_data : output buffer; receives the UTF-16LE bytes with no BOM and no
 *            terminator. The caller must provide at least 2 * src_len bytes
 *            (worst case: pure ASCII input).
 * des_len  : receives the number of bytes written to des_data
 *            (0 when nothing was converted)
 * src_data : UTF-8 input bytes
 * src_len  : number of bytes in src_data
 *
 * Bytes that get_cnt_bits rejects (zero bytes, stray continuation bytes,
 * 0xFE/0xFF) are skipped, and so is a lead byte whose sequence is cut off
 * by the end of the input. Code points U+10000..U+10FFFF are written as a
 * surrogate pair; anything larger becomes U+FFFD.
 *
 * Returns 0 on success, -1 on invalid arguments or allocation failure.
 */
int conv_utf8_to_ucs2(char *des_data, int *des_len, char *src_data, int src_len)
{
    int result = -1;
    unsigned short *units = NULL;
    unsigned char *utf_data = (unsigned char *) src_data;
    int unit_cnt = 0;
    int index = 0;

    printf("conv_utf8_to_ucs2 enter\n");
    if (NULL == des_data || NULL == des_len || NULL == src_data)
    {
        printf("convert buffer is NULL\n");
        goto LCT_RETURN;
    }
    if (src_len < 0)
    {
        printf("source length is error\n");
        goto LCT_RETURN;
    }
    /*
     * Every sequence yields at most one code unit per input byte (a 4-byte
     * sequence gives 2 units), so src_len units plus one terminator is
     * always enough. Sizing in size_t avoids int overflow of src_len * 2.
     */
    units = malloc(((size_t) src_len + 1) * sizeof *units);
    if (NULL == units)
    {
        printf("malloc outfbuf error\n");
        goto LCT_RETURN;
    }

    while (index < src_len)
    {
        int cnt_bits = get_cnt_bits(utf_data[index]);
        int code;

        /* invalid lead byte, or sequence runs past the end of the input */
        if (cnt_bits < 0 || cnt_bits > src_len - index)
        {
            ++index;
            continue;
        }
        /* 0xxxxxxx: the byte is the code unit */
        if (0 == cnt_bits)
        {
            units[unit_cnt++] = (unsigned short) utf_data[index++];
            continue;
        }

        code = utf2uni(utf_data, index, cnt_bits);
        index += cnt_bits;

        if (code > 0x10FFFF)
        {
            /* outside the Unicode range: emit the replacement character */
            units[unit_cnt++] = 0xFFFD;
        }
        else if (code >= 0x10000)
        {
            /* supplementary plane: split into high/low surrogate */
            code -= 0x10000;
            units[unit_cnt++] = (unsigned short) (0xD800 | (code >> 10));
            units[unit_cnt++] = (unsigned short) (0xDC00 | (code & 0x3FF));
        }
        else
        {
            units[unit_cnt++] = (unsigned short) code;
        }
    }
    units[unit_cnt] = 0;

    /* short2char reports -1 for an empty array; expose that as 0 bytes */
    if (unit_cnt > 0)
    {
        *des_len = short2char(units, unit_cnt, des_data);
    }
    else
    {
        *des_len = 0;
    }
    result = 0;
    printf("conv_utf8_to_ucs2 ok\n");

LCT_RETURN:
    free(units);
    return result;
}
/*
 * Copy UTF-8 text unchanged. conv_one_to_other selects this converter for
 * PB_ENCODE_UTF8NOBOM_TO_UTF8 after writing the UTF-8 BOM itself, so no BOM
 * is added here.
 *
 * des_data : output buffer; must hold at least src_len + 1 bytes (the copy
 *            is followed by a '\0' terminator)
 * des_len  : receives src_len, the number of payload bytes copied
 * src_data : UTF-8 input bytes; need not be NUL-terminated
 * src_len  : number of bytes in src_data
 *
 * Returns PB_RCODE_OK on success, or -1 (the value of PB_RCODE_ERROR, and
 * the error value conv_utf8_to_ucs2 returns) on NULL buffers or negative
 * src_len.
 */
int conv_utf8nobom_to_utf8(char *des_data, int *des_len, char *src_data, int src_len)
{
    if (NULL == des_data || NULL == des_len || NULL == src_data || src_len < 0)
    {
        return -1;
    }
    /* copy exactly src_len bytes: the input is a length-delimited buffer */
    memcpy(des_data, src_data, (size_t) src_len);
    des_data[src_len] = '\0';
    *des_len = src_len;
    return PB_RCODE_OK;
}
/*
* which:
* 1: utf8nobom_to_utf8
* 2: utf8nobom_to_UTF-16LE/UCS-2 Little Endian
*
* */
int conv_one_to_other(int which)
{
FILE *e_file = NULL;
FILE *r_file = NULL;
char file_buf[1025];
char r_file_buf[8000];
int read_catch = 1024;
int r_read_catch = 8000;
int mresult = 0;
int i = 0;
int j = 0;
int (*p_fun_conv_one_to_other)(char *des_data, int *des_len, char *src_data, int src_len);
int flag = 0;
char tmp_head[16];
char utf8_head[3] = {0xEF, 0xBB, 0xBF}; // writer utf8 header
char utf16le_head[2] = {0xFF, 0xFE}; // writer unicode(UTF-16LE/UCS-2 Little Endian ) header
e_file = fopen(PHONEBOOK_EXPORT_FILE_ADDR,"rb");
if(e_file==NULL)
{
printf("open_file_error %s\n", PHONEBOOK_EXPORT_FILE_ADDR);
return PB_RCODE_ERROR;
}
r_file = fopen(PHONEBOOK_EXPORT_FILE_ADDR_R,"ab+");
if(r_file==NULL)
{
printf("open_file_error %s\n", PHONEBOOK_EXPORT_FILE_ADDR_R);
return PB_RCODE_ERROR;
}
/* read and wirte the file*/
memset(file_buf,0,1024);
memset(r_file_buf,0,r_read_catch);
switch(which)
{
case PB_ENCODE_UTF8NOBOM_TO_UTF8:
{
printf("PB_ENCODE_UTF8NOBOM_TO_UTF8 pb encode type\n");
for (i=0; i<sizeof(utf8_head); i++)
{
fputc(utf8_head[i], r_file);
}
p_fun_conv_one_to_other = conv_utf8nobom_to_utf8;
mresult = PB_RCODE_OK;
break;
}
case PB_ENCODE_UTF8NOBOM_TO_UTF16LE:
{
printf("PB_ENCODE_UTF8NOBOM_TO_UTF16LE pb encode type\n");
for (i=0; i<sizeof(utf16le_head); i++)
{
fputc(utf16le_head[i], r_file);
}
p_fun_conv_one_to_other = conv_utf8_to_ucs2;
mresult = PB_RCODE_OK;
break;
}
case PB
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
UTF8-无BOM转为UTF16LE.zip (5个子文件)
phonebook_export.csv 140B
phonebook_export_unicode_L.csv 140B
phonebook_export_utf8.csv 77B
UTF8_Unicode转换.c 9KB
phonebook_export_utf8_noBOM.csv 74B
共 5 条
- 1
资源评论
yong718100_lyy
- 粉丝: 7
- 资源: 46
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功