#define _POSIX_C_SOURCE 2
#include <ctype.h>
#include <errno.h>
#include <glob.h>
/*#include <printf.h>*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <values.h>
#include <errno.h>
/* Make sure the double-underscore byte-order macros exist.  When the
   included headers did not define __BYTE_ORDER but did define the
   un-prefixed BYTE_ORDER family, alias the prefixed names to it;
   when neither spelling is available, stop the build with an error. */
#ifndef __BYTE_ORDER
#ifdef BYTE_ORDER
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#define __PDP_ENDIAN PDP_ENDIAN
#else
#error __BYTE_ORDER undefined.
#endif
#endif
#include "2UTF.h"
/* Version banner: the VERSION macro (not defined in this section) is
   pasted between the literal pieces at compile time. */
static char version[] = " 2UTF V" VERSION " \n";
/* Copyright / licence notice text.
   NOTE(review): the "[email protected]" text inside the angle brackets looks
   like a web-scraping placeholder rather than the author's real address --
   confirm against the upstream distribution before shipping. */
static char blurb[] =
" © Copyright 1997, 1998, 2000 by Ričardas Čepas <[email protected]> and others. \n"
" Copying policy: BSD style. \n"
" See file 'copyright' provided with the 2UTF distribution. \n"
" No warranty. Use at your own risk. \n";
/* Unless the build defines USE_LIBC, every later use of the names
   mbtowc / wctomb in this file is rewritten by the preprocessor to
   our_mbtowc / our_wctomb. */
#ifndef USE_LIBC
#define mbtowc our_mbtowc
#define wctomb our_wctomb
#endif
/* Build-time defaults.  Every macro wrapped in #ifndef can be overridden
   from the compiler command line. */

/* Space-separated candidate pathnames of the configuration file. */
#ifndef CONFIG_PATHNAMES
#define CONFIG_PATHNAMES "/usr/local/etc/2UTF.config /usr/etc/2UTF.config /etc/2UTF.config"
#endif
/* Fixed length limit; its users lie outside this section of the file. */
#define max_string_length 1178
/* Pathname of the aliases cache file. */
#ifndef ALIASES
#define ALIASES "/var/local/lib/2UTF.aliases"
#endif
/* Compiled-in charmap directories (see compiled_paths below). */
#ifndef PATH
#define PATH "/usr/local/share/i18n/charmaps/"
#endif
#ifndef PATH2
#define PATH2 "/usr/share/i18n/charmaps/"
#endif
#ifndef PATH3
#define PATH3 "/usr/share/i18n/charmap/"
#endif
/* Capacity of read_paths[]. */
#ifndef MAX_PATHNAMES
#define MAX_PATHNAMES 24
#endif
/* Upper bound used when sizing the external-charset table. */
#ifndef MAX_EXT_CHARSETS
#define MAX_EXT_CHARSETS 50
#endif

char aliases_pathname[] = ALIASES;

/* Scan formats for charmap lines.
   NOTE(review): presumably indexed by enum charmap_file_format_type and
   passed to sscanf(3); the second and third entries are identical --
   confirm both against the code that reads charmap files. */
char default_charmap_format[] = " %*s /x%2x <U%X> ";
char *charmap_format[] =
{default_charmap_format, " 0x%x 0x%X ", " 0x%x 0x%X "};

/* Compiled-in charmap directories followed by one extra empty string that
   is not counted in compiled_paths_number.  The literals have type char[],
   so they are cast explicitly to the element type instead of relying on an
   implicit conversion between pointers of different signedness. */
unsigned char *compiled_paths[] =
{
  (unsigned char *) PATH,
  (unsigned char *) PATH2,
  (unsigned char *) PATH3,
  (unsigned char *) ""
};

/* Number of real directories in compiled_paths[]: every element except the
   trailing empty string.  Derived from the table so the two cannot drift
   apart when a directory is added or removed. */
const int compiled_paths_number =
  (int) (sizeof compiled_paths / sizeof compiled_paths[0]) - 1;

/* Number of entries in the table that `paths' currently points to; starts
   out equal to the compiled-in count. */
int paths_number =
  (int) (sizeof compiled_paths / sizeof compiled_paths[0]) - 1;

/* Storage for up to MAX_PATHNAMES directory names.
   NOTE(review): presumably filled from the configuration file -- the code
   doing so is outside this section. */
unsigned char *read_paths[MAX_PATHNAMES];

/* Directory table in use; initially the compiled-in one. */
unsigned char **paths = compiled_paths;

/* Count of external charset definitions; starts at zero. */
int ext_charsets_number = 0;
/* Message texts, first group (I/O, option, charmap, alias and file
   errors).  Each is a plain string with no conversion specifications;
   the code that prints them is outside this section. */
const char FILENAME[] = "FILENAME";
const char IO_err[] = "I/O error.";
const char IO_err_reading_config[] = "I/O error reading configuration file.";
const char add_iconv_only[] = "This option can only be used with --iconv=only.";
const char ambig_opt[] = "ambiguous option.";
const char avail_charmaps[] = "charmaps and aliases I can handle beyond iconv(3) and available in cache:";
const char avail_ext_charsets[] = "charsets I can handle via external filters:";
const char bad_charmap_format[] = "charmap file format error ?";
const char bad_line[] = "BAD LINE in aliases file:";
const char buffer_overflow[] = "buffer overflow by";
/*const char bad_sscanf_format_string[]= "bad sscanf(3) format string"; */
const char can_not_exec[] = "can't execute such command or fork subprocess:";
const char can_not_find_alias[] = "can't find such alias:";
const char can_not_open[] = "can't open for reading file";
const char can_not_open_any[] = " -- can't open matching files.";
const char can_not_create[] = "can't create file";
const char for_[] = "for";
/* Full usage text.  This is a printf-style format string, not plain text:
   it contains, in order, the conversions %s (very first item), %s (shown
   as the default for -f/--format), %c and %.2X (shown as the default for
   -u/--unknown-char), plus %% escapes for literal percent signs.
   NOTE(review): the caller must supply arguments matching those four
   conversions in that order -- verify at the call site, which is outside
   this section. */
const char help[] = "%s"
" Converts char-sets to and from Unicode. Decodes MIME text messages. \n"
"\n"
" Usage: \n"
"2UTF [-short_options] [--long_option ...] [charmap_file_or_alias] <input >output \n"
"fromUTF ... \n"
" If exact match for <charmap_file_or_alias> (converted to uppercase, \n" \
"`-' and '_' ignored) isn't found *<charmap_file_or_alias>* glob pattern \n" \
"is used. Without <charmap_file_or_alias> mail message is assumed.\n"
" Options: \n"
" -- stops option checking for the rest of the command line \n"
" -2 --UCS-2 --ucs-2 2 byte wide characters \n"
" -4 --UCS-4 --ucs-4 4 byte wide characters \n"
/*" -w --UCS-wchar_t --ucs-wchar_t sizeof(wchar_t) byte wide characters \n" */
" -8 --UTF-8 --utf-8 (default) multibyte characters \n"
" -C --create-aliases (re)creates aliases database \n"
" -c FILENAME --charmap-file=FILENAME \n"
" -d[N] --debug[=N] debug level (1-9), default 1 \n"
" -f[FORMAT] --format[=FORMAT] sscanf(3) format string for reading charmap \n"
"file. Lines beginning with %% or # are ignored. Default is \"%s\" \n"
" -e --encode-headers reencode MIME encoded headers \n"
" -o --forward (default for 2UTF) converts to Unicode \n"
" -H --html &<>\" appeared after approximations are escaped\n"
" -h -? --? -help --help this help \n"
" -i only --iconv=only don't read configuration file and use iconv() only \n"
" -i first --iconv=last attempt to use iconv before or after charmap files \n"
" -l --list-charmaps lists charmaps & aliases and exits \n"
" -p --pathnames outputs various pathnames and directories \n"
" -r --reverse (default for fromUTF) tries convert back to the legacy encodings \n"
" -W --show-charmap shows glyphs in charmap order \n"
" -S --spit-glyphs shows glyphs in console font (F000-F1FF) \n"
" -S... --spit-glyphs=[min][-][max] shows glyphs at given hex range. \n"
"Allowed range is from 0 to 7FFFFFFF. \n"
" -s --switch-to-UTF-8 outputs <ESC>%%G to stderr for switching \n"
"current virtual terminal to UTF-8 mode \n"
" -u[X] --unknown-char[=X] substitute X for unknown characters. \n"
"Default is '%c' (0x%.2X). \n"
" -v --verbose \n"
" -V --version --blurb shows version and copyright info. \n"
" Rightmost option takes precedence. Long options may be abbreviated. \n"
;
/* Message texts, second group (warnings and fatal conditions).  Each is a
   plain string with no conversion specifications.  Two user-visible
   spelling errors are corrected here ("definiton", "multiply matches");
   the identifiers themselves are kept so existing references still work. */
const char incomplete_charmap[] = "warning: incomplete charmap definition";
const char internal_err[] = "internal error ?";
const char long_file[] = "warning: long charmap file.";
const char another_match[] = "warning: another match for this alias";
const char more_help[] = "``2UTF -h'' gives more information. \n";
const char multiply_matches[] = "warning: multiple matches for glob pattern";
const char needs_update[] = "aliases database needs update.";
const char no_charmaps[] = "no charmap files found.";
const char no_pathnames[] = "no pathnames found in configuration file.";
const char out_of_mem[] = "out of memory !";
/* Message texts, third group: headings for the pathname listing, the short
   usage line, and the remaining diagnostics.  paths_help_aliases embeds
   the ALIASES macro (the aliases cache pathname) at compile time. */
const char paths_help_config_pathname[] = " Looks for configuration file as: \n";
const char paths_help_compiled[] = " If no directories in configuration file " \
"are found looks for charmap files in: \n";
const char paths_help_used[] = " Currently looks for charmap files in: \n";
const char paths_help_aliases[] =
" Aliases are cached in: \n"
"``" ALIASES "'' \n"
" \n";
const char short_help[] = \
"Usage: 2UTF|fromUTF [-short_options] [--long_option ...] [charset] <in >out \n";
const char too_many_pathnames[] = "too many pathnames in configuration file.";
const char too_many_ext_charsets[] = \
"too many charset definitions in configuration file.";
const char unexp_EOF[] = "unexpected end of file.";
const char unimplemented[] = "Sorry, this is not implemented.";
const char unknown_opt[] = "unknown option.";
const char using[] = "using";
const char will_use[] = "will use";
/* Global option flags.  All start FALSE except iconv_first, which starts
   TRUE.  NOTE(review): presumably set from the command-line options
   described in the help text -- the parsing code is outside this section. */
int Debug = FALSE;
int Mail = FALSE;
int encode = FALSE;
int iconv_first = TRUE;
int iconv_only = FALSE;
int reverse = FALSE;
int show_charmap = FALSE;
int verbose = FALSE;

/* stdout may be variable */

/* Predefined charset descriptors.  The member layout of struct
   charset_type is declared outside this file section. */
struct charset_type unknown_charset =
  {NULL, "", NO, UNKNOWN, NULL, NULL};
struct charset_type USASCII_charset =
  {NULL, "us-ascii", IS, USASCII, NULL, NULL};
struct charset_type UTF8_charset =
  {NULL, "UTF-8", IS, UTF8, NULL, NULL};

/* Descriptor currently in effect; initially the unknown charset. */
struct charset_type *charset_p = &unknown_charset;

/* Shared line buffer, initially {NULL, 0}. */
struct line_buf_type line = {NULL, 0};

/* Table of external charset definitions, with MAX_EXT_CHARSETS + 1 slots.
   NOTE(review): presumably `names' lists the charset names and `to_UTF' /
   `from_UTF' hold the external filter commands -- confirm against the
   configuration-file reader. */
struct
{
  unsigned char *names;
  unsigned char *to_UTF;
  unsigned char *from_UTF;
  enum USASCII_is_subset_type USASCII_is_subset;
}
ext_charsets[MAX_EXT_CHARSETS + 1];

/* 0xFFFD is the code point of the Unicode REPLACEMENT CHARACTER. */
wchar_t unknown_wchar = 0xFFFD;
/* Forward declarations of the program's own multibyte <-> wide character
   converters; their definitions are outside this section.  When USE_LIBC
   is not defined, the names mbtowc and wctomb are macros that expand to
   these two functions. */
inline int our_wctomb (char *s, unsigned long wc);
inline int our_mbtowc (wchar_t *p, char *s, unsigned n);
/*
 * Classify a charmap file by the suffix of its pathname.
 *
 * The suffix is everything from the last '.' in PATHNAME to its end.
 * Returns TXT when that suffix is exactly ".TXT" (strcmp) or equals ".x"
 * according to Strcasecmp (a helper declared elsewhere; presumably a
 * case-insensitive compare).  Returns DEFAULT in every other case,
 * including a NULL PATHNAME and a pathname containing no '.'.
 */
inline enum charmap_file_format_type
Charmap_file_format_type (char *pathname)
{
  char *suffix;

  if (pathname == NULL)
    return (DEFAULT);

  suffix = strrchr (pathname, '.');
  if (suffix == NULL)
    return (DEFAULT);

  /* Neither recognised suffix matched. */
  if (strcmp (".TXT", suffix) != 0 && Strcasecmp (".x", suffix) != 0)
    return (DEFAULT);

  return (TXT);
}
/*
 * Close the stream held in charset_p->pipe with pclose(3), but only when
 * the current charset's type is BUF_PIPE or NON_BUF_PIPE; for any other
 * type nothing happens.  The value returned by pclose is discarded.
 */
void
Close_pipe (void)
{
  if (charset_p->type != BUF_PIPE && charset_p->type != NON_BUF_PIPE)
    return;

  pclose (charset_p->pipe);
}
char *
Convert (char
2UTF-1.22.tar.gz_utf-8 c++
版权申诉
163 浏览量
2022-09-24
12:15:44
上传
评论
收藏 125KB GZ 举报
JonSco
- 粉丝: 72
- 资源: 1万+
最新资源
- python 数据分析示例
- tensorflow-gpu-2.5.3-cp37-cp37m-manylinux2010-x86-64.whl
- tensorflow-gpu-2.2.1-cp35-cp35m-win-amd64.whl
- tensorflow-gpu-2.5.3-cp38-cp38-manylinux2010-x86-64.whl
- 2023-04-06-项目笔记 - 第一百三十五阶段 - 4.4.2.133全局变量的作用域-133 -2024.05.16
- Screenshot_20240516_224007.jpg
- IEEE802.3dj-800G DR4 & 1.6T DR8-welch-3dj-01a-230206 .pdf
- Matlab三维图绘制基础了解
- IEEE802.3dj-800GBASE FR4-welch-3dj-01a-2401 .pdf
- 实验3-拆弹专家-3023244203-刘宇翔
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈