#!/usr/bin/python
#encoding: utf-8
import numpy as np
RECT_HEIGHT = 16  # default glyph height in pixels (rows)
RECT_WIDTH = 16  # default glyph width in pixels (columns)
BYTE_COUNT_PER_ROW = RECT_WIDTH // 8  # one byte packs 8 horizontal pixels
BYTE_COUNT_PER_FONT = BYTE_COUNT_PER_ROW * RECT_HEIGHT  # bytes per glyph bitmap
# Bit masks selecting the 8 pixels of a byte, most significant bit first.
KEYS = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]


class FontRender(object):
    """Read a packed 1-bit-per-pixel bitmap font file and expose its glyphs.

    The whole file is loaded into memory and every complete glyph is decoded
    once into a ``float32`` NumPy array (1 = lit pixel, 0 = blank).  Glyphs can
    be fetched by ordinal position (``getAt``/``getall``) or by character via
    its GB2312 zone/position code (``getFont``/``convert``).
    """

    def __init__(self, font_file,
                 rect_height=RECT_HEIGHT, rect_width=RECT_WIDTH,
                 byte_count_per_row=BYTE_COUNT_PER_ROW):
        """Load ``font_file`` and decode every complete glyph it contains.

        Args:
            font_file: Path of the binary font file.
            rect_height: Glyph height in pixels.
            rect_width: Glyph width in pixels.
            byte_count_per_row: Number of bytes storing one pixel row.

        Raises:
            OSError: If ``font_file`` cannot be opened or read.
        """
        self.font_file = font_file
        self.rect_height = rect_height
        self.rect_width = rect_width
        self.byte_count_per_row = byte_count_per_row
        with open(font_file, "rb") as f:
            self.data = f.read()
        glyph_size = self.byte_count_per_font
        # Floor division: a truncated trailing glyph is ignored instead of
        # crashing the decoder with an out-of-range byte access.
        glyph_count = len(self.data) // glyph_size
        self.fonts = [
            self.font2np(self.data[n * glyph_size:(n + 1) * glyph_size])
            for n in range(glyph_count)
        ]
        self.__init_rect_list__()

    @property
    def byte_count_per_font(self):
        """Number of bytes occupied by one glyph for this instance's geometry."""
        return self.byte_count_per_row * self.rect_height

    def __init_rect_list__(self):
        """Reset ``rect_list`` to one empty pixel row per glyph row."""
        self.rect_list = [[] for _ in range(self.rect_height)]

    def get_font_area_index(self, txt, encoding='utf-8'):
        """Return the ``(area, index)`` zone/position code of a character.

        Args:
            txt: A ``str``; only the first two bytes of its GB2312 encoding
                are used.
            encoding: Unused; kept for backward compatibility.

        Returns:
            Tuple ``(area, index)``: each GB2312 byte minus ``0xA0``.

        Raises:
            UnicodeEncodeError: If ``txt`` cannot be encoded as GB2312.
            IndexError: If the encoded form is shorter than two bytes.
        """
        gb2312 = txt.encode('gb2312')
        area = gb2312[0] - 0xA0
        index = gb2312[1] - 0xA0
        return area, index

    def get_font_rect(self, area, index):
        """Return the raw glyph bytes stored for zone ``area``, position ``index``.

        Both values are 1-based and the file holds 94 positions per zone.
        """
        glyph_size = self.byte_count_per_font
        offset = int((94 * (area - 1) + (index - 1)) * glyph_size)
        return self.data[offset:offset + glyph_size]

    def convert_font_rect(self, font_rect, ft=1, ff=0):
        """Append the pixels of ``font_rect`` to the rows of ``rect_list``.

        Rows are extended, not replaced, so consecutive calls lay glyphs out
        side by side; call ``__init_rect_list__`` to start over.

        Args:
            font_rect: Indexable sequence of glyph byte values.
            ft: Value stored for a lit pixel.
            ff: Value stored for a blank pixel.
        """
        for k in range(self.rect_height):
            row_list = self.rect_list[k]
            for j in range(self.byte_count_per_row):
                asc = font_rect[k * self.byte_count_per_row + j]
                # A conditional expression (not ``flag and ft or ff``) so that
                # a falsy ``ft`` such as 0 is still honoured for lit pixels.
                row_list.extend(ft if asc & mask else ff for mask in KEYS)

    def font2np(self, font_rect):
        """Decode glyph bytes into a ``(rect_height, rect_width)`` float32 array.

        Lit pixels are 1, all others 0.

        Raises:
            IndexError: If ``font_rect`` holds fewer bytes than one glyph.
        """
        bitmap = np.zeros((self.rect_height, self.rect_width), dtype=np.float32)
        for k in range(self.rect_height):
            for j in range(self.byte_count_per_row):
                asc = font_rect[k * self.byte_count_per_row + j]
                for i, mask in enumerate(KEYS):
                    if asc & mask:
                        bitmap[k, j * 8 + i] = 1
        return bitmap

    def render_font_rect(self, rect_list=None):
        """Print a pixel grid to stdout, one text line per row.

        Truthy cells are drawn as '■' and falsy cells as '○', separated by a
        single space.  When ``rect_list`` is empty or omitted, the instance's
        own ``rect_list`` is printed.
        """
        if not rect_list:
            rect_list = self.rect_list
        for row in rect_list:
            # Python 3 print(): emit the whole row at once so each grid row
            # ends with exactly one newline.
            print(' '.join('■' if cell else '○' for cell in row))

    def convert(self, text, encoding='utf-8'):
        """Return a list with one glyph array per character of ``text``.

        ``encoding`` is unused; kept for backward compatibility.
        """
        return [self.getFont(ch) for ch in text]

    def getFont(self, hz):
        """Return the glyph array for ``hz``, a single Chinese character."""
        area, index = self.get_font_area_index(hz)
        font_rect = self.get_font_rect(area, index)
        return self.font2np(font_rect)

    def getAt(self, i):
        """Return the pre-decoded glyph array at ordinal position ``i``."""
        return self.fonts[i]

    def getall(self):
        """Return a new list containing every pre-decoded glyph array."""
        # Copy so callers can reorder/slice without touching the cache; the
        # previous index loop ran one past the last glyph.
        return list(self.fonts)

    def get_rect_info(self):
        """Return the pixel rows accumulated by ``convert_font_rect``."""
        return self.rect_list
import torch.utils.data as data
from typing import Any, Callable, List, Optional, Tuple
class HzDataset(data.Dataset):
    """Map-style dataset yielding ``(glyph, label)`` pairs from a bitmap font.

    ``glyph`` is the decoded glyph array with a leading channel axis added and
    ``label`` is the two-byte code derived from the glyph's ordinal position.
    """

    def __init__(self, font_file: str = './HZK16') -> None:
        """Load every glyph of ``font_file`` through ``FontRender``."""
        super().__init__()
        self.fr = FontRender(font_file)
        # One sample per glyph that FontRender decoded.
        self.cnt = len(self.fr.fonts)

    def __getitem__(self, index: int) -> Any:
        """Return the glyph at ``index`` together with its two-byte label.

        Code layout: the zone-position code is (zone, position), each in
        1..94; the national-standard (GB) code adds 32 to each of the two
        values; the machine (internal) code additionally sets the high bit of
        both bytes, i.e. each byte is zone/position + 0xA0.  The label is
        therefore the inverse of the font-file offset formula
        ``offset = (94 * (area - 1) + (index - 1)) * BYTE_COUNT_PER_FONT``
        with ``area = gb2312[0] - 0xA0`` and ``index = gb2312[1] - 0xA0``.

        Args:
            index: Sample position; negative values count from the end.

        Returns:
            Tuple ``(font, label)``: ``font`` is the glyph array with a new
            leading axis and ``label`` is a 2-byte ``bytes`` object.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        # Normalise negative indices first: otherwise the glyph lookup would
        # wrap around while the label was computed from the raw negative value.
        if index < 0:
            index += self.cnt
        if not 0 <= index < self.cnt:
            raise IndexError('HzDataset index out of range')
        font = self.fr.getAt(index)
        area = index // 94 + 1
        idx = index % 94 + 1
        a = bytes([area + 0xA0, idx + 0xA0])
        return font[np.newaxis, :, :], a

    def __len__(self) -> int:
        """Return the number of glyph samples."""
        return self.cnt
def test():
    """Smoke check: read one glyph's worth of bytes from './pallium/HZK16'.

    The bytes are discarded; the call only proves the file can be opened and
    read.
    """
    font_path = './pallium/HZK16'
    with open(font_path, "rb") as handle:
        handle.read(BYTE_COUNT_PER_FONT)
if __name__ == '__main__':
    # Demo: show a few rendered characters, then the first 100 glyphs of the
    # font, each in an OpenCV window that waits for a key press.
    import cv2

    text = u'同创伟业'
    fr = FontRender('./HZK16')
    # Use dedicated names here: rebinding the module-level name `data` would
    # clobber the `torch.utils.data` alias imported earlier in this file.
    text_glyphs = fr.convert(text)
    cv2.imshow("aaa", np.hstack(text_glyphs))
    cv2.waitKey(0)
    all_glyphs = fr.getall()
    cv2.imshow("aaa", np.hstack(all_glyphs[0:100]))
    cv2.waitKey(0)