"""OCR in Python using the Tesseract engine from Google
http://code.google.com/p/pytesser/
by Michael J.T. O'Kelly
V 0.0.1, 3/10/07"""
import Image
import subprocess
import util
import errors
# Module-level configuration shared by the functions below.

# First element of the argument list built in call_tesseract().
tesseract_exe_name = 'tesseract' # Name of executable to be called at command line
# File that image_to_string() asks util.image_to_scratch() to write before OCR.
scratch_image_name = "temp.bmp" # This file must be .bmp or other Tesseract-compatible format
# Output name passed to Tesseract and later to util.retrieve_text().
scratch_text_name_root = "temp" # Leave out the .txt extension
# Default for the `cleanup` parameter of image_to_string() and
# image_file_to_string(). It is bound when those functions are defined, so
# reassigning it after import does not change their defaults.
cleanup_scratch_flag = True # Temporary files cleaned up after OCR operation
def call_tesseract(input_filename, output_filename):
    """Run the external Tesseract executable on an image file.

    The command is the module-level ``tesseract_exe_name`` followed by
    ``input_filename`` and ``output_filename``. The input must be a file
    type Tesseract accepts; the recognised text is expected to end up in
    ``output_filename + '.txt'``.

    The call blocks until the process exits. A non-zero exit status hands
    control to ``errors.check_for_errors()``, which presumably raises
    (callers in this file catch ``errors.Tesser_General_Exception``).
    """
    command = [tesseract_exe_name, input_filename, output_filename]
    exit_status = subprocess.Popen(command).wait()
    if exit_status != 0:
        errors.check_for_errors()
def image_to_string(im, cleanup=cleanup_scratch_flag):
    """OCR an in-memory image and return the recognised text.

    The image is written to the scratch image file via
    ``util.image_to_scratch``, Tesseract is run on that file, and the
    resulting text is read back with ``util.retrieve_text``.

    If ``cleanup`` is true, ``util.perform_cleanup`` is called afterwards,
    whether the OCR steps succeeded or raised.
    """
    try:
        util.image_to_scratch(im, scratch_image_name)
        call_tesseract(scratch_image_name, scratch_text_name_root)
        return util.retrieve_text(scratch_text_name_root)
    finally:
        # Runs on both the success and the failure path, before the
        # function actually returns or the exception propagates.
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
def image_file_to_string(filename, cleanup=cleanup_scratch_flag, graceful_errors=True):
    """OCR an image file on disk and return the recognised text.

    Tesseract is first run directly on ``filename``. If that attempt raises
    ``errors.Tesser_General_Exception``:

    * with ``graceful_errors`` true, the file is opened with ``Image.open``
      and passed through ``image_to_string`` (which re-saves it as the
      scratch image before running Tesseract);
    * otherwise the exception is re-raised unchanged.

    If ``cleanup`` is true, ``util.perform_cleanup`` is called before
    returning or propagating an exception.
    """
    try:
        try:
            call_tesseract(filename, scratch_text_name_root)
            return util.retrieve_text(scratch_text_name_root)
        except errors.Tesser_General_Exception:
            if not graceful_errors:
                raise
            # Fall back to converting the image to a compatible format.
            return image_to_string(Image.open(filename), cleanup)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
if __name__ == '__main__':
    # Demo run. Expects phototest.tif, fnord.tif and fonts_test.png in the
    # current working directory.
    #
    # Only syntax valid on both Python 2.6+ and Python 3 is used here
    # (parenthesised single-argument print, ``except ... as``), so this
    # block no longer makes the module a SyntaxError under Python 3.

    # OCR an image that has already been loaded into memory.
    im = Image.open('phototest.tif')
    text = image_to_string(im)
    print(text)

    # With graceful_errors=False a file Tesseract rejects surfaces as an
    # exception instead of being converted and retried.
    try:
        text = image_file_to_string('fnord.tif', graceful_errors=False)
    except errors.Tesser_General_Exception as value:
        print("fnord.tif is incompatible filetype. Try graceful_errors=True")
        print(value)

    # The same file again, this time allowing the conversion fallback.
    text = image_file_to_string('fnord.tif', graceful_errors=True)
    # %-formatting gives the same "label value" output as the old
    # two-argument print statement on both Python 2 and 3.
    print("fnord.tif contents: %s" % text)

    text = image_file_to_string('fonts_test.png', graceful_errors=True)
    print(text)
没有合适的资源?快使用搜索试试~ 我知道了~
python识别验证码的库pytesser
共42个文件
train:3个
py:3个
tif:2个
3星 · 超过75%的资源 需积分: 19 84 下载量 19 浏览量
2015-12-17
20:53:09
上传
评论
收藏 2.13MB ZIP 举报
温馨提示
可能有的人需要,特此提供。用法说明:直接将其解压缩后,将文件夹放在与你即将运行的程序相同的目录下。
资源推荐
资源详情
资源评论
收起资源包目录
pytesser_v0.0.1.zip (42个子文件)
pytesser_v0.0.1
tessdata
word-dawg 187KB
test_matrix 3KB
newdiff.asccodes 561B
tessconfigs
var_batch 2KB
old_batch 2KB
matdemo 2KB
batch 2KB
segdemo 2KB
pffmtable 548B
fnetwts 751B
confsets 12B
inttemp 661KB
netwts 1.31MB
blackText.params 3KB
configs
api_config 1012B
var_api_config 1KB
oldbox.train 816B
api_resaljet 760B
inter 97B
oldapi_config 815B
variable_config 2KB
var_box.train 1KB
box.train 412B
freq-dawg 720B
fmtable.cls 130KB
normproto 39KB
soptable.cls 194KB
DangAmbigs 235B
user-words 8KB
tesseract.log 133B
NOTICE 337B
tesseract.exe 808KB
fonts_test.png 20KB
phototest.tif 38KB
errors.py 424B
util.py 669B
LICENSE 558B
README 3KB
AUTHORS 273B
fnord.tif 1KB
ChangeLog 48B
pytesser.py 3KB
共 42 条
- 1
资源评论
- wang1198347423 2016-05-28 很好的资源,欢迎大家下载
- Gentle__Man 2018-07-27 不能用,垃圾
HelloWorld_EE
- 粉丝: 1866
- 资源: 17
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功