1:安装 http://www.pythonware.com/products/pil/
2:安装 https://code.google.com/p/pytesser/downloads/detail?name=pytesser_v0.0.1.zip&can=2&q=
测试结果:pytesser 识别率太低,因此改用下面的模板匹配方案。
1:取得50个样例图
import httplib
# Fetch 50 sample captcha images and store them as ./code/0000.png ... ./code/0049.png.
# Relies on the `httplib` module imported just above this loop.
for i in range(50):
    print("download %d" % i)
    # One connection per request, as before; it is now always closed,
    # even when the request or the read fails.
    c = httplib.HTTPSConnection("1.1.1.1", 8443)
    try:
        c.request("GET", "/sysmonitor/verifyCodeServlet")
        response = c.getresponse()
        data = response.read()
    finally:
        c.close()
    # NOTE(review): the HTTP status is not checked, so an error page would be
    # written out as a .png just like a real image - confirm this is acceptable.
    with open("./code/%04d.png" % i, "wb") as out:
        out.write(data)
2:去噪
from PIL import Image
# Binarise each of the 50 downloaded samples and save the result as a GIF.
for i in range(50):
    img = Image.open("./code/%04d.png" % i)
    img = img.convert("RGBA")
    pixdata = img.load()
    width, height = img.size
    # Single pass; gives the same result as the former three passes because
    # every pixel is decided independently of its neighbours:
    #   red < 90 or green < 136  -> opaque black
    #   otherwise blue > 0       -> opaque white
    #   otherwise                -> left unchanged
    for y in range(height):
        for x in range(width):
            pixel = pixdata[x, y]
            if pixel[0] < 90 or pixel[1] < 136:
                pixdata[x, y] = (0, 0, 0, 255)
            elif pixel[2] > 0:
                pixdata[x, y] = (255, 255, 255, 255)
    img.save("./code/input-black-%04d.gif" % i, "GIF")
#im_orig = Image.open('input-black.gif')
#big = im_orig.resize((1000, 500), Image.NEAREST)
3:生成字库
import os ,Image
# Cut every GIF in ./code/ into four fixed-position 12x16 cells and save each
# cell as fonts/<j>.gif, where j counts up from 1 across all images.
code_dir = "./code/"
font_dir = "fonts"
# Saving into a missing directory fails, so create it up front.
if not os.path.isdir(font_dir):
    os.makedirs(font_dir)
j = 1
# os.listdir() returns names in arbitrary order; sorting makes the glyph
# numbering reproducible between runs.
for f in sorted(os.listdir(code_dir)):
    if not f.endswith(".gif"):
        continue
    img = Image.open(os.path.join(code_dir, f))
    for i in range(4):
        # Cells start at x = 4 and repeat every 18 pixels; the top edge is y = 2.
        x = 4 + i * 18
        y = 2
        img.crop((x, y, x + 12, y + 16)).save("fonts/%d.gif" % j)
        print("j= %d" % j)
        j += 1
4:识别
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os, Image
def binary(f):
    """Open image *f*, convert it to RGBA and threshold it in place.

    Per-pixel rule (the same result the former three sequential passes gave):

    * red < 90 or green < 136 -> opaque black ``(0, 0, 0, 255)``
    * otherwise, blue > 0     -> opaque white ``(255, 255, 255, 255)``
    * otherwise               -> pixel left unchanged

    Args:
        f: Anything accepted by ``Image.open`` (the caller passes a file path).

    Returns:
        The thresholded RGBA image.
    """
    img = Image.open(f)
    img = img.convert("RGBA")
    pixdata = img.load()
    width, height = img.size
    for y in range(height):
        for x in range(width):
            pixel = pixdata[x, y]
            if pixel[0] < 90 or pixel[1] < 136:
                pixdata[x, y] = (0, 0, 0, 255)
            elif pixel[2] > 0:
                pixdata[x, y] = (255, 255, 255, 255)
    return img
def division(img, count=4, left=4, top=2, stride=18, width=12, height=16):
    """Cut *img* into a row of equally sized, equally spaced cells.

    The defaults reproduce the original fixed layout: four 12x16 cells whose
    left edges sit at x = 4, 22, 40 and 58, with the top edge at y = 2.

    Args:
        img: Object exposing ``crop((left, upper, right, lower))``.
        count: Number of cells to cut.
        left: X coordinate of the first cell's left edge.
        top: Y coordinate of every cell's top edge.
        stride: Horizontal distance between consecutive left edges.
        width: Cell width in pixels.
        height: Cell height in pixels.

    Returns:
        A list with one ``img.crop(...)`` result per cell, left to right.
    """
    cells = []
    for i in range(count):
        x = left + i * stride
        cells.append(img.crop((x, top, x + width, top + height)))
    return cells
def recognize(img):
    """Recognise the four characters of *img* by template matching.

    Each cell returned by ``division(img)`` is compared pixel by pixel with
    the ten templates ``./num/0.gif`` ... ``./num/9.gif``; the template with
    the fewest mismatches supplies the character for that cell.

    Args:
        img: Image accepted by ``division`` (the caller passes the output of
            ``binary``).

    Returns:
        A string with one digit character per cell, left to right.
    """
    templates = [(str(d), Image.open("./num/%d.gif" % d)) for d in range(10)]
    result = ""
    for target in division(img):
        points = []
        for digit, template in templates:
            diffs = 0
            for yi in range(16):
                for xi in range(12):
                    # Read each pixel once instead of up to four times.
                    mod_px = template.getpixel((xi, yi))
                    tgt_px = target.getpixel((xi, yi))
                    # NOTE(review): the template value is compared with 0/255
                    # below, so it is presumably a single number while the
                    # target pixel is a channel tuple - confirm. The original
                    # inequality guard is kept unchanged.
                    if mod_px != tgt_px:
                        # A target pixel with any zero channel is treated as
                        # black (0), everything else as white (255).
                        compare = 0 if 0 in tgt_px else 255
                        if mod_px != compare:
                            diffs += 1
            print("diffs:" + str(diffs))
            points.append((diffs, digit))
        # Smallest (diffs, digit) pair wins; ties go to the lower digit,
        # exactly as with the former sort-then-take-first.
        result += min(points)[1]
    return result
if __name__ == '__main__':
    # Binarise and recognise every PNG in ./code/, then save the binarised
    # image as ./result/<recognised text>.png.
    codedir = "./code/"
    resultdir = "./result/"
    # Saving into a missing directory fails, so create it up front.
    if not os.path.isdir(resultdir):
        os.makedirs(resultdir)
    for imgfile in os.listdir(codedir):
        if not imgfile.endswith(".png"):
            continue
        src = codedir + imgfile
        print(src)
        img = binary(src)
        num = recognize(img)
        # The output name depends only on the recognised text, so two inputs
        # recognised as the same string overwrite each other.
        dest = resultdir + num + ".png"
        print("save to %s" % dest)
        img.save(dest)