Python 3 web crawler (14): verification code (CAPTCHA) processing

 Infi-chu:

http://www.cnblogs.com/Infi-chu/

1, Graphical verification code recognition
1. Use tesserocr

import tesserocr
from PIL import Image

# Open a verification-code picture that has been stored locally for testing
image = Image.open('test.jpg')
# Run OCR on the opened image and print the recognized text
result = tesserocr.image_to_text(image)
print(result)

# Convert an image file directly to a string, without opening it with PIL first
import tesserocr
print(tesserocr.file_to_text('test.jpg'))

2. Process the verification code picture
Use the convert() method to turn the picture into a grayscale image or a binary image

image = image.convert('L')	# Convert the image to a grayscale image
image.show()
image = image.convert('1')	# Convert the image to a binary image (the article states the default binarization threshold is 127)

# Convert the image to grayscale first, then binarize it with an explicit
# threshold instead of relying on the default one.
image = image.convert('L')
threshold = 80  # Gray levels below this map to 0; all other levels map to 1
# Lookup table with one entry for each of the 256 possible gray levels.
table = [0 if level < threshold else 1 for level in range(256)]
image = image.point(table, '1')
image.show()  # The image should now be clearer
result = tesserocr.image_to_text(image)
print(result)

2, Sliding verification code recognition
Solving a sliding CAPTCHA is like completing a jigsaw puzzle: a piece has to be dragged into the gap in the picture
1. Sliding verification code features:
Anti-simulation
Anti-forgery
Anti-brute-force

2. How to identify:
Browser simulation verification

3. Initialization:

# Placeholder login credentials stored on CrackGeetest instances; replace with real values
EMAIL = 'test@test.com'
PASSWORD = '123456'

class CrackGeetest():
    """Browser-driven helper for the Geetest sliding verification code.

    The snippet relies on ``webdriver`` and ``WebDriverWait`` from selenium
    being in scope; the article does not show those imports.
    """

    def __init__(self):
        """Start Chrome and store the login URL, wait helper and credentials."""
        self.url = 'https://account.geetest.com/login'
        # The driver class is ``Chrome``; the misspelled ``Chome`` does not exist.
        self.browser = webdriver.Chrome()
        # Explicit waits made through this object use a timeout of 20.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        # Spelled ``password`` so it matches the attribute name used elsewhere.
        self.password = PASSWORD

4. Simulation Click:

# Search button
def get_geetest_button(self):
    """Return the initial verification button once it is clickable.

    Waits through ``self.wait`` for the element whose class name is
    ``geetest_radar_tip`` and returns that element.
    """
    # The locator class is ``By``; ``BY`` is not a defined name.
    locator = (By.CLASS_NAME, 'geetest_radar_tip')
    button = self.wait.until(EC.element_to_be_clickable(locator))
    return button
# Click the verify button
# NOTE: this fragment uses `self`, so it is meant to run inside a method of CrackGeetest
button = self.get_geetest_button()
button.click()

5. Identify gaps:
To find the gap, compare the original image with the image that contains the gap. First use Selenium to select the image element and get its location and size, then take a page screenshot and crop the image out of it

# 
# Get location and size
def position(self):
    """Return the captcha image's edges as ``(top, bottom, left, right)``.

    The element with class name ``geetest_canvas_img`` is located through
    ``self.wait``; its page location and size give the four coordinates.

    NOTE(review): ``get_geetest_image`` calls ``self.get_position()``;
    confirm whether this method is meant to carry that name.
    """
    locator = (By.CLASS_NAME, 'geetest_canvas_img')
    # The condition is spelled ``presence_of_element_located``.
    img = self.wait.until(EC.presence_of_element_located(locator))
    # Fixed pause before reading the geometry; presumably lets the picture
    # finish rendering (TODO confirm).
    time.sleep(2)
    location = img.location
    size = img.size
    top = location['y']
    bottom = location['y'] + size['height']
    left = location['x']
    right = location['x'] + size['width']
    return (top, bottom, left, right)
# Get a screenshot of the web page
def get_geetest_image(self, name='captcha.png'):
    """Return the captcha picture cropped out of a full-page screenshot.

    Args:
        name: File name for the captcha picture. It is accepted but not
            used by this snippet.

    Returns:
        The result of cropping the screenshot to the captcha's bounds.
    """
    # Edges of the captcha picture on the page.
    top, bottom, left, right = self.get_position()
    print('Verification code location', top, bottom, left, right)
    screenshot = self.get_screenshot()
    # The crop box is ordered (left, top, right, bottom).
    captcha = screenshot.crop((left, top, right, bottom))
    # The original never returned the crop, so callers received None.
    return captcha
# Get the second picture (picture with gap)
def get_slider(self):
    """Return the slider button once it is clickable.

    Waits through ``self.wait`` for the element whose class name is
    ``geetest_slider_button`` and returns that element.
    """
    locator = (By.CLASS_NAME, 'geetest_slider_button')
    slider = self.wait.until(EC.element_to_be_clickable(locator))
    return slider
# The interface (the picture with the gap) appears after the slider is clicked
# NOTE: this fragment uses `self`, so it is meant to run inside a method of the class
slider = self.get_slider()
slider.click()
# Call get_geetest_image() again to get the second image; the two images are named img1 and img2 respectively
'''
To locate the gap, traverse every coordinate of the image and read the RGB data of the corresponding pixel in each of the two images. If the difference between the two pixels is within a certain range, they are treated as the same pixel and the comparison continues with the next point. If the difference is beyond that range, the pixels are not the same, and that position is the gap position.
'''
def is_pixel_equal(self, img1, img2, x, y):
    """Report whether two images have approximately the same pixel at (x, y).

    Args:
        img1: First image; ``img1.load()[x, y]`` must yield a pixel whose
            first three items are its channel values.
        img2: Second image, accessed the same way.
        x: Horizontal pixel coordinate.
        y: Vertical pixel coordinate.

    Returns:
        True when each of the first three channels differs by less than the
        threshold of 60, otherwise False.
    """
    # Take the pixel at the same coordinate from both pictures.
    pixel1 = img1.load()[x, y]
    pixel2 = img2.load()[x, y]
    threshold = 60
    # Every channel must be within the threshold for the pixels to count as
    # the same; a larger difference marks a candidate gap position.
    return all(
        abs(pixel1[channel] - pixel2[channel]) < threshold
        for channel in range(3)
    )

def get_gap(self, img1, img2):
    """Return the x coordinate of the first column where the images differ.

    Columns are scanned left to right starting at x = 60, and each column is
    scanned top to bottom using ``self.is_pixel_equal``.

    Args:
        img1: First image; ``img1.size`` gives ``(width, height)``.
        img2: Second image, compared pixel by pixel with ``img1``.

    Returns:
        The x coordinate of the first differing pixel, or 60 when no
        differing pixel is found.
    """
    # Scanning starts at x = 60; presumably this skips the slider piece at
    # the left edge (TODO confirm).
    left = 60
    width, height = img1.size
    for i in range(left, width):
        for j in range(height):
            # Both images are passed as separate arguments; the original
            # ``img1.img2`` was an attribute access on the first image.
            if not self.is_pixel_equal(img1, img2, i, j):
                return i
    return left

6. Simulate dragging:

def get_track(self, distance):
    """Build the list of per-step offsets used to drag the slider.

    The motion accelerates (a = 2) over the first four fifths of the
    distance and then decelerates (a = -3), advancing in time steps of 0.2.

    Args:
        distance: Total distance the slider has to travel.

    Returns:
        A list of rounded offsets, one per time step. Because every step is
        rounded, their sum only approximates ``distance``.
    """
    track = []
    current = 0              # Distance covered so far
    mid = distance * 4 / 5   # Point at which deceleration starts
    t = 0.2                  # Duration of one step
    v = 0                    # Current velocity
    while current < distance:
        a = 2 if current < mid else -3
        v0 = v
        v = v0 + a * t
        # Displacement x = v0*t + 1/2*a*t^2; the power is ``**`` in Python
        # (``^`` is bitwise XOR and fails on floats).
        move = v0 * t + 1 / 2 * a * t ** 2
        current += move
        track.append(round(move))
    return track

def move_to_gap(self, slider, tracks):
    """Drag the slider along the given track and then release it.

    Args:
        slider: The slider element to press and hold.
        tracks: Iterable of horizontal offsets, applied one at a time.
    """
    ActionChains(self.browser).click_and_hold(slider).perform()
    for x in tracks:
        # Each offset is performed as its own action, moving horizontally only.
        ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
    # Short pause before letting go of the slider.
    time.sleep(0.3)
    ActionChains(self.browser).release().perform()

1. Click (touch) verification codes are similar to the verification code used by 12306
2. Approach:
Character recognition and image recognition
3. Use the Chaojiying ("Super Eagle") platform for recognition
Modify its Python API:

import requests
from hashlib import md5

class Chaojiying(object):
    """Minimal client for the Chaojiying verification-code recognition service."""

    def __init__(self, username, password, soft_id):
        """Store the account data sent with every request.

        Args:
            username: Chaojiying account name.
            password: Plain-text account password; only its MD5 hex digest
                is kept and sent.
            soft_id: Software ID sent as ``softid``.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        # Parameters included in every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
        }

    def post_pic(self, im, codetype):
        """Upload a picture for recognition and return the decoded JSON reply.

        Args:
            im: Picture content, uploaded as the ``userfile`` form file.
            codetype: Verification code type sent as ``codetype``.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('test.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
        return r.json()

    def report_error(self, im_id):
        """Report the picture with ID ``im_id`` as an error and return the decoded JSON reply."""
        params = {'id': im_id, }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()

4. Initialization:

# Placeholder credentials for the target website; fill in real values
EMAIL = 'test@test.com'
PASSWORD = ''
# Chaojiying account used for recognition; fill in real values
CHAOJIYING_USERNAME='test'
CHAOJIYING_PASSWORD=''
CHAOJIYING_SOFT_ID=893590    # Software ID
CHAOJIYING_KIND=9102    # Verification code type
class CrackTouClick():
    """Browser-driven helper for click-based verification codes.

    The snippet relies on ``webdriver`` and ``WebDriverWait`` from selenium
    being in scope; the article does not show those imports.
    """

    def __init__(self):
        """Start Chrome and store the URL, wait helper, credentials and client."""
        self.url = 'Enter website to identify'
        # The driver class is ``Chrome``; the misspelled ``Chome`` does not exist.
        self.browser = webdriver.Chrome()
        # Explicit waits made through this object use a timeout of 20.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD
        # Chaojiying.__init__ accepts exactly (username, password, soft_id).
        # CHAOJIYING_KIND is not a constructor argument; it is passed to
        # post_pic() when a picture is uploaded.
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

5. Obtain the verification code:

def open(self):
    """Load the login page and fill in the email and password fields.

    Opens ``self.url`` in the browser, waits for the elements with IDs
    ``email`` and ``password``, and types the stored credentials into them.
    """
    self.browser.get(self.url)
    email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
    password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
    # Each credential goes into its own field; the original typed the
    # password into the email field and left the password field empty.
    email.send_keys(self.email)
    password.send_keys(self.password)
def get_touclick_button(self):
    """Return the verification button once it is clickable.

    Waits through ``self.wait`` for the element whose class name is
    ``touclick-hod-wrap`` and returns that element.
    """
    locator = (By.CLASS_NAME, 'touclick-hod-wrap')
    button = self.wait.until(EC.element_to_be_clickable(locator))
    return button
def get_touclick_element(self):
    """Return the verification picture element once it is present.

    Waits through ``self.wait`` for the element whose class name is
    ``touclick-pub-content`` and returns that element.
    """
    locator = (By.CLASS_NAME, 'touclick-pub-content')
    # The condition is spelled ``presence_of_element_located``.
    element = self.wait.until(EC.presence_of_element_located(locator))
    return element
def get_position(self):
    """Return the verification picture's edges as ``(top, bottom, left, right)``.

    The element comes from ``self.get_touclick_element()``; its location and
    size are read after a one-second pause.
    """
    target = self.get_touclick_element()
    time.sleep(1)
    origin = target.location
    extent = target.size
    y_top = origin['y']
    x_left = origin['x']
    return (y_top, y_top + extent['height'], x_left, x_left + extent['width'])
def get_screenshot(self):
    """Capture the current browser page and return it opened as an image."""
    # The browser returns the screenshot as PNG data, which is wrapped in an
    # in-memory buffer so it can be opened as an image.
    png_data = self.browser.get_screenshot_as_png()
    return Image.open(BytesIO(png_data))
def get_touclick_image(self, name='captcha.png'):
    """Return the verification picture cropped out of a full-page screenshot.

    Args:
        name: File name for the captcha picture. It is accepted but not
            used by this snippet.

    Returns:
        The result of cropping the screenshot to the picture's bounds.
    """
    top, bottom, left, right = self.get_position()
    print('Verification code location', top, bottom, left, right)
    screenshot = self.get_screenshot()
    # The crop box is ordered (left, top, right, bottom).
    captcha = screenshot.crop((left, top, right, bottom))
    return captcha

6. Recognize the verification code:

# NOTE: this fragment uses `self`, so it is meant to run inside a method of CrackTouClick
image = self.get_touclick_image()
# Serialize the cropped picture to PNG in memory so its bytes can be uploaded.
bytes_array = BytesIO()
image.save(bytes_array, format='PNG')
# getvalue() is a method of the buffer; the original comma passed the buffer
# and a call to an undefined getvalue() as two separate arguments.
res = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
print(res)
def get_points(self, captcha_result):
    """Parse the recognition result into a list of integer coordinate lists.

    Args:
        captcha_result: Mapping whose ``pic_str`` entry holds groups
            separated by ``|``, each group being comma-separated integers
            (for example ``'10,20|30,40'``).

    Returns:
        One list of integers per group, e.g. ``[[10, 20], [30, 40]]``.
    """
    groups = captcha_result.get('pic_str').split('|')
    locations = [[int(number) for number in group.split(',')] for group in groups]
    return locations
def touch_click_words(self, locations):
    """Click each given location on the verification picture in turn.

    Args:
        locations: Iterable of ``[x, y]`` pairs, used as offsets relative to
            the element returned by ``self.get_touclick_element()``.
    """
    for location in locations:
        print(location)
        ActionChains(self.browser).move_to_element_with_offset(self.get_touclick_element(), location[0], location[1]).click().perform()
        # Pause for one second between consecutive clicks.
        time.sleep(1)

Tags: Python PHP JSON Selenium less

Posted on Fri, 20 Mar 2020 11:00:25 -0700 by madspoihur