验证:
HTTPBasicAuthHandler(用于 HTTP 基本身份验证的处理器)
HTTPPasswordMgrWithDefaultRealm(密码管理器,通常和 HTTPBasicAuthHandler 一起使用)
# 创建一个密码管理器
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
# Register the target URL together with the username and password
password_mgr.add_password(None,url,username,password)
第一个参数为 None,表示默认的域(realm)。
如果需要为不同的域添加凭证,可以将 None 替换为对应的域名。
WithDefaultRealm:realm 为 None 的凭证会作为默认凭证,在没有其他域匹配时使用(可为不同的 URL 设置相同的域)。
#!/usr/bin/env python3
import urllib.request
from urllib.parse import urlparse


def auto_login(url='https://ssr3.scrape.center/', username='admin', password='admin'):
    """Fetch a page protected by HTTP Basic authentication and print its body.

    Args:
        url: Address to request; the credentials are registered for this URL.
        username: User name sent for Basic authentication.
        password: Password sent for Basic authentication.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` when the
            server answers with an error status).
    """
    # Password manager; a realm of None makes the entry apply to any realm.
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, username, password)

    # The auth handler looks credentials up in the manager when the server
    # challenges the request.
    handle = urllib.request.HTTPBasicAuthHandler(password_mgr)
    opener = urllib.request.build_opener(handle)

    # The context manager closes the connection once the body has been read.
    with opener.open(url) as response:
        print(response.read().decode('utf-8'))


if __name__ == '__main__':
    auto_login()
Cookie
1.用账号密码登录
2.第一次登录成功后“set-cookie”
3.下次登录就不需要再输入了
处理 cookie 相关的 handler:HTTPCookieProcessor,配合 http.cookiejar 中的 CookieJar 使用
写cookiejar:MozillaCookieJar
将cookies保存成Mozilla型浏览器的cookies格式
读cookiejar:LWPCookieJar
保存成 libwww-perl(LWP) 格式cookies文件。
(两者都是 FileCookieJar 的子类,都支持 save() 保存和 load() 读取。)
#!/usr/bin/env python3
import os
import urllib.request
import http.cookiejar

url = "https://www.baidu.com"
filename = "cookie1.txt"

# A plain http.cookiejar.CookieJar() only keeps cookies in memory;
# LWPCookieJar can additionally save them to / load them from a file.


def save_cookies(url, filename):
    """Request *url*, print the cookies it sets and save them to *filename*.

    The file is written in libwww-perl (LWP) format.  Session cookies and
    expired cookies are written as well, so the file reflects everything
    the server sent.
    """
    cookie = http.cookiejar.LWPCookieJar(filename=filename)
    handle = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handle)
    # Only the Set-Cookie headers matter here; the body is not read.
    with opener.open(url):
        pass
    for item in cookie:
        print(item)
    cookie.save(ignore_discard=True, ignore_expires=True)


def fetch_with_saved_cookies(url, filename):
    """Load cookies from *filename*, request *url* with them and print the body.

    Raises:
        OSError: If *filename* cannot be opened.
        http.cookiejar.LoadError: If the file is not a valid LWP cookie file.
        urllib.error.URLError: If the request fails.
    """
    cookie = http.cookiejar.LWPCookieJar()
    cookie.load(filename=filename, ignore_discard=True, ignore_expires=True)
    handle = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handle)
    with opener.open(url) as response:
        print(response.read().decode('utf-8'))


def main():
    """Create the cookie file on first use, then fetch the page with it."""
    # Loading fails when the file has never been written, so run the save
    # phase once before the load phase.
    if not os.path.exists(filename):
        save_cookies(url, filename)
    fetch_with_saved_cookies(url, filename)


if __name__ == "__main__":
    main()
项目内容:
利用cookie绕过登录网站
#!/usr/bin/env python3
import random
import urllib.request
import urllib.parse
import urllib.error

# Target URL. The request below carries a body so that urllib sends a POST.
url = 'http://httpbin.org/post'

# Pool of HTTP proxies; one of them is picked at random for each run.
ip_list = [
    "http://183.161.45.66:17114",
    "http://119.41.198.172:18350",
    "http://27.191.60.244:15982",
    "http://27.215.237.221:20983",
]


def build_proxy_opener(proxy):
    """Return an opener that routes both http and https traffic via *proxy*.

    Args:
        proxy: Proxy address such as ``"http://host:port"``.
    """
    proxy_handler = urllib.request.ProxyHandler({'http': proxy, 'https': proxy})
    return urllib.request.build_opener(proxy_handler)


def main():
    """POST to ``url`` through a randomly chosen proxy and print the reply."""
    proxy = random.choice(ip_list)
    print(proxy)
    opener = build_proxy_opener(proxy)

    # An empty form body is enough to turn the request into a POST;
    # add fields to the dict as needed.
    data = urllib.parse.urlencode({}).encode('utf-8')
    try:
        with opener.open(url, data=data) as response:
            print(response.read().decode('utf-8'))
    except urllib.error.URLError as e:
        # Dead or refusing proxies end up here (HTTPError is a subclass).
        print("error: ", e)


if __name__ == '__main__':
    main()
异常处理:
1、URLError
来自 urllib 的 error 模块;
URLError 继承自 OSError。
except error.URLError as e:
    print(e.reason)  (打印错误的原因)
2、HTTPError:
URLError 的子类,专门用来处理 HTTP 请求返回的错误(如 404、500),带有 code、reason、headers 属性。
#!/usr/bin/env python3
import socket
import urllib.request
from urllib import request, error
from urllib.error import *


def main():
    """Request a page with a very short timeout and report why it failed."""
    url = 'https://www.baidu.com/'
    try:
        # NOTE(review): the 0.01 s timeout looks deliberately tiny so that the
        # timeout branch below is exercised - confirm.
        with urllib.request.urlopen(url, timeout=0.01):
            pass
        # Alternative: send a browser-like User-Agent and print the page.
        # header = {
        #     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36'
        # }
        # req = urllib.request.Request(url=url, headers=header)
        # response = urllib.request.urlopen(req)
        # print(response.read().decode('utf-8'))
    # HTTPError is a subclass of URLError, so if this clause is enabled it
    # has to stay above the URLError clause to be reachable.
    # except error.HTTPError as e:
    #     print(e)
    except error.URLError as e:
        print(e.reason)
        if isinstance(e.reason, socket.timeout):
            print("Timed out")
    except socket.timeout:
        # A timeout after the connection is established (while waiting for
        # the response) is raised directly instead of wrapped in URLError.
        print("Timed out")


if __name__ == '__main__':
    main()