效果图
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from lxml import etree


def _build_listing_chrome_options():
    """Return the headless Chrome options used to fetch the listing page."""
    import os

    options = Options()
    for flag in (
        '--headless',  # no visible browser window
        '--disable-gpu',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions',
        '--ignore-certificate-errors',
        '--window-size=1920,1080',
    ):
        options.add_argument(flag)

    # Only pin the browser binary when the default Windows install path is
    # really there; otherwise leave it unset so Selenium locates Chrome itself.
    chrome_binary = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
    if os.path.isfile(chrome_binary):
        options.binary_location = chrome_binary

    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
    return options


def _listing_absolute_url(raw_url):
    """Prefix ``https:`` unless *raw_url* already starts with ``http``."""
    raw_url = raw_url.strip()
    return raw_url if raw_url.startswith('http') else "https:" + raw_url


def _listing_first_text(values, default):
    """Return the first XPath result stripped, or *default* when there is none."""
    return values[0].strip() if values else default


def _extract_listing_item(item):
    """Collect the displayed fields of one auction card element into a dict."""
    links = item.xpath('.//a/@href')
    images = item.xpath('.//img[contains(@style, "object-fit: cover")]/@src')
    return {
        "title": _listing_first_text(
            item.xpath('.//span[contains(@class, "text") and contains(@style, "font-size: 16px")]/@title'),
            "无标题",
        ),
        "current_price": _listing_first_text(
            item.xpath('.//div[contains(text(), "当前价")]/following-sibling::div//span[contains(@style, "font-size: 24px")]/text()'),
            "无价格",
        ),
        "eval_price": _listing_first_text(
            item.xpath('.//div[contains(text(), "评估价")]/following-sibling::span[2]/text()'),
            "无评估价",
        ),
        "status": _listing_first_text(
            item.xpath('.//div[contains(@style, "background: rgb(235, 0, 69)")]/text()'),
            "无状态",
        ),
        "views": _listing_first_text(
            item.xpath('.//span[contains(text(), "次围观")]/preceding-sibling::span/text()'),
            "0",
        ),
        "signups": _listing_first_text(
            item.xpath('.//span[contains(text(), "人报名")]/preceding-sibling::span/text()'),
            "0",
        ),
        "img_url": _listing_absolute_url(images[0]) if images else "无图片",
        "detail_url": _listing_absolute_url(links[0]) if links else "无链接",
    }


def get_taobao_auction_data():
    """Fetch the Taobao auction search page and print every auction card found.

    The page is loaded in headless Chrome, the rendered HTML is written to
    ``taobao_auction.html``, and each matching card is parsed with lxml and
    printed. Any error is reported on stdout; nothing is returned.
    """
    chrome_options = _build_listing_chrome_options()

    # Bound before the try so the cleanup below never hits an unbound name
    # when driver start-up itself fails.
    driver = None
    try:
        print("正在初始化Chrome驱动...")
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=chrome_options)
        print("Chrome驱动初始化成功")

        driver.set_page_load_timeout(30)

        url = "https://zc-paimai.taobao.com/wow/pm/default/pc/zichansearch?fcatV4Ids=[%22206067201%22]&corp_type=[%226%22]&structFieldMap={%22corp_type%22:[%226%22]}&page=1"
        driver.get(url)

        # Fixed wait for the page to render, then scroll to the bottom
        # (presumably to trigger lazily loaded cards).
        print("等待页面加载...")
        time.sleep(10)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        page_source = driver.page_source

        # Keep the raw HTML so the XPath expressions can be checked offline.
        with open('taobao_auction.html', 'w', encoding='utf-8') as f:
            f.write(page_source)
        print("页面内容已保存到 taobao_auction.html")

        html = etree.HTML(page_source)
        items = html.xpath('//div[contains(@style, "border: 1px solid rgb(230, 230, 230)")]')

        if not items:
            print("未找到拍卖项目,请检查页面结构")
            print("请查看保存的HTML文件以分析页面结构")
            return

        print(f"\n找到 {len(items)} 个拍卖项目")
        for item in items:
            # One malformed card must not abort the remaining ones.
            try:
                record = _extract_listing_item(item)
            except Exception as e:
                print(f"解析项目时出错: {e}")
                continue

            print("\n拍卖项目信息:")
            print(f"标题: {record['title']}")
            print(f"当前价: {record['current_price']}")
            print(f"评估价: {record['eval_price']}")
            print(f"状态: {record['status']}")
            print(f"围观次数: {record['views']}")
            print(f"报名人数: {record['signups']}")
            print(f"图片URL: {record['img_url']}")
            print(f"详情链接: {record['detail_url']}")
            print("-" * 50)
    except Exception as e:
        print(f"发生错误: {e}")
    finally:
        # Best-effort shutdown: report a failure instead of hiding it.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print(f"关闭浏览器时出错: {e}")


if __name__ == "__main__":
    get_taobao_auction_data()
获取登录cookie
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from lxml import etree
import re


def _build_detail_chrome_options():
    """Return Chrome options for the detail scraper.

    Headless mode is not enabled here: the caller asks the user to log in
    manually in the browser window.
    """
    import os

    options = Options()
    for flag in (
        '--disable-gpu',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions',
        '--ignore-certificate-errors',
        '--window-size=1920,1080',
    ):
        options.add_argument(flag)

    # Only pin the browser binary when the default Windows install path is
    # really there; otherwise leave it unset so Selenium locates Chrome itself.
    chrome_binary = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
    if os.path.isfile(chrome_binary):
        options.binary_location = chrome_binary

    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
    return options


def _download_auction_detail():
    """Log in interactively, open the auction detail page and save its HTML."""
    chrome_options = _build_detail_chrome_options()

    # Bound before the try so the cleanup below never hits an unbound name
    # when driver start-up itself fails.
    driver = None
    try:
        print("正在初始化Chrome驱动...")
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=chrome_options)
        print("Chrome驱动初始化成功")
        driver.set_page_load_timeout(30)

        print("访问淘宝首页...")
        driver.get("https://www.taobao.com")
        time.sleep(3)
        print("请在浏览器中手动登录淘宝...")
        input("登录完成后请按回车键继续...")

        # The cookie list itself is not needed: the same browser session is
        # reused for the next request. The call is kept from the original flow;
        # presumably it also fails early if the window was closed during login.
        driver.get_cookies()
        print("已获取登录Cookie")

        url = "https://sf-item.taobao.com/sf_item/903309584546.htm"
        print(f"正在访问拍卖详情页: {url}")
        driver.get(url)

        # Fixed wait for the page to render, then scroll to the bottom.
        print("等待页面加载...")
        time.sleep(10)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        page_source = driver.page_source
        with open('auction_detail.html', 'w', encoding='utf-8') as f:
            f.write(page_source)
        print("页面内容已保存到 auction_detail.html")
    except Exception as e:
        print(f"采集页面时发生错误: {e}")
    finally:
        # Best-effort shutdown: report a failure instead of hiding it.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print(f"关闭浏览器时出错: {e}")


def _detail_first(html, xpath_expr):
    """Return the first XPath match stripped, or '' when nothing matches."""
    matches = html.xpath(xpath_expr)
    return matches[0].strip() if matches else ''


def _detail_table_value(html, key):
    """Return the table value labelled *key*, trying three layouts in order."""
    candidates = (
        # Value in a dedicated span inside the next table cell.
        f'//table//span[contains(text(),"{key}")]/../../following-sibling::td[1]//span[contains(@class,"family-tahoma")]/text()',
        # Value in a sibling div of the label's parent.
        f'//table//span[contains(text(),"{key}")]/../following-sibling::div//span[contains(@class,"family-tahoma")]/text()',
        # Plain text directly inside the next table cell (no span).
        f'//table//span[contains(text(),"{key}")]/../../following-sibling::td[1]//text()',
    )
    for expr in candidates:
        found = html.xpath(expr)
        if found:
            return found[0].strip()
    return ''


def _parse_auction_detail(html):
    """Extract the printed fields from the parsed detail page as (label, value) pairs."""
    title = _detail_first(html, '//title/text()')

    # Fall back to the alternative price element when the first one is absent.
    current_price = (
        _detail_first(html, '//span[contains(@class,"pm-current-price")]/em/text()')
        or _detail_first(html, '//span[contains(@class,"J_Price")]/em/text()')
    )

    sell_price = _detail_table_value(html, '变卖价')
    deposit = _detail_table_value(html, '保证金')
    increase = _detail_table_value(html, '加价幅度')
    eval_price = _detail_table_value(html, '评估价')
    sell_period = _detail_table_value(html, '变卖周期')
    delay_period = _detail_table_value(html, '延时周期')

    rule_matches = html.xpath('//table//span[contains(text(),"竞价规则")]/../../following-sibling::td[1]//span/text()')
    if rule_matches:
        rule = rule_matches[0].strip()
    else:
        rule = _detail_first(html, '//table//span[contains(text(),"竞价规则")]/../following-sibling::div//span/text()')

    court = _detail_first(html, '//div[@class="unit-org-content"]/p/text()')
    company = _detail_first(html, '//em[contains(@class,"contact-unit-person")]/text()')
    phone = _detail_first(html, '//span[@class="c-title" and contains(text(),"手机")]/following-sibling::span[@class="c-text"]/text()')

    links = html.xpath('//a[contains(@class,"view-ano")]/@href')
    notice_link = links[0] if links else ''
    if notice_link and not notice_link.startswith('http'):
        notice_link = 'https:' + notice_link

    return [
        ("标题", title),
        ("当前价", current_price),
        ("变卖价", sell_price),
        ("保证金", deposit),
        ("加价幅度", increase),
        ("评估价", eval_price),
        ("变卖周期", sell_period),
        ("延时周期", delay_period),
        ("竞价规则", rule),
        ("主办法院", court),
        ("拍卖公司及联系人", company),
        ("联系方式", phone),
        ("公告链接", notice_link),
    ]


def get_auction_detail():
    """Scrape one auction detail page after a manual login and print its fields.

    Step 1 opens Chrome, waits for the user to log in, loads the detail page
    and writes it to ``auction_detail.html``. Step 2 parses that file with
    lxml and prints the extracted values. A scraping error is reported and
    parsing is still attempted, so a previously saved file can be reused.
    Nothing is returned.
    """
    import os

    # 1. Fetch the page and save the HTML.
    _download_auction_detail()

    # 2. Parse the local file; it may be missing if the fetch failed and no
    #    earlier copy exists.
    if not os.path.isfile('auction_detail.html'):
        print("未找到 auction_detail.html,无法解析")
        return

    print("\n正在解析 auction_detail.html ...")
    with open('auction_detail.html', 'r', encoding='utf-8') as f:
        html = etree.HTML(f.read())

    # Guard in case the parser yields no root element (e.g. an empty file).
    if html is None:
        print("auction_detail.html 内容为空,无法解析")
        return

    print("\n拍卖详情信息:")
    for label, value in _parse_auction_detail(html):
        print(f"{label}: {value}")
    print("-" * 50)


if __name__ == "__main__":
    get_auction_detail()