求救各位 Python 大神:我找不到这个网站的 API,所以改用 Selenium 爬取公开资讯观测站。使用 WebDriver 按下【详细资料】按钮后可以成功开启新视窗,但读不到新跳出来的视窗内容。
这是我爬取的网站网址https://mopsov.twse.com.tw/mops/web/ajax_t114sb07?parameters=0eb65210d5bdc34ea16e295ccdbad1094cba7de165dfcdf4e7902f9ef62c42f5e1d5d55f2907af83df59ae82756caca37dd8deda6d21048dd6757f91f6feed9efade4567702b1a82869a09fd73fc40584805cee69a5101ddc3bffc42dba6838e87d6b6c0db4e0cc4b38b25e81f8e9e6aaeb537f477d135aaa7cfe6b60d70cfc4
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
# Scrape the "#table01" table that the detail button opens in a separate
# browser window, print it, and save it as tab-separated text.

# Start the WebDriver.
driver = webdriver.Chrome()
try:
    # Open the target page.
    driver.get("https://mopsov.twse.com.tw/mops/web/ajax_t114sb07?parameters=0eb65210d5bdc34ea16e295ccdbad1094cba7de165dfcdf4e7902f9ef62c42f5e1d5d55f2907af83df59ae82756caca37dd8deda6d21048dd6757f91f6feed9efade4567702b1a82869a09fd73fc40584805cee69a5101ddc3bffc42dba6838e87d6b6c0db4e0cc4b38b25e81f8e9e6aaeb537f477d135aaa7cfe6b60d70cfc4")

    # Remember the original window so the newly opened one can be told apart.
    original_window = driver.current_window_handle

    # Wait for the detail button to become clickable, then click it via
    # JavaScript.
    button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="t114sb07_fm"]/p[2]/table[2]/tbody/tr[4]/td[9]/input')
        )
    )
    driver.execute_script("arguments[0].click();", button)

    # The click opens the detail page in a NEW window. WebDriver keeps
    # sending commands to the original window until it is told otherwise,
    # so wait for the second window handle and switch to it explicitly.
    WebDriverWait(driver, 30).until(EC.number_of_windows_to_be(2))
    for handle in driver.window_handles:
        if handle != original_window:
            driver.switch_to.window(handle)
            break

    # The second page is generated dynamically (its URL is about:blank), so
    # wait for the table itself rather than for a URL change.
    table_element = WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#table01"))
    )
    table_html = table_element.get_attribute("outerHTML")

    # Parse the table HTML with BeautifulSoup into a list of rows, each a
    # list of stripped cell texts (header and data cells alike).
    soup = BeautifulSoup(table_html, "html.parser")
    table_data = [
        [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
        for row in soup.find_all("tr")
    ]

    # Print the table contents, one tab-separated row per line.
    for row in table_data:
        print("\t".join(row))

    # Save the same tab-separated rows to a file.
    with open("table01_data.txt", "w", encoding="utf-8") as file:
        for row in table_data:
            file.write("\t".join(row) + "\n")
    print("数据已成功写入 table01_data.txt")
finally:
    # Always close the browser (all windows), even if a wait timed out.
    driver.quit()
2 个回答
1
猴子
iT邦新手 4 级 ‧ 2025-03-10 18:59:48
最佳解答
code
可能有些小 bug,自己再改改就好
# pip install requests bs4
import json
from dataclasses import asdict, dataclass, field
from typing import List, Optional
import requests
from bs4 import BeautifulSoup
# Query parameters for the example run at the bottom of the script; both are
# sent as form fields of the same name by get_company_detailed.
co_id = 1101  # company code to look up
year = 112  # year to request
@dataclass
class StockCompensation:
    """Stock compensation figures."""

    shares: float  # number of shares distributed
    market_price: float  # market price (yuan per share)
    amount_a: float  # computed total amount of the stock compensation
@dataclass
class CashCompensation:
    """Cash compensation figures."""

    amount_b: float  # total amount of the cash compensation
@dataclass
class ManagerCompensation:
    """Totals of the compensation paid to management."""

    stock: StockCompensation  # stock compensation figures
    cash: CashCompensation  # cash compensation figures
    total_amount: float  # total compensation (stock + cash)
    percentage_of_net_income: float  # share of the company's net income (%)
@dataclass
class Manager:
    """A single member of management."""

    title: str  # job title
    name: str  # person's name
@dataclass
class CompanyCompensation:
    """Management compensation report for one company."""

    company_code: str  # company code
    company_name: str  # company name
    employee_compensation_year: int  # year the employee compensation belongs to
    distribution_year: int  # year in which the compensation is distributed
    unit: str  # unit text as shown on the page (e.g. yuan / shares)
    manager_compensation: ManagerCompensation  # management compensation totals
    # List of management members; default_factory gives each instance its own
    # fresh list instead of a shared mutable default.
    managers: List[Manager] = field(default_factory=list)
def get_company_detailed(
    co_id: int, year: int, timeout: float = 30.0
) -> Optional[CompanyCompensation]:
    """Fetch and parse the manager compensation report for one company.

    POSTs the query form to the ``ajax_t114sb07`` endpoint and parses the
    tables inside the first ``<form>`` of the returned HTML.

    Args:
        co_id: Company code, sent as the ``co_id`` form field.
        year: Year to query, sent as the ``year`` form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller forever.

    Returns:
        The parsed ``CompanyCompensation``, or ``None`` when the response
        does not contain the expected form and tables.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If a cell that should hold a number cannot be converted.
    """
    req = requests.post(
        "https://mopsov.twse.com.tw/mops/web/ajax_t114sb07",
        data={
            "encodeURIComponent": 1,
            "firstin": True,
            "id": "",
            "key": "",
            "TYPEK": "sii",
            "step": 32,
            "home_step": 0,
            "co_id": co_id,
            "year": year,
        },
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    req.raise_for_status()

    soup = BeautifulSoup(req.text, "html.parser")
    form = soup.select_one("form")
    if form is None:
        return None

    # Tables that are the first / second child of their parent element.
    first_child_tables = form.select("table:nth-child(1)")
    second_child_tables = form.select("table:nth-child(2)")
    if len(first_child_tables) < 3 or not second_child_tables:
        # The page does not have the expected layout; treat it as "no data".
        return None

    table1 = first_child_tables[0]  # basic information
    table2 = first_child_tables[1]  # unit
    table3 = second_child_tables[0]  # manager compensation distribution
    table4 = first_child_tables[2]  # manager titles and names

    table1_tds = list(table1.select("td"))
    if len(table1_tds) < 3:
        return None
    # The year values are read from a <td> nested inside the third cell.
    year_cell = table1_tds[2].select_one("td")
    if year_cell is None:
        return None
    year_data = year_cell.contents

    # Non-empty cell texts of the manager table: title, name, title, name, ...
    table4_tds = [x.text for x in table4.select("td") if x.text.strip()]

    # Numeric cells of the fourth row, with thousands separators removed.
    amounts = [
        float(x.text.replace(",", ""))
        for x in table3.select("tr:nth-child(4) > td")
    ]
    if len(amounts) < 6:
        return None

    # NOTE(review): cell 1 is mapped to amount_a and cell 2 to market_price,
    # which differs from the field order on StockCompensation -- confirm
    # against the column order of the page.
    manager_compensation = ManagerCompensation(
        stock=StockCompensation(
            shares=amounts[0],
            amount_a=amounts[1],
            market_price=amounts[2],
        ),  # stock compensation figures
        cash=CashCompensation(amount_b=amounts[3]),  # cash compensation figures
        total_amount=amounts[4],  # total compensation (stock + cash)
        percentage_of_net_income=amounts[5],  # share of net income (%)
    )

    return CompanyCompensation(
        company_code=table1_tds[0].contents[2].text.strip(),
        company_name=table1_tds[1].contents[2].text.strip(),
        employee_compensation_year=int(year_data[1].text),
        distribution_year=int(year_data[4].text),
        unit=table2.text.strip(),
        manager_compensation=manager_compensation,
        # Pair every even-indexed text (title) with the following one (name).
        managers=[
            Manager(title=title, name=name)
            for title, name in zip(table4_tds[::2], table4_tds[1::2])
        ],
    )
# Fetch the report for the configured company and year; when one was parsed,
# print it as indented JSON, keeping non-ASCII characters readable.
if company := get_company_detailed(co_id, year):
    company_dict = asdict(company)
    print(json.dumps(company_dict, ensure_ascii=False, indent=2))
out:
{
"company_code": "1101",
"company_name": "台泥",
"employee_compensation_year": 111,
"distribution_year": 112,
"unit": "单位:元/股",
"manager_compensation": {
"stock": {
"shares": 0.0,
"market_price": 0.0,
"amount_a": 0.0
},
"cash": {
"amount_b": 8376383.0
},
"total_amount": 8376383.0,
"percentage_of_net_income": 0.1549
},
"managers": [
{
"title": "企业团总执行长",
"name": "张安平"
},
{
"title": "总经理",
"name": "程耀辉"
},
{
"title": "资深副总经理",
"name": "黄健强"
},
{
"title": "资深副总经理",
"name": "吕克甫"
},
{
"title": "副总经理",
"name": "葛保罗"
},
{
"title": "副总经理",
"name": "王建全"
},
{
"title": "副总经理",
"name": "刘凤萍"
},
{
"title": "副总经理",
"name": "叶毓君"
},
{
"title": "财务长",
"name": "于明仁"
},
{
"title": "资深协理",
"name": "蔡国屿"
},
{
"title": "资深协理",
"name": "丘惠生"
},
{
"title": "资深协理兼会计主管",
"name": "叶国宏"
},
{
"title": "资深协理",
"name": "邱钰文"
},
{
"title": "协理",
"name": "辜公怡"
},
{
"title": "协理",
"name": "余金龙"
},
{
"title": "协理",
"name": "蓝岑藯"
},
{
"title": "协理兼公司治理主管",
"name": "赖家柔"
},
{
"title": "协理",
"name": "陈光熙"
},
{
"title": "协理",
"name": "魏家珮"
},
{
"title": "协理",
"name": "李国源"
},
{
"title": "协理",
"name": "蒋政道"
},
{
"title": "协理",
"name": "陈银华"
},
{
"title": "协理",
"name": "陈怡中"
},
{
"title": "资安长",
"name": "张年旺"
},
{
"title": "资深经理",
"name": "陈进益"
},
{
"title": "资深经理",
"name": "吴云德"
},
{
"title": "资深经理",
"name": "陈技竫"
},
{
"title": "资深经理",
"name": "梁诗圣"
},
{
"title": "经理兼稽核主管",
"name": "曹家华"
},
{
"title": "资深副理",
"name": "吴子扬"
},
{
"title": "资深副理",
"name": "张毓扬"
},
{
"title": "总经理",
"name": "李钟培"
},
{
"title": "资深协理",
"name": "黄麟添"
},
{
"title": "协理",
"name": "洪维爵"
},
{
"title": "协理",
"name": "宋又新"
},
{
"title": "资深经理",
"name": "李明达"
},
{
"title": "经理",
"name": "刘志仁"
}
]
}
0
zivzhong
iT邦研究生 4 级 ‧ 2025-03-10 13:36:49
供参:
https://www.cupoy.com/qa/collection/00000180B6E4E37F000000026375706F795F72656C656173654355/000001812EE9861E000000066375706F795F72656C656173655155455354
-
1 -
-
ryan1040154
iT邦新手 5 级 ‧
2025-03-10 14:16:40
目前是参考这篇没错,但还是无法成功,不过仍谢谢您的提醒!!
修改