selenium修改

This commit is contained in:
xiaobulu27 2023-08-24 17:06:02 +08:00
parent 5a2129859f
commit 32e0934792
1 changed file with 22 additions and 17 deletions

View File

@@ -138,24 +138,29 @@ def main():
name = row['主办']
url = row['地址']
domain = urlparse(url).netloc
# Open the website
driver = open_website(url)
# Retrieve cookies from previous session
cookies = get_cookies_from_previous_session(driver)
# Add cookies to the WebDriver
add_cookies(driver, cookies)
# Initialize the set to store visited pages
visited_pages = set()
# Initialize the data list
data = []
# Process the starting page and follow hyperlinks recursively
process_page(driver, url, visited_pages, domain, data)
# Export data to a separate Excel file for each URL
output_filename = f'web_dir/{name}_{domain}.xlsx'
export_to_excel(data, output_filename)
# Close the WebDriver
driver.quit()
# Open the website
driver = open_website(url)
# Retrieve cookies from previous session
cookies = get_cookies_from_previous_session(driver)
# Add cookies to the WebDriver
add_cookies(driver, cookies)
# Initialize the set to store visited pages
visited_pages = set()
# Initialize the data list
data = []
# Process the starting page and follow hyperlinks recursively
process_page(driver, url, visited_pages, domain, data)
# Export data to a separate Excel file in the web_dir directory
output_filename = f'web_dir/{name}_{domain}.xlsx'
export_to_excel(data, output_filename)
# Close the WebDriver
driver.quit()
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()