selenium修改
This commit is contained in:
parent
5a2129859f
commit
32e0934792
39
scrape.py
39
scrape.py
|
@ -138,24 +138,29 @@ def main():
|
|||
name = row['主办']
|
||||
url = row['地址']
|
||||
domain = urlparse(url).netloc
|
||||
# Open the website
|
||||
driver = open_website(url)
|
||||
# Retrieve cookies from previous session
|
||||
cookies = get_cookies_from_previous_session(driver)
|
||||
# Add cookies to the WebDriver
|
||||
add_cookies(driver, cookies)
|
||||
# Initialize the set to store visited pages
|
||||
visited_pages = set()
|
||||
# Initialize the data list
|
||||
data = []
|
||||
# Process the starting page and follow hyperlinks recursively
|
||||
process_page(driver, url, visited_pages, domain, data)
|
||||
# Export data to a separate Excel file for each URL
|
||||
output_filename = f'web_dir/{name}_{domain}.xlsx'
|
||||
export_to_excel(data, output_filename)
|
||||
|
||||
# Close the WebDriver
|
||||
driver.quit()
|
||||
# Open the website
|
||||
driver = open_website(url)
|
||||
|
||||
# Retrieve cookies from previous session
|
||||
cookies = get_cookies_from_previous_session(driver)
|
||||
# Add cookies to the WebDriver
|
||||
add_cookies(driver, cookies)
|
||||
|
||||
# Initialize the set to store visited pages
|
||||
visited_pages = set()
|
||||
# Initialize the data list
|
||||
data = []
|
||||
|
||||
# Process the starting page and follow hyperlinks recursively
|
||||
process_page(driver, url, visited_pages, domain, data)
|
||||
|
||||
# Export data to a separate Excel file in the web_dir directory
|
||||
output_filename = f'web_dir/{name}_{domain}.xlsx'
|
||||
export_to_excel(data, output_filename)
|
||||
|
||||
# Close the WebDriver
|
||||
driver.quit()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
Loading…
Reference in New Issue