From 32e093479205ac0d71d95dde7dceb68c92831a88 Mon Sep 17 00:00:00 2001
From: xiaobulu27 <xiaobulu27@outlook.com>
Date: Thu, 24 Aug 2023 17:06:02 +0800
Subject: [PATCH] =?UTF-8?q?selenium=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

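---
Note (review): the hunk below only re-indents the per-URL steps (open the
site, restore cookies, crawl, export to Excel, quit the driver) from 4 to 8
spaces, so they sit at the same depth as the name/url/domain assignments
above them. Presumably that places them inside the loop over rows, so every
row is scraped rather than only the last one; confirm against the full
scrape.py. Possible follow-up: driver.quit() is not in a try/finally, so an
exception raised by process_page() would skip it.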
 scrape.py | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/scrape.py b/scrape.py
index 574263f..a593747 100644
--- a/scrape.py
+++ b/scrape.py
@@ -138,24 +138,29 @@ def main():
         name = row['主办']
         url = row['地址']
         domain = urlparse(url).netloc
-    # Open the website
-    driver = open_website(url)
-    # Retrieve cookies from previous session
-    cookies = get_cookies_from_previous_session(driver)
-    # Add cookies to the WebDriver
-    add_cookies(driver, cookies)
-    # Initialize the set to store visited pages
-    visited_pages = set()
-    # Initialize the data list
-    data = []
-    # Process the starting page and follow hyperlinks recursively
-    process_page(driver, url, visited_pages, domain, data)
-    # Export data to a separate Excel file for each URL
-    output_filename = f'web_dir/{name}_{domain}.xlsx'
-    export_to_excel(data, output_filename)
 
-    # Close the WebDriver
-    driver.quit()
+        # Open the website
+        driver = open_website(url)
+
+        # Retrieve cookies from previous session
+        cookies = get_cookies_from_previous_session(driver)
+        # Add cookies to the WebDriver
+        add_cookies(driver, cookies)
+
+        # Initialize the set to store visited pages
+        visited_pages = set()
+        # Initialize the data list
+        data = []
+
+        # Process the starting page and follow hyperlinks recursively
+        process_page(driver, url, visited_pages, domain, data)
+
+        # Export data to a separate Excel file in the web_dir directory
+        output_filename = f'web_dir/{name}_{domain}.xlsx'
+        export_to_excel(data, output_filename)
+
+        # Close the WebDriver
+        driver.quit()
 
 if __name__ == "__main__":
     main()