This commit is contained in:
xiaobulu27 2023-07-07 08:46:20 +08:00
parent 637bbd3e17
commit 98e42e50af
1 changed file with 2 additions and 2 deletions

View File

@@ -40,7 +40,7 @@ def process_page(driver, url, visited_pages, start_domain, data):
content_text = content_element.text content_text = content_element.text
print(content_text) print(content_text)
# Add URL, Domain, and Content to the data list # Add URL, Domain, and Content to the data list
data.append([url, start_domain, content_text]) data.append([start_domain, url, content_text])
# Find and process hyperlinks # Find and process hyperlinks
hrefs = extract_hyperlinks(driver) hrefs = extract_hyperlinks(driver)
@@ -129,7 +129,7 @@ def add_cookies(driver, cookies):
def main(): def main():
# Starting URL # Starting URL
start_url = 'http://www.ctchn.ac.cn/' start_url = 'https://www.cbma.com.cn/'
# Parse the domain from the starting URL # Parse the domain from the starting URL
parsed_start_url = urlparse(start_url) parsed_start_url = urlparse(start_url)
start_domain = parsed_start_url.netloc start_domain = parsed_start_url.netloc