This commit is contained in:
parent
637bbd3e17
commit
98e42e50af
|
@ -40,7 +40,7 @@ def process_page(driver, url, visited_pages, start_domain, data):
|
|||
content_text = content_element.text
|
||||
print(content_text)
|
||||
# Add URL, Domain, and Content to the data list
|
||||
data.append([url, start_domain, content_text])
|
||||
data.append([start_domain, url, content_text])
|
||||
|
||||
# Find and process hyperlinks
|
||||
hrefs = extract_hyperlinks(driver)
|
||||
|
@ -129,7 +129,7 @@ def add_cookies(driver, cookies):
|
|||
|
||||
def main():
|
||||
# Starting URL
|
||||
start_url = 'http://www.ctchn.ac.cn/'
|
||||
start_url = 'https://www.cbma.com.cn/'
|
||||
# Parse the domain from the starting URL
|
||||
parsed_start_url = urlparse(start_url)
|
||||
start_domain = parsed_start_url.netloc
|
||||
|
|
Loading…
Reference in New Issue