diff --git a/scrape_nj.py b/scrape_nj.py index a829be5..e288c96 100644 --- a/scrape_nj.py +++ b/scrape_nj.py @@ -69,7 +69,7 @@ def process_page(driver, url, visited_pages, start_domain, data): hyperlink_content_text = hyperlink_content_element.text print(hyperlink_content_text) # Add URL, Domain, and Content of the hyperlink to the data list - data.append([href, start_domain, hyperlink_content_text]) + data.append([start_domain, href, hyperlink_content_text]) # Recursively process the page and follow hyperlinks process_page(driver, href, visited_pages, start_domain, data) except Exception as e: diff --git a/summary/summary juin.md b/summary/summary juin.md index a023b94..e72ec4f 100644 --- a/summary/summary juin.md +++ b/summary/summary juin.md @@ -10,7 +10,7 @@ ## 分析结果 -根据分析要求进行得到分析结果,具体见结果表 +通过对爬取结果进行分析并与标准文档比对,分别在27876页网页中发现错误100处,在4153篇公众号中发现错误33处,具体见结果表 ## 存在问题 diff --git a/summary/summary_juin.pdf b/summary/summary_juin.pdf index 29fd746..85790fb 100644 Binary files a/summary/summary_juin.pdf and b/summary/summary_juin.pdf differ