feat: optimize crawler / output analysis report

This commit is contained in:
caoqianming 2023-11-09 17:51:54 +08:00
parent e5ff671c97
commit 7bf87671b9
12 changed files with 222 additions and 105 deletions

View File

@@ -478,7 +478,7 @@
</rect>
</property>
<property name="text">
<string>20</string>
<string>5</string>
</property>
</widget>
<widget class="QPushButton" name="bStart">

View File

@@ -7,6 +7,7 @@ from pathlib import Path
import pandas as pd
from .base import BASE_DIR
import os
from selenium.common.exceptions import TimeoutException
chrome_driver_file = os.path.join(BASE_DIR, 'mycode', 'chromedriver.exe')
failed_sites_file = os.path.join(BASE_DIR, 'mycode/failed_sites.xlsx')
@@ -160,7 +161,6 @@ def add_cookies(driver, cookies):
driver.add_cookie({'name': name, 'value': value})
def chrom_main_from_list(sites):
driver = init_driver()
for ind, item in enumerate(sites):
group = item[0] # Replace with the actual column name for group
name = item[1]
@@ -169,29 +169,40 @@ def chrom_main_from_list(sites):
if domain in ['xdjstc.com', 'epcyiqizu.com', 'cbra.ctc.ac.cn']:
continue
url = fix_url_scheme(url)
print(url)
driver = init_driver()
# Open the website
driver.get(url)
# driver.get(url)
# Retrieve cookies from previous session
cookies = get_cookies_from_previous_session(driver)
# Add cookies to the WebDriver
add_cookies(driver, cookies)
# # Retrieve cookies from previous session
# cookies = get_cookies_from_previous_session(driver)
# # Add cookies to the WebDriver
# add_cookies(driver, cookies)
# Initialize the set to store visited pages
visited_pages = set()
# Initialize the data list
data = []
try:
# Set the page load timeout to 10 seconds
driver.set_page_load_timeout(10)
# Set the script execution timeout to 10 seconds
driver.set_script_timeout(10)
# Process the starting page and follow hyperlinks recursively
process_page(driver, url, visited_pages, domain, data, group, name)
# Put the per-site crawling logic here, e.g. opening pages, clicking buttons, etc.
# ...
process_page(driver, url, visited_pages, domain, data, group, name)
except TimeoutException:
# When a timeout occurs, handle it here, e.g. skip the site or report the error
print("超时异常")
driver.quit()
# Export data to a separate Excel file in the web_dir directory
output_filename = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
export_to_excel(data, output_filename)
# Close the WebDriver
driver.quit()
# driver.quit()
def chrome_main():
# Read failed URLs from the list
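The change above wraps each site's crawl in Selenium's page-load and script timeouts, so an unresponsive site raises TimeoutException instead of stalling the whole batch. A minimal sketch of the same pattern, assuming selenium is installed and a chromedriver is available on PATH; crawl_one is a hypothetical helper, not part of this codebase:

from selenium import webdriver
from selenium.common.exceptions import TimeoutException

def crawl_one(url):
    # A fresh driver per site, so a failed site does not poison later ones
    driver = webdriver.Chrome()
    try:
        # Abort page loads and injected scripts that take longer than 10 seconds
        driver.set_page_load_timeout(10)
        driver.set_script_timeout(10)
        driver.get(url)
        return driver.title
    except TimeoutException:
        # Treat a slow site as failed and move on instead of blocking the loop
        return None
    finally:
        driver.quit()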

View File

@@ -2,6 +2,8 @@ import pandas as pd
import os
import sqlite3
from mycode.base import BASE_DIR
import re
from openpyxl import load_workbook
wechat_dir = os.path.join(BASE_DIR, 'article')
web_dir = os.path.join(BASE_DIR, 'web_dir')
@@ -23,6 +25,98 @@ def make_simple_csv_from_db():
df.to_csv(os.path.join(wechat_dir, 'articles.csv'), index=False)
def get_cbma_info_from_db_and_ana(year: str = '2023'):
conn = sqlite3.connect(os.path.join(BASE_DIR, 'db_folder/test.db'))
query = f'''
SELECT
id,
strftime('%Y年%m月%d', datetime(a.p_date, 'unixepoch', 'localtime')) as pub_date,
g.nickname,
a.title,
a.content_url,
a.read_num
FROM
articles a
LEFT JOIN
gzhs g ON g.biz = a.biz
WHERE
pub_date > '{year}'
AND
g.biz = 'MzIzMDU4Njg3MA=='
ORDER BY
pub_date
'''
df = pd.read_sql_query(query, conn)
# Close the database connection
conn.close()
for ind, row in df.iterrows():
id = row['id']
full_path = os.path.join(wechat_dir, row['nickname'], row['id'] + '.md')
try:
with open(full_path, encoding='utf-8') as f:
content = f.read()
# Extract the source from the article content
a_match = re.findall('来源丨(.*?)\n', content)
a_list = []
if a_match:
# a = a_match[0].replace('\xa0', '、').replace(' ', '、')
# a = re.sub(r'、+', '、', a)
a = re.sub(r'[\xa0\s]+', '', a_match[0])
df.at[ind, 'source'] = a
except FileNotFoundError:
print(full_path + '---不存在')
# Fill the results into the Excel template
template_path = os.path.join(BASE_DIR, 'summary/template_cbma.xlsx')
workbook = load_workbook(template_path)
sheet = workbook['公众号更新数']
sheet.cell(row=1, column=1, value=f'关于{year}年度中国建材总院新媒体更新情况明细表\n(官微)')
for ind, row in df.iterrows():
sheet.cell(row=ind+3, column=1, value=str(ind+1))
sheet.cell(row=ind+3, column=2, value=row['pub_date'])
sheet.cell(row=ind+3, column=3, value=row['title'])
sheet.cell(row=ind+3, column=4, value=row['source'])
sheet.cell(row=ind+3, column=6, value=row['read_num'])
sheet.cell(row=ind+3, column=7, value=row['content_url'])
output_path = os.path.join(BASE_DIR, f'summary/{year}年_cbma.xlsx')
workbook.save(output_path)
# Start tallying the scores
t_1 = (df['source'].str.contains('瑞泰科技')).sum()
t_2 = (df['source'].str.contains('国检集团')).sum()
t_3 = (df['source'].str.contains('中材高新')).sum()
t_4 = (df['source'].str.contains('哈玻院')).sum()
t_5 = (df['source'].str.contains('中国新材院')).sum()
t_6 = (df['source'].str.contains('秦皇岛院')).sum()
t_7 = (df['source'].str.contains('西安墙材院')).sum()
t_8 = (df['source'].str.contains('咸阳陶瓷院')).sum()
t_9 = (df['source'].str.contains('钟表所')).sum()
t_10 = (df['source'].str.contains('总院北分')).sum()
t_11 = (df['source'].str.contains('中岩科技')).sum()
t_12 = (df['source'].str.contains('水泥新材院')).sum()
t_13 = (df['source'].str.contains('中建材科创院')).sum()
t_14 = (df['source'].str.contains('科建苑')).sum()
template_cal_path = os.path.join(BASE_DIR, 'summary/tempalte_cbma_cal.xlsx')
workbook2 = load_workbook(template_cal_path)
sheet2 = workbook2['打分表']
sheet2.cell(row=1, column=1, value=f'中国建材总院宣传工作计分表({year}年度)')
sheet2.cell(row=6, column=5, value=t_1)
sheet2.cell(row=6, column=7, value=t_2)
sheet2.cell(row=6, column=9, value=t_3)
sheet2.cell(row=6, column=11, value=t_4)
sheet2.cell(row=6, column=13, value=t_5)
sheet2.cell(row=6, column=15, value=t_6)
sheet2.cell(row=6, column=17, value=t_7)
sheet2.cell(row=6, column=19, value=t_8)
sheet2.cell(row=6, column=21, value=t_9)
sheet2.cell(row=6, column=23, value=t_10)
sheet2.cell(row=6, column=25, value=t_11)
sheet2.cell(row=6, column=27, value=t_12)
sheet2.cell(row=6, column=29, value=t_13)
sheet2.cell(row=6, column=31, value=t_14)
output_path2 = os.path.join(BASE_DIR, f'summary/{year}年_cbma_cal.xlsx')
workbook2.save(output_path2)
return output_path, output_path2
def make_wechat_articles_full():
df = pd.read_csv(os.path.join(wechat_dir, 'articles.csv'))
df['content'] = ''
@@ -110,5 +204,5 @@ def ana_web():
return output_data
if __name__ == "__main__":
ana_web()
get_cbma_info_from_db_and_ana()
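get_cbma_info_from_db_and_ana tallies how many articles credit each subsidiary in the extracted source field and writes the counts into row 6 of the scoring template, every other column starting at column 5. The fourteen t_N variables could be collapsed into a loop; a hedged sketch, assuming the same DataFrame and template layout (fill_scores is a hypothetical name, and na=False guards rows where no source could be extracted):

import pandas as pd

def fill_scores(df: pd.DataFrame, sheet) -> None:
    # Hypothetical condensed form of the t_1 .. t_14 tally above; the unit list and
    # the target cells (row 6, every other column from 5) must match the template
    units = ['瑞泰科技', '国检集团', '中材高新', '哈玻院', '中国新材院', '秦皇岛院',
             '西安墙材院', '咸阳陶瓷院', '钟表所', '总院北分', '中岩科技',
             '水泥新材院', '中建材科创院', '科建苑']
    for i, unit in enumerate(units):
        # na=False treats articles without an extracted source as non-matching instead of NaN
        count = df['source'].str.contains(unit, na=False).sum()
        sheet.cell(row=6, column=5 + 2 * i, value=int(count))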

View File

@@ -7,7 +7,7 @@ import win32com.client as win32
import subprocess
import os
import datetime
from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_web, ana_wechat, output_dir
from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_web, ana_wechat, output_dir, get_cbma_info_from_db_and_ana
from mycode.crawl_chrome import chrom_main_from_list
import pandas as pd
from urllib.parse import urlparse
@@ -139,12 +139,6 @@ class MyThread(QThread):
output = p.stdout.readline()
if output:
self.update_signal.emit({'msg': output.strip()})
def capture_err(self, p):
while self.running and p.poll() is None:
err = p.stderr.readline()
if err:
self.update_signal.emit({'msg': err.strip()})
def run(self) -> None:
self.update_signal.emit({'msg': '开始进行网站爬取...'})
@@ -158,13 +152,14 @@ class MyThread(QThread):
output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
# -u means unbuffered, so output is emitted immediately
cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=False)
# cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, shell=False)
self.processes.append(process)
self.running = True
getlog_thread = threading.Thread(target=self.capture_output, args=(process,), daemon=True)
getlog_thread.start()
getlog_thread_err = threading.Thread(target=self.capture_err, args=(process,), daemon=True)
getlog_thread_err.start()
# getlog_thread_err = threading.Thread(target=self.capture_err, args=(process,), daemon=True)
# getlog_thread_err.start()
for process in self.processes:
process.wait()
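Each site is crawled by spawning scrapy in a child process with -u for unbuffered output, and a daemon thread relays its stdout to the UI log. With the stderr pipe and the capture_err thread removed, scrapy's own logging (which goes to stderr) stays on the console. A standalone sketch of that launch pattern, assuming the same basespider arguments; run_spider is a hypothetical name:

import subprocess
import threading

def run_spider(python_path, domain, url, name, group, output):
    # Hypothetical standalone version of the launch performed in MyThread.run above
    cmd = [python_path, '-u', '-m', 'scrapy', 'crawl', 'basespider',
           '-a', f'domain={domain}', '-a', f'start_url={url}',
           '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
    # Only stdout is piped; leaving stderr alone also avoids the risk of an unread
    # stderr pipe filling up and blocking the child process
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, shell=False)

    def pump():
        # Relay the child's stdout line by line, as capture_output does for the UI log
        for line in process.stdout:
            print(line.rstrip())

    threading.Thread(target=pump, daemon=True).start()
    return process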
@@ -205,12 +200,15 @@ class MainWindow(QMainWindow):
self.ui.setupUi(self)
self.ui.lSize.setValidator(QIntValidator())
self.ui.bWechat.clicked.connect(self.open_wcplus)
self.ui.bWebSite.clicked.connect(self.open_websites_xlsx)
self.ui.bBiao.clicked.connect(self.open_biao_xlsx)
self.ui.bWebSite.clicked.connect(lambda: self.open_file(WEB_SITES_PATH))
self.ui.bBiao.clicked.connect(lambda: self.open_file(BIAO_PATH))
self.ui.bStart.clicked.connect(self.start)
self.ui.bAna.clicked.connect(self.start_ana)
self.ui.bRes1.clicked.connect(self.open_res1)
self.ui.bRes2.clicked.connect(self.open_res2)
self.ui.bRes1.clicked.connect(lambda: self.open_file(self.ui.lRes1.text()))
self.ui.bRes2.clicked.connect(lambda: self.open_file(self.ui.lRes2.text(), 'docx'))
self.ui.bCal.clicked.connect(self.cbma_cal)
self.ui.bOpenCalRes1.clicked.connect(lambda: self.open_file(self.ui.lCalRes1.text()))
self.ui.bOpenCalRes2.clicked.connect(lambda: self.open_file(self.ui.lCalRes2.text()))
self.ui.vLog.setModel(self.logModel)
self.res1Workbook = None
@@ -219,31 +217,22 @@ class MainWindow(QMainWindow):
subprocess.Popen('.\wcplus.exe')
self.wcplus = True
def open_websites_xlsx(self):
app = win32.Dispatch("Excel.Application")
app.Visible = True
app.Workbooks.Open(WEB_SITES_PATH)
app.WindowState = 3
def open_biao_xlsx(self):
app = win32.Dispatch("Excel.Application")
app.Visible = True
app.Workbooks.Open(BIAO_PATH)
app.WindowState = 3
def open_res1(self):
if self.ui.lRes1.text():
app = win32.Dispatch("Excel.Application")
app.Visible = True
self.res1Workbook = app.Workbooks.Open(self.ui.lRes1.text())
app.WindowState = 3
def open_res2(self):
if self.ui.lRes2.text():
app = win32.Dispatch("Word.Application")
app.Visible = True
app.Documents.Open(self.ui.lRes2.text())
app.WindowState = 3
def open_file(self, path, type='xlsx'):
if path:
# try:
# os.startfile(path)
# except Exception as e:
# print("无法打开文件:", str(e))
if type == 'docx':
app = win32.Dispatch("Word.Application")
app.Visible = True
app.Documents.Open(path)
app.WindowState = 3
elif type == 'xlsx':
app = win32.Dispatch("Excel.Application")
app.Visible = True
app.Workbooks.Open(path)
app.WindowState = 3
def get_time(self):
now = datetime.datetime.now()
@@ -276,6 +265,14 @@ class MainWindow(QMainWindow):
self.ana_thread.update_signal.connect(self.update_log)
self.ana_thread.start()
def cbma_cal(self):
now_year = datetime.datetime.now().year
self.update_log({'msg': '正在分析本年总院官微数据...'})
output_path, output_path2 = get_cbma_info_from_db_and_ana(now_year)
self.ui.lCalRes1.setText(output_path)
self.ui.lCalRes2.setText(output_path2)
self.update_log({'msg': '分析完毕!'})
def update_log(self, rdict):
if isinstance(rdict, str):
self.log(f'{self.get_time()}-{rdict}', False)
@@ -316,7 +313,9 @@ class MainWindow(QMainWindow):
if __name__ == "__main__":
# gen_doc()
print('正在启动程序...')
app = MyApplication(sys.argv)
main_window = app.createMainWindow()
main_window.show()
print('启动成功')
sys.exit(app.exec())
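The four separate open_* helpers were folded into a single open_file(path, type) that dispatches to Excel or Word over COM and maximizes the window (WindowState = 3). The commented-out block hints at a simpler variant that hands the path to the Windows file association instead; a minimal sketch, assuming a Windows host (this open_file is a hypothetical alternative, not the method used above):

import os

def open_file(path: str) -> None:
    # Delegate to the Windows file association (Excel for .xlsx, Word for .docx),
    # giving up only the ability to force the window state
    if not path:
        return
    try:
        os.startfile(path)  # Windows-only
    except OSError as e:
        print('Failed to open file:', e)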

start.vbs Normal file
View File

@@ -0,0 +1,8 @@
Set objShell = CreateObject("WScript.Shell")
strFolder = objShell.CurrentDirectory
strPythonPath = strFolder & "\runtime\python.exe"
strScriptPath = strFolder & "\start.py"
objShell.Run """" & strPythonPath & """ """ & strScriptPath & """"
' Set objExec = objShell.Exec("""" & strPythonPath & """ """ & strScriptPath & """")

summary/template_cbma.xlsx Normal file

Binary file not shown.

View File

@@ -242,7 +242,7 @@ class Ui_MainWindow(object):
self.label_2.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u5728\u4fee\u6539\u540e\u4fdd\u5b58\u5e76\u5173\u95ed", None))
self.label_6.setText(QCoreApplication.translate("MainWindow", u"\u5c0f\u4e8e", None))
self.label_3.setText(QCoreApplication.translate("MainWindow", u"KB-Chrome", None))
self.lSize.setText(QCoreApplication.translate("MainWindow", u"20", None))
self.lSize.setText(QCoreApplication.translate("MainWindow", u"5", None))
self.bStart.setText(QCoreApplication.translate("MainWindow", u"\u5f00\u59cb\u722c\u53d6", None))
self.groupBox_7.setTitle(QCoreApplication.translate("MainWindow", u"\u603b\u9662\u5b98\u5fae", None))
self.label_10.setText(QCoreApplication.translate("MainWindow", u"\u6c47\u603b\u7ed3\u679cExcel:", None))

web3.py
View File

@@ -33,40 +33,40 @@ def fix_url_scheme(url, default_scheme='http'):
url = f'{default_scheme}://{url}'
return url
if __name__ == '__main__':
print('巡查任务开始。。。')
now = datetime.datetime.now()
month = now.month
# print('巡查任务开始。。。')
# now = datetime.datetime.now()
# month = now.month
print('正在组合微信公众号爬取内容。。。')
make_simple_csv_from_db()
make_wechat_articles_full()
print('公众号爬取内容组装完毕!')
# print('正在组合微信公众号爬取内容。。。')
# make_simple_csv_from_db()
# make_wechat_articles_full()
# print('公众号爬取内容组装完毕!')
print('开始进行网站爬取。。。')
# print('开始进行网站爬取。。。')
df = pd.read_excel('web_sites.xlsx', sheet_name='Sheet1')
processes = []
# processes = []
# Register the SIGINT signal handler
signal.signal(signal.SIGINT, sigint_handler)
# # Register the SIGINT signal handler
# signal.signal(signal.SIGINT, sigint_handler)
ind = 0
for ind, row in df.iterrows():
group = row['单位']
name = row['主办']
url = fix_url_scheme(row['地址'].strip())
domain = urlparse(url).netloc.replace('www.', '')
if domain in ['xdjstc.com', 'epcyiqizu.com', 'cbra.ctc.ac.cn']: # skip these sites outright
continue
output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
# cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
process = subprocess.Popen(cmd)
processes.append(process)
# ind = 0
# for ind, row in df.iterrows():
# group = row['单位']
# name = row['主办']
# url = fix_url_scheme(row['地址'].strip())
# domain = urlparse(url).netloc.replace('www.', '')
# if domain in ['xdjstc.com', 'epcyiqizu.com', 'cbra.ctc.ac.cn']: # skip these sites outright
# continue
# output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
# # cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
# cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
# process = subprocess.Popen(cmd)
# processes.append(process)
# Wait for all processes to finish
for process in processes:
process.wait()
# # Wait for all processes to finish
# for process in processes:
# process.wait()
print('网站爬取结束,校验中。。。')
# Check output file sizes and save information if size is less than 20KB
@@ -90,30 +90,30 @@ if __name__ == '__main__':
print('网站爬取完毕!')
print('开始对比分析所有内容。。。')
# Run WeChat Analysis
wechat_results = ana_wechat()
# Run Web Content Analysis
web_results = ana_web()
# print('开始对比分析所有内容。。。')
# # Run WeChat Analysis
# wechat_results = ana_wechat()
# # Run Web Content Analysis
# web_results = ana_web()
# Save results in an Excel file with two sheets
output_excel_path = os.path.join(output_dir, f'{month}月-总院及下属公司官方公众号巡查结果汇总表.xlsx')
# with pd.ExcelWriter(output_excel_path) as writer:
# df = pd.DataFrame(wechat_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
# df.to_excel(writer, sheet_name='公众号', index=False)
# df2 = pd.DataFrame(web_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
# df2.to_excel(writer, sheet_name='网站', index=False)
template_path = os.path.join(output_dir, 'template.xlsx')
workbook = load_workbook(template_path)
# # Save results in an Excel file with two sheets
# output_excel_path = os.path.join(output_dir, f'{month}月-总院及下属公司官方公众号巡查结果汇总表.xlsx')
# # with pd.ExcelWriter(output_excel_path) as writer:
# # df = pd.DataFrame(wechat_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
# # df.to_excel(writer, sheet_name='公众号', index=False)
# # df2 = pd.DataFrame(web_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
# # df2.to_excel(writer, sheet_name='网站', index=False)
# template_path = os.path.join(output_dir, 'template.xlsx')
# workbook = load_workbook(template_path)
# Select the worksheets to operate on
wechat_sheet = workbook['公众号']
web_sheet = workbook['网站']
for row in wechat_results:
wechat_sheet.append(row)
for row in web_results:
web_sheet.append(row)
workbook.save(output_excel_path)
workbook.close()
print('巡查任务执行完毕, 请查看summary文件夹, 可手动校对')
os.system("pause")
# # Select the worksheets to operate on
# wechat_sheet = workbook['公众号']
# web_sheet = workbook['网站']
# for row in wechat_results:
# wechat_sheet.append(row)
# for row in web_results:
# web_sheet.append(row)
# workbook.save(output_excel_path)
# workbook.close()
# print('巡查任务执行完毕, 请查看summary文件夹, 可手动校对')
# os.system("pause")

View File

@@ -4,6 +4,7 @@
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os.path
from openpyxl import Workbook, load_workbook
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
# useful for handling different item types with a single interface
from scrapy.exceptions import IgnoreRequest
@@ -58,6 +59,7 @@ class ZcspiderPipeline:
# except:
# self.conn.rollback()
# raise
item['text'] = ILLEGAL_CHARACTERS_RE.sub(r'', item['text'])
line = [item['group'], item['name'], item['domain'], item['url'], item['text']]
self.ws.append(line)
return item
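The new ILLEGAL_CHARACTERS_RE.sub call scrubs control characters from the scraped text before it is appended to the worksheet, because openpyxl refuses to write such characters (it raises IllegalCharacterError). A minimal illustration, assuming only openpyxl; the text and file name are arbitrary:

from openpyxl import Workbook
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE

# Control characters such as \x0b in scraped page text would make openpyxl raise
# IllegalCharacterError on append, so they are stripped before writing the row
wb = Workbook()
ws = wb.active
raw_text = 'scraped text with a control character \x0b inside'
ws.append(['group', 'name', 'domain', 'url', ILLEGAL_CHARACTERS_RE.sub('', raw_text)])
wb.save('demo_output.xlsx')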

View File

@@ -96,7 +96,7 @@ DEFAULT_REQUEST_HEADERS = {
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = 'gb18030'
LOG_LEVEL = 'WARNING'
LOG_LEVEL = 'DEBUG'
DOWNLOAD_TIMEOUT = 10
ITEM_PIPELINES = {
@@ -110,4 +110,7 @@ FEED_EXPORTERS = {
DOWNLOADER_MIDDLEWARES = {
'zcspider.middlewares.FilterHTMLMiddleware': 200,
# other downloader middlewares...
}
EXTENSIONS = {
'scrapy.extensions.telnet.TelnetConsole': None
}

宣传巡查.exe Normal file

Binary file not shown.