diff --git a/main.ui b/main.ui
index 00dc8f7..94d160a 100644
--- a/main.ui
+++ b/main.ui
@@ -478,7 +478,7 @@
- 20
+ 5
diff --git a/mycode/crawl_chrome.py b/mycode/crawl_chrome.py
index d5a6603..34d06e0 100644
--- a/mycode/crawl_chrome.py
+++ b/mycode/crawl_chrome.py
@@ -7,6 +7,7 @@ from pathlib import Path
import pandas as pd
from .base import BASE_DIR
import os
+from selenium.common.exceptions import TimeoutException
chrome_driver_file = os.path.join(BASE_DIR, 'mycode', 'chromedriver.exe')
failed_sites_file = os.path.join(BASE_DIR, 'mycode/failed_sites.xlsx')
@@ -160,7 +161,6 @@ def add_cookies(driver, cookies):
driver.add_cookie({'name': name, 'value': value})
def chrom_main_from_list(sites):
- driver = init_driver()
for ind, item in enumerate(sites):
group = item[0] # Replace with the actual column name for group
name = item[1]
@@ -169,29 +169,40 @@ def chrom_main_from_list(sites):
if domain in ['xdjstc.com', 'epcyiqizu.com', 'cbra.ctc.ac.cn']:
continue
url = fix_url_scheme(url)
- print(url)
+ driver = init_driver()
# Open the website
- driver.get(url)
+ # driver.get(url)
- # Retrieve cookies from previous session
- cookies = get_cookies_from_previous_session(driver)
- # Add cookies to the WebDriver
- add_cookies(driver, cookies)
+ # # Retrieve cookies from previous session
+ # cookies = get_cookies_from_previous_session(driver)
+ # # Add cookies to the WebDriver
+ # add_cookies(driver, cookies)
# Initialize the set to store visited pages
visited_pages = set()
# Initialize the data list
data = []
+        try:
+            # Set the page-load timeout to 10 seconds
+            driver.set_page_load_timeout(10)
+
+            # Set the script-execution timeout to 10 seconds
+            driver.set_script_timeout(10)
-        # Process the starting page and follow hyperlinks recursively
-        process_page(driver, url, visited_pages, domain, data, group, name)
+            # Process the starting page and follow hyperlinks recursively
+            process_page(driver, url, visited_pages, domain, data, group, name)
+        except TimeoutException:
+            # On a timeout, log it and fall through to export whatever was collected
+            print(f'Timed out while crawling {url}')
+        finally:
+            # Always release this site's WebDriver, whether or not the crawl succeeded
+            driver.quit()
# Export data to a separate Excel file in the web_dir directory
output_filename = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
export_to_excel(data, output_filename)
# Close the WebDriver
- driver.quit()
+        # driver.quit()  # superseded by the finally block above
def chrome_main():
# Read failed URLs from the list
diff --git a/mycode/main.py b/mycode/main.py
index b3ecc6c..4345c97 100644
--- a/mycode/main.py
+++ b/mycode/main.py
@@ -2,6 +2,8 @@ import pandas as pd
import os
import sqlite3
from mycode.base import BASE_DIR
+import re
+from openpyxl import load_workbook
wechat_dir = os.path.join(BASE_DIR, 'article')
web_dir = os.path.join(BASE_DIR, 'web_dir')
@@ -23,6 +25,98 @@ def make_simple_csv_from_db():
df.to_csv(os.path.join(wechat_dir, 'articles.csv'), index=False)
+def get_cbma_info_from_db_and_ana(year: str = '2023'):
+ conn = sqlite3.connect(os.path.join(BASE_DIR, 'db_folder/test.db'))
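+    # Select the official account's articles (biz 'MzIzMDU4Njg3MA==') published after `year`, oldest first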
+ query = f'''
+ SELECT
+ id,
+ strftime('%Y年%m月%d日', datetime(a.p_date, 'unixepoch', 'localtime')) as pub_date,
+ g.nickname,
+ a.title,
+ a.content_url,
+ a.read_num
+ FROM
+ articles a
+ LEFT JOIN
+ gzhs g ON g.biz = a.biz
+ WHERE
+ pub_date > '{year}'
+ AND
+ g.biz = 'MzIzMDU4Njg3MA=='
+ ORDER BY
+ pub_date
+ '''
+ df = pd.read_sql_query(query, conn)
+    # Close the database connection
+ conn.close()
+    df['source'] = ''
+    for ind, row in df.iterrows():
+        full_path = os.path.join(wechat_dir, row['nickname'], row['id'] + '.md')
+ try:
+ with open(full_path, encoding='utf-8') as f:
+ content = f.read()
+                # Extract the "来源丨" (source) line from the article body
+                a_match = re.findall('来源丨(.*?)\n', content)
+                if a_match:
+                    # Collapse runs of whitespace/non-breaking spaces into '、' separators
+                    a = re.sub(r'[\xa0\s]+', '、', a_match[0])
+ df.at[ind, 'source'] = a
+ except FileNotFoundError:
+            print(full_path + ' --- file not found')
+    # Fill the results into the Excel template
+ template_path = os.path.join(BASE_DIR, 'summary/template_cbma.xlsx')
+ workbook = load_workbook(template_path)
+ sheet = workbook['公众号更新数']
+ sheet.cell(row=1, column=1, value=f'关于{year}年度中国建材总院新媒体更新情况明细表\n(官微)')
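+    # Data rows start at template row 3; ind is the zero-based DataFrame index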
+ for ind, row in df.iterrows():
+ sheet.cell(row=ind+3, column=1, value=str(ind+1))
+ sheet.cell(row=ind+3, column=2, value=row['pub_date'])
+ sheet.cell(row=ind+3, column=3, value=row['title'])
+ sheet.cell(row=ind+3, column=4, value=row['source'])
+ sheet.cell(row=ind+3, column=6, value=row['read_num'])
+ sheet.cell(row=ind+3, column=7, value=row['content_url'])
+ output_path = os.path.join(BASE_DIR, f'summary/{year}年_cbma.xlsx')
+ workbook.save(output_path)
+    # Tally how many articles credit each subsidiary as a source
+    units = ['瑞泰科技', '国检集团', '中材高新', '哈玻院', '中国新材院', '秦皇岛院', '西安墙材院',
+             '咸阳陶瓷院', '钟表所', '总院北分', '中岩科技', '水泥新材院', '中建材科创院', '科建苑']
+    counts = [int(df['source'].str.contains(unit, na=False).sum()) for unit in units]
+    template_cal_path = os.path.join(BASE_DIR, 'summary/tempalte_cbma_cal.xlsx')
+    workbook2 = load_workbook(template_cal_path)
+    sheet2 = workbook2['打分表']
+ sheet2.cell(row=1, column=1, value=f'中国建材总院宣传工作计分表({year}年度)')
+    # Each unit's count goes in row 6, every other column starting at E (columns 5, 7, ..., 31)
+    for i, count in enumerate(counts):
+        sheet2.cell(row=6, column=5 + 2 * i, value=count)
+ output_path2 = os.path.join(BASE_DIR, f'summary/{year}年_cbma_cal.xlsx')
+ workbook2.save(output_path2)
+ return output_path, output_path2
+
+
def make_wechat_articles_full():
df = pd.read_csv(os.path.join(wechat_dir, 'articles.csv'))
df['content'] = ''
@@ -110,5 +204,5 @@ def ana_web():
return output_data
if __name__ == "__main__":
- ana_web()
+ get_cbma_info_from_db_and_ana()
diff --git a/网络巡查.bat b/start.bat
similarity index 100%
rename from 网络巡查.bat
rename to start.bat
diff --git a/start.py b/start.py
index 847f0aa..f089e71 100644
--- a/start.py
+++ b/start.py
@@ -7,7 +7,7 @@ import win32com.client as win32
import subprocess
import os
import datetime
-from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_web, ana_wechat, output_dir
+from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_web, ana_wechat, output_dir, get_cbma_info_from_db_and_ana
from mycode.crawl_chrome import chrom_main_from_list
import pandas as pd
from urllib.parse import urlparse
@@ -139,12 +139,6 @@ class MyThread(QThread):
output = p.stdout.readline()
if output:
self.update_signal.emit({'msg': output.strip()})
-
- def capture_err(self, p):
- while self.running and p.poll() is None:
- err = p.stderr.readline()
- if err:
- self.update_signal.emit({'msg': err.strip()})
def run(self) -> None:
self.update_signal.emit({'msg': '开始进行网站爬取...'})
@@ -158,13 +152,14 @@ class MyThread(QThread):
output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
# -u 代表不缓冲,直接输出
cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
- process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=False)
+ # cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
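+            # stderr is not piped; Scrapy's log output flows to the parent process's stderr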
+ process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, shell=False)
self.processes.append(process)
self.running = True
getlog_thread = threading.Thread(target=self.capture_output, args=(process,), daemon=True)
getlog_thread.start()
- getlog_thread_err = threading.Thread(target=self.capture_err, args=(process,), daemon=True)
- getlog_thread_err.start()
+ # getlog_thread_err = threading.Thread(target=self.capture_err, args=(process,), daemon=True)
+ # getlog_thread_err.start()
for process in self.processes:
process.wait()
@@ -205,12 +200,15 @@ class MainWindow(QMainWindow):
self.ui.setupUi(self)
self.ui.lSize.setValidator(QIntValidator())
self.ui.bWechat.clicked.connect(self.open_wcplus)
- self.ui.bWebSite.clicked.connect(self.open_websites_xlsx)
- self.ui.bBiao.clicked.connect(self.open_biao_xlsx)
+ self.ui.bWebSite.clicked.connect(lambda: self.open_file(WEB_SITES_PATH))
+ self.ui.bBiao.clicked.connect(lambda: self.open_file(BIAO_PATH))
self.ui.bStart.clicked.connect(self.start)
self.ui.bAna.clicked.connect(self.start_ana)
- self.ui.bRes1.clicked.connect(self.open_res1)
- self.ui.bRes2.clicked.connect(self.open_res2)
+ self.ui.bRes1.clicked.connect(lambda: self.open_file(self.ui.lRes1.text()))
+ self.ui.bRes2.clicked.connect(lambda: self.open_file(self.ui.lRes2.text(), 'docx'))
+ self.ui.bCal.clicked.connect(self.cbma_cal)
+ self.ui.bOpenCalRes1.clicked.connect(lambda: self.open_file(self.ui.lCalRes1.text()))
+ self.ui.bOpenCalRes2.clicked.connect(lambda: self.open_file(self.ui.lCalRes2.text()))
self.ui.vLog.setModel(self.logModel)
self.res1Workbook = None
@@ -219,31 +217,22 @@ class MainWindow(QMainWindow):
subprocess.Popen('.\wcplus.exe')
self.wcplus = True
- def open_websites_xlsx(self):
- app = win32.Dispatch("Excel.Application")
- app.Visible = True
- app.Workbooks.Open(WEB_SITES_PATH)
- app.WindowState = 3
-
- def open_biao_xlsx(self):
- app = win32.Dispatch("Excel.Application")
- app.Visible = True
- app.Workbooks.Open(BIAO_PATH)
- app.WindowState = 3
-
- def open_res1(self):
- if self.ui.lRes1.text():
- app = win32.Dispatch("Excel.Application")
- app.Visible = True
- self.res1Workbook = app.Workbooks.Open(self.ui.lRes1.text())
- app.WindowState = 3
-
- def open_res2(self):
- if self.ui.lRes2.text():
- app = win32.Dispatch("Word.Application")
- app.Visible = True
- app.Documents.Open(self.ui.lRes2.text())
- app.WindowState = 3
+    def open_file(self, path, type='xlsx'):
+        # Open the given file in Excel or Word via COM automation
+        if path:
+            # try:
+            #     os.startfile(path)
+            # except Exception as e:
+            #     print('Could not open file:', str(e))
+            if type == 'docx':
+                app = win32.Dispatch("Word.Application")
+                app.Visible = True
+                app.Documents.Open(path)
+                app.WindowState = 3
+            elif type == 'xlsx':
+                app = win32.Dispatch("Excel.Application")
+                app.Visible = True
+                app.Workbooks.Open(path)
+                app.WindowState = 3
def get_time(self):
now = datetime.datetime.now()
@@ -276,6 +265,14 @@ class MainWindow(QMainWindow):
self.ana_thread.update_signal.connect(self.update_log)
self.ana_thread.start()
+ def cbma_cal(self):
+ now_year = datetime.datetime.now().year
+ self.update_log({'msg': '正在分析本年总院官微数据...'})
+        output_path, output_path2 = get_cbma_info_from_db_and_ana(str(now_year))
+ self.ui.lCalRes1.setText(output_path)
+ self.ui.lCalRes2.setText(output_path2)
+ self.update_log({'msg': '分析完毕!'})
+
def update_log(self, rdict):
if isinstance(rdict, str):
self.log(f'{self.get_time()}-{rdict}', False)
@@ -316,7 +313,9 @@ class MainWindow(QMainWindow):
if __name__ == "__main__":
# gen_doc()
+    print('Starting the program...')
app = MyApplication(sys.argv)
main_window = app.createMainWindow()
main_window.show()
+    print('Startup complete')
sys.exit(app.exec())
\ No newline at end of file
diff --git a/start.vbs b/start.vbs
new file mode 100644
index 0000000..f33cbe9
--- /dev/null
+++ b/start.vbs
@@ -0,0 +1,8 @@
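+' Launch start.py with the bundled Python runtime; both paths are
+' resolved against the current working directory and double-quoted to survive spaces.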
+Set objShell = CreateObject("WScript.Shell")
+strFolder = objShell.CurrentDirectory
+
+strPythonPath = strFolder & "\runtime\python.exe"
+strScriptPath = strFolder & "\start.py"
+
+objShell.Run """" & strPythonPath & """ """ & strScriptPath & """"
+' Set objExec = objShell.Exec("""" & strPythonPath & """ """ & strScriptPath & """")
diff --git a/summary/template_cbma.xlsx b/summary/template_cbma.xlsx
new file mode 100644
index 0000000..9bd26a7
Binary files /dev/null and b/summary/template_cbma.xlsx differ
diff --git a/ui_mainwindow.py b/ui_mainwindow.py
index 38d6bb9..72fa742 100644
--- a/ui_mainwindow.py
+++ b/ui_mainwindow.py
@@ -242,7 +242,7 @@ class Ui_MainWindow(object):
self.label_2.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u5728\u4fee\u6539\u540e\u4fdd\u5b58\u5e76\u5173\u95ed", None))
self.label_6.setText(QCoreApplication.translate("MainWindow", u"\u5c0f\u4e8e", None))
self.label_3.setText(QCoreApplication.translate("MainWindow", u"KB-Chrome", None))
- self.lSize.setText(QCoreApplication.translate("MainWindow", u"20", None))
+ self.lSize.setText(QCoreApplication.translate("MainWindow", u"5", None))
self.bStart.setText(QCoreApplication.translate("MainWindow", u"\u5f00\u59cb\u722c\u53d6", None))
self.groupBox_7.setTitle(QCoreApplication.translate("MainWindow", u"\u603b\u9662\u5b98\u5fae", None))
self.label_10.setText(QCoreApplication.translate("MainWindow", u"\u6c47\u603b\u7ed3\u679cExcel:", None))
diff --git a/web3.py b/web3.py
index beeebf8..7580272 100644
--- a/web3.py
+++ b/web3.py
@@ -33,40 +33,40 @@ def fix_url_scheme(url, default_scheme='http'):
url = f'{default_scheme}://{url}'
return url
if __name__ == '__main__':
- print('巡查任务开始。。。')
- now = datetime.datetime.now()
- month = now.month
+ # print('巡查任务开始。。。')
+ # now = datetime.datetime.now()
+ # month = now.month
- print('正在组合微信公众号爬取内容。。。')
- make_simple_csv_from_db()
- make_wechat_articles_full()
- print('公众号爬取内容组装完毕!')
+ # print('正在组合微信公众号爬取内容。。。')
+ # make_simple_csv_from_db()
+ # make_wechat_articles_full()
+ # print('公众号爬取内容组装完毕!')
- print('开始进行网站爬取。。。')
+ # print('开始进行网站爬取。。。')
df = pd.read_excel('web_sites.xlsx', sheet_name='Sheet1')
- processes = []
+ # processes = []
- # 注册 SIGINT 信号处理函数
- signal.signal(signal.SIGINT, sigint_handler)
+    # # Register the SIGINT signal handler
+    # signal.signal(signal.SIGINT, sigint_handler)
- ind = 0
- for ind, row in df.iterrows():
- group = row['单位']
- name = row['主办']
- url = fix_url_scheme(row['地址'].strip())
- domain = urlparse(url).netloc.replace('www.', '')
- if domain in ['xdjstc.com', 'epcyiqizu.com', 'cbra.ctc.ac.cn']: # 这几个网站直接跳过
- continue
- output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
- # cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
- cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
- process = subprocess.Popen(cmd)
- processes.append(process)
+ # ind = 0
+ # for ind, row in df.iterrows():
+ # group = row['单位']
+ # name = row['主办']
+ # url = fix_url_scheme(row['地址'].strip())
+ # domain = urlparse(url).netloc.replace('www.', '')
+    #     if domain in ['xdjstc.com', 'epcyiqizu.com', 'cbra.ctc.ac.cn']: # skip these sites outright
+ # continue
+ # output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
+ # # cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
+ # cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
+ # process = subprocess.Popen(cmd)
+ # processes.append(process)
- # Wait for all processes to finish
- for process in processes:
- process.wait()
+ # # Wait for all processes to finish
+ # for process in processes:
+ # process.wait()
print('网站爬取结束,校验中。。。')
# Check output file sizes and save information if size is less than 20KB
@@ -90,30 +90,30 @@ if __name__ == '__main__':
print('网站爬取完毕!')
- print('开始对比分析所有内容。。。')
- # Run WeChat Analysis
- wechat_results = ana_wechat()
- # Run Web Content Analysis
- web_results = ana_web()
+ # print('开始对比分析所有内容。。。')
+ # # Run WeChat Analysis
+ # wechat_results = ana_wechat()
+ # # Run Web Content Analysis
+ # web_results = ana_web()
- # Save results in an Excel file with two sheets
- output_excel_path = os.path.join(output_dir, f'{month}月-总院及下属公司官方公众号巡查结果汇总表.xlsx')
- # with pd.ExcelWriter(output_excel_path) as writer:
- # df = pd.DataFrame(wechat_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
- # df.to_excel(writer, sheet_name='公众号', index=False)
- # df2 = pd.DataFrame(web_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
- # df2.to_excel(writer, sheet_name='网站', index=False)
- template_path = os.path.join(output_dir, 'template.xlsx')
- workbook = load_workbook(template_path)
+ # # Save results in an Excel file with two sheets
+ # output_excel_path = os.path.join(output_dir, f'{month}月-总院及下属公司官方公众号巡查结果汇总表.xlsx')
+ # # with pd.ExcelWriter(output_excel_path) as writer:
+ # # df = pd.DataFrame(wechat_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
+ # # df.to_excel(writer, sheet_name='公众号', index=False)
+ # # df2 = pd.DataFrame(web_results, columns=['序号', '信源名称', '文章标题', '错误表述', '建议修改词语', '错误分类', '原文链接'])
+ # # df2.to_excel(writer, sheet_name='网站', index=False)
+ # template_path = os.path.join(output_dir, 'template.xlsx')
+ # workbook = load_workbook(template_path)
- # 选择要操作的工作表
- wechat_sheet = workbook['公众号']
- web_sheet = workbook['网站']
- for row in wechat_results:
- wechat_sheet.append(row)
- for row in web_results:
- web_sheet.append(row)
- workbook.save(output_excel_path)
- workbook.close()
- print('巡查任务执行完毕, 请查看summary文件夹, 可手动校对')
- os.system("pause")
\ No newline at end of file
+    # # Select the worksheets to write to
+ # wechat_sheet = workbook['公众号']
+ # web_sheet = workbook['网站']
+ # for row in wechat_results:
+ # wechat_sheet.append(row)
+ # for row in web_results:
+ # web_sheet.append(row)
+ # workbook.save(output_excel_path)
+ # workbook.close()
+ # print('巡查任务执行完毕, 请查看summary文件夹, 可手动校对')
+ # os.system("pause")
\ No newline at end of file
diff --git a/zcspider/pipelines.py b/zcspider/pipelines.py
index 31db3fa..cdc6b24 100644
--- a/zcspider/pipelines.py
+++ b/zcspider/pipelines.py
@@ -4,6 +4,7 @@
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os.path
from openpyxl import Workbook, load_workbook
+from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
# useful for handling different item types with a single interface
from scrapy.exceptions import IgnoreRequest
@@ -58,6 +59,7 @@ class ZcspiderPipeline:
# except:
# self.conn.rollback()
# raise
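+        # Remove characters that openpyxl rejects as illegal in cell values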
+ item['text'] = ILLEGAL_CHARACTERS_RE.sub(r'', item['text'])
line = [item['group'], item['name'], item['domain'], item['url'], item['text']]
self.ws.append(line)
return item
diff --git a/zcspider/settings.py b/zcspider/settings.py
index 1736130..9c604b8 100644
--- a/zcspider/settings.py
+++ b/zcspider/settings.py
@@ -96,7 +96,7 @@ DEFAULT_REQUEST_HEADERS = {
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = 'gb18030'
-LOG_LEVEL = 'WARNING'
+LOG_LEVEL = 'DEBUG'
DOWNLOAD_TIMEOUT = 10
ITEM_PIPELINES = {
@@ -110,4 +110,7 @@ FEED_EXPORTERS = {
DOWNLOADER_MIDDLEWARES = {
'zcspider.middlewares.FilterHTMLMiddleware': 200,
# 其他下载中间件...
+}
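+# Disable Scrapy's built-in telnet console extension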
+EXTENSIONS = {
+ 'scrapy.extensions.telnet.TelnetConsole': None
}
\ No newline at end of file
diff --git a/宣传巡查.exe b/宣传巡查.exe
new file mode 100644
index 0000000..dcb6ee2
Binary files /dev/null and b/宣传巡查.exe differ