diff --git a/main.ui b/main.ui index 548064b..ec2f1b7 100644 --- a/main.ui +++ b/main.ui @@ -779,6 +779,32 @@ + + + + 180 + 30 + 71 + 21 + + + + 2023 + + + + + + 260 + 30 + 41 + 21 + + + + + + diff --git a/start.py b/start.py index 73aa54b..4f655ab 100644 --- a/start.py +++ b/start.py @@ -3,7 +3,7 @@ from PySide6.QtCore import QStringListModel, QThread, Signal from PySide6.QtWidgets import QApplication, QMainWindow from PySide6.QtGui import QIntValidator from ui_mainwindow import Ui_MainWindow -import win32com.client as win32 +import win32com.client as win32 import subprocess import os import datetime @@ -27,11 +27,13 @@ TEMPLATE_REPORT_PATH = os.path.join(BASE_DIR, 'summary/template_report.docx') def fix_url_scheme(url, default_scheme='http'): - # 检查URL是否包含方案 - if not url.startswith('http://') and not url.startswith('https://'): - # 如果没有方案,添加默认方案 - url = f'{default_scheme}://{url}' - return url + # 检查URL是否包含方案 + if not url.startswith('http://') and not url.startswith('https://'): + # 如果没有方案,添加默认方案 + url = f'{default_scheme}://{url}' + return url + + class MyApplication(QApplication): def __init__(self, argv): super(MyApplication, self).__init__(argv) @@ -42,6 +44,7 @@ class MyApplication(QApplication): self.main_window = MainWindow() return self.main_window + def gen_doc(w1, w2): now = datetime.datetime.now() now_3 = now - datetime.timedelta(days=3) @@ -50,8 +53,10 @@ def gen_doc(w1, w2): # with open('w1.json', 'r', encoding='utf-8') as f: # w1 = json.loads(f.read()) cate_dict = {} - context = {'y': now.year, 'm': now.month, 'd': now.day, 'mo': now_3.month, 'do': now_3.day, 'su': 'xx', 'w1': w1, 'w2': w2} - output_report_path = os.path.join(BASE_DIR, f'summary/{now.year}年{now.month}月-分析结果简报.docx') + context = {'y': now.year, 'm': now.month, 'd': now.day, + 'mo': now_3.month, 'do': now_3.day, 'su': 'xx', 'w1': w1, 'w2': w2} + output_report_path = os.path.join( + BASE_DIR, f'summary/{now.year}年{now.month}月-分析结果简报.docx') doc = DocxTemplate(TEMPLATE_REPORT_PATH) for i 
in w1: if i[5] in cate_dict: @@ -69,6 +74,8 @@ def gen_doc(w1, w2): doc.render(context) doc.save(output_report_path) return output_report_path + + class AnaThread(QThread): update_signal = Signal(object) @@ -85,7 +92,8 @@ class AnaThread(QThread): try: # 生成汇总表 self.update_signal.emit({'msg': '开始生成汇总表...'}) - output_excel_path = os.path.join(BASE_DIR, f'summary/{now.year}年{now.month}月-分析结果汇总表.xlsx') + output_excel_path = os.path.join( + BASE_DIR, f'summary/{now.year}年{now.month}月-分析结果汇总表.xlsx') workbook = load_workbook(TEMPLATE_PATH) wechat_sheet = workbook['公众号'] web_sheet = workbook['网站'] @@ -103,7 +111,8 @@ class AnaThread(QThread): # 生成简报 self.update_signal.emit({'msg': '开始生成汇总简报...'}) output_report_path = gen_doc(wechat_results, web_results) - self.update_signal.emit({'msg': '分析完毕, 请查看结果栏, 可手动校对', 'output_excel_path': output_excel_path, 'output_report_path': output_report_path}) + self.update_signal.emit( + {'msg': '分析完毕, 请查看结果栏, 可手动校对', 'output_excel_path': output_excel_path, 'output_report_path': output_report_path}) except PermissionError as e: self.update_signal.emit({'msg': str(e)}) self.update_signal.emit({'msg': '文件被占用请先关闭!'}) @@ -129,7 +138,7 @@ class MyThread(QThread): self.running = False def capture_output(self, p): - while self.running and p.poll() is None: + while self.running and p.poll() is None: output = p.stdout.readline() if output: self.update_signal.emit({'msg': output.strip()}) @@ -143,14 +152,17 @@ class MyThread(QThread): name = row['主办'] url = fix_url_scheme(row['地址'].strip()) domain = urlparse(url).netloc.replace('www.', '') - output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx') + output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx') # -u 代表不缓冲,直接输出 - cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}'] + cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', + 
f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}'] # cmd = [PYTHON_PATH, '-u', '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx'] - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, shell=False) + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, text=True, shell=False) self.processes.append(process) self.running = True - getlog_thread = threading.Thread(target=self.capture_output, args=(process,), daemon=True) + getlog_thread = threading.Thread( + target=self.capture_output, args=(process,), daemon=True) getlog_thread.start() # getlog_thread_err = threading.Thread(target=self.capture_err, args=(process,), daemon=True) # getlog_thread_err.start() @@ -164,23 +176,24 @@ class MyThread(QThread): name = row['主办'] url = fix_url_scheme(row['地址'].strip()) domain = urlparse(url).netloc.replace("www.", "") - output_filename = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx') + output_filename = os.path.join( + BASE_DIR, f'web_dir/{name}_{domain}.xlsx') if os.path.exists(output_filename): file_size = os.path.getsize(output_filename) - if file_size < self.lsize * 1024: # Convert KB to bytes + if file_size < self.lsize * 1024: # Convert KB to bytes info_to_save.append([group, name, url]) if info_to_save: self.update_signal.emit({'msg': '存在未爬取站点,正在调用Chrome继续爬取...'}) chrom_main_from_list(info_to_save) self.update_signal.emit({'msg': '网站爬取完毕!'}) - + def close(self): self.running = False if self.processes: for i in self.processes: i.kill() self.terminate() - + class MainWindow(QMainWindow): @@ -189,22 +202,29 @@ class MainWindow(QMainWindow): self.web_thread = None self.ana_thread = None self.wcplus = False - self.logModel= QStringListModel([]) + self.logModel = QStringListModel([]) self.ui = Ui_MainWindow() self.ui.setupUi(self) 
self.ui.lSize.setValidator(QIntValidator()) self.ui.bWechat.clicked.connect(self.open_wcplus) - self.ui.bWebSite.clicked.connect(lambda: self.open_file(WEB_SITES_PATH)) + self.ui.bWebSite.clicked.connect( + lambda: self.open_file(WEB_SITES_PATH)) self.ui.bBiao.clicked.connect(lambda: self.open_file(BIAO_PATH)) self.ui.bStart.clicked.connect(self.start) self.ui.bAna.clicked.connect(self.start_ana) - self.ui.bRes1.clicked.connect(lambda: self.open_file(self.ui.lRes1.text())) - self.ui.bRes2.clicked.connect(lambda: self.open_file(self.ui.lRes2.text(), 'docx')) - self.ui.bCal.clicked.connect(self.cbma_cal) - self.ui.bOpenCalRes1.clicked.connect(lambda: self.open_file(self.ui.lCalRes1.text())) - self.ui.bOpenCalRes2.clicked.connect(lambda: self.open_file(self.ui.lCalRes2.text())) - self.ui.bOpenCalRes3.clicked.connect(lambda: self.open_file(self.ui.lCalRes3.text())) - self.ui.bOpenCalRes4.clicked.connect(lambda: self.open_file(self.ui.lCalRes4.text())) + self.ui.bRes1.clicked.connect( + lambda: self.open_file(self.ui.lRes1.text())) + self.ui.bRes2.clicked.connect( + lambda: self.open_file(self.ui.lRes2.text(), 'docx')) + self.ui.bCal.clicked.connect(lambda: self.cbma_cal(self.ui.lYear.text())) + self.ui.bOpenCalRes1.clicked.connect( + lambda: self.open_file(self.ui.lCalRes1.text())) + self.ui.bOpenCalRes2.clicked.connect( + lambda: self.open_file(self.ui.lCalRes2.text())) + self.ui.bOpenCalRes3.clicked.connect( + lambda: self.open_file(self.ui.lCalRes3.text())) + self.ui.bOpenCalRes4.clicked.connect( + lambda: self.open_file(self.ui.lCalRes4.text())) self.ui.vLog.setModel(self.logModel) self.res1Workbook = None @@ -229,7 +249,7 @@ class MainWindow(QMainWindow): app.Visible = True app.Workbooks.Open(path) app.WindowState = 3 - + def get_time(self): now = datetime.datetime.now() return now.strftime('%H:%M:%S') @@ -249,7 +269,6 @@ class MainWindow(QMainWindow): self.log('', True) self.ui.lSize.setEnabled(True) self.ui.bStart.setText('开始爬取') - def start_web(self, lsize):
self.web_thread = MyThread(lsize) @@ -261,11 +280,15 @@ class MainWindow(QMainWindow): self.ana_thread.update_signal.connect(self.update_log) self.ana_thread.start() - def cbma_cal(self): - now_year = datetime.datetime.now().year + def cbma_cal(self, year=None): + try: + now_year = int(year) + except (TypeError, ValueError): + now_year = datetime.datetime.now().year self.update_log({'msg': '正在分析本年总院官微数据...'}) try: - origin_path, cbma_path, cbma_cal_path, cbma_month_path = get_cbma_info_from_db_and_ana(now_year) + origin_path, cbma_path, cbma_cal_path, cbma_month_path = get_cbma_info_from_db_and_ana( + now_year) except PermissionError as e: self.update_log({'msg': str(e)}) self.update_log({'msg': '文件被占用请先关闭!'}) @@ -321,4 +344,4 @@ if __name__ == "__main__": main_window = app.createMainWindow() main_window.show() print('启动成功') - sys.exit(app.exec()) \ No newline at end of file + sys.exit(app.exec()) diff --git a/ui_mainwindow.py b/ui_mainwindow.py index b94efd0..57ed36a 100644 --- a/ui_mainwindow.py +++ b/ui_mainwindow.py @@ -237,6 +237,12 @@ class Ui_MainWindow(object): self.lCalRes4.setObjectName(u"lCalRes4") self.lCalRes4.setGeometry(QRect(110, 150, 401, 16)) self.lCalRes4.setFont(font3) + self.lYear = QLineEdit(self.groupBox_7) + self.lYear.setObjectName(u"lYear") + self.lYear.setGeometry(QRect(180, 30, 71, 21)) + self.label_14 = QLabel(self.groupBox_7) + self.label_14.setObjectName(u"label_14") + self.label_14.setGeometry(QRect(260, 30, 41, 21)) MainWindow.setCentralWidget(self.centralwidget) self.menubar = QMenuBar(MainWindow) self.menubar.setObjectName(u"menubar") @@ -290,5 +296,7 @@ class Ui_MainWindow(object): self.label_13.setText(QCoreApplication.translate("MainWindow", u"\u5355\u4f4d\u6708\u5ea6\u7edf\u8ba1\u8868", None)) self.bOpenCalRes4.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None)) self.lCalRes4.setText("") + self.lYear.setText(QCoreApplication.translate("MainWindow", u"2023", None)) +
self.label_14.setText(QCoreApplication.translate("MainWindow", u"\u5e74", None)) # retranslateUi