feat: 输出简报
This commit is contained in:
parent
70040f1c0a
commit
e5ff671c97
|
@ -5,7 +5,9 @@ __pycache__/
|
||||||
twistd.pid
|
twistd.pid
|
||||||
~$*
|
~$*
|
||||||
*.xlsx
|
*.xlsx
|
||||||
!template.xlsx
|
*.docx
|
||||||
|
!template*.xlsx
|
||||||
|
!template*.docx
|
||||||
wechat_dir/*
|
wechat_dir/*
|
||||||
*.csv
|
*.csv
|
||||||
.idea/*
|
.idea/*
|
||||||
|
|
315
main.ui
315
main.ui
|
@ -7,7 +7,7 @@
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>600</width>
|
<width>600</width>
|
||||||
<height>763</height>
|
<height>830</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="sizePolicy">
|
<property name="sizePolicy">
|
||||||
|
@ -19,13 +19,13 @@
|
||||||
<property name="minimumSize">
|
<property name="minimumSize">
|
||||||
<size>
|
<size>
|
||||||
<width>600</width>
|
<width>600</width>
|
||||||
<height>763</height>
|
<height>830</height>
|
||||||
</size>
|
</size>
|
||||||
</property>
|
</property>
|
||||||
<property name="maximumSize">
|
<property name="maximumSize">
|
||||||
<size>
|
<size>
|
||||||
<width>600</width>
|
<width>600</width>
|
||||||
<height>763</height>
|
<height>830</height>
|
||||||
</size>
|
</size>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
|
@ -123,70 +123,13 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="QGroupBox" name="groupBox_3">
|
|
||||||
<property name="geometry">
|
|
||||||
<rect>
|
|
||||||
<x>10</x>
|
|
||||||
<y>380</y>
|
|
||||||
<width>191</width>
|
|
||||||
<height>91</height>
|
|
||||||
</rect>
|
|
||||||
</property>
|
|
||||||
<property name="font">
|
|
||||||
<font>
|
|
||||||
<pointsize>11</pointsize>
|
|
||||||
</font>
|
|
||||||
</property>
|
|
||||||
<property name="title">
|
|
||||||
<string>2.确认分析对比库</string>
|
|
||||||
</property>
|
|
||||||
<widget class="QPushButton" name="bBiao">
|
|
||||||
<property name="geometry">
|
|
||||||
<rect>
|
|
||||||
<x>20</x>
|
|
||||||
<y>30</y>
|
|
||||||
<width>151</width>
|
|
||||||
<height>24</height>
|
|
||||||
</rect>
|
|
||||||
</property>
|
|
||||||
<property name="styleSheet">
|
|
||||||
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
|
||||||
<string>打开分析标准Excel</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
<widget class="QLabel" name="label_4">
|
|
||||||
<property name="geometry">
|
|
||||||
<rect>
|
|
||||||
<x>20</x>
|
|
||||||
<y>60</y>
|
|
||||||
<width>151</width>
|
|
||||||
<height>16</height>
|
|
||||||
</rect>
|
|
||||||
</property>
|
|
||||||
<property name="font">
|
|
||||||
<font>
|
|
||||||
<family>楷体</family>
|
|
||||||
<pointsize>10</pointsize>
|
|
||||||
<bold>false</bold>
|
|
||||||
</font>
|
|
||||||
</property>
|
|
||||||
<property name="styleSheet">
|
|
||||||
<string notr="true">color: red;</string>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
|
||||||
<string>请在修改后保存并关闭</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</widget>
|
|
||||||
<widget class="QGroupBox" name="groupBox_5">
|
<widget class="QGroupBox" name="groupBox_5">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>10</x>
|
<x>10</x>
|
||||||
<y>650</y>
|
<y>540</y>
|
||||||
<width>581</width>
|
<width>581</width>
|
||||||
<height>71</height>
|
<height>121</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="font">
|
<property name="font">
|
||||||
|
@ -195,13 +138,13 @@
|
||||||
</font>
|
</font>
|
||||||
</property>
|
</property>
|
||||||
<property name="title">
|
<property name="title">
|
||||||
<string>最终结果</string>
|
<string>汇总分析</string>
|
||||||
</property>
|
</property>
|
||||||
<widget class="QLabel" name="label_7">
|
<widget class="QLabel" name="label_7">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>10</x>
|
<x>10</x>
|
||||||
<y>20</y>
|
<y>70</y>
|
||||||
<width>91</width>
|
<width>91</width>
|
||||||
<height>16</height>
|
<height>16</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -219,7 +162,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>10</x>
|
<x>10</x>
|
||||||
<y>40</y>
|
<y>90</y>
|
||||||
<width>91</width>
|
<width>91</width>
|
||||||
<height>16</height>
|
<height>16</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -237,7 +180,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>10</x>
|
<x>10</x>
|
||||||
<y>30</y>
|
<y>80</y>
|
||||||
<width>561</width>
|
<width>561</width>
|
||||||
<height>16</height>
|
<height>16</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -250,7 +193,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>110</x>
|
<x>110</x>
|
||||||
<y>15</y>
|
<y>65</y>
|
||||||
<width>381</width>
|
<width>381</width>
|
||||||
<height>21</height>
|
<height>21</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -268,7 +211,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>110</x>
|
<x>110</x>
|
||||||
<y>40</y>
|
<y>90</y>
|
||||||
<width>381</width>
|
<width>381</width>
|
||||||
<height>16</height>
|
<height>16</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -286,7 +229,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>520</x>
|
<x>520</x>
|
||||||
<y>10</y>
|
<y>60</y>
|
||||||
<width>51</width>
|
<width>51</width>
|
||||||
<height>24</height>
|
<height>24</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -302,7 +245,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>520</x>
|
<x>520</x>
|
||||||
<y>40</y>
|
<y>90</y>
|
||||||
<width>51</width>
|
<width>51</width>
|
||||||
<height>24</height>
|
<height>24</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -311,6 +254,71 @@
|
||||||
<string>打开</string>
|
<string>打开</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
|
<widget class="QPushButton" name="bBiao">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>20</x>
|
||||||
|
<y>30</y>
|
||||||
|
<width>151</width>
|
||||||
|
<height>24</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>11</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="styleSheet">
|
||||||
|
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>打开分析标准Excel</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLabel" name="label_4">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>180</x>
|
||||||
|
<y>30</y>
|
||||||
|
<width>151</width>
|
||||||
|
<height>16</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<family>楷体</family>
|
||||||
|
<pointsize>11</pointsize>
|
||||||
|
<bold>false</bold>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="styleSheet">
|
||||||
|
<string notr="true">color: red;</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>请在修改后保存并关闭</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QPushButton" name="bAna">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>420</x>
|
||||||
|
<y>30</y>
|
||||||
|
<width>151</width>
|
||||||
|
<height>24</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>12</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="styleSheet">
|
||||||
|
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>开始分析</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="QLabel" name="label_9">
|
<widget class="QLabel" name="label_9">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
|
@ -345,10 +353,10 @@
|
||||||
<widget class="QGroupBox" name="groupBox_6">
|
<widget class="QGroupBox" name="groupBox_6">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>210</x>
|
<x>220</x>
|
||||||
<y>280</y>
|
<y>280</y>
|
||||||
<width>371</width>
|
<width>371</width>
|
||||||
<height>361</height>
|
<height>251</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="font">
|
<property name="font">
|
||||||
|
@ -357,7 +365,7 @@
|
||||||
</font>
|
</font>
|
||||||
</property>
|
</property>
|
||||||
<property name="title">
|
<property name="title">
|
||||||
<string>日志显示</string>
|
<string>操作日志显示</string>
|
||||||
</property>
|
</property>
|
||||||
<widget class="QListView" name="vLog">
|
<widget class="QListView" name="vLog">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
|
@ -365,7 +373,7 @@
|
||||||
<x>10</x>
|
<x>10</x>
|
||||||
<y>20</y>
|
<y>20</y>
|
||||||
<width>351</width>
|
<width>351</width>
|
||||||
<height>321</height>
|
<height>221</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="font">
|
<property name="font">
|
||||||
|
@ -382,7 +390,7 @@
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
<x>10</x>
|
<x>10</x>
|
||||||
<y>490</y>
|
<y>380</y>
|
||||||
<width>191</width>
|
<width>191</width>
|
||||||
<height>151</height>
|
<height>151</height>
|
||||||
</rect>
|
</rect>
|
||||||
|
@ -393,7 +401,7 @@
|
||||||
</font>
|
</font>
|
||||||
</property>
|
</property>
|
||||||
<property name="title">
|
<property name="title">
|
||||||
<string>2.确认需要抓取的网站</string>
|
<string>2.确认需要爬取的官网</string>
|
||||||
</property>
|
</property>
|
||||||
<widget class="QPushButton" name="bWebSite">
|
<widget class="QPushButton" name="bWebSite">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
|
@ -491,7 +499,160 @@
|
||||||
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>开始巡查</string>
|
<string>开始爬取</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</widget>
|
||||||
|
<widget class="QGroupBox" name="groupBox_7">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>10</x>
|
||||||
|
<y>670</y>
|
||||||
|
<width>581</width>
|
||||||
|
<height>111</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>11</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="title">
|
||||||
|
<string>总院官微</string>
|
||||||
|
</property>
|
||||||
|
<widget class="QLabel" name="label_10">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>10</x>
|
||||||
|
<y>60</y>
|
||||||
|
<width>91</width>
|
||||||
|
<height>16</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>10</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>汇总结果Excel:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLabel" name="label_11">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>10</x>
|
||||||
|
<y>80</y>
|
||||||
|
<width>91</width>
|
||||||
|
<height>16</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>10</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>汇总打分Excel:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="Line" name="line_2">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>10</x>
|
||||||
|
<y>70</y>
|
||||||
|
<width>561</width>
|
||||||
|
<height>16</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Qt::Horizontal</enum>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLabel" name="lCalRes1">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>110</x>
|
||||||
|
<y>55</y>
|
||||||
|
<width>381</width>
|
||||||
|
<height>21</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>9</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string/>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QLabel" name="lCalRes2">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>110</x>
|
||||||
|
<y>80</y>
|
||||||
|
<width>381</width>
|
||||||
|
<height>16</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>9</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string/>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QPushButton" name="bOpenCalRes1">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>520</x>
|
||||||
|
<y>50</y>
|
||||||
|
<width>51</width>
|
||||||
|
<height>24</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="styleSheet">
|
||||||
|
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>打开</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QPushButton" name="bOpenCalRes2">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>520</x>
|
||||||
|
<y>80</y>
|
||||||
|
<width>51</width>
|
||||||
|
<height>24</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>打开</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
<widget class="QPushButton" name="bCal">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>20</x>
|
||||||
|
<y>30</y>
|
||||||
|
<width>151</width>
|
||||||
|
<height>24</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<pointsize>11</pointsize>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="styleSheet">
|
||||||
|
<string notr="true">background-color:#409EFF; color: white; border-radius: 2px</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>汇总打分</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</widget>
|
</widget>
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from .base import BASE_DIR
|
from mycode.base import BASE_DIR
|
||||||
|
|
||||||
wechat_dir = os.path.join(BASE_DIR, 'article')
|
wechat_dir = os.path.join(BASE_DIR, 'article')
|
||||||
web_dir = os.path.join(BASE_DIR, 'web_dir')
|
web_dir = os.path.join(BASE_DIR, 'web_dir')
|
||||||
|
@ -53,6 +53,10 @@ def ana_wechat():
|
||||||
|
|
||||||
if not result.empty:
|
if not result.empty:
|
||||||
for ind2, row2 in result.iterrows():
|
for ind2, row2 in result.iterrows():
|
||||||
|
if row['错误表述'] == '“两学一做”学习' and '“两学一做”学习教育' in row2['content']:
|
||||||
|
continue
|
||||||
|
if row['错误表述'] == '20大':
|
||||||
|
continue
|
||||||
output_row = [
|
output_row = [
|
||||||
index,
|
index,
|
||||||
row2['nickname'],
|
row2['nickname'],
|
||||||
|
@ -85,6 +89,10 @@ def ana_web():
|
||||||
result = df[mask]
|
result = df[mask]
|
||||||
if not result.empty:
|
if not result.empty:
|
||||||
for ind2, row2 in result.iterrows():
|
for ind2, row2 in result.iterrows():
|
||||||
|
if row['错误表述'] == '“两学一做”学习' and '“两学一做”学习教育' in row2['text']:
|
||||||
|
continue
|
||||||
|
if row['错误表述'] == '20大':
|
||||||
|
continue
|
||||||
output_row = [
|
output_row = [
|
||||||
index,
|
index,
|
||||||
row2['name'],
|
row2['name'],
|
||||||
|
@ -101,4 +109,6 @@ def ana_web():
|
||||||
|
|
||||||
return output_data
|
return output_data
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
ana_web()
|
||||||
|
|
||||||
|
|
|
@ -5,4 +5,5 @@ openpyxl==3.1.2
|
||||||
scrapy-xlsx==0.1.1
|
scrapy-xlsx==0.1.1
|
||||||
selenium==4.9.1
|
selenium==4.9.1
|
||||||
pyside6==6.5.2
|
pyside6==6.5.2
|
||||||
pywin32==306
|
pywin32==306
|
||||||
|
docxtpl==0.16.7
|
164
start.py
164
start.py
|
@ -13,6 +13,9 @@ import pandas as pd
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from openpyxl import load_workbook
|
from openpyxl import load_workbook
|
||||||
import threading
|
import threading
|
||||||
|
import traceback
|
||||||
|
from docxtpl import DocxTemplate
|
||||||
|
import json
|
||||||
# from queue import Queue
|
# from queue import Queue
|
||||||
|
|
||||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
@ -20,6 +23,8 @@ WEB_SITES_PATH = os.path.join(BASE_DIR, 'web_sites.xlsx')
|
||||||
BIAO_PATH = os.path.join(BASE_DIR, 'biao.xlsx')
|
BIAO_PATH = os.path.join(BASE_DIR, 'biao.xlsx')
|
||||||
PYTHON_PATH = os.path.join(BASE_DIR, 'runtime/python.exe')
|
PYTHON_PATH = os.path.join(BASE_DIR, 'runtime/python.exe')
|
||||||
TEMPLATE_PATH = os.path.join(BASE_DIR, 'summary/template.xlsx')
|
TEMPLATE_PATH = os.path.join(BASE_DIR, 'summary/template.xlsx')
|
||||||
|
TEMPLATE_REPORT_PATH = os.path.join(BASE_DIR, 'summary/template_report.docx')
|
||||||
|
|
||||||
|
|
||||||
def fix_url_scheme(url, default_scheme='http'):
|
def fix_url_scheme(url, default_scheme='http'):
|
||||||
# 检查URL是否包含方案
|
# 检查URL是否包含方案
|
||||||
|
@ -37,26 +42,49 @@ class MyApplication(QApplication):
|
||||||
self.main_window = MainWindow()
|
self.main_window = MainWindow()
|
||||||
return self.main_window
|
return self.main_window
|
||||||
|
|
||||||
class MyThread(QThread):
|
def gen_doc(w1, w2):
|
||||||
update_signal = Signal(dict)
|
now = datetime.datetime.now()
|
||||||
|
now_3 = now - datetime.timedelta(days=3)
|
||||||
|
# with open('w2.json', 'r', encoding='utf-8') as f:
|
||||||
|
# w2 = json.loads(f.read())
|
||||||
|
# with open('w1.json', 'r', encoding='utf-8') as f:
|
||||||
|
# w1 = json.loads(f.read())
|
||||||
|
gdbs = 0
|
||||||
|
yzbs = 0
|
||||||
|
ybwz = 0
|
||||||
|
zzcc = 0
|
||||||
|
context = {'y': now.year, 'm': now.month, 'd': now.day, 'mo': now_3.month, 'do': now_3.day, 'su': 'xx', 'w1': w1, 'w2': w2}
|
||||||
|
output_report_path = os.path.join(BASE_DIR, f'summary/{now.year}年{now.month}月-分析结果简报.docx')
|
||||||
|
doc = DocxTemplate(TEMPLATE_REPORT_PATH)
|
||||||
|
for i in w1:
|
||||||
|
if i[5] == '固定表述错误':
|
||||||
|
gdbs =gdbs + 1
|
||||||
|
elif i[5] == '严重表述错误':
|
||||||
|
yzbs = yzbs +1
|
||||||
|
elif i[5] == '一般文字差错':
|
||||||
|
ybwz = ybwz +1
|
||||||
|
elif i[5] == '政治差错':
|
||||||
|
zzcc = zzcc +1
|
||||||
|
for i in w2:
|
||||||
|
if i[5] == '固定表述错误':
|
||||||
|
gdbs =gdbs + 1
|
||||||
|
elif i[5] == '严重表述错误':
|
||||||
|
yzbs = yzbs +1
|
||||||
|
elif i[5] == '一般文字差错':
|
||||||
|
ybwz = ybwz +1
|
||||||
|
elif i[5] == '政治差错':
|
||||||
|
zzcc = zzcc +1
|
||||||
|
|
||||||
def __init__(self, lsize) -> None:
|
context['su'] = f'固定表述错误{gdbs}项, 严重表述错误{yzbs}项, 一般文字差错{ybwz}项, 政治差错{zzcc}项'
|
||||||
super().__init__()
|
|
||||||
self.lsize = lsize
|
doc.render(context)
|
||||||
self.processes = []
|
doc.save(output_report_path)
|
||||||
self.running = False
|
return output_report_path
|
||||||
|
class AnaThread(QThread):
|
||||||
|
update_signal = Signal(object)
|
||||||
|
|
||||||
def capture_output(self, p):
|
|
||||||
while self.running and p.poll() is None:
|
|
||||||
output = p.stdout.readline()
|
|
||||||
err = p.stderr.readline()
|
|
||||||
if err:
|
|
||||||
self.update_signal.emit({'msg': err.strip()})
|
|
||||||
if output:
|
|
||||||
self.update_signal.emit({'msg': output.strip()})
|
|
||||||
|
|
||||||
def ana(self):
|
def ana(self):
|
||||||
month = datetime.datetime.now().month
|
now = datetime.datetime.now()
|
||||||
self.update_signal.emit({'msg': '对比开始...'})
|
self.update_signal.emit({'msg': '对比开始...'})
|
||||||
self.update_signal.emit({'msg': '正在组合微信公众号爬取内容...'})
|
self.update_signal.emit({'msg': '正在组合微信公众号爬取内容...'})
|
||||||
make_simple_csv_from_db()
|
make_simple_csv_from_db()
|
||||||
|
@ -65,9 +93,10 @@ class MyThread(QThread):
|
||||||
self.update_signal.emit({'msg': '开始对比分析所有内容...'})
|
self.update_signal.emit({'msg': '开始对比分析所有内容...'})
|
||||||
wechat_results = ana_wechat()
|
wechat_results = ana_wechat()
|
||||||
web_results = ana_web()
|
web_results = ana_web()
|
||||||
output_excel_path = os.path.join(BASE_DIR, f'summary/{month}月-总院宣传阵地巡查结果汇总表.xlsx')
|
# 生成汇总表
|
||||||
|
self.update_signal.emit({'msg': '开始生成汇总表...'})
|
||||||
|
output_excel_path = os.path.join(BASE_DIR, f'summary/{now.year}年{now.month}月-分析结果汇总表.xlsx')
|
||||||
workbook = load_workbook(TEMPLATE_PATH)
|
workbook = load_workbook(TEMPLATE_PATH)
|
||||||
# 选择要操作的工作表
|
|
||||||
wechat_sheet = workbook['公众号']
|
wechat_sheet = workbook['公众号']
|
||||||
web_sheet = workbook['网站']
|
web_sheet = workbook['网站']
|
||||||
for row in wechat_results:
|
for row in wechat_results:
|
||||||
|
@ -76,7 +105,46 @@ class MyThread(QThread):
|
||||||
web_sheet.append(row)
|
web_sheet.append(row)
|
||||||
workbook.save(output_excel_path)
|
workbook.save(output_excel_path)
|
||||||
workbook.close()
|
workbook.close()
|
||||||
self.update_signal.emit({'msg': '分析完毕, 请查看结果栏, 可手动校对', 'output_excel_path': output_excel_path})
|
# with open('w1.json', 'w', encoding='utf-8') as f:
|
||||||
|
# f.write(json.dumps(wechat_results, ensure_ascii=False))
|
||||||
|
|
||||||
|
# with open('w2.json', 'w', encoding='utf-8') as f:
|
||||||
|
# f.write(json.dumps(web_results, ensure_ascii=False))
|
||||||
|
# 生成简报
|
||||||
|
self.update_signal.emit({'msg': '开始生成汇总简报...'})
|
||||||
|
output_report_path = gen_doc(wechat_results, web_results)
|
||||||
|
self.update_signal.emit({'msg': '分析完毕, 请查看结果栏, 可手动校对', 'output_excel_path': output_excel_path, 'output_report_path': output_report_path})
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
try:
|
||||||
|
self.ana()
|
||||||
|
except Exception as e:
|
||||||
|
self.update_signal.emit({'msg': traceback.format_exc()})
|
||||||
|
|
||||||
|
|
||||||
|
class MyThread(QThread):
|
||||||
|
update_signal = Signal(object)
|
||||||
|
|
||||||
|
def __init__(self, lsize) -> None:
|
||||||
|
"""
|
||||||
|
lsize: 多少kb需要调取Chrome
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
self.lsize = lsize
|
||||||
|
self.processes = []
|
||||||
|
self.running = False
|
||||||
|
|
||||||
|
def capture_output(self, p):
|
||||||
|
while self.running and p.poll() is None:
|
||||||
|
output = p.stdout.readline()
|
||||||
|
if output:
|
||||||
|
self.update_signal.emit({'msg': output.strip()})
|
||||||
|
|
||||||
|
def capture_err(self, p):
|
||||||
|
while self.running and p.poll() is None:
|
||||||
|
err = p.stderr.readline()
|
||||||
|
if err:
|
||||||
|
self.update_signal.emit({'msg': err.strip()})
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
self.update_signal.emit({'msg': '开始进行网站爬取...'})
|
self.update_signal.emit({'msg': '开始进行网站爬取...'})
|
||||||
|
@ -95,6 +163,8 @@ class MyThread(QThread):
|
||||||
self.running = True
|
self.running = True
|
||||||
getlog_thread = threading.Thread(target=self.capture_output, args=(process,), daemon=True)
|
getlog_thread = threading.Thread(target=self.capture_output, args=(process,), daemon=True)
|
||||||
getlog_thread.start()
|
getlog_thread.start()
|
||||||
|
getlog_thread_err = threading.Thread(target=self.capture_err, args=(process,), daemon=True)
|
||||||
|
getlog_thread_err.start()
|
||||||
|
|
||||||
for process in self.processes:
|
for process in self.processes:
|
||||||
process.wait()
|
process.wait()
|
||||||
|
@ -114,8 +184,6 @@ class MyThread(QThread):
|
||||||
self.update_signal.emit({'msg': '存在未爬取站点,正在调用Chrome继续爬取...'})
|
self.update_signal.emit({'msg': '存在未爬取站点,正在调用Chrome继续爬取...'})
|
||||||
chrom_main_from_list(info_to_save)
|
chrom_main_from_list(info_to_save)
|
||||||
self.update_signal.emit({'msg': '网站爬取完毕!'})
|
self.update_signal.emit({'msg': '网站爬取完毕!'})
|
||||||
self.ana()
|
|
||||||
self.exec()
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self.running = False
|
self.running = False
|
||||||
|
@ -129,7 +197,8 @@ class MainWindow(QMainWindow):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(MainWindow, self).__init__()
|
super(MainWindow, self).__init__()
|
||||||
self.worker_thread = None
|
self.web_thread = None
|
||||||
|
self.ana_thread = None
|
||||||
self.wcplus = False
|
self.wcplus = False
|
||||||
self.logModel= QStringListModel([])
|
self.logModel= QStringListModel([])
|
||||||
self.ui = Ui_MainWindow()
|
self.ui = Ui_MainWindow()
|
||||||
|
@ -139,6 +208,7 @@ class MainWindow(QMainWindow):
|
||||||
self.ui.bWebSite.clicked.connect(self.open_websites_xlsx)
|
self.ui.bWebSite.clicked.connect(self.open_websites_xlsx)
|
||||||
self.ui.bBiao.clicked.connect(self.open_biao_xlsx)
|
self.ui.bBiao.clicked.connect(self.open_biao_xlsx)
|
||||||
self.ui.bStart.clicked.connect(self.start)
|
self.ui.bStart.clicked.connect(self.start)
|
||||||
|
self.ui.bAna.clicked.connect(self.start_ana)
|
||||||
self.ui.bRes1.clicked.connect(self.open_res1)
|
self.ui.bRes1.clicked.connect(self.open_res1)
|
||||||
self.ui.bRes2.clicked.connect(self.open_res2)
|
self.ui.bRes2.clicked.connect(self.open_res2)
|
||||||
self.ui.vLog.setModel(self.logModel)
|
self.ui.vLog.setModel(self.logModel)
|
||||||
|
@ -170,9 +240,9 @@ class MainWindow(QMainWindow):
|
||||||
|
|
||||||
def open_res2(self):
|
def open_res2(self):
|
||||||
if self.ui.lRes2.text():
|
if self.ui.lRes2.text():
|
||||||
app = win32.Dispatch("Excel.Application")
|
app = win32.Dispatch("Word.Application")
|
||||||
app.Visible = True
|
app.Visible = True
|
||||||
app.Workbooks.Open(self.ui.lRes2.text())
|
app.Documents.Open(self.ui.lRes2.text())
|
||||||
app.WindowState = 3
|
app.WindowState = 3
|
||||||
|
|
||||||
def get_time(self):
|
def get_time(self):
|
||||||
|
@ -180,34 +250,45 @@ class MainWindow(QMainWindow):
|
||||||
return now.strftime('%H:%M:%S')
|
return now.strftime('%H:%M:%S')
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
if self.ui.bStart.text() == '开始巡查' or self.ui.bStart.text() == '重新开始':
|
if self.ui.bStart.text() == '开始爬取' or self.ui.bStart.text() == '重新开始':
|
||||||
self.log('', True)
|
self.log('', True)
|
||||||
if self.res1Workbook:
|
if self.res1Workbook:
|
||||||
self.res1Workbook.Close()
|
self.res1Workbook.Close()
|
||||||
self.ui.lSize.setEnabled(False)
|
self.ui.lSize.setEnabled(False)
|
||||||
self.ui.bStart.setText('停止巡查')
|
self.ui.bStart.setText('停止爬取')
|
||||||
self.start_web(int(self.ui.lSize.text()))
|
self.start_web(int(self.ui.lSize.text()))
|
||||||
elif self.ui.bStart.text() == '停止巡查':
|
elif self.ui.bStart.text() == '停止爬取':
|
||||||
self.update_log({'msg': '正在停止...'})
|
self.update_log({'msg': '正在停止...'})
|
||||||
if self.worker_thread:
|
if self.web_thread:
|
||||||
self.worker_thread.close()
|
self.web_thread.close()
|
||||||
self.log('', True)
|
self.log('', True)
|
||||||
self.ui.lSize.setEnabled(True)
|
self.ui.lSize.setEnabled(True)
|
||||||
self.ui.bStart.setText('开始巡查')
|
self.ui.bStart.setText('开始爬取')
|
||||||
|
|
||||||
|
|
||||||
def start_web(self, lsize):
|
def start_web(self, lsize):
|
||||||
self.worker_thread = MyThread(lsize)
|
self.web_thread = MyThread(lsize)
|
||||||
self.worker_thread.update_signal.connect(self.update_log)
|
self.web_thread.update_signal.connect(self.update_log)
|
||||||
self.worker_thread.start()
|
self.web_thread.start()
|
||||||
|
|
||||||
|
def start_ana(self):
|
||||||
|
self.ana_thread = AnaThread()
|
||||||
|
self.ana_thread.update_signal.connect(self.update_log)
|
||||||
|
self.ana_thread.start()
|
||||||
|
|
||||||
def update_log(self, rdict):
|
def update_log(self, rdict):
|
||||||
self.log(f'{self.get_time()}-{rdict["msg"]}', False)
|
if isinstance(rdict, str):
|
||||||
if 'output_excel_path' in rdict:
|
self.log(f'{self.get_time()}-{rdict}', False)
|
||||||
self.ui.lRes1.setText(rdict['output_excel_path'])
|
elif isinstance(rdict, dict):
|
||||||
self.ui.bStart.setText('重新开始')
|
self.log(f'{self.get_time()}-{rdict["msg"]}', False)
|
||||||
self.ui.lSize.setEnabled(True)
|
if 'output_report_path' in rdict:
|
||||||
|
self.ui.lRes2.setText(rdict['output_report_path'])
|
||||||
|
# self.ui.bStart.setText('重新开始')
|
||||||
|
# self.ui.lSize.setEnabled(True)
|
||||||
|
if 'output_excel_path' in rdict:
|
||||||
|
self.ui.lRes1.setText(rdict['output_excel_path'])
|
||||||
|
# self.ui.bStart.setText('重新开始')
|
||||||
|
# self.ui.lSize.setEnabled(True)
|
||||||
|
|
||||||
def log(self, logLine: str, clear=False):
|
def log(self, logLine: str, clear=False):
|
||||||
log_list = self.logModel.stringList()
|
log_list = self.logModel.stringList()
|
||||||
|
@ -228,12 +309,13 @@ class MainWindow(QMainWindow):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error while terminating wcplus.exe: {str(e)}")
|
print(f"Error while terminating wcplus.exe: {str(e)}")
|
||||||
self.wcplus = False
|
self.wcplus = False
|
||||||
if self.worker_thread:
|
if self.web_thread:
|
||||||
self.worker_thread.close()
|
self.web_thread.close()
|
||||||
event.accept()
|
event.accept()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
# gen_doc()
|
||||||
app = MyApplication(sys.argv)
|
app = MyApplication(sys.argv)
|
||||||
main_window = app.createMainWindow()
|
main_window = app.createMainWindow()
|
||||||
main_window.show()
|
main_window.show()
|
||||||
|
|
Binary file not shown.
135
ui_mainwindow.py
135
ui_mainwindow.py
|
@ -23,14 +23,14 @@ class Ui_MainWindow(object):
|
||||||
def setupUi(self, MainWindow):
|
def setupUi(self, MainWindow):
|
||||||
if not MainWindow.objectName():
|
if not MainWindow.objectName():
|
||||||
MainWindow.setObjectName(u"MainWindow")
|
MainWindow.setObjectName(u"MainWindow")
|
||||||
MainWindow.resize(600, 763)
|
MainWindow.resize(600, 830)
|
||||||
sizePolicy = QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
|
sizePolicy = QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
|
||||||
sizePolicy.setHorizontalStretch(0)
|
sizePolicy.setHorizontalStretch(0)
|
||||||
sizePolicy.setVerticalStretch(0)
|
sizePolicy.setVerticalStretch(0)
|
||||||
sizePolicy.setHeightForWidth(MainWindow.sizePolicy().hasHeightForWidth())
|
sizePolicy.setHeightForWidth(MainWindow.sizePolicy().hasHeightForWidth())
|
||||||
MainWindow.setSizePolicy(sizePolicy)
|
MainWindow.setSizePolicy(sizePolicy)
|
||||||
MainWindow.setMinimumSize(QSize(600, 763))
|
MainWindow.setMinimumSize(QSize(600, 830))
|
||||||
MainWindow.setMaximumSize(QSize(600, 763))
|
MainWindow.setMaximumSize(QSize(600, 830))
|
||||||
icon = QIcon()
|
icon = QIcon()
|
||||||
icon.addFile(u"start.ico", QSize(), QIcon.Normal, QIcon.Off)
|
icon.addFile(u"start.ico", QSize(), QIcon.Normal, QIcon.Off)
|
||||||
MainWindow.setWindowIcon(icon)
|
MainWindow.setWindowIcon(icon)
|
||||||
|
@ -63,79 +63,87 @@ class Ui_MainWindow(object):
|
||||||
font1.setBold(False)
|
font1.setBold(False)
|
||||||
self.label_5.setFont(font1)
|
self.label_5.setFont(font1)
|
||||||
self.label_5.setStyleSheet(u"color: red;")
|
self.label_5.setStyleSheet(u"color: red;")
|
||||||
self.groupBox_3 = QGroupBox(self.centralwidget)
|
|
||||||
self.groupBox_3.setObjectName(u"groupBox_3")
|
|
||||||
self.groupBox_3.setGeometry(QRect(10, 380, 191, 91))
|
|
||||||
self.groupBox_3.setFont(font)
|
|
||||||
self.bBiao = QPushButton(self.groupBox_3)
|
|
||||||
self.bBiao.setObjectName(u"bBiao")
|
|
||||||
self.bBiao.setGeometry(QRect(20, 30, 151, 24))
|
|
||||||
self.bBiao.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
|
||||||
self.label_4 = QLabel(self.groupBox_3)
|
|
||||||
self.label_4.setObjectName(u"label_4")
|
|
||||||
self.label_4.setGeometry(QRect(20, 60, 151, 16))
|
|
||||||
self.label_4.setFont(font1)
|
|
||||||
self.label_4.setStyleSheet(u"color: red;")
|
|
||||||
self.groupBox_5 = QGroupBox(self.centralwidget)
|
self.groupBox_5 = QGroupBox(self.centralwidget)
|
||||||
self.groupBox_5.setObjectName(u"groupBox_5")
|
self.groupBox_5.setObjectName(u"groupBox_5")
|
||||||
self.groupBox_5.setGeometry(QRect(10, 650, 581, 71))
|
self.groupBox_5.setGeometry(QRect(10, 540, 581, 121))
|
||||||
self.groupBox_5.setFont(font)
|
self.groupBox_5.setFont(font)
|
||||||
self.label_7 = QLabel(self.groupBox_5)
|
self.label_7 = QLabel(self.groupBox_5)
|
||||||
self.label_7.setObjectName(u"label_7")
|
self.label_7.setObjectName(u"label_7")
|
||||||
self.label_7.setGeometry(QRect(10, 20, 91, 16))
|
self.label_7.setGeometry(QRect(10, 70, 91, 16))
|
||||||
font2 = QFont()
|
font2 = QFont()
|
||||||
font2.setPointSize(10)
|
font2.setPointSize(10)
|
||||||
self.label_7.setFont(font2)
|
self.label_7.setFont(font2)
|
||||||
self.label_8 = QLabel(self.groupBox_5)
|
self.label_8 = QLabel(self.groupBox_5)
|
||||||
self.label_8.setObjectName(u"label_8")
|
self.label_8.setObjectName(u"label_8")
|
||||||
self.label_8.setGeometry(QRect(10, 40, 91, 16))
|
self.label_8.setGeometry(QRect(10, 90, 91, 16))
|
||||||
self.label_8.setFont(font2)
|
self.label_8.setFont(font2)
|
||||||
self.line = QFrame(self.groupBox_5)
|
self.line = QFrame(self.groupBox_5)
|
||||||
self.line.setObjectName(u"line")
|
self.line.setObjectName(u"line")
|
||||||
self.line.setGeometry(QRect(10, 30, 561, 16))
|
self.line.setGeometry(QRect(10, 80, 561, 16))
|
||||||
self.line.setFrameShape(QFrame.HLine)
|
self.line.setFrameShape(QFrame.HLine)
|
||||||
self.line.setFrameShadow(QFrame.Sunken)
|
self.line.setFrameShadow(QFrame.Sunken)
|
||||||
self.lRes1 = QLabel(self.groupBox_5)
|
self.lRes1 = QLabel(self.groupBox_5)
|
||||||
self.lRes1.setObjectName(u"lRes1")
|
self.lRes1.setObjectName(u"lRes1")
|
||||||
self.lRes1.setGeometry(QRect(110, 15, 381, 21))
|
self.lRes1.setGeometry(QRect(110, 65, 381, 21))
|
||||||
font3 = QFont()
|
font3 = QFont()
|
||||||
font3.setPointSize(9)
|
font3.setPointSize(9)
|
||||||
self.lRes1.setFont(font3)
|
self.lRes1.setFont(font3)
|
||||||
self.lRes2 = QLabel(self.groupBox_5)
|
self.lRes2 = QLabel(self.groupBox_5)
|
||||||
self.lRes2.setObjectName(u"lRes2")
|
self.lRes2.setObjectName(u"lRes2")
|
||||||
self.lRes2.setGeometry(QRect(110, 40, 381, 16))
|
self.lRes2.setGeometry(QRect(110, 90, 381, 16))
|
||||||
self.lRes2.setFont(font3)
|
self.lRes2.setFont(font3)
|
||||||
self.bRes1 = QPushButton(self.groupBox_5)
|
self.bRes1 = QPushButton(self.groupBox_5)
|
||||||
self.bRes1.setObjectName(u"bRes1")
|
self.bRes1.setObjectName(u"bRes1")
|
||||||
self.bRes1.setGeometry(QRect(520, 10, 51, 24))
|
self.bRes1.setGeometry(QRect(520, 60, 51, 24))
|
||||||
self.bRes1.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
self.bRes1.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
||||||
self.bRes2 = QPushButton(self.groupBox_5)
|
self.bRes2 = QPushButton(self.groupBox_5)
|
||||||
self.bRes2.setObjectName(u"bRes2")
|
self.bRes2.setObjectName(u"bRes2")
|
||||||
self.bRes2.setGeometry(QRect(520, 40, 51, 24))
|
self.bRes2.setGeometry(QRect(520, 90, 51, 24))
|
||||||
|
self.bBiao = QPushButton(self.groupBox_5)
|
||||||
|
self.bBiao.setObjectName(u"bBiao")
|
||||||
|
self.bBiao.setGeometry(QRect(20, 30, 151, 24))
|
||||||
|
self.bBiao.setFont(font)
|
||||||
|
self.bBiao.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
||||||
|
self.label_4 = QLabel(self.groupBox_5)
|
||||||
|
self.label_4.setObjectName(u"label_4")
|
||||||
|
self.label_4.setGeometry(QRect(180, 30, 151, 16))
|
||||||
|
font4 = QFont()
|
||||||
|
font4.setFamilies([u"\u6977\u4f53"])
|
||||||
|
font4.setPointSize(11)
|
||||||
|
font4.setBold(False)
|
||||||
|
self.label_4.setFont(font4)
|
||||||
|
self.label_4.setStyleSheet(u"color: red;")
|
||||||
|
self.bAna = QPushButton(self.groupBox_5)
|
||||||
|
self.bAna.setObjectName(u"bAna")
|
||||||
|
self.bAna.setGeometry(QRect(420, 30, 151, 24))
|
||||||
|
font5 = QFont()
|
||||||
|
font5.setPointSize(12)
|
||||||
|
self.bAna.setFont(font5)
|
||||||
|
self.bAna.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
||||||
self.label_9 = QLabel(self.centralwidget)
|
self.label_9 = QLabel(self.centralwidget)
|
||||||
self.label_9.setObjectName(u"label_9")
|
self.label_9.setObjectName(u"label_9")
|
||||||
self.label_9.setGeometry(QRect(150, 0, 291, 31))
|
self.label_9.setGeometry(QRect(150, 0, 291, 31))
|
||||||
font4 = QFont()
|
font6 = QFont()
|
||||||
font4.setFamilies([u"\u6977\u4f53"])
|
font6.setFamilies([u"\u6977\u4f53"])
|
||||||
font4.setPointSize(12)
|
font6.setPointSize(12)
|
||||||
font4.setBold(False)
|
font6.setBold(False)
|
||||||
font4.setItalic(False)
|
font6.setItalic(False)
|
||||||
self.label_9.setFont(font4)
|
self.label_9.setFont(font6)
|
||||||
self.label_9.setStyleSheet(u"color:white;")
|
self.label_9.setStyleSheet(u"color:white;")
|
||||||
self.label_9.setAlignment(Qt.AlignRight|Qt.AlignTrailing|Qt.AlignVCenter)
|
self.label_9.setAlignment(Qt.AlignRight|Qt.AlignTrailing|Qt.AlignVCenter)
|
||||||
self.label_9.setMargin(6)
|
self.label_9.setMargin(6)
|
||||||
self.groupBox_6 = QGroupBox(self.centralwidget)
|
self.groupBox_6 = QGroupBox(self.centralwidget)
|
||||||
self.groupBox_6.setObjectName(u"groupBox_6")
|
self.groupBox_6.setObjectName(u"groupBox_6")
|
||||||
self.groupBox_6.setGeometry(QRect(210, 280, 371, 361))
|
self.groupBox_6.setGeometry(QRect(220, 280, 371, 251))
|
||||||
self.groupBox_6.setFont(font)
|
self.groupBox_6.setFont(font)
|
||||||
self.vLog = QListView(self.groupBox_6)
|
self.vLog = QListView(self.groupBox_6)
|
||||||
self.vLog.setObjectName(u"vLog")
|
self.vLog.setObjectName(u"vLog")
|
||||||
self.vLog.setGeometry(QRect(10, 20, 351, 321))
|
self.vLog.setGeometry(QRect(10, 20, 351, 221))
|
||||||
self.vLog.setFont(font3)
|
self.vLog.setFont(font3)
|
||||||
self.vLog.setStyleSheet(u"")
|
self.vLog.setStyleSheet(u"")
|
||||||
self.groupBox_2 = QGroupBox(self.centralwidget)
|
self.groupBox_2 = QGroupBox(self.centralwidget)
|
||||||
self.groupBox_2.setObjectName(u"groupBox_2")
|
self.groupBox_2.setObjectName(u"groupBox_2")
|
||||||
self.groupBox_2.setGeometry(QRect(10, 490, 191, 151))
|
self.groupBox_2.setGeometry(QRect(10, 380, 191, 151))
|
||||||
self.groupBox_2.setFont(font)
|
self.groupBox_2.setFont(font)
|
||||||
self.bWebSite = QPushButton(self.groupBox_2)
|
self.bWebSite = QPushButton(self.groupBox_2)
|
||||||
self.bWebSite.setObjectName(u"bWebSite")
|
self.bWebSite.setObjectName(u"bWebSite")
|
||||||
|
@ -158,10 +166,45 @@ class Ui_MainWindow(object):
|
||||||
self.bStart = QPushButton(self.groupBox_2)
|
self.bStart = QPushButton(self.groupBox_2)
|
||||||
self.bStart.setObjectName(u"bStart")
|
self.bStart.setObjectName(u"bStart")
|
||||||
self.bStart.setGeometry(QRect(20, 110, 151, 24))
|
self.bStart.setGeometry(QRect(20, 110, 151, 24))
|
||||||
font5 = QFont()
|
|
||||||
font5.setPointSize(12)
|
|
||||||
self.bStart.setFont(font5)
|
self.bStart.setFont(font5)
|
||||||
self.bStart.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
self.bStart.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
||||||
|
self.groupBox_7 = QGroupBox(self.centralwidget)
|
||||||
|
self.groupBox_7.setObjectName(u"groupBox_7")
|
||||||
|
self.groupBox_7.setGeometry(QRect(10, 670, 581, 111))
|
||||||
|
self.groupBox_7.setFont(font)
|
||||||
|
self.label_10 = QLabel(self.groupBox_7)
|
||||||
|
self.label_10.setObjectName(u"label_10")
|
||||||
|
self.label_10.setGeometry(QRect(10, 60, 91, 16))
|
||||||
|
self.label_10.setFont(font2)
|
||||||
|
self.label_11 = QLabel(self.groupBox_7)
|
||||||
|
self.label_11.setObjectName(u"label_11")
|
||||||
|
self.label_11.setGeometry(QRect(10, 80, 91, 16))
|
||||||
|
self.label_11.setFont(font2)
|
||||||
|
self.line_2 = QFrame(self.groupBox_7)
|
||||||
|
self.line_2.setObjectName(u"line_2")
|
||||||
|
self.line_2.setGeometry(QRect(10, 70, 561, 16))
|
||||||
|
self.line_2.setFrameShape(QFrame.HLine)
|
||||||
|
self.line_2.setFrameShadow(QFrame.Sunken)
|
||||||
|
self.lCalRes1 = QLabel(self.groupBox_7)
|
||||||
|
self.lCalRes1.setObjectName(u"lCalRes1")
|
||||||
|
self.lCalRes1.setGeometry(QRect(110, 55, 381, 21))
|
||||||
|
self.lCalRes1.setFont(font3)
|
||||||
|
self.lCalRes2 = QLabel(self.groupBox_7)
|
||||||
|
self.lCalRes2.setObjectName(u"lCalRes2")
|
||||||
|
self.lCalRes2.setGeometry(QRect(110, 80, 381, 16))
|
||||||
|
self.lCalRes2.setFont(font3)
|
||||||
|
self.bOpenCalRes1 = QPushButton(self.groupBox_7)
|
||||||
|
self.bOpenCalRes1.setObjectName(u"bOpenCalRes1")
|
||||||
|
self.bOpenCalRes1.setGeometry(QRect(520, 50, 51, 24))
|
||||||
|
self.bOpenCalRes1.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
||||||
|
self.bOpenCalRes2 = QPushButton(self.groupBox_7)
|
||||||
|
self.bOpenCalRes2.setObjectName(u"bOpenCalRes2")
|
||||||
|
self.bOpenCalRes2.setGeometry(QRect(520, 80, 51, 24))
|
||||||
|
self.bCal = QPushButton(self.groupBox_7)
|
||||||
|
self.bCal.setObjectName(u"bCal")
|
||||||
|
self.bCal.setGeometry(QRect(20, 30, 151, 24))
|
||||||
|
self.bCal.setFont(font)
|
||||||
|
self.bCal.setStyleSheet(u"background-color:#409EFF; color: white; border-radius: 2px")
|
||||||
MainWindow.setCentralWidget(self.centralwidget)
|
MainWindow.setCentralWidget(self.centralwidget)
|
||||||
self.menubar = QMenuBar(MainWindow)
|
self.menubar = QMenuBar(MainWindow)
|
||||||
self.menubar.setObjectName(u"menubar")
|
self.menubar.setObjectName(u"menubar")
|
||||||
|
@ -182,24 +225,32 @@ class Ui_MainWindow(object):
|
||||||
self.groupBox.setTitle(QCoreApplication.translate("MainWindow", u"1.\u5fae\u4fe1\u516c\u4f17\u53f7\u4fe1\u606f\u6293\u53d6", None))
|
self.groupBox.setTitle(QCoreApplication.translate("MainWindow", u"1.\u5fae\u4fe1\u516c\u4f17\u53f7\u4fe1\u606f\u6293\u53d6", None))
|
||||||
self.bWechat.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00\u5de5\u5177", None))
|
self.bWechat.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00\u5de5\u5177", None))
|
||||||
self.label_5.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u786e\u4fdd\u6240\u6709\u516c\u4f17\u53f7\u6293\u53d6\u5b8c\u6bd5", None))
|
self.label_5.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u786e\u4fdd\u6240\u6709\u516c\u4f17\u53f7\u6293\u53d6\u5b8c\u6bd5", None))
|
||||||
self.groupBox_3.setTitle(QCoreApplication.translate("MainWindow", u"2.\u786e\u8ba4\u5206\u6790\u5bf9\u6bd4\u5e93", None))
|
self.groupBox_5.setTitle(QCoreApplication.translate("MainWindow", u"\u6c47\u603b\u5206\u6790", None))
|
||||||
self.bBiao.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00\u5206\u6790\u6807\u51c6Excel", None))
|
|
||||||
self.label_4.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u5728\u4fee\u6539\u540e\u4fdd\u5b58\u5e76\u5173\u95ed", None))
|
|
||||||
self.groupBox_5.setTitle(QCoreApplication.translate("MainWindow", u"\u6700\u7ec8\u7ed3\u679c", None))
|
|
||||||
self.label_7.setText(QCoreApplication.translate("MainWindow", u"\u5206\u6790\u7ed3\u679cExcel:", None))
|
self.label_7.setText(QCoreApplication.translate("MainWindow", u"\u5206\u6790\u7ed3\u679cExcel:", None))
|
||||||
self.label_8.setText(QCoreApplication.translate("MainWindow", u"\u5206\u6790\u62a5\u544aWord:", None))
|
self.label_8.setText(QCoreApplication.translate("MainWindow", u"\u5206\u6790\u62a5\u544aWord:", None))
|
||||||
self.lRes1.setText("")
|
self.lRes1.setText("")
|
||||||
self.lRes2.setText("")
|
self.lRes2.setText("")
|
||||||
self.bRes1.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None))
|
self.bRes1.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None))
|
||||||
self.bRes2.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None))
|
self.bRes2.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None))
|
||||||
|
self.bBiao.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00\u5206\u6790\u6807\u51c6Excel", None))
|
||||||
|
self.label_4.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u5728\u4fee\u6539\u540e\u4fdd\u5b58\u5e76\u5173\u95ed", None))
|
||||||
|
self.bAna.setText(QCoreApplication.translate("MainWindow", u"\u5f00\u59cb\u5206\u6790", None))
|
||||||
self.label_9.setText(QCoreApplication.translate("MainWindow", u"\u4e2d\u56fd\u5efa\u6750\u603b\u9662\u5ba3\u4f20\u5de5\u4f5c\u4fe1\u606f\u5316\u7ba1\u7406\u5e73\u53f0", None))
|
self.label_9.setText(QCoreApplication.translate("MainWindow", u"\u4e2d\u56fd\u5efa\u6750\u603b\u9662\u5ba3\u4f20\u5de5\u4f5c\u4fe1\u606f\u5316\u7ba1\u7406\u5e73\u53f0", None))
|
||||||
self.groupBox_6.setTitle(QCoreApplication.translate("MainWindow", u"\u65e5\u5fd7\u663e\u793a", None))
|
self.groupBox_6.setTitle(QCoreApplication.translate("MainWindow", u"\u64cd\u4f5c\u65e5\u5fd7\u663e\u793a", None))
|
||||||
self.groupBox_2.setTitle(QCoreApplication.translate("MainWindow", u"2.\u786e\u8ba4\u9700\u8981\u6293\u53d6\u7684\u7f51\u7ad9", None))
|
self.groupBox_2.setTitle(QCoreApplication.translate("MainWindow", u"2.\u786e\u8ba4\u9700\u8981\u722c\u53d6\u7684\u5b98\u7f51", None))
|
||||||
self.bWebSite.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00\u7f51\u7ad9\u5217\u8868Excel", None))
|
self.bWebSite.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00\u7f51\u7ad9\u5217\u8868Excel", None))
|
||||||
self.label_2.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u5728\u4fee\u6539\u540e\u4fdd\u5b58\u5e76\u5173\u95ed", None))
|
self.label_2.setText(QCoreApplication.translate("MainWindow", u"\u8bf7\u5728\u4fee\u6539\u540e\u4fdd\u5b58\u5e76\u5173\u95ed", None))
|
||||||
self.label_6.setText(QCoreApplication.translate("MainWindow", u"\u5c0f\u4e8e", None))
|
self.label_6.setText(QCoreApplication.translate("MainWindow", u"\u5c0f\u4e8e", None))
|
||||||
self.label_3.setText(QCoreApplication.translate("MainWindow", u"KB-Chrome", None))
|
self.label_3.setText(QCoreApplication.translate("MainWindow", u"KB-Chrome", None))
|
||||||
self.lSize.setText(QCoreApplication.translate("MainWindow", u"20", None))
|
self.lSize.setText(QCoreApplication.translate("MainWindow", u"20", None))
|
||||||
self.bStart.setText(QCoreApplication.translate("MainWindow", u"\u5f00\u59cb\u5de1\u67e5", None))
|
self.bStart.setText(QCoreApplication.translate("MainWindow", u"\u5f00\u59cb\u722c\u53d6", None))
|
||||||
|
self.groupBox_7.setTitle(QCoreApplication.translate("MainWindow", u"\u603b\u9662\u5b98\u5fae", None))
|
||||||
|
self.label_10.setText(QCoreApplication.translate("MainWindow", u"\u6c47\u603b\u7ed3\u679cExcel:", None))
|
||||||
|
self.label_11.setText(QCoreApplication.translate("MainWindow", u"\u6c47\u603b\u6253\u5206Excel:", None))
|
||||||
|
self.lCalRes1.setText("")
|
||||||
|
self.lCalRes2.setText("")
|
||||||
|
self.bOpenCalRes1.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None))
|
||||||
|
self.bOpenCalRes2.setText(QCoreApplication.translate("MainWindow", u"\u6253\u5f00", None))
|
||||||
|
self.bCal.setText(QCoreApplication.translate("MainWindow", u"\u6c47\u603b\u6253\u5206", None))
|
||||||
# retranslateUi
|
# retranslateUi
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||||
|
|
||||||
from scrapy import signals
|
from scrapy import signals
|
||||||
|
from scrapy.http import HtmlResponse
|
||||||
|
|
||||||
# useful for handling different item types with a single interface
|
# useful for handling different item types with a single interface
|
||||||
from itemadapter import is_item, ItemAdapter
|
from itemadapter import is_item, ItemAdapter
|
||||||
|
@ -101,3 +102,13 @@ class ZcspiderDownloaderMiddleware:
|
||||||
|
|
||||||
def spider_opened(self, spider):
|
def spider_opened(self, spider):
|
||||||
spider.logger.info("Spider opened: %s" % spider.name)
|
spider.logger.info("Spider opened: %s" % spider.name)
|
||||||
|
|
||||||
|
|
||||||
|
class FilterHTMLMiddleware:
|
||||||
|
def process_response(self, request, response, spider):
|
||||||
|
if isinstance(response, HtmlResponse):
|
||||||
|
# 只接收HTML响应
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
# 忽略其他类型的资源文件
|
||||||
|
return request
|
|
@ -105,4 +105,9 @@ ITEM_PIPELINES = {
|
||||||
|
|
||||||
FEED_EXPORTERS = {
|
FEED_EXPORTERS = {
|
||||||
# 'xlsx': 'scrapy_xlsx.XlsxItemExporter',
|
# 'xlsx': 'scrapy_xlsx.XlsxItemExporter',
|
||||||
|
}
|
||||||
|
|
||||||
|
DOWNLOADER_MIDDLEWARES = {
|
||||||
|
'zcspider.middlewares.FilterHTMLMiddleware': 200,
|
||||||
|
# 其他下载中间件...
|
||||||
}
|
}
|
|
@ -39,12 +39,14 @@ class BaseSpider(scrapy.Spider):
|
||||||
def start_requests(self):
|
def start_requests(self):
|
||||||
for url in self.start_urls:
|
for url in self.start_urls:
|
||||||
url = self.fix_url_scheme(url)
|
url = self.fix_url_scheme(url)
|
||||||
r = scrapy.Request(url, dont_filter=True, headers=self.headers, callback=self.parse, errback=self.request2, meta={'download_timeout': 30})
|
self.visited_urls.add(url)
|
||||||
|
r = scrapy.Request(url, dont_filter=True, headers=self.headers, callback=self.parse, meta={'download_timeout': 30})
|
||||||
yield r
|
yield r
|
||||||
|
|
||||||
def is_file_url(self, url):
|
def is_file_url(self, url):
|
||||||
if f'.{url.split(".")[-1].lower()}' in self.ext:
|
for item in self.ext:
|
||||||
return True
|
if url.lower().endswith(item):
|
||||||
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def is_file_res(self, res):
|
def is_file_res(self, res):
|
||||||
|
@ -82,42 +84,39 @@ class BaseSpider(scrapy.Spider):
|
||||||
yield scrapy.Request(full_link, callback=self.parse, headers=self.headers, errback=self.request2, meta={'download_timeout': 30})
|
yield scrapy.Request(full_link, callback=self.parse, headers=self.headers, errback=self.request2, meta={'download_timeout': 30})
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
try:
|
if response.status >= 500:
|
||||||
if response.status >= 500:
|
return
|
||||||
return
|
if self.is_file_res(response):
|
||||||
self.visited_urls.add(response.url)
|
return
|
||||||
if self.is_file_res(response):
|
h = html2text.HTML2Text()
|
||||||
return
|
h.ignore_links = True # 忽略所有链接
|
||||||
h = html2text.HTML2Text()
|
# 提取纯文本内容
|
||||||
h.ignore_links = True # 忽略所有链接
|
# try:
|
||||||
# 提取纯文本内容
|
text = h.handle(response.text)
|
||||||
# try:
|
# except:
|
||||||
text = h.handle(response.text)
|
# text = h.handle(response.body.decode(encoding='gb18030'))
|
||||||
# except:
|
if response.status < 400:
|
||||||
# text = h.handle(response.body.decode(encoding='gb18030'))
|
yield {
|
||||||
if response.status < 400:
|
'group': self.group,
|
||||||
yield {
|
'name': self.name,
|
||||||
'group': self.group,
|
'domain': self.domain,
|
||||||
'name': self.name,
|
'url': response.url,
|
||||||
'domain': self.domain,
|
'text': text,
|
||||||
'url': response.url,
|
}
|
||||||
'text': text,
|
links = re.findall(r'href=["\']?([^"\'>]+)', response.text)
|
||||||
}
|
for link in links:
|
||||||
links = re.findall(r'href=["\']?([^"\'>]+)', response.text)
|
full_link = response.urljoin(link)
|
||||||
for link in links:
|
if not full_link.startswith('http'):
|
||||||
full_link = response.urljoin(link)
|
continue
|
||||||
if not full_link.startswith('http'):
|
if full_link not in self.visited_urls and (self.is_file_url(full_link) is False):
|
||||||
continue
|
if urlparse(full_link).netloc.replace('www.', '') == self.domain:
|
||||||
if full_link not in self.visited_urls and (self.is_file_url(full_link) is False):
|
self.visited_urls.add(response.url)
|
||||||
if urlparse(full_link).netloc.replace('www.', '') == self.domain:
|
# try:
|
||||||
# try:
|
yield scrapy.Request(full_link, callback=self.parse, headers=self.headers, meta={'download_timeout': 30})
|
||||||
yield scrapy.Request(full_link, callback=self.parse, headers=self.headers, errback=self.request2, meta={'download_timeout': 30})
|
# except ValueError:
|
||||||
# except ValueError:
|
# import traceback
|
||||||
# import traceback
|
# print(traceback.format_exc())
|
||||||
# print(traceback.format_exc())
|
# print(full_link)
|
||||||
# print(full_link)
|
|
||||||
except scrapy.exceptions.TimeoutError:
|
|
||||||
print(f'{response.url}-请求超时取消')
|
|
||||||
|
|
||||||
def closed(self, reason):
|
def closed(self, reason):
|
||||||
# This method will be called when the Spider is about to close
|
# This method will be called when the Spider is about to close
|
||||||
|
|
Loading…
Reference in New Issue