feat: 统计网站数据

This commit is contained in:
caoqianming 2024-01-11 14:39:47 +08:00
parent 3c8fc28113
commit f80b196145
2 changed files with 310 additions and 107 deletions

View File

@ -13,6 +13,7 @@ web_dir = os.path.join(BASE_DIR, 'web_dir')
output_dir = os.path.join(BASE_DIR, 'summary') output_dir = os.path.join(BASE_DIR, 'summary')
df_s = pd.read_excel(os.path.join(BASE_DIR, 'biao.xlsx'), sheet_name='筛查内容') df_s = pd.read_excel(os.path.join(BASE_DIR, 'biao.xlsx'), sheet_name='筛查内容')
def fix_url_scheme(url, default_scheme='http'): def fix_url_scheme(url, default_scheme='http'):
# 检查URL是否包含方案 # 检查URL是否包含方案
if not url.startswith('http://') and not url.startswith('https://'): if not url.startswith('http://') and not url.startswith('https://'):
@ -20,11 +21,13 @@ def fix_url_scheme(url, default_scheme='http'):
url = f'{default_scheme}://{url}' url = f'{default_scheme}://{url}'
return url return url
def trans_to_json():
    """Serialize the module-level ``df_s`` DataFrame to ``biao.json``.

    The frame is written as a JSON array with one object per row
    (``orient='records'``); non-ASCII text is kept as-is rather than
    escaped, and the file is written as UTF-8 in the current working
    directory.
    """
    records_json = df_s.to_json(orient='records', force_ascii=False)
    with open('biao.json', 'w', encoding='utf-8') as out_file:
        out_file.write(records_json)
def make_simple_csv_from_db(now: datetime): def make_simple_csv_from_db(now: datetime):
# 只查找当前月份更新的公众号数据 # 只查找当前月份更新的公众号数据
now_month_str = now.strftime('%Y-%m-%d 00:00:00') now_month_str = now.strftime('%Y-%m-%d 00:00:00')
@ -49,12 +52,14 @@ def make_simple_csv_from_db(now: datetime):
# 将数据写入CSV文件 # 将数据写入CSV文件
df.to_csv(os.path.join(wechat_dir, 'articles.csv'), index=False) df.to_csv(os.path.join(wechat_dir, 'articles.csv'), index=False)
def float_to_int(value):
    """Return ``int(value)`` when the conversion succeeds, else ``value`` unchanged.

    Intended for element-wise use on mixed-type cells: numbers and numeric
    strings become ``int`` (floats are truncated toward zero by ``int``),
    while anything ``int`` rejects is passed through untouched.

    Args:
        value: Any cell value.

    Returns:
        The converted ``int``, or the original ``value`` when ``int(value)``
        raises ``TypeError`` (e.g. ``None``), ``ValueError`` (e.g. ``'abc'``
        or NaN) or ``OverflowError`` (infinity).
    """
    try:
        return int(value)
    # Only the failures int() produces for unconvertible values are absorbed;
    # a bare ``except`` would also swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return value
def get_cbma_info_from_db_and_ana(year: str = '2023'): def get_cbma_info_from_db_and_ana(year: str = '2023'):
# 全年统计数据 # 全年统计数据
zybiz = "MzIzMDU4Njg3MA==" zybiz = "MzIzMDU4Njg3MA=="
@ -104,13 +109,53 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'):
pub_year, pub_month, pub_day; pub_year, pub_month, pub_day;
''' '''
df = pd.read_sql_query(query_gzhs, conn) df = pd.read_sql_query(query_gzhs, conn)
conn.close conn.close()
# 尝试连接官网库进行查询
import psycopg2
conn_web = None
df_web = None
try:
conn_web = psycopg2.connect(
"dbname={} user={} password={} host={} port={}".format('edn_cms', 'auditor', 'Lde78B3_cbma', '10.65.253.10', '54321'))
cur_web = conn.cursor()
query_web = f"""
SELECT
a_outer.id,
TO_CHAR(a_outer.ctime, 'YYYY') AS pub_year,
TO_CHAR(a_outer.ctime, 'MM') AS pub_month,
TO_CHAR(a_outer.ctime, 'DD') AS pub_day,
a_outer.title,
a_outer.source,
a_outer.hits,
t.title as bankuai,
a_outer.src
FROM
"a_article" a_outer
left join (
select id, title, father, path
from a_article
where father in (20110528, 19080024)
) t on a_outer.father = t.id
WHERE
a_outer.TYPE = 3
and a_outer.deleted is NULL
and EXTRACT ( YEAR FROM a_outer.ctime ) = {year}
and bankuai is not NULL
ORDER BY
a_outer.ctime;
"""
df_web = pd.read_sql_query(query_web, conn_web)
cur_web.close()
conn_web.close()
except Exception as e:
pass
# 追加总院数据来源 # 追加总院数据来源
for ind, row in df.iterrows(): for ind, row in df.iterrows():
if row['gbiz'] == zybiz: if row['gbiz'] == zybiz:
full_path = os.path.join(wechat_dir, row['nickname'], row['id'] + '.md') full_path = os.path.join(
wechat_dir, row['nickname'], row['id'] + '.md')
try: try:
with open(full_path, encoding='utf-8') as f: with open(full_path, encoding='utf-8') as f:
content = f.read() content = f.read()
@ -146,32 +191,58 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'):
# df_fx.at[ind, '供总院全年稿数'] = ((cons_dw_1)&(cons)).sum() # df_fx.at[ind, '供总院全年稿数'] = ((cons_dw_1)&(cons)).sum()
# else: # else:
# df_fx.at[ind, '供总院全年稿数'] = ((df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() # df_fx.at[ind, '供总院全年稿数'] = ((df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum()
df_fx.at[ind, '供总院全年专稿数'] = ((df['source'] == dw)&(df['gbiz']==zybiz)).sum() df_fx.at[ind, '供总院全年专稿数'] = (
df_fx.at[ind, '供总院全年组稿数'] = ((df['source'].str.contains(dw)&(df['source']!=dw))&(df['gbiz']==zybiz)).sum() (df['source'] == dw) & (df['gbiz'] == zybiz)).sum()
df_fx.at[ind, '供总院全年阅读10000及以上数'] = ((df['read_num']>=10000)&(df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() df_fx.at[ind, '供总院全年网站专稿数'] = (
df_fx.at[ind, '供总院全年阅读5000及以上数'] = ((df['read_num']>=5000)&(df['read_num']<10000)&(df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() (df_web['source'] == dw)).sum()
df_fx.at[ind, '供总院全年阅读1000及以上数'] = ((df['read_num']>=1000)&(df['read_num']<5000)&(df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() df_fx.at[ind, '供总院全年组稿数'] = ((df['source'].str.contains(
dw) & (df['source'] != dw)) & (df['gbiz'] == zybiz)).sum()
df_fx.at[ind, '供总院全年网站组稿数'] = ((df_web['source'].str.contains(
dw) & (df_web['source'] != dw))).sum()
df_fx.at[ind, '供总院全年阅读10000及以上数'] = ((df['read_num'] >= 10000) & (
df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum()
df_fx.at[ind, '供总院全年阅读5000及以上数'] = ((df['read_num'] >= 5000) & (
df['read_num'] < 10000) & (df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum()
df_fx.at[ind, '供总院全年阅读1000及以上数'] = ((df['read_num'] >= 1000) & (
df['read_num'] < 5000) & (df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum()
for i in ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月', '上半年', '下半年', '全年']: for i in ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月', '上半年', '下半年', '全年']:
if '' in i: if '' in i:
i_str = i.replace('', '').zfill(2) i_str = i.replace('', '').zfill(2)
cons_y_m = (df['pub_month'] == str(i_str)) cons_y_m = (df['pub_month'] == str(i_str))
cons_y_m_web = (df_web['pub_month'] == str(i_str))
elif i == '上半年': elif i == '上半年':
cons_y_m = (df['pub_month'] =='01')|(df['pub_month'] =='02')|(df['pub_month'] =='03')|(df['pub_month'] =='04')|(df['pub_month'] =='05')|(df['pub_month'] =='06') cons_y_m = (df['pub_month'] == '01') | (df['pub_month'] == '02') | (df['pub_month'] == '03') | (
df['pub_month'] == '04') | (df['pub_month'] == '05') | (df['pub_month'] == '06')
cons_y_m_web = (df_web['pub_month'] == '01') | (df_web['pub_month'] == '02') | (df_web['pub_month'] == '03') | (
df_web['pub_month'] == '04') | (df_web['pub_month'] == '05') | (df_web['pub_month'] == '06')
elif i == '下半年': elif i == '下半年':
cons_y_m = (df['pub_month'] =='07')|(df['pub_month'] =='08')|(df['pub_month'] =='09')|(df['pub_month'] =='10')|(df['pub_month'] =='11')|(df['pub_month'] =='12') cons_y_m = (df['pub_month'] == '07') | (df['pub_month'] == '08') | (df['pub_month'] == '09') | (
df['pub_month'] == '10') | (df['pub_month'] == '11') | (df['pub_month'] == '12')
cons_y_m_web = (df_web['pub_month'] == '07') | (df_web['pub_month'] == '08') | (df_web['pub_month'] == '09') | (
df_web['pub_month'] == '10') | (df_web['pub_month'] == '11') | (df_web['pub_month'] == '12')
elif i == '全年': elif i == '全年':
cons_y_m = pd.Series(True, index=df.index) cons_y_m = pd.Series(True, index=df.index)
cons_y_m_web = pd.Series(True, index=df_web.index)
if '' in dw: # 针对这种同一部门的 if '' in dw: # 针对这种同一部门的
cons_dw_1 = pd.Series(False, index=df.index) cons_dw_1 = pd.Series(False, index=df.index)
cons_dw_1_web = pd.Series(False, index=df_web.index)
for item in dw.split(''): for item in dw.split(''):
cons_dw_1 = (df['source'].str.contains(item)) | cons_dw_1 cons_dw_1 = (df['source'].str.contains(item)) | cons_dw_1
df_fx.at[ind, f'供总院{i}稿数'] = ((cons_dw_1)&(cons_y_m)&(df['gbiz']==zybiz)).sum() cons_dw_1_web = (df_web['source'].str.contains(item)) | cons_dw_1_web
df_fx.at[ind, f'供总院{i}稿数'] = ((cons_dw_1) & (
cons_y_m) & (df['gbiz'] == zybiz)).sum()
df_fx.at[ind, f'供总院网站{i}稿数'] = ((cons_dw_1_web) & (
cons_y_m_web)).sum()
else: else:
df_fx.at[ind, f'供总院{i}稿数'] = (df['source'].str.contains(dw)&(cons_y_m)&(df['gbiz']==zybiz)).sum() df_fx.at[ind, f'供总院{i}稿数'] = (df['source'].str.contains(
dw) & (cons_y_m) & (df['gbiz'] == zybiz)).sum()
df_fx.at[ind, f'供总院网站{i}稿数'] = (df_web['source'].str.contains(
dw) & (cons_y_m_web)).sum()
df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].fillna(0) df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].fillna(0)
df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].astype(int) df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].astype(int)
df_fx[f'供总院网站{i}稿数'] = df_fx[f'供总院网站{i}稿数'].fillna(0)
df_fx[f'供总院网站{i}稿数'] = df_fx[f'供总院网站{i}稿数'].astype(int)
if gbiz: if gbiz:
# 进行查询 # 进行查询
@ -184,9 +255,14 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'):
df_fx.at[ind, f'{i}最高点击文章'] = '' df_fx.at[ind, f'{i}最高点击文章'] = ''
if cons_sum: if cons_sum:
max_read_row = df[cons].loc[df[cons]['read_num'].idxmax()] max_read_row = df[cons].loc[df[cons]['read_num'].idxmax()]
max_read_row_list = [max_read_row['id'], max_read_row['title'], str(max_read_row['read_num']), f'{max_read_row["pub_year"]}-{max_read_row["pub_month"]}-{max_read_row["pub_day"]}', max_read_row['source']] max_read_row_list = [max_read_row['id'], max_read_row['title'], str(
max_read_row['read_num']), f'{max_read_row["pub_year"]}-{max_read_row["pub_month"]}-{max_read_row["pub_day"]}', max_read_row['source']]
df_fx.at[ind, f'{i}最高点击文章'] = '***'.join(max_read_row_list) df_fx.at[ind, f'{i}最高点击文章'] = '***'.join(max_read_row_list)
df_fx.at[ind, f'总院网站{i}发布数'] = cons_y_m_web.sum()
df_fx[f'总院网站{i}发布数'] = df_fx[f'总院网站{i}发布数'].fillna(0)
df_fx[f'总院网站{i}发布数'] = df_fx[f'总院网站{i}发布数'].astype(int)
# 矫正数据类型 # 矫正数据类型
df_fx = df_fx.applymap(float_to_int) df_fx = df_fx.applymap(float_to_int)
# 先输出原始统计数据 # 先输出原始统计数据
@ -202,29 +278,49 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'):
for ind, row in df.iterrows(): for ind, row in df.iterrows():
if row['gbiz'] == zybiz: if row['gbiz'] == zybiz:
sheet.cell(row=ind_zy+3, column=1, value=str(ind_zy+1)) sheet.cell(row=ind_zy+3, column=1, value=str(ind_zy+1))
sheet.cell(row=ind_zy+3, column=2, value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}') sheet.cell(row=ind_zy+3, column=2,
value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}')
sheet.cell(row=ind_zy+3, column=3, value=row['title']) sheet.cell(row=ind_zy+3, column=3, value=row['title'])
sheet.cell(row=ind_zy+3, column=4, value=row['source']) sheet.cell(row=ind_zy+3, column=4, value=row['source'])
sheet.cell(row=ind_zy+3, column=6, value=row['read_num']) sheet.cell(row=ind_zy+3, column=6, value=row['read_num'])
sheet.cell(row=ind_zy+3, column=7, value=row['id']) sheet.cell(row=ind_zy+3, column=7, value=row['id'])
sheet.cell(row=ind_zy+3, column=8, value=row['content_url']) sheet.cell(row=ind_zy+3, column=8, value=row['content_url'])
ind_zy = ind_zy + 1 ind_zy = ind_zy + 1
sheet_web = workbook['官方网站更新数']
sheet_web.cell(row=1, column=1, value=f'关于{year}年度中国建材总院新媒体更新情况明细表\n(网站)')
ind_zyweb = 0
for ind, row in df_web.iterrows():
sheet.cell(row=ind_zy+3, column=1, value=str(ind_zyweb+1))
sheet.cell(row=ind_zy+3, column=2, value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}')
sheet.cell(row=ind_zy+3, column=3, value=row['title'])
sheet.cell(row=ind_zy+3, column=4, value=row['source'])
sheet.cell(row=ind_zy+3, column=5, value=row['bankuai'])
ind_zyweb = ind_zyweb + 1
cbma_path = os.path.join(BASE_DIR, f'summary/{year}年_总院文章.xlsx') cbma_path = os.path.join(BASE_DIR, f'summary/{year}年_总院文章.xlsx')
workbook.save(cbma_path) workbook.save(cbma_path)
print(f'总院{year}年文章表生成完毕!') print(f'总院{year}年文章表生成完毕!')
template_cal_path = os.path.join(BASE_DIR, 'summary/template_cbma_cal.xlsx') template_cal_path = os.path.join(
BASE_DIR, 'summary/template_cbma_cal.xlsx')
workbook2 = load_workbook(template_cal_path) workbook2 = load_workbook(template_cal_path)
need_df_list = [ "瑞泰科技", "国检集团", "中材高新", "哈玻院", "中国新材院", "秦皇岛院", "西安墙材院", "咸阳陶瓷院", "钟表所", "总院北分", "中岩科技", "水泥新材院", "中建材科创院", "科建苑"] need_df_list = ["瑞泰科技", "国检集团", "中材高新", "哈玻院", "中国新材院", "秦皇岛院",
"西安墙材院", "咸阳陶瓷院", "钟表所", "总院北分", "中岩科技", "水泥新材院", "中建材科创院", "科建苑"]
sheet2 = workbook2['打分表'] sheet2 = workbook2['打分表']
sheet2.cell(row=1, column=1, value=f'中国建材总院宣传工作计分表({year}年度)') sheet2.cell(row=1, column=1, value=f'中国建材总院宣传工作计分表({year}年度)')
for ind, val in enumerate(need_df_list): for ind, val in enumerate(need_df_list):
row_ind_df_fx = df_fx['单位'].to_list().index(val) row_ind_df_fx = df_fx['单位'].to_list().index(val)
sheet2.cell(row=6, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年专稿数']) sheet2.cell(row=6, column=5+2*ind,
sheet2.cell(row=10, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年组稿数']) value=df_fx.at[row_ind_df_fx, '供总院全年专稿数'])
sheet2.cell(row=12, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年阅读10000及以上数']) sheet2.cell(row=7, column=5+2*ind,
sheet2.cell(row=13, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年阅读5000及以上数']) value=df_fx.at[row_ind_df_fx, '供总院网站全年专稿数'])
sheet2.cell(row=14, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年阅读1000及以上数']) sheet2.cell(row=10, column=5+2*ind,
value=df_fx.at[row_ind_df_fx, '供总院全年组稿数'])
sheet2.cell(row=12, column=5+2*ind,
value=df_fx.at[row_ind_df_fx, '供总院全年阅读10000及以上数'])
sheet2.cell(row=13, column=5+2*ind,
value=df_fx.at[row_ind_df_fx, '供总院全年阅读5000及以上数'])
sheet2.cell(row=14, column=5+2*ind,
value=df_fx.at[row_ind_df_fx, '供总院全年阅读1000及以上数'])
cbma_cal_path = os.path.join(BASE_DIR, f'summary/{year}年_总院打分.xlsx') cbma_cal_path = os.path.join(BASE_DIR, f'summary/{year}年_总院打分.xlsx')
workbook2.save(cbma_cal_path) workbook2.save(cbma_cal_path)
print(f'总院{year}年打分表生成完毕!') print(f'总院{year}年打分表生成完毕!')
@ -239,9 +335,11 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'):
except KeyError: except KeyError:
sheet = workbook3.copy_worksheet(workbook3['1月']) sheet = workbook3.copy_worksheet(workbook3['1月'])
sheet.title = i sheet.title = i
sheet.cell(row=1, column=1, value=f'关于{year}年度中国建材总院各企业新媒体更新情况统计表\n{i}') sheet.cell(row=1, column=1,
value=f'关于{year}年度中国建材总院各企业新媒体更新情况统计表\n{i}')
# 开始总院填充数据 # 开始总院填充数据
sheet.cell(row=4, column=3, value=df_fx.at[0, f'{i}发布数']) sheet.cell(row=4, column=3, value=df_fx.at[0, f'{i}发布数'])
sheet.cell(row=4, column=2, value=df_fx.at[0, f'总院网站{i}发布数'])
max_read_row = df_fx.at[dw_list.index('中国建材总院'), f'{i}最高点击文章'] max_read_row = df_fx.at[dw_list.index('中国建材总院'), f'{i}最高点击文章']
if max_read_row: if max_read_row:
_, title, read_num, pub_date, source = max_read_row.split('***') _, title, read_num, pub_date, source = max_read_row.split('***')
@ -250,59 +348,121 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'):
sheet.cell(row=7, column=5, value=pub_date) sheet.cell(row=7, column=5, value=pub_date)
sheet.cell(row=7, column=6, value=source) sheet.cell(row=7, column=6, value=source)
# 开始填充各单位数据 # 开始填充各单位数据
sheet.cell(row=14, column=3, value=df_fx.at[dw_list.index('瑞泰科技'), f'{i}发布数']) sheet.cell(row=14, column=3,
sheet.cell(row=14, column=6, value=df_fx.at[dw_list.index('瑞泰科技'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('瑞泰科技'), f'{i}发布数'])
sheet.cell(row=14, column=6,
value=df_fx.at[dw_list.index('瑞泰科技'), f'供总院{i}稿数'])
sheet.cell(row=14, column=5,
value=df_fx.at[dw_list.index('瑞泰科技'), f'供总院网站{i}稿数'])
sheet.cell(row=15, column=3, value=df_fx.at[dw_list.index('国检集团'), f'{i}发布数']) sheet.cell(row=15, column=3,
sheet.cell(row=15, column=6, value=df_fx.at[dw_list.index('国检集团'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('国检集团'), f'{i}发布数'])
sheet.cell(row=15, column=6,
value=df_fx.at[dw_list.index('国检集团'), f'供总院{i}稿数'])
sheet.cell(row=15, column=5,
value=df_fx.at[dw_list.index('国检集团'), f'供总院网站{i}稿数'])
sheet.cell(row=16, column=3, value=df_fx.at[dw_list.index('中材高新'), f'{i}发布数']) sheet.cell(row=16, column=3,
sheet.cell(row=16, column=6, value=df_fx.at[dw_list.index('中材高新'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('中材高新'), f'{i}发布数'])
sheet.cell(row=16, column=6,
value=df_fx.at[dw_list.index('中材高新'), f'供总院{i}稿数'])
sheet.cell(row=16, column=5,
value=df_fx.at[dw_list.index('中材高新'), f'供总院网站{i}稿数'])
sheet.cell(row=17, column=3, value=df_fx.at[dw_list.index('哈玻院'), f'{i}发布数']) sheet.cell(row=17, column=3,
sheet.cell(row=17, column=6, value=df_fx.at[dw_list.index('哈玻院'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('哈玻院'), f'{i}发布数'])
sheet.cell(row=17, column=6,
value=df_fx.at[dw_list.index('哈玻院'), f'供总院{i}稿数'])
sheet.cell(row=17, column=5,
value=df_fx.at[dw_list.index('哈玻院'), f'供总院网站{i}稿数'])
sheet.cell(row=18, column=3, value=df_fx.at[dw_list.index('中国新材院'), f'{i}发布数']) sheet.cell(row=18, column=3,
sheet.cell(row=18, column=6, value=df_fx.at[dw_list.index('中国新材院'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('中国新材院'), f'{i}发布数'])
sheet.cell(row=18, column=6,
value=df_fx.at[dw_list.index('中国新材院'), f'供总院{i}稿数'])
sheet.cell(row=18, column=5,
value=df_fx.at[dw_list.index('中国新材院'), f'供总院网站{i}稿数'])
sheet.cell(row=19, column=3, value=df_fx.at[dw_list.index('秦皇岛院'), f'{i}发布数']) sheet.cell(row=19, column=3,
sheet.cell(row=19, column=6, value=df_fx.at[dw_list.index('秦皇岛院'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('秦皇岛院'), f'{i}发布数'])
sheet.cell(row=19, column=6,
value=df_fx.at[dw_list.index('秦皇岛院'), f'供总院{i}稿数'])
sheet.cell(row=19, column=5,
value=df_fx.at[dw_list.index('秦皇岛院'), f'供总院网站{i}稿数'])
sheet.cell(row=20, column=3, value=df_fx.at[dw_list.index('西安墙材院'), f'{i}发布数']) sheet.cell(row=20, column=3,
sheet.cell(row=20, column=6, value=df_fx.at[dw_list.index('西安墙材院'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('西安墙材院'), f'{i}发布数'])
sheet.cell(row=20, column=6,
value=df_fx.at[dw_list.index('西安墙材院'), f'供总院{i}稿数'])
sheet.cell(row=20, column=5,
value=df_fx.at[dw_list.index('西安墙材院'), f'供总院网站{i}稿数'])
sheet.cell(row=21, column=3, value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'{i}发布数']) sheet.cell(row=21, column=3,
sheet.cell(row=21, column=6, value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'{i}发布数'])
sheet.cell(row=21, column=6,
value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'供总院{i}稿数'])
sheet.cell(row=21, column=5,
value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'供总院网站{i}稿数'])
sheet.cell(row=22, column=3, value=df_fx.at[dw_list.index('钟表所'), f'{i}发布数']) sheet.cell(row=22, column=3,
sheet.cell(row=22, column=6, value=df_fx.at[dw_list.index('钟表所'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('钟表所'), f'{i}发布数'])
sheet.cell(row=22, column=6,
value=df_fx.at[dw_list.index('钟表所'), f'供总院{i}稿数'])
sheet.cell(row=22, column=5,
value=df_fx.at[dw_list.index('钟表所'), f'供总院网站{i}稿数'])
# sheet.cell(row=23, column=3, value=df_fx.at[dw_list.index('总院北分'), f'{i}发布数']) # sheet.cell(row=23, column=3, value=df_fx.at[dw_list.index('总院北分'), f'{i}发布数'])
sheet.cell(row=23, column=6, value=df_fx.at[dw_list.index('总院北分'), f'供总院{i}稿数']) sheet.cell(row=23, column=6,
value=df_fx.at[dw_list.index('总院北分'), f'供总院{i}稿数'])
sheet.cell(row=23, column=5,
value=df_fx.at[dw_list.index('总院北分'), f'供总院网站{i}稿数'])
sheet.cell(row=24, column=3, value=df_fx.at[dw_list.index('中岩科技'), f'{i}发布数']) sheet.cell(row=24, column=3,
sheet.cell(row=24, column=6, value=df_fx.at[dw_list.index('中岩科技'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('中岩科技'), f'{i}发布数'])
sheet.cell(row=24, column=6,
value=df_fx.at[dw_list.index('中岩科技'), f'供总院{i}稿数'])
sheet.cell(row=24, column=5,
value=df_fx.at[dw_list.index('中岩科技'), f'供总院网站{i}稿数'])
# sheet.cell(row=25, column=3, value=df_fx.at[dw_list.index('水泥新材院'), f'{i}发布数']) # sheet.cell(row=25, column=3, value=df_fx.at[dw_list.index('水泥新材院'), f'{i}发布数'])
sheet.cell(row=25, column=6, value=df_fx.at[dw_list.index('水泥新材院'), f'供总院{i}稿数']) sheet.cell(row=25, column=6,
value=df_fx.at[dw_list.index('水泥新材院'), f'供总院{i}稿数'])
sheet.cell(row=25, column=5,
value=df_fx.at[dw_list.index('水泥新材院'), f'供总院网站{i}稿数'])
sheet.cell(row=26, column=3, value=df_fx.at[dw_list.index('中建材科创院'), f'{i}发布数']) sheet.cell(row=26, column=3,
sheet.cell(row=26, column=6, value=df_fx.at[dw_list.index('中建材科创院'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('中建材科创院'), f'{i}发布数'])
sheet.cell(row=26, column=6,
value=df_fx.at[dw_list.index('中建材科创院'), f'供总院{i}稿数'])
sheet.cell(row=26, column=5,
value=df_fx.at[dw_list.index('中建材科创院'), f'供总院网站{i}稿数'])
# sheet.cell(row=27, column=3, value=df_fx.at[dw_list.index('科建苑'), f'{i}发布数']) # sheet.cell(row=27, column=3, value=df_fx.at[dw_list.index('科建苑'), f'{i}发布数'])
sheet.cell(row=27, column=6, value=df_fx.at[dw_list.index('科建苑'), f'供总院{i}稿数']) sheet.cell(row=27, column=6,
value=df_fx.at[dw_list.index('科建苑'), f'供总院{i}稿数'])
sheet.cell(row=29, column=2, value=df_fx.at[dw_list.index('办公室(董事会办公室)'), f'供总院{i}稿数']) sheet.cell(row=27, column=5,
sheet.cell(row=30, column=2, value=df_fx.at[dw_list.index('党委组织部/人力资源部'), f'供总院{i}稿数']) value=df_fx.at[dw_list.index('科建苑'), f'供总院网站{i}稿数'])
sheet.cell(row=31, column=2, value=df_fx.at[dw_list.index('财务部'), f'供总院{i}稿数'])
sheet.cell(row=32, column=2, value=df_fx.at[dw_list.index('科技部'), f'供总院{i}稿数'])
sheet.cell(row=33, column=2, value=df_fx.at[dw_list.index('投资部'), f'供总院{i}稿数'])
sheet.cell(row=29, column=7, value=df_fx.at[dw_list.index('企业管理部、安全环保部'), f'供总院{i}稿数'])
sheet.cell(row=30, column=7, value=df_fx.at[dw_list.index('党群部/宣传统战部'), f'供总院{i}稿数'])
sheet.cell(row=31, column=7, value=df_fx.at[dw_list.index('党风办/巡察办、纪委综合室'), f'供总院{i}稿数'])
sheet.cell(row=32, column=7, value=df_fx.at[dw_list.index('监督执纪室'), f'供总院{i}稿数'])
sheet.cell(row=33, column=7, value=df_fx.at[dw_list.index('审计办公室'), f'供总院{i}稿数'])
sheet.cell(row=29, column=2,
value=df_fx.at[dw_list.index('办公室(董事会办公室)'), f'供总院{i}稿数'])
sheet.cell(row=30, column=2,
value=df_fx.at[dw_list.index('党委组织部/人力资源部'), f'供总院{i}稿数'])
sheet.cell(row=31, column=2,
value=df_fx.at[dw_list.index('财务部'), f'供总院{i}稿数'])
sheet.cell(row=32, column=2,
value=df_fx.at[dw_list.index('科技部'), f'供总院{i}稿数'])
sheet.cell(row=33, column=2,
value=df_fx.at[dw_list.index('投资部'), f'供总院{i}稿数'])
sheet.cell(row=29, column=7,
value=df_fx.at[dw_list.index('企业管理部、安全环保部'), f'供总院{i}稿数'])
sheet.cell(row=30, column=7,
value=df_fx.at[dw_list.index('党群部/宣传统战部'), f'供总院{i}稿数'])
sheet.cell(row=31, column=7,
value=df_fx.at[dw_list.index('党风办/巡察办、纪委综合室'), f'供总院{i}稿数'])
sheet.cell(row=32, column=7,
value=df_fx.at[dw_list.index('监督执纪室'), f'供总院{i}稿数'])
sheet.cell(row=33, column=7,
value=df_fx.at[dw_list.index('审计办公室'), f'供总院{i}稿数'])
cbma_month_path = os.path.join(BASE_DIR, f'summary/{year}年_单位月度.xlsx') cbma_month_path = os.path.join(BASE_DIR, f'summary/{year}年_单位月度.xlsx')
workbook3.save(cbma_month_path) workbook3.save(cbma_month_path)
@ -315,7 +475,8 @@ def make_wechat_articles_full():
df = pd.read_csv(os.path.join(wechat_dir, 'articles.csv')) df = pd.read_csv(os.path.join(wechat_dir, 'articles.csv'))
df['content'] = '' df['content'] = ''
for ind, row in df.iterrows(): for ind, row in df.iterrows():
full_path = os.path.join(wechat_dir, row['nickname'], row['id'] + '.md') full_path = os.path.join(
wechat_dir, row['nickname'], row['id'] + '.md')
try: try:
with open(full_path, encoding='utf-8') as f: with open(full_path, encoding='utf-8') as f:
df.at[ind, 'content'] = f.read() df.at[ind, 'content'] = f.read()
@ -324,6 +485,7 @@ def make_wechat_articles_full():
output_path = os.path.join(wechat_dir, 'articles_full.csv') output_path = os.path.join(wechat_dir, 'articles_full.csv')
df.to_csv(output_path) df.to_csv(output_path)
def ana_wechat(): def ana_wechat():
articles_full_path = os.path.join(wechat_dir, 'articles_full.csv') articles_full_path = os.path.join(wechat_dir, 'articles_full.csv')
if not os.path.exists(articles_full_path): if not os.path.exists(articles_full_path):
@ -361,12 +523,14 @@ def ana_wechat():
return output_data return output_data
def find_title(text):
    """Return the first ``#``-introduced fragment of ``text``, or ``"/"``.

    The search is unanchored: it matches the first ``#`` anywhere in
    ``text``, any whitespace after it, and then the rest of that line.
    The matched text (leading ``#`` included) is returned with surrounding
    whitespace stripped. When ``text`` contains no ``#`` the placeholder
    ``"/"`` is returned.
    """
    found = re.search(r'#\s*.*', text, re.MULTILINE)
    if found is None:
        return "/"
    # Trim whitespace on both ends of the matched fragment.
    return found.group(0).strip()
def ana_web(): def ana_web():
output_data = [] output_data = []
index = 1 index = 1
@ -410,6 +574,42 @@ def ana_web():
return output_data return output_data
def _preview_web_articles(year=2023):
    """Query the website CMS for articles created in ``year`` and print them.

    Connection settings are read from environment variables so that the
    database password is not stored in the source file:

    * ``CBMA_WEB_DB_NAME``     (default ``edn_cms``)
    * ``CBMA_WEB_DB_USER``     (default ``auditor``)
    * ``CBMA_WEB_DB_HOST``     (default ``10.65.253.10``)
    * ``CBMA_WEB_DB_PORT``     (default ``54321``)
    * ``CBMA_WEB_DB_PASSWORD`` (required, no default)

    Args:
        year: Calendar year matched against ``a_article.ctime``.

    Returns:
        The resulting DataFrame, or ``None`` when the password is not
        configured or the connection/query fails (the failure is printed
        instead of being silently discarded).
    """
    import psycopg2

    password = os.environ.get('CBMA_WEB_DB_PASSWORD')
    if not password:
        print('CBMA_WEB_DB_PASSWORD is not set; skipping the website query.')
        return None

    # The section filter uses ``t.title`` directly: a SELECT-list alias
    # (``bankuai``) cannot be referenced from a WHERE clause in PostgreSQL.
    # The year is passed as a bound parameter rather than formatted into the SQL.
    query = """
        SELECT
        a_outer.id,
        TO_CHAR(a_outer.ctime, 'YYYY-MM-DD') AS ctime,
        a_outer.title,
        a_outer.source,
        a_outer.hits,
        t.title as bankuai,
        a_outer.src
        FROM
        "a_article" a_outer
        left join (
        select id, title, father, path
        from a_article
        where father in (20110528, 19080024)
        ) t on a_outer.father = t.id
        WHERE
        a_outer.TYPE = 3
        and a_outer.deleted is NULL
        and EXTRACT ( YEAR FROM a_outer.ctime ) = %s
        and t.title is not NULL
        ORDER BY
        a_outer.ctime desc;
    """

    conn = None
    try:
        conn = psycopg2.connect(
            dbname=os.environ.get('CBMA_WEB_DB_NAME', 'edn_cms'),
            user=os.environ.get('CBMA_WEB_DB_USER', 'auditor'),
            password=password,
            host=os.environ.get('CBMA_WEB_DB_HOST', '10.65.253.10'),
            port=os.environ.get('CBMA_WEB_DB_PORT', '54321'),
        )
        df = pd.read_sql_query(query, conn, params=(int(year),))
    except Exception as exc:
        # Script-level boundary: report the failure rather than hiding it.
        print(f'Website article query failed: {exc}')
        return None
    finally:
        # Always release the connection, including on failure.
        if conn is not None:
            conn.close()

    print(df)
    return df


if __name__ == "__main__":
    # The full report run is currently switched off in favour of the query preview.
    # get_cbma_info_from_db_and_ana()
    _preview_web_articles(2023)

3
ruff.toml Normal file
View File

@ -0,0 +1,3 @@
line-length = 200
fix = true