diff --git a/mycode/main.py b/mycode/main.py index e952a33..af7f6af 100644 --- a/mycode/main.py +++ b/mycode/main.py @@ -13,18 +13,21 @@ web_dir = os.path.join(BASE_DIR, 'web_dir') output_dir = os.path.join(BASE_DIR, 'summary') df_s = pd.read_excel(os.path.join(BASE_DIR, 'biao.xlsx'), sheet_name='筛查内容') + def fix_url_scheme(url, default_scheme='http'): - # 检查URL是否包含方案 - if not url.startswith('http://') and not url.startswith('https://'): - # 如果没有方案,添加默认方案 - url = f'{default_scheme}://{url}' - return url + # 检查URL是否包含方案 + if not url.startswith('http://') and not url.startswith('https://'): + # 如果没有方案,添加默认方案 + url = f'{default_scheme}://{url}' + return url + def trans_to_json(): json_str = df_s.to_json(orient='records', force_ascii=False) with open('biao.json', 'w', encoding='utf-8') as f: f.write(json_str) + def make_simple_csv_from_db(now: datetime): # 只查找当前月份更新的公众号数据 now_month_str = now.strftime('%Y-%m-%d 00:00:00') @@ -49,36 +52,38 @@ def make_simple_csv_from_db(now: datetime): # 将数据写入CSV文件 df.to_csv(os.path.join(wechat_dir, 'articles.csv'), index=False) + def float_to_int(value): try: return int(value) except: return value + def get_cbma_info_from_db_and_ana(year: str = '2023'): # 全年统计数据 zybiz = "MzIzMDU4Njg3MA==" - df_fx = pd.DataFrame({"单位": [ "中国建材总院", - "瑞泰科技", "国检集团", "中材高新", "哈玻院", "中国新材院", "秦皇岛院", "西安墙材院", "咸阳陶瓷院", "钟表所", "总院北分", "中岩科技", "水泥新材院", "中建材科创院", "科建苑", "办公室(董事会办公室)", "党委组织部/人力资源部", "财务部", "科技部", "投资部", "企业管理部、安全环保部", "党群部/宣传统战部", - "党风办/巡察办、纪委综合室", "监督执纪室", "审计办公室"], - "公众号Biz": [zybiz, "MzU0MzgwMzg1NA==", "MzI1MjYzNDQ3NA==", "MzA5MDkzNDA0NQ==", "Mzg2MDg0NjkwNw==", "MzI3MTY5NTExNA==", "MzI1MzY1Njg5MQ==", "MzIxOTQwNjE2MQ==", - "Mzg3OTI0NTYzMA==", "MzA3NTU5NjM2MA==", "", "Mzg2NDgyMDM3OA==","","MzA5NTQ5MjY4Nw==", "", "", "", "", "", "", "", "", "", "", "", ], - # "供总院稿数": [], "供总院专稿数": [], "供总院组稿数": [], "供总院阅读10000及以上数": [], "供总院阅读5000及以上数": [], "供总院阅读1000及以上数": [], - # "1月发布数": [], "1月最高点击文章": [], - # "2月发布数": [], "2月最高点击文章": [], - # "3月发布数": [], "3月最高点击文章": [], - # "4月发布数": [], "4月最高点击文章": [], - # "5月发布数": [], "5月最高点击文章": [], - # "6月发布数": [], "6月最高点击文章": [], - # "7月发布数": [], "7月最高点击文章": [], - # "8月发布数": [], "8月最高点击文章": [], - # "9月发布数": [], "9月最高点击文章": [], - # "10月发布数": [], "10月最高点击文章": [], - # "11月发布数": [], "11月最高点击文章": [], - # "12月发布数": [], "12月最高点击文章": [], - # "半年发布数": [], "半年最高点击文章": [], - # "全年发布数": [], "全年最高点击文章": [] - }) + df_fx = pd.DataFrame({"单位": ["中国建材总院", + "瑞泰科技", "国检集团", "中材高新", "哈玻院", "中国新材院", "秦皇岛院", "西安墙材院", "咸阳陶瓷院", "钟表所", "总院北分", "中岩科技", "水泥新材院", "中建材科创院", "科建苑", "办公室(董事会办公室)", "党委组织部/人力资源部", "财务部", "科技部", "投资部", "企业管理部、安全环保部", "党群部/宣传统战部", + "党风办/巡察办、纪委综合室", "监督执纪室", "审计办公室"], + "公众号Biz": [zybiz, "MzU0MzgwMzg1NA==", "MzI1MjYzNDQ3NA==", "MzA5MDkzNDA0NQ==", "Mzg2MDg0NjkwNw==", "MzI3MTY5NTExNA==", "MzI1MzY1Njg5MQ==", "MzIxOTQwNjE2MQ==", + "Mzg3OTI0NTYzMA==", "MzA3NTU5NjM2MA==", "", "Mzg2NDgyMDM3OA==", "", "MzA5NTQ5MjY4Nw==", "", "", "", "", "", "", "", "", "", "", "", ], + # "供总院稿数": [], "供总院专稿数": [], "供总院组稿数": [], "供总院阅读10000及以上数": [], "供总院阅读5000及以上数": [], "供总院阅读1000及以上数": [], + # "1月发布数": [], "1月最高点击文章": [], + # "2月发布数": [], "2月最高点击文章": [], + # "3月发布数": [], "3月最高点击文章": [], + # "4月发布数": [], "4月最高点击文章": [], + # "5月发布数": [], "5月最高点击文章": [], + # "6月发布数": [], "6月最高点击文章": [], + # "7月发布数": [], "7月最高点击文章": [], + # "8月发布数": [], "8月最高点击文章": [], + # "9月发布数": [], "9月最高点击文章": [], + # "10月发布数": [], "10月最高点击文章": [], + # "11月发布数": [], "11月最高点击文章": [], + # "12月发布数": [], "12月最高点击文章": [], + # "半年发布数": [], "半年最高点击文章": [], + # "全年发布数": [], "全年最高点击文章": [] + }) # 查询所有指定公众号的文章并按年/月排序 conn = sqlite3.connect(os.path.join(BASE_DIR, 'db_folder/test.db')) query_gzhs = f''' @@ -104,13 +109,53 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): pub_year, pub_month, pub_day; ''' df = pd.read_sql_query(query_gzhs, conn) - conn.close + conn.close() + + # 尝试连接官网库进行查询 + import psycopg2 + conn_web = None + df_web = None + try: + conn_web = psycopg2.connect( + "dbname={} user={} password={} host={} port={}".format('edn_cms', 'auditor', 'Lde78B3_cbma', '10.65.253.10', '54321')) + cur_web = conn.cursor() + query_web = f""" + SELECT + a_outer.id, + TO_CHAR(a_outer.ctime, 'YYYY') AS pub_year, + TO_CHAR(a_outer.ctime, 'MM') AS pub_month, + TO_CHAR(a_outer.ctime, 'DD') AS pub_day, + a_outer.title, + a_outer.source, + a_outer.hits, + t.title as bankuai, + a_outer.src + FROM + "a_article" a_outer + left join ( + select id, title, father, path + from a_article + where father in (20110528, 19080024) + ) t on a_outer.father = t.id + WHERE + a_outer.TYPE = 3 + and a_outer.deleted is NULL + and EXTRACT ( YEAR FROM a_outer.ctime ) = {year} + and bankuai is not NULL + ORDER BY + a_outer.ctime; + """ + df_web = pd.read_sql_query(query_web, conn_web) + cur_web.close() + conn_web.close() + except Exception as e: + pass # 追加总院数据来源 - for ind, row in df.iterrows(): if row['gbiz'] == zybiz: - full_path = os.path.join(wechat_dir, row['nickname'], row['id'] + '.md') + full_path = os.path.join( + wechat_dir, row['nickname'], row['id'] + '.md') try: with open(full_path, encoding='utf-8') as f: content = f.read() @@ -136,7 +181,7 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): for ind, row in df_fx.iterrows(): dw = row['单位'] gbiz = row['公众号Biz'] - + # 全年对总院供给统计 # if '、' in dw: # 针对这种同一部门的 # cons = (df['gbiz']==zybiz) @@ -146,37 +191,63 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): # df_fx.at[ind, '供总院全年稿数'] = ((cons_dw_1)&(cons)).sum() # else: # df_fx.at[ind, '供总院全年稿数'] = ((df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() - df_fx.at[ind, '供总院全年专稿数'] = ((df['source'] == dw)&(df['gbiz']==zybiz)).sum() - df_fx.at[ind, '供总院全年组稿数'] = ((df['source'].str.contains(dw)&(df['source']!=dw))&(df['gbiz']==zybiz)).sum() - df_fx.at[ind, '供总院全年阅读10000及以上数'] = ((df['read_num']>=10000)&(df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() - df_fx.at[ind, '供总院全年阅读5000及以上数'] = ((df['read_num']>=5000)&(df['read_num']<10000)&(df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() - df_fx.at[ind, '供总院全年阅读1000及以上数'] = ((df['read_num']>=1000)&(df['read_num']<5000)&(df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() + df_fx.at[ind, '供总院全年专稿数'] = ( + (df['source'] == dw) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, '供总院全年网站专稿数'] = ( + (df_web['source'] == dw)).sum() + df_fx.at[ind, '供总院全年组稿数'] = ((df['source'].str.contains( + dw) & (df['source'] != dw)) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, '供总院全年网站组稿数'] = ((df_web['source'].str.contains( + dw) & (df_web['source'] != dw))).sum() + df_fx.at[ind, '供总院全年阅读10000及以上数'] = ((df['read_num'] >= 10000) & ( + df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, '供总院全年阅读5000及以上数'] = ((df['read_num'] >= 5000) & ( + df['read_num'] < 10000) & (df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, '供总院全年阅读1000及以上数'] = ((df['read_num'] >= 1000) & ( + df['read_num'] < 5000) & (df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum() for i in ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月', '上半年', '下半年', '全年']: if '月' in i: i_str = i.replace('月', '').zfill(2) - cons_y_m = (df['pub_month']==str(i_str)) + cons_y_m = (df['pub_month'] == str(i_str)) + cons_y_m_web = (df_web['pub_month'] == str(i_str)) elif i == '上半年': - cons_y_m = (df['pub_month'] =='01')|(df['pub_month'] =='02')|(df['pub_month'] =='03')|(df['pub_month'] =='04')|(df['pub_month'] =='05')|(df['pub_month'] =='06') + cons_y_m = (df['pub_month'] == '01') | (df['pub_month'] == '02') | (df['pub_month'] == '03') | ( + df['pub_month'] == '04') | (df['pub_month'] == '05') | (df['pub_month'] == '06') + cons_y_m_web = (df_web['pub_month'] == '01') | (df_web['pub_month'] == '02') | (df_web['pub_month'] == '03') | ( + df_web['pub_month'] == '04') | (df_web['pub_month'] == '05') | (df_web['pub_month'] == '06') elif i == '下半年': - cons_y_m = (df['pub_month'] =='07')|(df['pub_month'] =='08')|(df['pub_month'] =='09')|(df['pub_month'] =='10')|(df['pub_month'] =='11')|(df['pub_month'] =='12') + cons_y_m = (df['pub_month'] == '07') | (df['pub_month'] == '08') | (df['pub_month'] == '09') | ( + df['pub_month'] == '10') | (df['pub_month'] == '11') | (df['pub_month'] == '12') + cons_y_m_web = (df_web['pub_month'] == '07') | (df_web['pub_month'] == '08') | (df_web['pub_month'] == '09') | ( + df_web['pub_month'] == '10') | (df_web['pub_month'] == '11') | (df_web['pub_month'] == '12') elif i == '全年': cons_y_m = pd.Series(True, index=df.index) - + cons_y_m_web = pd.Series(True, index=df_web.index) if '、' in dw: # 针对这种同一部门的 cons_dw_1 = pd.Series(False, index=df.index) + cons_dw_1_web = pd.Series(False, index=df_web.index) for item in dw.split('、'): - cons_dw_1 = (df['source'].str.contains(item))|cons_dw_1 - df_fx.at[ind, f'供总院{i}稿数'] = ((cons_dw_1)&(cons_y_m)&(df['gbiz']==zybiz)).sum() + cons_dw_1 = (df['source'].str.contains(item)) | cons_dw_1 + cons_dw_1_web = (df_web['source'].str.contains(item)) | cons_dw_1_web + df_fx.at[ind, f'供总院{i}稿数'] = ((cons_dw_1) & ( + cons_y_m) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, f'供总院网站{i}稿数'] = ((cons_dw_1_web) & ( + cons_y_m_web)).sum() else: - df_fx.at[ind, f'供总院{i}稿数'] = (df['source'].str.contains(dw)&(cons_y_m)&(df['gbiz']==zybiz)).sum() + df_fx.at[ind, f'供总院{i}稿数'] = (df['source'].str.contains( + dw) & (cons_y_m) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, f'供总院网站{i}稿数'] = (df_web['source'].str.contains( + dw) & (cons_y_m_web)).sum() df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].fillna(0) df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].astype(int) + df_fx[f'供总院网站{i}稿数'] = df_fx[f'供总院网站{i}稿数'].fillna(0) + df_fx[f'供总院网站{i}稿数'] = df_fx[f'供总院网站{i}稿数'].astype(int) if gbiz: # 进行查询 # 条件 - cons = (cons_y_m)&(df['gbiz']==gbiz) + cons = (cons_y_m) & (df['gbiz'] == gbiz) cons_sum = (cons).sum() df_fx.at[ind, f'{i}发布数'] = cons_sum df_fx[f'{i}发布数'] = df_fx[f'{i}发布数'].fillna(0) @@ -184,9 +255,14 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): df_fx.at[ind, f'{i}最高点击文章'] = '' if cons_sum: max_read_row = df[cons].loc[df[cons]['read_num'].idxmax()] - max_read_row_list = [max_read_row['id'], max_read_row['title'], str(max_read_row['read_num']), f'{max_read_row["pub_year"]}-{max_read_row["pub_month"]}-{max_read_row["pub_day"]}', max_read_row['source']] + max_read_row_list = [max_read_row['id'], max_read_row['title'], str( + max_read_row['read_num']), f'{max_read_row["pub_year"]}-{max_read_row["pub_month"]}-{max_read_row["pub_day"]}', max_read_row['source']] df_fx.at[ind, f'{i}最高点击文章'] = '***'.join(max_read_row_list) + df_fx.at[ind, f'总院网站{i}发布数'] = cons_y_m_web.sum() + df_fx[f'总院网站{i}发布数'] = df_fx[f'总院网站{i}发布数'].fillna(0) + df_fx[f'总院网站{i}发布数'] = df_fx[f'总院网站{i}发布数'].astype(int) + # 矫正数据类型 df_fx = df_fx.applymap(float_to_int) # 先输出原始统计数据 @@ -202,29 +278,49 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): for ind, row in df.iterrows(): if row['gbiz'] == zybiz: sheet.cell(row=ind_zy+3, column=1, value=str(ind_zy+1)) - sheet.cell(row=ind_zy+3, column=2, value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}') + sheet.cell(row=ind_zy+3, column=2, + value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}') sheet.cell(row=ind_zy+3, column=3, value=row['title']) sheet.cell(row=ind_zy+3, column=4, value=row['source']) sheet.cell(row=ind_zy+3, column=6, value=row['read_num']) sheet.cell(row=ind_zy+3, column=7, value=row['id']) sheet.cell(row=ind_zy+3, column=8, value=row['content_url']) ind_zy = ind_zy + 1 + sheet_web = workbook['官方网站更新数'] + sheet_web.cell(row=1, column=1, value=f'关于{year}年度中国建材总院新媒体更新情况明细表\n(网站)') + ind_zyweb = 0 + for ind, row in df_web.iterrows(): + sheet.cell(row=ind_zy+3, column=1, value=str(ind_zyweb+1)) + sheet.cell(row=ind_zy+3, column=2, value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}') + sheet.cell(row=ind_zy+3, column=3, value=row['title']) + sheet.cell(row=ind_zy+3, column=4, value=row['source']) + sheet.cell(row=ind_zy+3, column=5, value=row['bankuai']) + ind_zyweb = ind_zyweb + 1 cbma_path = os.path.join(BASE_DIR, f'summary/{year}年_总院文章.xlsx') workbook.save(cbma_path) print(f'总院{year}年文章表生成完毕!') - template_cal_path = os.path.join(BASE_DIR, 'summary/template_cbma_cal.xlsx') + template_cal_path = os.path.join( + BASE_DIR, 'summary/template_cbma_cal.xlsx') workbook2 = load_workbook(template_cal_path) - need_df_list = [ "瑞泰科技", "国检集团", "中材高新", "哈玻院", "中国新材院", "秦皇岛院", "西安墙材院", "咸阳陶瓷院", "钟表所", "总院北分", "中岩科技", "水泥新材院", "中建材科创院", "科建苑"] - sheet2= workbook2['打分表'] + need_df_list = ["瑞泰科技", "国检集团", "中材高新", "哈玻院", "中国新材院", "秦皇岛院", + "西安墙材院", "咸阳陶瓷院", "钟表所", "总院北分", "中岩科技", "水泥新材院", "中建材科创院", "科建苑"] + sheet2 = workbook2['打分表'] sheet2.cell(row=1, column=1, value=f'中国建材总院宣传工作计分表({year}年度)') for ind, val in enumerate(need_df_list): row_ind_df_fx = df_fx['单位'].to_list().index(val) - sheet2.cell(row=6, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年专稿数']) - sheet2.cell(row=10, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年组稿数']) - sheet2.cell(row=12, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年阅读10000及以上数']) - sheet2.cell(row=13, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年阅读5000及以上数']) - sheet2.cell(row=14, column=5+2*ind, value=df_fx.at[row_ind_df_fx, '供总院全年阅读1000及以上数']) + sheet2.cell(row=6, column=5+2*ind, + value=df_fx.at[row_ind_df_fx, '供总院全年专稿数']) + sheet2.cell(row=7, column=5+2*ind, + value=df_fx.at[row_ind_df_fx, '供总院网站全年专稿数']) + sheet2.cell(row=10, column=5+2*ind, + value=df_fx.at[row_ind_df_fx, '供总院全年组稿数']) + sheet2.cell(row=12, column=5+2*ind, + value=df_fx.at[row_ind_df_fx, '供总院全年阅读10000及以上数']) + sheet2.cell(row=13, column=5+2*ind, + value=df_fx.at[row_ind_df_fx, '供总院全年阅读5000及以上数']) + sheet2.cell(row=14, column=5+2*ind, + value=df_fx.at[row_ind_df_fx, '供总院全年阅读1000及以上数']) cbma_cal_path = os.path.join(BASE_DIR, f'summary/{year}年_总院打分.xlsx') workbook2.save(cbma_cal_path) print(f'总院{year}年打分表生成完毕!') @@ -235,13 +331,15 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): workbook3 = load_workbook(template_month_path) for i in ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月', '上半年', '下半年', '全年']: try: - sheet= workbook3[i] + sheet = workbook3[i] except KeyError: sheet = workbook3.copy_worksheet(workbook3['1月']) sheet.title = i - sheet.cell(row=1, column=1, value=f'关于{year}年度中国建材总院各企业新媒体更新情况统计表\n({i})') + sheet.cell(row=1, column=1, + value=f'关于{year}年度中国建材总院各企业新媒体更新情况统计表\n({i})') # 开始总院填充数据 sheet.cell(row=4, column=3, value=df_fx.at[0, f'{i}发布数']) + sheet.cell(row=4, column=2, value=df_fx.at[0, f'总院网站{i}发布数']) max_read_row = df_fx.at[dw_list.index('中国建材总院'), f'{i}最高点击文章'] if max_read_row: _, title, read_num, pub_date, source = max_read_row.split('***') @@ -250,59 +348,121 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): sheet.cell(row=7, column=5, value=pub_date) sheet.cell(row=7, column=6, value=source) # 开始填充各单位数据 - sheet.cell(row=14, column=3, value=df_fx.at[dw_list.index('瑞泰科技'), f'{i}发布数']) - sheet.cell(row=14, column=6, value=df_fx.at[dw_list.index('瑞泰科技'), f'供总院{i}稿数']) + sheet.cell(row=14, column=3, + value=df_fx.at[dw_list.index('瑞泰科技'), f'{i}发布数']) + sheet.cell(row=14, column=6, + value=df_fx.at[dw_list.index('瑞泰科技'), f'供总院{i}稿数']) + sheet.cell(row=14, column=5, + value=df_fx.at[dw_list.index('瑞泰科技'), f'供总院网站{i}稿数']) - sheet.cell(row=15, column=3, value=df_fx.at[dw_list.index('国检集团'), f'{i}发布数']) - sheet.cell(row=15, column=6, value=df_fx.at[dw_list.index('国检集团'), f'供总院{i}稿数']) - - sheet.cell(row=16, column=3, value=df_fx.at[dw_list.index('中材高新'), f'{i}发布数']) - sheet.cell(row=16, column=6, value=df_fx.at[dw_list.index('中材高新'), f'供总院{i}稿数']) - - sheet.cell(row=17, column=3, value=df_fx.at[dw_list.index('哈玻院'), f'{i}发布数']) - sheet.cell(row=17, column=6, value=df_fx.at[dw_list.index('哈玻院'), f'供总院{i}稿数']) - - sheet.cell(row=18, column=3, value=df_fx.at[dw_list.index('中国新材院'), f'{i}发布数']) - sheet.cell(row=18, column=6, value=df_fx.at[dw_list.index('中国新材院'), f'供总院{i}稿数']) - - sheet.cell(row=19, column=3, value=df_fx.at[dw_list.index('秦皇岛院'), f'{i}发布数']) - sheet.cell(row=19, column=6, value=df_fx.at[dw_list.index('秦皇岛院'), f'供总院{i}稿数']) - - sheet.cell(row=20, column=3, value=df_fx.at[dw_list.index('西安墙材院'), f'{i}发布数']) - sheet.cell(row=20, column=6, value=df_fx.at[dw_list.index('西安墙材院'), f'供总院{i}稿数']) - - sheet.cell(row=21, column=3, value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'{i}发布数']) - sheet.cell(row=21, column=6, value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'供总院{i}稿数']) - - sheet.cell(row=22, column=3, value=df_fx.at[dw_list.index('钟表所'), f'{i}发布数']) - sheet.cell(row=22, column=6, value=df_fx.at[dw_list.index('钟表所'), f'供总院{i}稿数']) + sheet.cell(row=15, column=3, + value=df_fx.at[dw_list.index('国检集团'), f'{i}发布数']) + sheet.cell(row=15, column=6, + value=df_fx.at[dw_list.index('国检集团'), f'供总院{i}稿数']) + sheet.cell(row=15, column=5, + value=df_fx.at[dw_list.index('国检集团'), f'供总院网站{i}稿数']) - # sheet.cell(row=23, column=3, value=df_fx.at[dw_list.index('总院北分'), f'{i}发布数']) - sheet.cell(row=23, column=6, value=df_fx.at[dw_list.index('总院北分'), f'供总院{i}稿数']) + sheet.cell(row=16, column=3, + value=df_fx.at[dw_list.index('中材高新'), f'{i}发布数']) + sheet.cell(row=16, column=6, + value=df_fx.at[dw_list.index('中材高新'), f'供总院{i}稿数']) + sheet.cell(row=16, column=5, + value=df_fx.at[dw_list.index('中材高新'), f'供总院网站{i}稿数']) + + sheet.cell(row=17, column=3, + value=df_fx.at[dw_list.index('哈玻院'), f'{i}发布数']) + sheet.cell(row=17, column=6, + value=df_fx.at[dw_list.index('哈玻院'), f'供总院{i}稿数']) + sheet.cell(row=17, column=5, + value=df_fx.at[dw_list.index('哈玻院'), f'供总院网站{i}稿数']) + + sheet.cell(row=18, column=3, + value=df_fx.at[dw_list.index('中国新材院'), f'{i}发布数']) + sheet.cell(row=18, column=6, + value=df_fx.at[dw_list.index('中国新材院'), f'供总院{i}稿数']) + sheet.cell(row=18, column=5, + value=df_fx.at[dw_list.index('中国新材院'), f'供总院网站{i}稿数']) - sheet.cell(row=24, column=3, value=df_fx.at[dw_list.index('中岩科技'), f'{i}发布数']) - sheet.cell(row=24, column=6, value=df_fx.at[dw_list.index('中岩科技'), f'供总院{i}稿数']) + sheet.cell(row=19, column=3, + value=df_fx.at[dw_list.index('秦皇岛院'), f'{i}发布数']) + sheet.cell(row=19, column=6, + value=df_fx.at[dw_list.index('秦皇岛院'), f'供总院{i}稿数']) + sheet.cell(row=19, column=5, + value=df_fx.at[dw_list.index('秦皇岛院'), f'供总院网站{i}稿数']) + + sheet.cell(row=20, column=3, + value=df_fx.at[dw_list.index('西安墙材院'), f'{i}发布数']) + sheet.cell(row=20, column=6, + value=df_fx.at[dw_list.index('西安墙材院'), f'供总院{i}稿数']) + sheet.cell(row=20, column=5, + value=df_fx.at[dw_list.index('西安墙材院'), f'供总院网站{i}稿数']) + + sheet.cell(row=21, column=3, + value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'{i}发布数']) + sheet.cell(row=21, column=6, + value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'供总院{i}稿数']) + sheet.cell(row=21, column=5, + value=df_fx.at[dw_list.index('咸阳陶瓷院'), f'供总院网站{i}稿数']) + + sheet.cell(row=22, column=3, + value=df_fx.at[dw_list.index('钟表所'), f'{i}发布数']) + sheet.cell(row=22, column=6, + value=df_fx.at[dw_list.index('钟表所'), f'供总院{i}稿数']) + sheet.cell(row=22, column=5, + value=df_fx.at[dw_list.index('钟表所'), f'供总院网站{i}稿数']) + + # sheet.cell(row=23, column=3, value=df_fx.at[dw_list.index('总院北分'), f'{i}发布数']) + sheet.cell(row=23, column=6, + value=df_fx.at[dw_list.index('总院北分'), f'供总院{i}稿数']) + sheet.cell(row=23, column=5, + value=df_fx.at[dw_list.index('总院北分'), f'供总院网站{i}稿数']) + + sheet.cell(row=24, column=3, + value=df_fx.at[dw_list.index('中岩科技'), f'{i}发布数']) + sheet.cell(row=24, column=6, + value=df_fx.at[dw_list.index('中岩科技'), f'供总院{i}稿数']) + sheet.cell(row=24, column=5, + value=df_fx.at[dw_list.index('中岩科技'), f'供总院网站{i}稿数']) # sheet.cell(row=25, column=3, value=df_fx.at[dw_list.index('水泥新材院'), f'{i}发布数']) - sheet.cell(row=25, column=6, value=df_fx.at[dw_list.index('水泥新材院'), f'供总院{i}稿数']) + sheet.cell(row=25, column=6, + value=df_fx.at[dw_list.index('水泥新材院'), f'供总院{i}稿数']) + sheet.cell(row=25, column=5, + value=df_fx.at[dw_list.index('水泥新材院'), f'供总院网站{i}稿数']) - sheet.cell(row=26, column=3, value=df_fx.at[dw_list.index('中建材科创院'), f'{i}发布数']) - sheet.cell(row=26, column=6, value=df_fx.at[dw_list.index('中建材科创院'), f'供总院{i}稿数']) + sheet.cell(row=26, column=3, + value=df_fx.at[dw_list.index('中建材科创院'), f'{i}发布数']) + sheet.cell(row=26, column=6, + value=df_fx.at[dw_list.index('中建材科创院'), f'供总院{i}稿数']) + sheet.cell(row=26, column=5, + value=df_fx.at[dw_list.index('中建材科创院'), f'供总院网站{i}稿数']) # sheet.cell(row=27, column=3, value=df_fx.at[dw_list.index('科建苑'), f'{i}发布数']) - sheet.cell(row=27, column=6, value=df_fx.at[dw_list.index('科建苑'), f'供总院{i}稿数']) + sheet.cell(row=27, column=6, + value=df_fx.at[dw_list.index('科建苑'), f'供总院{i}稿数']) + sheet.cell(row=27, column=5, + value=df_fx.at[dw_list.index('科建苑'), f'供总院网站{i}稿数']) - sheet.cell(row=29, column=2, value=df_fx.at[dw_list.index('办公室(董事会办公室)'), f'供总院{i}稿数']) - sheet.cell(row=30, column=2, value=df_fx.at[dw_list.index('党委组织部/人力资源部'), f'供总院{i}稿数']) - sheet.cell(row=31, column=2, value=df_fx.at[dw_list.index('财务部'), f'供总院{i}稿数']) - sheet.cell(row=32, column=2, value=df_fx.at[dw_list.index('科技部'), f'供总院{i}稿数']) - sheet.cell(row=33, column=2, value=df_fx.at[dw_list.index('投资部'), f'供总院{i}稿数']) - sheet.cell(row=29, column=7, value=df_fx.at[dw_list.index('企业管理部、安全环保部'), f'供总院{i}稿数']) - sheet.cell(row=30, column=7, value=df_fx.at[dw_list.index('党群部/宣传统战部'), f'供总院{i}稿数']) - sheet.cell(row=31, column=7, value=df_fx.at[dw_list.index('党风办/巡察办、纪委综合室'), f'供总院{i}稿数']) - sheet.cell(row=32, column=7, value=df_fx.at[dw_list.index('监督执纪室'), f'供总院{i}稿数']) - sheet.cell(row=33, column=7, value=df_fx.at[dw_list.index('审计办公室'), f'供总院{i}稿数']) - + sheet.cell(row=29, column=2, + value=df_fx.at[dw_list.index('办公室(董事会办公室)'), f'供总院{i}稿数']) + sheet.cell(row=30, column=2, + value=df_fx.at[dw_list.index('党委组织部/人力资源部'), f'供总院{i}稿数']) + sheet.cell(row=31, column=2, + value=df_fx.at[dw_list.index('财务部'), f'供总院{i}稿数']) + sheet.cell(row=32, column=2, + value=df_fx.at[dw_list.index('科技部'), f'供总院{i}稿数']) + sheet.cell(row=33, column=2, + value=df_fx.at[dw_list.index('投资部'), f'供总院{i}稿数']) + sheet.cell(row=29, column=7, + value=df_fx.at[dw_list.index('企业管理部、安全环保部'), f'供总院{i}稿数']) + sheet.cell(row=30, column=7, + value=df_fx.at[dw_list.index('党群部/宣传统战部'), f'供总院{i}稿数']) + sheet.cell(row=31, column=7, + value=df_fx.at[dw_list.index('党风办/巡察办、纪委综合室'), f'供总院{i}稿数']) + sheet.cell(row=32, column=7, + value=df_fx.at[dw_list.index('监督执纪室'), f'供总院{i}稿数']) + sheet.cell(row=33, column=7, + value=df_fx.at[dw_list.index('审计办公室'), f'供总院{i}稿数']) cbma_month_path = os.path.join(BASE_DIR, f'summary/{year}年_单位月度.xlsx') workbook3.save(cbma_month_path) @@ -312,10 +472,11 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): def make_wechat_articles_full(): - df = pd.read_csv(os.path.join(wechat_dir, 'articles.csv')) + df = pd.read_csv(os.path.join(wechat_dir, 'articles.csv')) df['content'] = '' for ind, row in df.iterrows(): - full_path = os.path.join(wechat_dir, row['nickname'], row['id'] + '.md') + full_path = os.path.join( + wechat_dir, row['nickname'], row['id'] + '.md') try: with open(full_path, encoding='utf-8') as f: df.at[ind, 'content'] = f.read() @@ -324,6 +485,7 @@ def make_wechat_articles_full(): output_path = os.path.join(wechat_dir, 'articles_full.csv') df.to_csv(output_path) + def ana_wechat(): articles_full_path = os.path.join(wechat_dir, 'articles_full.csv') if not os.path.exists(articles_full_path): @@ -341,7 +503,7 @@ def ana_wechat(): if not result.empty: for ind2, row2 in result.iterrows(): - if row['错误表述'] == '“两学一做”学习' and '“两学一做”学习教育' in row2['content']: + if row['错误表述'] == '“两学一做”学习' and '“两学一做”学习教育' in row2['content']: continue if row['错误表述'] == '20大': continue @@ -361,12 +523,14 @@ def ana_wechat(): return output_data + def find_title(text): match = re.search(r'#\s*.*', text, re.MULTILINE) if match: return match.group(0).strip() # 去除两边的空白字符 return "/" + def ana_web(): output_data = [] index = 1 @@ -382,15 +546,15 @@ def ana_web(): name = row['主办'] url = fix_url_scheme(row['地址'].strip()) domain = urlparse(url).netloc.replace('www.', '') - full_path = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx') - if os.path.exists(full_path) and os.path.getsize(full_path) > 0: + full_path = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx') + if os.path.exists(full_path) and os.path.getsize(full_path) > 0: df = pd.read_excel(os.path.join(full_path), engine='openpyxl') for ind, row in df_s.iterrows(): mask = df['text'].str.contains(row['错误表述'], na=False) result = df[mask] if not result.empty: for ind2, row2 in result.iterrows(): - if row['错误表述'] == '“两学一做”学习' and '“两学一做”学习教育' in row2['text']: + if row['错误表述'] == '“两学一做”学习' and '“两学一做”学习教育' in row2['text']: continue if row['错误表述'] == '20大': continue @@ -410,6 +574,42 @@ def ana_web(): return output_data -if __name__ == "__main__": - get_cbma_info_from_db_and_ana() +if __name__ == "__main__": + # get_cbma_info_from_db_and_ana() + import psycopg2 + conn = None + try: + conn = psycopg2.connect( + "dbname={} user={} password={} host={} port={}".format('edn_cms', 'auditor', 'Lde78B3_cbma', '10.65.253.10', '54321')) + cur = conn.cursor() + year = 2023 + query = f""" + SELECT + a_outer.id, + TO_CHAR(a_outer.ctime, 'YYYY-MM-DD') AS ctime, + a_outer.title, + a_outer.source, + a_outer.hits, + t.title as bankuai, + a_outer.src + FROM + "a_article" a_outer + left join ( + select id, title, father, path + from a_article + where father in (20110528, 19080024) + ) t on a_outer.father = t.id + WHERE + a_outer.TYPE = 3 + and a_outer.deleted is NULL + and EXTRACT ( YEAR FROM a_outer.ctime ) = {year} + and bankuai is not NULL + ORDER BY + a_outer.ctime desc; + """ + df = pd.read_sql_query(query, conn) + print(df) + cur.close() + except Exception as e: + pass \ No newline at end of file diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..c514775 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,3 @@ + +line-length = 200 +fix = true \ No newline at end of file