diff --git a/mycode/main.py b/mycode/main.py index 2c8a795..2668747 100644 --- a/mycode/main.py +++ b/mycode/main.py @@ -150,6 +150,7 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): df_web = pd.read_sql_query(query_web, conn_web) conn_web.close() except Exception as e: + df_web = None print(e) df['source'] = '' @@ -195,12 +196,12 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): # df_fx.at[ind, '供总院全年稿数'] = ((df['source'].str.contains(dw))&(df['gbiz']==zybiz)).sum() df_fx.at[ind, '供总院全年专稿数'] = ( (df['source'] == dw) & (df['gbiz'] == zybiz)).sum() - df_fx.at[ind, '供总院网站全年专稿数'] = ( - (df_web['source'] == dw)).sum() df_fx.at[ind, '供总院全年组稿数'] = ((df['source'].str.contains( dw) & (df['source'] != dw)) & (df['gbiz'] == zybiz)).sum() + df_fx.at[ind, '供总院网站全年专稿数'] = ( + (df_web['source'] == dw)).sum() if df_web is not None else 0 df_fx.at[ind, '供总院全年网站组稿数'] = ((df_web['source'].str.contains( - dw) & (df_web['source'] != dw))).sum() + dw) & (df_web['source'] != dw))).sum() if df_web is not None else 0 df_fx.at[ind, '供总院全年阅读10000及以上数'] = ((df['read_num'] >= 10000) & ( df['source'].str.contains(dw)) & (df['gbiz'] == zybiz)).sum() df_fx.at[ind, '供总院全年阅读5000及以上数'] = ((df['read_num'] >= 5000) & ( @@ -212,26 +213,26 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): if '月' in i: i_str = i.replace('月', '').zfill(2) cons_y_m = (df['pub_month'] == str(i_str)) - cons_y_m_web = (df_web['pub_month'] == str(i_str)) + cons_y_m_web = (df_web['pub_month'] == str(i_str)) if df_web is not None else pd.Series(False) elif i == '上半年': cons_y_m = (df['pub_month'] == '01') | (df['pub_month'] == '02') | (df['pub_month'] == '03') | ( df['pub_month'] == '04') | (df['pub_month'] == '05') | (df['pub_month'] == '06') cons_y_m_web = (df_web['pub_month'] == '01') | (df_web['pub_month'] == '02') | (df_web['pub_month'] == '03') | ( - df_web['pub_month'] == '04') | (df_web['pub_month'] == '05') | (df_web['pub_month'] == '06') + df_web['pub_month'] == '04') | (df_web['pub_month'] == 
'05') | (df_web['pub_month'] == '06') if df_web is not None else pd.Series(False) elif i == '下半年': cons_y_m = (df['pub_month'] == '07') | (df['pub_month'] == '08') | (df['pub_month'] == '09') | ( df['pub_month'] == '10') | (df['pub_month'] == '11') | (df['pub_month'] == '12') cons_y_m_web = (df_web['pub_month'] == '07') | (df_web['pub_month'] == '08') | (df_web['pub_month'] == '09') | ( - df_web['pub_month'] == '10') | (df_web['pub_month'] == '11') | (df_web['pub_month'] == '12') + df_web['pub_month'] == '10') | (df_web['pub_month'] == '11') | (df_web['pub_month'] == '12') if df_web is not None else pd.Series(False) elif i == '全年': cons_y_m = pd.Series(True, index=df.index) - cons_y_m_web = pd.Series(True, index=df_web.index) + cons_y_m_web = pd.Series(True, index=df_web.index) if df_web is not None else pd.Series(False) if '、' in dw: # 针对这种同一部门的 cons_dw_1 = pd.Series(False, index=df.index) - cons_dw_1_web = pd.Series(False, index=df_web.index) + cons_dw_1_web = pd.Series(False, index=df_web.index) if df_web is not None else pd.Series(False) for item in dw.split('、'): cons_dw_1 = (df['source'].str.contains(item)) | cons_dw_1 - cons_dw_1_web = (df_web['source'].str.contains(item)) | cons_dw_1_web + cons_dw_1_web = (df_web['source'].str.contains(item)) | cons_dw_1_web if df_web is not None else cons_dw_1_web df_fx.at[ind, f'供总院{i}稿数'] = ((cons_dw_1) & ( cons_y_m) & (df['gbiz'] == zybiz)).sum() df_fx.at[ind, f'供总院网站{i}稿数'] = ((cons_dw_1_web) & ( @@ -240,7 +241,7 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): df_fx.at[ind, f'供总院{i}稿数'] = (df['source'].str.contains( dw) & (cons_y_m) & (df['gbiz'] == zybiz)).sum() df_fx.at[ind, f'供总院网站{i}稿数'] = (df_web['source'].str.contains( - dw) & (cons_y_m_web)).sum() + dw) & (cons_y_m_web)).sum() if df_web is not None else 0 df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].fillna(0) df_fx[f'供总院{i}稿数'] = df_fx[f'供总院{i}稿数'].astype(int) df_fx[f'供总院网站{i}稿数'] = df_fx[f'供总院网站{i}稿数'].fillna(0) @@ -261,7 +262,7 @@ 
def get_cbma_info_from_db_and_ana(year: str = '2023'): max_read_row['read_num']), f'{max_read_row["pub_year"]}-{max_read_row["pub_month"]}-{max_read_row["pub_day"]}', max_read_row['source']] df_fx.at[ind, f'{i}最高点击文章'] = '***'.join(max_read_row_list) - df_fx.at[ind, f'总院网站{i}发布数'] = cons_y_m_web.sum() + df_fx.at[ind, f'总院网站{i}发布数'] = cons_y_m_web.sum() if df_web is not None else 0 df_fx[f'总院网站{i}发布数'] = df_fx[f'总院网站{i}发布数'].fillna(0) df_fx[f'总院网站{i}发布数'] = df_fx[f'总院网站{i}发布数'].astype(int) @@ -291,13 +292,14 @@ def get_cbma_info_from_db_and_ana(year: str = '2023'): sheet_web = workbook['官方网站更新数'] sheet_web.cell(row=1, column=1, value=f'关于{year}年度中国建材总院新媒体更新情况明细表\n(网站)') ind_zyweb = 0 - for ind, row in df_web.iterrows(): - sheet_web.cell(row=ind_zyweb+3, column=1, value=str(ind_zyweb+1)) - sheet_web.cell(row=ind_zyweb+3, column=2, value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}') - sheet_web.cell(row=ind_zyweb+3, column=3, value=row['title']) - sheet_web.cell(row=ind_zyweb+3, column=4, value=row['source']) - sheet_web.cell(row=ind_zyweb+3, column=5, value=row['bankuai']) - ind_zyweb = ind_zyweb + 1 + if df_web is not None: + for ind, row in df_web.iterrows(): + sheet_web.cell(row=ind_zyweb+3, column=1, value=str(ind_zyweb+1)) + sheet_web.cell(row=ind_zyweb+3, column=2, value=f'{row["pub_year"]}-{row["pub_month"]}-{row["pub_day"]}') + sheet_web.cell(row=ind_zyweb+3, column=3, value=row['title']) + sheet_web.cell(row=ind_zyweb+3, column=4, value=row['source']) + sheet_web.cell(row=ind_zyweb+3, column=5, value=row['bankuai']) + ind_zyweb = ind_zyweb + 1 cbma_path = os.path.join(BASE_DIR, f'summary/{year}年_总院文章.xlsx') workbook.save(cbma_path) print(f'总院{year}年文章表生成完毕!')