"""Copy quiz questions from a Word document into an Excel question sheet.

Each question's type, category, text, options and correct answer are written
to one row of the workbook's active sheet.
"""
import re

import pandas as pd  # not used by the code in this file; kept as-is
from docx import Document
from openpyxl import load_workbook

# Category written to column B of every question row.
QUES_CLASS = '安全领域'

# Each entry pairs an option regex with the Excel column that receives the
# text captured by that regex.
OPTION_LIST = [
    [r"A:\s*(\S+)", "D"],
    [r"B:\s*(\S+)", "E"],
    [r"C:\s*(\S+)", "F"],
    [r"D:\s*(\S+)", "G"],
    [r"E:\s*(\S+)", "H"],
    [r"F:\s*(\S+)", "I"],
]

# Columns for the fields that are not options.
_TYPE_COLUMN = 'A'
_CLASS_COLUMN = 'B'
_TEXT_COLUMN = 'C'
_ANSWER_COLUMN = 'J'

_ANSWER_PATTERN = r"正确答案:\s*(\S+)"
# Trailing score marker such as "(3分)" or "(10分)", ASCII or full-width
# parentheses.
_SCORE_SUFFIX = re.compile(r"[((]\s*\d+\s*分[))]$")
# Ordinal in front of the question text, e.g. "12、".
_LEADING_NUMBER = re.compile(r"^\d+、")


def fill_excel(matches, excel_path, local):
    """Write one value into one cell of the active sheet and save the workbook.

    The workbook is loaded and saved on every call, so prefer a batched write
    when many cells have to be filled.

    Args:
        matches: Value to store; a falsy value leaves the cell untouched.
        excel_path: Path of the workbook; it is saved back to the same path.
        local: Cell coordinate such as ``"C3"``.
    """
    wb = load_workbook(excel_path)
    ws = wb.active
    if matches:
        ws[local] = matches
    wb.save(excel_path)


def match_text(text, pattern):
    """Return capture group 1 of the first match of *pattern* in *text*.

    Returns an empty string when the pattern does not match.
    """
    found = re.search(pattern, text)
    if found:
        return found.group(1)
    return ''


def _parse_header(text):
    """Return the cells of a question-header paragraph, or None if it is not one.

    A header is a paragraph shaped like ``【xxx】...`` where ``xxx`` is exactly
    three characters (the question type).
    """
    if not (text[:1] == '【' and text[4:5] == '】'):
        return None
    cells = {_TYPE_COLUMN: text[1:4], _CLASS_COLUMN: QUES_CLASS}
    # A header is at least five characters long, so text[-2] cannot fail here.
    if text[-2] == '分':
        score = _SCORE_SUFFIX.search(text)
        # Cut the whole score marker whatever its width; fall back to the
        # fixed four-character cut when the suffix is not parenthesised.
        body = text[5:score.start()] if score else text[5:-4]
        # Strip only the leading ordinal: numbers inside the question stay,
        # and a question without an ordinal is still kept.
        cells[_TEXT_COLUMN] = _LEADING_NUMBER.sub('', body.strip(), count=1)
    return cells


def _collect_rows(texts):
    """Group paragraph texts into one ``{column: value}`` dict per question.

    Options and the correct answer are attached to the most recent header, so
    a question that lacks a field leaves a blank cell instead of shifting the
    values of later questions onto the wrong row.
    NOTE(review): this assumes every question starts with its header paragraph
    and that its options/answer follow it -- confirm against real documents.
    """
    rows = []
    for text in texts:
        header = _parse_header(text)
        if header is not None:
            rows.append(header)
        if not rows:
            # Nothing to attach to before the first question header.
            continue
        current = rows[-1]
        answer = match_text(text, _ANSWER_PATTERN)
        if answer:
            # First value wins; later duplicates do not overwrite it.
            current.setdefault(_ANSWER_COLUMN, answer)
        for pattern, column in OPTION_LIST:
            option = match_text(text, pattern)
            if option:
                current.setdefault(column, option)
    return rows


def _write_rows(rows, start, excel_path):
    """Write the question rows to the active sheet with one load and one save."""
    wb = load_workbook(excel_path)
    ws = wb.active
    for offset, cells in enumerate(rows):
        for column, value in cells.items():
            if value:  # same rule as fill_excel: never write empty values
                ws[column + str(start + offset)] = value
    wb.save(excel_path)


# Parse the Word document
def interpret_text(start: int, excel_path: str, doc_path: str):
    """Parse the questions in a Word document and write them to an Excel sheet.

    Column layout per row: A question type, B ``QUES_CLASS``, C question text,
    D-I options A-F (see ``OPTION_LIST``), J correct answer.

    Args:
        start: Sheet row that receives the first question; question ``i``
            (0-based) is written to row ``start + i``.
        excel_path: Workbook to update in place. It is left untouched when the
            document contains no question header.
        doc_path: Word document to read.
    """
    wordfile = Document(doc_path)
    rows = _collect_rows(p.text for p in wordfile.paragraphs)
    if rows:
        _write_rows(rows, start, excel_path)


if __name__ == '__main__':
    # Raw strings: the backslashes in Windows paths must not be read as escapes.
    doc_path = r"C:\code\data\test.docx"
    excel_path = r"C:\code\data\question.xlsx"
    interpret_text(3, excel_path, doc_path)