99 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			99 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
| 
 | ||
| from docx import Document
 | ||
| from openpyxl import load_workbook
 | ||
| import re
 | ||
| 
 | ||
| 
 | ||
# Default value for the category column, used when the caller passes no field.
QUES_CLASS = '安全领域'

# Answer-option letters and, position by position, the worksheet column that
# receives the text of that option.
_OPTION_LETTERS = "ABCDEF"
_OPTION_COLUMNS = "DEFGHI"

# Each entry is [regex whose group 1 captures the option text, target column].
OPTION_LIST = [
    [letter + r":\s*(\S+)", column]
    for letter, column in zip(_OPTION_LETTERS, _OPTION_COLUMNS)
]
 | ||
| 
 | ||
def fill_excel(matches, excel_path, local):
    """Write ``matches`` into one cell of the workbook's active sheet.

    Args:
        matches: Value to store. A falsy value (``''``, ``None``) means there
            is nothing to write and the workbook is left untouched.
        excel_path: Path of the workbook; it is loaded and saved in place.
        local: Cell coordinate such as ``'C5'``.
    """
    # Nothing to write: skip the load/save round trip instead of rewriting
    # an unchanged workbook.
    if not matches:
        return
    wb = load_workbook(excel_path)
    wb.active[local] = matches
    wb.save(excel_path)
 | ||
| 
 | ||
| 
 | ||
def match_text(text, pattern):
    """Return what group 1 of ``pattern`` captured at its first match in ``text``.

    Args:
        text: String to search.
        pattern: Regular expression containing at least one capture group.

    Returns:
        The text captured by group 1, or ``''`` when the pattern does not
        match or group 1 did not take part in the match.
    """
    found = re.search(pattern, text)
    if found is None:
        return ''
    # group(1) is None when an optional group was skipped; normalise it so
    # callers always receive a string.
    return found.group(1) or ''
 | ||
| 
 | ||
# Worksheet columns that receive each extracted field.
_TYPE_COLUMN = 'A'
_CATEGORY_COLUMN = 'B'
_TEXT_COLUMN = 'C'
_ANSWER_COLUMN = 'J'

# Score marker such as "(3分)" or "（10分）" closing a header paragraph.
_SCORE_SUFFIX = re.compile(r'[(（]\s*\d+(?:\.\d+)?\s*分[)）]$')
# Ordinal such as "12、" at the very start of the question wording.
_LEADING_NUMBER = re.compile(r'^\d+、')


def _question_text(header):
    """Return the question wording of a header paragraph, or '' if none."""
    # Wording is only extracted when the header ends with a score marker,
    # i.e. its second-to-last character is "分".
    if header[-2] != '分':
        return ''
    body = header[5:]
    if _SCORE_SUFFIX.search(body):
        # Handles scores of any width, e.g. "(3分)" and "(10分)".
        body = _SCORE_SUFFIX.sub('', body)
    else:
        # Unrecognised marker shape: drop a fixed four-character tail.
        body = body[:-4]
    # Remove only the leading ordinal; "digits、" inside the wording stays.
    return _LEADING_NUMBER.sub('', body.strip(), count=1)


def _group_questions(paragraph_texts):
    """Collect one ``{column: value}`` dict per question, in document order."""
    questions = []

    def store(column, value):
        # Data seen before any header, or repeated while no header is open,
        # starts its own row so nothing is silently overwritten.
        if not questions or (column in questions[-1]
                             and _TYPE_COLUMN not in questions[-1]):
            questions.append({})
        questions[-1].setdefault(column, value)

    for text in paragraph_texts:
        # A header looks like "【xxx】..." with the closing bracket at index 4;
        # the first two characters inside the brackets are the question type.
        if text[:1] == '【' and text[4:5] == '】':
            cells = {_TYPE_COLUMN: text[1:3]}
            wording = _question_text(text)
            if wording:
                cells[_TEXT_COLUMN] = wording
            questions.append(cells)

        answer = match_text(text, r"正确答案:\s*(\S+)")
        if answer:
            store(_ANSWER_COLUMN, answer)

        for pattern, column in OPTION_LIST:
            option = match_text(text, pattern)
            if option:
                store(column, option)
    return questions


# Parse the Word document.
def interpret_text(start:int, excel_path:str, doc_path:str, field=None):
    """Copy quiz questions from a Word document into an Excel worksheet.

    Each paragraph is scanned for a question header ("【type】N、text(score分)"),
    answer options matching ``OPTION_LIST`` and a "正确答案:" line. Values are
    grouped per question, so a question that lacks an option or an answer
    leaves that cell empty instead of shifting later questions up a row.
    Assumes a question's header paragraph comes before its options and
    answer.

    Columns written on the active sheet: A question type, B category,
    C question text, D-I options, J correct answer.

    Args:
        start: Worksheet row that receives the first question.
        excel_path: Workbook to update; loaded once and saved once in place.
        doc_path: Path of the .docx file to read.
        field: Category for column B; ``QUES_CLASS`` is used when falsy.

    Returns:
        The string ``'OK'``.
    """
    texts = [paragraph.text for paragraph in Document(doc_path).paragraphs]
    questions = _group_questions(texts)
    if not questions:
        # Nothing was recognised: leave the workbook untouched.
        return 'OK'

    category = field if field else QUES_CLASS
    wb = load_workbook(excel_path)
    ws = wb.active
    for offset, cells in enumerate(questions):
        row = str(start + offset)
        # The category accompanies every row that has a question type.
        if _TYPE_COLUMN in cells:
            ws[_CATEGORY_COLUMN + row] = category
        for column, value in cells.items():
            if value:
                ws[column + row] = value
    wb.save(excel_path)
    return 'OK'
 | ||
| 
 | ||
| 
 | ||
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; the previous plain
    # literals relied on invalid escape sequences (\c, \d, \q).
    doc_path = r"C:\code\data\test.docx"
    excel_path = r"C:\code\data\question.xlsx"
    # Rows are filled starting at worksheet row 3.
    interpret_text(3, excel_path, doc_path)