# Source listing: 99 lines, 3.6 KiB, Python
from docx import Document
 | 
						||
from openpyxl import load_workbook
 | 
						||
import re
 | 
						||
 | 
						||
 | 
						||
# Category written to column B when the caller does not supply its own.
QUES_CLASS = '安全领域'

# One [regex, column] pair per answer option. Option labels A-F are stored in
# worksheet columns D-I respectively; each regex captures the first run of
# non-whitespace characters that follows "<label>:".
OPTION_LIST = [
    [r"{}:\s*(\S+)".format(label), column]
    for label, column in zip("ABCDEF", "DEFGHI")
]
 | 
						||
 | 
						||
def fill_excel(matches, excel_path, local):
    """Store ``matches`` in one cell of the workbook's active sheet.

    The workbook at ``excel_path`` is loaded, the cell is assigned and the
    workbook is saved back to the same path.

    Args:
        matches: Value to write. A falsy value (``''``, ``None``, ``0`` ...)
            means "nothing to write" and the call becomes a no-op.
        excel_path: Path of the workbook file to modify in place.
        local: Cell coordinate on the active sheet, e.g. ``'A3'``.

    Returns:
        None.
    """
    # Nothing to write: return before touching the file, so an unchanged
    # workbook is not loaded and re-serialised for no effect.
    if not matches:
        return
    wb = load_workbook(excel_path)
    ws = wb.active
    ws[local] = matches
    wb.save(excel_path)
 | 
						||
 | 
						||
 | 
						||
def match_text(text, pattern):
    """Return capture group 1 of the first ``pattern`` match found in ``text``.

    The search is not anchored: the pattern may match anywhere in ``text``.

    Args:
        text: String to search.
        pattern: Regular expression containing at least one capture group.

    Returns:
        The content of group 1 for the first match, or ``''`` when the
        pattern does not match at all.
    """
    found = re.search(pattern, text)
    return found.group(1) if found else ''
 | 
						||
 | 
						||
def _collect_columns(texts, field=None):
    """Scan paragraph strings and group the extracted values by sheet column.

    Args:
        texts: Iterable of paragraph strings, in document order.
        field: Category stored in column B for every question header; when
            falsy, ``QUES_CLASS`` is used instead.

    Returns:
        A dict mapping a column letter to the list of values found for it,
        in the order they were encountered. Columns with no value are absent.
    """
    columns = {}
    for text in texts:
        # Correct answer -> column J.
        answer = match_text(text, r"正确答案:\s*(\S+)")
        if answer:
            columns.setdefault("J", []).append(answer)

        # Answer options -> the column named in OPTION_LIST.
        for pattern, column in OPTION_LIST:
            option = match_text(text, pattern)
            if option:
                columns.setdefault(column, []).append(option)

        # Question header: '【' first and '】' as the fifth character.
        if text[:1] == '【' and text[4:5] == '】':
            # Only the first two characters inside the brackets are kept.
            columns.setdefault("A", []).append(text[1:3])
            columns.setdefault("B", []).append(field if field else QUES_CLASS)
            # A score suffix such as "(3分)" is expected at the very end; the
            # slice below drops exactly four trailing characters.
            if text[-2] == '分':
                body = text[5:-4].strip()
                # Only numbered questions ("12、...") are recorded.
                if re.match(r'\d+、', body):
                    # Anchored so that only the leading number is removed and
                    # any "digits、" sequence inside the question is kept.
                    columns.setdefault("C", []).append(
                        re.sub(r'^\d+、', '', body))
    return columns


# Parse the Word document and fill the Excel sheet.
def interpret_text(start: int, excel_path: str, doc_path: str, field=None):
    """Extract questions from a Word document into an Excel workbook.

    Every paragraph of ``doc_path`` is scanned for a question header, answer
    options and the correct answer. The values are written to the active
    sheet of ``excel_path``, one row per value, beginning at row ``start``:

    * A - question type (two characters taken from the ``【...】`` header)
    * B - ``field`` if given, otherwise ``QUES_CLASS``
    * C - question text without its leading number and score suffix
    * D..I - answer options, as mapped by ``OPTION_LIST``
    * J - correct answer

    The workbook is loaded and saved once per call; it is left untouched
    when nothing was extracted.

    Args:
        start: Row number that receives the first value of every column.
        excel_path: Path of the workbook to update in place.
        doc_path: Path of the Word document to read.
        field: Optional category for column B.

    Returns:
        The string ``'OK'``.
    """
    wordfile = Document(doc_path)
    columns = _collect_columns((p.text for p in wordfile.paragraphs), field)

    # NOTE(review): each column is filled from its own list, so row N of one
    # column lines up with row N of another only if every question yields
    # exactly one value per column - confirm the input documents ensure this.
    if columns:
        wb = load_workbook(excel_path)
        ws = wb.active
        for column, values in columns.items():
            for offset, value in enumerate(values):
                if value:  # empty values leave the cell as it is
                    ws[column + str(start + offset)] = value
        wb.save(excel_path)
    return 'OK'
 | 
						||
 | 
						||
 | 
						||
if __name__ == '__main__':
    # Raw strings keep every Windows backslash literal; in a normal string
    # "\c", "\d" and "\q" are invalid escape sequences and "\t" would be a tab.
    doc_path = r"C:\code\data\test.docx"
    excel_path = r"C:\code\data\question.xlsx"
    # Values are written starting at row 3 of the sheet.
    interpret_text(3, excel_path, doc_path)