From 442187a80d3c3d4dc0e6bcd26fd8057f5620aedc Mon Sep 17 00:00:00 2001
From: zty
Date: Fri, 15 Nov 2024 09:36:48 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=E8=BD=AF=E8=91=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/main.py b/main.py
index 91e9849..6b60140 100644
--- a/main.py
+++ b/main.py
@@ -221,6 +221,23 @@ province_patterns = [
     ("颁发日期", r"\d{4}年\d{1,2}月\d{1,2}日"),
 ]
 
+# Software copyright certificate: (field name, regex) pairs.
+# Label-anchored patterns capture the rest of the label's line. A lazy group
+# left at the very end of a pattern matches as little as possible ("" for
+# ".*?", a single character for ".+?"), so each capture is closed with
+# (?:\n|$). Both the ASCII and the full-width colon are accepted.
+software_patterns = [
+    ("软件名称", r"软件名称[::](.+?)(?:\n|$)"),
+    ("著作权人", r"著[\s]*作[\s]*权[\s]*人[\s]*[::](.+?)(?:\n|$)"),
+    # Not label-anchored: matches any date written in this format.
+    ("开发完成日期", r"\d{4}年\d{1,2}月\d{1,2}日"),
+    ("首次发表日期", r"首次发表日期[::](.+?)(?:\n|$)"),
+    # NOTE(review): the key spells "权力"; the pattern accepts both "权利"
+    # and "权力" in the text. Confirm which spelling consumers expect.
+    ("权力取得方式", r"权[利力]取得方式[::](.+?)(?:\n|$)"),
+    ("登记号", r"登[\s]*记[\s]*号[::](.+?)(?:\n|$)"),
+]
+
 @app.post(
     "/extract",
     summary="提取专利/标准等文件里的信息",
@@ -304,6 +321,9 @@ async def extract_info(
     elif file_type == "province":
         info = extract_social_info(ocr_text, province_patterns)
         clean_info(info)
+    elif file_type == "software":
+        info = extract_social_info(ocr_text, software_patterns)
+        clean_info(info)
     else:
         raise HTTPException(
             400, detail="Invalid file type. Please choose 'standard' or 'patent'."