feat: cp_app首次提交

2025-10-31 16:18:15 +08:00 · 2025-10-31 16:18:15 +08:00 · 48146b1c8c
commit 48146b1c8c
6 changed files with 1566 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+dist/*
--- a/cp_app.py
+++ b/cp_app.py
@ -0,0 +1,326 @@
+from flask import Flask, jsonify, request, send_from_directory
+from flask_cors import CORS
+import json
+from docx import Document
+import io
+import fitz  # PyMuPDF
+import requests
+import os
+import re
+from flask_jwt_extended import JWTManager, create_access_token, jwt_required, get_jwt_identity
+
+app = Flask(__name__, static_folder='dist/assets', static_url_path='/assets')
+CORS(app)
+VUE_DIST_DIR = os.path.join(os.path.dirname(__file__), 'dist')
+app.config['JWT_SECRET_KEY'] = 'carbon'
+jwt = JWTManager(app)
+
+LLM_URL = "http://106.0.4.200:9000/v1/chat/completions"
+API_KEY = "JJVAide0hw3eaugGmxecyYYFw45FX2LfhnYJtC+W2rw"
+MODEL = "Qwen/QwQ-32B"
+HEADERS = {
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json"
+}
+OCR_URL = "http://127.0.0.1:3402/ocr_full"
+
+def get_standard():
+    with open("./standard.json", "r", encoding="utf-8") as f:
+        standard = json.load(f)
+        return standard
+
+
+@app.route("/api/standard/", methods=["GET"])
+def get_s():
+    return get_standard()
+
+@app.route('/')
+def index():
+    return send_from_directory(VUE_DIST_DIR, 'index.html')
+
+def get_users():
+    with open("./users.json", "r", encoding="utf-8") as f:
+        users = json.load(f)
+        return users
+
+@app.route('/api/login/', methods=["POST"])
+def login():
+    username = request.json.get("username", "unknown")
+    password = request.json.get("password", "unknown")
+    users = get_users()
+    if username in users and users[username]["password"] == password:
+        access = create_access_token(identity=username)
+        return jsonify({"access": access, "userInfo": {"username": username, "name": users[username]["name"]}}), 200
+    return jsonify({"err_msg": "用户名或密码错误"}), 400
+
+@app.route("/api/cal/", methods=["POST"])
+@jwt_required()
+def cal():
+    data = get_standard()
+    file1 = request.files.get("file1", None)
+    file2 = request.files.get("file2", None)
+    file3 = request.files.get("file3", None)
+    file4 = request.files.get("file4", None)
+    file5 = request.files.get("file5", None)
+    file6 = request.files.get("file6", None)
+    if file1 or file2 or file3 or file4 or file5 or file6:
+        pass
+    else:
+        return jsonify({"err_msg": "请至少上传一个文件"}), 400
+    total_score = 0
+    if file1:
+        for item in data:
+            if item["thirdLevel"] in [
+                "碳中和路线图",
+                "短期/中期/长期减碳目标",
+                "设立碳管理相关部门",
+                "气候相关风险评估机制",
+                "内部碳定价机制",
+                "碳管理数字化平台建设",
+                "碳交易与履约能力",
+                "CCER等减排项目开发管理",
+                "数字化碳管理平台",
+            ]:
+                item["result"] = item["scoringCriteria"][0]["选项"]
+                item["score"] = item["fullScore"]
+                total_score += item["score"]
+    if file2:
+        for item in data:
+            if item["thirdLevel"] in [
+                "能源与碳排放管理体系",
+                "碳排放数据监测、报告与核查",
+                "参与权威信息平台披露",
+                "碳中和目标与进展经第三方认证",
+                "碳排放实时监测覆盖率达标",
+                "数据自动化采集比例达标",
+                "数据质量与校验机制",
+            ]:
+                item["result"] = item["scoringCriteria"][0]["选项"]
+                item["score"] = item["fullScore"]
+                total_score += item["score"]
+    if file3:
+        for item in data:
+            if item["thirdLevel"] in [
+                "ESG报告",
+                "工业固废/生物质资源利用率数据",
+                "硫化物减排措施",
+                "氮氧化物减排措施",
+                "其他污染物减排措施",
+                "项目选址生态避让与保护",
+                "矿山生态修复与复垦方案",
+                "厂区绿化与生态碳汇措施",
+                "低碳产品认证与标识",
+                "产品耐久性与回收性设计",
+                "无环保处罚与信访记录",
+                "环境应急管理体系",
+                "员工健康安全管理体系与制度",
+                "符合标准的物理环境与防护措施",
+                "员工心理健康支持计划",
+                "社区沟通与透明度机制",
+                "社区经济与发展贡献措施",
+                "社区负面影响缓解措施",
+                "供应商行为准则",
+                "供应商筛查与评估机制",
+                "供应商审核与改进机制",
+                "完善的治理结构",
+                "商业道德与反腐败制度",
+            ]:
+                item["result"] = item["scoringCriteria"][0]["选项"]
+                item["score"] = item["fullScore"]
+                total_score += item["score"]
+    if file4:
+        for item in data:
+            if item["thirdLevel"] in [
+                "资金分配明细",
+                "资本金比例与到位证明",
+                "融资渠道多样性",
+                "成本效益分析",
+                "碳减排收益量化",
+                "社会效益评估",
+                "风险管控方案",
+                "关键风险应对策略与预案",
+                "金融机构或第三方风险分担机制",
+                "绿色金融资质认证与资金用途",
+                "融资条款与ESG绩效挂钩",
+                "国际合作资金申请与利用",
+                "应急响应与能力建设机制",
+            ]:
+                item["result"] = item["scoringCriteria"][0]["选项"]
+                item["score"] = item["fullScore"]
+                total_score += item["score"]
+    if file5:
+        for item in data:
+            if item["thirdLevel"] in [
+                "AI预测减碳潜力应用",
+                "智能优化控制算法应用",
+                "ERP/EMS/MES系统集成度达标",
+                "IoT设备覆盖率达标",
+                "跨系统数据协同能力",
+                "碳数据安全管理措施",
+                "系统抗攻击能力达标",
+                "数据合规性与审计追踪机制",
+            ]:
+                item["result"] = item["scoringCriteria"][0]["选项"]
+                item["score"] = item["fullScore"]
+                total_score += item["score"]
+
+    e_filename = None
+    e_content, e_err_msg = None, None
+    if file6:
+        # 获取文件名和类型
+        filename = file6.filename
+        e_filename = filename
+        file_type = filename.rsplit('.', 1)[1].lower() if '.' in filename else None
+        
+        content, err_msg = parse_file(file6, file_type)
+        e_content = content
+        e_err_msg = err_msg
+        if content:
+            res = ask(f'以下内容为用户报告:  {content}', "tec")
+            if res == "是":
+                for item in data:
+                    if item["firstLevel"] == "二、技术路径（35 分）":
+                        item["result"] = item["scoringCriteria"][0]["选项"]
+                        item["score"] = item["fullScore"]
+                        total_score += item["score"]
+        else:
+            return jsonify({"err_msg": err_msg}), 400
+    
+    if file1:
+        filename = file1.filename
+        file_type = filename.rsplit('.', 1)[1].lower() if '.' in filename else None
+        if filename == e_filename:
+            content, err_msg = e_content, e_err_msg
+        else:
+            content, err_msg = parse_file(file1, file_type)
+        if content:
+            if bool(re.search(r'碳?减排目标', content)):
+                data[3]["result"] = "有"
+                data[3]["score"] = data[3]["fullScore"]
+                total_score += data[3]["score"]
+
+
+            def cal_percent(decline_patterns, content, data, index, total_score):
+                decline_percent = None
+                for pattern in decline_patterns:
+                    match = re.search(pattern, content, re.DOTALL)
+                    if match:
+                        decline_percent = float(match.group(1))
+                        break
+                if decline_percent:
+                    if decline_percent >= 10:
+                        data[index]["result"] = 3
+                        data[index]["score"] = 5
+                    elif decline_percent >= 5:
+                        data[index]["result"] = 2
+                        data[index]["score"] = 2.5
+                    elif decline_percent > 0:
+                        data[index]["result"] = 1
+                        data[index]["score"] = 1.5
+                    total_score += data[index].get("score", 0)
+                return total_score
+
+            # 碳排放总量
+            decline_patterns1 = [
+                    r'碳排放总量[^，。]*?下降\s*([\d.]+)%',
+                    r'碳排放[^，。]*?总量[^，。]*?下降\s*([\d.]+)%',
+                    r'碳总量[^，。]*?下降\s*([\d.]+)%',
+                    r'排放总量[^，。]*?下降\s*([\d.]+)%',
+                    r'排放[^，。]*?下降\s*([\d.]+)%'
+            ]
+            total_score = cal_percent(decline_patterns1, content, data, 0, total_score)
+
+            # 碳排放强度
+            decline_patterns2 = [
+                    r'碳排放强度[^，。]*?下降\s*([\d.]+)%',
+                    r'碳强度[^，。]*?总量[^，。]*?下降\s*([\d.]+)%',
+                    r'排放强度[^，。]*?下降\s*([\d.]+)%'
+            ]
+            total_score = cal_percent(decline_patterns2, content, data, 1, total_score)
+
+            # 产品碳足迹
+            decline_patterns3 = [
+                    r'产品碳足迹[^，。]*?下降\s*([\d.]+)%',
+                    r'碳足迹[^，。]*?下降\s*([\d.]+)%',
+                    r'产品足迹[^，。]*?下降\s*([\d.]+)%'
+            ]
+            total_score = cal_percent(decline_patterns3, content, data, 2, total_score)
+        else:
+            return jsonify({"err_msg": err_msg}), 400
+    return jsonify({"total_score": round(total_score, 2), "data": data})
+
+def ask(input:str, p_name:str, stream=False):
+    with open (f"promot/{p_name}.md", "r", encoding="utf-8") as f:
+        promot_str = f.read()
+    his = [{"role":"system", "content": promot_str}]
+    his.append({"role":"user", "content": input})
+    payload = {
+                "model": MODEL,
+                "messages": his,
+                "temperature": 0,
+                "stream": stream,
+                "chat_template_kwargs": {"enable_thinking": False}
+                }
+    response = requests.post(LLM_URL, headers=HEADERS, json=payload, stream=stream, timeout=(60, 240))
+    print(response.json())
+    if not stream:
+        return response.json()["choices"][0]["message"]["content"]
+    
+def parse_file(file_content, file_type):
+    try:
+        if file_type == "pdf":
+            # 将文件内容转换为字节流
+            pdf_bytes = file_content.read()  
+            pdf_stream = io.BytesIO(pdf_bytes)
+            doc = fitz.open(stream=pdf_stream, filetype="pdf")
+            text_content = ""
+            
+            # 首先尝试直接提取文本
+            for page_num in range(len(doc)):
+                page = doc[page_num]
+                text_content += page.get_text() + "\n"
+            
+            t_plain = text_content.strip()
+            if t_plain:
+                doc.close()
+                return t_plain, None
+            else:
+                # 直接转发字节流
+                # resp = requests.post(OCR_URL,
+                #                     files={"pdf": (file_content.filename,
+                #                                     pdf_stream,
+                #                                     "application/pdf")},
+                #                     timeout=120)          # 大文件酌情加长
+                # resp.raise_for_status()
+                # return resp.json()["full_text"], None
+                return None, "无法直接提取文本，请使用OCR处理"
+            
+        elif file_type == "docx":
+            # 将文件内容转换为字节流
+            doc_stream = io.BytesIO(file_content.read())
+            doc = Document(doc_stream)
+            
+            # 提取所有段落的文本
+            text_content = ""
+            for paragraph in doc.paragraphs:
+                text_content += paragraph.text + "\n"
+                
+            # 提取表格中的文本
+            for table in doc.tables:
+                for row in table.rows:
+                    for cell in row.cells:
+                        text_content += cell.text + " "
+                    text_content += "\n"
+            return text_content, None
+        
+        # 如果需要支持其他文件类型，可以在这里添加处理逻辑
+        elif file_type == "txt":
+            text_content = file_content.read().decode("utf-8")
+            return text_content, None
+        return None, "不支持的文件类型"
+    except Exception as e:
+        return None, f"文件解析错误: {str(e)}"
+
+if __name__ == "__main__":
+    # get_ocr_engine()
+    app.run(debug=True, port=3401)
--- a/cp_ocr.py
+++ b/cp_ocr.py
@ -0,0 +1,32 @@
+from flask import Flask, request, jsonify
+from paddleocr import PaddleOCR
+import fitz
+from concurrent.futures import ThreadPoolExecutor
+import numpy as np
+from PIL import Image
+import io
+import paddle
+
+app = Flask(__name__)
+# Select the GPU device before the OCR engine below is constructed.
+paddle.set_device("gpu")
+ocr = PaddleOCR(use_textline_orientation=True, lang="ch")  # initialise the shared PaddleOCR engine
+executor = ThreadPoolExecutor(max_workers=4)   # worker pool used to process pages in parallel
+
+@app.route("/ocr_full", methods=["POST"])
+def ocr_full():
+    pdf_bytes = request.files["pdf"].read()
+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    results = list(executor.map(_ocr_page, doc))
+    doc.close()
+    print(results)
+    return jsonify({"full_text": "\n\n".join(results)})
+
+def _ocr_page(page: fitz.Page):
+    pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
+    img = Image.open(io.BytesIO(pix.tobytes("png")))
+    result = ocr.predict(np.array(img))
+    texts = [line[1][0] for line in result[0]]
+    return f"--- 第 {page.number+1} 页 ---\n" + " ".join(texts)
+
+if __name__ == '__main__':
+    # Listen on the loopback interface only (port 3402); threaded=False is
+    # passed so the development server does not handle requests in threads.
+    app.run(host='127.0.0.1', port=3402, threaded=False)
--- a/promot/tec.md
+++ b/promot/tec.md
@ -0,0 +1,9 @@
+# 角色
+你是一位低碳技术专家，具备深厚的专业知识和丰富的实践经验。你能够精准理解用户的报告。
+# 技能
+1. 仔细分析用户提供的技术报告，获取里面的技术关键点。
+2. 根据技术关键点，判断报告中的技术是否属于低碳技术。
+3. 回答 是否采用所列绿色能源、原燃料替代、能效提升、低碳水泥、碳捕集等任一技术（关键技术词汇包括石灰石原料替代减碳技术、熟料烧成化石能源替代减碳技术、水泥工厂近零购电改造技术、水泥低碳烧成技术与装备、节能低碳粉磨工艺及装备、水泥窑炉系统隔热保温整体提升技术、矿渣粉磨制备水泥技术、水泥窑富氧燃烧技术、水泥工厂数字化、智能化技术、高贝利特低热/中热硅酸盐水泥及其制备技术、硫铝酸盐和铁铝酸盐水泥及其制备技术、贝利特-硫铝酸钙-硫硅酸钙新型低碳熟料及其制备技术、大掺量固废制备少熟料低碳水泥技术、镁质低碳水泥及其制备技术、热活化高岭土制备低碳水泥技术、全氧燃烧耦合低能耗碳捕集技术、水泥窑炉烟气捕集CO2技术、水泥窑全氧燃烧耦合CO2纯化捕集技术、水泥工业固体胺碳捕集技术、建材矿山均化开采配矿技术、建材矿山生态修复、绿色改造技术、绿色清洁运输技术、水泥超低排放改造、建设碳排放和污染物排放全过程智能管控与评估平台）
+# 回答要求
+仅回答 是 或者 否  不要回复其他内容
+# 以下是用户报告内容
--- a/standard.json
+++ b/standard.json
--- a/users.json
+++ b/users.json
@ -0,0 +1,10 @@
+{
+    "user1": {
+        "name": "用户1",
+        "password": "user1234"
+    },
+    "user2": {
+        "name": "用户2",
+        "password": "user5678"
+    }
+}