feat: 添加贷后计算

This commit is contained in:
caoqianming 2026-01-16 16:58:18 +08:00
parent 3bcca355ac
commit b087a94335
4 changed files with 147 additions and 10 deletions

View File

@ -0,0 +1,33 @@
# Generated by Django 4.2.27 on 2026-01-16 08:34
from django.db import migrations, models
import django.utils.timezone
# Schema migration that creates the table for the ``Fingerprint`` model.
class Migration(migrations.Migration):
    # Applied on top of the ``carbon`` app's initial migration.
    dependencies = [
        ('carbon', '0001_initial'),
    ]
    operations = [
        migrations.CreateModel(
            name='Fingerprint',
            fields=[
                # Bookkeeping columns: string primary key, timestamps, soft-delete flag.
                ('id', models.CharField(editable=False, help_text='主键ID', max_length=20, primary_key=True, serialize=False, verbose_name='主键ID')),
                ('create_time', models.DateTimeField(default=django.utils.timezone.now, help_text='创建时间', verbose_name='创建时间')),
                ('update_time', models.DateTimeField(auto_now=True, help_text='修改时间', verbose_name='修改时间')),
                ('is_deleted', models.BooleanField(default=False, help_text='删除标记', verbose_name='删除标记')),
                # The simhash stored twice: as a unique 16-character hex string
                # and as an indexed big integer.
                ('fp_hex', models.CharField(max_length=16, unique=True, verbose_name='simhash(hex)')),
                ('fp_int', models.BigIntegerField(db_index=True, verbose_name='simhash(int)')),
                # Four indexed integer segment columns.
                # presumably the 16-bit slices of the simhash; verify against the writer.
                ('seg1', models.IntegerField(db_index=True)),
                ('seg2', models.IntegerField(db_index=True)),
                ('seg3', models.IntegerField(db_index=True)),
                ('seg4', models.IntegerField(db_index=True)),
                # Score stored alongside the fingerprint.
                ('score', models.FloatField(verbose_name='得分')),
            ],
            options={
                # NOTE(review): seg1-seg4 already carry db_index=True above, so these
                # explicit indexes look like a second index per column - confirm
                # whether both are intended before changing the model.
                'indexes': [models.Index(fields=['seg1'], name='carbon_fing_seg1_a18a6c_idx'), models.Index(fields=['seg2'], name='carbon_fing_seg2_5c4cfa_idx'), models.Index(fields=['seg3'], name='carbon_fing_seg3_87273b_idx'), models.Index(fields=['seg4'], name='carbon_fing_seg4_60f65c_idx')],
            },
        ),
    ]

View File

@ -1,5 +1,5 @@
from django.db import models
from apps.utils.models import CommonBDModel
from apps.utils.models import CommonBDModel, BaseModel
# Create your models here.
class Work(CommonBDModel):
@ -28,3 +28,27 @@ class Work(CommonBDModel):
dh_file4 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file4')
dh_file5 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file5')
dh_file6 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file6')
class Fingerprint(BaseModel):
    """A stored simhash fingerprint together with the score recorded for it.

    The fingerprint is kept both as a unique 16-character hex string
    (``fp_hex``) and as an indexed big integer (``fp_int``).  ``seg1`` ..
    ``seg4`` are indexed integer columns used to look up candidate rows by
    segment equality.
    """

    fp_hex = models.CharField(verbose_name="simhash(hex)", max_length=16, unique=True)
    fp_int = models.BigIntegerField(verbose_name="simhash(int)", db_index=True)

    # Segment columns; each one is individually indexed.
    seg1 = models.IntegerField(db_index=True)
    seg2 = models.IntegerField(db_index=True)
    seg3 = models.IntegerField(db_index=True)
    seg4 = models.IntegerField(db_index=True)

    score = models.FloatField(verbose_name="得分")

    class Meta:
        # NOTE(review): the segment fields also set db_index=True, so these
        # explicit indexes appear to duplicate them - confirm before removing
        # either (a schema migration would be required).
        indexes = [
            models.Index(fields=[segment])
            for segment in ("seg1", "seg2", "seg3", "seg4")
        ]

View File

@ -59,6 +59,16 @@ def parse_file(file_path:str):
def get_fingerprint(text):
    """Build a ``Simhash`` from *text* and return its ``value`` attribute."""
    simhash = Simhash(text)
    return simhash.value
# --- Hamming distance ---
MASK_64 = (1 << 64) - 1


def hamming_distance(a_u, b_s):
    """Return the number of differing bits between two 64-bit fingerprints.

    Either argument may be given in unsigned form (``0 .. 2**64 - 1``) or in
    the signed two's-complement form used for database storage; both are
    reduced to their low 64 bits before comparison.

    Args:
        a_u: First fingerprint (typically the unsigned simhash value).
        b_s: Second fingerprint (typically the signed value read from the DB).

    Returns:
        The Hamming distance, an int in the range 0..64.
    """
    # Masking after the XOR is equivalent to masking each operand first and
    # guarantees a non-negative value, so the binary text has no sign prefix.
    differing_bits = (a_u ^ b_s) & MASK_64
    # bin().count() is used instead of int.bit_count() so the helper also
    # runs on Python versions older than 3.10.
    return bin(differing_bits).count("1")
def split_simhash(fp_int: int):
    """Split the low 64 bits of *fp_int* into four 16-bit segments.

    Returns:
        A 4-tuple of ints in the range 0..0xffff, ordered from the most
        significant segment (bits 48-63) to the least significant (bits 0-15).
    """
    return tuple((fp_int >> offset) & 0xffff for offset in (48, 32, 16, 0))

View File

@ -1,17 +1,18 @@
from django.shortcuts import render
from .models import Work
from .models import Work, Fingerprint
from .serializers import WorkSerializer, WorkCreateSerializer, WorkDqCalSerializer, WorkDhCalSerializer
from apps.utils.viewsets import CustomModelViewSet
from rest_framework.decorators import action
import os
from django.conf import settings
import json
from apps.carbon.service import parse_file
from apps.carbon.service import parse_file, get_fingerprint, hamming_distance, split_simhash
import requests
from rest_framework.exceptions import ParseError
import re
from rest_framework.response import Response
from django.db import transaction
from django.db import transaction, IntegrityError
from django.db.models import Q
# Create your views here.
# Chat-completions endpoint of the LLM service.
# NOTE(review): host and port are hard-coded here - confirm whether this
# should come from settings instead.
LLM_URL = "http://106.0.4.200:9000/v1/chat/completions"
@ -48,6 +49,11 @@ def ask(input:str, p_name:str, stream=False):
raise ParseError("模型处理错误超过最大token限制")
return response.json()["choices"][0]["message"]["content"]
def simhash_to_db(n: int) -> int:
    """Convert an unsigned 64-bit value to its signed two's-complement form.

    Values below ``2**63`` are returned unchanged; larger values have
    ``2**64`` subtracted so they fit a signed 64-bit column.
    """
    if n >= (1 << 63):
        return n - (1 << 64)
    return n
def simhash_from_db(n: int) -> int:
    """Convert a signed 64-bit value back to its unsigned form.

    Non-negative values are returned unchanged; negative values have
    ``2**64`` added.
    """
    if n < 0:
        return n + (1 << 64)
    return n
class WorkViewSet(CustomModelViewSet):
queryset = Work.objects.all()
@ -246,13 +252,77 @@ class WorkViewSet(CustomModelViewSet):
return Response({"total_score": total_score, "data": data})
@staticmethod
def parse_files(work: Work):
    """Parse the ``dh_file1`` .. ``dh_file6`` attachments of *work* into one text.

    Unset attachments are skipped, and an attachment whose ``name`` was
    already processed is parsed only once.

    Args:
        work: The ``Work`` instance whose ``dh_file*`` relations are read.

    Returns:
        The parsed contents of the attachments, joined with newlines
        (an empty string when no attachment is set).
    """
    attachments = (
        work.dh_file1, work.dh_file2, work.dh_file3,
        work.dh_file4, work.dh_file5, work.dh_file6,
    )
    seen_names = set()
    contents = []
    for file in attachments:
        if not file or file.name in seen_names:
            continue
        # Build the on-disk path and normalise Windows separators.
        path = (settings.BASE_DIR + file.path).replace('\\', '/')
        contents.append(parse_file(path))
        seen_names.add(file.name)
    return '\n'.join(contents)
@action(detail=True, methods=['post'], serializer_class=WorkDhCalSerializer)
@transaction.atomic
def cal_dh(self, request, pk):
    """Compute and store ``score_dh`` for a work from its ``dh_file*`` attachments.

    The request data is saved onto the work, the attachments are parsed into
    one text and a simhash fingerprint is computed for it.  If a stored
    ``Fingerprint`` lies within ``HAMMING_THRESHOLD`` bits of it, that
    fingerprint's score is reused; otherwise the model is asked for a score
    and the new fingerprint is stored.

    Returns:
        ``Response`` with ``{"total_score": <score>}``.

    Raises:
        ValidationError: if the request data is invalid.
        ValueError: if the model's reply cannot be converted to ``float``.
    """
    work = self.get_object()
    # Validate with the serializer declared for this action; the previous
    # code instantiated WorkDqCalSerializer here.
    sr = WorkDhCalSerializer(work, data=request.data)
    sr.is_valid(raise_exception=True)
    sr.save()

    # Re-read the row so the dh_file* relations reflect what was just saved.
    work = Work.objects.get(pk=pk)
    content = WorkViewSet.parse_files(work)

    fp_u = get_fingerprint(content)  # unsigned
    fp_int = simhash_to_db(fp_u)  # signed for db
    fp_hex = format(fp_u, "016x")
    s1, s2, s3, s4 = split_simhash(fp_int)

    # 1) Coarse filter: rows sharing at least one 16-bit segment.
    candidates = (
        Fingerprint.objects
        .filter(
            Q(seg1=s1) |
            Q(seg2=s2) |
            Q(seg3=s3) |
            Q(seg4=s4)
        )
        .only("fp_int", "score")
    )

    # 2) Exact Hamming distance on the candidates; the first hit wins.
    # NOTE(review): HAMMING_THRESHOLD is not defined in the visible part of
    # this module - confirm it exists at module level.
    for obj in candidates:
        if hamming_distance(fp_u, obj.fp_int) <= HAMMING_THRESHOLD:
            work.score_dh = obj.score
            work.save(update_fields=["score_dh"])
            return Response({"total_score": obj.score})

    # 3) No match: ask the model for a score.
    res = ask(content, "tec_dh")
    score = round(float(res), 2)
    work.score_dh = score
    work.save(update_fields=["score_dh"])

    # 4) Store the fingerprint, tolerating a concurrent insert of the same one.
    try:
        # The nested atomic() opens a savepoint.  Without it, an
        # IntegrityError raised inside the outer @transaction.atomic marks
        # the whole transaction for rollback, which would silently discard
        # the score saved above even though the error is caught.
        with transaction.atomic():
            Fingerprint.objects.create(
                fp_hex=fp_hex,
                fp_int=fp_int,
                seg1=s1,
                seg2=s2,
                seg3=s3,
                seg4=s4,
                score=score,
            )
    except IntegrityError:
        # Already inserted by a concurrent request; nothing to do.
        pass
    return Response({"total_score": score})