feat: 添加贷后计算

This commit is contained in:
caoqianming 2026-01-16 16:58:18 +08:00
parent 3bcca355ac
commit b087a94335
4 changed files with 147 additions and 10 deletions

View File

@ -0,0 +1,33 @@
# Generated by Django 4.2.27 on 2026-01-16 08:34
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    """Create the ``Fingerprint`` table for the ``carbon`` app."""

    dependencies = [
        ('carbon', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='Fingerprint',
            fields=[
                (
                    'id',
                    models.CharField(
                        editable=False,
                        help_text='主键ID',
                        max_length=20,
                        primary_key=True,
                        serialize=False,
                        verbose_name='主键ID',
                    ),
                ),
                (
                    'create_time',
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        help_text='创建时间',
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'update_time',
                    models.DateTimeField(
                        auto_now=True,
                        help_text='修改时间',
                        verbose_name='修改时间',
                    ),
                ),
                (
                    'is_deleted',
                    models.BooleanField(
                        default=False,
                        help_text='删除标记',
                        verbose_name='删除标记',
                    ),
                ),
                # Fingerprint stored twice: as a 16-character unique string
                # and as a big integer.
                (
                    'fp_hex',
                    models.CharField(
                        max_length=16,
                        unique=True,
                        verbose_name='simhash(hex)',
                    ),
                ),
                (
                    'fp_int',
                    models.BigIntegerField(
                        db_index=True,
                        verbose_name='simhash(int)',
                    ),
                ),
                # NOTE(review): each segment column is indexed via
                # db_index=True and again by the named indexes in `options`
                # below -- confirm that both are really wanted.
                ('seg1', models.IntegerField(db_index=True)),
                ('seg2', models.IntegerField(db_index=True)),
                ('seg3', models.IntegerField(db_index=True)),
                ('seg4', models.IntegerField(db_index=True)),
                ('score', models.FloatField(verbose_name='得分')),
            ],
            options={
                'indexes': [
                    models.Index(
                        fields=['seg1'],
                        name='carbon_fing_seg1_a18a6c_idx',
                    ),
                    models.Index(
                        fields=['seg2'],
                        name='carbon_fing_seg2_5c4cfa_idx',
                    ),
                    models.Index(
                        fields=['seg3'],
                        name='carbon_fing_seg3_87273b_idx',
                    ),
                    models.Index(
                        fields=['seg4'],
                        name='carbon_fing_seg4_60f65c_idx',
                    ),
                ],
            },
        ),
    ]

View File

@ -1,5 +1,5 @@
from django.db import models from django.db import models
from apps.utils.models import CommonBDModel from apps.utils.models import CommonBDModel, BaseModel
# Create your models here. # Create your models here.
class Work(CommonBDModel): class Work(CommonBDModel):
@ -28,3 +28,27 @@ class Work(CommonBDModel):
dh_file4 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file4') dh_file4 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file4')
dh_file5 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file5') dh_file5 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file5')
dh_file6 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file6') dh_file6 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file6')
class Fingerprint(BaseModel):
    """A stored simhash fingerprint and the score recorded for it.

    The fingerprint is kept in two forms (``fp_hex`` as a unique
    16-character string, ``fp_int`` as a big integer) plus four integer
    segment columns that can be matched individually.
    """

    fp_hex = models.CharField(max_length=16, unique=True, verbose_name="simhash(hex)")
    fp_int = models.BigIntegerField(db_index=True, verbose_name="simhash(int)")

    # NOTE(review): every segment column carries db_index=True and is also
    # listed in Meta.indexes, which asks for two indexes per column --
    # confirm this is intended before changing it (a migration is required).
    seg1 = models.IntegerField(db_index=True)
    seg2 = models.IntegerField(db_index=True)
    seg3 = models.IntegerField(db_index=True)
    seg4 = models.IntegerField(db_index=True)

    score = models.FloatField(verbose_name="得分")

    class Meta:
        # One single-column index per segment, in seg1..seg4 order.
        indexes = [
            models.Index(fields=[segment])
            for segment in ("seg1", "seg2", "seg3", "seg4")
        ]

View File

@ -59,6 +59,16 @@ def parse_file(file_path:str):
def get_fingerprint(text):
    """Return the ``value`` of the ``Simhash`` built from ``text``."""
    fingerprint = Simhash(text)
    return fingerprint.value
# --- 汉明距离 --- MASK_64 = (1 << 64) - 1
def hamming_distance(a, b):
return bin(a ^ b).count("1") def hamming_distance(a_u, b_s):
return ((a_u ^ (b_s & MASK_64)) & MASK_64).bit_count()
def split_simhash(fp_int: int):
    """Split a fingerprint into four 16-bit segments, highest bits first.

    Returns a 4-tuple holding bits 48-63, 32-47, 16-31 and 0-15 of
    ``fp_int``; every element is in the range 0..0xffff.
    """
    return tuple((fp_int >> shift) & 0xffff for shift in (48, 32, 16, 0))

View File

@ -1,17 +1,18 @@
from django.shortcuts import render from django.shortcuts import render
from .models import Work from .models import Work, Fingerprint
from .serializers import WorkSerializer, WorkCreateSerializer, WorkDqCalSerializer, WorkDhCalSerializer from .serializers import WorkSerializer, WorkCreateSerializer, WorkDqCalSerializer, WorkDhCalSerializer
from apps.utils.viewsets import CustomModelViewSet from apps.utils.viewsets import CustomModelViewSet
from rest_framework.decorators import action from rest_framework.decorators import action
import os import os
from django.conf import settings from django.conf import settings
import json import json
from apps.carbon.service import parse_file from apps.carbon.service import parse_file, get_fingerprint, hamming_distance, split_simhash
import requests import requests
from rest_framework.exceptions import ParseError from rest_framework.exceptions import ParseError
import re import re
from rest_framework.response import Response from rest_framework.response import Response
from django.db import transaction from django.db import transaction, IntegrityError
from django.db.models import Q
# Create your views here. # Create your views here.
LLM_URL = "http://106.0.4.200:9000/v1/chat/completions" LLM_URL = "http://106.0.4.200:9000/v1/chat/completions"
@ -48,6 +49,11 @@ def ask(input:str, p_name:str, stream=False):
raise ParseError("模型处理错误超过最大token限制") raise ParseError("模型处理错误超过最大token限制")
return response.json()["choices"][0]["message"]["content"] return response.json()["choices"][0]["message"]["content"]
def simhash_to_db(n: int) -> int:
    """Map an unsigned 64-bit value onto the signed 64-bit range.

    Values of 2**63 and above are shifted down by 2**64; smaller values are
    returned unchanged.
    """
    if n >= (1 << 63):
        return n - (1 << 64)
    return n
def simhash_from_db(n: int) -> int:
    """Map a signed 64-bit value back to its unsigned form.

    Negative values are shifted up by 2**64; non-negative values are
    returned unchanged.
    """
    if n < 0:
        return n + (1 << 64)
    return n
class WorkViewSet(CustomModelViewSet): class WorkViewSet(CustomModelViewSet):
queryset = Work.objects.all() queryset = Work.objects.all()
@ -246,14 +252,78 @@ class WorkViewSet(CustomModelViewSet):
return Response({"total_score": total_score, "data": data}) return Response({"total_score": total_score, "data": data})
@staticmethod
def parse_files(work: Work):
    """Parse the ``dh_file1`` .. ``dh_file6`` attachments of ``work``.

    Empty slots are skipped, and so is any file whose ``name`` was already
    handled earlier in the sequence. The parsed contents are joined with
    newlines in slot order (presumably ``parse_file`` returns text --
    verify against its definition).
    """
    attachments = (
        work.dh_file1,
        work.dh_file2,
        work.dh_file3,
        work.dh_file4,
        work.dh_file5,
        work.dh_file6,
    )
    seen_names = []
    parsed = []
    for attachment in attachments:
        if not attachment or attachment.name in seen_names:
            continue
        # Stored path is appended to BASE_DIR; backslashes are normalised
        # to forward slashes.
        full_path = (settings.BASE_DIR + attachment.path).replace('\\', '/')
        parsed.append(parse_file(full_path))
        seen_names.append(attachment.name)
    return '\n'.join(parsed)
@action(detail=True, methods=['post'], serializer_class=WorkDhCalSerializer)
@transaction.atomic
def cal_dh(self, request, pk):
    """Score the ``dh_*`` files of a work, reusing scores of near-duplicates.

    Steps:
      1. Validate and save the request payload onto the work.
      2. Parse the attached files and compute their simhash fingerprint.
      3. Look for a stored fingerprint within ``HAMMING_THRESHOLD`` bits;
         on a hit, reuse its score.
      4. Otherwise ask the model for a score and store the new fingerprint.

    Returns:
        ``Response({"total_score": <float>})``.

    Raises:
        ParseError: if the model's answer cannot be read as a number
            (``ask`` may raise it for its own reasons as well).
    """
    work = self.get_object()
    # Use the serializer this action declares. The previous code validated
    # the payload with WorkDqCalSerializer, which looks like a copy/paste
    # slip from the "dq" action.
    sr = WorkDhCalSerializer(work, data=request.data)
    sr.is_valid(raise_exception=True)
    sr.save()
    # Reload so the file relations reflect what was just saved.
    work = Work.objects.get(pk=pk)

    content = WorkViewSet.parse_files(work)
    fp_u = get_fingerprint(content)  # unsigned
    fp_int = simhash_to_db(fp_u)     # signed for db
    fp_hex = format(fp_u, "016x")
    # The 16-bit segments are the same for the signed and unsigned forms.
    s1, s2, s3, s4 = split_simhash(fp_u)

    # 1) Coarse filter: any stored fingerprint sharing at least one segment.
    candidates = (
        Fingerprint.objects
        .filter(
            Q(seg1=s1) |
            Q(seg2=s2) |
            Q(seg3=s3) |
            Q(seg4=s4)
        )
        .only("fp_int", "score")
    )

    # 2) Exact Hamming distance on the candidates.
    # NOTE(review): HAMMING_THRESHOLD is not defined in the visible part of
    # this module -- confirm it exists. With four segments the coarse filter
    # only guarantees finding every fingerprint within 3 bits, so a larger
    # threshold can miss matches.
    for obj in candidates:
        if hamming_distance(fp_u, obj.fp_int) <= HAMMING_THRESHOLD:
            work.score_dh = obj.score
            work.save(update_fields=["score_dh"])
            return Response({"total_score": obj.score})

    # 3) No match: ask the model.
    res = ask(content, "tec_dh")
    try:
        score = round(float(res), 2)
    except (TypeError, ValueError) as exc:
        # The model replies with free text; turn a non-numeric answer into
        # a client-visible error instead of an unhandled exception.
        raise ParseError("模型返回的得分无法解析为数字") from exc

    work.score_dh = score
    work.save(update_fields=["score_dh"])

    # 4) Store the fingerprint, tolerating a concurrent insert of the same
    # one. The nested atomic() creates a savepoint: without it, the
    # IntegrityError would leave the outer transaction unusable on databases
    # that abort a transaction on error, and the score saved above would be
    # rolled back.
    try:
        with transaction.atomic():
            Fingerprint.objects.create(
                fp_hex=fp_hex,
                fp_int=fp_int,
                seg1=s1,
                seg2=s2,
                seg3=s3,
                seg4=s4,
                score=score,
            )
    except IntegrityError:
        # Already inserted by a concurrent request; nothing more to do.
        pass

    return Response({"total_score": score})