feat: 添加贷后计算
This commit is contained in:
parent
3bcca355ac
commit
b087a94335
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Generated by Django 4.2.27 on 2026-01-16 08:34
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Create the ``Fingerprint`` table of the ``carbon`` app."""

    dependencies = [
        ('carbon', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='Fingerprint',
            fields=[
                # Bookkeeping columns: primary key, timestamps, soft-delete flag.
                (
                    'id',
                    models.CharField(
                        editable=False,
                        help_text='主键ID',
                        max_length=20,
                        primary_key=True,
                        serialize=False,
                        verbose_name='主键ID',
                    ),
                ),
                (
                    'create_time',
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        help_text='创建时间',
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'update_time',
                    models.DateTimeField(
                        auto_now=True,
                        help_text='修改时间',
                        verbose_name='修改时间',
                    ),
                ),
                (
                    'is_deleted',
                    models.BooleanField(
                        default=False,
                        help_text='删除标记',
                        verbose_name='删除标记',
                    ),
                ),
                # SimHash fingerprint, stored both as hex text and as an integer.
                (
                    'fp_hex',
                    models.CharField(
                        max_length=16,
                        unique=True,
                        verbose_name='simhash(hex)',
                    ),
                ),
                (
                    'fp_int',
                    models.BigIntegerField(
                        db_index=True,
                        verbose_name='simhash(int)',
                    ),
                ),
                # Fingerprint segments used for coarse candidate lookup.
                ('seg1', models.IntegerField(db_index=True)),
                ('seg2', models.IntegerField(db_index=True)),
                ('seg3', models.IntegerField(db_index=True)),
                ('seg4', models.IntegerField(db_index=True)),
                ('score', models.FloatField(verbose_name='得分')),
            ],
            options={
                # NOTE(review): every seg column already has db_index=True, so
                # these named indexes look redundant -- confirm before keeping both.
                'indexes': [
                    models.Index(fields=['seg1'], name='carbon_fing_seg1_a18a6c_idx'),
                    models.Index(fields=['seg2'], name='carbon_fing_seg2_5c4cfa_idx'),
                    models.Index(fields=['seg3'], name='carbon_fing_seg3_87273b_idx'),
                    models.Index(fields=['seg4'], name='carbon_fing_seg4_60f65c_idx'),
                ],
            },
        ),
    ]
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from apps.utils.models import CommonBDModel
|
from apps.utils.models import CommonBDModel, BaseModel
|
||||||
|
|
||||||
# Create your models here.
|
# Create your models here.
|
||||||
class Work(CommonBDModel):
|
class Work(CommonBDModel):
|
||||||
|
|
@ -28,3 +28,27 @@ class Work(CommonBDModel):
|
||||||
dh_file4 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file4')
|
dh_file4 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file4')
|
||||||
dh_file5 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file5')
|
dh_file5 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file5')
|
||||||
dh_file6 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file6')
|
dh_file6 = models.ForeignKey("system.file", on_delete=models.SET_NULL, null=True, blank=True, related_name='work_dh_file6')
|
||||||
|
|
||||||
|
|
||||||
|
class Fingerprint(BaseModel):
    """A stored SimHash fingerprint together with the score assigned to it."""

    # Fingerprint rendered as hex text; unique per row.
    fp_hex = models.CharField(verbose_name="simhash(hex)", unique=True, max_length=16)
    # Fingerprint as an integer that fits a BigIntegerField column.
    fp_int = models.BigIntegerField(verbose_name="simhash(int)", db_index=True)

    # Segments of the fingerprint, used to pre-filter candidate rows.
    seg1 = models.IntegerField(db_index=True)
    seg2 = models.IntegerField(db_index=True)
    seg3 = models.IntegerField(db_index=True)
    seg4 = models.IntegerField(db_index=True)

    # Score recorded for the document that produced this fingerprint.
    score = models.FloatField(verbose_name="得分")

    class Meta:
        # NOTE(review): the seg columns also set db_index=True, so these
        # entries appear to index each column a second time -- confirm.
        indexes = [
            models.Index(fields=[column])
            for column in ("seg1", "seg2", "seg3", "seg4")
        ]
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,16 @@ def parse_file(file_path:str):
|
||||||
def get_fingerprint(text):
    """Return the SimHash value computed for ``text``."""
    fingerprint = Simhash(text)
    return fingerprint.value
|
||||||
|
|
||||||
# --- 汉明距离 ---
|
# Keeps only the low 64 bits of an integer.
MASK_64 = (1 << 64) - 1


def hamming_distance(a_u, b_s):
    """Return the number of differing bits between two 64-bit fingerprints.

    Both operands are reduced to their low 64 bits first, so each may be given
    either as an unsigned value or as its signed two's-complement counterpart
    (for example a value read back from a signed BIGINT column).

    Args:
        a_u: First fingerprint as an int.
        b_s: Second fingerprint as an int.

    Returns:
        int: Count of bit positions (0..64) in which the fingerprints differ.
    """
    diff = (a_u & MASK_64) ^ (b_s & MASK_64)
    # bin().count() instead of int.bit_count(): the latter needs Python 3.10+.
    return bin(diff).count("1")
|
||||||
|
|
||||||
|
|
||||||
|
def split_simhash(fp_int: int):
    """Split a 64-bit fingerprint into four 16-bit segments.

    Returns a 4-tuple ordered from the most significant segment to the least
    significant one. Each segment is masked to 16 bits, so a negative
    (two's-complement) input yields the same segments as its unsigned form.
    """
    return tuple((fp_int >> offset) & 0xffff for offset in (48, 32, 16, 0))
|
||||||
|
|
@ -1,17 +1,18 @@
|
||||||
from django.shortcuts import render
|
from django.shortcuts import render
|
||||||
from .models import Work
|
from .models import Work, Fingerprint
|
||||||
from .serializers import WorkSerializer, WorkCreateSerializer, WorkDqCalSerializer, WorkDhCalSerializer
|
from .serializers import WorkSerializer, WorkCreateSerializer, WorkDqCalSerializer, WorkDhCalSerializer
|
||||||
from apps.utils.viewsets import CustomModelViewSet
|
from apps.utils.viewsets import CustomModelViewSet
|
||||||
from rest_framework.decorators import action
|
from rest_framework.decorators import action
|
||||||
import os
|
import os
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
import json
|
import json
|
||||||
from apps.carbon.service import parse_file
|
from apps.carbon.service import parse_file, get_fingerprint, hamming_distance, split_simhash
|
||||||
import requests
|
import requests
|
||||||
from rest_framework.exceptions import ParseError
|
from rest_framework.exceptions import ParseError
|
||||||
import re
|
import re
|
||||||
from rest_framework.response import Response
|
from rest_framework.response import Response
|
||||||
from django.db import transaction
|
from django.db import transaction, IntegrityError
|
||||||
|
from django.db.models import Q
|
||||||
# Create your views here.
|
# Create your views here.
|
||||||
|
|
||||||
LLM_URL = "http://106.0.4.200:9000/v1/chat/completions"
|
LLM_URL = "http://106.0.4.200:9000/v1/chat/completions"
|
||||||
|
|
@ -48,6 +49,11 @@ def ask(input:str, p_name:str, stream=False):
|
||||||
raise ParseError("模型处理错误超过最大token限制")
|
raise ParseError("模型处理错误超过最大token限制")
|
||||||
return response.json()["choices"][0]["message"]["content"]
|
return response.json()["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
|
def simhash_to_db(n: int) -> int:
    """Map an unsigned 64-bit value onto the signed 64-bit range.

    Values below 2**63 are returned unchanged; larger values are shifted down
    by 2**64.
    """
    if n >= (1 << 63):
        return n - (1 << 64)
    return n
|
||||||
|
|
||||||
|
def simhash_from_db(n: int) -> int:
    """Map a signed 64-bit value back onto the unsigned range.

    Negative values are shifted up by 2**64; non-negative values are returned
    unchanged.
    """
    if n < 0:
        return n + (1 << 64)
    return n
|
||||||
|
|
||||||
class WorkViewSet(CustomModelViewSet):
|
class WorkViewSet(CustomModelViewSet):
|
||||||
queryset = Work.objects.all()
|
queryset = Work.objects.all()
|
||||||
|
|
@ -246,13 +252,77 @@ class WorkViewSet(CustomModelViewSet):
|
||||||
return Response({"total_score": total_score, "data": data})
|
return Response({"total_score": total_score, "data": data})
|
||||||
|
|
||||||
@staticmethod
def parse_files(work: Work):
    """Parse the files attached to ``work`` and join their text.

    Looks at ``dh_file1`` .. ``dh_file6`` in order. Empty slots are skipped,
    and so is any file whose ``name`` was already parsed for this work.

    Args:
        work: The Work instance whose dh files should be parsed.

    Returns:
        str: Parsed contents joined with newlines; an empty string when no
        file is attached.
    """
    attachments = (
        work.dh_file1,
        work.dh_file2,
        work.dh_file3,
        work.dh_file4,
        work.dh_file5,
        work.dh_file6,
    )
    seen_names = set()
    contents = []
    for file in attachments:
        if not file or file.name in seen_names:
            continue
        # String formatting instead of "+": works whether settings.BASE_DIR
        # is a str or a pathlib.Path.
        path = f"{settings.BASE_DIR}{file.path}".replace('\\', '/')
        contents.append(parse_file(path))
        seen_names.add(file.name)
    return '\n'.join(contents)
|
||||||
|
|
||||||
@action(detail=True, methods=['post'], serializer_class=WorkDhCalSerializer)
@transaction.atomic
def cal_dh(self, request, pk):
    """Compute and store ``score_dh`` for one Work.

    Flow:
      1. Validate the request data and save it onto the work.
      2. Parse the attached dh files and compute the SimHash of their text.
      3. Look up stored fingerprints sharing at least one 16-bit segment and
         reuse the score of the first one within ``HAMMING_THRESHOLD`` bits.
      4. Otherwise ask the model for a score, store it, and record the new
         fingerprint.

    Returns:
        Response: ``{"total_score": <score>}``.

    Raises:
        ParseError: When the model reply cannot be read as a number.
    """
    work = self.get_object()
    # Validate with the serializer declared on this action; the Dq serializer
    # used here before belongs to the sibling calculation.
    sr = WorkDhCalSerializer(work, data=request.data)
    sr.is_valid(raise_exception=True)
    sr.save()
    # Re-read the row so the file relations reflect what was just saved.
    work = Work.objects.get(pk=pk)
    content = WorkViewSet.parse_files(work)

    fp_u = get_fingerprint(content)  # unsigned, per the original author's note
    fp_int = simhash_to_db(fp_u)  # signed form that fits the BIGINT column
    fp_hex = format(fp_u, "016x")

    s1, s2, s3, s4 = split_simhash(fp_int)

    # Step 1: coarse filter -- any row sharing at least one segment.
    candidates = (
        Fingerprint.objects
        .filter(Q(seg1=s1) | Q(seg2=s2) | Q(seg3=s3) | Q(seg4=s4))
        .only("fp_int", "score")
    )

    # Step 2: exact Hamming distance on the candidates.
    # NOTE(review): HAMMING_THRESHOLD is neither defined nor imported in the
    # visible part of this module -- confirm it exists at module level.
    for obj in candidates:
        if hamming_distance(fp_u, obj.fp_int) <= HAMMING_THRESHOLD:
            work.score_dh = obj.score
            work.save(update_fields=["score_dh"])
            return Response({"total_score": obj.score})

    # Step 3: no match -- ask the model for a score.
    res = ask(content, "tec_dh")
    try:
        score = round(float(res), 2)
    except (TypeError, ValueError) as exc:
        # A non-numeric reply becomes an API error instead of a bare 500.
        raise ParseError("模型返回的得分无法解析") from exc

    work.score_dh = score
    work.save(update_fields=["score_dh"])

    # Step 4: record the fingerprint. The nested atomic() creates a savepoint,
    # so a unique-constraint failure rolls back only this insert; catching the
    # error without it would mark the whole request transaction for rollback
    # and silently discard the score saved above.
    try:
        with transaction.atomic():
            Fingerprint.objects.create(
                fp_hex=fp_hex,
                fp_int=fp_int,
                seg1=s1,
                seg2=s2,
                seg3=s3,
                seg4=s4,
                score=score,
            )
    except IntegrityError:
        # Deliberate best-effort: a concurrent request already stored this
        # fingerprint, so there is nothing left to do.
        pass

    return Response({"total_score": score})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue