feat:解析pdf文件
This commit is contained in:
parent
6929fb7753
commit
69ea0b9c1a
|
@ -43,3 +43,20 @@ export function getCtAll(query) {
|
|||
method: 'delete'
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/**
 * POST an import request for the contact list.
 *
 * @param {Object} data - request body forwarded unchanged to the endpoint
 * @returns whatever the shared `request` helper returns for this call
 */
export function impData(data) {
  const options = {
    url: `/info/contact/imp/`,
    method: 'post',
    data
  }
  return request(options)
}
|
||||
|
||||
/**
 * POST a PDF-parsing request to the contact endpoint.
 *
 * @param {Object} data - request body forwarded unchanged to the endpoint
 * @returns whatever the shared `request` helper returns for this call
 */
export function parsePDF(data) {
  const options = {
    url: `/info/contact/parse_pdf/`,
    method: 'post',
    data
  }
  return request(options)
}
|
|
@ -457,6 +457,12 @@ export const asyncRoutes = [
|
|||
component: () => import('@/views/informatiomCollect/externalAuditor.vue'),
|
||||
meta: { title: '外审员情况', perms: ['infoCollect_EA'] }
|
||||
},
|
||||
{
|
||||
path: 'yearReport',
|
||||
name: 'yearReport',
|
||||
component: () => import('@/views/informatiomCollect/yearReport.vue'),
|
||||
meta: { title: '实验室年度报告', perms: ['infoCollect_report'] }
|
||||
},
|
||||
{
|
||||
path: 'smsMessage',
|
||||
name: 'smsMessage',
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
<div class="app-container">
|
||||
<el-card class="elHeader">
|
||||
<el-button type="primary" icon="el-icon-plus" @click="handleAddFile">新增</el-button>
|
||||
<el-button type="primary" icon="el-icon-plus" @click="handleExport">导出</el-button>
|
||||
<el-button type="primary" icon="el-icon-plus" @click="handleImport">导入</el-button>
|
||||
<el-button type="primary" @click="handleExport">导出</el-button>
|
||||
</el-card>
|
||||
<el-card style="margin-top: 10px">
|
||||
<el-table v-loading="listLoading" :data="tableData.results" :hieght="tableHeight" border fit stripe
|
||||
|
@ -38,12 +39,6 @@
|
|||
<el-table-column label="质量负责人邮箱">
|
||||
<template slot-scope="scope">{{ scope.row.email_quality }}</template>
|
||||
</el-table-column>
|
||||
|
||||
<!-- <el-table-column align="center" label="文件下载">
|
||||
<template slot-scope="scope" v-if="scope.row.file_.file">
|
||||
<el-link :href="scope.row.file_.file" type="primary">下载</el-link>
|
||||
</template>
|
||||
</el-table-column> -->
|
||||
<el-table-column align="center" label="操作" width="120px" fixed="right">
|
||||
<template slot-scope="scope">
|
||||
<el-link v-if="currentDept=scope.row.belong_dept" :disabled="!checkPermission(['infoCollect_LC'])" type="primary" size="small"
|
||||
|
@ -99,6 +94,40 @@
|
|||
<el-button type="primary" @click="confirm('Form')">确认</el-button>
|
||||
</div>
|
||||
</el-dialog>
|
||||
<el-dialog
|
||||
:visible.sync="impDialogVisible"
|
||||
title="导入"
|
||||
:close-on-click-modal="false"
|
||||
>
|
||||
<el-form
|
||||
ref="Form"
|
||||
label-width="80px"
|
||||
label-position="right"
|
||||
:rules="rule"
|
||||
>
|
||||
<el-form-item label="下载模板" prop="path">
|
||||
<a :href="downloadUrl">点击下载模板</a>
|
||||
</el-form-item>
|
||||
<el-form-item label="导入文件" prop="file">
|
||||
<el-upload
|
||||
ref="upload"
|
||||
:action="upUrl"
|
||||
:on-success="handleUpSuccess"
|
||||
:on-remove="handleRemove"
|
||||
:on-preview="handlePreview"
|
||||
:headers="upHeaders"
|
||||
:file-list="fileList"
|
||||
:limit="1"
|
||||
accept=".doc,.docx,.xls,.xlsx,.ppt,.pptx,.pdf,.zip">
|
||||
<el-button size="small" type="primary">上传文件</el-button>
|
||||
</el-upload>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<div style="text-align: right">
|
||||
<el-button type="danger" @click="close_dialog">取消</el-button>
|
||||
<el-button type="primary" @click="submitUpload">确认</el-button>
|
||||
</div>
|
||||
</el-dialog>
|
||||
</div>
|
||||
</template>
|
||||
<script>
|
||||
|
@ -107,7 +136,7 @@ import Pagination from "@/components/Pagination"; // secondary package based on
|
|||
import Treeselect from "@riophae/vue-treeselect";
|
||||
import "@riophae/vue-treeselect/dist/vue-treeselect.css";
|
||||
import { getDictList, getDictTypeList } from "@/api/dict";
|
||||
import {createCt, del_contact, updateContact, getCtAll, getCt } from "@/api/contacts";
|
||||
import {createCt, del_contact, updateContact, getCtAll, getCt, impData} from "@/api/contacts";
|
||||
import { upUrl, upHeaders } from "@/api/file";
|
||||
import { saveAs } from 'file-saver';
|
||||
import XLSX from 'xlsx';
|
||||
|
@ -149,6 +178,9 @@ export default {
|
|||
typeOptions: [],
|
||||
listLoading: false,
|
||||
dialogVisible: false,
|
||||
saveLoading:false,
|
||||
impDialogVisible: false,
|
||||
downloadUrl: process.env.VUE_APP_BASE_API,
|
||||
listQuery: {
|
||||
page: 1,
|
||||
page_size: 20,
|
||||
|
@ -156,6 +188,9 @@ export default {
|
|||
tableData: {
|
||||
count: 0
|
||||
},
|
||||
impForm:{
|
||||
file:''
|
||||
},
|
||||
dialogType: "new",
|
||||
rule: {
|
||||
name: [{ required: true, message: "请输入名称", trigger: "blur" }],
|
||||
|
@ -186,6 +221,36 @@ export default {
|
|||
getTableList() {
|
||||
this.getList();
|
||||
},
|
||||
handleImport(){
|
||||
this.impForm.file = '';
|
||||
this.fileList = [];
|
||||
this.impDialogVisible = true;
|
||||
let file_url = '';
|
||||
file_url = "media/default/实验室联系方式.xlsx";
|
||||
let base_url = this.downloadUrl.substr(0, this.downloadUrl.length - 3);
|
||||
this.downloadUrl = base_url+file_url;
|
||||
},
|
||||
close_dialog(){
|
||||
this.impDialogVisible = false;
|
||||
this.fileList = [];
|
||||
this.impForm.file = '';
|
||||
},
|
||||
submitUpload() {
|
||||
this.$refs.upload.submit();
|
||||
this.impDialogVisible = false;
|
||||
this.saveLoading = true;
|
||||
impData(this.impForm).then(res=>{
|
||||
console.log(this.impForm)
|
||||
if(res.code>=200){
|
||||
this.$message.success("提交成功");
|
||||
this.getList();
|
||||
}else{
|
||||
this.$message.error(res.message);
|
||||
this.saveLoading = false;
|
||||
this.fileList = [];
|
||||
}
|
||||
})
|
||||
},
|
||||
handlePreview(file) {
|
||||
if ("url" in file) {
|
||||
window.open(file.url);
|
||||
|
@ -195,10 +260,12 @@ export default {
|
|||
},
|
||||
handleUpSuccess(res, file, filelist) {
|
||||
this.Content.file = res.data.id;
|
||||
this.impForm.file = res.data.path;
|
||||
|
||||
},
|
||||
handleRemove(file, filelist) {
|
||||
this.Content.file = null;
|
||||
this.impForm.file = '';
|
||||
},
|
||||
checkPermission,
|
||||
|
||||
|
|
|
@ -419,7 +419,6 @@ export default {
|
|||
await del_activate(scope.row.id).then(() => {
|
||||
this.getList();
|
||||
this.$message.success("成功");
|
||||
this.getTableList();
|
||||
})
|
||||
|
||||
})
|
||||
|
|
|
@ -256,7 +256,7 @@ export default {
|
|||
this.saveLoading = false;
|
||||
}
|
||||
})
|
||||
},
|
||||
},
|
||||
handlePreview(file) {
|
||||
if ("url" in file) {
|
||||
window.open(file.url);
|
||||
|
@ -266,10 +266,12 @@ export default {
|
|||
},
|
||||
handleUpSuccess(res, file, filelist) {
|
||||
this.Content.file = res.data.id;
|
||||
this.impForm.file = res.data.path;
|
||||
|
||||
},
|
||||
handleRemove(file, filelist) {
|
||||
this.Content.file = null;
|
||||
this.impForm.file = '';
|
||||
},
|
||||
checkPermission,
|
||||
|
||||
|
|
|
@ -15,13 +15,16 @@
|
|||
|
||||
<script>
|
||||
import {sendMsg} from "@/api/msg";
|
||||
import checkPermission from "@/utils/permission";
|
||||
import "@riophae/vue-treeselect/dist/vue-treeselect.css";
|
||||
export default {
|
||||
data() {
|
||||
return {
|
||||
form: {names: []}, // 用于存储复选框的选中状态
|
||||
};
|
||||
},
|
||||
methods: {
|
||||
methods: {
|
||||
checkPermission,
|
||||
handleSendEmail() {
|
||||
// 检查是否有复选框被选中
|
||||
if (this.form.names.length > 0) {
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
<template>
  <div class="app-container">
    <el-card class="elHeader">
      <el-button type="primary" @click="handleImport">导入文件</el-button>
    </el-card>
    <el-dialog
      :visible.sync="impDialogVisible"
      title="导入"
      :close-on-click-modal="false"
    >
      <el-form
        ref="Form"
        label-width="200px"
        label-position="right"
      >
        <!-- Source report (PDF). Has its own ref, file list and handlers. -->
        <el-form-item label="检验检测服务业统计-PDF" prop="file">
          <el-upload
            ref="uploadPdf"
            :action="upUrl"
            :on-success="handleUpSuccess"
            :on-remove="handleRemove"
            :on-preview="handlePreview"
            :headers="upHeaders"
            :file-list="fileList"
            :limit="1"
            accept=".pdf">
            <el-button size="small" type="primary">上传文件</el-button>
          </el-upload>
        </el-form-item>
        <!-- Target workbook (Excel). Kept independent from the PDF uploader. -->
        <el-form-item label="检验检测服务业统计-EXCEL" prop="file">
          <el-upload
            ref="uploadExcel"
            :action="upUrl"
            :on-success="handleUpSuccess_res"
            :on-remove="handleRemove_res"
            :on-preview="handlePreview"
            :headers="upHeaders"
            :file-list="fileList_res"
            :limit="1"
            accept=".xlsx">
            <el-button size="small" type="primary">上传文件</el-button>
          </el-upload>
        </el-form-item>

      </el-form>
      <div style="text-align: right">
        <el-button type="danger" @click="close_dialog">取消</el-button>
        <el-button type="primary" @click="submitUpload" :disabled="saveLoading">确认</el-button>
      </div>
    </el-dialog>
  </div>
</template>
<script>
import checkPermission from "@/utils/permission";
import Pagination from "@/components/Pagination"; // secondary package based on el-pagination
import Treeselect from "@riophae/vue-treeselect";
import "@riophae/vue-treeselect/dist/vue-treeselect.css";
import { parsePDF } from "@/api/contacts";
import { getMyQi} from "@/api/qualificationInfo";
import { upUrl, upHeaders } from "@/api/file";

/**
 * Annual-report page: the user uploads a PDF report and an Excel workbook,
 * both paths are sent to the parse endpoint, and the file at the returned
 * URL is offered as a download.
 */
export default {
  components: { Pagination, Treeselect },
  data() {
    return {
      upHeaders: upHeaders(),
      upUrl: upUrl(),
      fileList: [],       // files shown by the PDF uploader
      fileList_res: [],   // files shown by the Excel uploader
      saveLoading: false, // true while a parse request is in flight
      impDialogVisible: false,
      // Payload of the parse request: paths returned by the upload endpoint.
      impForm: {
        pdf_file: '',
        excel_file: '',
      },
    };
  },
  methods: {
    checkPermission,

    /** Forget both uploaded files and their recorded paths. */
    resetImport() {
      this.fileList = [];
      this.fileList_res = [];
      this.impForm.pdf_file = '';
      this.impForm.excel_file = '';
    },

    /** Open the import dialog in a clean state. */
    handleImport() {
      this.resetImport();
      this.impDialogVisible = true;
    },

    /** Cancel handler: hide the dialog and discard the selection. */
    close_dialog() {
      this.impDialogVisible = false;
      this.resetImport();
    },

    /**
     * Confirm handler: send both paths to the parse endpoint and download
     * the file at the URL it returns. `saveLoading` is always lowered again,
     * including when the request rejects.
     */
    submitUpload() {
      if (!this.impForm.pdf_file || !this.impForm.excel_file) {
        // A path is only recorded once its upload has succeeded.
        this.$message.error("请先上传PDF和EXCEL文件");
        return;
      }
      this.impDialogVisible = false;
      this.saveLoading = true;
      parsePDF(this.impForm)
        .then(res => {
          if (res.code >= 200) {
            this.downloadFile(res.data.url);
            this.$message.success("解析成功");
          } else {
            this.$message.error(res.message);
          }
        })
        .catch(err => {
          // NOTE(review): assumes the shared request wrapper already shows
          // transport/HTTP failures to the user - confirm.
          console.error(err);
        })
        .then(() => {
          this.saveLoading = false;
        });
    },

    /**
     * Trigger a browser download through a temporary anchor element.
     * @param {string} url - address of the file to download
     */
    downloadFile(url) {
      const link = document.createElement('a');
      link.href = url;
      link.setAttribute('download', '检验检测服务业统计.xlsx'); // suggested file name
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    },

    /** Open a listed file: its own url if present, else the uploaded path. */
    handlePreview(file) {
      if ("url" in file) {
        window.open(file.url);
      } else {
        window.open(file.response.data.path);
      }
    },

    /** PDF uploaded: keep it as the only listed file and record its path. */
    handleUpSuccess(res, file, filelist) {
      this.fileList = [file];
      this.impForm.pdf_file = res.data.path;
    },

    /** Excel uploaded: keep it as the only listed file and record its path. */
    handleUpSuccess_res(res, file, filelist) {
      this.fileList_res = [file];
      this.impForm.excel_file = res.data.path;
    },

    /** PDF removed: clear only the PDF side. */
    handleRemove(file, filelist) {
      this.fileList = [];
      this.impForm.pdf_file = '';
    },

    /** Excel removed: clear only the Excel side. */
    handleRemove_res(file, filelist) {
      this.fileList_res = [];
      this.impForm.excel_file = '';
    },
  },
};
</script>
<style></style>
|
|
@ -93,7 +93,7 @@ class QualityActivities(CommonBDModel):
|
|||
|
||||
|
||||
class Contact(CommonBDModel):
|
||||
name = models.CharField(max_length=20, unique=True, verbose_name='姓名')
|
||||
name = models.CharField(max_length=20, unique=True, verbose_name='公司名称')
|
||||
address = models.CharField(max_length=100, verbose_name='地址')
|
||||
header = models.CharField(max_length=20, verbose_name='负责人')
|
||||
tel = models.CharField(max_length=20, verbose_name='负责人电话')
|
||||
|
|
|
@ -12,9 +12,10 @@ from rest_framework.exceptions import ParseError
|
|||
from apps.system.models import Organization
|
||||
from .models import *
|
||||
from .serializers import *
|
||||
|
||||
from utils.pdf2txt import run
|
||||
from datetime import datetime
|
||||
import os
|
||||
import traceback
|
||||
|
||||
|
||||
class ImpMixin:
|
||||
|
@ -55,6 +56,7 @@ class ImpMixin:
|
|||
if 'file' not in request.data:
|
||||
raise ParseError('请提供文件')
|
||||
path = request.data['file']
|
||||
print(path, "---------ssss")
|
||||
|
||||
if not str(path).endswith('.xlsx'):
|
||||
raise ParseError('请提供xlsx格式文件')
|
||||
|
@ -302,7 +304,7 @@ class QualityActivitiesViewSet(ImpMixin, RbacFilterSet, CreateUpdateCustomMixin,
|
|||
return self.gen_imp_view(request, 2, QualityActivitiesSerializer)
|
||||
|
||||
|
||||
class ContactViewSet(CreateUpdateCustomMixin, ModelViewSet):
|
||||
class ContactViewSet(ImpMixin, CreateUpdateCustomMixin, ModelViewSet):
|
||||
queryset = Contact.objects.all()
|
||||
serializer_class = ContactSerializer
|
||||
|
||||
|
@ -321,18 +323,18 @@ class ContactViewSet(CreateUpdateCustomMixin, ModelViewSet):
|
|||
data_list = []
|
||||
for row in sheet.iter_rows(min_row=start, values_only=True): # 假设第一行是表头,从第二行开始读取数据
|
||||
if row[0] is not None:
|
||||
activate_time = row[6].strftime("%Y-%m-%d")
|
||||
role_dict = {"组织方":0, "参与方":1}
|
||||
serializer_data = {
|
||||
'name': row[1], # 第一列是名字
|
||||
'roles':role_dict.get(row[2]),
|
||||
'collaborators':row[3],
|
||||
'orgunits':row[4],
|
||||
'place':row[5],
|
||||
'activate_time':activate_time,
|
||||
'participations':row[7],
|
||||
'function':row[8],
|
||||
'earnings':row[9]
|
||||
'address':row[2],
|
||||
'header':row[3],
|
||||
'tel':row[4],
|
||||
'email':row[5],
|
||||
'head_technology':row[6],
|
||||
'tel_technology':row[7],
|
||||
'email_technology':row[8],
|
||||
'head_quality':row[9],
|
||||
'tel_quality':row[10],
|
||||
'email_quality':row[11],
|
||||
}
|
||||
data_list.append(serializer_data)
|
||||
return data_list
|
||||
|
@ -344,6 +346,23 @@ class ContactViewSet(CreateUpdateCustomMixin, ModelViewSet):
|
|||
"""
|
||||
return self.gen_imp_view(request, 2, ContactSerializer)
|
||||
|
||||
# Parse an uploaded PDF report into an uploaded Excel workbook.
@action(detail=False, methods=['post'])
@transaction.atomic
def parse_pdf(self, request, *args, **kwargs):
    """
    Fill the Excel workbook named by ``excel_file`` with values extracted
    from the PDF named by ``pdf_file``.

    Both values are read from the request body and handed to ``run``.
    Responds 200 with the workbook path under ``url`` on success and 400 when
    parsing fails. Raises ``ParseError`` when a field is missing/empty or has
    the wrong extension.
    """
    pdf_file = request.data.get('pdf_file')
    excel_file = request.data.get('excel_file')
    # Reject bad input up front so the caller is told what is wrong instead
    # of receiving the generic failure produced by the handler below.
    if not pdf_file or not excel_file:
        raise ParseError('请提供pdf文件和excel文件')
    if not str(pdf_file).endswith('.pdf'):
        raise ParseError('请提供pdf格式文件')
    if not str(excel_file).endswith('.xlsx'):
        raise ParseError('请提供xlsx格式文件')
    # SECURITY NOTE(review): both values come from the client and are used as
    # filesystem paths by `run` (one is read, the other is overwritten).
    # Confirm they are confined to the upload directory before this ships.
    try:
        run(pdf_file, excel_file)
    except Exception:
        # Best effort by design: log the traceback, answer with a plain 400.
        traceback.print_exc()
        return Response({"message":"解析失败"}, status = status.HTTP_400_BAD_REQUEST)
    return Response({"message":"解析成功", "url":excel_file}, status = status.HTTP_200_OK)
|
||||
|
||||
|
||||
class ExternalAuditorsViewSet(ImpMixin, RbacFilterSet, CreateUpdateCustomMixin, ModelViewSet):
|
||||
queryset = ExternalAuditors.objects.all()
|
||||
|
|
|
@ -0,0 +1,387 @@
|
|||
"""
|
||||
Author: tianyang.zhang
|
||||
Date: 2024-04-25
|
||||
param: {pdf_file:输入PDF文件路径,excel_file:输入Excel文件路径}
|
||||
description: 提取PDF文件中的文字信息,并将其保存到Excel文件中,正则表达式提取信息。
|
||||
|
||||
"""
|
||||
import re
|
||||
import fitz # PyMuPDF
|
||||
import re
|
||||
from openpyxl import load_workbook
|
||||
|
||||
RE_LIST = [
|
||||
[r"法定代表人(单位负责人):(.*?)\n", "C4"],
|
||||
[r"法定代表人(单位负责人):(.*?)\n", "C22"],
|
||||
[r"本机构名称:(.*?)\n","C5"],
|
||||
[r"单位注册地址及行政区划 (.*?)\n","C8"],
|
||||
[r" 单位所在地址及行政区划 (.*?)\n","C9"],
|
||||
[r"执行会计制度:(.*?)\n","C7"],
|
||||
[r"单位类型\(01-1\): (.*?)\n", "C16"],
|
||||
[r"法人单位名称\(02-1\):(.*?)\n", "C17"],
|
||||
[r"4\):(.*?)\n", "F16"],
|
||||
[r"法人单位行业分类代码(四位数字):(.*?)\n", "F17"],
|
||||
[r"固定电话:(.*?)\n", "F24"],
|
||||
[r"传真号码:(.*?)\n", "F25"],
|
||||
[r"电子邮箱:(.*?)\n", "F26"],
|
||||
[r"登记注册类型:(.*?)\n", "F28"],
|
||||
[r"所属管辖区:(.*?)\n", "C27"],
|
||||
[r"机构类型:(.*?)\n", "C29"],
|
||||
[r"是否由事业单位转企改制而来:(.*?)\n", "F29"],
|
||||
[r"企业控股情况:(.*?)\n", "C32"],
|
||||
[r"营业状态:(.*?)\n", "C33" ],
|
||||
[r"是否高新技术企业认定:(.*?)\n", "C35"],
|
||||
[r"认定或复审高新技术企业证书编号:(.*?)\n", "C36"],
|
||||
[r"高新技术企业的日期:(.*?)\n", "F36"],
|
||||
[r"是否境内上市和在新三板挂牌?(.*?)\n", "C38"],
|
||||
[r"上市挂牌时间:(.*?)\n", "H39"],
|
||||
[r"保费支出(\d+\.\d+?)万元\n", "F41"],
|
||||
[r"检验检测机构责\n任险:\n(.*?)\n", "C41"],
|
||||
[r"检验检测机构责\n任险:\n(.*?)\n", "C41"],
|
||||
[r"检验检测人员职\n业责任险:\n(.*?)\n", "C42"],
|
||||
[r"保费支出(\d+\.\d+?)万元\n", "F42"],
|
||||
[r"企业集团:(.+?)\n", "C30"],
|
||||
[r"领域一(.+?)\n", "C52"],
|
||||
[r"领域二(.+?)\n", "C53"],
|
||||
[r"领域三(.+?)\n", "C54"],
|
||||
[r"领域四(.+?)\n", "C55"],
|
||||
[r"领域五(.+?)\n", "C56"],
|
||||
[r"其他领域描述:(.+?)\n", "C58"],
|
||||
[r"检验检测服务关键词:(.*?)\n", "C60"],
|
||||
[r"本检验检测机构当年接受各类技术评审:(.*?)次", "E85"],
|
||||
[r"接受资质认定技术\n评审:\n(.*?)次", "E86"],
|
||||
[r"管部门组织检查:\n(.*?)次", "E87"],
|
||||
[r"技术监督部门(市\n场监管部门)组织\n评审:\n(.*?)次", "E88"],
|
||||
[r"接受行业管理部门\n组织技术评审:\n(.*?)次", "E89"],
|
||||
[r"其中国家行业管\n理部门组织评审:\n(.+?)次", "E90"],
|
||||
[r"本检验检测机构当年接受各类监督检查(.*?)项", "E94"],
|
||||
[r"接受资质认定专项监督检查:\n(.*?)项", "E95"],
|
||||
[r"其中国家市场监管部门组织检查:\n(.*?)项", "E96"],
|
||||
[r"省级及以下质量技术监督部门(市场监管部门)\n组织检查:\n(.*?)项", "E97"],
|
||||
[r"接受行业管理部门监督检查:\n(.*?)项", "E98"],
|
||||
[r"其中国家行业管理部门组织检查:\n(.*?)项", "E99"],
|
||||
[r"省级及以下行业管理部门组织检查:\n(.*?)项", "E100"],
|
||||
[r"接受其他社会组织、团体及境内外评价机构监督检\n查:\n(.*?)项", "E101"],
|
||||
[r"本单位是否处于国家检验检测认证公共服务平台示范区:(.*?)\n", "E102"],
|
||||
[r"本单位处于工业园区(开发区):(.*?)\n", "E103"],
|
||||
[r"是否加入国内外产业联盟:(.*?)\n", "C104"],
|
||||
[r"产业联盟1名称:(.*?)\n", "C105"],
|
||||
[r"产业联盟2名称:(.*?)\n", "E105"],
|
||||
[r"产业联盟3名称:(.*?)\n", "C106"],
|
||||
[r"产业联盟4名称:(.*?)\n", "E106"],
|
||||
[r"是否加入国内外行业协会组织:(.*?)\n", "E107"],
|
||||
[r"存货\n01\n(.*?)\n", "D111"],
|
||||
[r"固定资产原值\(原价\)\n02\n(.*?)\n", "D112"],
|
||||
[r"累计折旧\n03\n(.*?)\n", "D113"],
|
||||
[r"其中,本年折旧\n04\n(.*?)\n", "D114"],
|
||||
[r"资产总计\n05\n(.*?)\n", "D115"],
|
||||
[r"负债合计\n06\n(.*?)\n", "D116"],
|
||||
[r"营业收入\n07\n(.*?)\n", "D117"],
|
||||
[r"营业成本\n08\n(.*?)\n", "D118"],
|
||||
[r"营业税金及附加\n09\n(.*?)\n", "D119"],
|
||||
[r"销售费用\n10\n(.*?)\n", "D120"],
|
||||
[r"管理费用\n11\n(.*?)\n", "D121"],
|
||||
[r"其中,税金\n12\n(.*?)\n", "D122"],
|
||||
[r"差旅费\n13\n(.*?)\n", "D123"],
|
||||
[r"财务费用\n14\n(.*?)\n", "D124"],
|
||||
[r"其中,利息净支出\n15\n(.*?)\n", "D125"],
|
||||
[r"资产减值损失\n16\n(.*?)\n", "D126"],
|
||||
[r"公允价值变动收益\n17\n(.*?)\n", "D127"],
|
||||
[r"投资收益\n18\n(.*?)\n", "D128"],
|
||||
[r"其他收益\n19\n(.*?)\n", "D129"],
|
||||
[r"营业利润\n20\n(.*?)\n", "D130"],
|
||||
[r"营业外收入\n21\n(.*?)\n", "D131"],
|
||||
[r"营业外支出\n23\n(.*?)\n", "D132"],
|
||||
[r"营业外支出\n23\n(.*?)\n", "D132"],
|
||||
[r"利润总额\n24\n(.*?)\n", "D133"],
|
||||
[r"净利润\n25\n(.*?)\n", "D134"],
|
||||
[r"所得税费用\n26\n(.*?)\n", "D135"],
|
||||
[r"应付职工薪酬\(本期贷方\n累计发生额\)\n27\n(.*?)\n", "D136"],
|
||||
[r"本年应交增值税\(本期累\n计发生额\)\n28\n(.*?)\n", "D137"],
|
||||
[r"检验检测业务活动类型\(可多选\)\(00-1\):(.*?)\n", "C140"],
|
||||
[r"检验检测业务活动特点\(00-2\)(.*?)\n", "C141"],
|
||||
[r"检验检测报告数合计:\n(.*?)份", "F144"],
|
||||
[r"合格的报告数:(.*?)份\n", "F145"],
|
||||
[r"其中,为省\(自治区、直辖市\)外出具\n检验检测报告数:\n(.*?)份", "F146"],
|
||||
[r"行政执法或政府委托检验检测\n报告份数(.*?)份", "F147"],
|
||||
[r"收入\s(\d+\.\d+)万元\(01-101-2\)\n", "I147"],
|
||||
[r"其中,当年承担产品质量国家监督抽查\n工作出具检验检测报告数:\n(.*?)份", "F148"],
|
||||
[r"其中,当年承担产品质量地方监督抽查\n工作出具检验检测报告数:\n(.*?)份", "F149"],
|
||||
[r"其中,当年承担3C强制性认证检验检\n测报告数:\n(.*?)份", "F150"],
|
||||
[r"其中,当年承担生产许可证检验检测报\n告数数:\n(.*?)份", "F151"],
|
||||
[r"社会委托检验检测\n报告份数(.*?)份", "F167"],
|
||||
[r"\(01-102-1\) 收入 (\d+\.\d+) 万元\(01-102-2\)\n", "I167"],
|
||||
[r"司法鉴定、仲裁检验检测\n报告份数(.*?)份","F170"],
|
||||
[r"01-103-1\)\n收入 (\d+\.\d+) 万元\(01-103-2\)\n","I170"],
|
||||
[r"其他技术服务\n报告份数 (\d+) 份","F172"],
|
||||
[r"01-104-1\)\n收入 (\d+\.\d+) 万元\(01-104-2\)\n", "I172"],
|
||||
[r"是否服务制造业企业\(选是,继续填报以下\n内容\)\n(.*?)\n", "E174"],
|
||||
[r"服务制造业企业的业务收入占总收入:\n(.*?)%", "E175"],
|
||||
[r"服务制造业企业的业务收入较去年\n增长:\n(.*?)%", "H175"],
|
||||
[r"是否服务个人消费者\(选是,继续填报以下\n内容\)\n(.*?)\n", "H176"],
|
||||
[r"服务个人消费者的业务收入占总收入:\n(\d+\.\d+)%", "E177"],
|
||||
[r"服务个人消费者的业务收入较去年年\n增长:\n(\d+\.\d+)%", "H177"],
|
||||
[r"是否为高技术产业(制造业)提供检验检测服\n务\n(.*?)\n", "E178"],
|
||||
[r"是否为高技术产业\(制造业\)提供检验检测服\n务\n(.*?)\n", "E187"],
|
||||
[r"新一代信息技术产业,业务比重:\n(.*?)%", "E190"],
|
||||
[r"高端装备,业务比重:\n(.*?)%", "E191"],
|
||||
[r"新材料,业务比重:\n(.*?)%", "E192"],
|
||||
[r"生物产业,业务比重:\n(.*?)%", "E193"],
|
||||
[r"新能源汽车,业务比重:\n(.*?)%", "E194"],
|
||||
[r"新能源产业,业务比重:\n(.*?)%", "E195"],
|
||||
[r"节能环保,业务比重:\n(.*?)%", "E196"],
|
||||
[r"全部仪器设备\n(.*?)台套", "D200"],
|
||||
[r"其中,50万元\n以上仪器设备\n(.*?)台套", "D201"],
|
||||
[r"其中,进口仪\n器设备\n(.*?) 台套", "D205"],
|
||||
[r"全部仪器设备资\n产原值\n(\d+\.\d+) 万元", "D206"],
|
||||
[r"其中,50万元\n以上仪器设备资产\n原值\n(\d+\.\d+) 万元", "D207"],
|
||||
[r"50-100万元\n仪器设备资产\(在\n用\)原值\n(\d+\.\d+) 万元", "D208"],
|
||||
[r"其中,进口仪\n器设备资产原值\n(\d+\.\d+) 万元", "D217"],
|
||||
[r"与检验检测相关\n的固定资产原值\n\(设备\)\n(\d+\.\d+) 万元", "D218"],
|
||||
[r"当年新增仪器设\n备\n(.*?)台套", "D219"],
|
||||
[r"其中,当年新\n增50万元以上仪器\n设备\(设备\)\n(.*?)台套", "D220"],
|
||||
[r" 其中,50-\n100万元仪器设备\n(.*?)台套", "D221"],
|
||||
[r" 其中,200\n万以上仪器设备\n(.*?)台套", "D223"],
|
||||
[r"当年新增仪器设\n备原值合计\n(.*?)万元", "D224"],
|
||||
[r"其中,当年新增\n50万元以上仪器设\n备资产原值\n(.*?)万元", "D225"],
|
||||
[r"其中,50-100\n万元仪器设备资产\n原值\n(.*?)万元", "D226"],
|
||||
[r"其中,100-\n200万仪器设备资产\n原值\n(.*?)万元", "D227"],
|
||||
[r"其中,200万\n元以上仪器设备资\n产原值\n(.*?)万元", "D228"],
|
||||
[r"机构总面积\n(.*?)平方米", "D231"],
|
||||
[r"其中办公面积(.*?)平方米", "D232"],
|
||||
[r"实验室面积\n(.*?)平方米", "D233"],
|
||||
[r"其中,恒温\n恒湿实验室\n(.*?)平方米", "D234"],
|
||||
[r"其中,P2\n以上生物安全实验\n室\n(.*?)平方米", "D235"],
|
||||
[r"其中,二恶\n英实验室\n(.*?)平方米", "D236"],
|
||||
[r"其中,电磁\n屏蔽实验室\n(.*?)平方米", "D237"],
|
||||
[r"其中,消声\n实验室\n(.*?)平方米", "D238"],
|
||||
[r"其中,放射\n性实验室\n(.*?)平方米", "D239"],
|
||||
[r" 其中,动物\n房\n(.*?)平方米", "D240"],
|
||||
[r"专用室外试验\n场\n(.*?)平方米", "D241"],
|
||||
[r"参数(.*?)项", "C244"],
|
||||
[r"产品标准(.*?)项", "E244"],
|
||||
[r"方法标准(.*?)项", "G244"],
|
||||
[r"检验检测从业人员期末人数(.*?)人", "D246"],
|
||||
[r"其中:研究生及\n以上学历\(03-401\)(.*?)人", "D247"],
|
||||
[r"大学本科\n学历\(03-402\)\n(.*?)人", "D248"],
|
||||
[r"专科及以\n下学历\(03-403\)\n(.*?)人", "D249"],
|
||||
[r"其中:高级技术\n职称人员\(03-404\)\n(.*?)人", "D250"],
|
||||
[r"中级技术\n职称人员\(03-405\)\n(.*?)人", "D251"],
|
||||
[r"初级技术\n职称人员\(03-406\)\n(.*?)人", "D252"],
|
||||
[r"具备中级\n技术职称同等水平\n的技术能力人员\n(.*?)人", "D253"],
|
||||
[r"其他\(03-\n407\)\n(.*?)人", "D254"],
|
||||
[r"其中:授权签字人\n\(03-408\)\n(.*?)人", "D255"],
|
||||
[r"管理人员\n\(03-409\)\n(.*?)人", "D256"],
|
||||
[r"检验检测\n\技术人员\(03-410\)\n(.*?)人", "D257"],
|
||||
[r"其中:两院院士\n\(03-411\)\n(.*?)人", "D259"],
|
||||
[r"选人员\(03-413\)\n(.*?)人", "D260"],
|
||||
[r"其他:\n(.*?)研发活动及相关\(03-5\)\n","D261"],
|
||||
[r"当年专利申请受\n理数\n(.*?)件", "D264"],
|
||||
[r"其中:当年发明\n专利申请受理数\n(.*?)件", "D265"],
|
||||
[r"其中:申请欧美\n日专利\n(.*?)件", "D266"],
|
||||
[r"其中:申请PCT\n国际专利\n(.*?)件", "D267"],
|
||||
[r"当年专利授权书\n数\n(.*?)件", "D268"],
|
||||
[r"其中,当年发明\n专利授权数\n(.*?)件", "D269"],
|
||||
[r"其中:授权欧美\n日专利\n(.*?)件", "D270"],
|
||||
[r"期末有效专利数\n(.*?)件", "D271"],
|
||||
[r"其中:期末有效\n发明专利数\n(.*?)件", "D272"],
|
||||
[r"其中:拥有境外\n授权专利\n(.*?)件", "D273"],
|
||||
[r"期末拥有注册商\n标\n(.*?)件", "D274"],
|
||||
[r"其中:当年注\n册商标\n(.*?)件", "D275"],
|
||||
[r"其中:境外注\n册商标\n(.*?)件", "D276"],
|
||||
[r"其中:驰名商\n标\n(.*?)件", "D277"],
|
||||
[r"马德里商标国\n际注册申请量\n(.*?)件", "D278"],
|
||||
[r"拥有软件著作权\n(.*?)件", "D279"],
|
||||
[r"其中:当年获\n得软件著作权\n(.*?)件", "D280"],
|
||||
[r"是否获得本年度\n国务院国家科学技\n术奖:\n(.*?)\n", "E282"],
|
||||
[r"市人民政府设立的\n省级科学技术奖:\n(.*?)\n", "E284"],
|
||||
[r" 其他\n(.*?)04\n","E287"],
|
||||
[r"主要服务地域\(04-1\):(.*?)\n", "D289"],
|
||||
[r"主要客户类型\(可多选\)\(04-2\):(.*?)\n", "D290"],
|
||||
[r"\s+科研项目总计(.*?)项", "D293"],
|
||||
[r"其中,国家级项目(.*?)项", "D294"],
|
||||
[r"其中,省部级项目(.*?)项", "D295"],
|
||||
[r"科研经费总计(.*?)万元", "D296"],
|
||||
[r"其中,国家级项目(.*?)万元", "D297"],
|
||||
[r"其中,省部级项目(.*?)万元", "D298"],
|
||||
[r"标准制修订经费总计(.*?)万元", "D300"],
|
||||
[r"其中,国家标准(.*?)项", "D301"],
|
||||
[r"其中,行业标准(.*?)项", "D302"],
|
||||
[r"其中,地方标准(.*?)项", "D303"],
|
||||
[r"其中,国际标准(.*?)项", "D304"],
|
||||
[r"其中,国际标准(.*?)项", "D304"],
|
||||
[r"本机构人员是否在认证认可、检验检测相关领域国际标准化组织任职(.*?)\(05-202\)", "D305"],
|
||||
[r"人员姓名:(.*?)\(05-203-1\)", "C306"],
|
||||
[r"担任职务:(.*?)\(05-203-2\)", "C307"],
|
||||
[r"本年度参加能力\n验证计划合计\n(.*?)项", "D313"],
|
||||
[r"其中,国家级\n能力验证项目\n(.*?)项", "D314"],
|
||||
[r"市场监管总局\n\(国家认监委\)能\n力验证项目\n(.*?)项", "D315"],
|
||||
[r"国家有关行业\n主管部门能力验证\n项目\n(.*?)项", "D316"],
|
||||
[r"省级能力验证\n项目\n(.*?)项", "D317"],
|
||||
[r"国内能力验证\n提供者项目\n(.*?)项", "D318"],
|
||||
[r"国际能力验证\n提供者和国家相关\n组织项目\n(.*?)项", "D319"],
|
||||
[r"其他能力验证\n项目\n(.*?)项", "D320"],
|
||||
[r"参加测量审核合\n计\n(.*?)项", "D321"],
|
||||
[r"机构本年度是否发生变更(.*?)\(07-1\)", "D323"],
|
||||
[r"是否工业和信息化部认定的“工业产品质量控制和技术评价实验室”(.*?)\n", "D344"],
|
||||
[r"是否工业和信息化部认定的“工业产品质量控制和技术评价实验室”(.*?)\n", "D344"],
|
||||
[r"实验室名称:(.*?)所属行业", "C345"],
|
||||
[r"所属行业:(.*?)授牌","E345"],
|
||||
[r"授牌\n年份:(.*?)\n","G345"],
|
||||
[r"是否通过互联网开展检验检测业务\?(.*?)\n","D347"],
|
||||
[r"单位负责人:(.*?)\n","C351"],
|
||||
[r"财务负责人:(.*?)\n","E351"],
|
||||
[r"填表人:(.*?)\n","H351"],
|
||||
[r"单位负责人电话:(.*?)\n","C352"],
|
||||
[r"财务负责人电话:(.*?)\n","E352"],
|
||||
[r"填表人电话:(.*?)\n","H352"],
|
||||
[r"资质认定联系人座\n机:(.*?)资质认定联系人手\n","C353"],
|
||||
[r"资质认定联系人手\n机:(.*?)\n","E353"],
|
||||
[r"资质认定联系人邮\n箱:(.*?)\n","H353"],
|
||||
[r"资质认定联系人姓\n名:(.*?)\n","C354"],
|
||||
]
|
||||
SPECIALLIST = [
|
||||
[r"开业\(成立\)时间:", "F22", 11],
|
||||
[r"是否含有外资:", "C28", 2],
|
||||
[r"执行企业会计准则情况:", "F31", 15],
|
||||
[r"境外认可机构颁发证书:", "E68", 4],
|
||||
[r"质检中\n心:", "C82", 4],
|
||||
[r"组织评审:;(.*?)", "E91", 4],
|
||||
[r"接受认可机构评审:(.*?)", "E92", 4],
|
||||
[r"评价机构评审:", "E93", 4],
|
||||
[r"行业协会1名称:", "C108", 11],
|
||||
[r"行业协会2名称:", "E108", 11],
|
||||
[r"其中,100-\n200万仪器设备\n", "D222", 2],
|
||||
[r"是否愿意将仪器\n设备对外共享:\n", "D229", 2],
|
||||
]
|
||||
COMMON_KEY = [
|
||||
[r"其他地址:(.*?)\n", ["C10", "C11", "C12"]],
|
||||
[r"闲置仪器设\n备原值\n (\d+\.\d+) 万元", ["D209","D212", "D215"]]
|
||||
]
|
||||
SPECIALLIST_2 = [
|
||||
[r"长途区\n(.*?)\n", "C24"],
|
||||
[r"移动电\n(.*?)\n", "C25"],
|
||||
[r"邮政编\n(.*?)\n", "C26"],
|
||||
]
|
||||
SPECIALLIST_3 = [
|
||||
[r"收\n入\n占\n比\n(.*?)\n", ["E52", "E53", "E54", "E55", "E56"]],
|
||||
[r"业\n务\n饱\n和\n度\n(.*?)\n", ["H52", "H53", "H54", "H55", "H56"]],
|
||||
[r"构资质认定证书:\n(.*?)\n", ["E61"]],
|
||||
[r"颁发的资质认定证书:\n(.*?)\n", ["E62"]],
|
||||
[r"国家市场监督管理部门颁发的特种设备检验检测机构核准证:\n(.*?)\n", ["E63"]],
|
||||
[r"省级市场监督管理部门颁发的特种设备检验检测机构核准证:\n(.*?)\n", ["E64"]],
|
||||
[r"资质、资格证书:\n(.*?)\n", ["E65"]],
|
||||
[r"中国合格评定国家认可中心颁发的:\n(.*?)\n", ["E66"]],
|
||||
[r"其他社会组织、团体颁发的证书:\n(.*?)\n", ["E67"]],
|
||||
[r"\s+其中,个人委托检验检测报告:\n(.*?)\n", ["F168"]],
|
||||
["其中,单位委托检验检测报告:\n(.*?)\n", ["F169"]],
|
||||
[r"其中,出具司法鉴定意见书:\n(.*?)\n",["F171"]],
|
||||
]
|
||||
SPEMANY = [
|
||||
[r"50-100万元\n仪器设备在用\n(.*?)台套 ,闲置 (.*?) 台套 ,待报废 (.*?) 台套", ["D202", "G202","I202"]],
|
||||
[r"100-200万仪\n器设备在用\n(.*?)台套 ,闲置 (.*?) 台套 ,待报废 (.*?) 台套", ["D203", "G203","I203"]],
|
||||
[r" 200万元以上\n仪器设备在用\n(.*?)台套 ,闲置 (.*?) 台套 ,待报废 (.*?) 台套", ["D204", "G204","I204"]],
|
||||
[r"50-100万元\n仪器设备资产\(在\n用\)原值\n(\d+\.\d+) 万元\n\s*闲置仪器设\n备原值\n(\d+\.\d+) 万元\n\s*待报废仪器\n设备原值\n(\d+\.\d+) 万元", ["D208", "D209", "D210"]],
|
||||
[r"100-200万仪\n器设备资产\(在\n用\)原值\n(\d+\.\d+) 万元\n\s*闲置仪器设\n备原值\n(\d+\.\d+) 万元\n\s*待报废仪器\n设备原值\n(\d+\.\d+) 万元", ["D211", "D212", "D213"]],
|
||||
[r"200万元以上\n仪器设备\(在用\)\n原值\n(\d+\.\d+) 万元\n\s*闲置仪器设\n备原值\n(\d+\.\d+) 万元\n\s*待报废仪器\n设备原值\n(\d+\.\d+) 万元", ["D214", "D215", "D216"]],
|
||||
]
|
||||
|
||||
|
||||
# Extract the text of every page of a PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of all pages of the PDF at ``pdf_path``, concatenated.

    The document is closed even when reading a page raises.
    """
    doc = fitz.open(pdf_path)
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()
|
||||
|
||||
# Match one pattern and return one capture group
def match_text_with_regex(text, pattern, num:int):
    """Return group ``num`` of the first match of ``pattern`` in ``text``.

    The search uses ``re.DOTALL``. The captured text is stripped of
    surrounding whitespace. Returns ``None`` when the pattern does not match
    or the requested group did not take part in the match.
    """
    found = re.search(pattern, text, re.DOTALL)
    if found is None:
        return None
    captured = found.group(num)
    return captured.strip() if captured is not None else None
|
||||
|
||||
# Collect every match of a pattern
def match_text_with_all(text, pattern):
    """Return the whole text of every match of ``pattern`` in ``text``.

    Each entry is the complete match (not a capture group), stripped of
    surrounding whitespace. Returns an empty list when nothing matches.
    """
    return [hit.group().strip() for hit in re.finditer(pattern, text)]
|
||||
|
||||
def match_text_with_group(text, pattern):
    """Return group 1 of every match of ``pattern``, without its last character.

    Each captured value is stripped of surrounding whitespace and then has
    its final character removed. A match whose group 1 did not participate
    yields ``None``. Returns an empty list when nothing matches.
    """
    values = []
    for hit in re.finditer(pattern, text):
        captured = hit.group(1)
        values.append(captured.strip()[:-1] if captured is not None else None)
    return values
|
||||
|
||||
def match_many_res(text, pattern):
    """Return the capture groups of the first match of ``pattern`` as a list.

    Empty captures become ``None``. A pattern with a single group (or none)
    yields a one-element list. Returns ``''`` when nothing matches.
    """
    matches = re.findall(pattern, text)
    if not matches:
        return ''
    first = matches[0]
    # findall yields plain strings unless the pattern has two or more groups;
    # wrap them so the value is not iterated character by character.
    if isinstance(first, str):
        first = (first,)
    return [part if part else None for part in first]
|
||||
|
||||
def match_text_with_match(text, pattern):
    """Return the text right after the first colon of the first match.

    The whole match is stripped and split on colons (ASCII ``:`` or
    full-width U+FF1A); the piece following the first colon is returned.
    Returns ``None`` when the pattern does not match or the match contains
    no colon.
    """
    found = re.search(pattern, text)
    if found is None:
        return None
    pieces = re.split("[:\uff1a]", found.group().strip())
    return pieces[1] if len(pieces) > 1 else None
|
||||
|
||||
# Write one extracted value into the workbook
def fill_excel(matches, EXCEL_PATH, local):
    """Write ``matches`` into cell ``local`` of the active sheet and save.

    The workbook at ``EXCEL_PATH`` is loaded, updated and saved back to the
    same path. A falsy ``matches`` (``None``, ``''``) means there is nothing
    to write, so the workbook is left untouched.
    """
    if not matches:
        return
    wb = load_workbook(EXCEL_PATH)
    try:
        wb.active[local] = matches
        wb.save(EXCEL_PATH)
    finally:
        wb.close()
|
||||
|
||||
def get_index(text, pattern, span):
    """Return the ``span`` characters that follow the first match of ``pattern``.

    Fewer characters are returned when the text ends sooner. Returns ``None``
    when the pattern does not match.
    """
    found = re.search(pattern, text)
    if found is None:
        return None
    begin = found.end()
    return text[begin:begin + span]
|
||||
|
||||
def run(pdf_path, excel_path):
    """Extract values from the PDF and write them into the Excel workbook.

    The text of ``pdf_path`` is matched against every pattern table of this
    module. Each non-empty result is written to its target cell on the active
    sheet of ``excel_path``, which is loaded once and saved once. A pattern
    that finds nothing is skipped and does not stop the remaining patterns.
    When two patterns target the same cell the later one wins.

    Returns ``''``.
    """
    text = extract_text_from_pdf(pdf_path)
    pending = {}  # cell reference -> value, in processing order

    def stage(cell, value):
        # Only non-empty results are written.
        if value:
            pending[cell] = value

    # One capture group -> one cell
    for pattern, cell in RE_LIST:
        stage(cell, match_text_with_regex(text, pattern, 1))

    # Fixed-width text following a label
    for pattern, cell, span in SPECIALLIST:
        stage(cell, get_index(text, pattern, span))

    # One label occurring several times -> successive cells.
    # zip() drops occurrences beyond the listed cells instead of failing.
    for pattern, cells in COMMON_KEY:
        for cell, value in zip(cells, match_text_with_all(text, pattern)):
            stage(cell, value)

    # "label: value" fragments; a label absent from this PDF is skipped.
    for pattern, cell in SPECIALLIST_2:
        try:
            value = match_text_with_match(text, pattern)
        except (AttributeError, IndexError):
            continue
        stage(cell, value)

    for pattern, cells in SPECIALLIST_3:
        for cell, value in zip(cells, match_text_with_group(text, pattern)):
            stage(cell, value)

    # Several capture groups -> several cells; an empty result yields no pairs.
    for pattern, cells in SPEMANY:
        for cell, value in zip(cells, match_many_res(text, pattern)):
            stage(cell, value)

    wb = load_workbook(excel_path)
    ws = wb.active
    for cell, value in pending.items():
        ws[cell] = value
    wb.save(excel_path)
    return ''
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run against local sample files. Raw strings keep the Windows
    # backslashes literal instead of relying on invalid escape sequences.
    run(r"C:\code\pdf_exc\检验检测机构数据查看页2022年.pdf", r"C:\code\pdf_exc\检验检测服务业统计数据上报任务-空表.xlsx")
|
Loading…
Reference in New Issue