# safesite/safesite/safespider.py (62 lines, 4.1 KiB, Python)

import ast
from urllib import parse

import requests
from lxml import etree
# HTTP request headers passed as ``headers=`` to the requests calls in this
# module. The values mimic an XMLHttpRequest issued by Firefox 69 from the
# certificate query page named in ``Referer``.
headers = {
    # Target host and browser identification.
    "Host": "cx.saws.org.cn",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0",
    # Content negotiation.
    "Accept": "text/plain, */*; q=0.01",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    "Accept-Encoding": "gzip, deflate",
    "Content-Type": "application/x-www-form-urlencoded",
    "Connection": "keep-alive",
    # Page the request claims to originate from, and the AJAX marker.
    "Referer": "http://cx.saws.org.cn/cms/html/certQuery/certQuery.do?method=getCertQueryIndex&ref=ch",
    "X-Requested-With": "XMLHttpRequest",
}
def getTzzs(certnum, stu_name):
    """Look up special-operation certificates on cx.saws.org.cn.

    The function first POSTs to the ``getServerTime`` endpoint and uses the
    ``time`` entry of the reply as the ``sessionId`` query parameter. It then
    GETs ``getCertQueryResult`` and scrapes the returned HTML: for every
    ``<th>`` whose text equals a field label, the first following ``<td>``
    sibling supplies the value.

    Args:
        certnum: ID number, as a string.
        stu_name: Holder's name, as a string. It is percent-encoded twice
            before being placed in the query string.

    Returns:
        A list with one dict per ``姓名`` cell found in the page (empty when
        there is none). Each dict has the keys ``姓名``, ``性别``, ``作业类别``,
        ``操作项目``, ``发证机关``, ``初次发证日期``, ``应复审日期``,
        ``有效期开始时间``, ``有效期结束时间``, ``实际复审时间`` and ``url`` (the
        query URL that was fetched). The five date/time fields are stripped
        of surrounding whitespace. A field with fewer cells than ``姓名`` is
        filled with ``''`` for the trailing records.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError, SyntaxError, KeyError: When the ``getServerTime`` reply
            is not a literal mapping containing ``time``.
    """
    endpoint = 'http://cx.saws.org.cn/cms/html/certQuery/certQuery.do'
    # Seconds. Without a timeout, requests blocks forever on a stalled server.
    timeout = 30
    certtype_code = '720'
    encoded_name = parse.quote(parse.quote(stu_name))

    reply = requests.post(
        endpoint + '?method=getServerTime', headers=headers, timeout=timeout
    ).text
    # SECURITY: this used to be eval() on the raw body of a plain-HTTP reply,
    # which executes whatever the server (or anyone tampering with the
    # connection) sends back. literal_eval only accepts Python literals.
    session_id = str(ast.literal_eval(reply.strip())['time'])

    url = (
        endpoint
        + '?method=getCertQueryResult&ref=ch&certtype_code=' + certtype_code
        # Escaped so that a stray '&' or '=' cannot inject extra parameters;
        # a no-op for ordinary alphanumeric ID numbers.
        + '&certnum=' + parse.quote(certnum, safe='')
        + '&stu_name=' + encoded_name
        + '&passcode=1234'
        + '&sessionId=' + session_id
    )
    tree = etree.HTML(requests.get(url, headers=headers, timeout=timeout).text)
    if tree is None:
        # Defensive: a body without any markup may yield no root element.
        return []

    # (label, strip surrounding whitespace?) in output key order.
    fields = (
        ('姓名', False),
        ('性别', False),
        ('作业类别', False),
        ('操作项目', False),
        ('发证机关', False),
        ('初次发证日期', True),
        ('应复审日期', True),
        ('有效期开始时间', True),
        ('有效期结束时间', True),
        ('实际复审时间', True),
    )

    def cell_texts(label):
        # Exactly one entry per matching <td>, even when the cell has no text
        # node, so the per-field lists stay aligned record by record.
        cells = tree.xpath("//th[text()='" + label + "']/following-sibling::td[1]")
        return [''.join(cell.xpath('text()')) for cell in cells]

    columns = [cell_texts(label) for label, _ in fields]
    record_count = len(columns[0])  # the 姓名 column decides how many records exist
    aligned = [
        column[:record_count] + [''] * (record_count - len(column))
        for column in columns
    ]

    cdata = []
    for values in zip(*aligned):
        record = {
            label: value.strip() if strip else value
            for (label, strip), value in zip(fields, values)
        }
        record['url'] = url
        cdata.append(record)
    return cdata
def getAqzs(certnum, stu_name):
    """Look up safety certificates on cx.saws.org.cn.

    The function first POSTs to the ``getServerTime`` endpoint and uses the
    ``time`` entry of the reply as the ``sessionId`` query parameter. It then
    GETs ``getCertQueryResult2`` and scrapes the returned HTML: for every
    ``<th>`` whose text equals a field label, the first following ``<td>``
    sibling supplies the value.

    Args:
        certnum: ID number, as a string.
        stu_name: Holder's name, as a string. It is percent-encoded twice
            before being placed in the query string.

    Returns:
        A list with one dict per ``姓名`` cell found in the page (empty when
        there is none). Each dict has the keys ``姓名``, ``性别``, ``资格类型``,
        ``单位类型``, ``发证机关``, ``应复审日期``, ``有效期开始时间``,
        ``有效期结束时间``, ``初领日期`` and ``url`` (the query URL that was
        fetched). ``发证机关`` is the full text content of its cell, nested
        elements included; the four date/time fields are stripped of
        surrounding whitespace. A field with fewer cells than ``姓名`` is
        filled with ``''`` for the trailing records.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError, SyntaxError, KeyError: When the ``getServerTime`` reply
            is not a literal mapping containing ``time``.
    """
    endpoint = 'http://cx.saws.org.cn/cms/html/certQuery/certQuery.do'
    # Seconds. Without a timeout, requests blocks forever on a stalled server.
    timeout = 30
    # NOTE(review): same certtype_code as getTzzs -- confirm this is what the
    # getCertQueryResult2 query expects.
    certtype_code = '720'
    encoded_name = parse.quote(parse.quote(stu_name))

    reply = requests.post(
        endpoint + '?method=getServerTime', headers=headers, timeout=timeout
    ).text
    # SECURITY: this used to be eval() on the raw body of a plain-HTTP reply,
    # which executes whatever the server (or anyone tampering with the
    # connection) sends back. literal_eval only accepts Python literals.
    session_id = str(ast.literal_eval(reply.strip())['time'])

    url = (
        endpoint
        + '?method=getCertQueryResult2&ref=ch&certtype_code=' + certtype_code
        # Escaped so that a stray '&' or '=' cannot inject extra parameters;
        # a no-op for ordinary alphanumeric ID numbers.
        + '&certnum=' + parse.quote(certnum, safe='')
        + '&stu_name=' + encoded_name
        + '&passcode=1234'
        + '&sessionId=' + session_id
    )
    tree = etree.HTML(requests.get(url, headers=headers, timeout=timeout).text)
    if tree is None:
        # Defensive: a body without any markup may yield no root element.
        return []

    # (label, take full text incl. nested elements?, strip whitespace?)
    # listed in output key order.
    fields = (
        ('姓名', False, False),
        ('性别', False, False),
        ('资格类型', False, False),
        ('单位类型', False, False),
        ('发证机关', True, False),
        ('应复审日期', False, True),
        ('有效期开始时间', False, True),
        ('有效期结束时间', False, True),
        ('初领日期', False, True),
    )

    def cell_texts(label, deep):
        # Exactly one entry per matching <td>, even when the cell has no text
        # node, so the per-field lists stay aligned record by record.
        cells = tree.xpath("//th[text()='" + label + "']/following-sibling::td[1]")
        if deep:
            return [cell.xpath('string(.)') for cell in cells]
        return [''.join(cell.xpath('text()')) for cell in cells]

    columns = [cell_texts(label, deep) for label, deep, _ in fields]
    record_count = len(columns[0])  # the 姓名 column decides how many records exist
    aligned = [
        column[:record_count] + [''] * (record_count - len(column))
        for column in columns
    ]

    cdata = []
    for values in zip(*aligned):
        record = {
            label: value.strip() if strip else value
            for (label, _, strip), value in zip(fields, values)
        }
        record['url'] = url
        cdata.append(record)
    return cdata