feat: fix incorrect path in web3.py
This commit is contained in:
parent 129b1ff79d
commit 901e91d366
@@ -1,5 +1,6 @@
 ## Installation notice
 Be sure to uninstall the locally installed Chrome browser first, then install chrome117.exe from this folder.
+biao.xlsx is the standard comparison file and must be kept regularly updated.
 
 ## Usage instructions
 This tool performs content inspection; execute the following steps in order.
web3.py (5 changed lines)
@@ -11,7 +11,8 @@ from mycode.base import BASE_DIR
 from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_web, ana_wechat, output_dir
 from mycode.crawl_chrome import chrome_main, failed_sites_file
 
 python_exe = os.path.join(BASE_DIR, 'runtime/python.exe')
+scrapy_exe = os.path.join(BASE_DIR, 'runtime/Scripts/scrapy.exe')
 
 def save_info_to_excel(info_list, output_filename):
     df = pd.DataFrame(info_list, columns=['单位', '主办' , '地址'])
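The hunk only shows the first line of save_info_to_excel. For readers following along, a minimal sketch of how such a helper typically finishes; the column names mirror the context line above, while the to_excel call is an assumption, not code from this commit:

```python
import pandas as pd

def save_info_to_excel(info_list, output_filename):
    # Columns match the context line in the diff: unit, sponsor, address.
    df = pd.DataFrame(info_list, columns=['单位', '主办', '地址'])
    # Assumed completion: write the frame out without the index column.
    df.to_excel(output_filename, index=False)
```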
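The added scrapy_exe constant mirrors the existing python_exe: both are anchored to BASE_DIR, so they resolve correctly regardless of the current working directory, which is what the relative './runtime/Scripts/scrapy.exe' path in the old command below got wrong. A minimal sketch of the idea, assuming BASE_DIR is the project root:

```python
import os

# Stand-in for mycode.base.BASE_DIR; assumed to be the project root.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Absolute paths survive a change of working directory;
# './runtime/Scripts/scrapy.exe' breaks as soon as the script is
# launched from anywhere other than the project root.
python_exe = os.path.join(BASE_DIR, 'runtime/python.exe')
scrapy_exe = os.path.join(BASE_DIR, 'runtime/Scripts/scrapy.exe')
```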
@@ -50,7 +51,7 @@ if __name__ == '__main__':
         url = row['地址']
         domain = urlparse(url).netloc.replace('www.', '')
         # output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
-        cmd = ['./runtime/Scripts/scrapy.exe', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
+        cmd = [scrapy_exe, 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
         # cmd = ['scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
         process = subprocess.Popen(cmd)
         processes.append(process)
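Each row thus becomes one Scrapy subprocess; Popen returns immediately, so the crawls run in parallel and the handles are kept for a later join. A hedged sketch of the overall pattern, with illustrative row data in place of the spreadsheet the real script reads:

```python
import subprocess

# Stand-in for the BASE_DIR-anchored path the script builds.
scrapy_exe = 'runtime/Scripts/scrapy.exe'

# Illustrative rows; the real script iterates a pandas DataFrame.
rows = [
    {'name': 'site-a', 'group': 'g1', 'url': 'https://example.com'},
]

processes = []
for row in rows:
    cmd = [scrapy_exe, 'crawl', 'basespider',
           '-a', f"start_url={row['url']}",
           '-a', f"name={row['name']}",
           '-a', f"group={row['group']}"]
    # Popen does not block, so every crawl is launched before any finishes.
    processes.append(subprocess.Popen(cmd))

# Wait for all crawls to exit before analyzing the generated .xlsx files.
for p in processes:
    p.wait()
```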