fix: correct web3 scrapy cmd path

shijing 2023-08-25 16:30:44 +08:00
parent 901e91d366
commit 8888e19fc3
1 changed file with 2 additions and 2 deletions


@@ -12,7 +12,7 @@ from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_
 from mycode.crawl_chrome import chrome_main, failed_sites_file
 python_exe = os.path.join(BASE_DIR, 'runtime/python.exe')
-scrapy_exe = os.path.join(BASE_DIR, 'runtime/Scripts/scrapy.exe')
+# scrapy_exe = os.path.join(BASE_DIR, 'runtime/Scripts/scrapy.exe')
 def save_info_to_excel(info_list, output_filename):
     df = pd.DataFrame(info_list, columns=['单位', '主办' , '地址'])
@@ -51,7 +51,7 @@ if __name__ == '__main__':
         url = row['地址']
         domain = urlparse(url).netloc.replace('www.', '')
         # output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
-        cmd = [scrapy_exe, 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
+        cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
         # cmd = ['scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
         process = subprocess.Popen(cmd)
         processes.append(process)
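
The change drops the runtime/Scripts/scrapy.exe launcher and runs the crawl through the bundled interpreter with python -m scrapy, which Scrapy supports via its __main__ entry point. Below is a minimal sketch of the new invocation pattern; run_spider is a hypothetical helper for illustration, and BASE_DIR plus the .xlsx feed export are assumed from the surrounding project, not defined in this diff.

import os
import subprocess

# Assumption: BASE_DIR is the project root that ships the bundled runtime/ directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
python_exe = os.path.join(BASE_DIR, 'runtime/python.exe')

def run_spider(name, domain, url, group):
    # Run Scrapy as a module of the bundled interpreter instead of calling
    # runtime/Scripts/scrapy.exe directly, so the command no longer depends
    # on the Scripts/ launcher path existing in the runtime.
    cmd = [
        python_exe, '-m', 'scrapy', 'crawl', 'basespider',
        '-a', f'domain={domain}',
        '-a', f'start_url={url}',
        '-a', f'name={name}',
        '-a', f'group={group}',
        '-o', f'web_dir/{name}_{domain}.xlsx',
    ]
    # Popen keeps the crawl non-blocking, matching the processes list in the diff.
    return subprocess.Popen(cmd)

Because -m resolves scrapy from the same interpreter's site-packages, the crawl uses whatever Scrapy version is installed in the bundled runtime, regardless of whether its Scripts/ entry-point executables were generated.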