diff --git a/web3.py b/web3.py
index bad4f66..94d0006 100644
--- a/web3.py
+++ b/web3.py
@@ -12,7 +12,7 @@ from mycode.main import make_simple_csv_from_db, make_wechat_articles_full, ana_
 from mycode.crawl_chrome import chrome_main, failed_sites_file
 
 python_exe = os.path.join(BASE_DIR, 'runtime/python.exe')
-scrapy_exe = os.path.join(BASE_DIR, 'runtime/Scripts/scrapy.exe')
+# scrapy_exe = os.path.join(BASE_DIR, 'runtime/Scripts/scrapy.exe')
 
 def save_info_to_excel(info_list, output_filename):
     df = pd.DataFrame(info_list, columns=['单位', '主办', '地址'])
@@ -51,7 +51,7 @@ if __name__ == '__main__':
         url = row['地址']
         domain = urlparse(url).netloc.replace('www.', '')
         # output = os.path.join(BASE_DIR, f'web_dir/{name}_{domain}.xlsx')
-        cmd = [scrapy_exe, 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
+        cmd = [python_exe, '-m', 'scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-o', f'web_dir/{name}_{domain}.xlsx']
         # cmd = ['scrapy', 'crawl', 'basespider', '-a', f'domain={domain}', '-a', f'start_url={url}', '-a', f'name={name}', '-a', f'group={group}', '-a', f'output={output}']
         process = subprocess.Popen(cmd)
         processes.append(process)