feat: get_abstract_from_elsevier 使用 Insttoken
This commit is contained in:
parent
e2687874eb
commit
b24bb64485
|
|
@ -21,6 +21,12 @@ config.retry_http_codes = [429, 500, 503]
|
|||
OPENALEX_KEY = "NPimoE2ecdWmfdhH8abxEp"
|
||||
config.api_key = OPENALEX_KEY
|
||||
|
||||
ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf'
|
||||
ELSEVIER_HEADERS = {
|
||||
"X-ELS-Insttoken": "135fa874aea9f0de11cad187ccb4878c",
|
||||
"X-ELS-APIKey": ELSEVIER_APIKEY,
|
||||
}
|
||||
|
||||
@shared_task(base=CustomTask)
|
||||
def get_paper_meta_from_openalex(publication_year:int, keywords:str="", search:str="", end_year:int=None):
|
||||
cache_key = f"openalex_cursor_{publication_year}_{keywords}{search}"
|
||||
|
|
@ -94,9 +100,6 @@ def get_paper_meta_from_openalex(publication_year:int, keywords:str="", search:s
|
|||
countdown=5
|
||||
)
|
||||
|
||||
|
||||
ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf'
|
||||
|
||||
# 常用的 User-Agent 列表
|
||||
USER_AGENTS = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
|
|
@ -126,14 +129,15 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
|
|||
qs = qs.exclude(
|
||||
fail_reason__contains="elsevier_doi_not_found"
|
||||
).exclude(fail_reason__contains="elsevier_abstract_not_found"
|
||||
).exclude(fetch_status="downloading").order_by("publication_date")
|
||||
).exclude(fetch_status="downloading"
|
||||
).filter(doi__startswith="10.1016").order_by("publication_date")
|
||||
|
||||
if not qs.exists():
|
||||
return "done"
|
||||
|
||||
params = {
|
||||
"apiKey": ELSEVIER_APIKEY,
|
||||
"httpAccept": "text/xml"
|
||||
"httpAccept": "text/xml",
|
||||
"view": "FULL"
|
||||
}
|
||||
err_msg = ""
|
||||
count_abs = 0
|
||||
|
|
@ -150,6 +154,7 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
|
|||
res = req.get(
|
||||
f"https://api.elsevier.com/content/article/doi/{paper.doi}",
|
||||
params=params,
|
||||
headers = ELSEVIER_HEADERS,
|
||||
timeout=(3, 15)
|
||||
)
|
||||
except requests.RequestException:
|
||||
|
|
@ -191,7 +196,6 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
|
|||
if has_fulltext:
|
||||
paper.has_fulltext = True
|
||||
paper.has_fulltext_xml = True
|
||||
save_pdf_from_elsevier(paper)
|
||||
count_fulltext += 1
|
||||
|
||||
paper.save_file_xml(xml_str)
|
||||
|
|
@ -351,6 +355,9 @@ def save_pdf_from_oa_url(paper:Paper):
|
|||
if is_pdf and len(res.content) > 1024: # 至少1KB
|
||||
paper.save_file_pdf(res.content, save_obj=True)
|
||||
return "success"
|
||||
else:
|
||||
paper.save_fail_reason("oa_url_not_pdf")
|
||||
return "oa_url_not_pdf"
|
||||
return f"oa_url_pdf_error: {res.status_code}"
|
||||
|
||||
def save_pdf_from_openalex(paper:Paper):
|
||||
|
|
@ -375,8 +382,7 @@ def save_pdf_from_openalex(paper:Paper):
|
|||
|
||||
def save_pdf_from_elsevier(paper:Paper):
|
||||
params = {
|
||||
"apiKey": ELSEVIER_APIKEY,
|
||||
"httpAccept": "application/pdf"
|
||||
"httpAccept": "application/pdf"
|
||||
}
|
||||
try:
|
||||
res = requests.get(
|
||||
|
|
|
|||
Loading…
Reference in New Issue