Compare commits

...

2 Commits

1 changed file with 11 additions and 9 deletions

View File

@@ -22,9 +22,9 @@ OPENALEX_KEY = "NPimoE2ecdWmfdhH8abxEp"
config.api_key = OPENALEX_KEY config.api_key = OPENALEX_KEY
ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf' ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf'
ELSEVIER_HEAEDERS = { ELSEVIER_HEADERS = {
"X-ELS-Insttoken": "135fa874aea9f0de11cad187ccb4878c",
"X-ELS-APIKey": ELSEVIER_APIKEY, "X-ELS-APIKey": ELSEVIER_APIKEY,
"X-ELS-Insttoken": "135fa874aea9f0de11cad187ccb4878c "
} }
@shared_task(base=CustomTask) @shared_task(base=CustomTask)
@@ -129,14 +129,15 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
qs = qs.exclude( qs = qs.exclude(
fail_reason__contains="elsevier_doi_not_found" fail_reason__contains="elsevier_doi_not_found"
).exclude(fail_reason__contains="elsevier_abstract_not_found" ).exclude(fail_reason__contains="elsevier_abstract_not_found"
).exclude(fetch_status="downloading").order_by("publication_date") ).exclude(fetch_status="downloading"
).filter(doi__startswith="10.1016").order_by("publication_date")
if not qs.exists(): if not qs.exists():
return "done" return "done"
params = { params = {
"apiKey": ELSEVIER_APIKEY, "httpAccept": "text/xml",
"httpAccept": "text/xml" "view": "FULL"
} }
err_msg = "" err_msg = ""
count_abs = 0 count_abs = 0
@@ -153,6 +154,7 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
res = req.get( res = req.get(
f"https://api.elsevier.com/content/article/doi/{paper.doi}", f"https://api.elsevier.com/content/article/doi/{paper.doi}",
params=params, params=params,
headers = ELSEVIER_HEADERS,
timeout=(3, 15) timeout=(3, 15)
) )
except requests.RequestException: except requests.RequestException:
@@ -194,8 +196,6 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
if has_fulltext: if has_fulltext:
paper.has_fulltext = True paper.has_fulltext = True
paper.has_fulltext_xml = True paper.has_fulltext_xml = True
if paper.has_fulltext_pdf is False:
save_pdf_from_elsevier(paper)
count_fulltext += 1 count_fulltext += 1
paper.save_file_xml(xml_str) paper.save_file_xml(xml_str)
@@ -355,6 +355,9 @@ def save_pdf_from_oa_url(paper:Paper):
if is_pdf and len(res.content) > 1024: # 至少1KB if is_pdf and len(res.content) > 1024: # 至少1KB
paper.save_file_pdf(res.content, save_obj=True) paper.save_file_pdf(res.content, save_obj=True)
return "success" return "success"
else:
paper.save_fail_reason("oa_url_not_pdf")
return "oa_url_not_pdf"
return f"oa_url_pdf_error: {res.status_code}" return f"oa_url_pdf_error: {res.status_code}"
def save_pdf_from_openalex(paper:Paper): def save_pdf_from_openalex(paper:Paper):
@@ -379,8 +382,7 @@ def save_pdf_from_openalex(paper:Paper):
def save_pdf_from_elsevier(paper:Paper): def save_pdf_from_elsevier(paper:Paper):
params = { params = {
"apiKey": ELSEVIER_APIKEY, "httpAccept": "application/pdf"
"httpAccept": "application/pdf"
} }
try: try:
res = requests.get( res = requests.get(