import requests
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BioGPT-Large is a causal biomedical language model; load it once and move it
# to the selected device.
bio_tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")
bio_model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT-Large").to(DEVICE)


# Base URL for the NCBI E-utilities API (ESearch / ESummary / EFetch).
EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"


def search_pubmed(query, max_results=5):
    """
    Search PubMed for articles about a disease name via the NCBI E-utilities
    API: ESearch for matching PMIDs, ESummary for titles, EFetch for abstracts.
    """
    search = requests.get(
        f"{EUTILS}/esearch.fcgi",
        params={"db": "pubmed", "term": query, "retmode": "json", "retmax": max_results},
    )
    if search.status_code != 200:
        return []

    articles = []
    for pmid in search.json().get("esearchresult", {}).get("idlist", []):
        # ESummary returns the article title; EFetch returns the abstract as plain text.
        summary = requests.get(
            f"{EUTILS}/esummary.fcgi",
            params={"db": "pubmed", "id": pmid, "retmode": "json"},
        )
        title = ""
        if summary.ok:
            title = summary.json().get("result", {}).get(pmid, {}).get("title", "")
        abstract = requests.get(
            f"{EUTILS}/efetch.fcgi",
            params={"db": "pubmed", "id": pmid, "rettype": "abstract", "retmode": "text"},
        )
        articles.append({
            "title": title,
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            "abstract": abstract.text.strip() if abstract.ok else "",
        })
    return articles


def summarize_text(text):
    """
    Summarize an article abstract with BioGPT.
    Note: BioGPT is a causal LM, not a seq2seq model, so the output is a
    generated continuation rather than a true abstractive summary.
    """
    inputs = bio_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)

    summary_ids = bio_model.generate(
        **inputs,
        max_new_tokens=150,  # new-token limits avoid clashing with long prompts
        min_new_tokens=40,
        length_penalty=2.0,
        num_beams=4,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = summary_ids[0][inputs["input_ids"].shape[1]:]
    return bio_tokenizer.decode(new_tokens, skip_special_tokens=True)
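

# Minimal usage sketch: the query string below is only an illustrative example,
# and this assumes network access to NCBI plus enough memory for BioGPT-Large.
if __name__ == "__main__":
    for article in search_pubmed("diabetes mellitus", max_results=3):
        print(article["title"])
        print(article["link"])
        if article["abstract"]:
            print(summarize_text(article["abstract"]))
        print("-" * 60)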