News Application using Neurelo’s Python SDKs
feeds.json
{
"trending": [
"https://feeds.bbci.co.uk/news/rss.xml",
"https://www.cbc.ca/webfeed/rss/rss-topstories"
]
}
fetch.py
import feedparser
import scrapy
from scrapy.crawler import CrawlerProcess
import json
from newspaper import fulltext
import requests
from dotenv import load_dotenv
import os
import argparse
from neurelo.configuration import Configuration
from neurelo.api_client import ApiClient
from neurelo.api.news_api import NewsApi
scraped = {}
def start_scrapy(urls):
process = CrawlerProcess(settings={"FEED_FORMAT": "json", "FEED_URI": "items.json"})
process.crawl(Scrape, kwargs={"urls": urls})
process.start()
class Scrape(scrapy.Spider):
def __init__(self, **kwargs):
self.name = ""
self.urls = kwargs["kwargs"]["urls"]
def start_requests(self):
for url in self.urls:
yield scrapy.Request(url=url, callback=self.parse)
def parse(self, response):
try:
scraped[response.request.meta["redirect_urls"][0]] = response.text
except:
scraped[response.url] = response.text
class Parse:
def __init__(self):
pass
def fetch_feeds(self):
feeds = json.load(open("./feeds.json"))
topics = list(feeds.keys())
return (feeds, topics)
def parse_rss_feeds(self, feeds, topics, limit=5):
content = {}
need_to_scrape = []
index = 0
for topic in topics:
for feed in feeds[topic]:
fetched = feedparser.parse(feed)
for news in fetched.entries[:limit]:
content[index] = {}
content[index]["title"] = news["title"]
content[index]["link"] = news["link"]
content[index]["published"] = news["published"]
content[index]["category"] = topic
need_to_scrape.append(news["link"])
index += 1
return (content, need_to_scrape)
def get_news(self, rss_content):
for index in rss_content.keys():
feed_url = rss_content[index]["link"]
scraped_news = fulltext(scraped[feed_url], language="en")
rss_content[index]["summary"] = scraped_news
return rss_content
def parse(self):
rss_feeds, topics = self.fetch_feeds()
content, need_to_scrape = self.parse_rss_feeds(rss_feeds, topics)
start_scrapy(need_to_scrape)
return self.get_news(content)
class News:
def __init__(self) -> None:
host, key = self.env()
self.api_client = self.conf(host, key)
def env(self):
load_dotenv()
return (os.getenv("NEURELO_API_HOST") or "", os.getenv("NEURELO_API_KEY") or "")
def conf(self, host, key):
configuration = Configuration(host, api_key={"ApiKey": key})
return ApiClient(configuration=configuration)
def fetch(self):
news_api = NewsApi(self.api_client)
news_item = news_api.find_news()
return news_item
def store(self):
parse = Parse()
news = parse.parse()
articles = list(news.values())
news_api = NewsApi(self.api_client)
response = news_api.create_many_news(articles)
return response
def delete(self):
news_api = NewsApi(self.api_client)
deleted = news_api.delete_news()
return deleted
if __name__ == "__main__":
news = News()
print(news.fetch())
Last updated