{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "baabf11e-66a5-43a0-af86-25d998eab011",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DEMO: Text Analytics with an API call (requires an API key)\n",
    "# Developed by Dr. Benyawarath \"Yaa\" Nithithanatchinnapat\n",
    "# Reference: Data Science from Scratch: First Principles with Python 2nd Edition by Joel Grus\n",
    "\n",
    "# Cell 1: Install the required packages\n",
    "# '%pip' (instead of '!pip') installs into the environment of the running kernel\n",
    "# We add '--quiet' to hide the long installation text\n",
    "%pip install --quiet vaderSentiment wordcloud requests pandas matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "091f728f-3d09-4ae1-982d-070e56a1badf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2: Import the libraries (now that they're installed) and call the API\n",
    "# All imports for the notebook live here, so later cells do not repeat them\n",
    "import requests\n",
    "import pandas as pd\n",
    "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
    "from wordcloud import WordCloud\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# --- Students: Change this to your API key ---\n",
    "# (Remove your real key again before sharing or committing this notebook)\n",
    "API_KEY = \"---YOUR API KEY---\"\n",
    "COMPANY = \"COMPANY NAME\"\n",
    "\n",
    "# This is the \"menu item\" we're ordering\n",
    "# Passing the query values through 'params' lets requests URL-encode them,\n",
    "# so company names containing spaces or characters like '&' are sent correctly\n",
    "url = \"https://gnews.io/api/v4/search\"\n",
    "params = {\"q\": COMPANY, \"lang\": \"en\", \"token\": API_KEY}\n",
    "\n",
    "# Make the request (the timeout stops the cell from hanging forever on a bad connection)\n",
    "response = requests.get(url, params=params, timeout=30)\n",
    "data = response.json()\n",
    "\n",
    "# Stop early with a readable message if the API did not return any 'articles' list\n",
    "# (for example when the API key is wrong)\n",
    "if not response.ok or \"articles\" not in data:\n",
    "    raise RuntimeError(f\"GNews request failed (HTTP {response.status_code}): {data}\")\n",
    "\n",
    "# Show the raw, messy data\n",
    "print(\"Raw JSON data from the API:\")\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86996a7a-e1dd-46ed-8b40-f61896627534",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 3: Turn the JSON into a table\n",
    "# The 'articles' key contains the list we want.\n",
    "# 'columns' keeps just the columns we care about, and still gives an\n",
    "# (empty) table with those columns if the search returned no articles.\n",
    "df = pd.DataFrame(data['articles'], columns=['title', 'description', 'url', 'source'])\n",
    "\n",
    "# If an article has no description, use an empty string so the\n",
    "# sentiment and word cloud steps below do not fail on missing values\n",
    "df['description'] = df['description'].fillna('')\n",
    "\n",
    "print(\"\\nCleaned Pandas DataFrame:\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3725bea-a3e8-402a-a952-5dfea38eb16a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4: Score the sentiment of each description\n",
    "# Create our analyzer \"object\" (SentimentIntensityAnalyzer was imported in Cell 2)\n",
    "analyzer = SentimentIntensityAnalyzer()\n",
    "\n",
    "# A 'lambda' function is just a small, one-line function.\n",
    "# This one takes text (x) and gets the 'compound' score.\n",
    "# The 'compound' score is a single value from -1 (very negative) to +1 (very positive).\n",
    "\n",
    "df['sentiment_score'] = df['description'].apply(\n",
    "    lambda x: analyzer.polarity_scores(x)['compound']\n",
    ")\n",
    "\n",
    "print(\"\\nDataFrame with Sentiment Scores:\")\n",
    "# Ending the cell with the table itself shows it as a formatted table, like Cell 3\n",
    "df[['source', 'sentiment_score', 'title']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d800cea4-6c2b-4352-b731-517e2f562175",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 5: Draw a word cloud (WordCloud and plt were imported in Cell 2)\n",
    "# Combine all descriptions into one big block of text\n",
    "all_text = \" \".join(df['description'])\n",
    "\n",
    "# WordCloud cannot be built from empty text, so only draw it when we have some\n",
    "if all_text.strip():\n",
    "    # Create the word cloud\n",
    "    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_text)\n",
    "\n",
    "    # Display the visualization\n",
    "    print(\"\\nKey Themes in the News:\")\n",
    "    plt.figure(figsize=(10, 5))\n",
    "    plt.imshow(wordcloud, interpolation='bilinear')\n",
    "    plt.axis('off')  # Hide the axes\n",
    "    plt.show()\n",
    "else:\n",
    "    print(\"\\nNo description text available to build a word cloud.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9012ffb-a3c9-4614-b654-7845f0e7b836",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}