summaryrefslogtreecommitdiff
path: root/AllAboutData/getData.py
diff options
context:
space:
mode:
Diffstat (limited to 'AllAboutData/getData.py')
-rw-r--r--AllAboutData/getData.py82
1 files changed, 48 insertions, 34 deletions
diff --git a/AllAboutData/getData.py b/AllAboutData/getData.py
index 4cea9af..7b503fa 100644
--- a/AllAboutData/getData.py
+++ b/AllAboutData/getData.py
@@ -1,32 +1,34 @@
-import pandas as pd
+'''NBA data receiver
+
+This Python script fetches NBA teams and their players (including retired ones)
+along with some additional information about them.
+The data is stored in the current working directory and thus,
+any existing "Data" file is overwritten. Data will be in csv format.
+
+To use this script, "requests", "pandas" and "python-dotenv" must be installed.
+You also have to create a .env file in the current directory containing: API_KEY = your API_key
+You can get the API key from the URL below.
+
+Used API: https://rapidapi.com/theapiguy/api/free-nba/
+'''
+
+import os
import requests
-from utils import osSpecific # Some functions to delete and create directoryes needed for data storing
+import pandas as pd
+from utils import osSpecific # Some functions to delete and create directories for data
from dotenv import load_dotenv
-import os
-'''
-This python script fetches NBA teams and its players (currently playing and retired) and some
-additional information about them.
-The data is stored in the current working directory (any existing "Data" file is overwritten.
-Data is in csv format.
-
-Author: Rasmus Luha
-Created_at: 16.03.2022
-Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
-'''
# Loading API key from environment variables.
load_dotenv()
API_KEY = os.getenv("API_KEY")
-# To use this script, you should make .env file in current dir and add there: API_KEY = your API_key
-# You can get the API key from url listed above.
# API request details
url = "https://free-nba.p.rapidapi.com/"
headers = {
- 'x-rapidapi-host': "free-nba.p.rapidapi.com",
- 'x-rapidapi-key': API_KEY
+ "x-rapidapi-host": "free-nba.p.rapidapi.com",
+ "x-rapidapi-key": API_KEY
}
# File name variables to store data in
@@ -37,7 +39,7 @@ else:
teamsFile = "Data/NBAteams.csv"
playersDir = "Data/Players/"
-# Create new Data directory in order to avoid duplicates, when data is requested multiple times
+# Creating a new Data dir to avoid duplicates (player files are written in append mode)
osSpecific.deleteDataDir()
osSpecific.addDataDir()
@@ -46,8 +48,12 @@ osSpecific.addDataDir()
###### Functions ######
def getTeamsData(url, headers):
+'''
+Requests Data about NBA teams and stores it.
+Takes API url as first and its headers as second argument.
+'''
- querystring = {"page":"0"}
+ querystring = {"page": "0"}
response = requests.request("GET", url+"teams", headers=headers, params=querystring)
teamsDf = pd.DataFrame(response.json()["data"])
@@ -58,36 +64,44 @@ def getTeamsData(url, headers):
def getPlayerData(url, headers):
+'''
+Requests data about NBA players and stores it in one CSV file per team.
+Takes API url as first and its headers as second argument.
+'''
- print("Stared reading players data")
- # First request is made just to get the amount of pages that must be looped through
- querystring = {"per_page":"100","page":"0"}
+ print("Stared reading players data")
+ # First request is made to get the page count to loop
+ querystring = {"per_page": "100","page":"0"}
response = requests.request("GET", url+"players", headers=headers, params=querystring)
- pageCount = response.json()["meta"]["total_pages"]
+ pageCount = response.json()["meta"]["total_pages"] # Got the page count here
+
print("Pages to read: "+str(pageCount))
for el in range(1, pageCount+1):
- # Requesting pages in loop till pageCount
- querystring = {"per_page":"100","page":el}
+ # Requesting pages in loop till pageCount is reached
+ querystring = {"per_page": "100","page": el}
response = requests.request("GET", url+"players", headers=headers, params=querystring)
data = response.json()["data"]
+ # Making a dataframe for each player to store it in the file of the player's team
for player in data:
teamName = player["team"]["full_name"]
- playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+ playerDf = pd.DataFrame(columns=["first_name", "last_name",
+ "position", "height_feet",
+ "height_inches"])
- playerSeries = pd.Series({"first_name" : player["first_name"],
- "last_name" : player["last_name"],
- "position" : player["position"],
- "height_feet" : player["height_feet"],
- "height_inches" : player["height_inches"]})
+ playerSeries = pd.Series({"first_name": player["first_name"],
+ "last_name": player["last_name"],
+ "position": player["position"],
+ "height_feet": player["height_feet"],
+ "height_inches": player["height_inches"]})
- # Add player to dataframe
+
playerDf.loc[len(playerDf)] = playerSeries
- # Add dataframe to File
+ # Append the dataframe to the team's file; write column names only if the file does not exist yet
hdr = False if os.path.isfile(playersDir+teamName+".csv") else True
playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=hdr)
@@ -96,7 +110,7 @@ def getPlayerData(url, headers):
-
if __name__ == "__main__":
getTeamsData(url, headers)
getPlayerData(url, headers)
+