path: root/getData.py
Diffstat (limited to 'getData.py')
-rw-r--r--  getData.py  117
1 file changed, 70 insertions(+), 47 deletions(-)
diff --git a/getData.py b/getData.py
index db11dc3..dd55cc2 100644
--- a/getData.py
+++ b/getData.py
@@ -1,76 +1,99 @@
import pandas as pd
import requests
-import osSpecific
+from utils import osSpecific # Helper functions to delete and create the directories needed for storing data
from dotenv import load_dotenv
import os
+'''
+This Python script fetches NBA teams, their players (both currently playing and retired), and
+some additional information about them.
+The data is stored as CSV files in the current working directory (any existing "Data"
+directory is overwritten).
+
+Author: Rasmus Luha
+Created_at: 16.03.2022
+Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
+'''
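+# Usage: install the imported dependencies (pandas, requests, python-dotenv) and run:
+#   python getData.py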
+
+# Loading API key from environment variables.
load_dotenv()
API_KEY = os.getenv("API_KEY")
+# To use this script, create a .env file in the current directory containing: API_KEY=<your API key>
+# You can get an API key from the URL listed above.
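+# Optional guard: stop early with a clear message if the key could not be loaded.
+if API_KEY is None:
+ raise SystemExit("API_KEY not found; create a .env file as described above.")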
-# Delete Data Dir, if exists, and then create new. Have to do this to avoid duplicates, as Players are later appended to the files
-osSpecific.deleteDataDir()
-osSpecific.addDataDir()
+# API request details
+url = "https://free-nba.p.rapidapi.com/"
+headers = {
+ 'x-rapidapi-host': "free-nba.p.rapidapi.com",
+ 'x-rapidapi-key': API_KEY
+ }
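+# The endpoints used below are url+"teams" and url+"players",
+# e.g. GET https://free-nba.p.rapidapi.com/teams with the headers above attached.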
-# File variables to but data into
+# File path variables used for storing data
if osSpecific.whichOs() == "windows":
teamsFile = "Data/NBAteams.csv"
- playersDir = "Data\Players"
+ playersDir = "Data\\Players\\" # backslashes escaped so the literal path is Data\Players\
else:
teamsFile = "Data/NBAteams.csv"
- playersDir = "Data/Players"
+ playersDir = "Data/Players/"
+
+# Recreate the Data directory to avoid duplicates when the script is run multiple times,
+# since player rows are appended to the files
+osSpecific.deleteDataDir()
+osSpecific.addDataDir()
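+# (These helpers are assumed to remove any old "Data" directory and recreate it together
+# with the "Players" subdirectory, roughly shutil.rmtree("Data") + os.makedirs(playersDir).)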
+
-# Requesting data about NBA teams
-url = "https://free-nba.p.rapidapi.com/"
-querystring = {"page":"0"}
-headers = {
- 'x-rapidapi-host': "free-nba.p.rapidapi.com",
- 'x-rapidapi-key': API_KEY
- }
-# Adding data to teams file
-response = requests.request("GET", url+"teams", headers=headers, params=querystring)
-teamsDf = pd.DataFrame(response.json()["data"])
-teamsDf.set_index("id")
-teamsDf = teamsDf.drop("id", axis=1)
+###### Functions ######
-teamsDf.to_csv(teamsFile)
+def getTeamsData(url, headers):
-#######################################################
-# Now requesting players for each team
-# First request is made just to get the amount of pages that must be looped through
+ querystring = {"page":"0"}
+ response = requests.request("GET", url+"teams", headers=headers, params=querystring)
-querystring = {"per_page":"100","page":"0"}
-response = requests.request("GET", url+"players", headers=headers, params=querystring)
-pageCount = response.json()["meta"]["total_pages"]
+ teamsDf = pd.DataFrame(response.json()["data"])
+ teamsDf = teamsDf.drop("id", axis=1) # drop the API's internal id column before saving
+ teamsDf.to_csv(teamsFile)
+ print("Teams data stored in Data directory as \"NBAteams.csv\"")
-for el in range(1, pageCount+1):
+def getPlayerData(url, headers):
- # Requesting pages in loop till pageCount
- querystring = {"per_page":"100","page":el}
+ # The first request is made just to get the number of pages that must be looped through
+ querystring = {"per_page":"100","page":"0"}
response = requests.request("GET", url+"players", headers=headers, params=querystring)
- data = response.json()["data"]
+ pageCount = response.json()["meta"]["total_pages"]
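+ # Only the "meta" block of that first response is used; per_page=100 keeps the page count low.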
- for player in data:
- teamName = player["team"]["full_name"]
- playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
-
- playerSeries = pd.Series({"first_name" : player["first_name"],
- "last_name" : player["last_name"],
- "position" : player["position"],
- "height_feet" : player["height_feet"],
- "height_inches" : player["height_inches"]})
-
- #add player to dataframe
- playerDf.loc[len(playerDf)] = playerSeries
- #add dataframe to File
- playerDf.to_csv(playersDir+teamName, mode='a', index=False, header=False)
- print("Page "+el+" read.")
-print("All done, check Data Dir")
+ for page in range(1, pageCount+1):
+
+ # Request each page in turn, up to pageCount
+ querystring = {"per_page":"100","page":page}
+ response = requests.request("GET", url+"players", headers=headers, params=querystring)
+ data = response.json()["data"]
+
+ for player in data:
+ teamName = player["team"]["full_name"]
+ playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+
+ playerSeries = pd.Series({"first_name" : player["first_name"],
+ "last_name" : player["last_name"],
+ "position" : player["position"],
+ "height_feet" : player["height_feet"],
+ "height_inches" : player["height_inches"]})
+
+ # Add the player as a row to the dataframe
+ playerDf.loc[len(playerDf)] = playerSeries
+ # Append the row to that team's CSV file (one file per team)
+ playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=False)
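+ # Note: with header=False the CSVs get no header row; the column order is
+ # first_name, last_name, position, height_feet, height_inches.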
+
+ print("Page "+str(el)+" read.")
+ print("All done, check \"Data\" Dir.")
-
+# Requesting and storing data
+if __name__ == "__main__":
+ getTeamsData(url, headers)
+ getPlayerData(url, headers)