author     Rasmus Luha <rasmus.luha@gmail.com>    2022-03-16 03:51:41 +0200
committer  Rasmus Luha <rasmus.luha@gmail.com>    2022-03-16 03:51:41 +0200
commit     9536dc1dd3c16119ac719453894a5385f9983050 (patch)
tree       951ae1b25cab7bc3c31bb1249e878d2342277c65 /AllAboutData/getData.py
parent     8fbcafc6fc2fe274537b297e6eb2473a68150255 (diff)
restructure the project
Diffstat (limited to 'AllAboutData/getData.py')
-rw-r--r--    AllAboutData/getData.py    101
1 file changed, 101 insertions, 0 deletions
diff --git a/AllAboutData/getData.py b/AllAboutData/getData.py
new file mode 100644
index 0000000..7c64b30
--- /dev/null
+++ b/AllAboutData/getData.py
@@ -0,0 +1,101 @@
+import pandas as pd
+import requests
+from utils import osSpecific  # Helper functions to delete and create the directories needed for storing data
+from dotenv import load_dotenv
+import os
+
+'''
+This Python script fetches NBA teams and their players (both currently playing and retired),
+along with some additional information about them.
+The data is stored as CSV files in a "Data" directory under the current working directory
+(any existing "Data" directory is overwritten).
+
+Author: Rasmus Luha
+Created_at: 16.03.2022
+Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
+'''
+
+# Loading API key from environment variables.
+load_dotenv()
+API_KEY = os.getenv("API_KEY")
+
+# To use this script, create a .env file in the current directory that defines API_KEY.
+# You can get an API key from the URL listed above.
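+# A minimal .env might look like this (placeholder value, not a real key):
+#
+#   API_KEY=your_rapidapi_key_here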
+
+# API request details
+url = "https://free-nba.p.rapidapi.com/"
+headers = {
+ 'x-rapidapi-host': "free-nba.p.rapidapi.com",
+ 'x-rapidapi-key': API_KEY
+ }
+
+# File path variables to store the data in (path separators depend on the OS)
+if osSpecific.whichOs() == "windows":
+    teamsFile = "Data\\NBAteams.csv"
+    playersDir = "Data\\Players\\"
+else:
+ teamsFile = "Data/NBAteams.csv"
+ playersDir = "Data/Players/"
+
+# Recreate the Data directory to avoid duplicates when the data is requested multiple times
+osSpecific.deleteDataDir()
+osSpecific.addDataDir()
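+# (deleteDataDir and addDataDir are assumed to remove any existing "Data" tree and then
+# recreate "Data" and "Data/Players", so every run starts from a clean slate.)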
+
+
+
+###### Functions ######
+
+def getTeamsData(url, headers):
+
+ querystring = {"page":"0"}
+ response = requests.request("GET", url+"teams", headers=headers, params=querystring)
+
+ teamsDf = pd.DataFrame(response.json()["data"])
+    teamsDf = teamsDf.set_index("id")  # set_index returns a new dataframe, so reassign instead of dropping "id"
+ teamsDf.to_csv(teamsFile)
+ print("Teams data stored in Data directory as \"NBAteams.csv\"")
+
+
+def getPlayerData(url, headers):
+
+    print("Started reading players data")
+
+ # First request is made just to get the amount of pages that must be looped through
+ querystring = {"per_page":"100","page":"0"}
+ response = requests.request("GET", url+"players", headers=headers, params=querystring)
+ pageCount = response.json()["meta"]["total_pages"]
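+    # (The JSON body is assumed to have the shape {"data": [...], "meta": {"total_pages": N, ...}},
+    # which is how both the page count here and the player records below are read.)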
+
+ print("Pages to read: "+str(pageCount))
+ for el in range(1, pageCount+1):
+
+ # Requesting pages in loop till pageCount
+ querystring = {"per_page":"100","page":el}
+ response = requests.request("GET", url+"players", headers=headers, params=querystring)
+ data = response.json()["data"]
+
+ for player in data:
+ teamName = player["team"]["full_name"]
+ playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+
+ playerSeries = pd.Series({"first_name" : player["first_name"],
+ "last_name" : player["last_name"],
+ "position" : player["position"],
+ "height_feet" : player["height_feet"],
+ "height_inches" : player["height_inches"]})
+
+            # Add the player as the single row of the dataframe
+            playerDf.loc[len(playerDf)] = playerSeries
+            # Append the row to that team's CSV file
+            playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=False)
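+            # Because mode='a' is used, each team's CSV accumulates rows across all pages;
+            # the fresh Data directory created above prevents duplicates from earlier runs.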
+
+ print("Page "+str(el)+" read.")
+    print("All done, check the \"Data\" directory.")
+
+
+
+
+# Requesting and storing data
+if __name__ == "__main__":
+ getTeamsData(url, headers)
+ getPlayerData(url, headers)