author    Rasmus Luha <rasmus.luha@gmail.com>  2022-03-16 03:29:12 +0200
committer Rasmus Luha <rasmus.luha@gmail.com>  2022-03-16 03:29:12 +0200
commit    b73753d56bbcd28732d283cc3c2aa65fd88f99bf (patch)
tree      ea56a60c7c58661bc8109599af25c92b057ffe81 /getData.py
parent    b88db78b1e2885d51f3edd7084d5026b400daa35 (diff)
if __name__ == "__main__" guard added
Diffstat (limited to 'getData.py')
-rw-r--r--  getData.py | 117
1 file changed, 70 insertions(+), 47 deletions(-)
diff --git a/getData.py b/getData.py
index db11dc3..dd55cc2 100644
--- a/getData.py
+++ b/getData.py
@@ -1,76 +1,99 @@
import pandas as pd
import requests
-import osSpecific
+from utils import osSpecific  # helper functions to delete and create the directories used for data storage
from dotenv import load_dotenv
import os
+'''
+This script fetches NBA teams and their players (both currently playing and retired), along with
+some additional information about them.
+The data is stored in the current working directory (any existing "Data" directory is overwritten).
+Data is stored in CSV format.
+
+Author: Rasmus Luha
+Created_at: 16.03.2022
+Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
+'''
+
+# Loading API key from environment variables.
load_dotenv()
API_KEY = os.getenv("API_KEY")
+# To use this script, create a .env file in the current directory containing: API_KEY=<your API key>
+# You can get an API key from the URL listed above.
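+# An illustrative .env file (the value below is a placeholder, not a real key):
+#
+#   API_KEY=0123456789abcdef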
-# Delete Data Dir, if exists, and then create new. Have to do this to avoid duplicates, as Players are later appended to the files
-osSpecific.deleteDataDir()
-osSpecific.addDataDir()
+# API request details
+url = "https://free-nba.p.rapidapi.com/"
+headers = {
+ 'x-rapidapi-host': "free-nba.p.rapidapi.com",
+ 'x-rapidapi-key': API_KEY
+ }
-# File variables to but data into
+# File paths to store the data in
if osSpecific.whichOs() == "windows":
teamsFile = "Data/NBAteams.csv"
- playersDir = "Data\Players"
+    playersDir = "Data\\Players\\"
else:
teamsFile = "Data/NBAteams.csv"
- playersDir = "Data/Players"
+ playersDir = "Data/Players/"
+
+# Recreate the Data directory to avoid duplicates when the script is run multiple times (player rows are appended to the files)
+osSpecific.deleteDataDir()
+osSpecific.addDataDir()
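+# A minimal sketch of what the utils.osSpecific helpers are assumed to do
+# (utils.py is not part of this diff; this is inferred from how the helpers
+# are called here, not from the real implementation):
+#
+#   import os, shutil, platform
+#
+#   def whichOs():
+#       return "windows" if platform.system() == "Windows" else "linux"
+#
+#   def deleteDataDir():
+#       shutil.rmtree("Data", ignore_errors=True)
+#
+#   def addDataDir():
+#       os.makedirs("Data/Players")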
+
-# Requesting data about NBA teams
-url = "https://free-nba.p.rapidapi.com/"
-querystring = {"page":"0"}
-headers = {
- 'x-rapidapi-host': "free-nba.p.rapidapi.com",
- 'x-rapidapi-key': API_KEY
- }
-# Adding data to teams file
-response = requests.request("GET", url+"teams", headers=headers, params=querystring)
-teamsDf = pd.DataFrame(response.json()["data"])
-teamsDf.set_index("id")
-teamsDf = teamsDf.drop("id", axis=1)
+###### Functions ######
-teamsDf.to_csv(teamsFile)
+def getTeamsData(url, headers):
-#######################################################
-# Now requesting players for each team
-# First request is made just to get the amount of pages that must be looped through
+ querystring = {"page":"0"}
+ response = requests.request("GET", url+"teams", headers=headers, params=querystring)
-querystring = {"per_page":"100","page":"0"}
-response = requests.request("GET", url+"players", headers=headers, params=querystring)
-pageCount = response.json()["meta"]["total_pages"]
+ teamsDf = pd.DataFrame(response.json()["data"])
+    teamsDf = teamsDf.drop("id", axis=1)  # drop the API's id column; to_csv writes a fresh index
+ teamsDf.to_csv(teamsFile)
+ print("Teams data stored in Data directory as \"NBAteams.csv\"")
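+    # Columns such as full_name, abbreviation, city, conference and division are
+    # expected in the CSV (an assumption about the free-nba team objects, not
+    # something this diff verifies).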
-for el in range(1, pageCount+1):
+def getPlayerData(url, headers):
- # Requesting pages in loop till pageCount
- querystring = {"per_page":"100","page":el}
+    # The first request is made just to get the number of pages that must be looped through
+ querystring = {"per_page":"100","page":"0"}
response = requests.request("GET", url+"players", headers=headers, params=querystring)
- data = response.json()["data"]
+ pageCount = response.json()["meta"]["total_pages"]
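+    # The response's meta block is assumed to look roughly like
+    # {"total_pages": ..., "current_page": 0, "per_page": 100, ...};
+    # only total_pages is used here.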
- for player in data:
- teamName = player["team"]["full_name"]
- playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
-
- playerSeries = pd.Series({"first_name" : player["first_name"],
- "last_name" : player["last_name"],
- "position" : player["position"],
- "height_feet" : player["height_feet"],
- "height_inches" : player["height_inches"]})
-
- #add player to dataframe
- playerDf.loc[len(playerDf)] = playerSeries
- #add dataframe to File
- playerDf.to_csv(playersDir+teamName, mode='a', index=False, header=False)
- print("Page "+el+" read.")
-print("All done, check Data Dir")
+ for el in range(1, pageCount+1):
+
+        # Request each page in turn, up to pageCount
+ querystring = {"per_page":"100","page":el}
+ response = requests.request("GET", url+"players", headers=headers, params=querystring)
+ data = response.json()["data"]
+
+ for player in data:
+ teamName = player["team"]["full_name"]
+ playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+
+ playerSeries = pd.Series({"first_name" : player["first_name"],
+ "last_name" : player["last_name"],
+ "position" : player["position"],
+ "height_feet" : player["height_feet"],
+ "height_inches" : player["height_inches"]})
+
+            # add the player as a single row to the dataframe
+            playerDf.loc[len(playerDf)] = playerSeries
+            # append the row to that team's CSV file
+ playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=False)
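+            # e.g. appends a row like "Stephen,Curry,G,6,3" to
+            # "Data/Players/Golden State Warriors.csv" (illustrative values)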
+
+ print("Page "+str(el)+" read.")
+    print("All done, check the \"Data\" directory.")
-
+# Requesting and storing data
+if __name__ == "__main__":
+ getTeamsData(url, headers)
+ getPlayerData(url, headers)
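+
+# Usage sketch (assumes the .env file described above is present):
+#   $ python getData.py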