1 files changed, 48 insertions, 34 deletions
diff --git a/AllAboutData/getData.py b/AllAboutData/getData.py
index 4cea9af..7b503fa 100644
--- a/AllAboutData/getData.py
+++ b/AllAboutData/getData.py
@@ -1,32 +1,34 @@
-import pandas as pd
+'''NBA data receiver
+
+This Python script fetches NBA teams and their players (including retired)
+along with some additional information about them.
+The data is stored in the current working directory, so
+any existing "Data" directory is overwritten. Data will be in CSV format.
+
+To use this script, "requests", "pandas" and "python-dotenv" must be installed.
+You also have to create a .env file in the current directory containing: API_KEY = your API_key
+You can get the API key from the URL below.
+
+Used API: https://rapidapi.com/theapiguy/api/free-nba/
+'''
+
+import os
 import requests
-from utils import osSpecific  #  Some functions to delete and create directoryes needed for data storing
+import pandas as pd
+from utils import osSpecific  #  Some functions to delete and create directories for data
 from dotenv import load_dotenv
-import os
 
-'''
-This python script fetches NBA teams and its players (currently playing and retired) and some
-additional information about them. 
-The data is stored in the current working directory (any existing "Data" file is overwritten.
-Data is in csv format.
-
-Author: Rasmus Luha
-Created_at: 16.03.2022
-Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
-'''
 
 # Loading API key from environment variables.
 load_dotenv()
 API_KEY = os.getenv("API_KEY")
 
-# To use this script, you should make .env file in current dir and add there: API_KEY = your API_key
-# You can get the API key from url listed above.
 
 # API request details
 url = "https://free-nba.p.rapidapi.com/"
 headers = {
-            'x-rapidapi-host': "free-nba.p.rapidapi.com",
-            'x-rapidapi-key': API_KEY
+            "x-rapidapi-host": "free-nba.p.rapidapi.com",
+            "x-rapidapi-key": API_KEY
           }
 
 # File name variables to store data in 
@@ -37,7 +39,7 @@ else:
     teamsFile = "Data/NBAteams.csv"
     playersDir = "Data/Players/"
 
-# Create new Data directory in order to avoid duplicates, when data is requested multiple times 
+# Creating a new Data dir to avoid duplicates (due to appending)
 osSpecific.deleteDataDir()
 osSpecific.addDataDir()
 
@@ -46,8 +48,12 @@ osSpecific.addDataDir()
 ###### Functions ######
 
 def getTeamsData(url, headers):
+    '''
+    Requests data about NBA teams and stores it.
+    Takes the API url as the first argument and its headers as the second.
+    '''
 
-    querystring = {"page":"0"}
+    querystring = {"page": "0"}
     response = requests.request("GET", url+"teams", headers=headers, params=querystring)
 
     teamsDf = pd.DataFrame(response.json()["data"])
@@ -58,36 +64,44 @@ def getTeamsData(url, headers):
 
 
 def getPlayerData(url, headers):
+    '''
+    Requests data about NBA players and stores it in per-team files.
+    Takes the API url as the first argument and its headers as the second.
+    '''
 
-    print("Stared reading players data")
 
-    # First request is made just to get the amount of pages that must be looped through
-    querystring = {"per_page":"100","page":"0"}
+    print("Stared reading players data")
+    # First request is made to get the page count to loop
+    querystring = {"per_page": "100","page":"0"}
     response = requests.request("GET", url+"players", headers=headers, params=querystring)
-    pageCount = response.json()["meta"]["total_pages"]
+    pageCount = response.json()["meta"]["total_pages"]  #  Got the page count here
     
+
     print("Pages to read: "+str(pageCount)) 
     for el in range(1, pageCount+1):
     
-        # Requesting pages in loop till pageCount
-        querystring = {"per_page":"100","page":el}
+        # Requesting pages in loop till pageCount is reached
+        querystring = {"per_page": "100","page": el}
         response = requests.request("GET", url+"players", headers=headers, params=querystring)
         data = response.json()["data"]
         
+        # Making a dataframe for each player to store it in the suitable file
         for player in data:
             teamName = player["team"]["full_name"]
-            playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+            playerDf = pd.DataFrame(columns=["first_name", "last_name",
+                                             "position", "height_feet", 
+                                             "height_inches"])
     
-            playerSeries = pd.Series({"first_name" : player["first_name"],
-                                      "last_name" : player["last_name"],
-                                      "position" : player["position"],
-                                      "height_feet" : player["height_feet"],
-                                      "height_inches" : player["height_inches"]})
+            playerSeries = pd.Series({"first_name": player["first_name"],
+                                      "last_name": player["last_name"],
+                                      "position": player["position"],
+                                      "height_feet": player["height_feet"],
+                                      "height_inches": player["height_inches"]})
     
-            # Add player to dataframe
+            
             playerDf.loc[len(playerDf)] = playerSeries 
 
-            # Add dataframe to File
+            # Add dataframe to file; if it is the first to be added, also add column names
             hdr = False if os.path.isfile(playersDir+teamName+".csv") else True
             playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=hdr)
     
@@ -96,7 +110,7 @@ def getPlayerData(url, headers):
 
 
 
-
 if __name__ == "__main__":
     getTeamsData(url, headers)
     getPlayerData(url, headers)
+