From 9536dc1dd3c16119ac719453894a5385f9983050 Mon Sep 17 00:00:00 2001 From: Rasmus Luha Date: Wed, 16 Mar 2022 03:51:41 +0200 Subject: restructure the project --- .gitignore | 2 +- AllAboutData/getData.py | 101 +++++++++++++++++++++++++++++++++++++++ AllAboutData/utils/osSpecific.py | 25 ++++++++++ getData.py | 101 --------------------------------------- utils/osSpecific.py | 25 ---------- 5 files changed, 127 insertions(+), 127 deletions(-) create mode 100644 AllAboutData/getData.py create mode 100644 AllAboutData/utils/osSpecific.py delete mode 100644 getData.py delete mode 100644 utils/osSpecific.py diff --git a/.gitignore b/.gitignore index ef2469e..22e6646 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ venv/ .env __pycache__/ -Data/ +AllAboutData/Data/ diff --git a/AllAboutData/getData.py b/AllAboutData/getData.py new file mode 100644 index 0000000..7c64b30 --- /dev/null +++ b/AllAboutData/getData.py @@ -0,0 +1,101 @@ +import pandas as pd +import requests +from utils import osSpecific # Some functions to delete and create directoryes needed for data storing +from dotenv import load_dotenv +import os + +''' +This python script fetches NBA teams and its players (currently playing and retired) and some +additional information about them. +The data is stored in the current working directory (any existing "Data" file is overwritten. +Data is in csv format. + +Author: Rasmus Luha +Created_at: 16.03.2022 +Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/ +''' + +# Loading API key from environment variables. +load_dotenv() +API_KEY = os.getenv("API_KEY") + +# To use this script, you should make .env file in current dir and add there: API_KEY = your API_key +# You can get the API key from url listed above. + +# API request details +url = "https://free-nba.p.rapidapi.com/" +headers = { + 'x-rapidapi-host': "free-nba.p.rapidapi.com", + 'x-rapidapi-key': API_KEY + } + +# File name variables to store data in +if osSpecific.whichOs() == "windows": + teamsFile = "Data/NBAteams.csv" + playersDir = "Data\Players\\" +else: + teamsFile = "Data/NBAteams.csv" + playersDir = "Data/Players/" + +# Create new Data directory in order to avoid duplicates, when data is requested multiple times +osSpecific.deleteDataDir() +osSpecific.addDataDir() + + + +###### Functions ###### + +def getTeamsData(url, headers): + + querystring = {"page":"0"} + response = requests.request("GET", url+"teams", headers=headers, params=querystring) + + teamsDf = pd.DataFrame(response.json()["data"]) + teamsDf.set_index("id") + teamsDf = teamsDf.drop("id", axis=1) + teamsDf.to_csv(teamsFile) + print("Teams data stored in Data directory as \"NBAteams.csv\"") + + +def getPlayerData(url, headers): + + print("Stared reading players data") + + # First request is made just to get the amount of pages that must be looped through + querystring = {"per_page":"100","page":"0"} + response = requests.request("GET", url+"players", headers=headers, params=querystring) + pageCount = response.json()["meta"]["total_pages"] + + print("Pages to read: "+str(pageCount)) + for el in range(1, pageCount+1): + + # Requesting pages in loop till pageCount + querystring = {"per_page":"100","page":el} + response = requests.request("GET", url+"players", headers=headers, params=querystring) + data = response.json()["data"] + + for player in data: + teamName = player["team"]["full_name"] + playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"]) + + playerSeries = pd.Series({"first_name" : player["first_name"], + "last_name" : player["last_name"], + "position" : player["position"], + "height_feet" : player["height_feet"], + "height_inches" : player["height_inches"]}) + + #add player to dataframe + playerDf.loc[len(playerDf)] = playerSeries + #add dataframe to File + playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=False) + + print("Page "+str(el)+" read.") + print("All done, check \"Data\" Dir.") + + + + +# Requesting and storing data +if __name__ == "__main__": + getTeamsData(url, headers) + getPlayerData(url, headers) diff --git a/AllAboutData/utils/osSpecific.py b/AllAboutData/utils/osSpecific.py new file mode 100644 index 0000000..7caf664 --- /dev/null +++ b/AllAboutData/utils/osSpecific.py @@ -0,0 +1,25 @@ +import os +import sys + +# terminal commands, which are unfortunately os-specific + +def whichOs(): + if sys.platform == "win32": + return "windows" + else: + return "good" # ...right? + +def deleteDataDir(): + if whichOs() == "windows": + os.system("rmdir \s Data") + else: + os.system("rm -r Data") + + +def addDataDir(): + if whichOs() == "windows": + os.system("mkdir Data\Players") + else: + os.system("mkdir -p Data/Players") + print("Created new empty Data directory") + diff --git a/getData.py b/getData.py deleted file mode 100644 index 7c64b30..0000000 --- a/getData.py +++ /dev/null @@ -1,101 +0,0 @@ -import pandas as pd -import requests -from utils import osSpecific # Some functions to delete and create directoryes needed for data storing -from dotenv import load_dotenv -import os - -''' -This python script fetches NBA teams and its players (currently playing and retired) and some -additional information about them. -The data is stored in the current working directory (any existing "Data" file is overwritten. -Data is in csv format. - -Author: Rasmus Luha -Created_at: 16.03.2022 -Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/ -''' - -# Loading API key from environment variables. -load_dotenv() -API_KEY = os.getenv("API_KEY") - -# To use this script, you should make .env file in current dir and add there: API_KEY = your API_key -# You can get the API key from url listed above. - -# API request details -url = "https://free-nba.p.rapidapi.com/" -headers = { - 'x-rapidapi-host': "free-nba.p.rapidapi.com", - 'x-rapidapi-key': API_KEY - } - -# File name variables to store data in -if osSpecific.whichOs() == "windows": - teamsFile = "Data/NBAteams.csv" - playersDir = "Data\Players\\" -else: - teamsFile = "Data/NBAteams.csv" - playersDir = "Data/Players/" - -# Create new Data directory in order to avoid duplicates, when data is requested multiple times -osSpecific.deleteDataDir() -osSpecific.addDataDir() - - - -###### Functions ###### - -def getTeamsData(url, headers): - - querystring = {"page":"0"} - response = requests.request("GET", url+"teams", headers=headers, params=querystring) - - teamsDf = pd.DataFrame(response.json()["data"]) - teamsDf.set_index("id") - teamsDf = teamsDf.drop("id", axis=1) - teamsDf.to_csv(teamsFile) - print("Teams data stored in Data directory as \"NBAteams.csv\"") - - -def getPlayerData(url, headers): - - print("Stared reading players data") - - # First request is made just to get the amount of pages that must be looped through - querystring = {"per_page":"100","page":"0"} - response = requests.request("GET", url+"players", headers=headers, params=querystring) - pageCount = response.json()["meta"]["total_pages"] - - print("Pages to read: "+str(pageCount)) - for el in range(1, pageCount+1): - - # Requesting pages in loop till pageCount - querystring = {"per_page":"100","page":el} - response = requests.request("GET", url+"players", headers=headers, params=querystring) - data = response.json()["data"] - - for player in data: - teamName = player["team"]["full_name"] - playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"]) - - playerSeries = pd.Series({"first_name" : player["first_name"], - "last_name" : player["last_name"], - "position" : player["position"], - "height_feet" : player["height_feet"], - "height_inches" : player["height_inches"]}) - - #add player to dataframe - playerDf.loc[len(playerDf)] = playerSeries - #add dataframe to File - playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=False) - - print("Page "+str(el)+" read.") - print("All done, check \"Data\" Dir.") - - - - -# Requesting and storing data -if __name__ == "__main__": - getTeamsData(url, headers) - getPlayerData(url, headers) diff --git a/utils/osSpecific.py b/utils/osSpecific.py deleted file mode 100644 index 7caf664..0000000 --- a/utils/osSpecific.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import sys - -# terminal commands, which are unfortunately os-specific - -def whichOs(): - if sys.platform == "win32": - return "windows" - else: - return "good" # ...right? - -def deleteDataDir(): - if whichOs() == "windows": - os.system("rmdir \s Data") - else: - os.system("rm -r Data") - - -def addDataDir(): - if whichOs() == "windows": - os.system("mkdir Data\Players") - else: - os.system("mkdir -p Data/Players") - print("Created new empty Data directory") - -- cgit v1.2.3