-rw-r--r-- | .gitignore                       |   2
-rw-r--r-- | API/main.py                      |  16
-rw-r--r-- | AllAboutData/getData.py          |  82
-rw-r--r-- | AllAboutData/utils/osSpecific.py |   9
-rw-r--r-- | README.md                        |   0
-rw-r--r-- | requirements.txt                 | 113
6 files changed, 181 insertions, 41 deletions
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 venv/
 .env
-__pycache__/
+**/__pycache__/
 AllAboutData/Data/
diff --git a/API/main.py b/API/main.py
index 3816fab..c67c95b 100644
--- a/API/main.py
+++ b/API/main.py
@@ -1,9 +1,14 @@
 from fastapi import FastAPI
 import pandas as pd
 import numpy as np
+import os
+
+import sys
+sys.path.append("--")
 
 app = FastAPI()
 
+
 # Making relative paths for the data, on windows slashes would have to be turned around.
 # It is probably not the best way.
 relPathTeams = "../AllAboutData/Data/NBAteams.csv"
@@ -20,6 +25,17 @@ def getTeamNames():
 
 
+@app.get("/")
+def getIndex():
+    return {"Message" : "Hello!"}
+
+
+#@app.get("/sync")
+#def syncPlayers(): # Currently only works for Unix-type systems, which is not good
+#    os.system("rm " + relPathPlayers + "*")
+#    getData.getPlayerData(getData.url, getData.headers)
+
+
 @app.get("/teams")
 def getTeams():
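Note on the commented-out /sync endpoint above: it shells out to rm, so it would only work on Unix-like systems. A cross-platform sketch (hypothetical, not part of this commit) could clear the per-team CSVs with the standard library instead, assuming relPathPlayers points at the Players directory and getData is importable from API/main.py (which the sys.path.append above hints at):

    # Hypothetical OS-agnostic variant of the commented-out /sync endpoint (not in this commit).
    import glob
    import os

    @app.get("/sync")
    def syncPlayers():
        # Delete every file in the Players directory, then re-fetch the player data.
        for path in glob.glob(os.path.join(relPathPlayers, "*")):
            os.remove(path)
        getData.getPlayerData(getData.url, getData.headers)
        return {"Message" : "Players synced"}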
diff --git a/AllAboutData/getData.py b/AllAboutData/getData.py
index 4cea9af..7b503fa 100644
--- a/AllAboutData/getData.py
+++ b/AllAboutData/getData.py
@@ -1,32 +1,34 @@
-import pandas as pd
+'''NBA data receiver
+
+This python script fetches NBA teams and their players (including retired)
+with some additional information about them.
+The data is stored in the current working directory and thus,
+any existing "Data" file is overwritten. Data will be in csv format.
+
+To use this script, "pandas" and "python-dotenv" must be installed.
+You also have to make a .env file in the current dir and add there: API_KEY = your API_key
+You can get the API key from the url below.
+
+Used API: https://rapidapi.com/theapiguy/api/free-nba/
+'''
+
+import os
 import requests
-from utils import osSpecific # Some functions to delete and create directoryes needed for data storing
+import pandas as pd
+from utils import osSpecific # Some functions to delete and create directories for data
 from dotenv import load_dotenv
-import os
 
-'''
-This python script fetches NBA teams and its players (currently playing and retired) and some
-additional information about them.
-The data is stored in the current working directory (any existing "Data" file is overwritten.
-Data is in csv format.
-
-Author: Rasmus Luha
-Created_at: 16.03.2022
-Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
-'''
 
 # Loading API key from environment variables.
 load_dotenv()
 API_KEY = os.getenv("API_KEY")
-# To use this script, you should make .env file in current dir and add there: API_KEY = your API_key
-# You can get the API key from url listed above.
 
 # API request details
 url = "https://free-nba.p.rapidapi.com/"
 headers = {
-    'x-rapidapi-host': "free-nba.p.rapidapi.com",
-    'x-rapidapi-key': API_KEY
+    "x-rapidapi-host": "free-nba.p.rapidapi.com",
+    "x-rapidapi-key": API_KEY
 }
 
 # File name variables to store data in
@@ -37,7 +39,7 @@ else:
     teamsFile = "Data/NBAteams.csv"
     playersDir = "Data/Players/"
 
-# Create new Data directory in order to avoid duplicates, when data is requested multiple times
+# Creating new Data dir to avoid duplicates (due to appending)
 osSpecific.deleteDataDir()
 osSpecific.addDataDir()
 
@@ -46,8 +48,12 @@ osSpecific.addDataDir()
 
 ###### Functions ######
 
 def getTeamsData(url, headers):
+    '''
+    Requests Data about NBA teams and stores it.
+    Takes API url as first and its headers as second argument.
+    '''
-    querystring = {"page":"0"}
+    querystring = {"page": "0"}
 
     response = requests.request("GET", url+"teams", headers=headers, params=querystring)
     teamsDf = pd.DataFrame(response.json()["data"])
@@ -58,36 +64,44 @@ def getTeamsData(url, headers):
 
 
 def getPlayerData(url, headers):
+    '''
+    Requests Data about NBA players and stores it, based on teams.
+    Takes API url as first and its headers as second argument.
+    '''
-    print("Stared reading players data")
-    # First request is made just to get the amount of pages that must be looped through
-    querystring = {"per_page":"100","page":"0"}
+    print("Started reading players data")
+    # First request is made to get the page count to loop
+    querystring = {"per_page": "100","page":"0"}
 
     response = requests.request("GET", url+"players", headers=headers, params=querystring)
-    pageCount = response.json()["meta"]["total_pages"]
+    pageCount = response.json()["meta"]["total_pages"] # Got the page count here
+
     print("Pages to read: "+str(pageCount))
 
     for el in range(1, pageCount+1):
-        # Requesting pages in loop till pageCount
-        querystring = {"per_page":"100","page":el}
+        # Requesting pages in loop till pageCount is reached
+        querystring = {"per_page": "100","page": el}
         response = requests.request("GET", url+"players", headers=headers, params=querystring)
         data = response.json()["data"]
 
+        # Making a dataframe for each player to store it in a suitable file
        for player in data:
            teamName = player["team"]["full_name"]
-            playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+            playerDf = pd.DataFrame(columns=["first_name", "last_name",
+                                             "position", "height_feet",
+                                             "height_inches"])
 
-            playerSeries = pd.Series({"first_name" : player["first_name"],
-                                      "last_name" : player["last_name"],
-                                      "position" : player["position"],
-                                      "height_feet" : player["height_feet"],
-                                      "height_inches" : player["height_inches"]})
+            playerSeries = pd.Series({"first_name": player["first_name"],
+                                      "last_name": player["last_name"],
+                                      "position": player["position"],
+                                      "height_feet": player["height_feet"],
+                                      "height_inches": player["height_inches"]})
 
-            # Add player to dataframe
+
             playerDf.loc[len(playerDf)] = playerSeries
 
-            # Add dataframe to File
+            # Add dataframe to File; if it is the first to be added, then also add column names
            hdr = False if os.path.isfile(playersDir+teamName+".csv") else True
            playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=hdr)
@@ -96,7 +110,7 @@ def getPlayerData(url, headers):
 
 
-
 if __name__ == "__main__":
     getTeamsData(url, headers)
     getPlayerData(url, headers)
+
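For context, the new module docstring above implies a setup along the following lines. The sketch below is illustrative only and assumes the script is run from inside AllAboutData/ (it imports utils.osSpecific relative to that directory):

    # Usage sketch (not part of the commit). Requires a .env file next to getData.py
    # containing a line such as:  API_KEY = <your RapidAPI key>
    import getData   # note: importing already deletes and recreates the Data/ directory

    getData.getTeamsData(getData.url, getData.headers)    # writes Data/NBAteams.csv
    getData.getPlayerData(getData.url, getData.headers)   # writes one csv per team under Data/Players/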
diff --git a/AllAboutData/utils/osSpecific.py b/AllAboutData/utils/osSpecific.py
index 7caf664..c89e65a 100644
--- a/AllAboutData/utils/osSpecific.py
+++ b/AllAboutData/utils/osSpecific.py
@@ -2,14 +2,17 @@ import os
 import sys
 
 # terminal commands, which are unfortunately os-specific
-
 def whichOs():
+    ''' Returns "windows" if the used os is Windows. If not, returns "good" '''
+
     if sys.platform == "win32":
         return "windows"
     else:
-        return "good" # ...right?
+        return "good"
 
 
 def deleteDataDir():
+    ''' Removes the Data directory from the working directory '''
+
     if whichOs() == "windows":
         os.system("rmdir \s Data")
     else:
@@ -17,6 +20,8 @@ def addDataDir():
+    ''' Adds the Data directory to the working directory '''
+
     if whichOs() == "windows":
         os.system("mkdir Data\Players")
     else:
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/README.md
diff --git a/requirements.txt b/requirements.txt
index 89389bc..a7ee8de 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,116 @@
-certifi==2021.10.8
-charset-normalizer==2.0.12
+alabaster==0.7.12
+anyio==3.5.0
+apparmor==3.0.4
+appdirs==1.4.4
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+asttokens==2.0.5
+attrs==21.4.0
+Babel==2.9.1
+backcall==0.2.0
+beautifulsoup4==4.10.0
+bleach==4.1.0
+btrfsutil==5.16.2
+CacheControl==0.12.6
+cffi==1.15.0
+chardet==4.0.0
+colorama==0.4.4
+contextlib2==0.6.0.post1
+cryptography==36.0.1
+debugpy==1.5.1
+decorator==5.1.1
+defusedxml==0.7.1
+distlib==0.3.4
+distro==1.6.0
+docutils==0.16
+entrypoints==0.4
+executing==0.8.3
+html5lib==1.1
 idna==3.3
+imagesize==1.3.0
+ipykernel==6.9.1
+ipython==8.1.1
+ipython-genutils==0.2.0
+ipywidgets==7.6.5
+isc==2.0
+jedi==0.18.1
+Jinja2==3.0.3
+json5==0.9.6
+jsonschema==4.4.0
+jupyter==1.0.0
+jupyter-client==7.1.2
+jupyter-console==6.4.3
+jupyter-core==4.9.2
+jupyter-server==1.13.5
+jupyterlab==3.3.1
+jupyterlab-pygments==0.1.2
+jupyterlab-server==2.10.3
+jupyterlab-widgets==1.0.2
+LibAppArmor==3.0.4
+libtorrent===2.0.5-build-libtorrent-rasterbar-src-libtorrent-rasterbar-2.0.5-bindings-python
+MarkupSafe==2.0.1
+matplotlib-inline==0.1.3
+mistune==0.8.4
+more-itertools==8.10.0
+msgpack==1.0.3
+nbclassic==0.3.6
+nbclient==0.5.13
+nbconvert==6.4.4
+nbformat==5.2.0
+nest-asyncio==1.5.4
+notebook==6.4.8
+notebook-shim==0.1.0
 numpy==1.22.3
+ordered-set==4.0.2
+packaging==20.9
 pandas==1.4.1
+pandocfilters==1.5.0
+parso==0.8.3
+pep517==0.12.0
+pexpect==4.8.0
+pickleshare==0.7.5
+ply==3.11
+progress==1.6
+prometheus-client==0.13.1
+prompt-toolkit==3.0.28
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+pygame==2.1.2
+Pygments==2.11.2
+pyOpenSSL==21.0.0
+pyparsing==2.4.7
+pyrsistent==0.18.1
 python-dateutil==2.8.2
-python-dotenv==0.19.2
 pytz==2021.3
-requests==2.27.1
+pyzmq==22.3.0
+qtconsole==5.2.2
+QtPy==2.0.1
+requests==2.27.0
+resolvelib==0.5.5
+retrying==1.3.3
+Send2Trash==1.8.0
 six==1.16.0
+sniffio==1.2.0
+snowballstemmer==2.2.0
+soupsieve==2.3.1
+Sphinx==4.4.0
+sphinxcontrib-applehelp==1.0.2
+sphinxcontrib-devhelp==1.0.2
+sphinxcontrib-htmlhelp==2.0.0
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.3
+sphinxcontrib-serializinghtml==1.1.5
+stack-data==0.2.0
+team==1.0
+terminado==0.13.3
+testpath==0.6.0
+toml==0.10.2
+tomli==2.0.0
+tornado==6.1
+traitlets==5.1.1
 urllib3==1.26.8
+wcwidth==0.2.5
+webencodings==0.5.1
+websocket-client==1.3.1
+widgetsnbextension==3.5.2
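A side note on AllAboutData/utils/osSpecific.py above: the os.system calls are shell-specific, and on Windows the recursive rmdir flag is /s rather than \s. If a cross-platform variant were wanted, the standard library covers both cases; the sketch below is a suggestion only, not what the commit does:

    # Cross-platform alternative to the shell commands in osSpecific.py (suggestion only).
    import shutil
    from pathlib import Path

    def deleteDataDir():
        '''Removes the Data directory from the working directory, if present.'''
        shutil.rmtree("Data", ignore_errors=True)

    def addDataDir():
        '''Creates Data/Players in the working directory.'''
        Path("Data/Players").mkdir(parents=True, exist_ok=True)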