author    Rasmus Luha <rasmus.luha@gmail.com>    2022-03-16 23:13:48 +0200
committer Rasmus Luha <rasmus.luha@gmail.com>    2022-03-16 23:13:48 +0200
commit    ecb4b4c8c56d9f720ba004fcfefd40acb996cc7e (patch)
tree      c53e0de7e1e08cccdcc07f2a0a4408ecdf26cbf4
parent    03aaa6978fbd73c13b63f0c525f01189c67d0f1f (diff)
Fixing comments and documentation
-rw-r--r--  .gitignore                            2
-rw-r--r--  API/main.py                          16
-rw-r--r--  AllAboutData/getData.py              82
-rw-r--r--  AllAboutData/utils/osSpecific.py      9
-rw-r--r--  README.md                             0
-rw-r--r--  requirements.txt                    113
6 files changed, 181 insertions, 41 deletions
diff --git a/.gitignore b/.gitignore
index 22e6646..e2f241f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
venv/
.env
-__pycache__/
+**/__pycache__/
AllAboutData/Data/
diff --git a/API/main.py b/API/main.py
index 3816fab..c67c95b 100644
--- a/API/main.py
+++ b/API/main.py
@@ -1,9 +1,14 @@
from fastapi import FastAPI
import pandas as pd
import numpy as np
+import os
+
+import sys
+sys.path.append("--")
app = FastAPI()
+
# Making relative paths for the data, on windows slashes would have to be turned around.
# It is probably not the best way.
relPathTeams = "../AllAboutData/Data/NBAteams.csv"
@@ -20,6 +25,17 @@ def getTeamNames():
+@app.get("/")
+def getIndex():
+ return {"Message" : "Hello!"}
+
+
+#@app.get("/sync")
+#def syncPlayers(): # Currently only works for Unix-type systems, which is not good
+# os.system("rm " + relPathPlayers + "*")
+# getData.getPlayerData(getData.url, getData.headers)
+
+
@app.get("/teams")
def getTeams():
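
The commented-out /sync endpoint above shells out to rm, which only works on Unix. Below is a minimal cross-platform sketch of the same idea, assuming relPathPlayers points at the per-team CSV directory and that getData is importable from the appended path; the names follow the commented-out code and are not a tested API from this commit.

from pathlib import Path

@app.get("/sync")
def syncPlayers():
    # Delete the cached per-team CSV files portably instead of calling rm
    for csvFile in Path(relPathPlayers).glob("*.csv"):
        csvFile.unlink()
    # Re-fetch player data with the existing helper
    getData.getPlayerData(getData.url, getData.headers)
    return {"Message": "Player data refreshed"}
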
diff --git a/AllAboutData/getData.py b/AllAboutData/getData.py
index 4cea9af..7b503fa 100644
--- a/AllAboutData/getData.py
+++ b/AllAboutData/getData.py
@@ -1,32 +1,34 @@
-import pandas as pd
+'''NBA data receiver
+
+This Python script fetches NBA teams and their players (including retired ones),
+along with some additional information about them.
+The data is stored in the current working directory, so any existing "Data"
+directory is overwritten. Data will be in CSV format.
+
+To use this script, "pandas" and "python-dotenv" must be installed.
+You also have to create a .env file in the current directory containing: API_KEY = <your API key>
+You can get the API key from the URL below.
+
+Used API: https://rapidapi.com/theapiguy/api/free-nba/
+'''
+
+import os
import requests
-from utils import osSpecific # Some functions to delete and create directoryes needed for data storing
+import pandas as pd
+from utils import osSpecific # Some functions to delete and create directories for data
from dotenv import load_dotenv
-import os
-'''
-This python script fetches NBA teams and its players (currently playing and retired) and some
-additional information about them.
-The data is stored in the current working directory (any existing "Data" file is overwritten.
-Data is in csv format.
-
-Author: Rasmus Luha
-Created_at: 16.03.2022
-Data fetched from: https://rapidapi.com/theapiguy/api/free-nba/
-'''
# Loading API key from environment variables.
load_dotenv()
API_KEY = os.getenv("API_KEY")
-# To use this script, you should make .env file in current dir and add there: API_KEY = your API_key
-# You can get the API key from url listed above.
# API request details
url = "https://free-nba.p.rapidapi.com/"
headers = {
- 'x-rapidapi-host': "free-nba.p.rapidapi.com",
- 'x-rapidapi-key': API_KEY
+ "x-rapidapi-host": "free-nba.p.rapidapi.com",
+ "x-rapidapi-key": API_KEY
}
# File name variables to store data in
@@ -37,7 +39,7 @@ else:
teamsFile = "Data/NBAteams.csv"
playersDir = "Data/Players/"
-# Create new Data directory in order to avoid duplicates, when data is requested multiple times
+# Creating a new Data dir to avoid duplicates (due to appending)
osSpecific.deleteDataDir()
osSpecific.addDataDir()
@@ -46,8 +48,12 @@ osSpecific.addDataDir()
###### Functions ######
def getTeamsData(url, headers):
+ '''
+ Requests data about NBA teams and stores it.
+ Takes the API url as the first argument and its headers as the second.
+ '''
- querystring = {"page":"0"}
+ querystring = {"page": "0"}
response = requests.request("GET", url+"teams", headers=headers, params=querystring)
teamsDf = pd.DataFrame(response.json()["data"])
@@ -58,36 +64,44 @@ def getTeamsData(url, headers):
def getPlayerData(url, headers):
+ '''
+ Requests data about NBA players, grouped by team, and stores it.
+ Takes the API url as the first argument and its headers as the second.
+ '''
- print("Stared reading players data")
- # First request is made just to get the amount of pages that must be looped through
- querystring = {"per_page":"100","page":"0"}
+ print("Stared reading players data")
+ # First request is made to get the page count to loop
+ querystring = {"per_page": "100","page":"0"}
response = requests.request("GET", url+"players", headers=headers, params=querystring)
- pageCount = response.json()["meta"]["total_pages"]
+ pageCount = response.json()["meta"]["total_pages"] # Got the page count here
+
print("Pages to read: "+str(pageCount))
for el in range(1, pageCount+1):
- # Requesting pages in loop till pageCount
- querystring = {"per_page":"100","page":el}
+ # Requesting pages in a loop until pageCount is reached
+ querystring = {"per_page": "100", "page": el}
response = requests.request("GET", url+"players", headers=headers, params=querystring)
data = response.json()["data"]
+ # Making a dataframe for each player to store it in a suitable file
for player in data:
teamName = player["team"]["full_name"]
- playerDf = pd.DataFrame(columns=["first_name", "last_name", "position", "height_feet", "height_inches"])
+ playerDf = pd.DataFrame(columns=["first_name", "last_name",
+ "position", "height_feet",
+ "height_inches"])
- playerSeries = pd.Series({"first_name" : player["first_name"],
- "last_name" : player["last_name"],
- "position" : player["position"],
- "height_feet" : player["height_feet"],
- "height_inches" : player["height_inches"]})
+ playerSeries = pd.Series({"first_name": player["first_name"],
+ "last_name": player["last_name"],
+ "position": player["position"],
+ "height_feet": player["height_feet"],
+ "height_inches": player["height_inches"]})
- # Add player to dataframe
+
playerDf.loc[len(playerDf)] = playerSeries
- # Add dataframe to File
+ # Append dataframe to file; if it is the first entry, also write the column names
hdr = False if os.path.isfile(playersDir+teamName+".csv") else True
playerDf.to_csv(playersDir+teamName+".csv", mode='a', index=False, header=hdr)
@@ -96,7 +110,7 @@ def getPlayerData(url, headers):
-
if __name__ == "__main__":
getTeamsData(url, headers)
getPlayerData(url, headers)
+
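
The per-team CSV writing above relies on appending with a conditional header: column names are written only when the file does not exist yet. A standalone sketch of that pattern follows; the file path and player data are made up for illustration.

import os
import pandas as pd

def appendPlayerRow(csvPath, playerRow):
    # Write the header only when the CSV file is created by this call
    writeHeader = not os.path.isfile(csvPath)
    pd.DataFrame([playerRow]).to_csv(csvPath, mode="a", index=False, header=writeHeader)

appendPlayerRow("Data/Players/Boston Celtics.csv",
                {"first_name": "Larry", "last_name": "Bird", "position": "F",
                 "height_feet": 6, "height_inches": 9})
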
diff --git a/AllAboutData/utils/osSpecific.py b/AllAboutData/utils/osSpecific.py
index 7caf664..c89e65a 100644
--- a/AllAboutData/utils/osSpecific.py
+++ b/AllAboutData/utils/osSpecific.py
@@ -2,14 +2,17 @@ import os
import sys
# terminal commands, which are unfortunately os-specific
-
def whichOs():
+ ''' Returns "windows" if the OS in use is Windows. If not, returns "good". '''
+
if sys.platform == "win32":
return "windows"
else:
- return "good" # ...right?
+ return "good"
def deleteDataDir():
+ ''' Removes the Data directory from the working directory. '''
+
if whichOs() == "windows":
os.system("rmdir \s Data")
else:
@@ -17,6 +20,8 @@ def deleteDataDir():
def addDataDir():
+ ''' Adds the Data directory (with a Players subdirectory) to the working directory. '''
+
if whichOs() == "windows":
os.system("mkdir Data\Players")
else:
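
The functions above wrap OS-specific shell commands (rmdir/rm, mkdir). As an alternative, not part of this commit, a sketch of the same delete/create behaviour using the standard library, which works on both Windows and Unix without an OS check:

import os
import shutil

def deleteDataDir():
    ''' Removes the Data directory from the working directory, if present. '''
    shutil.rmtree("Data", ignore_errors=True)

def addDataDir():
    ''' Creates Data/Players in the working directory. '''
    os.makedirs(os.path.join("Data", "Players"), exist_ok=True)
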
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/README.md
diff --git a/requirements.txt b/requirements.txt
index 89389bc..a7ee8de 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,116 @@
-certifi==2021.10.8
-charset-normalizer==2.0.12
+alabaster==0.7.12
+anyio==3.5.0
+apparmor==3.0.4
+appdirs==1.4.4
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+asttokens==2.0.5
+attrs==21.4.0
+Babel==2.9.1
+backcall==0.2.0
+beautifulsoup4==4.10.0
+bleach==4.1.0
+btrfsutil==5.16.2
+CacheControl==0.12.6
+cffi==1.15.0
+chardet==4.0.0
+colorama==0.4.4
+contextlib2==0.6.0.post1
+cryptography==36.0.1
+debugpy==1.5.1
+decorator==5.1.1
+defusedxml==0.7.1
+distlib==0.3.4
+distro==1.6.0
+docutils==0.16
+entrypoints==0.4
+executing==0.8.3
+html5lib==1.1
idna==3.3
+imagesize==1.3.0
+ipykernel==6.9.1
+ipython==8.1.1
+ipython-genutils==0.2.0
+ipywidgets==7.6.5
+isc==2.0
+jedi==0.18.1
+Jinja2==3.0.3
+json5==0.9.6
+jsonschema==4.4.0
+jupyter==1.0.0
+jupyter-client==7.1.2
+jupyter-console==6.4.3
+jupyter-core==4.9.2
+jupyter-server==1.13.5
+jupyterlab==3.3.1
+jupyterlab-pygments==0.1.2
+jupyterlab-server==2.10.3
+jupyterlab-widgets==1.0.2
+LibAppArmor==3.0.4
+libtorrent===2.0.5-build-libtorrent-rasterbar-src-libtorrent-rasterbar-2.0.5-bindings-python
+MarkupSafe==2.0.1
+matplotlib-inline==0.1.3
+mistune==0.8.4
+more-itertools==8.10.0
+msgpack==1.0.3
+nbclassic==0.3.6
+nbclient==0.5.13
+nbconvert==6.4.4
+nbformat==5.2.0
+nest-asyncio==1.5.4
+notebook==6.4.8
+notebook-shim==0.1.0
numpy==1.22.3
+ordered-set==4.0.2
+packaging==20.9
pandas==1.4.1
+pandocfilters==1.5.0
+parso==0.8.3
+pep517==0.12.0
+pexpect==4.8.0
+pickleshare==0.7.5
+ply==3.11
+progress==1.6
+prometheus-client==0.13.1
+prompt-toolkit==3.0.28
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+pygame==2.1.2
+Pygments==2.11.2
+pyOpenSSL==21.0.0
+pyparsing==2.4.7
+pyrsistent==0.18.1
python-dateutil==2.8.2
-python-dotenv==0.19.2
pytz==2021.3
-requests==2.27.1
+pyzmq==22.3.0
+qtconsole==5.2.2
+QtPy==2.0.1
+requests==2.27.0
+resolvelib==0.5.5
+retrying==1.3.3
+Send2Trash==1.8.0
six==1.16.0
+sniffio==1.2.0
+snowballstemmer==2.2.0
+soupsieve==2.3.1
+Sphinx==4.4.0
+sphinxcontrib-applehelp==1.0.2
+sphinxcontrib-devhelp==1.0.2
+sphinxcontrib-htmlhelp==2.0.0
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.3
+sphinxcontrib-serializinghtml==1.1.5
+stack-data==0.2.0
+team==1.0
+terminado==0.13.3
+testpath==0.6.0
+toml==0.10.2
+tomli==2.0.0
+tornado==6.1
+traitlets==5.1.1
urllib3==1.26.8
+wcwidth==0.2.5
+webencodings==0.5.1
+websocket-client==1.3.1
+widgetsnbextension==3.5.2