@autor Israel Sanchez @Last update 06/24/2023 @tested on docker containers running over windows
- Docker Desktop
- WSL 2 installed
- Docker hub account
- Internet access
- Energy to start :) lol
docker run -p 8888:8888 --name jupyter -v "${PWD}":/home/jovyan/work jupyter/datascience-notebook docker run --name mssql2022 -v "${PWD}":/tmp -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=M1dNigt3ss" -p 1433:1433 -d mcr.microsoft.com/mssql/server:2022-latest SQL Server SA user pass: M1dNigt3ss -- Remember this
-
Download adventureworks BAK file from Microsoft repo
-
Check the container ID in
docker pskrump@KrumbaRumba:~/code/$ docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 9fba1dd731fd jupyter/datascience-notebook "tini -g -- start-no…" 18 hours ago Up 23 minutes (healthy) 0.0.0.0:8888->8888/tcp jupyter 17c83a1327c9 mcr.microsoft.com/mssql/server:2022-latest "/opt/mssql/bin/perm…" 18 hours ago Up 23 minutes 0.0.0.0:1433->1433/tcp mssql2022
-
Copy the AdventureWorks .bak file to the container
docker cp AdventureWorksDW2022.bak 17c83a1327c9:/tmp -
Use SQL Server client list MSSQL Studio or Azure Data Explorer to restore AdventureWorksDW2022 from .BAK File located on /tmp/AdventureWorksDW2022.bak
sudo docker exec -it --user root jupyter bash in jupyter container as root run the next code (looks to many lines I'm not expert but this install ODBC Client 18) & sync the dependencies locally
apt update -y apt upgrade -y apt install gpg -y #Download the desired package(s) curl -O https://download.microsoft.com/download/1/f/f/1fffb537-26ab-4947-a46a-7a45c27f6f77/msodbcsql18_18.2.2.1-1_amd64.apk curl -O https://download.microsoft.com/download/1/f/f/1fffb537-26ab-4947-a46a-7a45c27f6f77/mssql-tools18_18.2.1.1-1_amd64.apk #(Optional) Verify signature, if 'gpg' is missing install it using 'apk add gnupg': curl -O https://download.microsoft.com/download/1/f/f/1fffb537-26ab-4947-a46a-7a45c27f6f77/msodbcsql18_18.2.2.1-1_amd64.sig curl -O https://download.microsoft.com/download/1/f/f/1fffb537-26ab-4947-a46a-7a45c27f6f77/mssql-tools18_18.2.1.1-1_amd64.sig curl https://packages.microsoft.com/keys/microsoft.asc | gpg --import - apt update -y gpg --verify msodbcsql18_18.2.2.1-1_amd64.sig msodbcsql18_18.2.2.1-1_amd64.apk gpg --verify mssql-tools18_18.2.1.1-1_amd64.sig mssql-tools18_18.2.1.1-1_amd64.apk apt update -y apt upgrade -y apt-get install apt-file -y apt-file update apt upgrade -y apt-file find msodbcsql18_18.2.2.1-1_amd64.apk apt-file find mssql-tools18_18.2.1.1-1_amd64.apk #Install the package(s) apt install -y unixodbc-dev -y apt install -y unixodbc -y apt update -y apt upgrade -y apt install gpg -y sudo apt install --reinstall software-properties-common -y apt-get install odbcinst -y sudo curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - echo "deb [arch=amd64] https://packages.microsoft.com/ubuntu/21.10/prod impish main" | sudo tee /etc/apt/sources.list.d/mssql-release.list sudo add-apt-repository "$(wget -qO- https://packages.microsoft.com/config/ubuntu/22.04/prod.list)" apt update -y sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 apt install msodbcsql18 -y pip install azure-core pip install azure-mgmt-compute pip install azure-mgmt-containerservice pip install azure-mgmt-containerinstance pip install azure-storage-blob pip install azure-keyvault pip install azure-mgmt-storage pip install azure.storage.blob pip install azure-storage-file-share pip install azure-storage-common pip install azure-mgmt-datalake-store pip install azure-mgmt-databricks pip install azure-data-tables pip install azure-mgmt-synapse pip install azure-mgmt-monitor pip install imageio pip install prophet pip install xgboost pip install matplotlib pip install seaborn pip install sklearn pip install sqlalchemy pip install pyodbc pip install pandas pip install pyarrow pip install sqlalchemy-hana pip install hdbcli pip install pyspark pip install ipython-sql pip install sparksql-magic pip install delta-spark - Cargar_datos_excel_pandas.ipynb
-
000 datos_faltantes.ipynb
-
000 filtrar_datos_dataframes.ipynb
-
000 series_de_pandas.ipynb
-
0001 dataframes_de_pandas.ipynb
-
001 tutorial-limpieza-de-datos.ipynb
-
002 tutorial-analisis-exploratorio-de-datos.ipynb
-
0_FunctionIncludes.ipynb
-
500 Ejercicio Distribution info SAP TABLES.ipynb
-
500 Ejercicio Optimize DataFrameSize.ipynb
-
500 Sample load from CSV and optimization of memory.ipynb
Optimize the use of memory
-
501 Conectar con SQL Server.ipynb
-
502 Pandas DataFrame JOINS.ipynb
joins dataframes using merge functionallity from pandas
-
503 frequently algoritms used on pandas.ipynb
-
600 SQL Server Adventure Works SAMPLES.ipynb
-
602 SQL SERVER ADVENTURE ALL Process.ipynb
read tables from SQL Server and execute filters and merges
-
991_Extraer datos de SAPHana.ipynb

