Skip to content

Commit b2d9493

Browse files
committed
save
1 parent d31987f commit b2d9493

34 files changed

+7924
-0
lines changed

.gitignore

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
*.egg-info/
24+
.installed.cfg
25+
*.egg
26+
27+
# PyInstaller
28+
# Usually these files are written by a python script from a template
29+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
30+
*.manifest
31+
*.spec
32+
33+
# Installer logs
34+
pip-log.txt
35+
pip-delete-this-directory.txt
36+
37+
# Unit test / coverage reports
38+
htmlcov/
39+
.tox/
40+
.coverage
41+
.coverage.*
42+
.cache
43+
nosetests.xml
44+
coverage.xml
45+
*.cover
46+
.hypothesis/
47+
.pytest_cache/
48+
49+
# Translations
50+
*.mo
51+
*.pot
52+
53+
# Django stuff:
54+
*.log
55+
local_settings.py
56+
db.sqlite3
57+
58+
# Flask stuff:
59+
instance/
60+
.webassets-cache
61+
62+
# Scrapy stuff:
63+
.scrapy
64+
65+
# Sphinx documentation
66+
docs/_build/
67+
68+
# PyBuilder
69+
target/
70+
71+
# Jupyter Notebook
72+
.ipynb_checkpoints
73+
74+
# pyenv
75+
.python-version
76+
77+
# celery beat schedule file
78+
celerybeat-schedule
79+
80+
# SageMath parsed files
81+
*.sage.py
82+
83+
# Environments
84+
.env
85+
.venv
86+
env/
87+
venv/
88+
ENV/
89+
env.bak/
90+
venv.bak/
91+
92+
# Spyder project settings
93+
.spyderproject
94+
.spyproject
95+
96+
# Rope project settings
97+
.ropeproject
98+
99+
# mkdocs documentation
100+
/site
101+
102+
# mypy
103+
.mypy_cache/

AWS_SageMaker/Demo.ipynb

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Demo Notebook for SageMaker Endpoint\n",
8+
"\n",
9+
"- Demo SageMaker Endpoint on Forest Fire Cause Prediction"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import boto3\n",
19+
"\n",
20+
"endpoint_name = \"sagemaker/endpoint/model\"\n",
21+
"runtime = boto3.Session().client(service_name='sagemaker-runtime',\n",
22+
" region_name='us-east-2')"
23+
]
24+
},
25+
{
26+
"cell_type": "markdown",
27+
"metadata": {},
28+
"source": [
29+
"## Preprocess the raw data"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"import pandas as pd\n",
39+
"# Load the test set; only the first two rows are used as the demo sample below\n",
40+
"bucket='your/bucket/name'\n",
41+
"data_key = 'the/etl/output/test/set'\n",
42+
"data_location = 's3://{}/{}'.format(bucket, data_key)\n",
43+
"print(data_location)\n",
44+
"\n",
45+
"test_df = pd.read_csv(data_location)\n",
46+
"test_df_orig = test_df.copy()"
47+
]
48+
},
49+
{
50+
"cell_type": "code",
51+
"execution_count": null,
52+
"metadata": {},
53+
"outputs": [],
54+
"source": [
55+
"sample_data = test_df_orig.iloc[:2]"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"sample_data.head()"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {},
71+
"outputs": [],
72+
"source": [
73+
"from sklearn import tree, preprocessing\n",
74+
"# Preprocessing \n",
75+
"test_df['DATE'] = pd.to_datetime(test_df['discovery_date'] - pd.Timestamp(0).to_julian_date(), unit='D')\n",
76+
"test_df['MONTH'] = pd.DatetimeIndex(test_df['DATE']).month\n",
77+
"test_df['DAY_OF_WEEK'] = test_df['DATE'].dt.day_name()  # .dt.weekday_name was removed in pandas 1.0; day_name() yields the same English day names\n",
78+
"le = preprocessing.LabelEncoder()\n",
79+
"test_df['STATE'] = le.fit_transform(test_df['state'])\n",
80+
"test_df['DAY_OF_WEEK'] = le.fit_transform(test_df['DAY_OF_WEEK'])\n",
81+
"\n",
82+
"def set_label(cat):\n",
83+
" cause = 0\n",
84+
" natural = ['Lightning']\n",
85+
" accidental = ['Structure','Fireworks','Powerline','Railroad','Smoking',\n",
86+
" 'Children','Campfire','Equipment Use','Debris Burning']\n",
87+
" malicious = ['Arson']\n",
88+
" other = ['Missing/Undefined','Miscellaneous']\n",
89+
" if cat in natural:\n",
90+
" cause = 1\n",
91+
" elif cat in accidental:\n",
92+
" cause = 2\n",
93+
" elif cat in malicious:\n",
94+
" cause = 3\n",
95+
" else:\n",
96+
" cause = 4\n",
97+
" return cause\n",
98+
" \n",
99+
"\n",
100+
"test_df['LABEL'] = test_df['stat_cause_descr'].apply(lambda x: set_label(x)) # the untouched raw frame is still available as test_df_orig (copied right after loading)\n",
101+
"test_df = test_df.drop('stat_cause_descr',axis=1)\n",
102+
"test_df.drop(['state', 'fire_size_class', 'discovery_date', 'DATE', 'cont_date'], axis=1, inplace=True)\n",
103+
"test_df = test_df.dropna()\n",
104+
"\n",
105+
"\n",
106+
"sample_test_X = test_df.drop(['LABEL'], axis=1).values[:2]\n",
107+
"sample_test_y = test_df['LABEL'].values[:2]"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": null,
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"sample_data.head()"
117+
]
118+
},
119+
{
120+
"cell_type": "code",
121+
"execution_count": null,
122+
"metadata": {},
123+
"outputs": [],
124+
"source": [
125+
"print(sample_test_X)"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": null,
131+
"metadata": {},
132+
"outputs": [],
133+
"source": [
134+
"# Makes sense: 'Lightning' is encoded by set_label as label/category 1 (natural cause)\n",
135+
"sample_test_y"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"metadata": {},
142+
"outputs": [],
143+
"source": [
144+
"df_payload = pd.DataFrame(sample_test_X)"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"import io\n",
154+
"payload_file = io.StringIO()\n",
155+
"df_payload.to_csv(payload_file, header = None, index = None)"
156+
]
157+
},
158+
{
159+
"cell_type": "markdown",
160+
"metadata": {},
161+
"source": [
162+
"## Invoke Endpoint\n",
163+
"\n",
164+
"- The SageMaker Scikit-learn model server provides a default implementation of `input_fn`, which deserializes JSON-, CSV-, or NPY-encoded data into a NumPy array."
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"metadata": {},
171+
"outputs": [],
172+
"source": [
173+
"# NOTE: text/csv reportedly didn't work because of reshaping (sklearn requires 2 dimensions), yet the call below sends text/csv -- TODO confirm which is current\n",
174+
"# application/jsonlines not available\n",
175+
"# application/json tries to convert JSON to float\n",
176+
"\n",
177+
"response = runtime.invoke_endpoint(EndpointName=endpoint_name, \n",
178+
" ContentType='text/csv', \n",
179+
" Body=payload_file.getvalue())"
180+
]
181+
},
182+
{
183+
"cell_type": "code",
184+
"execution_count": null,
185+
"metadata": {},
186+
"outputs": [],
187+
"source": [
188+
"# Print predictions\n",
189+
"print(response['Body'].read().decode())\n",
190+
"print(\"[natural, natural]\")"
191+
]
192+
},
193+
{
194+
"cell_type": "code",
195+
"execution_count": null,
196+
"metadata": {},
197+
"outputs": [],
198+
"source": [
199+
"# Print actual labels\n",
200+
"print(sample_test_y.tolist())  # a bare expression that is not the cell's last line is never displayed\n",
201+
"print(\"[natural, misc.]\")"
202+
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": null,
207+
"metadata": {},
208+
"outputs": [],
209+
"source": []
210+
}
211+
],
212+
"metadata": {
213+
"kernelspec": {
214+
"display_name": "Python 3",
215+
"language": "python",
216+
"name": "python3"
217+
},
218+
"language_info": {
219+
"codemirror_mode": {
220+
"name": "ipython",
221+
"version": 3
222+
},
223+
"file_extension": ".py",
224+
"mimetype": "text/x-python",
225+
"name": "python",
226+
"nbconvert_exporter": "python",
227+
"pygments_lexer": "ipython3",
228+
"version": "3.6.5"
229+
}
230+
},
231+
"nbformat": 4,
232+
"nbformat_minor": 2
233+
}

0 commit comments

Comments
 (0)