Skip to content

Commit 8ffbaf2

Browse files
Chris Chia
authored and committed
adding files
1 parent 12e8909 commit 8ffbaf2

File tree

886 files changed

+418768
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

886 files changed

+418768
-0
lines changed

.DS_Store

10 KB
Binary file not shown.

.gitattributes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Store every CSV data file in Git LFS (the glob below also covers a file named ".csv").
2+
*.csv filter=lfs diff=lfs merge=lfs -text
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np\n",
10+
"import pandas as pd\n",
11+
"import matplotlib.pyplot as plt\n",
12+
"import seaborn as sns\n",
13+
"%matplotlib inline\n",
14+
"\n",
15+
"pd.options.display.max_rows=100\n",
16+
"pd.options.display.max_columns=100"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": 2,
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"df = pd.read_csv(\"data-training.csv\")\n",
26+
"df.iloc[:,15:30] = df.iloc[:,15:30].fillna(0)"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"execution_count": null,
32+
"metadata": {},
33+
"outputs": [],
34+
"source": [
35+
"sns.heatmap(df.corr())"
36+
]
37+
},
38+
{
39+
"cell_type": "code",
40+
"execution_count": null,
41+
"metadata": {},
42+
"outputs": [],
43+
"source": [
44+
"df['askRate0'].corr(df['askSize0'])"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"df.describe()"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": null,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"yCounts = df['y'].value_counts().sort_index()\n",
63+
"plt.bar(yCounts.index, yCounts.values)"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": null,
69+
"metadata": {},
70+
"outputs": [],
71+
"source": [
72+
"df['y'].plot(figsize=(20,10))"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {},
79+
"outputs": [],
80+
"source": [
81+
"(df['askSize0'] / (df['askSize0']+df['bidSize0'])).hist(bins=100)"
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": null,
87+
"metadata": {},
88+
"outputs": [],
89+
"source": [
90+
"((df['askRate0'] + df['bidRate0']) / 2).diff(1).corr(df['y'])"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {},
97+
"outputs": [],
98+
"source": [
99+
"maxVolumes = {}  # column name -> column maximum used as the scaling divisor\n",
100+
"for i in range(10):\n",
101+
"    for col in ('askSize'+str(i), 'bidSize'+str(i)):\n",
102+
"        maxVolumes[col] = df[col].max()  # looked up by label so the divisor comes from the column being scaled\n",
103+
"        df[col] /= maxVolumes[col]"
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": null,
109+
"metadata": {
110+
"scrolled": false
111+
},
112+
"outputs": [],
113+
"source": [
114+
"(df['askRate5'] - df['askRate4']).median()"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {},
121+
"outputs": [],
122+
"source": [
123+
"for level in range(1, 15):\n",
124+
"    df['askRate'+str(level)] = df['askRate'+str(level)].fillna(df['askRate'+str(level-1)] + 0.5)"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": 11,
130+
"metadata": {},
131+
"outputs": [],
132+
"source": [
133+
"from sklearn.decomposition import PCA\n",
134+
"\n",
135+
"askSizeBlock = df.iloc[:, 15:23]\n",
136+
"pca = PCA()\n",
137+
"askVolumes = pca.fit_transform(askSizeBlock)"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"plt.scatter(df['bidRate0'],df['y'])"
147+
]
148+
},
149+
{
150+
"cell_type": "code",
151+
"execution_count": 15,
152+
"metadata": {},
153+
"outputs": [
154+
{
155+
"data": {
156+
"text/plain": [
157+
"0.001696283864909054"
158+
]
159+
},
160+
"execution_count": 15,
161+
"metadata": {},
162+
"output_type": "execute_result"
163+
}
164+
],
165+
"source": [
166+
"from sklearn.linear_model import LinearRegression\n",
167+
"from sklearn.model_selection import cross_val_score\n",
168+
"\n",
169+
"lr = LinearRegression()\n",
170+
"target = df['y'].values  # already 1-D, no reshape needed\n",
171+
"scores = cross_val_score(lr, askVolumes, target, cv=3)\n",
172+
"scores.mean()\n"
173+
]
174+
},
175+
{
176+
"cell_type": "code",
177+
"execution_count": 3,
178+
"metadata": {},
179+
"outputs": [
180+
{
181+
"data": {
182+
"text/plain": [
183+
"askSize0 -0.044874\n",
184+
"askSize1 -0.029520\n",
185+
"askSize2 -0.018320\n",
186+
"askSize3 -0.014050\n",
187+
"askSize4 -0.018614\n",
188+
"askSize5 -0.015285\n",
189+
"askSize6 -0.013968\n",
190+
"askSize7 -0.009049\n",
191+
"askSize8 -0.005798\n",
192+
"askSize9 -0.001430\n",
193+
"askSize10 0.001433\n",
194+
"askSize11 0.002797\n",
195+
"askSize12 0.004413\n",
196+
"askSize13 -0.001598\n",
197+
"askSize14 -0.002608\n",
198+
"dtype: float64"
199+
]
200+
},
201+
"execution_count": 3,
202+
"metadata": {},
203+
"output_type": "execute_result"
204+
}
205+
],
206+
"source": [
207+
"df.iloc[:,15:30].corrwith(df['y'])"
208+
]
209+
},
210+
{
211+
"cell_type": "code",
212+
"execution_count": null,
213+
"metadata": {},
214+
"outputs": [],
215+
"source": []
216+
}
217+
],
218+
"metadata": {
219+
"kernelspec": {
220+
"display_name": "Python 3",
221+
"language": "python",
222+
"name": "python3"
223+
},
224+
"language_info": {
225+
"codemirror_mode": {
226+
"name": "ipython",
227+
"version": 3
228+
},
229+
"file_extension": ".py",
230+
"mimetype": "text/x-python",
231+
"name": "python",
232+
"nbconvert_exporter": "python",
233+
"pygments_lexer": "ipython3",
234+
"version": "3.7.4"
235+
}
236+
},
237+
"nbformat": 4,
238+
"nbformat_minor": 2
239+
}

0 commit comments

Comments
 (0)