Commit 7f26c65

Day 16 - Login, Scrape, Automate Comments & Likes with Selenium
1 parent 7a1e8c4 commit 7f26c65

5 files changed: 272 additions & 0 deletions


tutorial-reference/Day 16/Pipfile

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
selenium = "*"
requests = "*"

[requires]
python_version = "3.8"
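
The Pipfile pins only selenium and requests. A quick sanity check (a minimal sketch, assuming the environment has already been created with pipenv; the check_env.py filename is an assumption, not part of the commit) is to import both packages and print their versions before running the scripts below:

# check_env.py - hypothetical helper, not part of the commit; confirms the
# two dependencies from the Pipfile import cleanly inside the pipenv shell.
import selenium
import requests

print("selenium:", selenium.__version__)
print("requests:", requests.__version__)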

tutorial-reference/Day 16/Pipfile.lock

Lines changed: 65 additions & 0 deletions
Some generated files are not rendered by default.

tutorial-reference/Day 16/conf.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
"""
INSTA_USERNAME=<set below>
INSTA_PASSWORD=<set below>
"""
INSTA_USERNAME = ''
INSTA_PASSWORD = ''
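
conf.py keeps the credentials as empty module-level strings. A hedged alternative (a sketch only; the environment variable names and the conf_env.py filename are assumptions, not part of the commit) is to read them from the environment and fall back to an interactive prompt, using the getpass module that insta.py already references in its commented-out lines:

# conf_env.py - hypothetical drop-in replacement for conf.py
import os
import getpass

# prefer environment variables so the secrets never live in source control
INSTA_USERNAME = os.environ.get("INSTA_USERNAME", "")
INSTA_PASSWORD = os.environ.get("INSTA_PASSWORD", "")

# fall back to prompting at runtime
if not INSTA_USERNAME:
    INSTA_USERNAME = input("Instagram username: ")
if not INSTA_PASSWORD:
    INSTA_PASSWORD = getpass.getpass("Instagram password: ")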
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import time
from selenium import webdriver

browser = webdriver.Chrome()  # or webdriver.Firefox()

url = 'https://google.com'
browser.get(url)

"""
Candidate markup for the search box:
<input type='text' class='' id='' name='??' />
<textarea name='??'></textarea>
<input name="q" type="text">
"""
time.sleep(2)
name = 'q'
search_el = browser.find_element_by_name(name)
# print(search_el)
# search_el = browser.find_elements_by_css_selector("h1")
search_el.send_keys("selenium python")

"""
Candidate markup for the submit control:
<input type='submit' />
<button type='submit' />
<form></form>

<input type="submit">
"""
submit_btn_el = browser.find_element_by_css_selector("input[type='submit']")
print(submit_btn_el.get_attribute('name'))
time.sleep(2)
submit_btn_el.click()
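
The script above uses the find_element_by_name / find_element_by_css_selector helpers, which newer Selenium releases have removed in favor of By-based locators. A minimal sketch of the same search flow in that style (assuming Selenium 4 or later is installed):

# Selenium 4 locator style - the same Google search expressed with By locators.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://google.com')

time.sleep(2)
search_el = browser.find_element(By.NAME, "q")
search_el.send_keys("selenium python")

submit_btn_el = browser.find_element(By.CSS_SELECTOR, "input[type='submit']")
time.sleep(2)
submit_btn_el.click()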

tutorial-reference/Day 16/insta.py

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
# import getpass
# my_password = getpass.getpass("What is your password?\n")
# print(my_password)
from urllib.parse import urlparse
import os
import time
import requests
from conf import INSTA_USERNAME, INSTA_PASSWORD
from selenium import webdriver

browser = webdriver.Chrome()

url = "https://www.instagram.com"
browser.get(url)

# fill in and submit the login form
time.sleep(2)
username_el = browser.find_element_by_name("username")
username_el.send_keys(INSTA_USERNAME)

password_el = browser.find_element_by_name("password")
password_el.send_keys(INSTA_PASSWORD)

time.sleep(1.5)
submit_btn_el = browser.find_element_by_css_selector("button[type='submit']")
submit_btn_el.click()

body_el = browser.find_element_by_css_selector("body")
html_text = body_el.get_attribute("innerHTML")

# print(html_text)

"""
<button class="_5f5mN jIbKX _6VtSN yZn4P">Follow</button>
"""

# browser.find_elements_by_css_selector("button")

# xpath
# my_button_xpath = "//button"
# browser.find_elements_by_xpath(my_button_xpath)


def click_to_follow(browser):
    # my_follow_btn_xpath = "//a[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    # my_follow_btn_xpath = "//*[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    my_follow_btn_xpath = "//button[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    follow_btn_elements = browser.find_elements_by_xpath(my_follow_btn_xpath)
    for btn in follow_btn_elements:
        time.sleep(2)  # self-throttle
        try:
            btn.click()
        except:
            pass


# new_user_url = "https://www.instagram.com/ted/"
# browser.get(new_user_url)
# click_to_follow(browser)

time.sleep(2)
the_rock_url = "https://www.instagram.com/therock/"
browser.get(the_rock_url)


# grab the first post link on the profile page
post_url_pattern = "https://www.instagram.com/p/<post-slug-id>"
post_xpath_str = "//a[contains(@href, '/p/')]"
post_links = browser.find_elements_by_xpath(post_xpath_str)
post_link_el = None

if len(post_links) > 0:
    post_link_el = post_links[0]

if post_link_el is not None:
    post_href = post_link_el.get_attribute("href")
    browser.get(post_href)

video_els = browser.find_elements_by_xpath("//video")
images_els = browser.find_elements_by_xpath("//img")
base_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(base_dir, "data")
os.makedirs(data_dir, exist_ok=True)

# PIL to verify the size of any given image.


def scrape_and_save(elements):
    for el in elements:
        # print(el.get_attribute('src'))
        url = el.get_attribute('src')
        base_url = urlparse(url).path
        filename = os.path.basename(base_url)
        filepath = os.path.join(data_dir, filename)
        if os.path.exists(filepath):
            continue
        with requests.get(url, stream=True) as r:
            try:
                r.raise_for_status()
            except:
                continue
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)


"""
LONG TERM Goal:
Use machine learning to classify the post's
image or video
and then comment in a relevant fashion
"""

"""
<textarea aria-label="Add a comment…" placeholder="Add a comment…" class="Ypffh" autocomplete="off" autocorrect="off" style="height: 18px;"></textarea>
"""


def automate_comment(browser, content="That is cool!"):
    time.sleep(3)
    comment_xpath_str = "//textarea[contains(@placeholder, 'Add a comment')]"
    comment_el = browser.find_element_by_xpath(comment_xpath_str)
    comment_el.send_keys(content)
    submit_btns_css = "button[type='submit']"  # CSS selector, not XPath
    submit_btns_els = browser.find_elements_by_css_selector(submit_btns_css)
    time.sleep(2)
    for btn in submit_btns_els:
        try:
            btn.click()
        except:
            pass


def automate_likes(browser):
    # find the largest "Like" icon height on the page, then click only the
    # icons of that size
    like_heart_svg_xpath = "//*[contains(@aria-label, 'Like')]"
    all_like_hearts_elements = browser.find_elements_by_xpath(like_heart_svg_xpath)
    max_heart_h = -1
    for heart_el in all_like_hearts_elements:
        h = heart_el.get_attribute("height")
        current_h = int(h)
        if current_h > max_heart_h:
            max_heart_h = current_h
    all_like_hearts_elements = browser.find_elements_by_xpath(like_heart_svg_xpath)
    for heart_el in all_like_hearts_elements:
        h = heart_el.get_attribute("height")
        if h == f"{max_heart_h}":  # get_attribute returns a string
            parent_button = heart_el.find_element_by_xpath('..')
            time.sleep(2)
            try:
                parent_button.click()
            except:
                pass
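
insta.py defines click_to_follow, scrape_and_save, automate_comment, and automate_likes, but the commit leaves most of the calls commented out or implicit. A minimal driver sketch for the already-opened post page (an assumption, not part of the commit; it swaps the fixed time.sleep pauses for Selenium's WebDriverWait / expected_conditions explicit waits):

# driver sketch - hypothetical usage of the helpers above; the selectors are
# the same ones insta.py assumes and may break when Instagram's markup changes.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait until the comment box is present instead of sleeping a fixed interval
WebDriverWait(browser, 10).until(
    EC.presence_of_element_located(
        (By.XPATH, "//textarea[contains(@placeholder, 'Add a comment')]")
    )
)

scrape_and_save(images_els)  # download the post's images into data/
automate_comment(browser, content="That is cool!")
automate_likes(browser)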
