1+ import time
2+
13import requests
24from selenium import webdriver
35from selenium .webdriver .common .by import By
@@ -115,11 +117,13 @@ def get_experiences(self):
115117 self .scroll_to_bottom ()
116118 main_list = self .wait_for_element_to_load (name = "pvs-list__container" , base = main )
117119 for position in main_list .find_elements (By .CLASS_NAME , "pvs-list__paged-list-item" ):
118- position = position .find_element (By .XPATH , "// div[@ data-view-name='profile-component-entity']" )
120+ position = position .find_element (By .CSS_SELECTOR , "div[data-view-name='profile-component-entity']" )
119121 company_logo_elem , position_details = position .find_elements (By .XPATH , "*" )
120122
121123 # company elem
122124 company_linkedin_url = company_logo_elem .find_element (By .XPATH ,"*" ).get_attribute ("href" )
125+ if not company_linkedin_url :
126+ continue
123127
124128 # position details
125129 position_details_list = position_details .find_elements (By .XPATH ,"*" )
@@ -143,15 +147,26 @@ def get_experiences(self):
143147 company = outer_positions [0 ].find_element (By .TAG_NAME ,"span" ).text
144148 work_times = outer_positions [1 ].find_element (By .TAG_NAME ,"span" ).text
145149 location = outer_positions [2 ].find_element (By .TAG_NAME ,"span" ).text
150+ else :
151+ position_title = ""
152+ company = outer_positions [0 ].find_element (By .TAG_NAME ,"span" ).text
153+ work_times = ""
154+ location = ""
155+
146156
147157 times = work_times .split ("·" )[0 ].strip () if work_times else ""
148158 duration = work_times .split ("·" )[1 ].strip () if len (work_times .split ("·" )) > 1 else None
149159
150160 from_date = " " .join (times .split (" " )[:2 ]) if times else ""
151161 to_date = " " .join (times .split (" " )[3 :]) if times else ""
152-
153- if position_summary_text and len (position_summary_text .find_element (By .CLASS_NAME ,"pvs-list__container" ).find_element (By .CLASS_NAME ,"pvs-list__container" ).find_elements (By .XPATH ,"li" )) > 1 :
154- descriptions = position_summary_text .find_element (By .CLASS_NAME ,"pvs-list__container" ).find_element (By .CLASS_NAME ,"pvs-list__container" ).find_elements (By .XPATH ,"li" )
162+ if position_summary_text and any (element .get_attribute ("pvs-list__container" ) for element in position_summary_text .find_elements (By .TAG_NAME , "*" )):
163+ inner_positions = (position_summary_text .find_element (By .CLASS_NAME ,"pvs-list__container" )
164+ .find_element (By .XPATH ,"*" ).find_element (By .XPATH ,"*" ).find_element (By .XPATH ,"*" )
165+ .find_elements (By .CLASS_NAME ,"pvs-list__paged-list-item" ))
166+ else :
167+ inner_positions = []
168+ if len (inner_positions ) > 1 :
169+ descriptions = inner_positions
155170 for description in descriptions :
156171 res = description .find_element (By .TAG_NAME ,"a" ).find_elements (By .XPATH ,"*" )
157172 position_title_elem = res [0 ] if len (res ) > 0 else None
@@ -249,7 +264,6 @@ def get_name_and_location(self):
249264 self .name = top_panel .find_element (By .TAG_NAME , "h1" ).text
250265 self .location = top_panel .find_element (By .XPATH , "//*[@class='text-body-small inline t-black--light break-words']" ).text
251266
252-
253267 def get_about (self ):
254268 try :
255269 about = self .driver .find_element (By .ID ,"about" ).find_element (By .XPATH ,".." ).find_element (By .CLASS_NAME ,"display-flex" ).text
@@ -293,75 +307,6 @@ def scrape_logged_in(self, close_on_complete=True):
293307 self .get_educations ()
294308
295309 driver .get (self .linkedin_url )
296-
297- # get interest
298- try :
299-
300- _ = WebDriverWait (driver , self .__WAIT_FOR_ELEMENT_TIMEOUT ).until (
301- EC .presence_of_element_located (
302- (
303- By .XPATH ,
304- "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']" ,
305- )
306- )
307- )
308- interestContainer = driver .find_element (By .XPATH ,
309- "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']"
310- )
311- for interestElement in interestContainer .find_elements (By .XPATH ,
312- "//*[@class='pv-interest-entity pv-profile-section__card-item ember-view']"
313- ):
314- interest = Interest (
315- interestElement .find_element (By .TAG_NAME , "h3" ).text .strip ()
316- )
317- self .add_interest (interest )
318- except :
319- pass
320-
321- # get accomplishment
322- try :
323- _ = WebDriverWait (driver , self .__WAIT_FOR_ELEMENT_TIMEOUT ).until (
324- EC .presence_of_element_located (
325- (
326- By .XPATH ,
327- "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']" ,
328- )
329- )
330- )
331- acc = driver .find_element (By .XPATH ,
332- "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']"
333- )
334- for block in acc .find_elements (By .XPATH ,
335- "//div[@class='pv-accomplishments-block__content break-words']"
336- ):
337- category = block .find_element (By .TAG_NAME , "h3" )
338- for title in block .find_element (By .TAG_NAME ,
339- "ul"
340- ).find_elements (By .TAG_NAME , "li" ):
341- accomplishment = Accomplishment (category .text , title .text )
342- self .add_accomplishment (accomplishment )
343- except :
344- pass
345-
346- # get connections
347- try :
348- driver .get ("https://www.linkedin.com/mynetwork/invite-connect/connections/" )
349- _ = WebDriverWait (driver , self .__WAIT_FOR_ELEMENT_TIMEOUT ).until (
350- EC .presence_of_element_located ((By .CLASS_NAME , "mn-connections" ))
351- )
352- connections = driver .find_element (By .CLASS_NAME , "mn-connections" )
353- if connections is not None :
354- for conn in connections .find_elements (By .CLASS_NAME , "mn-connection-card" ):
355- anchor = conn .find_element (By .CLASS_NAME , "mn-connection-card__link" )
356- url = anchor .get_attribute ("href" )
357- name = conn .find_element (By .CLASS_NAME , "mn-connection-card__details" ).find_element (By .CLASS_NAME , "mn-connection-card__name" ).text .strip ()
358- occupation = conn .find_element (By .CLASS_NAME , "mn-connection-card__details" ).find_element (By .CLASS_NAME , "mn-connection-card__occupation" ).text .strip ()
359-
360- contact = Contact (name = name , occupation = occupation , url = url )
361- self .add_contact (contact )
362- except :
363- connections = None
364-
365310 if close_on_complete :
366311 driver .quit ()
367312
0 commit comments