Skip to content

Problem in click action/PDF interaction #321

@DsDastgheib

Description

@DsDastgheib

It seems the click action does not work when the agent wants to click on "View PDF" for an arXiv article.
Here is some sample code (`parse_content_to_elements` and `find_matching_anchor` are borrowed from here):

import re
from dataclasses import dataclass, field

# An element line in the flattened accessibility tree: "[<digits>] <description>".
_ANCHOR_LINE = re.compile(r'\[(\d+)\](.*)')


def parse_content_to_elements(content: str):
    """Parse the observation content into a dict mapping anchors to descriptions.

    A line of the form ``[<digits>] <text>`` starts a new element keyed by the
    digit string. Every following non-blank line that is not itself an anchor
    line is appended (space-separated) to that element's description. Lines
    that appear before the first anchor are ignored. If the same anchor occurs
    twice, the later description wins.

    Args:
        content: Text to parse, one element or continuation per line.

    Returns:
        ``dict[str, str]`` mapping anchor id to its joined description.
    """
    elements = {}
    current_anchor = None
    description_lines = []

    for line in content.split('\n'):
        line = line.strip()
        if not line:
            continue

        anchor_match = _ANCHOR_LINE.match(line)
        if anchor_match:
            # A new anchor closes the element collected so far.
            if current_anchor is not None:
                elements[current_anchor] = ' '.join(description_lines)
            current_anchor = anchor_match.group(1)
            description_lines = [anchor_match.group(2).strip()]
        elif current_anchor is not None:
            description_lines.append(line)

    # Flush the last element, which no later anchor line closed.
    if current_anchor is not None:
        elements[current_anchor] = ' '.join(description_lines)

    return elements


def find_matching_anchor(content: str, selector: str):
    """Find the anchor ID whose description contains the given selector.

    The comparison is a case-insensitive substring test; the selector is
    stripped of surrounding whitespace first.

    Args:
        content: Text accepted by ``parse_content_to_elements``.
        selector: Description fragment to look for.

    Returns:
        The first matching anchor id (in order of appearance), or ``None``
        when nothing matches or the selector is blank.
    """
    selector = selector.lower().strip()
    if not selector:
        # An empty string is a substring of everything and would otherwise
        # "match" the first element in the tree.
        return None

    for anchor, description in parse_content_to_elements(content).items():
        if selector in description.lower():
            return anchor
    return None


def main():
    """Reproduce the issue: click "View PDF" on an arXiv abstract page.

    Prints the page URL after each step so the effect of the click is visible.
    """
    # Imported here rather than at module level so the parsing helpers above
    # can be imported without a browser stack installed.
    import gymnasium as gym
    import browsergym.core  # noqa: F401  (registers the openended task as a gym environment)
    from browsergym.utils.obs import flatten_axtree_to_str

    env = gym.make(
        "browsergym/openended",
        task_kwargs={"start_url": "https://www.google.com/"},  # starting URL
        wait_for_user_message=False,  # wait for a user message after each agent message sent to the chat
    )
    try:
        obs, info = env.reset()

        action0 = 'goto("https://arxiv.org/abs/1706.03762")'
        obs, reward, terminated, truncated, info = env.step(action0)
        print(obs["url"])

        action1 = "noop(2000)"
        obs, reward, terminated, truncated, info = env.step(action1)
        print(obs["url"])

        extra_element_properties = {}
        axtree_text = flatten_axtree_to_str(
            obs["axtree_object"],
            extra_properties=extra_element_properties,
            with_clickable=True,
            skip_generic=True,
            filter_visible_only=True,
        )
        select = find_matching_anchor(axtree_text, "link 'View PDF',")
        if select is None:
            # Fail loudly instead of sending click("None", ...) to the environment.
            raise RuntimeError("No element matching \"link 'View PDF',\" found in the accessibility tree")

        action2 = f'click("{select}", "left")'
        print(action2)
        obs, reward, terminated, truncated, info = env.step(action2)
        print(obs["url"])
    finally:
        # release the environment even if a step above raised
        env.close()


if __name__ == '__main__':
    main()

The output is as follows:

https://arxiv.org/abs/1706.03762 https://arxiv.org/abs/1706.03762 https://arxiv.org/abs/1706.03762 

We can see that the URL does not change after the click action, whereas we expect to be redirected to https://arxiv.org/pdf/1706.03762.
I've tested this on a couple of arXiv articles, and it did not work.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions