W3C Actions API in Python — ActionChains for Mobile

7 min read

The W3C Actions API replaced Appium's deprecated TouchAction class. In Python, it uses Selenium's ActionBuilder with PointerInput to model finger gestures as sequences of pointer events. The result is precise, cross-platform gesture control.

Imports

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.actions.action_builder import ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction

Tap

def tap(driver, x: int, y: int):
    """Tap the screen once at the absolute coordinates (x, y).

    A single touch pointer named "finger" is moved to the point, pressed,
    and released, then the queued actions are sent to the driver.
    """
    touch = PointerInput(interaction.POINTER_TOUCH, "finger")
    builder = ActionBuilder(driver, mouse=touch)

    # Queue the pointer events one by one instead of chaining them.
    pointer = builder.pointer_action
    pointer.move_to_location(x, y)
    pointer.pointer_down()
    pointer.pointer_up()

    builder.perform()
 
 
def tap_element(driver, element):
    """Tap the center of *element*.

    The center is derived from the element's ``location`` (top-left corner)
    and ``size``, using integer division for the half extents.
    """
    origin, dims = element.location, element.size
    center_x = origin["x"] + dims["width"] // 2
    center_y = origin["y"] + dims["height"] // 2
    tap(driver, center_x, center_y)

Long press

import time
 
def long_press(driver, x: int, y: int, duration: float = 2.0):
    """Press and hold at (x, y) for *duration* seconds, then release.

    Args:
        driver: Driver the W3C actions are performed on.
        x: Horizontal coordinate of the press.
        y: Vertical coordinate of the press.
        duration: How long the pointer stays down before it is lifted.
    """
    touch = PointerInput(interaction.POINTER_TOUCH, "finger")
    builder = ActionBuilder(driver, mouse=touch)

    pointer = builder.pointer_action
    pointer.move_to_location(x, y)
    pointer.pointer_down()
    pointer.pause(duration)  # the hold is what makes this a long press
    pointer.pointer_up()

    builder.perform()

Swipe

def swipe(driver, start_x: int, start_y: int, end_x: int, end_y: int, duration: float = 0.5):
    """Drag a single touch pointer from the start point to the end point.

    The pointer goes down at (start_x, start_y), is held for *duration*
    via ``pause``, moves to (end_x, end_y), and is then lifted.

    Args:
        driver: Driver the W3C actions are performed on.
        start_x: Horizontal coordinate where the finger touches down.
        start_y: Vertical coordinate where the finger touches down.
        end_x: Horizontal coordinate where the finger is lifted.
        end_y: Vertical coordinate where the finger is lifted.
        duration: Length of the pause between touching down and moving.
    """
    touch = PointerInput(interaction.POINTER_TOUCH, "finger")
    builder = ActionBuilder(driver, mouse=touch)

    gesture = builder.pointer_action
    gesture.move_to_location(start_x, start_y)
    gesture.pointer_down()
    gesture.pause(duration)  # hold happens before the move, not during it
    gesture.move_to_location(end_x, end_y)
    gesture.pointer_up()

    builder.perform()

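The pause(duration) holds the pointer down before the move starts; it does not stretch the move itself, which runs at the ActionBuilder's default pointer-move duration (250 ms in Selenium 4). Values between 0.4 and 0.7 seconds produce a natural-feeling swipe; very fast moves (< 0.2s) are treated as flings by many apps.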

Screen-relative swipes

def swipe_up(driver, fraction: float = 0.4):
    """Swipe up by a fraction of screen height.

    The swipe runs along the horizontal middle of the screen and is centered
    vertically, so the default of 0.4 goes from 70% to 30% of the height.

    Args:
        driver: Driver whose window size defines the swipe coordinates.
        fraction: Portion of the screen height the finger travels; must be
            greater than 0 and at most 1.

    Raises:
        ValueError: If *fraction* is outside the range (0, 1].
    """
    if not 0 < fraction <= 1:
        raise ValueError(f"fraction must be in (0, 1], got {fraction!r}")

    size = driver.get_window_size()
    height = size["height"]
    x = size["width"] // 2

    half_travel = fraction / 2
    # Keep the start point on-screen when fraction is 1 (height itself is
    # one pixel past the last row).
    start_y = min(int(height * (0.5 + half_travel)), height - 1)
    end_y = int(height * (0.5 - half_travel))
    swipe(driver, x, start_y, x, end_y)
 
 
def swipe_down(driver):
    """Swipe down the horizontal middle of the screen, from 30% to 70% of its height."""
    screen = driver.get_window_size()
    mid_x = screen["width"] // 2
    top_y = int(screen["height"] * 0.3)
    bottom_y = int(screen["height"] * 0.7)
    swipe(driver, mid_x, top_y, mid_x, bottom_y)
 
 
def swipe_left(driver):
    """Swipe left at mid-height, from 80% to 20% of the screen width (e.g. to advance a carousel)."""
    screen = driver.get_window_size()
    mid_y = screen["height"] // 2
    right_x = int(screen["width"] * 0.8)
    left_x = int(screen["width"] * 0.2)
    swipe(driver, right_x, mid_y, left_x, mid_y)
 
 
def swipe_right(driver):
    """Swipe right at mid-height, from 20% to 80% of the screen width."""
    screen = driver.get_window_size()
    mid_y = screen["height"] // 2
    left_x = int(screen["width"] * 0.2)
    right_x = int(screen["width"] * 0.8)
    swipe(driver, left_x, mid_y, right_x, mid_y)

Pinch to zoom

Two-finger gestures require two PointerInput sequences:

def pinch_out(driver, element, factor: float = 0.3):
    """Zoom in (spread fingers outward).

    Two touch pointers start close to the element's center and move apart
    horizontally. Both pointer sequences are sent in one W3C actions request
    so the fingers move at the same time.

    Args:
        driver: Driver the W3C actions are performed on.
        element: Element to zoom; its ``location`` and ``size`` define the
            center of the gesture and how far the fingers travel.
        factor: Fraction of the element's shorter side that each finger ends
            up away from the center.
    """
    loc = element.location
    size = element.size
    cx = loc["x"] + size["width"] // 2
    cy = loc["y"] + size["height"] // 2
    offset = int(min(size["width"], size["height"]) * factor)

    finger1 = PointerInput(interaction.POINTER_TOUCH, "finger1")
    finger2 = PointerInput(interaction.POINTER_TOUCH, "finger2")

    # Left finger: starts a quarter of the offset left of center, ends a full
    # offset away. This builder is also the one that performs the gesture.
    gesture = ActionBuilder(driver, mouse=finger1)
    gesture.pointer_action\
        .move_to_location(cx - offset // 4, cy)\
        .pointer_down()\
        .pause(0.1)\
        .move_to_location(cx - offset, cy)\
        .pointer_up()

    # Right finger: mirror image. The throwaway builder only serves to queue
    # the events on finger2, which stores its own action list.
    ActionBuilder(driver, mouse=finger2).pointer_action\
        .move_to_location(cx + offset // 4, cy)\
        .pointer_down()\
        .pause(0.1)\
        .move_to_location(cx + offset, cy)\
        .pointer_up()

    # Register the second finger as an extra input device so that a single
    # perform() dispatches both sequences together.
    gesture.devices.append(finger2)
    gesture.perform()

For pinch gestures, Appium's mobile command shortcuts are often more reliable than raw multi-pointer sequences on both platforms:

# iOS pinch (Appium XCUITest extension)
# XCUITest exposes one "mobile: pinch" command for both directions; the
# pinchOpenGesture / pinchCloseGesture names belong to the Android driver.
# A scale below 1 pinches closed (zooms out).
# NOTE(review): XCTest reportedly expects a negative velocity when scale < 1
# — confirm on a device.
driver.execute_script("mobile: pinch", {
    "elementId": element.id,
    "scale": 0.5,
    "velocity": 1.5
})

# Android pinch via UIAutomator
# "percent" is the size of the pinch relative to the element and "speed" is
# passed through to the driver as given here.
driver.execute_script("mobile: pinchOpenGesture", {
    "elementId": element.id,
    "percent": 0.75,
    "speed": 2500
})

Pull to refresh

def pull_to_refresh(driver):
    """Drag down the horizontal middle of the screen, from 25% to 75% of its height.

    The drag is delegated to ``swipe`` with ``duration=1.5`` so the gesture
    reads as a deliberate pull rather than a quick scroll.
    """
    screen = driver.get_window_size()
    mid_x = screen["width"] // 2
    top_y = int(screen["height"] * 0.25)
    bottom_y = int(screen["height"] * 0.75)
    swipe(driver, start_x=mid_x, start_y=top_y, end_x=mid_x, end_y=bottom_y, duration=1.5)

GestureUtils class

Encapsulate all gestures in a reusable class:

# utils/gesture_utils.py
class GestureUtils:
    """Thin object wrapper that binds the module-level gestures to one driver."""

    def __init__(self, driver):
        """Store the driver every gesture on this instance will use."""
        self.driver = driver

    @staticmethod
    def _center(element):
        """Return the (x, y) center of *element* from its location and size."""
        loc = element.location
        sz = element.size
        return loc["x"] + sz["width"] // 2, loc["y"] + sz["height"] // 2

    def tap(self, element=None, x=None, y=None):
        """Tap the center of *element*, or the point (x, y) if no element is given.

        Args:
            element: Element to tap; takes precedence over *x* and *y*.
            x: Horizontal coordinate, used only when *element* is None.
            y: Vertical coordinate, used only when *element* is None.

        Raises:
            ValueError: If no element is given and *x* or *y* is missing.
        """
        if element is not None:
            x, y = self._center(element)
        elif x is None or y is None:
            # Fail here with a clear message instead of sending None
            # coordinates to the driver.
            raise ValueError("tap() requires either an element or both x and y")
        tap(self.driver, x, y)

    def swipe_up(self):
        """Swipe up using the module-level ``swipe_up`` defaults."""
        swipe_up(self.driver)

    def swipe_down(self):
        """Swipe down using the module-level ``swipe_down``."""
        swipe_down(self.driver)

    def swipe_left(self):
        """Swipe left using the module-level ``swipe_left``."""
        swipe_left(self.driver)

    def swipe_right(self):
        """Swipe right using the module-level ``swipe_right``."""
        swipe_right(self.driver)

    def pull_to_refresh(self):
        """Perform the module-level ``pull_to_refresh`` gesture."""
        pull_to_refresh(self.driver)

    def long_press_element(self, element, duration: float = 2.0):
        """Long-press the center of *element* for *duration* seconds."""
        x, y = self._center(element)
        long_press(self.driver, x, y, duration)

// tip to track lessons you complete and pick up where you left off across devices.