The W3C Actions API replaced Appium's deprecated TouchAction class. In Python, it uses Selenium's ActionBuilder with PointerInput to model finger gestures as sequences of pointer events. The result is precise, cross-platform gesture control.
Imports
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.actions.action_builder import ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction
Tap
def tap(driver, x: int, y: int):
    """Tap once at the location (x, y) using a single touch pointer.

    The gesture is a pointer move to (x, y) followed immediately by a
    pointer-down and a pointer-up.
    """
    touch = PointerInput(interaction.POINTER_TOUCH, "finger")
    builder = ActionBuilder(driver, mouse=touch)

    # pointer_action returns the builder's pointer-actions object; each call
    # queues one more event on the touch pointer.
    pointer = builder.pointer_action
    pointer.move_to_location(x, y)
    pointer.pointer_down()
    pointer.pointer_up()

    builder.perform()
def tap_element(driver, element):
    """Tap the centre of ``element``.

    The centre is computed from the element's ``location`` (``x``/``y``) and
    ``size`` (``width``/``height``) using integer division, then handed to
    ``tap``.
    """
    loc = element.location
    size = element.size
    x = loc["x"] + size["width"] // 2
    y = loc["y"] + size["height"] // 2
    tap(driver, x, y)


# Long press
import time
def long_press(driver, x: int, y: int, duration: float = 2.0):
    """Press and hold at the location (x, y), then release.

    Args:
        driver: the driver the gesture is performed on.
        x: horizontal coordinate of the press.
        y: vertical coordinate of the press.
        duration: how long the pointer stays down before release; passed
            straight to ``pause``.
    """
    finger = PointerInput(interaction.POINTER_TOUCH, "finger")
    actions = ActionBuilder(driver, mouse=finger)
    actions.pointer_action \
        .move_to_location(x, y) \
        .pointer_down() \
        .pause(duration) \
        .pointer_up()
    actions.perform()


# Swipe
def swipe(driver, start_x: int, start_y: int, end_x: int, end_y: int, duration: float = 0.5):
    """Swipe with one touch pointer from (start_x, start_y) to (end_x, end_y).

    Sequence: move to the start point, pointer down, pause for ``duration``,
    move to the end point, pointer up.

    Args:
        driver: the driver the gesture is performed on.
        start_x: horizontal coordinate where the finger goes down.
        start_y: vertical coordinate where the finger goes down.
        end_x: horizontal coordinate where the finger is lifted.
        end_y: vertical coordinate where the finger is lifted.
        duration: length of the pause between pointer-down and the move.
    """
    finger = PointerInput(interaction.POINTER_TOUCH, "finger")
    actions = ActionBuilder(driver, mouse=finger)
    actions.pointer_action \
        .move_to_location(start_x, start_y) \
        .pointer_down() \
        .pause(duration) \
        .move_to_location(end_x, end_y) \
        .pointer_up()
    actions.perform()


# The pause(duration) holds the pointer down before the move. Values between
# 0.4 and 0.7 seconds produce a natural swipe speed; very fast moves (< 0.2s)
# are treated as flings by many apps.
Screen-relative swipes
def swipe_up(driver, fraction: float = 0.4):
    """Swipe up by a fraction of screen height.

    The swipe runs along the horizontal centre of the window and is centred
    vertically, so the default ``fraction=0.4`` goes from about 70% down to
    about 30% of the window height.

    Args:
        driver: the driver the gesture is performed on.
        fraction: portion of the window height to travel, strictly between
            0 and 1.

    Raises:
        ValueError: if ``fraction`` is not strictly between 0 and 1.
    """
    # Validate first so a bad value fails before any driver call is made.
    if not 0 < fraction < 1:
        raise ValueError(f"fraction must be between 0 and 1 (exclusive), got {fraction!r}")

    size = driver.get_window_size()
    x = size["width"] // 2
    half = fraction / 2
    start_y = int(size["height"] * (0.5 + half))
    end_y = int(size["height"] * (0.5 - half))
    swipe(driver, x, start_y, x, end_y)
def swipe_down(driver):
    """Swipe down the horizontal centre of the window, from 30% to 70% of its height."""
    window = driver.get_window_size()
    center_x = window["width"] // 2
    top_y = int(window["height"] * 0.3)
    bottom_y = int(window["height"] * 0.7)
    swipe(driver, center_x, top_y, center_x, bottom_y)
def swipe_left(driver):
    """Swipe left along the vertical centre of the window (advance carousel).

    The finger travels from 80% to 20% of the window width.
    """
    window = driver.get_window_size()
    center_y = window["height"] // 2
    from_x = int(window["width"] * 0.8)
    to_x = int(window["width"] * 0.2)
    swipe(driver, from_x, center_y, to_x, center_y)
def swipe_right(driver):
    """Swipe right along the vertical centre of the window.

    The finger travels from 20% to 80% of the window width.
    """
    size = driver.get_window_size()
    y = size["height"] // 2
    swipe(driver, int(size["width"] * 0.2), y, int(size["width"] * 0.8), y)


# Pinch to zoom
Two-finger gestures require two PointerInput sequences:
def pinch_out(driver, element, factor: float = 0.3):
    """Zoom in (spread fingers outward) on ``element`` with two touch pointers.

    Both fingers go down on the element's horizontal centre line, a quarter
    of the final offset either side of the centre, pause briefly, then move
    apart until each is ``offset`` pixels from the centre.

    Args:
        driver: the driver the gesture is performed on.
        element: element to pinch; its ``location`` and ``size`` give the
            centre point.
        factor: fraction of the element's shorter side used as each
            finger's final distance from the centre.
    """
    loc = element.location
    size = element.size
    cx = loc["x"] + size["width"] // 2
    cy = loc["y"] + size["height"] // 2
    offset = int(min(size["width"], size["height"]) * factor)

    finger1 = PointerInput(interaction.POINTER_TOUCH, "finger1")
    finger2 = PointerInput(interaction.POINTER_TOUCH, "finger2")

    # Queue the same five-step sequence on each pointer, mirrored around the
    # centre. The throwaway builder is only used for its pointer_action
    # helper; the events are stored on the PointerInput itself.
    for finger, direction in ((finger1, -1), (finger2, 1)):
        ActionBuilder(driver, mouse=finger).pointer_action \
            .move_to_location(cx + direction * (offset // 4), cy) \
            .pointer_down() \
            .pause(0.1) \
            .move_to_location(cx + direction * offset, cy) \
            .pointer_up()

    # Perform both simultaneously: one builder carrying both pointers sends
    # a single actions request, so the two sequences run in parallel.
    combined = ActionBuilder(driver, mouse=finger1)
    combined.devices.append(finger2)
    combined.perform()


# For pinch gestures, Appium's mobile command shortcuts are often more
# reliable than raw multi-pointer sequences on both platforms:
# iOS pinch (Appium XCUITest extension)
# The XCUITest command is "mobile: pinch". A scale below 1 pinches closed
# (zooms out) and is paired with a negative velocity.
# NOTE(review): confirm the velocity sign rule against the XCUITest driver docs.
driver.execute_script("mobile: pinch", {
    "elementId": element.id,
    "scale": 0.5,
    "velocity": -1.5
})

# Android pinch via UIAutomator
driver.execute_script("mobile: pinchOpenGesture", {
    "elementId": element.id,
    "percent": 0.75,
    "speed": 2500
})


# Pull to refresh
def pull_to_refresh(driver):
    """Drag down the horizontal centre of the window to trigger a refresh.

    The drag goes from 25% to 75% of the window height and passes a 1.5
    second ``duration`` to ``swipe``.
    """
    size = driver.get_window_size()
    x = size["width"] // 2
    # Start near the top, drag down slowly (1.5s makes it a pull, not a scroll)
    swipe(
        driver,
        start_x=x,
        start_y=int(size["height"] * 0.25),
        end_x=x,
        end_y=int(size["height"] * 0.75),
        duration=1.5,
    )


# GestureUtils class
Encapsulate all gestures in a reusable class:
# utils/gesture_utils.py
class GestureUtils:
    """Wrap the module-level gesture helpers around a single driver."""

    def __init__(self, driver):
        """Remember the driver every gesture is performed on."""
        self.driver = driver

    def tap(self, element=None, x=None, y=None):
        """Tap the centre of ``element`` or, without one, the point (x, y).

        When ``element`` is given, any ``x``/``y`` arguments are ignored.

        Raises:
            ValueError: if no element is given and ``x`` or ``y`` is missing.
        """
        if element is not None:
            loc = element.location
            sz = element.size
            x = loc["x"] + sz["width"] // 2
            y = loc["y"] + sz["height"] // 2
        elif x is None or y is None:
            # Fail here instead of sending None coordinates to the driver.
            raise ValueError("tap() needs either an element or both x and y")
        tap(self.driver, x, y)

    def swipe_up(self):
        """Swipe up using the module-level ``swipe_up`` defaults."""
        swipe_up(self.driver)

    def swipe_down(self):
        """Swipe down using the module-level ``swipe_down``."""
        swipe_down(self.driver)

    def swipe_left(self):
        """Swipe left using the module-level ``swipe_left``."""
        swipe_left(self.driver)

    def swipe_right(self):
        """Swipe right using the module-level ``swipe_right``."""
        swipe_right(self.driver)

    def pull_to_refresh(self):
        """Run the module-level ``pull_to_refresh`` gesture."""
        pull_to_refresh(self.driver)

    def long_press_element(self, element, duration: float = 2.0):
        """Long-press the centre of ``element`` for ``duration``."""
        loc = element.location
        sz = element.size
        long_press(
            self.driver,
            loc["x"] + sz["width"] // 2,
            loc["y"] + sz["height"] // 2,
            duration,
        )