Python Playwright — универсальный инструмент для веб-автоматизации и парсинга. Обычно страницы открывают внутри явно созданного контекста браузера (`browser.new_context()`), однако бывают сценарии, в которых удобнее обойтись без явного создания контекста (в этом случае `browser.new_page()` сам создаёт для страницы отдельный контекст). В этой статье мы рассмотрим различные способы работы в таком режиме и попутно приведём примеры кода.
Метод 1. Создание экземпляра браузера
from playwright.sync_api import sync_playwright
def run_without_context():
    """Start a Chromium browser and shut it down again.

    Only the browser handle is created here; the function never asks for a
    browser context or a page.
    """
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        # Work that needs nothing but the bare browser handle goes here,
        # before the browser is shut down.
        chromium_browser.close()
run_without_context()
Метод 2. Открытие новой страницы
from playwright.sync_api import sync_playwright
def run_without_context():
    """Open one page directly from the browser, then close page and browser."""
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        tab = chromium_browser.new_page()
        # Interact with `tab` here; this function never creates a context
        # explicitly.
        tab.close()
        chromium_browser.close()
run_without_context()
Метод 3. Переход по URL-адресу
from playwright.sync_api import sync_playwright
def run_without_context():
    """Open a page from the browser and navigate it to https://example.com."""
    target_url = 'https://example.com'
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        tab = chromium_browser.new_page()
        tab.goto(target_url)
        # The document is loaded at this point; inspect or drive `tab` here.
        tab.close()
        chromium_browser.close()
run_without_context()
Метод 4. Моделирование действий пользователя
from playwright.sync_api import sync_playwright
def run_without_context():
    """Load https://example.com and click the element matching ``button``."""
    target_url = 'https://example.com'
    button_selector = 'button'
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        tab = chromium_browser.new_page()
        tab.goto(target_url)
        tab.click(button_selector)
        # Further user-like actions (filling forms, submitting data, ...)
        # would follow here.
        tab.close()
        chromium_browser.close()
run_without_context()
Метод 5. Извлечение содержимого страницы
from playwright.sync_api import sync_playwright
def run_without_context():
    """Load https://example.com and return the page's HTML.

    Returns:
        The value produced by ``page.content()`` for the loaded page.
        (The original snippet fetched this value and then discarded it;
        callers that ignored the previous ``None`` result are unaffected.)
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        try:
            page = browser.new_page()
            page.goto('https://example.com')
            content = page.content()
            page.close()
        finally:
            # Close the browser even if navigation or extraction raised.
            browser.close()
    # Process and extract data from the returned HTML at the call site.
    return content
run_without_context()
Метод 6. Создание снимков экрана
from playwright.sync_api import sync_playwright
def run_without_context():
    """Load https://example.com and save a screenshot to ``screenshot.png``."""
    target_url = 'https://example.com'
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        tab = chromium_browser.new_page()
        tab.goto(target_url)
        tab.screenshot(path='screenshot.png')
        # Any post-processing of the saved image would go here.
        tab.close()
        chromium_browser.close()
run_without_context()
Метод 7. Обработка сетевых запросов
from playwright.sync_api import sync_playwright
def run_without_context():
    """Load https://example.com while blocking image requests.

    A route handler is installed for every request URL. Requests whose
    resource type is ``image`` are aborted; all others continue unchanged.
    """
    def block_images(route):
        # Aborting every request would also abort the navigation issued by
        # page.goto() below, so only images are dropped here.
        if route.request.resource_type == 'image':
            route.abort()
        else:
            route.continue_()

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        try:
            page = browser.new_page()
            # '**/*' matches any URL. The original pattern '/*' looks like a
            # markup-mangled '**/*' and would not match absolute request URLs.
            page.route('**/*', block_images)
            page.goto('https://example.com')
            # Customize the behavior of network requests in block_images.
            page.close()
        finally:
            # Close the browser even if routing or navigation raised.
            browser.close()
run_without_context()
Метод 8. Взаимодействие с JavaScript
from playwright.sync_api import sync_playwright
def run_without_context():
    """Load https://example.com and return the result of a JS evaluation.

    Returns:
        The value of ``document.title`` as evaluated in the page by
        ``page.evaluate``. (The original snippet computed this value and
        then discarded it; callers that ignored the previous ``None``
        result are unaffected.)
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        try:
            page = browser.new_page()
            page.goto('https://example.com')
            # Any JavaScript expression can be evaluated the same way.
            result = page.evaluate('() => document.title')
            page.close()
        finally:
            # Close the browser even if navigation or evaluation raised.
            browser.close()
    return result
run_without_context()
Метод 9. Внедрение пользовательских скриптов
from playwright.sync_api import sync_playwright
def run_without_context():
    """Open a page and add a ``<script>`` tag that points at an external URL.

    No navigation is performed before the script tag is added.
    """
    script_url = 'https://example.com/custom_script.js'
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        tab = chromium_browser.new_page()
        tab.add_script_tag(url=script_url)
        # The injected script can now extend what the page is able to do.
        tab.close()
        chromium_browser.close()
run_without_context()
Метод 10. Обработка событий страницы
from playwright.sync_api import sync_playwright
def run_without_context():
    """Register a ``load`` handler on a page, then open https://example.com.

    The handler prints a fixed message each time it is invoked.
    """
    def _report_load():
        print('Page loaded successfully.')

    target_url = 'https://example.com'
    with sync_playwright() as pw:
        chromium_browser = pw.chromium.launch()
        tab = chromium_browser.new_page()
        # Subscribe before navigating so the handler is already in place
        # when goto() is called.
        tab.on('load', _report_load)
        tab.goto(target_url)
        # Other page events can be handled by registering more callbacks.
        tab.close()
        chromium_browser.close()
run_without_context()
В этой статье мы рассмотрели различные способы использования Python Playwright без явного создания контекста: создание экземпляра браузера, открытие новой страницы, переход по URL-адресу, моделирование действий пользователя, извлечение содержимого страницы, создание снимков экрана, обработку сетевых запросов, взаимодействие с JavaScript, внедрение пользовательских скриптов и обработку событий страницы. Эти приёмы позволяют задействовать возможности Python Playwright в сценариях, где отдельный контекст не требуется, автоматизировать веб-задачи и эффективно собирать данные с веб-страниц.