如何在 Puppeteer 或 Selenium 中使用 Anti-Captcha 插件
Puppeteer 和 Selenium 是两个主要的浏览器自动化引擎,我们的插件可与二者紧密集成。本文将说明如何在 Puppeteer (NodeJS) 和 Selenium (Python) 中使用我们的插件。如果要在二者之间进行选择,强烈建议选用 NodeJS+Puppeteer,因为 NodeJS 是 Puppeteer 的原生运行环境。
1. 安装所依赖的程序。对于 NodeJS,只须安装下面指定的 npm 程序包;对于 Python,请安装程序包并在此页面中下载可执行文件“chromedriver”。驱动程序版本必须与安装在系统中的 Chrome 版本相符。
npm install adm-zip follow-redirects puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
2. 下载适用于 Chrome 的 ZIP 版插件,将其解压缩到项目文件夹中。最新版本位于此处。也可以通过编程方式下载:
//npm install adm-zip follow-redirects
// follow-redirects is a drop-in replacement for the core https module that
// transparently follows HTTP redirects (the complete example uses it too).
const https = require('follow-redirects').https;
const fs = require('fs');
const AdmZip = require("adm-zip");
const pluginURL = 'https://antcpt.com/anticaptcha-plugin.zip';
(async () => {
    // Download the plugin archive to ./plugin.zip.
    // Rejects on network errors, non-200 responses and file write errors
    // instead of leaving the promise pending or saving a broken archive.
    await new Promise((resolve, reject) => {
        https.get(pluginURL, resp => {
            if (resp.statusCode !== 200) {
                resp.resume(); // discard the body so the socket is released
                reject(new Error(`plugin download failed: HTTP ${resp.statusCode}`));
                return;
            }
            resp.pipe(fs.createWriteStream('./plugin.zip'))
                .on('error', reject)
                .on('close', resolve);
        }).on('error', reject);
    });
    // Unzip it into ./plugin/, overwriting existing files.
    // extractAllTo is synchronous, so no await is needed.
    const zip = new AdmZip("./plugin.zip");
    zip.extractAllTo("./plugin/", true);
})().catch(e => {
    // Report any download/unzip failure and signal it through the exit code
    console.error('could not download the plugin: ' + e);
    process.exitCode = 1;
});
3. 然后在 ./plugin/js/config_ac_api_key.js 文件中配置 API 密钥。可在客户区找到 API 密钥。余额必须为正数才能使其起作用。
// API key that gets written into the plugin configuration file
const apiKey = 'API_KEY_32_BYTES';
if (!fs.existsSync('./plugin/js/config_ac_api_key.js')) {
    // The plugin has not been unpacked to ./plugin yet
    console.error('plugin configuration not found!')
} else {
    // Replace the empty predefined key in the config with our API key
    const originalConfig = fs.readFileSync('./plugin/js/config_ac_api_key.js', 'utf8');
    const patchedConfig = originalConfig.replace(
        /antiCapthaPredefinedApiKey = ''/g,
        `antiCapthaPredefinedApiKey = '${apiKey}'`
    );
    fs.writeFileSync('./plugin/js/config_ac_api_key.js', patchedConfig, 'utf8');
}
4. 初始化装有插件的浏览器。对于 Puppeteer,建议配合“puppeteer-extra”程序包使用插件“puppeteer-extra-plugin-stealth”,该插件可隐藏 Chromium 浏览器受自动化程序控制的所有迹象。
//npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');

// Register the stealth plugin before launching any browser
puppeteer.use(StealthPlugin());

(async () => {
    // Default launch arguments that are excluded from the launch
    const excludedDefaultArgs = [
        "--disable-extensions",
        "--enable-automation"
    ];

    // Extra Chrome command-line flags; the last two point Chrome at the
    // unpacked plugin directory
    const chromeArgs = [
        '--disable-web-security',
        '--disable-features=IsolateOrigins,site-per-process',
        '--allow-running-insecure-content',
        '--disable-blink-features=AutomationControlled',
        '--no-sandbox',
        '--mute-audio',
        '--no-zygote',
        '--no-xshm',
        '--window-size=1920,1080',
        '--no-first-run',
        '--no-default-browser-check',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--enable-webgl',
        '--ignore-certificate-errors',
        '--lang=en-US,en;q=0.9',
        '--password-store=basic',
        '--disable-gpu-sandbox',
        '--disable-software-rasterizer',
        '--disable-background-timer-throttling',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
        '--disable-infobars',
        '--disable-breakpad',
        '--disable-canvas-aa',
        '--disable-2d-canvas-clip-aa',
        '--disable-gl-drawing-for-tests',
        '--enable-low-end-device-mode',
        '--disable-extensions-except=./plugin',
        '--load-extension=./plugin'
    ];

    // Launch a visible (non-headless) browser and open a new page
    const browser = await puppeteer.launch({
        headless: false,
        ignoreDefaultArgs: excludedDefaultArgs,
        args: chromeArgs
    });
    const page = await browser.newPage();
})();
5. 转到目标网页并按需填写窗体。插件会自动获取 Recaptcha 谜题并开始对其进行破解。
(async () => {
    const url = 'https://anti-captcha.com/demo/?page=recaptcha_v2_textarea';
    const login = 'Test login';
    const password = 'Test password';

    // Open the target page; a load failure is logged and the script continues
    try {
        await page.goto(url, { waitUntil: "networkidle0" });
    } catch (e) {
        console.error('err while loading the page: '+e);
    }

    // Disable navigation timeout errors
    await page.setDefaultNavigationTimeout(0);

    // Fill the form: the same setter runs in the page for both fields
    const setFieldValue = (element, value) => {
        element.value = value;
    };
    await page.$eval('#login', setFieldValue, login);
    await page.$eval('#password', setFieldValue, password);
})();
6. 下一步有些棘手。对于某些网页窗体,用户必须在破解 Recaptcha 谜题后按提交按钮;其他窗体则采用回调的方式,自动提交网页窗体内容。对于第一种情况,我们需要在破解 Recaptcha 谜题后立即按提交按钮。要在合适的时间按提交按钮,只须等到选择器 .antigate_solver.solved 弹出后再按提交按钮即可。
// Wait for the "solved" selector to appear; a failed wait is logged and the script continues
await page.waitForSelector('.antigate_solver.solved').catch(error => console.log('failed to wait for the selector'));
// The original line here was an unrendered template placeholder that is not valid JavaScript
console.log('Recaptcha 谜题已破解');
// Press the submit button and wait for the navigation it triggers to finish
await Promise.all([
    page.click('#submitButton'),
    page.waitForNavigation({ waitUntil: "networkidle0" })
]);
console.log('任务已完成,已绕过有 Recaptcha 谜题的窗体');
大功告成,窗体填写完毕,Recaptcha 谜题破解完毕并已绕过。全部代码的示例:
// First run the following to install the required npm packages:
//
// npm install adm-zip follow-redirects puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
//
// What the script does: downloads and unpacks the plugin, writes the API key
// into the plugin config, launches the browser with the plugin loaded, fills
// the demo form, waits until the plugin marks the Recaptcha as solved and
// then submits the form.
const https = require('follow-redirects').https;
const fs = require('fs');
const AdmZip = require("adm-zip");
const apiKey = 'YOUR_API_KEY_HERE!';
const pluginURL = 'https://antcpt.com/anticaptcha-plugin.zip';
const url = 'https://anti-captcha.com/demo/?page=recaptcha_v2_textarea';
const login = 'Test login';
const password = 'Test password';
let page = null;
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
(async () => {
    // Download the plugin archive to ./plugin.zip.
    // Rejects on network errors, non-200 responses and file write errors
    // instead of leaving the promise pending or saving a broken archive.
    await new Promise((resolve, reject) => {
        https.get(pluginURL, resp => {
            if (resp.statusCode !== 200) {
                resp.resume(); // discard the body so the socket is released
                reject(new Error(`plugin download failed: HTTP ${resp.statusCode}`));
                return;
            }
            resp.pipe(fs.createWriteStream('./plugin.zip'))
                .on('error', reject)
                .on('close', resolve);
        }).on('error', reject);
    });
    // Unzip it into ./plugin/ (extractAllTo is synchronous, no await needed)
    const zip = new AdmZip("./plugin.zip");
    zip.extractAllTo("./plugin/", true);
    // Set the API key in the plugin configuration file
    const configPath = './plugin/js/config_ac_api_key.js';
    if (!fs.existsSync(configPath)) {
        // Abort with a real Error; it is reported by the catch handler at the bottom
        throw new Error('plugin configuration not found!');
    }
    let confData = fs.readFileSync(configPath, 'utf8');
    confData = confData.replace(/antiCapthaPredefinedApiKey = ''/g, `antiCapthaPredefinedApiKey = '${apiKey}'`);
    fs.writeFileSync(configPath, confData, 'utf8');
    // Browser launch options
    const options = {
        headless: false,
        ignoreDefaultArgs: [
            "--disable-extensions",
            "--enable-automation"
        ],
        args: [
            '--disable-web-security',
            '--disable-features=IsolateOrigins,site-per-process',
            '--allow-running-insecure-content',
            '--disable-blink-features=AutomationControlled',
            '--no-sandbox',
            '--mute-audio',
            '--no-zygote',
            '--no-xshm',
            '--window-size=1920,1080',
            '--no-first-run',
            '--no-default-browser-check',
            '--disable-dev-shm-usage',
            '--disable-gpu',
            '--enable-webgl',
            '--ignore-certificate-errors',
            '--lang=en-US,en;q=0.9',
            '--password-store=basic',
            '--disable-gpu-sandbox',
            '--disable-software-rasterizer',
            '--disable-background-timer-throttling',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
            '--disable-infobars',
            '--disable-breakpad',
            '--disable-canvas-aa',
            '--disable-2d-canvas-clip-aa',
            '--disable-gl-drawing-for-tests',
            '--enable-low-end-device-mode',
            '--disable-extensions-except=./plugin',
            '--load-extension=./plugin'
        ]
    };
    try {
        // Launch the browser with the plugin. The options must be passed here,
        // otherwise the plugin is never loaded and the defaults are used.
        const browser = await puppeteer.launch(options);
        page = await browser.newPage();
    } catch (e) {
        console.log('could not launch browser: '+e.toString())
        return;
    }
    // Go to the target page; a load failure is logged and the script continues
    try {
        await page.goto(url, {
            waitUntil: "networkidle0"
        });
    } catch (e) {
        console.error('err while loading the page: '+e);
    }
    // Disable navigation timeout errors
    await page.setDefaultNavigationTimeout(0);
    // Fill the form
    await page.$eval('#login', (element, login) => {
        element.value = login;
    }, login);
    await page.$eval('#password', (element, password) => {
        element.value = password;
    }, password);
    // Wait for the "solved" selector to appear; a failed wait is logged and the script continues
    await page.waitForSelector('.antigate_solver.solved').catch(error => console.log('failed to wait for the selector'));
    // The original line here was an unrendered template placeholder that is not valid JavaScript
    console.log('Recaptcha 谜题已破解');
    // Press the submit button and wait for the navigation it triggers to finish
    await Promise.all([
        page.click('#submitButton'),
        page.waitForNavigation({ waitUntil: "networkidle0" })
    ]);
    console.log('任务已完成,已绕过有 Recaptcha 谜题的窗体');
})().catch(e => {
    // Report any otherwise unhandled failure and signal it through the exit code
    console.error('script failed: ' + e);
    process.exitCode = 1;
});
还有一个窍门:在无头模式下运行插件比较棘手,因为 Chrome 的无头模式不支持带插件的浏览器自动化。请使用名为 Xvfb 的实用工具,此工具可为您的应用程序提供虚拟桌面。
# Install the package
apt-get install -y xvfb
# Set the display variable
export DISPLAY=:0
# Start the Xvfb daemon in the background (only once)
/usr/bin/Xvfb :0 -screen 0 1024x768x24 &
# Wait a moment for it to come up (only once)
sleep 5
# Prefix the "node" or "python" script with "xvfb-run"
xvfb-run node myscript.js
# or
xvfb-run python myscript.py