您现在的位置是:亿华云 > IT科技类资讯
Squid代理服务器搭建亿级爬虫IP代理池
亿华云2025-10-09 01:21:51【IT科技类资讯】7人已围观
简介:本文给出一个 Python 脚本:先执行 gevent 的 monkey.patch_all(),再用协程池定时获取并验证代理 IP,把可用代理写入 Squid 的 cache_peer 配置并重新加载,从而搭建爬虫 IP 代理池。

"""Keep Squid's parent-proxy (cache_peer) list in sync with a proxy API.

The script periodically downloads a batch of proxies, validates them
concurrently with a gevent pool, rewrites the Squid configuration with the
proxies that passed validation, and asks Squid to reload its configuration.
"""
# gevent must patch the standard library before anything else is imported.
from gevent import monkey  # isort:skip

monkey.patch_all()  # isort:skip

import logging
import os
import time

import requests
from gevent.pool import Pool

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s: - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Use a StreamHandler so that log records are printed to the console.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)

# Squid configuration syntax: forward requests to a parent proxy.
# The two placeholders are filled with the proxy's IP and port.
PEER_CONF = "cache_peer %s parent %s 0 no-query weighted-round-robin weight=1 connect-fail-limit=2 allow-miss max-conn=5\n"

# Proxies that passed validation in the current refresh round.
GOOD_PROXIES = []

pool = Pool(50)


def check_proxy(proxy):
    """Validate one proxy and append it to GOOD_PROXIES when it works.

    A proxy counts as good when a request sent through it succeeds and the
    proxy's IP appears in the response body. Failures are logged, never
    raised, so one bad proxy cannot abort the whole validation batch.

    :param proxy: ``[ip, port]`` pair of strings.
    """
    try:
        ip, port = proxy
    except (TypeError, ValueError):
        logger.error("[BAD] - malformed proxy entry: %r", proxy)
        return

    # Requests documents that proxy URLs should include the scheme.
    _proxies = {"http": "http://{}:{}".format(ip, port)}
    try:
        # NOTE(review): presumably this page echoes the caller's IP - confirm.
        ip_url = "http://2019.ip138.com/ic.asp"
        res = requests.get(ip_url, proxies=_proxies, timeout=10)
        # Explicit check instead of `assert`, which is stripped under -O.
        if ip not in res.text:
            raise ValueError("proxy IP not found in response")
    except Exception as e:
        # Deliberately broad: this runs inside a pool worker, and any
        # failure simply means "this proxy is unusable".
        logger.error("[BAD] - %s:%s, %s", ip, port, e)
    else:
        logger.info("[GOOD] - %s:%s", ip, port)
        GOOD_PROXIES.append(proxy)


def update_conf():
    """Write squid.conf as the pristine template plus one peer per good proxy.

    Reads ``/etc/squid/squid.conf.original`` and overwrites
    ``/etc/squid/squid.conf``; I/O errors from either file propagate.
    """
    with open("/etc/squid/squid.conf.original", "r") as F:
        squid_conf = F.readlines()
    squid_conf.append("\n# Cache peer config\n")
    squid_conf.extend(PEER_CONF % (proxy[0], proxy[1]) for proxy in GOOD_PROXIES)
    with open("/etc/squid/squid.conf", "w") as F:
        F.writelines(squid_conf)


def _parse_proxies(text):
    """Turn whitespace-separated ``ip:port`` tokens into ``[ip, port]`` pairs."""
    proxies = []
    for token in text.split():
        parts = token.split(":")
        if len(parts) == 2 and all(parts):
            proxies.append(parts)
        else:
            logger.error("skip malformed proxy line: %r", token)
    return proxies


def get_proxy():
    """Run one refresh round: fetch, validate, write the config, reload Squid."""
    # Clear in place so every reference to the list sees the reset.
    del GOOD_PROXIES[:]

    # 1. Fetch a batch of proxies from the provider API.
    api_url = "http://s.zdaye.com/?api=YOUR_API&count=100&fitter=1&px=2"
    try:
        # `.text` (not `.content`) so the substring tests and split below
        # work on str rather than bytes.
        res = requests.get(api_url, timeout=10).text
    except requests.RequestException as e:
        # A transient network error must not kill the refresh loop.
        logger.error("failed to fetch proxy list: %s", e)
        return

    if not res:
        logger.error("no data")
    elif "bad" in res:
        logger.error("bad request")
    else:
        logger.info("get all proxies")
        pool.map(check_proxy, _parse_proxies(res))
        pool.join()
        # 2. Write the Squid configuration file.
        update_conf()
        # 3. Ask Squid to reload its configuration.
        status = os.system("squid -k reconfigure")
        if status != 0:
            logger.error("squid -k reconfigure exited with status %s", status)
    logger.info(">>>> DONE! <<<<")


def main(interval=30, poll=5):
    """Refresh the proxy pool forever.

    :param interval: minimum number of seconds between two refresh rounds.
    :param poll: seconds to sleep between checks of the elapsed time.
    """
    start = time.time()
    while True:
        # Fetch a new batch once `interval` seconds have elapsed.
        if time.time() - start >= interval:
            get_proxy()
            start = time.time()
        time.sleep(poll)


if __name__ == "__main__":
    main()
很赞哦!(8)