Skip to content

Commit

Permalink
支持 可配置 代理 访问目标网站
Browse files Browse the repository at this point in the history
  • Loading branch information
xxx committed Jan 28, 2018
1 parent ab74d51 commit c5c218f
Show file tree
Hide file tree
Showing 12 changed files with 137 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

Expand Down Expand Up @@ -52,21 +53,25 @@ public static boolean isReachable(ShadowSocksDetailsEntity ss) {
return false;
}


/**
* 请求目标 URL 获取 Document
*/
protected Document getDocument() throws IOException {
Document document;
try {
document = Jsoup.connect(getTargetURL())
Connection connection = Jsoup.connect(getTargetURL())
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36")
.referrer("https://www.google.com/")
.ignoreContentType(true)
.followRedirects(true)
.ignoreHttpErrors(true)
.timeout(TIME_OUT)
// .proxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080)))
.get();
.timeout(TIME_OUT);
if (isProxyEnable())
connection.proxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress(getProxyHost(), getProxyPort())));

document = connection.get();

} catch (IOException e) {
throw new IOException("请求[" + getTargetURL() + "]异常:" + e.getMessage(), e);
}
Expand Down Expand Up @@ -152,4 +157,19 @@ protected ShadowSocksDetailsEntity parseURL(String imgURL) throws IOException, N
* 目标网站 URL
*/
protected abstract String getTargetURL();

/**
* Tells whether the request to the target site should go through a proxy.
* <p>
* When this returns {@code true}, the document request is configured with an HTTP proxy
* built from {@link #getProxyHost()} and {@link #getProxyPort()}.
*
* @return {@code true} to use the proxy, {@code false} to connect directly
*/
protected abstract boolean isProxyEnable();

/**
* Proxy address.
* <p>
* Used as the host part of the proxy socket address when {@link #isProxyEnable()} is {@code true}.
*
* @return the proxy host name or IP address
*/
protected abstract String getProxyHost();

/**
* Proxy port.
* <p>
* Used as the port part of the proxy socket address when {@link #isProxyEnable()} is {@code true}.
*
* @return the proxy port number
*/
protected abstract int getProxyPort();
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import com.example.ShadowSocksShare.domain.ShadowSocksDetailsEntity;
import com.example.ShadowSocksShare.service.ShadowSocksCrawlerService;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.*;
Expand All @@ -18,6 +20,7 @@
@Slf4j
@Service
public class DoubCrawlerServiceImpl extends ShadowSocksCrawlerService {

// 目标网站 URL
private static final String TARGET_URL = "https://doub.io/sszhfx/";
// 协议
Expand All @@ -44,6 +47,19 @@ public class DoubCrawlerServiceImpl extends ShadowSocksCrawlerService {
}
};

// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

/**
* 网页内容解析 ss 信息
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

import com.example.ShadowSocksShare.domain.ShadowSocksDetailsEntity;
import com.example.ShadowSocksShare.service.ShadowSocksCrawlerService;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;

Expand All @@ -24,6 +26,18 @@
public class FreeSSRCrawlerServiceImpl extends ShadowSocksCrawlerService {
// 目标网站 URL
private static final String TARGET_URL = "https://freessr.win/";
// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

/**
* 网页内容解析 ss 信息
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import com.example.ShadowSocksShare.domain.ShadowSocksDetailsEntity;
import com.example.ShadowSocksShare.service.ShadowSocksCrawlerService;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.Date;
Expand All @@ -21,6 +23,18 @@
public class FreeSS_EasyToUseCrawlerServiceImpl extends ShadowSocksCrawlerService {
// 目标网站 URL
private static final String TARGET_URL = "https://freess.cx/";
// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

/**
* 网页内容解析 ss 信息
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.text.MessageFormat;
Expand All @@ -27,17 +29,33 @@
public class Free_ssServiceImpl extends ShadowSocksCrawlerService {
// 目标网站 URL
private static final String TARGET_URL = "https://free-ss.site/ss.json?_={0}";
// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

public ShadowSocksEntity getShadowSocks() {
try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
// 设置代理
/*if (proxyEnable)
webClient.getOptions().setProxyConfig(new ProxyConfig(proxyHost, proxyPort));*/
// 1. 爬取账号
webClient.getOptions().setJavaScriptEnabled(true); // 启动JS
webClient.setJavaScriptTimeout(10 * 1000); // 设置JS执行的超时时间
webClient.getOptions().setUseInsecureSSL(true); // 忽略ssl认证
webClient.getOptions().setCssEnabled(false); // 禁用Css,可避免自动二次请求CSS进行渲染
webClient.getOptions().setThrowExceptionOnScriptError(false); //运行错误时,不抛出异常
webClient.getOptions().setTimeout(SOCKET_TIME_OUT); // 连接超时时间。如果为0,则无限期等待
webClient.getOptions().setTimeout(TIME_OUT); // 连接超时时间。如果为0,则无限期等待
webClient.setAjaxController(new NicelyResynchronizingAjaxController());// 设置Ajax异步
webClient.getCookieManager().setCookiesEnabled(true);//开启cookie管理

webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // 忽略错误的 Http code

Expand All @@ -63,7 +81,8 @@ public ShadowSocksEntity getShadowSocks() {
if (StringUtils.isNotBlank(ssListJson)) {
Set<ShadowSocksDetailsEntity> set = null;
ObjectMapper mapper = new ObjectMapper();
Map<String, List<List<String>>> map = mapper.readValue(ssListJson, new TypeReference<Map<String, List<List<String>>>>() {});
Map<String, List<List<String>>> map = mapper.readValue(ssListJson, new TypeReference<Map<String, List<List<String>>>>() {
});

if (map.containsKey("data")) {
List<List<String>> strList = map.get("data");
Expand Down Expand Up @@ -96,7 +115,7 @@ public ShadowSocksEntity getShadowSocks() {
} catch (Exception e) {
log.error(e.getMessage(), e);
}
return new ShadowSocksEntity("free-ss.site", "free-ss.site", false, new Date());
return new ShadowSocksEntity("https://free-ss.site", "free-ss.site", false, new Date());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

import com.example.ShadowSocksShare.domain.ShadowSocksDetailsEntity;
import com.example.ShadowSocksShare.service.ShadowSocksCrawlerService;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;

Expand All @@ -24,6 +26,18 @@
public class IShadowCrawlerServiceImpl extends ShadowSocksCrawlerService {
// 目标网站 URL
private static final String TARGET_URL = "https://global.ishadowx.net/";
// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

/**
* 网页内容解析 ss 信息
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import com.example.ShadowSocksShare.domain.ShadowSocksDetailsEntity;
import com.example.ShadowSocksShare.service.ShadowSocksCrawlerService;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.Date;
Expand All @@ -21,11 +23,21 @@
public class Ss8ServiceImpl extends ShadowSocksCrawlerService {
// 目标网站 URL
private static final String TARGET_URL = "https://en.ss8.fun/";
// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

/**
* 网页内容解析 ss 信息
*
* @param document
*/
@Override
protected Set<ShadowSocksDetailsEntity> parse(Document document) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

import com.example.ShadowSocksShare.domain.ShadowSocksDetailsEntity;
import com.example.ShadowSocksShare.service.ShadowSocksCrawlerService;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.Date;
Expand All @@ -19,6 +21,18 @@
public class SsrBlueCrawlerServiceImpl extends ShadowSocksCrawlerService {
// 目标网站 URL
private static final String TARGET_URL = "http://www.ssr.blue/";
// Whether to use a proxy when accessing the target site; injected from the "proxy.enable" property.
// The Lombok-generated getter provides the accessor required by the parent crawler service.
@Value("${proxy.enable}")
@Getter
private boolean proxyEnable;
// Proxy address; injected from the "proxy.host" property.
@Getter
@Value("${proxy.host}")
private String proxyHost;
// Proxy port; injected from the "proxy.port" property.
@Getter
@Value("${proxy.port}")
private int proxyPort;

/**
* 网页内容解析 ss 信息
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
@Slf4j
@Component
// @Profile("prod")
public class ApplicationStartupListener {
@Autowired
private ShadowSocksSerivce shadowSocksSerivce;
Expand All @@ -30,6 +31,6 @@ public class ApplicationStartupListener {
@EventListener
public void handleOrderStateChange(ContextRefreshedEvent contextRefreshedEvent) {
log.debug(contextRefreshedEvent.toString());
crawlerSet.parallelStream().forEach((service) -> shadowSocksSerivce.crawlerAndSave(service));
crawlerSet.parallelStream()/*.filter((service) -> !(service instanceof InitService))*/.forEach((service) -> shadowSocksSerivce.crawlerAndSave(service));
}
}
2 changes: 1 addition & 1 deletion src/main/resources/application-dev.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ========== 自定义 ==========
proxy:
enable: true
server: 127.0.0.1
host: 127.0.0.1
port: 1080


Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/application-prod.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ========== 自定义 ==========
proxy:
enable: false
server: 127.0.0.1
host: 127.0.0.1
port: 1080


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.test.web.servlet.MockMvc;

import java.io.IOException;

@Slf4j
public class ShadowSocksCrawlerServiceTest extends BaseTest {
@Autowired
Expand Down Expand Up @@ -45,7 +43,7 @@ public void testDoubCrawlerService() {
}

@Test
public void testFree_ssService() throws IOException {
public void testFree_ssService() {
free_ssServiceImpl.getShadowSocks();
// log.debug("========>{}", entity);
}
Expand Down

0 comments on commit c5c218f

Please sign in to comment.