Skip to content

Commit

Permalink
code optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
isanvicente committed Aug 7, 2023
1 parent f545b49 commit b6e7482
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 86 deletions.
134 changes: 66 additions & 68 deletions src/main/java/elh/eus/MSM/FeedReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;

/**
* RSS/Atom feed reader.
Expand Down Expand Up @@ -578,20 +577,15 @@ private void getRssFeed (Feed f, String store){
if (kwrds.isEmpty())
{
System.err.println("MSM::FeadReader::getFeed ->no keywords provided full articles will be returned");
processFullArticle(doc,lang, pubDate, link, f.getSrcId(), store);
boolean success=MSMUtils.saveHtml2pdf(is, fileStorePath,link);
}
else
{
//processFullArticle(doc,lang, pubDate, link, f.getSrcId(), store);
boolean mentionsFound=parseArticleForKeywords(doc,lang, pubDate, link, f.getSrcId(), store);
//albisteak aipamenik bazuen gorde albistearen pdf-a
if (mentionsFound) {
OutputStream os = new FileOutputStream(fileStorePath+link);
PdfRendererBuilder builder = new PdfRendererBuilder();
builder.useFastMode();
builder.withHtmlContent(is.toString(),link); //)withUri(is);
builder.toStream(os);
builder.run();
boolean success=MSMUtils.saveHtml2pdf(is, fileStorePath,link);
}
}
}
Expand Down Expand Up @@ -1225,70 +1219,74 @@ private InputSource fetchHTML(URL linkSrc, CookieStore cst) throws IOException,
}


/**/
boolean startSelenium(FeedCredential cred)
{
System.setProperty("webdriver.chrome.driver",params.getProperty("chromedriverPath", "chromedriver"));
//System.setProperty("webdriver.chrome.bin", "/usr/bin/google-chrome-beta");
ChromeOptions seleniumOptions = new ChromeOptions();
String seleniumOpts=params.getProperty("seleniumOptions","");
if (! seleniumOpts.equalsIgnoreCase("")){
for (String o : seleniumOpts.split(";")){
seleniumOptions.addArguments(o);
}
}
seleniumOptions.setBinary("/usr/bin/google-chrome-beta");

seleniumDriver=new ChromeDriver(seleniumOptions);

try {
seleniumDriver.get(cred.getSsourl());
}catch (WebDriverException se){
try {
seleniumDriver.close();
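/**
* Start a Selenium session and log in with the given SSO credentials.
* @param cred : SSO login credentials
*
* @return true if the credentials were typed into the login form and submitted, false if the login page could not be opened or a login field was not interactable
*/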
boolean startSelenium(FeedCredential cred)
{
System.setProperty("webdriver.chrome.driver",params.getProperty("chromedriverPath", "chromedriver"));
//System.setProperty("webdriver.chrome.bin", "/usr/bin/google-chrome-beta");
ChromeOptions seleniumOptions = new ChromeOptions();
String seleniumOpts=params.getProperty("seleniumOptions","");
if (! seleniumOpts.equalsIgnoreCase("")){
for (String o : seleniumOpts.split(";")){
seleniumOptions.addArguments(o);
}
}
seleniumOptions.setBinary("/usr/bin/google-chrome-beta");

seleniumDriver=new ChromeDriver(seleniumOptions);
seleniumDriver.get(cred.getSsourl());
}catch (WebDriverException se2){
System.err.println("FeadReader::getRssFeed -> selenium could not open login page proceeding without it");
return false;
}

}
WebDriverWait wait = new WebDriverWait(seleniumDriver, Duration.ofSeconds(30));
// if there is a cookie accepting notice wait until is ready and click to accept
if (! cred.getCookieNotice().equalsIgnoreCase("none")) {
try {
wait.until(ExpectedConditions.elementToBeClickable(By.xpath(cred.getCookieNotice()))).click();
} catch (TimeoutException te){
System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the cookie button, proceeding without it");
}
}

try{
//wait until the form is ready
wait.until(ExpectedConditions.elementToBeClickable(By.xpath("//*[@id=\""+cred.getUserField()+"\"]"))).click();
} catch (TimeoutException te){
try {
//wait until the form is ready
wait.until(ExpectedConditions.elementToBeClickable(By.id(cred.getUserField()))).click();
} catch (TimeoutException te2){
System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the login form to be ready, proceeding without it");
}
}
try{
//user
seleniumDriver.findElement(By.id(cred.getUserField())).sendKeys(cred.getSsouser());
//pass
seleniumDriver.findElement(By.id(cred.getPassField())).sendKeys(cred.getSsopass() + Keys.ENTER);
}catch (ElementNotInteractableException nie){
System.err.println("FeadReader::getRssFeed -> selenium found an element not clickable, proceeding without login");
return false;
}
try {
seleniumDriver.get(cred.getSsourl());
}catch (WebDriverException se){
try {
seleniumDriver.close();
seleniumDriver=new ChromeDriver(seleniumOptions);
seleniumDriver.get(cred.getSsourl());
}catch (WebDriverException se2){
System.err.println("FeadReader::getRssFeed -> selenium could not open login page proceeding without it");
return false;
}

return true;
}

}
WebDriverWait wait = new WebDriverWait(seleniumDriver, Duration.ofSeconds(30));
// if there is a cookie accepting notice wait until is ready and click to accept
if (! cred.getCookieNotice().equalsIgnoreCase("none")) {
try {
wait.until(ExpectedConditions.elementToBeClickable(By.xpath(cred.getCookieNotice()))).click();
} catch (TimeoutException te){
System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the cookie button, proceeding without it");
}
}

try{
//wait until the form is ready
wait.until(ExpectedConditions.elementToBeClickable(By.xpath("//*[@id=\""+cred.getUserField()+"\"]"))).click();
} catch (TimeoutException te){
try {
//wait until the form is ready
wait.until(ExpectedConditions.elementToBeClickable(By.id(cred.getUserField()))).click();
} catch (TimeoutException te2){
System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the login form to be ready, proceeding without it");
}
}
try{
//user
seleniumDriver.findElement(By.id(cred.getUserField())).sendKeys(cred.getSsouser());
//pass
seleniumDriver.findElement(By.id(cred.getPassField())).sendKeys(cred.getSsopass() + Keys.ENTER);
}catch (ElementNotInteractableException nie){
System.err.println("FeadReader::getRssFeed -> selenium found an element not clickable, proceeding without login");
return false;
}

return true;
}



}
44 changes: 26 additions & 18 deletions src/main/java/elh/eus/MSM/MSMUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpCookie;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
Expand All @@ -48,43 +48,30 @@
import java.util.HashSet;
import java.util.List;

import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.naming.NamingException;

import org.apache.http.auth.AuthenticationException;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.TrustAllStrategy;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicCookieStore;

import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HttpContext;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.Header;
import org.apache.http.NameValuePair;
import org.xml.sax.InputSource;

import twitter4j.JSONArray;
import twitter4j.JSONException;
import twitter4j.JSONObject;

//import com.mysql.jdbc.jdbc2.optional.MysqlDataSource;
import com.mysql.cj.jdbc.MysqlDataSource;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;

public final class MSMUtils {

Expand Down Expand Up @@ -476,6 +463,27 @@ public static CloseableHttpClient httpClient () throws KeyManagementException, N

}


/**
 * Save HTML content to a PDF file by means of the openhtmltopdf library.
 *
 * The HTML text is read from the character stream of {@code in} when one is set,
 * otherwise from its byte stream (decoded with the encoding declared on the source,
 * or UTF-8 when none is declared). When the source carries only a system id, the
 * renderer is pointed at that URI instead. The streams of {@code in} are read but
 * not closed here, and they must not have been consumed before this call.
 *
 * @param in source of the HTML content to render
 * @param storePath path prefix of the output file; concatenated as-is with {@code filename}
 * @param filename name of the output file; also handed to the renderer as base URI
 * @return true if successful, false otherwise
 */
public static boolean saveHtml2pdf(InputSource in, String storePath, String filename)
{
    if (in == null) {
        System.err.println("MSMUtils::saveHtml2pdf -> no html source provided, pdf not stored");
        return false;
    }
    try {
        // InputSource.toString() only yields the object identity string, so the
        // actual markup has to be pulled out of the underlying stream.
        String html = inputSourceToHtml(in);
        if (html == null && in.getSystemId() == null) {
            System.err.println("MSMUtils::saveHtml2pdf -> html source has neither a stream nor a system id, pdf not stored");
            return false;
        }
        // try-with-resources: the output file is flushed and closed even if rendering fails.
        try (OutputStream os = new FileOutputStream(storePath+filename)) {
            PdfRendererBuilder builder = new PdfRendererBuilder();
            builder.useFastMode();
            if (html != null) {
                builder.withHtmlContent(html, filename);
            } else {
                builder.withUri(in.getSystemId());
            }
            builder.toStream(os);
            builder.run();
        }
        return true;
    } catch (IOException ioe) {
        // Best effort: report the failure instead of swallowing it, and let the caller go on.
        System.err.println("MSMUtils::saveHtml2pdf -> could not store pdf for " + filename + ": " + ioe.getMessage());
        return false;
    }
}

/**
 * Read the whole text held by the character or byte stream of an InputSource.
 *
 * @param in source to read from; its stream is left open for the caller to close
 * @return the text read, or null when the source has neither a character nor a byte stream
 * @throws IOException if reading fails or the declared encoding is not supported
 */
private static String inputSourceToHtml(InputSource in) throws IOException
{
    java.io.Reader reader = in.getCharacterStream();
    if (reader == null) {
        java.io.InputStream bytes = in.getByteStream();
        if (bytes == null) {
            return null;
        }
        String encoding = in.getEncoding() != null ? in.getEncoding() : "UTF-8";
        reader = new java.io.InputStreamReader(bytes, encoding);
    }
    StringBuilder html = new StringBuilder();
    char[] buffer = new char[8192];
    int read;
    while ((read = reader.read(buffer)) != -1) {
        html.append(buffer, 0, read);
    }
    return html.toString();
}

}

0 comments on commit b6e7482

Please sign in to comment.