From b6e74823d0bd36e9b065c13ce74250c0b031eea6 Mon Sep 17 00:00:00 2001 From: isanvicente Date: Mon, 7 Aug 2023 15:22:34 +0200 Subject: [PATCH] code optimization --- src/main/java/elh/eus/MSM/FeedReader.java | 134 +++++++++++----------- src/main/java/elh/eus/MSM/MSMUtils.java | 44 ++++--- 2 files changed, 92 insertions(+), 86 deletions(-) diff --git a/src/main/java/elh/eus/MSM/FeedReader.java b/src/main/java/elh/eus/MSM/FeedReader.java index 90fbec3..e05abe9 100644 --- a/src/main/java/elh/eus/MSM/FeedReader.java +++ b/src/main/java/elh/eus/MSM/FeedReader.java @@ -131,7 +131,6 @@ import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; -import com.openhtmltopdf.pdfboxout.PdfRendererBuilder; /** * RSS/Atom feed reader. @@ -578,7 +577,7 @@ private void getRssFeed (Feed f, String store){ if (kwrds.isEmpty()) { System.err.println("MSM::FeadReader::getFeed ->no keywords provided full articles will be returned"); - processFullArticle(doc,lang, pubDate, link, f.getSrcId(), store); + boolean success=MSMUtils.saveHtml2pdf(is, fileStorePath,link); } else { @@ -586,12 +585,7 @@ private void getRssFeed (Feed f, String store){ boolean mentionsFound=parseArticleForKeywords(doc,lang, pubDate, link, f.getSrcId(), store); //albisteak aipamenik bazuen gorde albistearen pdf-a if (mentionsFound) { - OutputStream os = new FileOutputStream(fileStorePath+link); - PdfRendererBuilder builder = new PdfRendererBuilder(); - builder.useFastMode(); - builder.withHtmlContent(is.toString(),link); //)withUri(is); - builder.toStream(os); - builder.run(); + boolean success=MSMUtils.saveHtml2pdf(is, fileStorePath,link); } } } @@ -1225,70 +1219,74 @@ private InputSource fetchHTML(URL linkSrc, CookieStore cst) throws IOException, } - /**/ - boolean startSelenium(FeedCredential cred) - { - System.setProperty("webdriver.chrome.driver",params.getProperty("chromedriverPath", "chromedriver")); - //System.setProperty("webdriver.chrome.bin", 
"/usr/bin/google-chrome-beta"); - ChromeOptions seleniumOptions = new ChromeOptions(); - String seleniumOpts=params.getProperty("seleniumOptions",""); - if (! seleniumOpts.equalsIgnoreCase("")){ - for (String o : seleniumOpts.split(";")){ - seleniumOptions.addArguments(o); - } - } - seleniumOptions.setBinary("/usr/bin/google-chrome-beta"); - - seleniumDriver=new ChromeDriver(seleniumOptions); - - try { - seleniumDriver.get(cred.getSsourl()); - }catch (WebDriverException se){ - try { - seleniumDriver.close(); + /** + * Start a Selenium (Chrome) session and submit the SSO login form with the given credentials. + * @param cred : SSO login data (login url, cookie notice xpath, form field ids, user and password) + * + * @return true once user and password have been sent to the login form, false if the login page could not be opened (after one retry with a new driver) or a login field was not interactable + */ + boolean startSelenium(FeedCredential cred) + { + System.setProperty("webdriver.chrome.driver",params.getProperty("chromedriverPath", "chromedriver")); + //System.setProperty("webdriver.chrome.bin", "/usr/bin/google-chrome-beta"); + ChromeOptions seleniumOptions = new ChromeOptions(); + String seleniumOpts=params.getProperty("seleniumOptions",""); + if (! seleniumOpts.equalsIgnoreCase("")){ + for (String o : seleniumOpts.split(";")){ + seleniumOptions.addArguments(o); + } + } + seleniumOptions.setBinary("/usr/bin/google-chrome-beta"); + seleniumDriver=new ChromeDriver(seleniumOptions); - seleniumDriver.get(cred.getSsourl()); - }catch (WebDriverException se2){ - System.err.println("FeadReader::getRssFeed -> selenium could not open login page proceeding without it"); - return false; - } - - } - WebDriverWait wait = new WebDriverWait(seleniumDriver, Duration.ofSeconds(30)); - // if there is a cookie accepting notice wait until is ready and click to accept - if (! 
cred.getCookieNotice().equalsIgnoreCase("none")) { - try { - wait.until(ExpectedConditions.elementToBeClickable(By.xpath(cred.getCookieNotice()))).click(); - } catch (TimeoutException te){ - System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the cookie button, proceeding without it"); - } - } - try{ - //wait until the form is ready - wait.until(ExpectedConditions.elementToBeClickable(By.xpath("//*[@id=\""+cred.getUserField()+"\"]"))).click(); - } catch (TimeoutException te){ - try { - //wait until the form is ready - wait.until(ExpectedConditions.elementToBeClickable(By.id(cred.getUserField()))).click(); - } catch (TimeoutException te2){ - System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the login form to be ready, proceeding without it"); - } - } - try{ - //user - seleniumDriver.findElement(By.id(cred.getUserField())).sendKeys(cred.getSsouser()); - //pass - seleniumDriver.findElement(By.id(cred.getPassField())).sendKeys(cred.getSsopass() + Keys.ENTER); - }catch (ElementNotInteractableException nie){ - System.err.println("FeadReader::getRssFeed -> selenium found an element not clickable, proceeding without login"); - return false; - } + try { + seleniumDriver.get(cred.getSsourl()); + }catch (WebDriverException se){ + try { + seleniumDriver.close(); + seleniumDriver=new ChromeDriver(seleniumOptions); + seleniumDriver.get(cred.getSsourl()); + }catch (WebDriverException se2){ + System.err.println("FeadReader::getRssFeed -> selenium could not open login page proceeding without it"); + return false; + } - return true; - } - + } + WebDriverWait wait = new WebDriverWait(seleniumDriver, Duration.ofSeconds(30)); + // if there is a cookie accepting notice wait until is ready and click to accept + if (! 
cred.getCookieNotice().equalsIgnoreCase("none")) { + try { + wait.until(ExpectedConditions.elementToBeClickable(By.xpath(cred.getCookieNotice()))).click(); + } catch (TimeoutException te){ + System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the cookie button, proceeding without it"); + } + } + try{ + //wait until the form is ready + wait.until(ExpectedConditions.elementToBeClickable(By.xpath("//*[@id=\""+cred.getUserField()+"\"]"))).click(); + } catch (TimeoutException te){ + try { + //wait until the form is ready + wait.until(ExpectedConditions.elementToBeClickable(By.id(cred.getUserField()))).click(); + } catch (TimeoutException te2){ + System.err.println("FeadReader::getRssFeed -> selenium waited long enough for the login form to be ready, proceeding without it"); + } + } + try{ + //user + seleniumDriver.findElement(By.id(cred.getUserField())).sendKeys(cred.getSsouser()); + //pass + seleniumDriver.findElement(By.id(cred.getPassField())).sendKeys(cred.getSsopass() + Keys.ENTER); + }catch (ElementNotInteractableException nie){ + System.err.println("FeadReader::getRssFeed -> selenium found an element not clickable, proceeding without login"); + return false; + } + + return true; + } + } diff --git a/src/main/java/elh/eus/MSM/MSMUtils.java b/src/main/java/elh/eus/MSM/MSMUtils.java index 2e71bc9..7e79c4d 100644 --- a/src/main/java/elh/eus/MSM/MSMUtils.java +++ b/src/main/java/elh/eus/MSM/MSMUtils.java @@ -23,14 +23,14 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.net.HttpCookie; +import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.nio.charset.StandardCharsets; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; @@ -48,36 +48,22 @@ import 
java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.regex.Pattern; -import java.util.stream.Collectors; import javax.naming.NamingException; -import org.apache.http.auth.AuthenticationException; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.ClientProtocolException; -import org.apache.http.client.CookieStore; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpUriRequest; -import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.conn.ssl.NoopHostnameVerifier; import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.auth.BasicScheme; -import org.apache.http.impl.client.BasicCookieStore; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.apache.http.message.BasicNameValuePair; -import org.apache.http.protocol.HttpContext; import org.apache.http.ssl.SSLContextBuilder; -import org.apache.http.Header; -import org.apache.http.NameValuePair; +import org.xml.sax.InputSource; import twitter4j.JSONArray; import twitter4j.JSONException; @@ -85,6 +71,7 @@ //import com.mysql.jdbc.jdbc2.optional.MysqlDataSource; import com.mysql.cj.jdbc.MysqlDataSource; +import com.openhtmltopdf.pdfboxout.PdfRendererBuilder; public final class MSMUtils {
@@ -476,6 +463,73 @@ public static CloseableHttpClient httpClient () throws KeyManagementException, N
 	}
 
 
-	
+	/**
+	 * Save html content to pdf by means of openhtmltopdf library.
+	 *
+	 * The markup is read from the character stream of the source or, failing that, from its
+	 * byte stream (decoded with the declared encoding, UTF-8 when none is declared).
+	 * NOTE(review): a source whose stream was already consumed by an earlier parse yields
+	 * no markup, so callers should pass a fresh (unread) source - verify against the callers.
+	 *
+	 * @param in source holding the html to render
+	 * @param storePath path prefix of the output file, concatenated with filename as is
+	 * @param filename name of the output file, also handed to the renderer as base uri
+	 * @return true if successful, false otherwise
+	 */
+	public static boolean saveHtml2pdf(InputSource in, String storePath, String filename)
+	{
+		if (in == null) {
+			System.err.println("MSMUtils::saveHtml2pdf -> no input source given, no pdf written for "+filename);
+			return false;
+		}
+		try {
+			// InputSource does not override toString(), the markup has to be read from its streams
+			String html = readHtml(in);
+			if (html.isEmpty()) {
+				System.err.println("MSMUtils::saveHtml2pdf -> no html content available for "+filename);
+				return false;
+			}
+			// the renderer leaves the stream open, try-with-resources closes it on every path
+			try (OutputStream os = new FileOutputStream(storePath+filename)) {
+				PdfRendererBuilder builder = new PdfRendererBuilder();
+				builder.useFastMode();
+				builder.withHtmlContent(html,filename);
+				builder.toStream(os);
+				builder.run();
+			}
+			return true;
+		}catch (IOException | RuntimeException e) {
+			// markup that is not well formed is presumably reported through unchecked exceptions
+			System.err.println("MSMUtils::saveHtml2pdf -> could not save pdf "+filename+": "+e);
+			return false;
+		}
+	}
+
+	/**
+	 * Read the whole markup held by an InputSource.
+	 *
+	 * @param in source to read; its streams are left open because the caller owns them
+	 * @return the markup, or an empty string if the source exposes neither a character nor a byte stream
+	 * @throws IOException if reading fails or the declared encoding is not supported
+	 */
+	private static String readHtml(InputSource in) throws IOException
+	{
+		BufferedReader reader;
+		if (in.getCharacterStream() != null) {
+			reader = new BufferedReader(in.getCharacterStream());
+		} else if (in.getByteStream() != null) {
+			String encoding = (in.getEncoding() != null) ? in.getEncoding() : "UTF-8";
+			reader = new BufferedReader(new InputStreamReader(in.getByteStream(), encoding));
+		} else {
+			return "";
+		}
+		StringBuilder html = new StringBuilder();
+		char[] buffer = new char[8192];
+		int read;
+		while ((read = reader.read(buffer)) != -1) {
+			html.append(buffer, 0, read);
+		}
+		return html.toString();
+	}
 
 }