Skip to content

Commit

Permalink
Fixed a bug with searchTerms passed to the feed reader through the config file.
Browse files: browse the repository at this point in the history
  • Loading branch information
isanvicente committed Jan 21, 2020
1 parent 9d00ade commit 4ef1fe8
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 14 deletions.
8 changes: 3 additions & 5 deletions src/main/java/elh/eus/MSM/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,9 @@ public final void feedReader()
String kwrds = params.getProperty("searchTerms", "none");
if (!kwrds.equalsIgnoreCase("none"))
{
String[] kwrdSplit = kwrds.split("\\s*,\\s*");
for (String kwrd : kwrdSplit)
{
kwrdList.add(new Keyword(kwrd,"all"));
}
List<String> terms = Arrays.asList(params.getProperty("searchTerms").split(","));
kwrdList = Keyword.createFromList(terms,Arrays.asList(params.getProperty("langs", "all").split(",")));
System.err.println("MSM::TwitterStreamClient - retrieved "+kwrdList+" keywords from config file");
}
// if no keyword is found in config file try to load them from the database as a last resort.
else{
Expand Down
31 changes: 24 additions & 7 deletions src/main/java/elh/eus/MSM/FeedReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ public FeedReader(String source) {
* @param kwrds2
*/
private void constructKeywordsPatterns() {

boolean anchors = false;

if (this.kwrds == null || this.kwrds.isEmpty())
{
System.err.println ("elh-MSM::FeedReader - No keywords loaded");
Expand Down Expand Up @@ -231,11 +234,17 @@ private void constructKeywordsPatterns() {
if (k.isAnchor())
{
sb_anchors.append(k.getText().replace('_',' ').toLowerCase()).append("|");
anchors=true;
}
}
String anchPatt = sb_anchors.toString();
anchPatt=anchPatt.substring(0, anchPatt.length()-1)+")";
anchorPattern = Pattern.compile(anchPatt);

// if anchor keywords found construct the anchor pattern
if (anchors)
{
String anchPatt = sb_anchors.toString();
anchPatt=anchPatt.substring(0, anchPatt.length()-1)+")";
anchorPattern = Pattern.compile(anchPatt);
}
}


Expand Down Expand Up @@ -669,12 +678,20 @@ private void parseArticleForKeywords(TextDocument doc, String lang, Date date, S

Set<Keyword> result = new HashSet<Keyword>();

String wholeText = StringUtils.stripAccents(doc.getContent()).toLowerCase();
boolean anchorFound = anchorPattern.matcher(wholeText).find();
String wholeText = StringUtils.stripAccents(doc.getContent()).toLowerCase();
boolean anchorFound = false;
if (anchorPattern == null)
{
anchorFound=false;
}
else
{
anchorFound = anchorPattern.matcher(wholeText).find();
}

//System.err.println("MSM::FeedReader::parseArticleForKeywords - anchorPattern: "+anchorPattern.toString()
// +"\n -- found? "+anchorFound+" lang: "+lang+" indep/dep:"+independentkwrds.size()+"/"+dependentkwrds.size());


// objects needed to call the tokenizer
Properties tokProp = new Properties();
tokProp.setProperty("language", lang);
Expand Down Expand Up @@ -721,7 +738,7 @@ private void parseArticleForKeywords(TextDocument doc, String lang, Date date, S
}

//System.err.println("MSM::FeedReader::parseArticleForKeywords - independent key:"
// +k.getText()+" l="+k.getLang()+" pattern:"+kwrdPatterns.get(k.getId()).toString());
// +k.getText()+" l="+k.getLang()+" pattern:"+kwrdPatterns.get(k.getId()).toString()+" document lang: "+lang);
if(k.getLang().equalsIgnoreCase(lang) && kwrdFound)
{
//System.err.println("MSM::FeedReader::parseArticleForKeywords - independent key found!!!: "+k.getText()+" id: "+k.getId());
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/elh/eus/MSM/Keyword.java
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,9 @@ public static Set<Keyword> createFromList(List<String> keyList, List<String> lan
{
for (String l: langs)
{
result.add(new Keyword(key,l,false,false,true,key));
Keyword k = new Keyword(key,l,false,false,true,key);
k.setId(result.size()+1);
result.add(k);
}
}
return result;
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/elh/eus/MSM/Mention.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;

import com.google.api.services.youtube.model.Video;
Expand Down Expand Up @@ -601,7 +602,12 @@ public int updateRetweetFavourites2db (Connection conn, long mId)
*/
public void print()
{
System.out.println("\ntext: "+text+"\nlang: "+lang+"\nkeywords: "+keywords.toString());
Set<String> kwords = new HashSet<String>();
for (Keyword k: keywords) {
kwords.add(k.getText());
}

System.out.println("\ntext: "+text+"\nlang: "+lang+"\nkeywords: "+kwords.toString());
}

public int mention2solr(){
Expand Down

0 comments on commit 4ef1fe8

Please sign in to comment.