# Forked from dchrastil/ScrapedIn -- SI_login.py (89 lines / 2.92 KB).
# NOTE: GitHub page chrome and the copied line-number gutter from the
# original web scrape have been removed; the code itself starts below.
#!/usr/bin/python
__author__ = 'Danny Chrastil'
__email__ = 'danny.chrastil@gmail.com'
__description__ = 'Python Requests doesnt handle LinkedIn authentication well. This uses urllib instead'
__version__ = '0.2'
import cookielib
import os
import urllib
import urllib2
import re
import string
import sys
import config
from bs4 import BeautifulSoup
def linkedIn():
global opener
cookie_filename = "cookies.txt"
# Simulate browser with cookies enabled
cj = cookielib.MozillaCookieJar(cookie_filename)
if os.access(cookie_filename, os.F_OK):
cj.load()
# Load Proxy settings
if len(config.proxylist) > 0:
#print "[Status] Setting up proxy (%s)" % config.proxylist[0]
proxy_handler = urllib2.ProxyHandler({'https':config.proxylist[0]})
opener = urllib2.build_opener(
proxy_handler,
urllib2.HTTPRedirectHandler(),
urllib2.HTTPHandler(debuglevel=0),
urllib2.HTTPSHandler(debuglevel=0),
urllib2.HTTPCookieProcessor(cj)
)
else:
opener = urllib2.build_opener(
urllib2.HTTPRedirectHandler(),
urllib2.HTTPHandler(debuglevel=0),
urllib2.HTTPSHandler(debuglevel=0),
urllib2.HTTPCookieProcessor(cj)
)
# Get CSRF Token
#print "[Status] Obtaining a CSRF token"
html = loadPage("https://www.linkedin.com/")
soup = BeautifulSoup(html, "html.parser")
csrf = soup.find(id="loginCsrfParam-login")['value']
#print csrf
# Authenticate
login_data = urllib.urlencode({
'session_key': config.linkedin['username'],
'session_password': config.linkedin['password'],
'loginCsrfParam': csrf,
})
#print "[Status] Authenticating to Linkedin"
html = loadPage("https://www.linkedin.com/uas/login-submit", login_data)
soup = BeautifulSoup(html, "html.parser")
try:
print cj._cookies['.www.linkedin.com']['/']['li_at'].value
except:
print "error"
cj.save()
os.remove(cookie_filename)
def loadPage(url, data=None):
try:
response = opener.open(url)
except:
print "\n[Fatal] Your IP may have been temporarily blocked"
try:
if data is not None:
response = opener.open(url, data)
else:
response = opener.open(url)
#return response.headers.get('Set-Cookie')
return ''.join(response.readlines())
except:
# If URL doesn't load for ANY reason, try again...
# Quick and dirty solution for 404 returns because of network problems
# However, this could infinite loop if there's an actual problem
print "[Notice] Exception hit"
sys.exit(0)
# Entry point.  Runs at module load time (there is no __main__ guard), so
# importing this file triggers the LinkedIn login immediately.
linkedIn()