Skip to content

Commit

Permalink
Change the name of columns
Browse files Browse the repository at this point in the history
  • Loading branch information
jjakimoto committed Dec 2, 2018
1 parent ecafde1 commit 5239614
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 23 deletions.
9 changes: 5 additions & 4 deletions finance_ml/labeling/barriers.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def get_events(close, timestamps, sltp, trgt, min_ret=0,
trgt: pd.Series
Time series of threshold
min_ret: float, (default 0)
Minimum value of points to label
Minimum value of threshold to label
num_threads: int, (default 1)
The number of threads to use
t1: pd.Series, optional
Expand Down Expand Up @@ -115,7 +115,8 @@ def get_events(close, timestamps, sltp, trgt, min_ret=0,
# Skip events where none of the barriers is touched
time_idx = time_idx.dropna(how='all')
events['type'] = time_idx.idxmin(axis=1)
events['t1'] = time_idx.min(axis=1)
events['time'] = time_idx.min(axis=1)
del events['t1']
if side is None:
events = events.drop('side', axis=1)
return events
Expand Down Expand Up @@ -151,7 +152,7 @@ def get_t1(close, timestamps, days=None, seconds=None):


def get_barrier_labels(close, timestamps, trgt, sltp=[1, 1],
num_days=1, min_ret=0, num_threads=16,
days=None, seconds=None, min_ret=0, num_threads=16,
side=None, sign_label=True):
"""Return Labels for triple barriesr
Expand Down Expand Up @@ -183,7 +184,7 @@ def get_barrier_labels(close, timestamps, trgt, sltp=[1, 1],
-------
pd.Series: label
"""
t1 = get_t1(close, timestamps, num_days)
t1 = get_t1(close, timestamps, days=days, seconds=seconds)
events = get_events(close, timestamps,
sltp=sltp,
trgt=trgt,
Expand Down
22 changes: 11 additions & 11 deletions finance_ml/labeling/sizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@


def get_sizes(close, events, sign_label=True):
"""Return bet sizes
"""Return label
Parameters
----------
close: pd.Series
events: pd.DataFrame
t1: time of barrier
time: time of barrier
type: type of barrier - tp, sl, or t1
trgt: horizontal barrier width
side: position side
Expand All @@ -22,23 +22,23 @@ def get_sizes(close, events, sign_label=True):
pd.Series: bet sizes
"""
# Prices aligned with events
events = events.dropna(subset=['t1'])
events = events.dropna(subset=['time'])
# All used indices
time_idx = events.index.union(events['t1'].values).drop_duplicates()
time_idx = events.index.union(events['time'].values).drop_duplicates()
close = close.reindex(time_idx, method='bfill')
# Create out object
out = pd.DataFrame(index=events.index)
out['ret'] = close.loc[events['t1'].values].values / close.loc[
out['ret'] = close.loc[events['time'].values].values / close.loc[
events.index] - 1.
# Modify return according to the side
if 'side' in events:
out['ret'] *= events['side']
out['side'] = events['side']
# Assign labels
out['size'] = np.sign(out['ret'])
if sign_label:
out.loc[out['ret'] == 0, 'size'] = 1.
else:
# 0 when touching vertical line
out['size'].loc[events['type'] == 't1'] = 0
out.loc[out['ret'] == 0, 'size'] = 1
if 'side' in events:
out.loc[out['ret'] <= 0, 'size'] = 0
return out
if not sign_label:
out['size'].loc[events['type'] == 't1'] = 0
return out
4 changes: 2 additions & 2 deletions finance_ml/multiprocessing/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ def mp_pandas_obj(func, pd_obj, num_threads=24, mp_batches=1,
----------
func: function object
pd_obj: list
pd_obj[0]: the name of parameters to be parallelized
pd_obj[1]: parameters to be parallelized
pd_obj[0]: The name of parameters to be parallelized
pd_obj[1]: List of parameters to be parallelized
mp_batches: int
The number of batches processed for each thread
linear_mols: bool
Expand Down
9 changes: 3 additions & 6 deletions finance_ml/stats/vol.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,10 @@ def get_vol(close, span=100, days=None, seconds=None):
else:
delta = pd.Timedelta(days=days)
use_idx = close.index.searchsorted(close.index - delta)
use_idx = use_idx[use_idx > 0]
prev_idx = pd.Series(use_idx, index=close.index)
prev_idx = prev_idx[prev_idx > 0]
# Get rid of duplications in index
use_idx = np.unique(use_idx)
prev_idx = pd.Series(close.index[use_idx - 1], index=close.index[use_idx])
prev_idx = prev_idx.drop_duplicates()
ret = close[prev_idx.index] / close[prev_idx].values - 1
vol = ret.ewm(span=span).std()
return vol



0 comments on commit 5239614

Please sign in to comment.