Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
EXID-G committed May 27, 2024
1 parent ae3adba commit 2dcc15f
Show file tree
Hide file tree
Showing 35 changed files with 59,934 additions and 292,498 deletions.
264 changes: 0 additions & 264 deletions !mywork/data/NYCTAXI202004-202006.geo

This file was deleted.

69,170 changes: 0 additions & 69,170 deletions !mywork/data/NYCTAXI202004-202006.rel

This file was deleted.

Binary file added !mywork/data/NYCTAXI202004-202006.zip
Binary file not shown.
59,378 changes: 29,689 additions & 29,689 deletions !mywork/data/NYCTAXI_OD_agg.csv

Large diffs are not rendered by default.

23,341 changes: 0 additions & 23,341 deletions !mywork/data/NYCTAXI_OD_edge.csv

This file was deleted.

59,378 changes: 29,689 additions & 29,689 deletions !mywork/data/NYCTAXI_OD_f.csv

Large diffs are not rendered by default.

140,319 changes: 0 additions & 140,319 deletions !mywork/data/NYCTAXI_OD_f.gexf

This file was deleted.

Binary file removed !mywork/data/od_f.gephi
Binary file not shown.
234 changes: 208 additions & 26 deletions !mywork/preprocess.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## original data"
"# original data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -99,31 +99,32 @@
"4 4 Polygon [[['-74.17225508', '40.56170484'], ['-74.17348..."
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo = pd.read_csv('../raw_data/NYCTAXI202004-202006_OD/NYCTAXI202004-202006.geo')\n",
"geo = pd.read_csv('raw_data/NYCTAXI202401-202403.geo')\n",
"print(geo.shape)\n",
"geo.head( )"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"type\n",
"Polygon 240\n",
"MultiPolygon 23\n",
"Name: type, dtype: int64"
"Name: count, dtype: int64"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -134,7 +135,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -226,13 +227,13 @@
"4 4 geo 0 4 14927.0"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rel = pd.read_csv('../raw_data/NYCTAXI202004-202006_OD/NYCTAXI202004-202006.rel')\n",
"rel = pd.read_csv('raw_data/NYCTAXI202401-202403.rel')\n",
"print(rel.shape)\n",
"rel.head()"
]
Expand All @@ -241,48 +242,229 @@
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(150995927, 6)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dyna_id</th>\n",
" <th>type</th>\n",
" <th>time</th>\n",
" <th>origin_id</th>\n",
" <th>destination_id</th>\n",
" <th>flow</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>state</td>\n",
" <td>2024-01-01T00:00:00Z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>state</td>\n",
" <td>2024-01-01T01:00:00Z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>state</td>\n",
" <td>2024-01-01T02:00:00Z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>state</td>\n",
" <td>2024-01-01T03:00:00Z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>state</td>\n",
" <td>2024-01-01T04:00:00Z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dyna_id type time origin_id destination_id flow\n",
"0 0 state 2024-01-01T00:00:00Z 0 0 0.0\n",
"1 1 state 2024-01-01T01:00:00Z 0 0 0.0\n",
"2 2 state 2024-01-01T02:00:00Z 0 0 0.0\n",
"3 3 state 2024-01-01T03:00:00Z 0 0 0.0\n",
"4 4 state 2024-01-01T04:00:00Z 0 0 0.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"od = pd.read_csv('raw_data/NYCTAXI202401-202403.od')#.drop([\"type\"],axis=1)\n",
"print(od.shape)\n",
"od.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# od[\"flow\"].value_counts().to_frame().reset_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Get two kinds of network"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Network grouped by time interval"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" 早高峰时段(7:00 - 9:00)\t上班、上学的高峰期。\n",
" 上午时段(9:00 - 12:00)\t早高峰过后到中午的时间段。\n",
" 午间时段(12:00 - 14:00)\t中午吃饭和午休时间。\n",
" 下午时段(14:00 - 17:00)\t午休结束到晚高峰前的时间段。\n",
" 晚高峰时段(17:00 - 19:00)\t下班、下学的高峰期。\n",
" 晚间时段(19:00 - 7:00)\t晚高峰过后到第二天早高峰前的时间段。\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# od = pd.read_csv('../raw_data/NYCTAXI202004-202006_OD/NYCTAXI202004-202006.od').drop([\"type\"],axis=1)\n",
"# print(od.shape)\n",
"# od.head()"
"od[\"time\"] = pd.to_datetime(od[\"time\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"od[\"hour\"] = od[\"time\"].dt.hour\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Define the boundaries of the time periods (hour of day; bins are left-closed\n",
"# because right=False is passed to pd.cut below)\n",
"bins = [0, 7, 9, 12, 14, 17, 19, 24]\n",
"\n",
"# Define the labels for the time periods. 'Night' appears twice on purpose:\n",
"# the night period wraps around midnight (19:00-24:00 and 0:00-7:00).\n",
"labels = ['Night', 'Morning Rush', 'Morning', 'Noon', 'Afternoon', 'Evening Rush', 'Night']\n",
"\n",
"# Create a new column that groups hours into time periods.\n",
"# ordered=False is required here: with the default ordered=True, pd.cut raises\n",
"# ValueError because the labels are not unique.\n",
"od['time_period'] = pd.cut(od['hour'], bins=bins, labels=labels, right=False, ordered=False)\n",
"\n",
"# Use the groupby function to group by the new column\n",
"od.groupby(\"time_period\").agg({\"flow\": \"mean\"}).reset_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## agg od by time -> data/NYCTAXI_OD_agg.csv"
"## Total flow network"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### agg od by time -> data/NYCTAXI_OD_agg.csv"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# od_agg = od.groupby([\"origin_id\",\"destination_id\"]).agg({\"flow\":\"sum\"}).reset_index()"
"od_agg = od.groupby([\"origin_id\",\"destination_id\"]).agg({\"flow\":\"sum\"}).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# od_agg.to_csv(\"data/NYCTAXI_OD_agg.csv\",index=False)"
"od_agg.to_csv(\"data/NYCTAXI_OD_agg.csv\",index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## join od_agg and rel"
"### join od_agg and rel"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -294,7 +476,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -303,7 +485,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -382,7 +564,7 @@
"4 0 4 0.0 14927.0"
]
},
"execution_count": 12,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -395,7 +577,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## add location to the od_f"
"### add location to the od_f"
]
},
{
Expand Down Expand Up @@ -776,7 +958,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/.vscode
/raw_data/*
/visualized_data/*
/libcity/tmp/*
/libcity/cache/*
/libcity/__pycache__
/libcity/*/__pycache__
/libcity/data/dataset/__pycache__
/.idea
/libcity/log/*
/libcity/model/*/__pycache__
/libcity/data/dataset/*/__pycache__

/!mywork/raw_data/*
File renamed without changes.
Loading

0 comments on commit 2dcc15f

Please sign in to comment.