Demo session

A demo session that displays the scraped housing data.

[29]:
import os
import pandas
import datetime
import pytz

import warnings
warnings.filterwarnings("ignore")
[30]:
# Build the path of the JSON file named after today's date, where "today" is
# evaluated in the Asia/Kuala_Lumpur time zone rather than the machine's local zone.
now = datetime.datetime.now(pytz.timezone("Asia/Kuala_Lumpur"))
json_file = "../../output/data/{:%Y-%m-%d}.json".format(now)
[31]:
# Read the JSON file for the day into a DataFrame.
data = pandas.read_json(json_file)
# Keep only the first row seen for each "id"; later repeats are dropped.
cleaned_data = data.drop_duplicates(subset="id")
# Preview the first five rows.
cleaned_data.head()
[31]:
type id attributes links
0 ads 100550027 {'date': '2024-02-11 23:59:17', 'modified_ts':... {'image_baseurl': 'https://img.rnudah.com/imag...
1 ads 101268085 {'date': '2024-02-11 23:58:49', 'modified_ts':... {'image_baseurl': 'https://img.rnudah.com/imag...
2 ads 100795490 {'date': '2024-02-11 23:57:46', 'modified_ts':... {'image_baseurl': 'https://img.rnudah.com/imag...
3 ads 100549243 {'date': '2024-02-11 23:57:01', 'modified_ts':... {'image_baseurl': 'https://img.rnudah.com/imag...
4 ads 105600121 {'date': '2024-02-11 23:56:52', 'modified_ts':... {'image_baseurl': 'https://img.rnudah.com/imag...
[32]:
# Expand the nested dicts stored in the "attributes" column into one flat column per key.
pandas.json_normalize(cleaned_data.loc[:, "attributes"])
[32]:
date modified_ts building_id type_name category_name list_id subject rooms_name subarea_name bathroom_name ... gallery_expiry gallery has_ps_gallery roommate_gender_name bundle urgent has_ps_urgent price_markdown escrow_enabled auction_date
0 2024-02-11 23:59:17 1707667157 4d2836b7-847d-49fc-a0d9-5b4a53ea0815 sell Apartment / Condominium 100550027 Eden Seaview At Batu Ferringhi Penang 3 Batu Ferringhi 2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 2024-02-11 23:58:49 1707667129 NaN sell House 101268085 Double sty Terraced House At Taman Derga Perda... 4 Alor Setar 3 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 2024-02-11 23:57:46 1707667066 NaN sell House 100795490 Double sty Terraced House At Bandar Mutiara Su... 4 Sungai Petani 3 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 2024-02-11 23:57:01 1707667021 NaN sell House 100549243 Double sty Terraced House At Taman Sinar Intan... 4 Sungai Petani 3 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 2024-02-11 23:56:52 1707667012 NaN sell House 105600121 Taman Nora | Ulu Tiram | Tiram 4 Ulu Tiram 2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3089 2024-02-12 20:43:23 1707741803 NaN sell House 104974541 Nak Beli Banglo/Semi D Tanah Luas d Ipoh? 3 Chemor 2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3090 2024-02-12 20:42:56 1707741776 55a5a803-59d5-404a-89e4-5d9523f89bc4 sell Apartment / Condominium 101655915 Berminat Dgn Apartment Mampu Milik & Sudah Sia... 3 Cyberjaya 2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3091 2024-02-12 20:42:51 1707741771 NaN let Commercial Property 105357085 Taman perindustrian asas jaya@bkt minyak @ 2 s... NaN Simpang Ampat NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3092 2024-02-12 20:42:24 1707741744 NaN let Commercial Property 105356999 Taman perindustrian asas jaya@bukit minyak@2 s... NaN Bukit Minyak NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3093 2024-02-12 20:41:29 1707741689 NaN sell House 105628077 CHEAPEST !! Double Storey Setia Ecohill (Pisti... 4 Semenyih 3 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3094 rows × 54 columns

[33]:
# Flatten both nested dict columns and place the resulting frames side by side,
# so each row carries its "attributes" fields followed by its "links" fields.
merged_data = pandas.concat(
    [pandas.json_normalize(cleaned_data[column]) for column in ("attributes", "links")],
    axis="columns",
)
merged_data
[33]:
date modified_ts building_id type_name category_name list_id subject rooms_name subarea_name bathroom_name ... has_ps_gallery roommate_gender_name bundle urgent has_ps_urgent price_markdown escrow_enabled auction_date image_baseurl image_grid_baseurl
0 2024-02-11 23:59:17 1707667157 4d2836b7-847d-49fc-a0d9-5b4a53ea0815 sell Apartment / Condominium 100550027 Eden Seaview At Batu Ferringhi Penang 3 Batu Ferringhi 2 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
1 2024-02-11 23:58:49 1707667129 NaN sell House 101268085 Double sty Terraced House At Taman Derga Perda... 4 Alor Setar 3 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
2 2024-02-11 23:57:46 1707667066 NaN sell House 100795490 Double sty Terraced House At Bandar Mutiara Su... 4 Sungai Petani 3 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
3 2024-02-11 23:57:01 1707667021 NaN sell House 100549243 Double sty Terraced House At Taman Sinar Intan... 4 Sungai Petani 3 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
4 2024-02-11 23:56:52 1707667012 NaN sell House 105600121 Taman Nora | Ulu Tiram | Tiram 4 Ulu Tiram 2 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3089 2024-02-12 20:43:23 1707741803 NaN sell House 104974541 Nak Beli Banglo/Semi D Tanah Luas d Ipoh? 3 Chemor 2 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
3090 2024-02-12 20:42:56 1707741776 55a5a803-59d5-404a-89e4-5d9523f89bc4 sell Apartment / Condominium 101655915 Berminat Dgn Apartment Mampu Milik & Sudah Sia... 3 Cyberjaya 2 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
3091 2024-02-12 20:42:51 1707741771 NaN let Commercial Property 105357085 Taman perindustrian asas jaya@bkt minyak @ 2 s... NaN Simpang Ampat NaN ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
3092 2024-02-12 20:42:24 1707741744 NaN let Commercial Property 105356999 Taman perindustrian asas jaya@bukit minyak@2 s... NaN Bukit Minyak NaN ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids
3093 2024-02-12 20:41:29 1707741689 NaN sell House 105628077 CHEAPEST !! Double Storey Setia Ecohill (Pisti... 4 Semenyih 3 ... NaN NaN NaN NaN NaN NaN NaN NaN https://img.rnudah.com/images https://img.rnudah.com/grids

3094 rows × 56 columns

[34]:
# Sanity check: list every row whose "list_id" repeats an earlier row.
# An empty result means each list_id appears only once.
merged_data.loc[merged_data.duplicated(subset="list_id")]
[34]:
date modified_ts building_id type_name category_name list_id subject rooms_name subarea_name bathroom_name ... has_ps_gallery roommate_gender_name bundle urgent has_ps_urgent price_markdown escrow_enabled auction_date image_baseurl image_grid_baseurl

0 rows × 56 columns