C03 Import Data

Save all land occupancy data to the database.

Command

Bases: BaseCommand

Source code in back/iarbre_data/management/commands/c03_import_data.py
class Command(BaseCommand):
    help = "Create grid and save it to DB"

    def handle(self, *args, **options):
        """Save all land occupancy data to the database."""
        data_configs = DATA_FILES + URL_FILES
        for data_config in data_configs:
            if data_config["name"] == "Local Climate Zone":
                continue
            if (qs := Data.objects.filter(metadata=data_config["name"])).count() > 0:
                log_progress(
                    f"Data with metadata {data_config['name']}"
                    f" already exists ({qs.count()} rows). All deleted"
                )
                qs.delete()
            start = time.time()
            try:
                log_progress(
                    f"Loading data {data_config['name']}, factors {data_config['factors']}"
                )
                df = read_data(data_config)
            except pyogrio.errors.DataSourceError:
                print(f"Error reading data {data_config['name']}")
                continue
            log_progress("Processing data.")
            datas = process_data(df, data_config)
            log_progress("Saving geom data.")
            save_geometries(datas, data_config)
            log_progress(
                f"Data {data_config['name']} saved in {time.time() - start:.2f}s", True
            )
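
As a usage note: Command is a standard Django management command, so it can be run from the project root with manage.py or invoked programmatically. The sketch below assumes a configured Django environment; the command name c03_import_data follows from the module file name shown above.

from django.core.management import call_command

# Equivalent to running: python manage.py c03_import_data
call_command("c03_import_data")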

handle(*args, **options)

Save all land occupancy data to the database.

Source code in back/iarbre_data/management/commands/c03_import_data.py
def handle(self, *args, **options):
    """Save all land occupancy data to the database."""
    data_configs = DATA_FILES + URL_FILES
    for data_config in data_configs:
        if data_config["name"] == "Local Climate Zone":
            continue
        if (qs := Data.objects.filter(metadata=data_config["name"])).count() > 0:
            log_progress(
                f"Data with metadata {data_config['name']}"
                f" already exists ({qs.count()} rows). All deleted"
            )
            qs.delete()
        start = time.time()
        try:
            log_progress(
                f"Loading data {data_config['name']}, factors {data_config['factors']}"
            )
            df = read_data(data_config)
        except pyogrio.errors.DataSourceError:
            print(f"Error reading data {data_config['name']}")
            continue
        log_progress("Processing data.")
        datas = process_data(df, data_config)
        log_progress("Saving geom data.")
        save_geometries(datas, data_config)
        log_progress(
            f"Data {data_config['name']} saved in {time.time() - start:.2f}s", True
        )

download_cerema(url)

Download Friches (brownfield sites) from the CEREMA API.

Parameters:

    url (str): URL to download data from. Required.

Returns:

    gdf (GeoDataFrame): GeoDataFrame with data from URL.

Source code in back/iarbre_data/management/commands/c03_import_data.py
def download_cerema(url: str) -> gpd.GeoDataFrame:
    """
    Download Friches from CEREMA API.

    Args:
        url (str): URL to download data from

    Returns:
        gdf (GeoDataFrame): GeoDataFrame with data from URL
    """

    current_year = datetime.now().strftime("%Y")
    file_path = f"file_data/cartofriches_{current_year}.geojson"
    if os.path.isfile(file_path):
        gdf = gpd.read_file(file_path)
    else:  # Load data
        coddep = "69"
        cities = select_city(None)

        cities.crs = 2154
        cities_4326 = cities.to_crs(4326)
        combined_gdf = gpd.GeoDataFrame()

        max_retries = 3  # Number of retries, CEREMA API is not always reliable
        backoff_factor = 2  # Exponential wait if fail to avoid hitting API limits

        all_geometries = []
        for city in tqdm(cities_4326.itertuples()):
            bbox = ",".join(map(str, city.geometry.bounds))
            params = {
                "coddep": coddep,
                "code_insee": int(city.code),
                "in_bbox": bbox,
                "page_size": 1000,
            }
            for attempt in range(max_retries):
                try:
                    response = requests.get(url, params=params, timeout=10)
                    response.raise_for_status()
                    break  # Exit the loop if successful
                except requests.exceptions.Timeout:
                    print(f"Timeout occurred, retrying {attempt + 1}/{max_retries}...")
                except requests.exceptions.RequestException as e:
                    print(f"Request failed: {e}")
                    break
                time.sleep(backoff_factor**attempt)  # Wait before retrying

            if response.status_code != 200:
                raise Exception(
                    f"Error for Cartofriche: {response.status_code}, {response.text}"
                )
            tmp_gdf = gpd.read_file(BytesIO(response.content))
            all_geometries.append(tmp_gdf[["geometry"]])

            combined_gdf = pd.concat([combined_gdf, tmp_gdf], ignore_index=True)
            time.sleep(1)  # Avoid hitting API rate limits
        gdf = gpd.GeoDataFrame(pd.concat(all_geometries, ignore_index=True))
        gdf.crs = 4326
        gdf = gdf.to_crs(2154)
        gdf.to_file(file_path, driver="GeoJSON")
    return gdf
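
For illustration, the retry logic above can be read as a generic retry-with-exponential-backoff pattern. The sketch below is hypothetical (get_with_retries is not part of the module) and only restates the idea: retry on failure, wait backoff_factor**attempt seconds between attempts, and give up after max_retries.

import time

import requests


def get_with_retries(url, params, max_retries=3, backoff_factor=2, timeout=10):
    """Return a successful response or raise after max_retries attempts."""
    last_error = None
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            return response  # Success: stop retrying
        except requests.exceptions.RequestException as e:
            # Simplified: retries on any request error, not only timeouts.
            last_error = e
            time.sleep(backoff_factor**attempt)  # Wait longer after each failure
    raise RuntimeError(f"Request failed after {max_retries} attempts: {last_error}")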

download_dbtopo(url)

Download Bâtiments (buildings) from IGN BD TOPO V3.

Parameters:

    url (str): URL to download data from. Required.

Returns:

    gdf (GeoDataFrame): GeoDataFrame with data from URL.

Source code in back/iarbre_data/management/commands/c03_import_data.py
def download_dbtopo(url: str) -> gpd.GeoDataFrame:
    """
    Download Batiments from IGN BD TOPO V3.

    Args:
        url (str): URL to download data from

    Returns:
        gdf (GeoDataFrame): GeoDataFrame with data from URL
    """
    current_year = datetime.now().strftime("%Y")
    file_path = f"file_data/batiments_{current_year}.geojson"
    if os.path.isfile(file_path):
        gdf = gpd.read_file(file_path)
    else:  # Load data
        params = {
            "SERVICE": "WFS",
            "VERSION": "2.0.0",
            "REQUEST": "GetFeature",
            "TYPENAMES": "BDTOPO_V3:batiment",
            "SRSNAME": "EPSG:2154",
            "OUTPUTFORMAT": "text/xml; subtype=gml/3.2",
            "COUNT": 5000,
        }
        cities = select_city(None)

        all_geometries = []

        for city in tqdm(cities.itertuples()):
            bbox = ",".join(map(str, city.geometry.bounds))
            params["BBOX"] = bbox + ",EPSG:2154"
            start_index = 0
            while True:
                params["STARTINDEX"] = start_index

                response = requests.get(url, params=params, timeout=60)
                if response.status_code != 200:
                    raise Exception(
                        f"Error for BD TOPO: {response.status_code}, {response.text}"
                    )

                tmp_gdf = gpd.read_file(BytesIO(response.content))
                if tmp_gdf.empty:
                    break
                geom = tmp_gdf[
                    "geometry"
                ].force_2d()  # We don't need height of the buildings
                geom.name = (
                    "geometry"  # Restore name that has been erased by `force_2d`
                )
                all_geometries.append(geom)
                start_index += 5000

        gdf = gpd.GeoDataFrame(
            pd.concat(all_geometries, ignore_index=True), crs="EPSG:2154"
        )
        gdf.to_file(file_path, driver="GeoJSON")
    return gdf
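
The while loop above implements WFS result paging with COUNT and STARTINDEX. A hypothetical standalone version of that loop (fetch_wfs_pages is not part of the module) might look like the sketch below: request pages of page_size features and stop when an empty page comes back.

from io import BytesIO

import geopandas as gpd
import pandas as pd
import requests


def fetch_wfs_pages(url, params, page_size=5000):
    """Collect WFS features page by page until the server returns an empty page."""
    pages = []
    start_index = 0
    while True:
        page_params = {**params, "COUNT": page_size, "STARTINDEX": start_index}
        response = requests.get(url, params=page_params, timeout=60)
        response.raise_for_status()
        page = gpd.read_file(BytesIO(response.content))
        if page.empty:
            break
        pages.append(page)
        start_index += page_size
    if not pages:
        return gpd.GeoDataFrame()
    return gpd.GeoDataFrame(pd.concat(pages, ignore_index=True))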

download_from_url(url, layer_name)

Download data from a URL.

Parameters:

    url (str): URL to download data from. Required.
    layer_name (str): Name of the layer to download. Required.

Returns:

    gdf (GeoDataFrame): GeoDataFrame with data from URL.

Source code in back/iarbre_data/management/commands/c03_import_data.py
def download_from_url(url: str, layer_name: str) -> gpd.GeoDataFrame:
    """
    Download data from a URL.

    Args:
        url (str): URL to download data from
        layer_name (str): Name of the layer to download

    Returns:
        gdf (GeoDataFrame): GeoDataFrame with data from URL
    """
    params = dict(
        service="WFS",
        version="2.0.0",
        request="GetFeature",
        typeName=layer_name,
        outputFormat="GML3",
        crs=TARGET_PROJ,
    )
    content = requests.get(url, params=params, timeout=600).content
    # Save content to a BytesIO buffer and open it with gpd.read_file
    io = BytesIO(content)
    gdf = gpd.read_file(io)

    return gdf
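
A hypothetical call, for illustration only; the endpoint and layer name below are placeholders rather than values from the project's URL_FILES configuration.

# Placeholder endpoint and layer name (assumptions, not project configuration).
gdf = download_from_url(
    "https://example.org/geoserver/wfs",
    "workspace:example_layer",
)
print(len(gdf), gdf.crs)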

read_data(data_config)

Read data from a file or URL and return a GeoDataFrame.

Parameters:

    data_config (dict): Contains either the URL of the data or the path to the file. Required.

Returns:

    df (GeoDataFrame): GeoDataFrame reprojected to TARGET_PROJ, with null and invalid geometries removed.

Source code in back/iarbre_data/management/commands/c03_import_data.py
def read_data(data_config: dict) -> gpd.GeoDataFrame:
    """Read data from a file or URL and return a GeoDataFrame.

    Args:
        data_config (dict): Contains either URL of the data or path to the file.

    Returns:
        df (GeoDataFrame): GeoDataFrame in TARGET_PROJ, with null and invalid geometries removed.
    """
    if data_config.get("url"):
        if "data.geopf" in data_config.get("url").lower():  # BD TOPO
            df = download_dbtopo(data_config["url"])
        elif "cerema" in data_config.get("url").lower():
            df = download_cerema(data_config["url"])
        else:
            df = download_from_url(data_config["url"], data_config["layer_name"])
    elif data_config.get("layer_name"):
        df = gpd.read_file(
            DATA_DIR / data_config["file"], layer=data_config.get("layer_name")
        )
    else:
        df = gpd.read_file(DATA_DIR / data_config["file"])
    df["geometry"] = df.geometry.force_2d()
    df = df.to_crs(TARGET_PROJ)  # Reproject if needed
    return df[df.geometry.notnull() & df.geometry.is_valid]  # Drop null or invalid geom
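
To illustrate the dispatch above, here are two hypothetical data_config entries. The key names (name, file, url, layer_name, factors) mirror those used in this module; the values are placeholders, not the project's actual DATA_FILES or URL_FILES entries.

# Hypothetical file-based entry: read from DATA_DIR / "example.gpkg".
file_config = {
    "name": "Example layer",
    "file": "example.gpkg",
    "layer_name": "example_layer",  # optional: a named layer inside the file
    "factors": ["Example factor"],
}

# Hypothetical URL-based entry: handled by download_from_url.
url_config = {
    "name": "Example WFS layer",
    "url": "https://example.org/geoserver/wfs",
    "layer_name": "workspace:example_layer",
    "factors": ["Example factor"],
}

df = read_data(file_config)  # GeoDataFrame in TARGET_PROJ, invalid geometries dropped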