Aller au contenu

Database utils

Utils to interact with the DB using Django.

load_geodataframe_from_db(queryset, fields)

Load a GeoDataFrame from a Django model queryset.

Parameters:

Name Type Description Default
queryset QuerySet

Django queryset to load data from.

required
fields list[str]

List of fields to include in the GeoDataFrame.

required

Returns:

Name Type Description
df GeoDataFrame

GeoDataFrame with data from the queryset.

Source code in back/iarbre_data/utils/database.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def load_geodataframe_from_db(queryset, fields):
    """
    Load a GeoDataFrame from a Django model queryset.

    The geometry is read from the ``geometry`` model field, unless
    ``"map_geometry"`` is listed in ``fields``. In that case the geometry is
    read from ``map_geometry`` and the frame is tagged with ``TARGET_MAP_PROJ``
    instead of ``TARGET_PROJ``. In both cases the geometry column of the
    returned frame is named ``geometry``.

    Args:
        queryset (QuerySet): Django queryset to load data from.
        fields (list[str]): List of fields to include in the GeoDataFrame.
            The list passed by the caller is left untouched.

    Returns:
        df (GeoDataFrame): GeoDataFrame with data from the queryset. If the
            queryset is empty, an empty frame whose columns are ``fields``
            plus ``"geometry"`` is returned.
    """
    if not queryset.exists():
        # NOTE(review): this empty frame has no CRS and keeps "map_geometry" as
        # a plain column, unlike the non-empty result -- confirm callers only
        # test it for emptiness.
        return gpd.GeoDataFrame(columns=list(fields) + ["geometry"])

    # Work on a private copy: removing "map_geometry" from the caller's list
    # would change what a later call with the same list loads.
    columns = list(fields)
    geom_field = "geometry"
    crs = TARGET_PROJ
    if "map_geometry" in columns:
        columns.remove("map_geometry")
        geom_field = "map_geometry"
        crs = TARGET_MAP_PROJ
    # Get geometry data in GEOJSON to avoid conversion to WKT with shapely
    qs = queryset.annotate(geom_json=AsGeoJSON(geom_field)).values(
        *columns, "geom_json"
    )
    data = list(qs)  # Run query

    # Swap each row's GeoJSON string for the corresponding shapely geometry.
    for row in data:
        row["geometry"] = shape(json.loads(row.pop("geom_json")))

    df = gpd.GeoDataFrame(data)
    df.set_geometry("geometry", inplace=True)
    df.crs = crs
    return df

log_progress(step, star=False)

Log the progress of a step with a timestamp.

Parameters:

Name Type Description Default
step str

The description of the step being logged.

required
star bool

Whether to print a line of stars after the message.

False
Source code in back/iarbre_data/utils/database.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def log_progress(step: str, star=False) -> None:
    """
    Print a step description prefixed with the current time (HH:MM:SS).

    Args:
        step (str): The description of the step being logged.
        star (bool): When true, also print a line of 30 stars followed by
            blank lines after the message.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"{timestamp} - {step}")
    if not star:
        return
    separator = "*" * 30
    print(separator + "\n")
    print()

remove_duplicates(Model)

Delete duplicate instances of a model, where duplicates are rows sharing the same geometry.

Args: Model (class): Model class from iarbre_data.models from which duplicates are removed.

Source code in back/iarbre_data/utils/database.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def remove_duplicates(Model) -> None:
    """Delete duplicate instances of a model based on geometry.

    For every geometry shared by more than one row, the row with the lowest
    ``id`` is kept and all the others are deleted. The number of geometries
    that had duplicates is printed at the end.

    Args:
        Model (class): iarbre_data.models in which duplicates are removed
    """
    # Materialize the duplicate groups up front so that the count reported
    # below reflects the state before the deletions.
    duplicates = list(
        Model.objects.values("geometry")
        .annotate(count=Count("id"))
        .filter(count__gt=1)
    )

    for duplicate in duplicates:
        geometry = duplicate["geometry"]
        duplicate_instances = Model.objects.filter(geometry=geometry)
        # Keep the first and delete the rest. The explicit ordering makes
        # "first" well defined: slicing an unordered queryset gives no
        # guarantee about which row is skipped.
        ids_to_delete = duplicate_instances.order_by("id").values_list(
            "id", flat=True
        )[1:]
        Model.objects.filter(id__in=ids_to_delete).delete()
    print(f"Removed duplicates for {len(duplicates)} entries.")

select_city(insee_code_city)

Select a list of cities based on INSEE_CODE.

Parameters:

Name Type Description Default
insee_code_city str

INSEE code of the city or cities to select.

required

Returns:

Name Type Description
selected_city GeoDataFrame

GeoDataFrame containing the selected city or cities.

Source code in back/iarbre_data/utils/database.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def select_city(insee_code_city: str) -> gpd.GeoDataFrame:
    """Load the cities matching one or several INSEE codes.

    Args:
        insee_code_city (str): INSEE code of the city to select, or several
            codes separated by commas. When ``None``, every city is loaded.

    Returns:
        selected_city (GeoDataFrame): GeoDataFrame containing the selected city or cities.

    Raises:
        ValueError: If no city matches any of the given codes.
    """
    # Built on every call so each load receives its own list object.
    city_fields = ["id", "name", "code", "tiles_generated", "tiles_computed"]

    # No code given: return all the cities.
    if insee_code_city is None:
        return load_geodataframe_from_db(City.objects.all(), city_fields)

    insee_codes = insee_code_city.split(",")
    matching_cities = City.objects.filter(code__in=insee_codes)
    if not matching_cities.exists():
        raise ValueError(f"No city found with INSEE code {insee_codes}")
    return load_geodataframe_from_db(matching_cities, city_fields)