Aller au contenu

C01 Compute Plantability Indice

Compute and save indice data for the selected cities.

Command

Bases: BaseCommand

Source code in back/plantability/management/commands/c01_compute_plantability_indice.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class Command(BaseCommand):
    """Management command computing the plantability indice of the selected cities."""

    help = "Compute and save indice data for the selected cities."

    def add_arguments(self, parser):
        """Register the optional ``--insee_code_city`` command-line argument."""
        parser.add_argument(
            "--insee_code_city",
            type=str,
            required=False,
            default=None,
            help="The INSEE code of the city or cities to process. If multiple cities, please separate with comma (e.g. --insee_code_city 69266,69382)",
        )

    def handle(self, *args, **options):
        """Compute and save indice data for the selected cities.

        For each city returned by ``select_city``, the tiles whose geometry
        intersects the city geometry are loaded and passed to
        ``compute_indice``. Once every city has been processed,
        ``compute_robust_normalized_indice`` is run once.
        """
        insee_code_city = options["insee_code_city"]

        selected_city = select_city(insee_code_city)
        nb_city = len(selected_city)
        for idx, city in enumerate(selected_city.itertuples()):
            log_progress(f"{city.name} ({idx+1} on {nb_city} city).")
            # The city geometry is handed to the ORM as a GEOS geometry built
            # from its WKT representation.
            tiles_queryset = Tile.objects.filter(
                geometry__intersects=GEOSGeometry(city.geometry.wkt)
            )
            # Only the tile ids are needed to compute the indice.
            tiles_df = load_geodataframe_from_db(tiles_queryset, ["id"])
            compute_indice(tiles_df["id"])
        log_progress("Computing normalized indice.")
        compute_robust_normalized_indice()

handle(*args, **options)

Compute and save indice data for the selected cities.

Source code in back/plantability/management/commands/c01_compute_plantability_indice.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def handle(self, *args, **options):
    """Run the indice computation city by city, then normalize once.

    Every city returned by ``select_city`` has its intersecting tiles looked
    up and passed to ``compute_indice``; the normalization step runs a single
    time after the loop.
    """
    cities = select_city(options["insee_code_city"])
    total = len(cities)

    for position, city in enumerate(cities.itertuples(), start=1):
        log_progress(f"{city.name} ({position} on {total} city).")
        city_geometry = GEOSGeometry(city.geometry.wkt)
        matching_tiles = Tile.objects.filter(geometry__intersects=city_geometry)
        tiles_df = load_geodataframe_from_db(matching_tiles, ["id"])
        compute_indice(tiles_df["id"])

    log_progress("Computing normalized indice.")
    compute_robust_normalized_indice()

compute_indice(tiles_id)

Compute the indice for a list of tiles. The indice is computed as the weighted sum of the factors (land occupancy proportion) for each tile.

Parameters:

Name Type Description Default
tiles_id list[int]

List of tile ids to compute the indice for.

required

Returns:

Type Description
None

None

Source code in back/plantability/management/commands/c01_compute_plantability_indice.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def compute_indice(tiles_id) -> None:
    """
    Compute and store the plantability indice of the given tiles.

    Each tile's indice is the sum, over its ``TileFactor`` rows, of the factor
    value multiplied by the matching coefficient from ``FACTORS``.

    Args:
        tiles_id (list[int]): List of tile ids to compute the indice for.

    Returns:
        None
    """
    columns = ["tile_id", "factor", "value"]
    factor_rows = TileFactor.objects.filter(tile_id__in=tiles_id).values_list(
        *columns
    )

    # Attach each row's coefficient by looking its factor up in FACTORS.
    coefficients = pd.Series(FACTORS, name="factor_coeff")
    weighted = pd.DataFrame(factor_rows, columns=columns).join(
        coefficients, on="factor"
    )
    weighted["value"] = weighted["value"] * weighted["factor_coeff"]
    per_tile = weighted.groupby("tile_id", as_index=False)["value"].sum()

    updated_tiles = [
        Tile(id=row.tile_id, plantability_indice=row.value)
        for row in per_tile.itertuples()
    ]
    with transaction.atomic():
        Tile.objects.bulk_update(
            updated_tiles,
            ["plantability_indice"],
            batch_size=10000,
        )

compute_robust_normalized_indice()

Apply robust normalization to the raw indice (subtract the median, then divide by the interquartile range Q3 − Q1) and threshold the result to produce a normalized indice between 0 and 10.

Source code in back/plantability/management/commands/c01_compute_plantability_indice.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def compute_robust_normalized_indice() -> None:
    """Robust normalization of the data and then thresholding to produce a normalized indice between 0 and 10.

    Steps:
        1. Estimate Q1, the median and Q3 of ``plantability_indice``, on a
           random sample of ``SAMPLE_LIMIT`` tiles when the table is larger
           than that, on the whole table otherwise.
        2. Walk the ``Tile`` table in id order, one batch at a time, compute
           ``(plantability_indice - median) / IQR`` for each tile, pass it
           through ``score_thresholding`` and save the result in
           ``plantability_normalized_indice``.

    Returns:
        None
    """
    print("Computing Q1, Q3 and median")
    total_count = Tile.objects.all().count()
    if total_count > SAMPLE_LIMIT:
        sampled_ids = list(
            Tile.objects.order_by("?").values_list("id", flat=True)[:SAMPLE_LIMIT]
        )
        qs = (
            Tile.objects.filter(id__in=sampled_ids)
            .order_by("plantability_indice")
            .values_list("plantability_indice", flat=True)
        )
    else:
        qs = Tile.objects.order_by("plantability_indice").values_list(
            "plantability_indice", flat=True
        )
    count = qs.count()
    if count == 0:
        # Nothing to normalize; indexing an empty queryset would raise IndexError.
        print("No tile found, skipping normalization.")
        return

    # round(0.75 * count) equals count for very small tables (count of 1 or 2),
    # so every index is clamped to the last valid position.
    last_index = count - 1
    q1_index, median_index, q3_index = (
        min(int(round(fraction * count)), last_index)
        for fraction in (0.25, 0.5, 0.75)
    )
    q1_value = qs[q1_index]
    median_value = qs[median_index]
    q3_value = qs[q3_index]
    iqr = q3_value - q1_value
    print(f"Q1: {q1_value}, Q3: {q3_value}, Median: {median_value}, IQR: {iqr}")
    # NOTE(review): iqr is 0 when Q1 == Q3, in which case the scaling below
    # divides by zero; confirm how score_thresholding should treat that case.

    # Fetch in batches to avoid OOM issues
    batch_size = int(1e4)
    last_processed_id = 0
    # count() runs a COUNT query; len() on the queryset would load every Tile
    # into memory, which is exactly what batching is meant to avoid.
    total_batches = (Tile.objects.count() + batch_size - 1) // batch_size
    with tqdm(total=total_batches, desc="Processing Batches") as pbar:
        while True:
            # Keyset pagination on id; only the two columns actually used are
            # fetched (no geometry loading or conversion).
            rows = list(
                Tile.objects.filter(id__gt=last_processed_id)
                .order_by("id")
                .values_list("id", "plantability_indice")[:batch_size]
            )
            if not rows:
                break

            df = pd.DataFrame(rows, columns=["id", "plantability_indice"])
            df["robust_scaling"] = (df["plantability_indice"] - median_value) / iqr
            df["plantability_normalized_indice"] = df["robust_scaling"].apply(
                score_thresholding
            )
            with transaction.atomic():
                Tile.objects.bulk_update(
                    [
                        Tile(
                            id=row.id,
                            plantability_normalized_indice=row.plantability_normalized_indice,
                        )
                        for row in df.itertuples(index=False)
                    ],
                    ["plantability_normalized_indice"],
                    batch_size=10000,
                )
            # Rows are ordered by id, so the last one carries the highest id seen.
            last_processed_id = rows[-1][0]
            pbar.update(1)