PM: Transition System Miner: Difference between revisions

From OnnoCenterWiki
Jump to navigationJump to search
Created page with "Siap! Berikut script **Transition System Miner** dengan **PM4Py** yang membaca **file XES terlampir** (`/mnt/data/BPIC_2012_A.xes`), menambang **Transition System (TS)**, lalu..."
 
No edit summary
 
Line 1: Line 1:
Siap! Berikut script **Transition System Miner** dengan **PM4Py** yang membaca **file XES terlampir** (`/mnt/data/BPIC_2012_A.xes`), menambang **Transition System (TS)**, lalu:
Siap! Berikut script '''Transition System Miner''' dengan '''PM4Py''' yang membaca '''file XES terlampir''' (`/mnt/data/BPIC_2012_A.xes`), menambang '''Transition System (TS)''', lalu:


* Cetak ringkasan jumlah **state** dan **transition**
* Cetak ringkasan jumlah '''state''' dan '''transition'''
* Simpan **gambar TS** (`transition_system.png`)
* Simpan '''gambar TS''' (`transition_system.png`)
* Ekspor daftar **state** (`ts_states.csv`) dan **transition** (`ts_transitions.csv`)
* Ekspor daftar '''state''' (`ts_states.csv`) dan '''transition''' (`ts_transitions.csv`)


> **Install dulu (sekali saja):**
'''Install dulu (sekali saja):'''
>
> ```bash
> pip install pm4py pandas graphviz
> # pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
> # sudo apt-get install graphviz
> ```


---


### `transition_system_miner.py`
pip install pm4py pandas graphviz
# pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
# sudo apt-get install graphviz


```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


import argparse
==transition_system_miner.py==
import sys
from pathlib import Path
import pandas as pd


from pm4py.objects.log.importer.xes import importer as xes_importer
#!/usr/bin/env python3
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
# -*- coding: utf-8 -*-
from pm4py.visualization.transition_system import visualizer as ts_visualizer
import argparse
import sys
from pathlib import Path
import pandas as pd
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
from pm4py.visualization.transition_system import visualizer as ts_visualizer


def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path):
    """Mine a transition system from an XES log and export it.

    The log at ``xes_path`` is imported, a transition system is discovered
    from it, the rendered graph is saved to ``image_out``, and the states and
    transitions are written as CSV files. A short summary is printed to
    stdout.

    Args:
        xes_path: Path of the XES event log to read.
        image_out: Destination of the rendered transition-system image.
        states_out: Destination CSV with columns ``state_id, state_label``.
        trans_out: Destination CSV with columns ``source_id, source_label,
            target_id, target_label, transition_label``.

    Returns:
        None. All results are written to the given files and to stdout.
    """
    state_columns = ["state_id", "state_label"]
    trans_columns = [
        "source_id",
        "source_label",
        "target_id",
        "target_label",
        "transition_label",
    ]

    def state_id_label(state_obj):
        """Return ``(id, label)`` of a state, falling back to ``str(state)``."""
        # Only a missing/None attribute triggers the fallback, so a falsy but
        # valid name (e.g. "" or 0) yields the same id in both CSV files.
        fallback = str(state_obj)
        sid = getattr(state_obj, "name", None)
        slb = getattr(state_obj, "label", None)
        return (
            fallback if sid is None else sid,
            fallback if slb is None else slb,
        )

    # 1) Load the event log.
    log = xes_importer.apply(str(xes_path))

    # 2) Discover the transition system.
    # Key parameters (change them if the log uses different attribute names).
    # NOTE(review): PM4Py algorithms usually read a Parameters enum; confirm
    # that these plain-string keys are honoured by the installed version.
    parameters = {
        "case_glue": "case:concept:name",
        "activity_key": "concept:name",
        "timestamp_key": "time:timestamp",
    }
    ts = ts_discovery.apply(log, parameters=parameters)

    # 3) Render the graph and save it to a file.
    gviz = ts_visualizer.apply(ts)
    ts_visualizer.save(gviz, str(image_out))

    # 4) Export states and transitions to CSV.
    states_rows = []
    for s in ts.states:
        sid, slabel = state_id_label(s)
        states_rows.append({"state_id": sid, "state_label": slabel})

    trans_rows = []
    for t in ts.transitions:
        # A transition links a source state to a target state.
        src = getattr(t, "from_state", getattr(t, "from", None))
        dst = getattr(t, "to_state", getattr(t, "to", None))
        # Prefer 'label'; otherwise use 'name'. PM4Py transitions appear to
        # carry the activity in 'name' -- TODO confirm for the version in use.
        lab = getattr(t, "label", None)
        if lab is None:
            lab = getattr(t, "name", None)

        src_id, src_label = (None, None) if src is None else state_id_label(src)
        dst_id, dst_label = (None, None) if dst is None else state_id_label(dst)
        trans_rows.append({
            "source_id": src_id,
            "source_label": src_label,
            "target_id": dst_id,
            "target_label": dst_label,
            "transition_label": lab if lab is not None else "",
        })

    # Explicit columns keep the header row even when nothing was discovered.
    pd.DataFrame(states_rows, columns=state_columns).to_csv(states_out, index=False)
    pd.DataFrame(trans_rows, columns=trans_columns).to_csv(trans_out, index=False)

    # 5) Summary.
    print("=== Transition System Summary ===")
    print(f"States    : {len(states_rows)}")
    print(f"Transitions: {len(trans_rows)}")
    print(f"Gambar    : {image_out}")
    print(f"States CSV : {states_out}")
    print(f"Trans CSV  : {trans_out}")
def main():
    """Parse the command-line arguments and run the transition system miner.

    Prints an error to stderr and exits with status 1 when the given XES
    path does not exist.
    """
    parser = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
    parser.add_argument("xes_path", type=str, help="Path ke file .xes")
    # The three output options share one shape, so register them in one pass.
    output_options = (
        ("--img", "transition_system.png", "Output image (PNG)"),
        ("--states_csv", "ts_states.csv", "Output CSV daftar state"),
        ("--trans_csv", "ts_transitions.csv", "Output CSV daftar transition"),
    )
    for flag, default_value, help_text in output_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)
    cli = parser.parse_args()

    source = Path(cli.xes_path)
    if source.exists():
        discover_transition_system(
            xes_path=source,
            image_out=Path(cli.img),
            states_out=Path(cli.states_csv),
            trans_out=Path(cli.trans_csv),
        )
        return

    # Missing input: report on stderr and signal failure to the shell.
    print(f"[ERROR] File tidak ditemukan: {source}", file=sys.stderr)
    sys.exit(1)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()


def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path):
    # 1) Load event log
    log = xes_importer.apply(str(xes_path))


    # 2) Discover Transition System
==Cara menjalankan (pakai file terlampir)==
    # Parameter kunci (ubah jika perlu)
    parameters = {
        "case_glue": "case:concept:name",
        "activity_key": "concept:name",
        "timestamp_key": "time:timestamp",
    }
    ts = ts_discovery.apply(log, parameters=parameters)


    # 3) Visualisasi & simpan ke file
python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
    gviz = ts_visualizer.apply(ts)
  --img ts_BPICA.png \
    ts_visualizer.save(gviz, str(image_out))
  --states_csv ts_BPICA_states.csv \
  --trans_csv ts_BPICA_transitions.csv


    # 4) Ekspor states & transitions ke CSV
    # Struktur objek TS di PM4Py: ts.states (set of State), ts.transitions (set of Transition)
    # State biasanya memiliki 'name' (id) dan 'label' (representasi state)
    states_rows = []
    for s in ts.states:
        sid = getattr(s, "name", None)
        slabel = getattr(s, "label", None)
        # fallback agar tetap terisi
        if sid is None:
            sid = str(s)
        if slabel is None:
            slabel = str(s)
        states_rows.append({"state_id": sid, "state_label": slabel})


    trans_rows = []
==Opsi & catatan==
    for t in ts.transitions:
        # transition memiliki source (from), target (to), label (activity/event class)
        src = getattr(t, "from_state", getattr(t, "from", None))
        dst = getattr(t, "to_state", getattr(t, "to", None))
        lab = getattr(t, "label", None)


        # Ambil id/label state sumber & tujuan
* '''Kolom kunci''' diset ke standar PM4Py:
        def state_id_label(state_obj):
            if state_obj is None:
                return None, None
            sid = getattr(state_obj, "name", None) or str(state_obj)
            slb = getattr(state_obj, "label", None) or str(state_obj)
            return sid, slb
 
        src_id, src_label = state_id_label(src)
        dst_id, dst_label = state_id_label(dst)
 
        trans_rows.append({
            "source_id": src_id,
            "source_label": src_label,
            "target_id": dst_id,
            "target_label": dst_label,
            "transition_label": lab if lab is not None else ""
        })
 
    pd.DataFrame(states_rows).to_csv(states_out, index=False)
    pd.DataFrame(trans_rows).to_csv(trans_out, index=False)
 
    # 5) Ringkasan
    print("=== Transition System Summary ===")
    print(f"States    : {len(states_rows)}")
    print(f"Transitions: {len(trans_rows)}")
    print(f"Gambar    : {image_out}")
    print(f"States CSV : {states_out}")
    print(f"Trans CSV  : {trans_out}")
 
 
def main():
    """Parse the command-line arguments and run the transition system miner.

    Prints an error to stderr and exits with status 1 when the given XES
    path does not exist.
    """
    parser = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
    parser.add_argument("xes_path", type=str, help="Path ke file .xes")
    # The three output options share one shape, so register them in one pass.
    output_options = (
        ("--img", "transition_system.png", "Output image (PNG)"),
        ("--states_csv", "ts_states.csv", "Output CSV daftar state"),
        ("--trans_csv", "ts_transitions.csv", "Output CSV daftar transition"),
    )
    for flag, default_value, help_text in output_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)
    cli = parser.parse_args()

    source = Path(cli.xes_path)
    if source.exists():
        discover_transition_system(
            xes_path=source,
            image_out=Path(cli.img),
            states_out=Path(cli.states_csv),
            trans_out=Path(cli.trans_csv),
        )
        return

    # Missing input: report on stderr and signal failure to the shell.
    print(f"[ERROR] File tidak ditemukan: {source}", file=sys.stderr)
    sys.exit(1)
 
 
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
```
 
---
 
### Cara menjalankan (pakai file terlampir)
 
```bash
python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
  --img ts_BPICA.png \
  --states_csv ts_BPICA_states.csv \
  --trans_csv ts_BPICA_transitions.csv
```
 
---
 
### Opsi & catatan
 
* **Kolom kunci** diset ke standar PM4Py:
   `case:concept:name`, `concept:name`, `time:timestamp`.
   `case:concept:name`, `concept:name`, `time:timestamp`.
   Jika log Anda memakai nama kolom berbeda, ubah di `parameters`.
   Jika log Anda memakai nama kolom berbeda, ubah di `parameters`.
* **Graphviz** diperlukan agar file PNG bisa disimpan. Jika belum ada, install `graphviz` (OS) selain paket Python-nya.
* '''Graphviz''' diperlukan agar file PNG bisa disimpan. Jika belum ada, install `graphviz` (OS) selain paket Python-nya.
* Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
* Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
 
** Filter variant Top-K, atau
  * Filter variant Top-K, atau
** Filter rentang tanggal tertentu.
  * Filter rentang tanggal tertentu.
* Jika ingin '''lihat''' langsung (open viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (akan membuka jendela viewer apabila environment mendukung).
* Jika ingin **lihat** langsung (open viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (akan membuka jendela viewer apabila environment mendukung).
 
Mau saya tambahkan versi dengan **filter Top-K variants** atau **prefix-length limit** biar graf tidak terlalu padat?

Latest revision as of 08:58, 13 September 2025

Siap! Berikut script Transition System Miner dengan PM4Py yang membaca file XES terlampir (`/mnt/data/BPIC_2012_A.xes`), menambang Transition System (TS), lalu:

  • Cetak ringkasan jumlah state dan transition
  • Simpan gambar TS (`transition_system.png`)
  • Ekspor daftar state (`ts_states.csv`) dan transition (`ts_transitions.csv`)

Install dulu (sekali saja):


pip install pm4py pandas graphviz
# pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
# sudo apt-get install graphviz


transition_system_miner.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import sys
from pathlib import Path
import pandas as pd

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
from pm4py.visualization.transition_system import visualizer as ts_visualizer
def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path):
    """Mine a transition system from an XES log and export it.

    The log at ``xes_path`` is imported, a transition system is discovered
    from it, the rendered graph is saved to ``image_out``, and the states and
    transitions are written as CSV files. A short summary is printed to
    stdout.

    Args:
        xes_path: Path of the XES event log to read.
        image_out: Destination of the rendered transition-system image.
        states_out: Destination CSV with columns ``state_id, state_label``.
        trans_out: Destination CSV with columns ``source_id, source_label,
            target_id, target_label, transition_label``.

    Returns:
        None. All results are written to the given files and to stdout.
    """
    state_columns = ["state_id", "state_label"]
    trans_columns = [
        "source_id",
        "source_label",
        "target_id",
        "target_label",
        "transition_label",
    ]

    def state_id_label(state_obj):
        """Return ``(id, label)`` of a state, falling back to ``str(state)``."""
        # Only a missing/None attribute triggers the fallback, so a falsy but
        # valid name (e.g. "" or 0) yields the same id in both CSV files.
        fallback = str(state_obj)
        sid = getattr(state_obj, "name", None)
        slb = getattr(state_obj, "label", None)
        return (
            fallback if sid is None else sid,
            fallback if slb is None else slb,
        )

    # 1) Load the event log.
    log = xes_importer.apply(str(xes_path))

    # 2) Discover the transition system.
    # Key parameters (change them if the log uses different attribute names).
    # NOTE(review): PM4Py algorithms usually read a Parameters enum; confirm
    # that these plain-string keys are honoured by the installed version.
    parameters = {
        "case_glue": "case:concept:name",
        "activity_key": "concept:name",
        "timestamp_key": "time:timestamp",
    }
    ts = ts_discovery.apply(log, parameters=parameters)

    # 3) Render the graph and save it to a file.
    gviz = ts_visualizer.apply(ts)
    ts_visualizer.save(gviz, str(image_out))

    # 4) Export states and transitions to CSV.
    states_rows = []
    for s in ts.states:
        sid, slabel = state_id_label(s)
        states_rows.append({"state_id": sid, "state_label": slabel})

    trans_rows = []
    for t in ts.transitions:
        # A transition links a source state to a target state.
        src = getattr(t, "from_state", getattr(t, "from", None))
        dst = getattr(t, "to_state", getattr(t, "to", None))
        # Prefer 'label'; otherwise use 'name'. PM4Py transitions appear to
        # carry the activity in 'name' -- TODO confirm for the version in use.
        lab = getattr(t, "label", None)
        if lab is None:
            lab = getattr(t, "name", None)

        src_id, src_label = (None, None) if src is None else state_id_label(src)
        dst_id, dst_label = (None, None) if dst is None else state_id_label(dst)
        trans_rows.append({
            "source_id": src_id,
            "source_label": src_label,
            "target_id": dst_id,
            "target_label": dst_label,
            "transition_label": lab if lab is not None else "",
        })

    # Explicit columns keep the header row even when nothing was discovered.
    pd.DataFrame(states_rows, columns=state_columns).to_csv(states_out, index=False)
    pd.DataFrame(trans_rows, columns=trans_columns).to_csv(trans_out, index=False)

    # 5) Summary.
    print("=== Transition System Summary ===")
    print(f"States     : {len(states_rows)}")
    print(f"Transitions: {len(trans_rows)}")
    print(f"Gambar     : {image_out}")
    print(f"States CSV : {states_out}")
    print(f"Trans CSV  : {trans_out}")


def main():
    """Parse the command-line arguments and run the transition system miner.

    Prints an error to stderr and exits with status 1 when the given XES
    path does not exist.
    """
    parser = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
    parser.add_argument("xes_path", type=str, help="Path ke file .xes")
    # The three output options share one shape, so register them in one pass.
    output_options = (
        ("--img", "transition_system.png", "Output image (PNG)"),
        ("--states_csv", "ts_states.csv", "Output CSV daftar state"),
        ("--trans_csv", "ts_transitions.csv", "Output CSV daftar transition"),
    )
    for flag, default_value, help_text in output_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)
    cli = parser.parse_args()

    source = Path(cli.xes_path)
    if source.exists():
        discover_transition_system(
            xes_path=source,
            image_out=Path(cli.img),
            states_out=Path(cli.states_csv),
            trans_out=Path(cli.trans_csv),
        )
        return

    # Missing input: report on stderr and signal failure to the shell.
    print(f"[ERROR] File tidak ditemukan: {source}", file=sys.stderr)
    sys.exit(1)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()


Cara menjalankan (pakai file terlampir)

python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
  --img ts_BPICA.png \
  --states_csv ts_BPICA_states.csv \
  --trans_csv ts_BPICA_transitions.csv


Opsi & catatan

  • Kolom kunci diset ke standar PM4Py:
 `case:concept:name`, `concept:name`, `time:timestamp`.
 Jika log Anda memakai nama kolom berbeda, ubah di `parameters`.
  • Graphviz diperlukan agar file PNG bisa disimpan. Jika belum ada, install `graphviz` (OS) selain paket Python-nya.
  • Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
    • Filter variant Top-K, atau
    • Filter rentang tanggal tertentu.
  • Jika ingin melihat hasilnya secara langsung (membuka viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (jendela viewer hanya akan terbuka apabila environment mendukung).