xinjie.wang committed on
Commit
c28dddb
·
1 Parent(s): 93b8c4c

init commit

Files changed (48)
  1. app.py +133 -0
  2. configs/config.yaml +93 -0
  3. dataset/__init__.py +13 -0
  4. dataset/base_dataset.py +404 -0
  5. dataset/data_module.py +82 -0
  6. dataset/mydataset.py +282 -0
  7. dataset/utils.py +194 -0
  8. inference.py +450 -0
  9. lightning_logs/version_0/hparams.yaml +1 -0
  10. lightning_logs/version_1/hparams.yaml +1 -0
  11. lightning_logs/version_2/hparams.yaml +1 -0
  12. lightning_logs/version_3/hparams.yaml +1 -0
  13. lightning_logs/version_4/hparams.yaml +1 -0
  14. lightning_logs/version_5/hparams.yaml +1 -0
  15. lightning_logs/version_6/hparams.yaml +111 -0
  16. lightning_logs/version_6/metrics.csv +4 -0
  17. metrics/__init__.py +0 -0
  18. metrics/aor.py +44 -0
  19. metrics/cd.py +284 -0
  20. metrics/giou.py +142 -0
  21. metrics/iou.py +220 -0
  22. metrics/iou_cdist.py +227 -0
  23. models/__init__.py +19 -0
  24. models/denoiser.py +415 -0
  25. models/utils.py +199 -0
  26. my_utils/__init__.py +0 -0
  27. my_utils/callbacks.py +36 -0
  28. my_utils/lr_schedulers.py +104 -0
  29. my_utils/misc.py +35 -0
  30. my_utils/plot.py +122 -0
  31. my_utils/refs.py +122 -0
  32. my_utils/render.py +482 -0
  33. my_utils/savermixins.py +55 -0
  34. objects/__init__.py +0 -0
  35. objects/dict_utils.py +299 -0
  36. objects/motions.py +99 -0
  37. requirements.txt +21 -0
  38. retrieval/__init__.py +0 -0
  39. retrieval/obj_retrieval.py +509 -0
  40. retrieval/retrieval_hash_acd.json +329 -0
  41. retrieval/retrieval_hash_no_handles.json +722 -0
  42. scripts/graph_pred/api.py +210 -0
  43. scripts/graph_pred/eval.py +62 -0
  44. scripts/graph_pred/prompt_workflow_new.py +363 -0
  45. scripts/json2urdf.py +160 -0
  46. scripts/mesh_retrieval/retrieve.py +97 -0
  47. scripts/mesh_retrieval/retrieve_gpt.py +29 -0
  48. scripts/mesh_retrieval/run_retrieve.py +68 -0
app.py ADDED
@@ -0,0 +1,133 @@
1
+ import gradio as gr
2
+ import os
3
+ import shutil
4
+ import zipfile
5
+ from types import SimpleNamespace
6
+ from inference import run_demo, load_config
7
+ import random
8
+ import string
9
+ from gradio.themes import Soft
10
+ from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
11
+
12
+ custom_theme = Soft(
13
+ primary_hue=stone,
14
+ secondary_hue=gray,
15
+ radius_size="md",
16
+ text_size="sm",
17
+ spacing_size="sm",
18
+ )
19
+
20
+
21
+ def inference_ui(img1, img2, omega, n_denoise_steps):
22
+ tmpdir = 'results'
23
+ random_str = ''.join(random.choices(string.ascii_letters, k=16))
24
+ tmpdir = tmpdir + "_" + random_str
25
+
26
+ # remove all existing "results*" directories
27
+ for dir in os.listdir('.'):
28
+ if dir.startswith('results') and os.path.isdir(dir):
29
+ shutil.rmtree(dir)
30
+ os.makedirs(os.path.join(tmpdir, "0"), exist_ok=True)
31
+
32
+ args = SimpleNamespace(
33
+ img_path_1=img1,
34
+ img_path_2=img2,
35
+ ckpt_path='ckpts/dipo.ckpt',
36
+ config_path='configs/config.yaml',
37
+ use_example_graph=False,
38
+ save_dir=tmpdir,
39
+ gt_data_root='./data/PartnetMobility',
40
+ n_samples=3,
41
+ omega=omega,
42
+ n_denoise_steps=n_denoise_steps,
43
+ )
44
+ args.config = load_config(args.config_path)
45
+ run_demo(args)
46
+
47
+ gif_path = os.path.join(tmpdir, "0", "animation.gif")
48
+ ply_path = os.path.join(tmpdir, "0", "object.ply")
49
+ glb_path = os.path.join(tmpdir, "0", "object.glb")
50
+
51
+ # compress the results into a ZIP archive
52
+ zip_path = os.path.join(tmpdir, "output.zip")
53
+ folder_to_zip = os.path.join(tmpdir, "0")
54
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
55
+ for root, dirs, files in os.walk(folder_to_zip):
56
+ for file in files:
57
+ abs_path = os.path.join(root, file)
58
+ rel_path = os.path.relpath(abs_path, folder_to_zip)
59
+ zipf.write(abs_path, arcname=rel_path)
60
+
61
+ return (
62
+ gif_path if os.path.exists(gif_path) else None,
63
+ zip_path if os.path.exists(zip_path) else None
64
+ )
65
+
66
+ def prepare_data():
67
+ if not os.path.exists("data") or not os.path.exists("saved_model"):
68
+ print("Downloading data.tar from Hugging Face Datasets...")
69
+ os.system("wget https://huggingface.co/datasets/wuruiqi0722/DIPO_data/resolve/main/data/data.tar -O data.tar")
70
+ os.system("tar -xf data.tar")
71
+
72
+ with gr.Blocks(theme=custom_theme) as demo:
73
+ gr.Markdown("## DIPO: Dual-State Images Controlled Articulated Object Generation Powered by Diverse Data")
74
+ gr.Markdown(
75
+ """
76
+ <p style="display: flex; gap: 10px; flex-wrap: nowrap;">
77
+ <a href="https://rq-wu.github.io/projects/DIPO">
78
+ <img alt="📖 Project Page" src="https://img.shields.io/badge/📖-Project_Page-blue">
79
+ </a>
80
+ <a href="https://arxiv.org/abs/2505.20460">
81
+ <img alt="📄 arXiv" src="https://img.shields.io/badge/📄-arXiv-b31b1b">
82
+ </a>
83
+ <a href="https://github.com/RQ-Wu/DIPO">
84
+ <img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
85
+ </a>
86
+ </p>
87
+ """
88
+ )
89
+ gr.Markdown("Currently, only the articulated object in following categories are supported: Table, Dishwasher, StorageFurniture, Refrigerator, WashingMachine, Microwave, Oven.")
90
+
91
+ with gr.Row():
92
+ with gr.Column(scale=1):
93
+ img1_input = gr.Image(label="Image: Closed State", type="filepath", height=250)
94
+ img2_input = gr.Image(label="Image: Opened State", type="filepath", height=250)
95
+ omega = gr.Slider(0.0, 1.0, step=0.1, value=0.5, label="Omega (CFG Guidance)")
96
+ n_denoise = gr.Slider(10, 200, step=10, value=100, label="Denoising Steps")
97
+ run_button = gr.Button("🚀 Run Generation (~2mins)")
98
+
99
+ with gr.Column(scale=1):
100
+ output_gif = gr.Image(label="GIF Animation", type="filepath", height=678, width=10000)
101
+ zip_download_btn = gr.DownloadButton(label="📦 Download URDF folder", interactive=False)
102
+
103
+ gr.Examples(
104
+ examples=[
105
+ ["examples/1.png", "examples/1_open_1.png"],
106
+ ["examples/1.png", "examples/1_open_2.png"],
107
+ ["examples/close1.png", "examples/open1.png"],
108
+ # ["examples/close2.png", "examples/open2.png"],
109
+ ["examples/close3.png", "examples/open3.png"],
110
+ # ["examples/close4.png", "examples/open4.png"],
111
+ ["examples/close5.png", "examples/open5.png"],
112
+ ["examples/close6.png", "examples/open6.png"],
113
+ ["examples/close7.png", "examples/open7.png"],
114
+ ["examples/close8.png", "examples/open8.png"],
115
+ ["examples/close9.jpg", "examples/open9.jpg"],
116
+ ["examples/close10.png", "examples/open10.png"],
117
+ ],
118
+ inputs=[img1_input, img2_input],
119
+ label="📂 Example Inputs"
120
+ )
121
+
122
+ run_button.click(
123
+ fn=inference_ui,
124
+ inputs=[img1_input, img2_input, omega, n_denoise],
125
+ outputs=[output_gif, zip_download_btn]
126
+ ).success(
127
+ lambda: gr.DownloadButton(interactive=True),
128
+ outputs=[zip_download_btn]
129
+ )
130
+
131
+ if __name__ == "__main__":
132
+ prepare_data()
133
+ demo.launch()
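
For reference, the pipeline wrapped by `inference_ui` can also be driven without the Gradio UI by building the same argument namespace directly. A minimal sketch, assuming the default checkpoint and example images referenced above (`ckpts/dipo.ckpt`, `examples/1.png`, `examples/1_open_1.png`) are present:

```python
# Hypothetical headless driver mirroring inference_ui(); paths are assumptions taken
# from the defaults and examples used in app.py above.
import os
from types import SimpleNamespace
from inference import run_demo, load_config

args = SimpleNamespace(
    img_path_1="examples/1.png",          # closed-state image
    img_path_2="examples/1_open_1.png",   # opened-state image
    ckpt_path="ckpts/dipo.ckpt",
    config_path="configs/config.yaml",
    use_example_graph=False,
    save_dir="results_headless",
    gt_data_root="./data/PartnetMobility",
    n_samples=3,
    omega=0.5,                            # CFG guidance weight
    n_denoise_steps=100,
)
args.config = load_config(args.config_path)
os.makedirs(os.path.join(args.save_dir, "0"), exist_ok=True)
run_demo(args)   # writes animation.gif / object.json under results_headless/<i>/
```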
configs/config.yaml ADDED
@@ -0,0 +1,93 @@
1
+ name: dipo
2
+ version: denoiser
3
+
4
+ data:
5
+ name: dm_dipo
6
+ json_root: data_path
7
+ root: data_path # root directory of the dataset
8
+ batch_size: 20 # batch size for training
9
+ num_workers: 8 # number of workers for data loading
10
+ K: 32 # maximum number of nodes (parts) in the graph (object)
11
+ split_file: split_file_path
12
+ n_views_per_model: 20
13
+ frame_mode: last_frame
14
+ test_which: pm
15
+ mode_num: 5
16
+
17
+ system:
18
+ name: sys_origin
19
+ exp_dir: ./exps/${name}/${version}
20
+ data_root: ${data.root}
21
+ n_time_samples: 16
22
+ loss_fg_weight: 0.01
23
+ img_drop_prob: 0.1 # image dropout probability, for classifier free training
24
+ guidance_scaler: 0.5 # scaling factor for guidance on the image during inference
25
+ graph_drop_prob: 0.5 # graph dropout probability, for classifier free training
26
+
27
+ model:
28
+ name: denoiser
29
+ in_ch: 6
30
+ attn_dim: 128
31
+ n_head: 4
32
+ n_layers: 6
33
+ dropout: 0.1
34
+ K: ${data.K}
35
+ mode_num: 5
36
+ img_emb_dims: [768, 128]
37
+ cat_drop_prob: 0.5 # object category dropout probability, for classifier free training
38
+
39
+ scheduler: # scheduler for the diffusion model
40
+ name: ddpm
41
+ config:
42
+ num_train_timesteps: 1000
43
+ beta_schedule: linear
44
+ prediction_type: epsilon
45
+
46
+ lr_scheduler_adapter: # lr scheduler for the new modules on top of the base model
47
+ name: LinearWarmupCosineAnnealingLR
48
+ warmup_epochs: 3
49
+ max_epochs: ${trainer.max_epochs}
50
+ warmup_start_lr: 1e-6
51
+ eta_min: 1e-5
52
+
53
+ optimizer_adapter: # optimizer for the new modules on top of the base model
54
+ name: AdamW
55
+ args:
56
+ lr: 5e-4
57
+ betas: [0.9, 0.99]
58
+ eps: 1.e-15
59
+
60
+ lr_scheduler_cage: # lr scheduler for modules in the base model
61
+ name: LinearWarmupCosineAnnealingLR
62
+ warmup_epochs: 3
63
+ max_epochs: ${trainer.max_epochs}
64
+ warmup_start_lr: 1e-6
65
+ eta_min: 1e-5
66
+
67
+ optimizer_cage: # optimizer for modules in the base model
68
+ name: AdamW
69
+ args:
70
+ lr: 5e-5
71
+ betas: [0.9, 0.99]
72
+ eps: 1.e-15
73
+
74
+ checkpoint:
75
+ dirpath: ${system.exp_dir}/ckpts
76
+ save_top_k: -1
77
+ every_n_epochs: 50
78
+
79
+ logger: # wandb logger
80
+ save_dir: ${system.exp_dir}/logs # directory to save logs
81
+ name: ${name}_${version}
82
+ project: SINGAPO
83
+
84
+ trainer:
85
+ max_epochs: 200
86
+ log_every_n_steps: 100
87
+ limit_train_batches: 1.0
88
+ limit_val_batches: 1.0
89
+ check_val_every_n_epoch: 10
90
+ precision: 16-mixed
91
+ profiler: simple
92
+ num_sanity_val_steps: -1
93
+
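
The `${...}` references above (e.g. `${name}`, `${data.K}`, `${trainer.max_epochs}`) are interpolations that resolve when the config is loaded. `load_config` lives in `my_utils/misc.py`, which is not part of this hunk; a minimal sketch of how the interpolations resolve, assuming an OmegaConf-style loader:

```python
# Sketch only: assumes the config is loaded with OmegaConf (the loader itself is not shown here).
from omegaconf import OmegaConf

cfg = OmegaConf.load("configs/config.yaml")
print(cfg.system.exp_dir)                   # ./exps/dipo/denoiser   (${name}/${version})
print(cfg.model.K)                          # 32                     (${data.K})
print(cfg.lr_scheduler_adapter.max_epochs)  # 200                    (${trainer.max_epochs})
```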
dataset/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ datamodules = {}
2
+
3
+ def register(name):
4
+ def decorator(cls):
5
+ datamodules[name] = cls
6
+ return cls
7
+ return decorator
8
+
9
+ def make(name, config):
10
+ dm = datamodules[name](config)
11
+ return dm
12
+
13
+ from . import data_module
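
This module is a tiny name-to-class registry: `register` stores a datamodule class under a string key and `make` instantiates it from a config. `dataset/data_module.py` below registers itself as `dm_dipo`, which matches `data.name` in `configs/config.yaml`. A self-contained usage sketch (the `toy_dm` class is hypothetical, for illustration only):

```python
# Registry round trip: the decorator stores the class, make() builds it by name.
import dataset

@dataset.register("toy_dm")        # hypothetical name, not used by the real code
class ToyDataModule:
    def __init__(self, config):
        self.config = config

dm = dataset.make("toy_dm", {"batch_size": 4})
print(type(dm).__name__, dm.config)   # ToyDataModule {'batch_size': 4}
```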
dataset/base_dataset.py ADDED
@@ -0,0 +1,404 @@
1
+ import os, sys
2
+ import json
3
+ import numpy as np
4
+ # import collections.abc
5
+ # sys.modules['collections'].Mapping = collections.abc.Mapping
6
+
7
+ import networkx as nx
8
+ from torch.utils.data import Dataset
9
+ from my_utils.refs import cat_ref, sem_ref, joint_ref, data_mode_ref
10
+ from collections import deque
11
+
12
+ def build_graph(tree, K=32):
13
+ '''
14
+ Function to build graph from the node list.
15
+
16
+ Args:
17
+ nodes: list of nodes
18
+ K: the maximum number of nodes in the graph
19
+ Returns:
20
+ adj: adjacency matrix, records the 1-ring relationship (parent+children) between nodes
21
+ edge_list: list of edges, for visualization
22
+ '''
23
+ adj = np.zeros((K, K), dtype=np.float32)
24
+ parents = []
25
+ tree_list = []
26
+ for node in tree:
27
+ tree_list.append(
28
+ {
29
+ 'id': node['id'],
30
+ 'parent_id': node['parent'],
31
+ }
32
+ )
33
+ # 1-ring relationship
34
+ if node['parent'] != -1:
35
+ adj[node['id'], node['parent']] = 1
36
+ parents.append(node['parent'])
37
+ else:
38
+ adj[node['id'], node['id']] = 1
39
+ parents.append(-1)
40
+ for child_id in node['children']:
41
+ adj[node['id'], child_id] = 1
42
+ return {
43
+ 'adj': adj,
44
+ 'parents': np.array(parents, dtype=np.int8),
45
+ 'tree_list': tree_list
46
+ }
47
+
48
+ from collections import defaultdict
49
+ from functools import cmp_to_key
50
+
51
+ def bfs_tree_simple(tree_list):
52
+ order = [0] * len(tree_list)
53
+ queue = []
54
+ current_node_idx = 0
55
+ for node_idx, node in enumerate(tree_list):
56
+ if node['parent_id'] == -1:
57
+ queue.append(node['id'])
58
+ order[node_idx] = current_node_idx
59
+ current_node_idx += 1
60
+ break
61
+ while len(queue) > 0:
62
+ current_node = queue.pop(0)
63
+ for node_idx, node in enumerate(tree_list):
64
+ if node['parent_id'] == current_node:
65
+ queue.append(node['id'])
66
+ order[node_idx] = current_node_idx
67
+ current_node_idx += 1
68
+
69
+ return order
70
+
71
+ def bfs_tree(tree_list, aabb_list, epsilon=1e-3):
72
+ # 初始化遍历顺序列表
73
+ order = [0] * len(tree_list)
74
+ current_order = 0
75
+
76
+ # 构建父节点到子节点的索引映射
77
+ parent_map = defaultdict(list)
78
+ for idx, node in enumerate(tree_list):
79
+ parent_map[node['parent_id']].append(idx)
80
+
81
+ # 查找根节点
82
+ root_indices = [idx for idx, node in enumerate(tree_list) if node['parent_id'] == -1]
83
+ if not root_indices:
84
+ return order
85
+
86
+ # 初始化队列(存储节点索引)
87
+ queue = [root_indices[0]]
88
+ order[root_indices[0]] = current_order
89
+ current_order += 1
90
+
91
+ # 比较函数:按中心坐标排序
92
+ def compare_centers(a, b):
93
+ # 获取两个节点的中心坐标
94
+ center_a = [(aabb_list[a][i] + aabb_list[a][i+3])/2 for i in range(3)]
95
+ center_b = [(aabb_list[b][i] + aabb_list[b][i+3])/2 for i in range(3)]
96
+
97
+ # 逐级比较坐标(考虑epsilon阈值)
98
+ for coord in range(3):
99
+ delta = abs(center_a[coord] - center_b[coord])
100
+ if delta > epsilon:
101
+ return -1 if center_a[coord] < center_b[coord] else 1
102
+ return 0 # 所有坐标差均小于阈值时保持原顺序
103
+
104
+ # BFS遍历
105
+ while queue:
106
+ current_idx = queue.pop(0)
107
+ current_id = tree_list[current_idx]['id']
108
+
109
+ # 获取子节点索引并排序
110
+ children = parent_map.get(current_id, [])
111
+ sorted_children = sorted(children, key=cmp_to_key(compare_centers))
112
+
113
+ # 处理子节点
114
+ for child_idx in sorted_children:
115
+ order[child_idx] = current_order
116
+ current_order += 1
117
+ queue.append(child_idx)
118
+
119
+ return order
120
+
121
+ class BaseDataset(Dataset):
122
+ def __init__(self, hparams):
123
+ super().__init__()
124
+ self.hparams = hparams
125
+
126
+ def _filter_models(self, models_ids):
127
+ '''
128
+ Filter out models that have more than K nodes.
129
+ '''
130
+ json_data_root = self.hparams.json_root
131
+ filtered = []
132
+ for i, model_id in enumerate(models_ids):
133
+ if i % 100 == 0:
134
+ print(f'Checking model {i}/{len(models_ids)}')
135
+ path = os.path.join(json_data_root, model_id, self.json_name)
136
+ with open(path, 'r') as f:
137
+ json_file = json.load(f)
138
+ if len(json_file['diffuse_tree']) <= self.hparams.K:
139
+ filtered.append(model_id)
140
+ return filtered
141
+
142
+ def get_acd_mapping(self):
143
+ self.category_mapping = {
144
+ 'armoire': 'StorageFurniture',
145
+ 'bookcase': 'StorageFurniture',
146
+ 'chest_of_drawers': 'StorageFurniture',
147
+ 'desk': 'Table',
148
+ 'dishwasher': 'Dishwasher',
149
+ 'hanging_cabinet': 'StorageFurniture',
150
+ 'kitchen_cabinet': 'StorageFurniture',
151
+ 'microwave': 'Microwave',
152
+ 'nightstand': 'StorageFurniture',
153
+ 'oven': 'Oven',
154
+ 'refrigerator': 'Refrigerator',
155
+ 'sink_cabinet': 'StorageFurniture',
156
+ 'tv_stand': 'StorageFurniture',
157
+ 'washer': 'WashingMachine',
158
+ 'table': 'Table',
159
+ 'cabinet': 'StorageFurniture',
160
+ 'hanging_cabinet': 'StorageFurniture',
161
+ }
162
+
163
+ def _random_permute(self, graph, nodes):
164
+ '''
165
+ Function to randomly permute the nodes and update the graph and node attribute info.
166
+
167
+ Args:
168
+ graph: a dictionary containing the adjacency matrix, edge list, and root node
169
+ nodes: a list of nodes
170
+ Returns:
171
+ graph_permuted: a dictionary containing the updated adjacency matrix, edge list, and root node
172
+ nodes_permuted: a list of permuted nodes
173
+ '''
174
+ N = len(nodes)
175
+ order = np.random.permutation(N)
176
+ graph_permuted = self._reorder_nodes(graph, order)
177
+ exchange = [0] * len(order)
178
+ for i in range(len(order)):
179
+ exchange[order[i]] = i
180
+ nodes_permuted = nodes[exchange, :]
181
+ return graph_permuted, nodes_permuted
182
+
183
+ def _permute_by_order(self, graph, nodes, order):
184
+ '''
185
+ Function to permute the nodes and update the graph and node attribute info by order.
186
+
187
+ Args:
188
+ graph: a dictionary containing the adjacency matrix, edge list, and root node
189
+ nodes: a list of nodes
190
+ order: a list of indices for reordering
191
+ Returns:
192
+ graph_permuted: a dictionary containing the updated adjacency matrix, edge list, and root node
193
+ nodes_permuted: a list of permuted nodes
194
+ '''
195
+ graph_permuted = self._reorder_nodes(graph, order)
196
+ if nodes is None:
197
+ return graph_permuted, None
198
+ else:
199
+ exchange = [0] * len(order)
200
+ for i in range(len(order)):
201
+ exchange[order[i]] = i
202
+ nodes_permuted = nodes[exchange, :]
203
+ return graph_permuted, nodes_permuted
204
+
205
+ def _prepare_node_data(self, node):
206
+ # semantic label
207
+ label = np.array([sem_ref['fwd'][node['name']]], dtype=np.float32) / 5. - 0.8 # (1,), range from -0.8 to 0.8
208
+ # joint type
209
+ joint_type = np.array([joint_ref['fwd'][node['joint']['type']] / 5.], dtype=np.float32) - 0.5 # (1,), range from -0.8 to 0.8
210
+ # aabb
211
+ aabb_center = np.array(node['aabb']['center'], dtype=np.float32) # (3,), range from -1 to 1
212
+ aabb_size = np.array(node['aabb']['size'], dtype=np.float32) # (3,), range from -1 to 1
213
+ aabb_max = aabb_center + aabb_size / 2
214
+ aabb_min = aabb_center - aabb_size / 2
215
+ # joint axis and range
216
+ if node['joint']['type'] == 'fixed':
217
+ axis_dir = np.zeros((3,), dtype=np.float32)
218
+ axis_ori = aabb_center
219
+ joint_range = np.zeros((2,), dtype=np.float32)
220
+ else:
221
+ if node['joint']['type'] == 'revolute' or node['joint']['type'] == 'continuous':
222
+ joint_range = np.array([node['joint']['range'][1]], dtype=np.float32) / 360.
223
+ joint_range = np.concatenate([joint_range, np.zeros((1,), dtype=np.float32)], axis=0) # (2,)
224
+ elif node['joint']['type'] == 'prismatic' or node['joint']['type'] == 'screw':
225
+ joint_range = np.array([node['joint']['range'][1]], dtype=np.float32)
226
+ joint_range = np.concatenate([np.zeros((1,), dtype=np.float32), joint_range], axis=0) # (2,)
227
+ axis_dir = np.array(node['joint']['axis']['direction'], dtype=np.float32) * 0.7 # (3,), range from -0.7 to 0.7
228
+ # make sure the axis is pointing to the positive direction
229
+ if np.sum(axis_dir > 0) < np.sum(-axis_dir > 0):
230
+ axis_dir = -axis_dir
231
+ joint_range = -joint_range
232
+ axis_ori = np.array(node['joint']['axis']['origin'], dtype=np.float32) # (3,), range from -1 to 1
233
+ if (node['joint']['type'] == 'prismatic' or node['joint']['type'] == 'screw') and node['name'] != 'door':
234
+ axis_ori = aabb_center
235
+ # prepare node data by given mod name
236
+ # aabb = np.concatenate([aabb_max, aabb_min], axis=0)
237
+ # axis = np.concatenate([axis_dir, axis_ori], axis=0)
238
+ # node_data_all = [aabb, joint_type.repeat(6), axis, joint_range.repeat(3), label.repeat(6)]
239
+ # node_data_list = [node_data_all[data_mode_ref[mod_name]] for mod_name in self.hparams.data_mode]
240
+ # node_data = np.concatenate(node_data_list, axis=0)
241
+ node_label = np.ones(6, dtype=np.float32)
242
+
243
+ node_data = np.concatenate([aabb_max, aabb_min, joint_type.repeat(6), axis_dir, axis_ori, joint_range.repeat(3), label.repeat(6), node_label], axis=0)
244
+ if self.hparams.mode_num == 5:
245
+ node_data = np.concatenate([aabb_max, aabb_min, joint_type.repeat(6), axis_dir, axis_ori, joint_range.repeat(3), label.repeat(6)], axis=0)
246
+ return node_data
247
+
248
+
249
+ def _reorder_nodes(self, graph, order):
250
+ '''
251
+ Function to reorder nodes in the graph and
252
+ update the adjacency matrix, edge list, and root node.
253
+
254
+ Args:
255
+ graph: a dictionary containing the adjacency matrix, edge list, and root node
256
+ order: a list of indices for reordering
257
+ Returns:
258
+ new_graph: a dictionary containing the updated adjacency matrix, edge list, and root node
259
+ '''
260
+ N = len(order)
261
+ mapping = {i: order[i] for i in range(N)}
262
+ mapping.update({i: i for i in range(N, self.hparams.K)})
263
+ G = nx.from_numpy_array(graph['adj'], create_using=nx.Graph)
264
+ G_ = nx.relabel_nodes(G, mapping)
265
+ new_adj = nx.adjacency_matrix(G_, G.nodes).todense()
266
+
267
+ exchange = [0] * len(order)
268
+ for i in range(len(order)):
269
+ exchange[order[i]] = i
270
+ return {
271
+ 'adj': new_adj.astype(np.float32),
272
+ 'parents': graph['parents'][exchange]
273
+ }
274
+
275
+
276
+ def _prepare_input_GT(self, file, model_id):
277
+ '''
278
+ Function to parse input item from a json file for the CAGE training.
279
+ '''
280
+ tree = file['diffuse_tree']
281
+ K = self.hparams.K # max number of nodes
282
+ cond = {} # conditional information and auxiliary data
283
+ cond['parents'] = np.zeros(K, dtype=np.int8)
284
+
285
+ # prepare node data
286
+ nodes = []
287
+ for node in tree:
288
+ node_data = self._prepare_node_data(node) # (36,)
289
+ nodes.append(node_data)
290
+ nodes = np.array(nodes, dtype=np.float32)
291
+ n_nodes = len(nodes)
292
+
293
+ # prepare graph
294
+ graph = build_graph(tree, self.hparams.K)
295
+ if self.mode == 'train': # perturb the node order for training
296
+ graph, nodes = self._random_permute(graph, nodes)
297
+
298
+ # pad the nodes to K with empty nodes
299
+ if n_nodes < K:
300
+ empty_node = np.zeros((nodes[0].shape[0],))
301
+ data = np.concatenate([nodes, [empty_node] * (K - n_nodes)], axis=0, dtype=np.float32) # (K, 36)
302
+ else:
303
+ data = nodes
304
+ mode_num = data.shape[1] // 6
305
+ data = data.reshape(K*mode_num, 6) # (K * n_attr, 6)
306
+
307
+ # attr mask (for Local Attention)
308
+ attr_mask = np.eye(K, K, dtype=bool)
309
+ attr_mask = attr_mask.repeat(mode_num, axis=0).repeat(mode_num, axis=1)
310
+ cond['attr_mask'] = attr_mask
311
+
312
+ # key padding mask (for Global Attention)
313
+ pad_mask = np.zeros((K*mode_num, K*mode_num), dtype=bool)
314
+ pad_mask[:, :n_nodes*mode_num] = 1
315
+ cond['key_pad_mask'] = pad_mask
316
+
317
+ # adj mask (for Graph Relation Attention)
318
+ adj_mask = graph['adj'][:].astype(bool)
319
+ adj_mask = adj_mask.repeat(mode_num, axis=0).repeat(mode_num, axis=1)
320
+ adj_mask[n_nodes*mode_num:, :] = 1
321
+ cond['adj_mask'] = adj_mask
322
+
323
+ # object category
324
+ if self.map_cat: # for ACD dataset
325
+ category = file['meta']['obj_cat']
326
+ category = self.category_mapping[category]
327
+ cond['cat'] = cat_ref[category]
328
+ else:
329
+ cond['cat'] = cat_ref.get(file['meta']['obj_cat'], None)
330
+ if cond['cat'] is None:
331
+ cond['cat'] = self.category_mapping.get(file['meta']['obj_cat'], None)
332
+ if cond['cat'] is None:
333
+ cond['cat'] = 2
334
+ else:
335
+ cond['cat'] = cat_ref.get(cond['cat'], None)
336
+ # cond['cat'] = cat_ref[file['meta']['obj_cat']]
337
+ if cond['cat'] is None:
338
+ cond['cat'] = 2
339
+ # auxiliary info
340
+ cond['name'] = model_id
341
+ cond['adj'] = graph['adj']
342
+ cond['parents'][:n_nodes] = graph['parents']
343
+ cond['n_nodes'] = n_nodes
344
+ cond['obj_cat'] = file['meta']['obj_cat']
345
+
346
+ return data, cond
347
+
348
+ def _prepare_input(self, model_id, pred_file, gt_file=None):
349
+ '''
350
+ Function to parse input item from pred_file, and parse GT from gt_file if available.
351
+ '''
352
+ K = self.hparams.K # max number of nodes
353
+ cond = {} # conditional information and auxiliary data
354
+ # prepare node data
355
+ n_nodes = len(pred_file['diffuse_tree'])
356
+ # prepare graph
357
+ pred_graph = build_graph(pred_file['diffuse_tree'], K)
358
+ # dummy GT data
359
+ data = np.zeros((K*5, 6), dtype=np.float32)
360
+
361
+ # attr mask (for Local Attention)
362
+ attr_mask = np.eye(K, K, dtype=bool)
363
+ attr_mask = attr_mask.repeat(5, axis=0).repeat(5, axis=1)
364
+ cond['attr_mask'] = attr_mask
365
+
366
+ # key padding mask (for Global Attention)
367
+ pad_mask = np.zeros((K*5, K*5), dtype=bool)
368
+ pad_mask[:, :n_nodes*5] = 1
369
+ cond['key_pad_mask'] = pad_mask
370
+
371
+ # adj mask (for Graph Relation Attention)
372
+ adj_mask = pred_graph['adj'][:].astype(bool)
373
+ adj_mask = adj_mask.repeat(5, axis=0).repeat(5, axis=1)
374
+ adj_mask[n_nodes*5:, :] = 1
375
+ cond['adj_mask'] = adj_mask
376
+
377
+ # placeholder category, won't be used if category is given (below)
378
+ cond['cat'] = cat_ref['StorageFurniture']
379
+ cond['obj_cat'] = 'StorageFurniture'
380
+ # if object category is given as input
381
+ if not self.hparams.get('test_label_free', False):
382
+ assert 'meta' in pred_file, 'meta not found in the json file.'
383
+ assert 'obj_cat' in pred_file['meta'], 'obj_cat not found in the metadata of the json file.'
384
+ category = pred_file['meta']['obj_cat']
385
+ if self.map_cat: # for ACD dataset
386
+ category = self.category_mapping[category]
387
+ cond['cat'] = cat_ref[category]
388
+ cond['obj_cat'] = category
389
+
390
+ # auxiliary info
391
+ cond['name'] = model_id
392
+ cond['adj'] = pred_graph['adj']
393
+ cond['parents'] = np.zeros(K, dtype=np.int8)
394
+ cond['parents'][:n_nodes] = pred_graph['parents']
395
+ cond['n_nodes'] = n_nodes
396
+
397
+ return data, cond
398
+
399
+ def __getitem__(self, index):
400
+ raise NotImplementedError
401
+
402
+ def __len__(self):
403
+ raise NotImplementedError
404
+
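
The structure returned by `build_graph` is easiest to see on a toy two-part object (a base with one door), using the same node schema as the `diffuse_tree` entries it consumes; a minimal sketch:

```python
# Toy example of build_graph(): the root gets a self-loop, parent/child links fill the 1-ring adjacency.
from dataset.base_dataset import build_graph

tree = [
    {"id": 0, "parent": -1, "children": [1]},   # base (root)
    {"id": 1, "parent": 0,  "children": []},    # door
]
g = build_graph(tree, K=4)
print(g["parents"])       # [-1  0]
print(g["adj"][:2, :2])   # [[1. 1.]
                          #  [1. 0.]]
```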
dataset/data_module.py ADDED
@@ -0,0 +1,82 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ import json
4
+ import dataset
5
+ import lightning.pytorch as pl
6
+ from torch.utils.data import DataLoader
7
+ from dataset.mydataset import MyDataset
8
+
9
+ @dataset.register("dm_dipo")
10
+ class DIPODataModule(pl.LightningDataModule):
11
+
12
+ def __init__(self, hparams):
13
+ super().__init__()
14
+ self.hparams.update(hparams)
15
+
16
+ def _prepare_split(self):
17
+ with open(self.hparams.split_file , "r") as f:
18
+ splits = json.load(f)
19
+
20
+ train_ids = splits["train"]
21
+ val_ids = [i for i in train_ids if "data" not in i]
22
+ return train_ids, val_ids
23
+
24
+ def _prepare_test_ids(self):
25
+ if "acd" in self.hparams.get('test_which'):
26
+ with open("/home/users/ruiqi.wu/singapo/data/data_acd.json", "r") as f:
27
+ file = json.load(f)
28
+ elif 'pm' in self.hparams.get('test_which'):
29
+ with open(self.hparams.split_file, "r") as f:
30
+ file = json.load(f)
31
+ else:
32
+ raise NotImplementedError(f"Dataset {self.hparams.get('test_which')} not implemented for SingapoDataModule")
33
+ ids = file['test']
34
+ return ids
35
+
36
+ def setup(self, stage=None):
37
+
38
+ if stage == "fit" or stage is None:
39
+ train_ids, val_ids = self._prepare_split()
40
+ val_ids = val_ids
41
+ self.train_dataset = MyDataset(self.hparams, model_ids=train_ids[:10], mode="train")
42
+ self.val_dataset = MyDataset(self.hparams, model_ids=val_ids[:50], mode="val")
43
+ elif stage == "validate":
44
+ val_ids = self._prepare_test_ids()
45
+ val_ids = val_ids
46
+ self.val_dataset = MyDataset(self.hparams, model_ids=val_ids, mode="val")
47
+ elif stage == "test":
48
+ test_ids = self._prepare_test_ids()
49
+ self.test_dataset = MyDataset(self.hparams, model_ids=test_ids, mode="test")
50
+ else:
51
+ raise NotImplementedError(f"Stage {stage} not implemented for SingapoDataModule")
52
+
53
+
54
+ def train_dataloader(self):
55
+ return DataLoader(
56
+ self.train_dataset,
57
+ batch_size=self.hparams.batch_size,
58
+ num_workers=self.hparams.num_workers,
59
+ pin_memory=True,
60
+ shuffle=True,
61
+ persistent_workers=True
62
+ )
63
+
64
+ def val_dataloader(self):
65
+ return DataLoader(
66
+ self.val_dataset,
67
+ batch_size=128,
68
+ num_workers=self.hparams.num_workers,
69
+ pin_memory=True,
70
+ shuffle=False,
71
+ persistent_workers=True
72
+ )
73
+
74
+ def test_dataloader(self):
75
+ return DataLoader(
76
+ self.test_dataset,
77
+ batch_size=1,
78
+ num_workers=self.hparams.num_workers,
79
+ pin_memory=True,
80
+ shuffle=False,
81
+ persistent_workers=True
82
+ )
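
Putting the pieces together, the datamodule is normally built through the registry from the `data` block of the config and then handed to Lightning or iterated directly. A minimal sketch, assuming the `data_path`/`split_file_path` placeholders in `configs/config.yaml` have been pointed at real data and an OmegaConf-style loader is used:

```python
# Sketch: build the datamodule by name and pull one validation batch.
from omegaconf import OmegaConf
import dataset

cfg = OmegaConf.load("configs/config.yaml")
dm = dataset.make(cfg.data.name, cfg.data)   # "dm_dipo" -> DIPODataModule
dm.setup(stage="validate")                   # uses the test ids from the split file
for data, cond, feat in dm.val_dataloader():
    print(data.shape, feat.shape)            # (B, K*5, 6), (B, 512, 768)
    break
```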
dataset/mydataset.py ADDED
@@ -0,0 +1,282 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
3
+ import json
4
+ import numpy as np
5
+ from PIL import Image
6
+ import torchvision.transforms as T
7
+ from dataset.base_dataset import BaseDataset
8
+ import random
9
+ from tqdm import tqdm
10
+ import imageio
11
+ import torch
12
+
13
+ def make_white_background(src_img):
14
+ '''Make the white background for the input RGBA image.'''
15
+ src_img.load()
16
+ background = Image.new("RGB", src_img.size, (255, 255, 255))
17
+ background.paste(src_img, mask=src_img.split()[3]) # 3 is the alpha channel
18
+ return background
19
+
20
+ class MyDataset(BaseDataset):
21
+
22
+ """
23
+ Dataset for training and testing on the PartNet-Mobility and ACD datasets (with our preprocessing).
24
+ The GT graph is given.
25
+ """
26
+
27
+ def __init__(self, hparams, model_ids, mode="train", json_name="object.json"):
28
+ self.hparams = hparams
29
+ self.json_name = json_name
30
+ self.model_ids = self._filter_models(model_ids)
31
+ self.mode = mode
32
+ self.map_cat = False
33
+ self.get_acd_mapping()
34
+
35
+ self.no_GT = (
36
+ True if self.hparams.get("test_no_GT", False) and self.hparams.get("test_pred_G", False)
37
+ else False
38
+ )
39
+ self.pred_G = (
40
+ False
41
+ if mode in ["train", "val"]
42
+ else self.hparams.get("test_pred_G", False)
43
+ )
44
+
45
+ if mode == 'test':
46
+ if "acd" in hparams.test_which:
47
+ self.map_cat = True
48
+
49
+ self.files = self._cache_data()
50
+ print(f"[INFO] {mode} dataset: {len(self)} data samples loaded.")
51
+
52
+ def _cache_data_train(self):
53
+ json_data_root = self.hparams.json_root
54
+ data_root = self.hparams.root
55
+ # number of views per model and in total
56
+ n_views_per_model = self.hparams.n_views_per_model
57
+ n_views = n_views_per_model * len(self.model_ids)
58
+ # json files for each model
59
+ json_files = []
60
+ # mapping to the index of the corresponding model in json_files
61
+ model_mappings = []
62
+ # space for dinov2 patch features
63
+ feats = np.empty((n_views, 512, 768), dtype=np.float16)
64
+ # space for object masks on image patches
65
+ obj_masks = np.empty((n_views, 256), dtype=bool)
66
+ # input images (not required in training)
67
+ imgs = None
68
+ # load data for non-aug views
69
+ i = 0 # index for views
70
+ for j, model_id in enumerate(self.model_ids):
71
+ print(model_id)
72
+ # if j % 10 == 0 and torch.distributed.get_rank() == 0:
73
+ # print(f"\rLoading training data: {j}/{len(self.model_ids)}")
74
+ # 3D data
75
+ with open(os.path.join(json_data_root, model_id, self.json_name), "r") as f:
76
+ json_file = json.load(f)
77
+ json_files.append(json_file)
78
+ filenames = os.listdir(os.path.join(data_root, model_id, 'features'))
79
+ filenames = [f for f in filenames if 'high_res' not in f]
80
+ filenames = filenames[:self.hparams.n_views_per_model]
81
+ for filename in filenames:
82
+ view_feat = np.load(os.path.join(data_root, model_id, 'features', filename))
83
+ first_frame_feat = view_feat[0]
84
+ if self.hparams.frame_mode == 'last_frame':
85
+ second_frame_feat = view_feat[-2]
86
+ elif self.hparams.frame_mode == 'random_state_frame':
87
+ second_frame_feat = view_feat[-1]
88
+ else:
89
+ raise NotImplementedError("Please provide correct frame mode: last_frame | random_state_frame")
90
+ feats[i : i + 1, :256, :] = first_frame_feat.astype(np.float16)
91
+ feats[i : i + 1, 256:, :] = second_frame_feat.astype(np.float16)
92
+ i = i + 1
93
+ model_mappings += [j] * n_views_per_model
94
+ # object masks for all views
95
+ # all_obj_masks = np.load(
96
+ # os.path.join(json_data_root, model_id, "features/patch_obj_masks.npy")
97
+ # ) # (20, Np)
98
+ # obj_masks[i : i + n_views_per_model] = all_obj_masks[:n_views_per_model]
99
+ return {
100
+ "len": n_views,
101
+ "gt_files": json_files,
102
+ "features": feats,
103
+ "obj_masks": None,
104
+ "model_mappings": model_mappings,
105
+ "imgs": imgs,
106
+ }
107
+
108
+ def _cache_data_non_train(self):
109
+ # number of views per model and in total
110
+ n_views_per_model = 2
111
+ n_views = n_views_per_model * len(self.model_ids)
112
+ # json files for each model
113
+ gt_files = []
114
+ pred_files = [] # for predicted graphs
115
+ # mapping to the index of the corresponding model in json_files
116
+ model_mappings = []
117
+ # space for dinov2 patch features
118
+ feats = np.empty((n_views, 512, 768), dtype=np.float16)
119
+ # space for input images
120
+ first_imgs = np.empty((n_views, 128, 128, 3), dtype=np.uint8)
121
+ second_imgs = np.empty((n_views, 128, 128, 3), dtype=np.uint8)
122
+ # transformation for input images
123
+ transform = T.Compose(
124
+ [
125
+ T.Resize(256, interpolation=T.InterpolationMode.BICUBIC),
126
+ T.CenterCrop(224),
127
+ T.Resize(128, interpolation=T.InterpolationMode.BICUBIC),
128
+ ]
129
+ )
130
+
131
+ i = 0 # index for views
132
+ desc = f'Loading {self.mode} data'
133
+ for j, model_id in tqdm(enumerate(self.model_ids), total=len(self.model_ids), desc=desc):
134
+ with open(os.path.join(self.hparams.json_root, model_id, self.json_name), "r") as f:
135
+ json_file = json.load(f)
136
+ gt_files.append(json_file)
137
+ # filename_dir = os.path.join(self.hparams.root, model_id, 'features')
138
+ for filename in ['18.npy', '19.npy']:
139
+ view_feat = np.load(os.path.join(self.hparams.root, model_id, 'features', filename))
140
+ first_frame_feat = view_feat[0]
141
+ if self.hparams.frame_mode == 'last_frame':
142
+ second_frame_feat = view_feat[-2]
143
+ elif self.hparams.frame_mode == 'random_state_frame':
144
+ second_frame_feat = view_feat[-1]
145
+ else:
146
+ raise NotImplementedError("Please provide correct frame mode: last_frame | random_state_frame")
147
+ feats[i : i + 1, :256, :] = first_frame_feat.astype(np.float16)
148
+ feats[i : i + 1, 256:, :] = second_frame_feat.astype(np.float16)
149
+
150
+ video_path = os.path.join(self.hparams.root, model_id, 'imgs', 'animation_' + filename.replace('.npy', '.mp4'))
151
+ reader = imageio.get_reader(video_path)
152
+ frames = []
153
+ for frame in reader:
154
+ frames.append(frame)
155
+ reader.close()
156
+
157
+ first_img = Image.fromarray(frames[0])
158
+ if first_img.mode == 'RGBA':
159
+ first_img = make_white_background(first_img)
160
+
161
+
162
+ first_img = np.asarray(transform(first_img), dtype=np.int8)
163
+ first_imgs[i] = first_img
164
+
165
+ if self.hparams.frame_mode == 'last_frame':
166
+ second_img = Image.fromarray(frames[-1])
167
+ elif self.hparams.frame_mode == 'random_state_frame':
168
+ second_img_path = video_path.replace('animation', 'random').replace('.mp4', '.png')
169
+ second_img = Image.open(second_img_path)
170
+ if second_img.mode == 'RGBA':
171
+ second_img = make_white_background(second_img)
172
+ second_img = np.asarray(transform(second_img), dtype=np.int8)
173
+ second_imgs[i] = second_img
174
+
175
+ i = i + 1
176
+ # mapping to json file
177
+ model_mappings += [j] * n_views_per_model
178
+
179
+ return {
180
+ "len": n_views,
181
+ "gt_files": gt_files,
182
+ "pred_files": pred_files,
183
+ "features": feats,
184
+ "model_mappings": model_mappings,
185
+ "imgs": [first_imgs, second_imgs],
186
+ }
187
+
188
+ def _cache_data(self):
189
+ """
190
+ Function to cache data from disk.
191
+ """
192
+ if self.mode == "train":
193
+ return self._cache_data_train()
194
+ else:
195
+ return self._cache_data_non_train()
196
+
197
+ def _get_item_train_val(self, index):
198
+ model_i = self.files["model_mappings"][index]
199
+ gt_file = self.files["gt_files"][model_i]
200
+ data, cond = self._prepare_input_GT(
201
+ file=gt_file, model_id=self.model_ids[model_i]
202
+ )
203
+ if self.mode == "val":
204
+ # input image for visualization
205
+ img_first = self.files["imgs"][0][index]
206
+ img_last = self.files["imgs"][1][index]
207
+ cond["img"] = np.concatenate([img_first, img_last], axis=1)
208
+ # else:
209
+ # # object masks on patches
210
+ # # obj_mask = self.files["obj_masks"][index][None, ...].repeat(self.hparams.K * 5, axis=0)
211
+ # cond["img_obj_mask"] = [None]
212
+ return data, cond
213
+
214
+ def _get_item_test(self, index):
215
+ model_i = self.files["model_mappings"][index]
216
+
217
+ gt_file = None if self.no_GT else self.files["gt_files"][model_i]
218
+
219
+ if self.hparams.get('G_dir', None) is None:
220
+ data, cond = self._prepare_input_GT(file=gt_file, model_id=self.model_ids[model_i])
221
+ else:
222
+ if index % 2 == 0:
223
+ filename = '18.json'
224
+ else:
225
+ filename = '19.json'
226
+ pred_file_path = os.path.join(self.hparams.G_dir, self.model_ids[model_i], filename)
227
+ with open(pred_file_path, "r") as f:
228
+ pred_file = json.load(f)
229
+ data, cond = self._prepare_input(model_id=self.model_ids[model_i], pred_file=pred_file, gt_file=gt_file)
230
+ # input image for visualization
231
+ img_first = self.files["imgs"][0][index]
232
+ img_last = self.files["imgs"][1][index]
233
+ cond["img"] = np.concatenate([img_first, img_last], axis=1)
234
+ return data, cond
235
+
236
+ def __getitem__(self, index):
237
+ # input image features
238
+ feat = self.files["features"][index]
239
+
240
+ # prepare input, GT data and other axillary info
241
+ if self.mode == "test":
242
+ data, cond = self._get_item_test(index)
243
+ else:
244
+ data, cond = self._get_item_train_val(index)
245
+
246
+ return data, cond, feat
247
+
248
+ def __len__(self):
249
+ return self.files["len"]
250
+
251
+ if __name__ == '__main__':
252
+ from types import SimpleNamespace
253
+
254
+ class EnhancedNamespace(SimpleNamespace):
255
+ def get(self, key, default=None):
256
+ return getattr(self, key, default)
257
+
258
+ hparams = {
259
+ "name": "dm_singapo",
260
+ "json_root": "/home/users/ruiqi.wu/singapo/", # root directory of the dataset
261
+ "batch_size": 20, # batch size for training
262
+ "num_workers": 8, # number of workers for data loading
263
+ "K": 32, # maximum number of nodes (parts) in the graph (object)
264
+ "split_file": "/home/users/ruiqi.wu/singapo/data/data_split.json",
265
+ "n_views_per_model": 5,
266
+ "root": "/home/users/ruiqi.wu/manipulate_3d_generate/data/blender_version",
267
+ "frame_mode": "last_frame"
268
+ }
269
+ hparams = EnhancedNamespace(**hparams)
270
+ with open(hparams.split_file , "r") as f:
271
+ splits = json.load(f)
272
+
273
+ train_ids = splits["train"]
274
+ val_ids = [i for i in train_ids if "augmented" not in i]
275
+
276
+ val_ids = [val_id for val_id in val_ids if os.path.exists(os.path.join(hparams.root, val_id, "features"))]
277
+
278
+ dataset = MyDataset(hparams, model_ids=val_ids[:20], mode="valid")
279
+ for i in range(20):
280
+ data, cond, feat = dataset.__getitem__(i)
281
+ import ipdb
282
+ ipdb.set_trace()
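
`_cache_data_non_train` hard-codes the view files it reads (`18.npy`/`19.npy` and the matching `animation_*.mp4` videos), so each model directory is expected to follow a fixed layout. A small sketch of that layout check, with paths taken from the loader above:

```python
# Sketch of the per-model directory layout MyDataset expects at val/test time.
import os

def check_model_dir(root, json_root, model_id, json_name="object.json"):
    expected = [
        os.path.join(json_root, model_id, json_name),               # GT diffuse_tree + meta
        os.path.join(root, model_id, "features", "18.npy"),         # DINOv2 patch features, view 18
        os.path.join(root, model_id, "features", "19.npy"),         # DINOv2 patch features, view 19
        os.path.join(root, model_id, "imgs", "animation_18.mp4"),   # rendered articulation video
        os.path.join(root, model_id, "imgs", "animation_19.mp4"),
    ]
    return {path: os.path.exists(path) for path in expected}
```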
dataset/utils.py ADDED
@@ -0,0 +1,194 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
3
+ import numpy as np
4
+ from PIL import Image
5
+ from my_utils.refs import joint_ref, sem_ref
6
+
7
+ def rescale_axis(jtype, axis_d, axis_o, box_center):
8
+ '''
9
+ Function to rescale the axis for rendering
10
+
11
+ Args:
12
+ - jtype (int): joint type
13
+ - axis_d (np.array): axis direction
14
+ - axis_o (np.array): axis origin
15
+ - box_center (np.array): bounding box center
16
+
17
+ Returns:
18
+ - center (np.array): rescaled axis origin
19
+ - axis_d (np.array): rescaled axis direction
20
+ '''
21
+ if jtype == 0 or jtype == 1:
22
+ return [0., 0., 0.], [0., 0., 0.]
23
+ if jtype == 3 or jtype == 4:
24
+ center = box_center
25
+ else:
26
+ center = axis_o + np.dot(axis_d, box_center-axis_o) * axis_d
27
+ return center.tolist(), axis_d.tolist()
28
+
29
+ def make_white_background(src_img):
30
+ '''Make the white background for the input RGBA image.'''
31
+ src_img.load()
32
+ background = Image.new("RGB", src_img.size, (255, 255, 255))
33
+ background.paste(src_img, mask=src_img.split()[3]) # 3 is the alpha channel
34
+ return background
35
+
36
+ def build_graph(tree, K=32):
37
+ '''
38
+ Function to build graph from the node list.
39
+
40
+ Args:
41
+ tree: list of part nodes (the diffuse_tree)
42
+ K: the maximum number of nodes in the graph
43
+ Returns:
44
+ adj: adjacency matrix, records the 1-ring relationship (parent+children) between nodes
45
+ parents: parent index of each node (-1 for the root)
46
+ '''
47
+ adj = np.zeros((K, K), dtype=np.float32)
48
+ parents = []
49
+ for node in tree:
50
+ # 1-ring relationship
51
+ if node['parent'] != -1:
52
+ adj[node['id'], node['parent']] = 1
53
+ parents.append(node['parent'])
54
+ else:
55
+ adj[node['id'], node['id']] = 1
56
+ parents.append(-1)
57
+ for child_id in node['children']:
58
+ adj[node['id'], child_id] = 1
59
+
60
+ return {
61
+ 'adj': adj,
62
+ 'parents': np.array(parents, dtype=np.int8)
63
+ }
64
+
65
+ def load_input_from(pred_file, K=32):
66
+ '''
67
+ Function to parse input item from a file containing the predicted graph
68
+ '''
69
+
70
+ cond = {} # conditional information and axillary data
71
+ # prepare node data
72
+ n_nodes = len(pred_file['diffuse_tree'])
73
+ # prepare graph
74
+ pred_graph = build_graph(pred_file['diffuse_tree'], K)
75
+
76
+ # attr mask (for Local Attention)
77
+ attr_mask = np.eye(K, K, dtype=bool)
78
+ attr_mask = attr_mask.repeat(5, axis=0).repeat(5, axis=1)
79
+ cond['attr_mask'] = attr_mask
80
+
81
+ # key padding mask (for Global Attention)
82
+ pad_mask = np.zeros((K*5, K*5), dtype=bool)
83
+ pad_mask[:, :n_nodes*5] = 1
84
+ cond['key_pad_mask'] = pad_mask
85
+
86
+ # adj mask (for Graph Relation Attention)
87
+ adj_mask = pred_graph['adj'][:].astype(bool)
88
+ adj_mask = adj_mask.repeat(5, axis=0).repeat(5, axis=1)
89
+ adj_mask[n_nodes*5:, :] = 1
90
+ cond['adj_mask'] = adj_mask
91
+
92
+ # placeholder
93
+ data = np.zeros((K*5, 6), dtype=bool)
94
+ cond['cat'] = 2
95
+
96
+ # axillary info
97
+ cond['adj'] = pred_graph['adj']
98
+ cond['parents'] = np.zeros(K, dtype=np.int8)
99
+ cond['parents'][:n_nodes] = pred_graph['parents']
100
+ cond['n_nodes'] = n_nodes
101
+
102
+ return data, cond
103
+
104
+ def convert_data_range(x):
105
+ '''postprocessing: convert the raw model output to the original range, following CAGE'''
106
+ x = x.reshape(-1, 30) # (K, 36)
107
+ aabb_max = x[:, 0:3]
108
+ aabb_min = x[:, 3:6]
109
+ center = (aabb_max + aabb_min) / 2.0
110
+ size = (aabb_max - aabb_min).clip(min=5e-3)
111
+
112
+ j_type = np.mean(x[:, 6:12], axis=1)
113
+ j_type = ((j_type + 0.5) * 5).clip(min=1.0, max=5.0).round()
114
+
115
+ axis_d = x[:, 12:15]
116
+ axis_d = axis_d / (
117
+ np.linalg.norm(axis_d, axis=1, keepdims=True) + np.finfo(float).eps
118
+ )
119
+ axis_o = x[:, 15:18]
120
+
121
+ j_range = (x[:, 18:20] + x[:, 20:22] + x[:, 22:24]) / 3
122
+ j_range = j_range.clip(min=-1.0, max=1.0)
123
+ j_range[:, 0] = j_range[:, 0] * 360
124
+ j_range[:, 1] = j_range[:, 1]
125
+
126
+ label = np.mean(x[:, 24:30], axis=1)
127
+ label = ((label + 0.8) * 5).clip(min=0.0, max=7.0).round()
128
+ return {
129
+ "center": center,
130
+ "size": size,
131
+ "type": j_type,
132
+ "axis_d": axis_d,
133
+ "axis_o": axis_o,
134
+ "range": j_range,
135
+ "label": label,
136
+ }
137
+
138
+ def parse_tree(data, n_nodes, par, adj):
139
+ tree = []
140
+ # convert to json format
141
+ for i in range(n_nodes):
142
+ node = {"id": i}
143
+ node["name"] = sem_ref["bwd"][int(data["label"][i].item())]
144
+ node["parent"] = int(par[i])
145
+ node["children"] = [
146
+ int(child) for child in np.where(adj[i] == 1)[0] if child != par[i]
147
+ ]
148
+ node["aabb"] = {}
149
+ node["aabb"]["center"] = data["center"][i].tolist()
150
+ node["aabb"]["size"] = data["size"][i].tolist()
151
+ node["joint"] = {}
152
+ if node['name'] == 'base':
153
+ node["joint"]["type"] = 'fixed'
154
+ else:
155
+ node["joint"]["type"] = joint_ref["bwd"][int(data["type"][i].item())]
156
+ if node["joint"]["type"] == "fixed":
157
+ node["joint"]["range"] = [0.0, 0.0]
158
+ elif node["joint"]["type"] == "revolute":
159
+ node["joint"]["range"] = [0.0, float(data["range"][i][0])]
160
+ elif node["joint"]["type"] == "continuous":
161
+ node["joint"]["range"] = [0.0, 360.0]
162
+ elif (
163
+ node["joint"]["type"] == "prismatic" or node["joint"]["type"] == "screw"
164
+ ):
165
+ node["joint"]["range"] = [0.0, float(data["range"][i][1])]
166
+ node["joint"]["axis"] = {}
167
+ # relocate the axis to visualize well
168
+ axis_o, axis_d = rescale_axis(
169
+ int(data["type"][i].item()),
170
+ data["axis_d"][i],
171
+ data["axis_o"][i],
172
+ data["center"][i],
173
+ )
174
+ node["joint"]["axis"]["direction"] = axis_d
175
+ node["joint"]["axis"]["origin"] = axis_o
176
+ # append node to the tree
177
+ tree.append(node)
178
+ return tree
179
+
180
+ def convert_json(x, c, prefix=''):
181
+ out = {"meta": {}, "diffuse_tree": []}
182
+ n_nodes = c[f"{prefix}n_nodes"][0].item()
183
+ par = c[f"{prefix}parents"][0].cpu().numpy().tolist()
184
+ adj = c[f"{prefix}adj"][0].cpu().numpy()
185
+ np.fill_diagonal(adj, 0) # remove self-loop for the root node
186
+ if f"{prefix}obj_cat" in c:
187
+ out["meta"]["obj_cat"] = c[f"{prefix}obj_cat"][0]
188
+
189
+ # convert the data to original range
190
+ data = convert_data_range(x)
191
+ # parse the tree
192
+ tree = parse_tree(data, n_nodes, par, adj)
193
+ out["diffuse_tree"] = tree
194
+ return out
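
`convert_data_range` undoes the per-node encoding used in `_prepare_node_data`: each node is 30 numbers laid out as `[aabb_max(3), aabb_min(3), joint_type*6, axis_dir(3), axis_origin(3), joint_range*3, label*6]`. A minimal decoding sketch with illustrative values (the integer joint-type and label codes themselves come from `my_utils.refs`, not shown here):

```python
# Decoding sketch for convert_data_range(); the numbers are illustrative only.
import numpy as np
from dataset.utils import convert_data_range

node = np.concatenate([
    [0.3, 0.5, 0.1],       # aabb_max
    [-0.3, -0.5, -0.1],    # aabb_min
    [0.0] * 6,             # joint-type channel, decoded as round((v + 0.5) * 5)
    [0.0, 0.0, 0.7],       # axis direction (renormalized to unit length on decode)
    [0.0, 0.0, 0.0],       # axis origin
    [0.25, 0.0] * 3,       # joint range; the first entry is rescaled by 360 on decode
    [0.0] * 6,             # semantic-label channel, decoded as round((v + 0.8) * 5)
]).astype(np.float32)

out = convert_data_range(node)
print(out["center"][0])    # -> [0. 0. 0.]
print(out["size"][0])      # -> [0.6 1.  0.2]
print(out["range"][0])     # -> [90.  0.]
```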
inference.py ADDED
@@ -0,0 +1,450 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
3
+ import json
4
+ import torch
5
+ import argparse
6
+ import numpy as np
7
+ from PIL import Image, ImageOps
8
+ import imageio
9
+ # from my_utils.plot import viz_graph
10
+ from my_utils.misc import load_config
11
+ import torchvision.transforms as T
12
+ from diffusers import DDPMScheduler
13
+ from models.denoiser import Denoiser
14
+ from scripts.json2urdf import create_urdf_from_json, pybullet_render
15
+ from dataset.utils import make_white_background, load_input_from, convert_data_range, parse_tree
16
+ import models
17
+ import torch.nn.functional as F
18
+ from io import BytesIO
19
+ import base64
20
+ from scripts.graph_pred.api import predict_graph_twomode, gpt_infer_image_category
21
+ import subprocess
22
+ import spaces
23
+ import time
24
+
25
+
26
+ cat_ref = {
27
+ "Table": 0,
28
+ "Dishwasher": 1,
29
+ "StorageFurniture": 2,
30
+ "Refrigerator": 3,
31
+ "WashingMachine": 4,
32
+ "Microwave": 5,
33
+ "Oven": 6,
34
+ }
35
+
36
+ def run_retrieve(src_dir, json_name, data_root):
37
+ fn_call = ['python', 'scripts/mesh_retrieval/retrieve.py', '--src_dir', src_dir, '--json_name', json_name, '--gt_data_root', data_root]
38
+ try:
39
+ subprocess.run(fn_call, check=True, stderr=subprocess.STDOUT)
40
+ except subprocess.CalledProcessError as e:
41
+ print(f'Error from run_retrieve: {src_dir}')
42
+ print(f'Error: {e}')
43
+
44
+ def make_white_background(src_img):
45
+ '''Make the white background for the input RGBA image.'''
46
+ src_img.load()
47
+ background = Image.new("RGB", src_img.size, (255, 255, 255))
48
+ background.paste(src_img, mask=src_img.split()[3]) # 3 is the alpha channel
49
+ return background
50
+
51
+ def pad_to_square(img, fill=0):
52
+ """Pad image to square with given fill value (default: 0 = black)."""
53
+ width, height = img.size
54
+ if width == height:
55
+ return img
56
+ max_side = max(width, height)
57
+ delta_w = max_side - width
58
+ delta_h = max_side - height
59
+ padding = (delta_w // 2, delta_h // 2, delta_w - delta_w // 2, delta_h - delta_h // 2)
60
+ return ImageOps.expand(img, padding, fill=fill)
61
+
62
+ def load_img(img_path):
63
+ transform = T.Compose([
64
+ T.Resize((224, 224), interpolation=T.InterpolationMode.BICUBIC),
65
+ T.ToTensor(),
66
+ T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
67
+ ])
68
+ with Image.open(img_path) as img:
69
+ if img.mode == 'RGBA':
70
+ img = make_white_background(img)
71
+ img = img.convert('RGB') # Ensure it's 3-channel for normalization
72
+ img = pad_to_square(img, fill=0)
73
+ img = transform(img)
74
+ img_batch = img.unsqueeze(0).cuda()
75
+
76
+ return img_batch
77
+
78
+
79
+ def load_frame_with_imageio(frame):
80
+ """
81
+ Process a single frame into the input format expected by the DINO model.
82
+ """
83
+ transform = T.Compose([
84
+ T.Resize((224, 224), interpolation=T.InterpolationMode.BICUBIC),
85
+ T.ToTensor(),
86
+ T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
87
+ ])
88
+ img = Image.fromarray(frame) # convert to a PIL image
89
+ if img.mode == 'RGBA':
90
+ img = make_white_background(img)
91
+ img = transform(img) # apply the preprocessing transforms
92
+ return img.unsqueeze(0).cuda() # add the batch dimension
93
+
94
+ def read_video_as_batch_with_imageio(video_path):
95
+ """
96
+ Read a video with imageio and process all frames into a batch of shape (B, C, H, W).
97
+ """
98
+ reader = imageio.get_reader(video_path)
99
+ batch_frames = []
100
+
101
+ try:
102
+ for frame in reader:
103
+ # load the frame and process it to shape (1, C, H, W)
104
+ processed_frame = load_frame_with_imageio(frame)
105
+ batch_frames.append(processed_frame)
106
+
107
+ reader.close()
108
+ if batch_frames:
109
+ return torch.cat(batch_frames, dim=0).cuda() # stack along the batch dimension and move to the GPU
110
+ else:
111
+ print("视频没有有效帧")
112
+ return None
113
+ except Exception as e:
114
+ print(f"处理视频时出错: {e}")
115
+ return None
116
+
117
+ def extract_dino_feature(img_path_1, img_path_2):
118
+ print('Extracting DINO feature...')
119
+ feat_1 = load_img(img_path_1)
120
+ feat_2 = load_img(img_path_2)
121
+ frames = torch.cat([feat_1, feat_2], dim=0)
122
+ dinov2_vitb14_reg = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14_reg', pretrained=True).cuda()
123
+ print('step4')
124
+ with torch.no_grad():
125
+ feat = dinov2_vitb14_reg.forward_features(frames)["x_norm_patchtokens"]
126
+ # release the GPU memory of the model
127
+ feat_input = torch.cat([feat[0], feat[-1]], dim=0).unsqueeze(0)
128
+ print('Extracting DINO feature over')
129
+ torch.cuda.empty_cache()
130
+ return feat_input
131
+
132
+ def set_scheduler(n_steps=100):
133
+ scheduler = DDPMScheduler(num_train_timesteps=1000, beta_schedule='linear', prediction_type='epsilon')
134
+ scheduler.set_timesteps(n_steps)
135
+ return scheduler
136
+
137
+ def prepare_model_input(data, cond, feat, n_samples):
138
+ # attention masks
139
+ attr_mask = torch.from_numpy(cond['attr_mask']).unsqueeze(0).repeat(n_samples, 1, 1)
140
+ key_pad_mask = torch.from_numpy(cond['key_pad_mask'])
141
+ graph_mask = torch.from_numpy(cond['adj_mask'])
142
+ # input image feature
143
+ f = feat.repeat(n_samples, 1, 1)
144
+ # input noise
145
+ B, C = data.shape
146
+ noise = torch.randn([n_samples, B, C], dtype=torch.float32)
147
+ # dummy image feature (used for guided diffusion)
148
+ dummy_feat = torch.from_numpy(np.load('systems/dino_dummy.npy').astype(np.float32))
149
+ dummy_feat = dummy_feat.unsqueeze(0).repeat(n_samples, 1, 1)
150
+ # dummy object category
151
+ cat = torch.zeros(1, dtype=torch.long).repeat(n_samples)
152
+ return {
153
+ "noise": noise.cuda(),
154
+ "attr_mask": attr_mask.cuda(),
155
+ "key_pad_mask": key_pad_mask.cuda(),
156
+ "graph_mask": graph_mask.cuda(),
157
+ "dummy_f": dummy_feat.cuda(),
158
+ 'cat': cat.cuda(),
159
+ 'f': f.cuda(),
160
+ }
161
+
162
+ def prepare_model_input_nocond(feat, n_samples):
163
+ # attention masks
164
+ cond_example = np.zeros((32*5, 32*5), dtype=bool)
165
+ attr_mask = np.eye(32, 32, dtype=bool)
166
+ attr_mask = attr_mask.repeat(5, axis=0).repeat(5, axis=1)
167
+ attr_mask = torch.from_numpy(attr_mask).unsqueeze(0).repeat(n_samples, 1, 1)
168
+ key_pad_mask = torch.from_numpy(cond_example).unsqueeze(0).repeat(n_samples, 1, 1)
169
+ graph_mask = torch.from_numpy(cond_example).unsqueeze(0).repeat(n_samples, 1, 1)
170
+ # input image feature
171
+ f = feat.repeat(n_samples, 1, 1)
172
+ # input noise
173
+ data = np.zeros((32*5, 6), dtype=bool)
174
+ noise = torch.randn(data.shape, dtype=torch.float32).repeat(n_samples, 1, 1)
175
+ # dummy image feature (used for guided diffusion)
176
+ dummy_feat = torch.from_numpy(np.load('systems/dino_dummy.npy').astype(np.float32))
177
+ dummy_feat = dummy_feat.unsqueeze(0).repeat(n_samples, 1, 1)
178
+ # dummy object category
179
+ cat = torch.zeros(1, dtype=torch.long).repeat(n_samples)
180
+ return {
181
+ "noise": noise.cuda(),
182
+ "attr_mask": attr_mask.cuda(),
183
+ "key_pad_mask": key_pad_mask.cuda(),
184
+ "graph_mask": graph_mask.cuda(),
185
+ "dummy_f": dummy_feat.cuda(),
186
+ 'cat': cat.cuda(),
187
+ 'f': f.cuda(),
188
+ }
189
+
190
+ def save_graph(pred_graph, save_dir):
191
+ print(f'Saving the predicted graph to {save_dir}/pred_graph.json')
192
+ # save the response
193
+ with open(os.path.join(save_dir, "pred_graph.json"), "w") as f:
194
+ json.dump(pred_graph, f, indent=4)
195
+ # Visualize the graph
196
+ # img_graph = Image.fromarray(viz_graph(pred_graph))
197
+ # img_graph.save(os.path.join(save_dir, "pred_graph.png"))
198
+
199
+ def forward(model, scheduler, inputs, omega=0.5):
200
+ print('Running inference...')
201
+ noisy_x = inputs['noise']
202
+ for t in scheduler.timesteps:
203
+ timesteps = torch.tensor([t], device=inputs['noise'].device)
204
+ outputs_cond = model(
205
+ x=noisy_x,
206
+ cat=inputs['cat'],
207
+ timesteps=timesteps,
208
+ feat=inputs['f'],
209
+ key_pad_mask=inputs['key_pad_mask'],
210
+ graph_mask=inputs['graph_mask'],
211
+ attr_mask=inputs['attr_mask'],
212
+ label_free=True,
213
+ ) # take the conditional image features as input
214
+ if omega != 0:
215
+ outputs_free = model(
216
+ x=noisy_x,
217
+ cat=inputs['cat'],
218
+ timesteps=timesteps,
219
+ feat=inputs['dummy_f'],
220
+ key_pad_mask=inputs['key_pad_mask'],
221
+ graph_mask=inputs['graph_mask'],
222
+ attr_mask=inputs['attr_mask'],
223
+ label_free=True,
224
+ ) # take the dummy DINO features for the condition-free mode
225
+ noise_pred = (1 + omega) * outputs_cond['noise_pred'] - omega * outputs_free['noise_pred']
226
+ else:
227
+ noise_pred = outputs_cond['noise_pred']
228
+ noisy_x = scheduler.step(noise_pred, t, noisy_x).prev_sample
229
+ return noisy_x
230
+
231
+ def _convert_json(x, c):
232
+ out = {"meta": {}, "diffuse_tree": []}
233
+ n_nodes = c["n_nodes"]
234
+ par = c["parents"].tolist()
235
+ adj = c["adj"]
236
+ np.fill_diagonal(adj, 0) # remove self-loop for the root node
237
+ if "obj_cat" in c:
238
+ out["meta"]["obj_cat"] = c["obj_cat"]
239
+
240
+ # convert the data to original range
241
+ data = convert_data_range(x)
242
+ # parse the tree
243
+ out["diffuse_tree"] = parse_tree(data, n_nodes, par, adj)
244
+ return out
245
+
246
+ def post_process(output, cond, save_root, gt_data_root, visualize=False):
247
+ print('Post-processing...')
248
+ N = output.shape[0]
249
+ for i in range(N):
250
+ cond_n = {}
251
+ cond_n['n_nodes'] = cond['n_nodes'][i]
252
+ cond_n['parents'] = cond['parents'][i]
253
+ cond_n['adj'] = cond['adj'][i]
254
+ cond_n['obj_cat'] = cond['cat']
255
+ # convert the raw model output to the json format
256
+ out_json = _convert_json(output, cond_n)
257
+ save_dir = os.path.join(save_root, str(i))
258
+ os.makedirs(save_dir, exist_ok=True)
259
+ with open(os.path.join(save_dir, "object.json"), "w") as f:
260
+ json.dump(out_json, f, indent=4)
261
+
262
+
263
+ # retrieve part meshes (call python script)
264
+ # print(f"Retrieving part meshes for the object {i}...")
265
+ # os.system(f"python scripts/mesh_retrieval/retrieve.py --src_dir {save_dir} --json_name object.json --gt_data_root {gt_data_root}")
266
+
267
+
268
+
269
+
270
+ def load_model(ckpt_path, config):
271
+ print('Loading model from checkpoint...')
272
+ model = models.make(config.name, config)
273
+ state_dict = torch.load(ckpt_path)
274
+ state_dict = {k.replace("model.", ""): v for k, v in state_dict.items()}
275
+ model.load_state_dict(state_dict)
276
+ model.eval()
277
+ return model.cuda()
278
+
279
+ def convert_pred_graph(pred_graph):
280
+ cond = {}
281
+ B, K = pred_graph.shape[:2]
282
+ adj = np.zeros((B, K, K), dtype=np.float32)
283
+ padding = np.zeros((B, 5 * K, 5 * K), dtype=bool)
284
+ parents = np.zeros((B, K), dtype=np.int32)
285
+ n_nodes = np.zeros((B,), dtype=np.int32)
286
+ for b in range(B):
287
+ node_len = 0
288
+ for k in range(K):
289
+ if pred_graph[b, k] == k and k > 0:
290
+ node_len = k
291
+ break
292
+ node = pred_graph[b, k]
293
+ adj[b, k, node] = 1
294
+ adj[b, node, k] = 1
295
+ parents[b, k] = node
296
+ adj[b, node_len:] = 1
297
+ padding[b, :, :5 * node_len] = 1
298
+ parents[b, 0] = -1
299
+ n_nodes[b] = node_len
300
+ adj_mask = adj.astype(bool).repeat(5, axis=1).repeat(5, axis=2)
301
+ attr_mask = np.eye(32, 32, dtype=bool)
302
+ attr_mask = attr_mask.repeat(5, axis=0).repeat(5, axis=1)
303
+
304
+ cond['adj_mask'] = adj_mask
305
+ cond['attr_mask'] = attr_mask
306
+ cond['key_pad_mask'] = padding
307
+
308
+ cond['adj'] = adj
309
+ cond['parents'] = parents
310
+ cond['n_nodes'] = n_nodes
311
+ cond['cat'] = 'StorageFurniture'
312
+
313
+ data = np.zeros((32*5, 6), dtype=bool)
314
+
315
+ return data, cond
316
+
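For reference, a small shape check of convert_pred_graph (with the function above in scope) under an assumed three-part object, a base with two children; the first index k > 0 whose value equals its own position marks the padded end of the graph:

import numpy as np

row = np.arange(32)
row[1] = 0  # part 1 is a child of the base (part 0)
row[2] = 0  # part 2 is a child of the base as well
data, cond = convert_pred_graph(row[None, :])  # batch of one; the demo passes a torch tensor, but NumPy behaves the same for this check
# cond['n_nodes'][0] == 3 and cond['parents'][0, :3] == [-1, 0, 0]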
317
+ def bfs_tree_simple(tree_list):
318
+ order = [0] * len(tree_list)
319
+ queue = []
320
+ current_node_idx = 0
321
+ for node_idx, node in enumerate(tree_list):
322
+ if node['parent'] == -1:
323
+ queue.append(node['id'])
324
+ order[current_node_idx] = node_idx
325
+ current_node_idx += 1
326
+ break
327
+ while len(queue) > 0:
328
+ current_node = queue.pop(0)
329
+ for node_idx, node in enumerate(tree_list):
330
+ if node['parent'] == current_node:
331
+ queue.append(node['id'])
332
+ order[current_node_idx] = node_idx
333
+ current_node_idx += 1
334
+
335
+ return order
336
+
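A quick illustration of the BFS ordering on a hypothetical three-node tree (ids and parents are made up for the example):

toy_tree = [
    {"id": 0, "parent": 2},   # child of the root
    {"id": 1, "parent": 0},   # grandchild
    {"id": 2, "parent": -1},  # root
]
print(bfs_tree_simple(toy_tree))  # [2, 0, 1]: root first, then its child, then the grandchild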
337
+ def get_graph_from_gpt(img_path_1, img_path_2):
338
+ first_img = Image.open(img_path_1)
339
+ first_img_data = first_img.resize((1024, 1024))
340
+ buffer = BytesIO()
341
+ first_img_data.save(buffer, format="PNG")
342
+ buffer.seek(0)
343
+ # encode the image as base64
344
+ first_encoded_image = base64.b64encode(buffer.read()).decode("utf-8")
345
+
346
+
347
+ second_img = Image.open(img_path_2)
348
+ second_img_data = second_img.resize((1024, 1024))
349
+ buffer = BytesIO()
350
+ second_img_data.save(buffer, format="PNG")
351
+ buffer.seek(0)
352
+ # encode the image as base64
353
+ second_encoded_image = base64.b64encode(buffer.read()).decode("utf-8")
354
+
355
+ pred_gpt = predict_graph_twomode('', first_img_data=first_encoded_image, second_img_data=second_encoded_image)
356
+ print(pred_gpt)
357
+ pred_graph = pred_gpt['diffuse_tree']
358
+ # order = bfs_tree_simple(pred_graph)
359
+ # pred_graph = [pred_graph[i] for i in order]
360
+
361
+
362
+ # generate array [0, 1, 2, ..., 31] for init
363
+ graph_array = np.array([i for i in range(32)])
364
+ for node_idx, node in enumerate(pred_graph):
365
+ if node['parent'] == -1:
366
+ graph_array[node_idx] = node_idx
367
+ else:
368
+ graph_array[node_idx] = node['parent']
369
+
370
+ # new axis for batch
371
+ graph_array = np.expand_dims(graph_array, axis=0)
372
+
373
+ cat_str = gpt_infer_image_category(first_encoded_image, second_encoded_image)
374
+
375
+ return torch.from_numpy(graph_array).cuda().repeat(3, 1), cat_str
376
+
377
+ @spaces.GPU
378
+ def run_demo(args):
379
+ # extract DINOV2 feature from the input image
380
+ t1 = time.time()
381
+ feat = extract_dino_feature(args.img_path_1, args.img_path_2)
382
+ t2 = time.time()
383
+ print(f'Extracted DINO feature in {t2 - t1:.2f} seconds')
384
+ scheduler = set_scheduler(args.n_denoise_steps)
385
+ # load the checkpoint of the model
386
+ model = load_model(args.ckpt_path, args.config.system.model)
387
+
388
+ # inference
389
+ with torch.no_grad():
390
+ t3 = time.time()
391
+ pred_graph, cat_str = get_graph_from_gpt(args.img_path_1, args.img_path_2)
392
+ t4 = time.time()
393
+ print(f'Got the predicted graph in {t4 - t3:.2f} seconds')
394
+ print(pred_graph)
395
+ data, cond = convert_pred_graph(pred_graph)
396
+ inputs = prepare_model_input(data, cond, feat, n_samples=args.n_samples)
397
+
398
+ # Update the object category
399
+ cond['cat'] = cat_str
400
+ inputs['cat'][:] = cat_ref[cat_str]
401
+ print(f'Object category predicted by GPT: {cat_str}, {cat_ref[cat_str]}')
402
+
403
+ output = forward(model, scheduler, inputs, omega=args.omega).cpu().numpy()
404
+ t5 = time.time()
405
+ print(f'Forwarded the model in {t5 - t4:.2f} seconds')
406
+
407
+ # post-process
408
+ post_process(output, cond, args.save_dir, args.gt_data_root, visualize=True)
409
+
410
+ # retrieve
411
+ for sample in os.listdir(args.save_dir):
412
+ sample_dir = os.path.join(args.save_dir, sample)
413
+ t6 = time.time()
414
+ run_retrieve(sample_dir, 'object.json', args.gt_data_root)
415
+ t7 = time.time()
416
+ print(f'Retrieved part meshes for {sample} in {t7 - t6:.2f} seconds')
417
+
418
+ save_json_path = os.path.join(args.save_dir, "0", "object.json")
419
+ with open(save_json_path, 'r') as file:
420
+ json_data = json.load(file)
421
+ create_urdf_from_json(json_data, save_json_path.replace('.json', '.urdf'))
422
+ pybullet_render(save_json_path.replace('.json', '.urdf'), os.path.join(args.save_dir, "0"), 8)
423
+
424
+
425
+ if __name__ == '__main__':
426
+ '''
427
+ Script for running the inference on an example image input.
428
+ '''
429
+ parser = argparse.ArgumentParser()
430
+ parser.add_argument("--img_path_1", type=str, default='examples/1.png', help="path to the first input image")
431
+ parser.add_argument("--img_path_2", type=str, default='examples/1_open_1.png', help="path to the second input image (the same object in a different articulation state)")
432
+ parser.add_argument("--ckpt_path", type=str, default='exps/singapo/final/ckpts/last.ckpt', help="path to the checkpoint of the model")
433
+ parser.add_argument("--config_path", type=str, default='exps/singapo/final/config/parsed.yaml', help="path to the config file")
434
+ parser.add_argument("--use_example_graph", action="store_true", default=False, help="if you don't have an OpenAI API key yet, turn this on to use the example graph for inference")
435
+ parser.add_argument("--save_dir", type=str, default='results', help="path to save the output")
436
+ parser.add_argument("--gt_data_root", type=str, default='./', help="the root directory of the original data, used for part mesh retrieval")
437
+ parser.add_argument("--n_samples", type=int, default=3, help="number of samples to generate given the input")
438
+ parser.add_argument("--omega", type=float, default=0.5, help="the weight of the condition-free mode in the inference")
439
+ parser.add_argument("--n_denoise_steps", type=int, default=100, help="number of denoising steps")
440
+ args = parser.parse_args()
441
+
442
+ assert os.path.exists(args.img_path_1), "The input image does not exist"
443
+ # assert os.path.exists(args.ckpt_path), "The checkpoint does not exist"
444
+ assert os.path.exists(args.config_path), "The config file does not exist"
445
+ os.makedirs(args.save_dir, exist_ok=True)
446
+
447
+ config = load_config(args.config_path)
448
+ args.config = config
449
+
450
+ run_demo(args)
lightning_logs/version_0/hparams.yaml ADDED
@@ -0,0 +1 @@
1
+ {}
lightning_logs/version_1/hparams.yaml ADDED
@@ -0,0 +1 @@
1
+ {}
lightning_logs/version_2/hparams.yaml ADDED
@@ -0,0 +1 @@
1
+ {}
lightning_logs/version_3/hparams.yaml ADDED
@@ -0,0 +1 @@
1
+ {}
lightning_logs/version_4/hparams.yaml ADDED
@@ -0,0 +1 @@
1
+ {}
lightning_logs/version_5/hparams.yaml ADDED
@@ -0,0 +1 @@
1
+ {}
lightning_logs/version_6/hparams.yaml ADDED
@@ -0,0 +1,111 @@
1
+ name: sys_origin
2
+ exp_dir: ./exps/dipo/denoiser
3
+ data_root: /horizon-bucket/robot_lab/users/ruiqi.wu/robot/dataset/blender
4
+ n_time_samples: 16
5
+ loss_fg_weight: 0.01
6
+ img_drop_prob: 0.1
7
+ guidance_scaler: 0.5
8
+ graph_drop_prob: 0.5
9
+ model:
10
+ name: denoiser
11
+ in_ch: 6
12
+ attn_dim: 128
13
+ n_head: 4
14
+ n_layers: 6
15
+ dropout: 0.1
16
+ K: 32
17
+ mode_num: 5
18
+ img_emb_dims:
19
+ - 768
20
+ - 128
21
+ cat_drop_prob: 0.5
22
+ scheduler:
23
+ name: ddpm
24
+ config:
25
+ num_train_timesteps: 1000
26
+ beta_schedule: linear
27
+ prediction_type: epsilon
28
+ lr_scheduler_adapter:
29
+ name: LinearWarmupCosineAnnealingLR
30
+ warmup_epochs: 3
31
+ max_epochs: 200
32
+ warmup_start_lr: 1.0e-06
33
+ eta_min: 1.0e-05
34
+ optimizer_adapter:
35
+ name: AdamW
36
+ args:
37
+ lr: 0.0005
38
+ betas:
39
+ - 0.9
40
+ - 0.99
41
+ eps: 1.0e-15
42
+ lr_scheduler_cage:
43
+ name: LinearWarmupCosineAnnealingLR
44
+ warmup_epochs: 3
45
+ max_epochs: 200
46
+ warmup_start_lr: 1.0e-06
47
+ eta_min: 1.0e-05
48
+ optimizer_cage:
49
+ name: AdamW
50
+ args:
51
+ lr: 5.0e-05
52
+ betas:
53
+ - 0.9
54
+ - 0.99
55
+ eps: 1.0e-15
56
+ hparams:
57
+ name: sys_origin
58
+ exp_dir: ./exps/dipo/denoiser
59
+ data_root: /horizon-bucket/robot_lab/users/ruiqi.wu/robot/dataset/blender
60
+ n_time_samples: 16
61
+ loss_fg_weight: 0.01
62
+ img_drop_prob: 0.1
63
+ guidance_scaler: 0.5
64
+ graph_drop_prob: 0.5
65
+ model:
66
+ name: denoiser
67
+ in_ch: 6
68
+ attn_dim: 128
69
+ n_head: 4
70
+ n_layers: 6
71
+ dropout: 0.1
72
+ K: 32
73
+ mode_num: 5
74
+ img_emb_dims:
75
+ - 768
76
+ - 128
77
+ cat_drop_prob: 0.5
78
+ scheduler:
79
+ name: ddpm
80
+ config:
81
+ num_train_timesteps: 1000
82
+ beta_schedule: linear
83
+ prediction_type: epsilon
84
+ lr_scheduler_adapter:
85
+ name: LinearWarmupCosineAnnealingLR
86
+ warmup_epochs: 3
87
+ max_epochs: 200
88
+ warmup_start_lr: 1.0e-06
89
+ eta_min: 1.0e-05
90
+ optimizer_adapter:
91
+ name: AdamW
92
+ args:
93
+ lr: 0.0005
94
+ betas:
95
+ - 0.9
96
+ - 0.99
97
+ eps: 1.0e-15
98
+ lr_scheduler_cage:
99
+ name: LinearWarmupCosineAnnealingLR
100
+ warmup_epochs: 3
101
+ max_epochs: 200
102
+ warmup_start_lr: 1.0e-06
103
+ eta_min: 1.0e-05
104
+ optimizer_cage:
105
+ name: AdamW
106
+ args:
107
+ lr: 5.0e-05
108
+ betas:
109
+ - 0.9
110
+ - 0.99
111
+ eps: 1.0e-15
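The dump above can be inspected programmatically; a minimal sketch, assuming OmegaConf for the YAML loading (the repo's own load_config may differ):

from omegaconf import OmegaConf

cfg = OmegaConf.load("lightning_logs/version_6/hparams.yaml")
print(cfg.model.attn_dim)             # 128
print(cfg.optimizer_adapter.args.lr)  # 0.0005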
lightning_logs/version_6/metrics.csv ADDED
@@ -0,0 +1,4 @@
1
+ lr-AdamW,lr-AdamW-1,step
2
+ 1e-06,1e-06,0
3
+ 0.0002505,2.5500000000000003e-05,10
4
+ 0.0005,5.000000000000001e-05,20
metrics/__init__.py ADDED
File without changes
metrics/aor.py ADDED
@@ -0,0 +1,44 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ import numpy as np
4
+ from copy import deepcopy
5
+ from metrics.iou import sampling_iou
6
+ from objects.motions import transform_all_parts
7
+
8
+ from objects.dict_utils import get_bbox_vertices
9
+
10
+ '''
11
+ This file computes the Average Overlap Ratio (AOR) metric\n
12
+ '''
13
+
14
+ def AOR(tgt, num_states=10, transform_use_plucker=False):
15
+ tree = tgt["diffuse_tree"]
16
+ states = np.linspace(0, 1, num_states)
17
+ original_bbox_vertices = np.array([get_bbox_vertices(tgt, i) for i in range(len(tgt["diffuse_tree"]))], dtype=np.float32)
18
+
19
+ ious = []
20
+ for state_idx, state in enumerate(states):
21
+ ious_per_state = []
22
+ bbox_vertices = deepcopy(original_bbox_vertices)
23
+ part_trans = transform_all_parts(bbox_vertices, tgt, state, transform_use_plucker)
24
+ for node in tree:
25
+ children = node['children']
26
+ num_children = len(children)
27
+ if num_children < 2:
28
+ continue
29
+ for i in range(num_children-1):
30
+ for j in range(i+1, num_children):
31
+ child_id = children[i]
32
+ sibling_id = children[j]
33
+ bbox_v_0 = deepcopy(bbox_vertices[child_id])
34
+ bbox_v_1 = deepcopy(bbox_vertices[sibling_id])
35
+ iou = sampling_iou(bbox_v_0, bbox_v_1, part_trans[child_id], part_trans[sibling_id], num_samples=10000)
36
+ if np.isnan(iou):
37
+ continue
38
+ ious_per_state.append(iou)
39
+ if len(ious_per_state) > 0:
40
+ ious.append(np.mean(ious_per_state))
41
+ if len(ious) == 0:
42
+ return -1
43
+ return float(np.mean(ious))
44
+
metrics/cd.py ADDED
@@ -0,0 +1,284 @@
1
+ """
2
+ This script computes the Chamfer Distance (CD) between two objects\n
3
+ """
4
+
5
+ import os
6
+ import sys
7
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ import torch
9
+ import numpy as np
10
+ import trimesh
11
+ from copy import deepcopy
12
+ from pytorch3d.structures import Meshes
13
+ from pytorch3d.ops import sample_points_from_meshes
14
+ from pytorch3d.loss import chamfer_distance
15
+ from objects.motions import transform_all_parts
16
+ from objects.dict_utils import (
17
+ zero_center_object,
18
+ rescale_object,
19
+ compute_overall_bbox_size,
20
+ get_base_part_idx,
21
+ find_part_mapping
22
+ )
23
+
24
+ def _load_and_combine_plys(dir, ply_files, scale=None, z_rotate=None, translate=None):
25
+ """
26
+ Load and combine the ply files into one PyTorch3D mesh
27
+
28
+ - dir: the directory of the object in which the ply files are from\n
29
+ - ply_files: the list of ply files\n
30
+ - scale: the scale factor to apply to the vertices\n
31
+ - z_rotate: whether to rotate the object around the z-axis by 90 degrees\n
32
+ - translate: the translation to apply to the vertices\n
33
+
34
+ Return:\n
35
+ - mesh: one PyTorch3D mesh of the combined ply files
36
+ """
37
+
38
+ # Combine the ply files into one
39
+ meshes = []
40
+ for ply_file in ply_files:
41
+ meshes.append(trimesh.load(os.path.join(dir, ply_file), force="mesh"))
42
+ full_part_mesh = trimesh.util.concatenate(meshes)
43
+
44
+ # Apply the transformations
45
+ full_part_mesh.vertices -= full_part_mesh.bounding_box.centroid
46
+ transformation = trimesh.transformations.compose_matrix(
47
+ scale=scale,
48
+ angles=[0, 0, np.radians(90) if z_rotate else 0],
49
+ translate=translate,
50
+ )
51
+ full_part_mesh.apply_transform(transformation)
52
+
53
+ # Create the PyTorch3D mesh
54
+ mesh = Meshes(
55
+ verts=torch.as_tensor(full_part_mesh.vertices, dtype=torch.float32, device='cuda').unsqueeze(
56
+ 0
57
+ ),
58
+ faces=torch.as_tensor(full_part_mesh.faces, dtype=torch.int32, device='cuda').unsqueeze(0),
59
+ )
60
+
61
+ return mesh
62
+
63
+
64
+ def _compute_chamfer_distance(
65
+ obj1_part_points, obj2_part_points, part_mapping=None, exclude_id=-1
66
+ ):
67
+ """
68
+ Compute the chamfer distance between the two set of points representing the two objects
69
+
70
+ - obj1_part_points: the set of points representing the first object\n
71
+ - obj2_part_points: the set of points representing the second object\n
72
+ - part_mapping (optional): the part mapping from the first object to the second object, if provided, the chamfer distance will be computed between the corresponding parts\n
73
+ - exclude_id (optional): the part id to exclude from the chamfer distance computation; when provided, this is typically the base part id\n
74
+
75
+ Return:\n
76
+ - distance: the chamfer distance between the two objects
77
+ """
78
+ if part_mapping is not None:
79
+ n_parts = part_mapping.shape[0]
80
+ distance = 0
81
+ for i in range(n_parts):
82
+ if i == exclude_id:
83
+ continue
84
+ obj1_part_points_i = obj1_part_points[i]
85
+ obj2_part_points_i = obj2_part_points[int(part_mapping[i, 0])]
86
+ with torch.no_grad():
87
+ obj1_part_points_i = obj1_part_points_i.cuda()
88
+ obj2_part_points_i = obj2_part_points_i.cuda()
89
+ # symmetric chamfer distance
90
+ forward_distance, _ = chamfer_distance(
91
+ obj1_part_points_i[None, :],
92
+ obj2_part_points_i[None, :],
93
+ batch_reduction=None,
94
+ )
95
+ backward_distance, _ = chamfer_distance(
96
+ obj2_part_points_i[None, :],
97
+ obj1_part_points_i[None, :],
98
+ batch_reduction=None,
99
+ )
100
+ distance += (forward_distance.item() + backward_distance.item()) * 0.5
101
+ distance /= n_parts
102
+ else:
103
+ # Merge the points of all parts into one tensor
104
+ obj1_part_points = obj1_part_points.reshape(-1, 3)
105
+ obj2_part_points = obj2_part_points.reshape(-1, 3)
106
+
107
+ # Compute the chamfer distance between the two objects
108
+ with torch.no_grad():
109
+ obj1_part_points = obj1_part_points.cuda()
110
+ obj2_part_points = obj2_part_points.cuda()
111
+ forward_distance, _ = chamfer_distance(
112
+ obj1_part_points[None, :],
113
+ obj2_part_points[None, :],
114
+ batch_reduction=None,
115
+ )
116
+ backward_distance, _ = chamfer_distance(
117
+ obj2_part_points[None, :],
118
+ obj1_part_points[None, :],
119
+ batch_reduction=None,
120
+ )
121
+ distance = (forward_distance.item() + backward_distance.item()) * 0.5
122
+
123
+ return distance
124
+
125
+
126
+ def _get_scores(
127
+ src_dict,
128
+ tgt_dict,
129
+ original_src_part_points,
130
+ original_tgt_part_points,
131
+ part_mapping,
132
+ num_states,
133
+ include_base,
134
+ src_base_idx,
135
+ ):
136
+
137
+ chamfer_distances = np.zeros(num_states, dtype=np.float32)
138
+ joint_states = np.linspace(0, 1, num_states)
139
+ for state_idx, state in enumerate(joint_states):
140
+
141
+ # Reset the part point clouds
142
+ src_part_points = deepcopy(original_src_part_points)
143
+ tgt_part_points = deepcopy(original_tgt_part_points)
144
+
145
+ # Transform the part point clouds to the current state using the joints
146
+ transform_all_parts(src_part_points.numpy(), src_dict, state, dry_run=False)
147
+ transform_all_parts(tgt_part_points.numpy(), tgt_dict, state, dry_run=False)
148
+
149
+ # Compute the chamfer distance between the two objects
150
+ chamfer_distances[state_idx] = _compute_chamfer_distance(
151
+ src_part_points,
152
+ tgt_part_points,
153
+ part_mapping=part_mapping,
154
+ exclude_id=-1 if include_base else src_base_idx,
155
+ )
156
+
157
+ # Aggregate into the two scores: AS-CD (mean over the sampled articulation states) and RS-CD (resting state only)
158
+ aid_cd = np.mean(chamfer_distances)
159
+ rid_cd = chamfer_distances[0]
160
+
161
+ return {
162
+ "AS-CD": float(aid_cd),
163
+ "RS-CD": float(rid_cd),
164
+ }
165
+
166
+
167
+ def CD(
168
+ gen_obj_dict,
169
+ gen_obj_path,
170
+ gt_obj_dict,
171
+ gt_obj_path,
172
+ num_states=5,
173
+ num_samples=2048,
174
+ include_base=False,
175
+ ):
176
+ """
177
+ Compute the Chamfer Distance\n
178
+ This metric is the average of per-part chamfer distance between the two objects over a number of articulation states\n
179
+
180
+ - gen_obj_dict: the generated object dictionary\n
181
+ - gen_obj_path: the directory to the predicted object\n
182
+ - gt_obj_dict: the ground truth object dictionary\n
183
+ - gt_obj_path: the directory to the ground truth object\n
184
+ - num_states (optional): the number of articulation states to compute the metric\n
185
+ - num_samples (optional): the number of samples to use\n
186
+ - include_base (optional): whether to include the base part in the chamfer distance computation\n
187
+
188
+ Return:\n
189
+ - aid_score: the score over the sampled articulated states\n
190
+ - rid_score: the score at the resting state\n
191
+ - The score is in the range of [0, inf), lower is better
192
+ """
193
+ # Make copies of the dictionaries to avoid modifying the original dictionaries
194
+ gen_dict = deepcopy(gen_obj_dict)
195
+ gt_dict = deepcopy(gt_obj_dict)
196
+
197
+ # Zero center the objects
198
+ zero_center_object(gen_dict)
199
+ zero_center_object(gt_dict)
200
+
201
+ # Compute the scale factor by comparing the overall bbox size and scale the candidate object as a whole
202
+ gen_bbox_size = compute_overall_bbox_size(gen_dict)
203
+ gt_bbox_size = compute_overall_bbox_size(gt_dict)
204
+ scale_factor = gt_bbox_size / gen_bbox_size
205
+ rescale_object(gen_dict, scale_factor)
206
+
207
+ # Record the indices of the base parts of the two objects
208
+ gen_base_idx = get_base_part_idx(gen_dict)
209
+ gt_base_idx = get_base_part_idx(gt_dict)
210
+
211
+ # Find mapping between the parts of the two objects based on closest bbox centers
212
+ mapping_gen2gt = find_part_mapping(gen_dict, gt_dict, use_hungarian=True)
213
+ mapping_gt2gen = find_part_mapping(gt_dict, gen_dict, use_hungarian=True)
214
+
215
+ # Get the number of parts of the two objects
216
+ gen_tree = gen_dict["diffuse_tree"]
217
+ gt_tree = gt_dict["diffuse_tree"]
218
+ gen_num_parts = len(gen_tree)
219
+ gt_num_parts = len(gt_tree)
220
+
221
+ # Get the paths of the ply files of the two objects
222
+ gen_part_ply_paths = [
223
+ {"dir": gen_obj_path, "files": gen_tree[i]["plys"]}
224
+ for i in range(gen_num_parts)
225
+ ]
226
+ gt_part_ply_paths = [
227
+ {"dir": gt_obj_path, "files": gt_tree[i]["plys"]}
228
+ for i in range(gt_num_parts)
229
+ ]
230
+
231
+ # Load the ply files of the two objects and sample points from them
232
+ gen_part_points = torch.zeros(
233
+ (gen_num_parts, num_samples, 3), dtype=torch.float32
234
+ )
235
+ for i in range(gen_num_parts):
236
+ part_mesh = _load_and_combine_plys(
237
+ gen_part_ply_paths[i]["dir"],
238
+ gen_part_ply_paths[i]["files"],
239
+ scale=scale_factor,
240
+ translate=gen_tree[i]["aabb"]["center"],
241
+ )
242
+ gen_part_points[i] = sample_points_from_meshes(
243
+ part_mesh, num_samples=num_samples
244
+ ).squeeze(0).cpu()
245
+
246
+ gt_part_points = torch.zeros(
247
+ (gt_num_parts, num_samples, 3), dtype=torch.float32
248
+ )
249
+ for i in range(gt_num_parts):
250
+ part_mesh = _load_and_combine_plys(
251
+ gt_part_ply_paths[i]["dir"],
252
+ gt_part_ply_paths[i]["files"],
253
+ translate=gt_tree[i]["aabb"]["center"],
254
+ )
255
+ gt_part_points[i] = sample_points_from_meshes(
256
+ part_mesh, num_samples=num_samples
257
+ ).squeeze(0).cpu()
258
+
259
+ cd_gen2gt = _get_scores(
260
+ gen_dict,
261
+ gt_dict,
262
+ gen_part_points,
263
+ gt_part_points,
264
+ mapping_gen2gt,
265
+ num_states,
266
+ include_base,
267
+ gen_base_idx,
268
+ )
269
+
270
+ cd_gt2gen = _get_scores(
271
+ gt_dict,
272
+ gen_dict,
273
+ gt_part_points,
274
+ gen_part_points,
275
+ mapping_gt2gen,
276
+ num_states,
277
+ include_base,
278
+ gt_base_idx,
279
+ )
280
+
281
+ return {
282
+ "AS-CD": (cd_gen2gt["AS-CD"] + cd_gt2gen["AS-CD"]) / 2,
283
+ "RS-CD": (cd_gen2gt["RS-CD"] + cd_gt2gen["RS-CD"]) / 2,
284
+ }
metrics/giou.py ADDED
@@ -0,0 +1,142 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ import numpy as np
4
+ from metrics.iou import (
5
+ _sample_points_in_box3d,
6
+ _apply_backward_transformations,
7
+ _apply_forward_transformations,
8
+ _count_points_in_box3d,
9
+ )
10
+
11
+
12
+ def giou_aabb(bbox1_vertices, bbox2_verices):
13
+ """
14
+ Compute the generalized IoU between two axis-aligned bounding boxes\n
15
+ - bbox1_vertices: the vertices of the first bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
16
+ - bbox2_vertices: the vertices of the second bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
17
+
18
+ Return:\n
19
+ - giou: the gIoU between the two bounding boxes
20
+ """
21
+ volume1 = np.prod(np.max(bbox1_vertices, axis=0) - np.min(bbox1_vertices, axis=0))
22
+ volume2 = np.prod(np.max(bbox2_verices, axis=0) - np.min(bbox2_verices, axis=0))
23
+
24
+ # Compute the intersection and union of the two bounding boxes
25
+ min_bbox = np.maximum(np.min(bbox1_vertices, axis=0), np.min(bbox2_verices, axis=0))
26
+ max_bbox = np.minimum(np.max(bbox1_vertices, axis=0), np.max(bbox2_verices, axis=0))
27
+ intersection = np.prod(np.clip(max_bbox - min_bbox, a_min=0, a_max=None))
28
+ union = volume1 + volume2 - intersection
29
+ # Compute IoU
30
+ iou = intersection / union if union > 0 else 0
31
+
32
+ # Compute the smallest enclosing box
33
+ min_enclosing_bbox = np.minimum(np.min(bbox1_vertices, axis=0), np.min(bbox2_verices, axis=0))
34
+ max_enclosing_bbox = np.maximum(np.max(bbox1_vertices, axis=0), np.max(bbox2_verices, axis=0))
35
+ volume3 = np.prod(max_enclosing_bbox - min_enclosing_bbox)
36
+
37
+ # Compute gIoU
38
+ giou = iou - (volume3 - union) / volume3 if volume3 > 0 else iou
39
+
40
+ return giou
41
+
42
+
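A quick numeric sanity check for giou_aabb (only the min/max corners matter, so two opposite corners per box are enough): two unit boxes offset by half a unit overlap in half their volume, the enclosing box equals their union, so the gIoU reduces to the plain IoU of 1/3.

import numpy as np
from metrics.giou import giou_aabb

box1 = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
box2 = np.array([[0.5, 0.0, 0.0], [1.5, 1.0, 1.0]])
print(round(giou_aabb(box1, box2), 3))  # 0.333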
43
+ def sampling_giou(
44
+ bbox1_vertices,
45
+ bbox2_vertices,
46
+ bbox1_transformations,
47
+ bbox2_transformations,
48
+ num_samples=10000,
49
+ ):
50
+ """
51
+ Compute the IoU between two bounding boxes\n
52
+ - bbox1_vertices: the vertices of the first bounding box\n
53
+ - bbox2_vertices: the vertices of the second bounding box\n
54
+ - bbox1_transformations: list of transformations applied to the first bounding box\n
55
+ - bbox2_transformations: list of transformations applied to the second bounding box\n
56
+ - num_samples (optional): the number of samples to use per bounding box\n
57
+
58
+ Return:\n
59
+ - iou: the IoU between the two bounding boxes after applying the transformations
60
+ """
61
+ # if no transformations are applied, use the axis-aligned bounding box IoU
62
+ if len(bbox1_transformations) == 0 and len(bbox2_transformations) == 0:
63
+ return giou_aabb(bbox1_vertices, bbox2_vertices)
64
+
65
+ # Volume of the two bounding boxes
66
+ bbox1_volume = np.prod(
67
+ np.max(bbox1_vertices, axis=0) - np.min(bbox1_vertices, axis=0)
68
+ )
69
+ bbox2_volume = np.prod(
70
+ np.max(bbox2_vertices, axis=0) - np.min(bbox2_vertices, axis=0)
71
+ )
72
+ # Volume of the smallest enclosing box
73
+ min_enclosing_bbox = np.minimum(np.min(bbox1_vertices, axis=0), np.min(bbox2_vertices, axis=0))
74
+ max_enclosing_bbox = np.maximum(np.max(bbox1_vertices, axis=0), np.max(bbox2_vertices, axis=0))
75
+ cbbox_volume = np.prod(max_enclosing_bbox - min_enclosing_bbox)
76
+
77
+ # Sample points in the two bounding boxes
78
+ bbox1_points = _sample_points_in_box3d(bbox1_vertices, num_samples)
79
+ bbox2_points = _sample_points_in_box3d(bbox2_vertices, num_samples)
80
+
81
+ # Transform the points
82
+ forward_bbox1_points = _apply_forward_transformations(
83
+ bbox1_points, bbox1_transformations
84
+ )
85
+ forward_bbox2_points = _apply_forward_transformations(
86
+ bbox2_points, bbox2_transformations
87
+ )
88
+
89
+ # Transform the forward points to the other box's rest pose frame
90
+ forward_bbox1_points_in_rest_bbox2_frame = _apply_backward_transformations(
91
+ forward_bbox1_points, bbox2_transformations
92
+ )
93
+ forward_bbox2_points_in_rest_bbox1_frame = _apply_backward_transformations(
94
+ forward_bbox2_points, bbox1_transformations
95
+ )
96
+
97
+ # Count the number of points in the other bounding box
98
+ num_bbox1_points_in_bbox2 = _count_points_in_box3d(
99
+ forward_bbox1_points_in_rest_bbox2_frame, bbox2_vertices
100
+ )
101
+ num_bbox2_points_in_bbox1 = _count_points_in_box3d(
102
+ forward_bbox2_points_in_rest_bbox1_frame, bbox1_vertices
103
+ )
104
+
105
+ # Compute the IoU
106
+ intersect = (
107
+ bbox1_volume * num_bbox1_points_in_bbox2
108
+ + bbox2_volume * num_bbox2_points_in_bbox1
109
+ ) / 2
110
+ union = bbox1_volume * num_samples + bbox2_volume * num_samples - intersect
111
+ iou = intersect / union
112
+
113
+ giou = iou - (cbbox_volume * num_samples - union) / (cbbox_volume * num_samples) if cbbox_volume > 0 else iou
114
+
115
+ return giou
116
+
117
+
118
+ def sampling_cDist(
119
+ part1,
120
+ part2,
121
+ bbox1_transformations,
122
+ bbox2_transformations,
123
+ ):
124
+ '''
125
+ Compute the centroid distance between two bounding boxes\n
126
+ - bbox1_vertices: the vertices of the first bounding box\n
127
+ - bbox2_vertices: the vertices of the second bounding box\n
128
+ - bbox1_transformations: list of transformations applied to the first bounding box\n
129
+ - bbox2_transformations: list of transformations applied to the second bounding box\n
130
+ '''
131
+
132
+ bbox1_centroid = np.array(part1['aabb']['center'], dtype=np.float32).reshape(1, 3)
133
+ bbox2_centroid = np.array(part2['aabb']['center'], dtype=np.float32).reshape(1, 3)
134
+
135
+ # Transform the centroids
136
+ bbox1_transformed_centroids = _apply_forward_transformations(bbox1_centroid, bbox1_transformations)
137
+ bbox2_transformed_centroids = _apply_forward_transformations(bbox2_centroid, bbox2_transformations)
138
+
139
+ # Compute the centroid distance
140
+ cDist = np.linalg.norm(bbox1_transformed_centroids - bbox2_transformed_centroids)
141
+
142
+ return cDist
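A corresponding check for sampling_cDist, using a hypothetical part dict with a centered aabb and a pure 1-unit translation applied to one side:

import numpy as np
from metrics.giou import sampling_cDist

T = np.eye(4)
T[0, 3] = 1.0  # translate 1 unit along x
part = {"aabb": {"center": [0.0, 0.0, 0.0]}}
print(sampling_cDist(part, part, [{"type": "translation", "matrix": T}], []))  # 1.0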
metrics/iou.py ADDED
@@ -0,0 +1,220 @@
1
+ import numpy as np
2
+
3
+
4
+ def _sample_points_in_box3d(bbox_vertices, num_samples):
5
+ """
6
+ Sample points uniformly in an axis-aligned 3D bounding box\n
7
+ - bbox_vertices: the vertices of the bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
8
+ - num_samples: the number of samples to use\n
9
+
10
+ Return:\n
11
+ - points: the sampled points in the form: [[x0, y0, z0], [x1, y1, z1], ...]
12
+ """
13
+
14
+ # Compute the bounding box size
15
+ bbox_size = np.max(bbox_vertices, axis=0) - np.min(bbox_vertices, axis=0)
16
+
17
+ # Sample points in the bounding box
18
+ points = np.random.rand(num_samples, 3) * bbox_size + np.min(bbox_vertices, axis=0)
19
+
20
+ return points
21
+
22
+
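A small check of the sampler above: every sample should fall inside the box spanned by the given vertices.

import numpy as np
from metrics.iou import _sample_points_in_box3d

box = np.array([[0.0, 0.0, 0.0], [2.0, 1.0, 3.0]])
pts = _sample_points_in_box3d(box, 1000)
assert pts.shape == (1000, 3)
assert (pts >= box.min(axis=0)).all() and (pts <= box.max(axis=0)).all()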
23
+ def _apply_forward_transformations(points, transformations):
24
+ """
25
+ Apply forward transformations to the points\n
26
+ - points: the points in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
27
+ - transformations: list of transformations to apply\n
28
+
29
+ Return:\n
30
+ - points_transformed: the transformed points in the form: [[x0, y0, z0], [x1, y1, z1], ...]
31
+ """
32
+ if len(transformations) == 0:
33
+ return points
34
+
35
+ # To homogeneous coordinates
36
+ points_transformed = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
37
+
38
+ # Apply the transformations one by one in order
39
+ for transformation in transformations:
40
+ if transformation["type"] == "translation":
41
+ points_transformed = np.matmul(
42
+ transformation["matrix"], points_transformed.T
43
+ ).T
44
+
45
+ elif transformation["type"] == "rotation":
46
+ axis_origin = np.append(transformation["rotation_axis_origin"], 0)
47
+ points_recentered = points_transformed - axis_origin
48
+
49
+ points_rotated = np.matmul(transformation["matrix"], points_recentered.T).T
50
+ points_transformed = points_rotated + axis_origin
51
+
52
+ elif transformation["type"] == "plucker":
53
+ points_transformed = np.matmul(
54
+ transformation["matrix"], points_transformed.T
55
+ ).T
56
+
57
+ else:
58
+ raise ValueError(f"Unknown transformation type: {transformation['type']}")
59
+
60
+ return points_transformed[..., :3]
61
+
62
+
63
+ def _apply_backward_transformations(points, transformations):
64
+ """
65
+ Apply backward transformations to the points\n
66
+ - points: the points in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
67
+ - transformations: list of transformations to apply\n
68
+ - The inverse of the transformations are applied in reverse order\n
69
+
70
+ Return:\n
71
+ - points_transformed: the transformed points in the form: [[x0, y0, z0], [x1, y1, z1], ...]
72
+
73
+ Reference: https://mathematica.stackexchange.com/questions/106257/how-do-i-get-the-inverse-of-a-homogeneous-transformation-matrix
74
+ """
75
+ if len(transformations) == 0:
76
+ return points
77
+
78
+ # To homogeneous coordinates
79
+ points_transformed = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
80
+
81
+ # Apply the transformations one by one in reverse order
82
+ for transformation in transformations[::-1]:
83
+ inv_transformation = np.eye(4)
84
+ inv_transformation[:3, :3] = transformation["matrix"][:3, :3].T
85
+ inv_transformation[:3, 3] = -np.matmul(
86
+ transformation["matrix"][:3, :3].T, transformation["matrix"][:3, 3]
87
+ )
88
+
89
+ if transformation["type"] == "translation":
90
+ points_transformed = np.matmul(inv_transformation, points_transformed.T).T
91
+
92
+ elif transformation["type"] == "rotation":
93
+ axis_origin = np.append(transformation["rotation_axis_origin"], 0)
94
+ points_recentered = points_transformed - axis_origin
95
+
96
+ points_rotated = np.matmul(inv_transformation, points_recentered.T).T
97
+ points_transformed = points_rotated + axis_origin
98
+
99
+ elif transformation["type"] == "plucker":
100
+ points_transformed = np.matmul(inv_transformation, points_transformed.T).T
101
+
102
+ else:
103
+ raise ValueError(f"Unknown transformation type: {transformation['type']}")
104
+
105
+ return points_transformed[..., :3]
106
+
107
+
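As a sketch of the inverse-transform convention referenced above ([R|t]^-1 = [R^T | -R^T t]), a forward pass followed by a backward pass should round-trip the points; a pure translation is assumed for simplicity.

import numpy as np
from metrics.iou import _apply_forward_transformations, _apply_backward_transformations

T = np.eye(4)
T[:3, 3] = [1.0, 2.0, 3.0]
chain = [{"type": "translation", "matrix": T}]
pts = np.random.rand(5, 3)
back = _apply_backward_transformations(_apply_forward_transformations(pts, chain), chain)
assert np.allclose(pts, back)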
108
+ def _count_points_in_box3d(points, bbox_vertices):
109
+ """
110
+ Count the number of points in a 3D bounding box\n
111
+ - points: the points in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
112
+ - bbox_vertices: the vertices of the bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
113
+ - The bbox is assumed to be axis-aligned\n
114
+
115
+ Return:\n
116
+ - num_points_in_bbox: the number of points in the bounding box
117
+ """
118
+
119
+ # Count the number of points in the bounding box
120
+ num_points_in_bbox = np.sum(
121
+ np.all(points >= np.min(bbox_vertices, axis=0), axis=1)
122
+ & np.all(points <= np.max(bbox_vertices, axis=0), axis=1)
123
+ )
124
+
125
+ return num_points_in_bbox
126
+
127
+
128
+ def iou_aabb(bbox1_vertices, bbox2_verices):
129
+ """
130
+ Compute the IoU between two axis-aligned bounding boxes\n
131
+ - bbox1_vertices: the vertices of the first bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
132
+ - bbox2_vertices: the vertices of the second bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]\n
133
+
134
+ Return:\n
135
+ - iou: the IoU between the two bounding boxes
136
+ """
137
+
138
+ # Compute the intersection and union of the two bounding boxes
139
+ min_bbox = np.maximum(np.min(bbox1_vertices, axis=0), np.min(bbox2_verices, axis=0))
140
+ max_bbox = np.minimum(np.max(bbox1_vertices, axis=0), np.max(bbox2_verices, axis=0))
141
+ intersection = np.prod(np.clip(max_bbox - min_bbox, a_min=0, a_max=None))
142
+ union = (
143
+ np.prod(np.max(bbox1_vertices, axis=0) - np.min(bbox1_vertices, axis=0))
144
+ + np.prod(np.max(bbox2_verices, axis=0) - np.min(bbox2_verices, axis=0))
145
+ - intersection
146
+ )
147
+
148
+ # Compute the IoU
149
+ iou = intersection / union if union > 0 else 0
150
+
151
+ return iou
152
+
153
+
154
+ def sampling_iou(
155
+ bbox1_vertices,
156
+ bbox2_vertices,
157
+ bbox1_transformations,
158
+ bbox2_transformations,
159
+ num_samples=10000,
160
+ ):
161
+ """
162
+ Compute the IoU between two bounding boxes\n
163
+ - bbox1_vertices: the vertices of the first bounding box\n
164
+ - bbox2_vertices: the vertices of the second bounding box\n
165
+ - bbox1_transformations: list of transformations applied to the first bounding box\n
166
+ - bbox2_transformations: list of transformations applied to the second bounding box\n
167
+ - num_samples (optional): the number of samples to use per bounding box\n
168
+
169
+ Return:\n
170
+ - iou: the IoU between the two bounding boxes after applying the transformations
171
+ """
172
+ # if no transformations are applied, use the axis-aligned bounding box IoU
173
+ if len(bbox1_transformations) == 0 and len(bbox2_transformations) == 0:
174
+ return iou_aabb(bbox1_vertices, bbox2_vertices)
175
+
176
+ # Volume of the two bounding boxes
177
+ bbox1_volume = np.prod(
178
+ np.max(bbox1_vertices, axis=0) - np.min(bbox1_vertices, axis=0)
179
+ )
180
+ bbox2_volume = np.prod(
181
+ np.max(bbox2_vertices, axis=0) - np.min(bbox2_vertices, axis=0)
182
+ )
183
+
184
+ # Sample points in the two bounding boxes
185
+ bbox1_points = _sample_points_in_box3d(bbox1_vertices, num_samples)
186
+ bbox2_points = _sample_points_in_box3d(bbox2_vertices, num_samples)
187
+
188
+ # Transform the points
189
+ forward_bbox1_points = _apply_forward_transformations(
190
+ bbox1_points, bbox1_transformations
191
+ )
192
+ forward_bbox2_points = _apply_forward_transformations(
193
+ bbox2_points, bbox2_transformations
194
+ )
195
+
196
+ # Transform the forward points to the other box's rest pose frame
197
+ forward_bbox1_points_in_rest_bbox2_frame = _apply_backward_transformations(
198
+ forward_bbox1_points, bbox2_transformations
199
+ )
200
+ forward_bbox2_points_in_rest_bbox1_frame = _apply_backward_transformations(
201
+ forward_bbox2_points, bbox1_transformations
202
+ )
203
+
204
+ # Count the number of points in the other bounding box
205
+ num_bbox1_points_in_bbox2 = _count_points_in_box3d(
206
+ forward_bbox1_points_in_rest_bbox2_frame, bbox2_vertices
207
+ )
208
+ num_bbox2_points_in_bbox1 = _count_points_in_box3d(
209
+ forward_bbox2_points_in_rest_bbox1_frame, bbox1_vertices
210
+ )
211
+
212
+ # Compute the IoU
213
+ intersect = (
214
+ bbox1_volume * num_bbox1_points_in_bbox2
215
+ + bbox2_volume * num_bbox2_points_in_bbox1
216
+ ) / 2
217
+ union = bbox1_volume * num_samples + bbox2_volume * num_samples - intersect
218
+ iou = intersect / union
219
+
220
+ return iou
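A Monte-Carlo sanity check for sampling_iou: shifting one unit box by half a unit along x gives an analytic IoU of 1/3, which the estimate should approach up to sampling noise.

import numpy as np
from metrics.iou import sampling_iou

box = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
T = np.eye(4)
T[0, 3] = 0.5
est = sampling_iou(box, box, [{"type": "translation", "matrix": T}], [], num_samples=20000)
print(est)  # ~0.333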
metrics/iou_cdist.py ADDED
@@ -0,0 +1,227 @@
1
+ """
2
+ This file computes the IoU-based and centroid-distance-based metrics in a symmetric manner\n
3
+ """
4
+
5
+ import sys, os
6
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
7
+ import numpy as np
8
+ from copy import deepcopy
9
+ from objects.dict_utils import (
10
+ get_base_part_idx,
11
+ get_bbox_vertices,
12
+ remove_handles,
13
+ compute_overall_bbox_size,
14
+ rescale_object,
15
+ find_part_mapping,
16
+ zero_center_object,
17
+ )
18
+ from objects.motions import transform_all_parts
19
+ from metrics.giou import sampling_giou, sampling_cDist
20
+
21
+
22
+ def _get_scores(
23
+ src_dict,
24
+ tgt_dict,
25
+ original_src_bbox_vertices,
26
+ original_tgt_bbox_vertices,
27
+ mapping,
28
+ num_states,
29
+ rotation_fix_range,
30
+ num_samples,
31
+ iou_include_base,
32
+ ):
33
+ # Record the indices of the base parts of the src objects
34
+ src_base_idx = get_base_part_idx(src_dict)
35
+
36
+ # Compute the sum of IoU between the generated object and the candidate object over a number of articulation states
37
+ num_parts_in_src = len(src_dict["diffuse_tree"])
38
+ iou_per_part_and_state = np.zeros((num_parts_in_src, num_states), dtype=np.float32)
39
+ cDist_per_part_and_state = np.zeros(
40
+ (num_parts_in_src, num_states), dtype=np.float32
41
+ )
42
+
43
+ states = np.linspace(0, 1, num_states)
44
+ for state_idx, state in enumerate(states):
45
+
46
+ # Get a fresh copy of the bounding box vertices in rest pose
47
+ src_bbox_vertices = deepcopy(original_src_bbox_vertices)
48
+ tgt_bbox_vertices = deepcopy(original_tgt_bbox_vertices)
49
+
50
+ # Transform the objects to the current state using the joints
51
+ src_part_transfomrations = transform_all_parts(
52
+ src_bbox_vertices,
53
+ src_dict,
54
+ state,
55
+ rotation_fix_range=rotation_fix_range,
56
+ )
57
+
58
+
59
+ tgt_part_transfomrations = transform_all_parts(
60
+ tgt_bbox_vertices,
61
+ tgt_dict,
62
+ state,
63
+ rotation_fix_range=rotation_fix_range,
64
+ )
65
+
66
+ # Compute the IoU between the two objects using the transformed bounding boxes and the part mapping
67
+ for src_part_idx in range(num_parts_in_src):
68
+
69
+ # Get the index of the corresponding part in the candidate object
70
+ tgt_part_idx = int(mapping[src_part_idx, 0])
71
+
72
+ # Always use a fresh copy of the bounding box vertices in rest pose in case dry_run=False is incorrectly set
73
+ src_part_bbox_vertices = deepcopy(original_src_bbox_vertices)[src_part_idx]
74
+ tgt_part_bbox_vertices = deepcopy(original_tgt_bbox_vertices)[tgt_part_idx]
75
+
76
+ # Compute the sampling-based IoU between the two parts
77
+
78
+ iou_per_part_and_state[src_part_idx, state_idx] = sampling_giou(
79
+ src_part_bbox_vertices,
80
+ tgt_part_bbox_vertices,
81
+ src_part_transfomrations[src_part_idx],
82
+ tgt_part_transfomrations[tgt_part_idx],
83
+ num_samples=num_samples,
84
+ )
85
+ # Compute the centroid distance between the two matched parts
86
+ cDist_per_part_and_state[src_part_idx, state_idx] = sampling_cDist(
87
+ src_dict["diffuse_tree"][src_part_idx],
88
+ tgt_dict["diffuse_tree"][tgt_part_idx],
89
+ src_part_transfomrations[src_part_idx],
90
+ tgt_part_transfomrations[tgt_part_idx],
91
+ )
92
+
93
+ # IoU and cDist at the resting state
94
+ per_part_iou_avg_at_rest = iou_per_part_and_state[:, 0]
95
+ per_part_cDist_avg_at_rest = cDist_per_part_and_state[:, 0]
96
+
97
+ # Average the IoU over the states
98
+ per_part_iou_avg_over_states = np.sum(iou_per_part_and_state, axis=1) / num_states
99
+ # Average the cDist over the states
100
+ per_part_cDist_avg_over_states = (
101
+ np.sum(cDist_per_part_and_state, axis=1) / num_states
102
+ )
103
+
104
+ # Remove the base part if specified
105
+ if not iou_include_base:
106
+ per_part_iou_avg_over_states = np.delete(
107
+ per_part_iou_avg_over_states, src_base_idx
108
+ )
109
+ per_part_iou_avg_at_rest = np.delete(per_part_iou_avg_at_rest, src_base_idx)
110
+ per_part_cDist_avg_over_states = np.delete(
111
+ per_part_cDist_avg_over_states, src_base_idx
112
+ )
113
+ per_part_cDist_avg_at_rest = np.delete(per_part_cDist_avg_at_rest, src_base_idx)
114
+
115
+ aid_iou = float(np.mean(per_part_iou_avg_over_states)) if len(per_part_iou_avg_over_states) > 0 else 0
116
+ aid_cdist = float(np.mean(per_part_cDist_avg_over_states)) if len(per_part_cDist_avg_over_states) > 0 else 1
117
+ rid_iou = float(np.mean(per_part_iou_avg_at_rest)) if len(per_part_iou_avg_at_rest) > 0 else 0
118
+ rid_cdist = float(np.mean(per_part_cDist_avg_at_rest)) if len(per_part_cDist_avg_at_rest) > 0 else 1
119
+
120
+ return {
121
+ "AS-IoU": 1. - aid_iou,
122
+ "AS-cDist": aid_cdist,
123
+ "RS-IoU": 1. - rid_iou,
124
+ "RS-cDist": rid_cdist
125
+ }
126
+
127
+
128
+ def IoU_cDist(
129
+ gen_obj_dict,
130
+ gt_obj_dict,
131
+ num_states=2,
132
+ compare_handles=False,
133
+ iou_include_base=False,
134
+ rotation_fix_range=True,
135
+ num_samples=10000,
136
+ ):
137
+ """
138
+ Compute the IoU-based and centroid-distance-based metrics\n
139
+ This metric averages the per-part IoU between matched parts of the two objects over the sampled articulation states and at the resting state\n
140
+
141
+ - gen_obj_dict: the dictionary of the generated object\n
142
+ - gt_obj_dict: the dictionary of the gt object\n
143
+ - num_states: the number of articulation states to compute the metric\n
144
+ - compare_handles (optional): whether to compare the handles\n
145
+ - iou_include_base (optional): whether to include the base part in the IoU computation\n
146
+ - rotation_fix_range (optional): whether to fix the rotation range to 90 degrees for revolute joints\n
147
+ - num_samples (optional): the number of samples to use\n
148
+
149
+ Return:\n
150
+ - scores: a dictionary of the computed scores\n
151
+ - "AS-IoU": the average IoU over the articulation states\n
152
+ - "AS-cDist": the average centroid distance over the articulation states\n
153
+ - "RS-IoU": the average IoU at the resting state\n
154
+ - "RS-cDist": the average centroid distance at the resting state\n
155
+ """
156
+ # Make copies of the dictionaries to avoid modifying the original dictionaries
157
+ gen_dict = deepcopy(gen_obj_dict)
158
+ gt_dict = deepcopy(gt_obj_dict)
159
+
160
+ # Strip the handles from the object if not comparing them
161
+ if not compare_handles:
162
+ gen_dict = remove_handles(gen_dict)
163
+ gt_dict = remove_handles(gt_dict)
164
+
165
+ # Zero center the objects
166
+ zero_center_object(gen_dict)
167
+ zero_center_object(gt_dict)
168
+
169
+ # scale the generated object as a whole to match the size of the gt object
170
+ gen_bbox_size = compute_overall_bbox_size(gen_dict)
171
+ gt_bbox_size = compute_overall_bbox_size(gt_dict)
172
+ scale_factor = gt_bbox_size / gen_bbox_size
173
+ rescale_object(gen_dict, scale_factor)
174
+
175
+ mapping_gen2gt = find_part_mapping(gen_dict, gt_dict, use_hungarian=True)
176
+ # for i in range(mapping_gen2gt.shape[0]):
177
+ # if mapping_gen2gt[i][0] < 100:
178
+ # gen_dict['diffuse_tree'][i]["parent"] = gt_dict['diffuse_tree'][int(mapping_gen2gt[i][0])]["parent"]
179
+ # gen_dict['diffuse_tree'][i]["children"] = gt_dict['diffuse_tree'][int(mapping_gen2gt[i][0])]["children"]
180
+ # gen_dict['diffuse_tree'][i]["id"] = gt_dict['diffuse_tree'][int(mapping_gen2gt[i][0])]["id"]
181
+ # mapping_gen2gt = find_part_mapping(gen_dict, gt_dict, use_hungarian=True)
182
+ mapping_gt2gen = find_part_mapping(gt_dict, gen_dict, use_hungarian=True)
183
+
184
+ # Save the original bounding box vertices in rest pose
185
+ original_gen_bbox_vertices = np.array(
186
+ [get_bbox_vertices(gen_dict, i) for i in range(len(gen_dict["diffuse_tree"]))],
187
+ dtype=np.float32,
188
+ )
189
+ original_gt_bbox_vertices = np.array(
190
+ [get_bbox_vertices(gt_dict, i) for i in range(len(gt_dict["diffuse_tree"]))],
191
+ dtype=np.float32,
192
+ )
193
+ # import ipdb
194
+ # ipdb.set_trace()
195
+ scores_gen2gt = _get_scores(
196
+ gen_dict,
197
+ gt_dict,
198
+ original_gen_bbox_vertices,
199
+ original_gt_bbox_vertices,
200
+ mapping_gen2gt,
201
+ num_states,
202
+ rotation_fix_range,
203
+ num_samples,
204
+ iou_include_base,
205
+ )
206
+
207
+ scores_gt2gen = _get_scores(
208
+ gt_dict,
209
+ gen_dict,
210
+ original_gt_bbox_vertices,
211
+ original_gen_bbox_vertices,
212
+ mapping_gt2gen,
213
+ num_states,
214
+ rotation_fix_range,
215
+ num_samples,
216
+ iou_include_base,
217
+ )
218
+
219
+
220
+ scores = {
221
+ "AS-IoU": (scores_gen2gt["AS-IoU"] + scores_gt2gen["AS-IoU"]) / 2,
222
+ "AS-cDist": (scores_gen2gt["AS-cDist"] + scores_gt2gen["AS-cDist"]) / 2,
223
+ "RS-IoU": (scores_gen2gt["RS-IoU"] + scores_gt2gen["RS-IoU"]) / 2,
224
+ "RS-cDist": (scores_gen2gt["RS-cDist"] + scores_gt2gen["RS-cDist"]) / 2,
225
+ }
226
+
227
+ return scores
models/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ models = {}
2
+
3
+
4
+ def register(name):
5
+ def decorator(cls):
6
+ models[name] = cls
7
+ return cls
8
+
9
+ return decorator
10
+
11
+
12
+ def make(name, config):
13
+ if name == 'model_B9':
14
+ name = 'denoiser_singapo'
15
+ model = models[name](config)
16
+ return model
17
+
18
+
19
+ from . import denoiser
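Usage sketch of the registry above from elsewhere in the project (the 'toy' entry is hypothetical and only for illustration; real models register themselves on import, as denoiser.py does below):

import models

@models.register("toy")
class Toy:
    def __init__(self, config):
        self.config = config

toy = models.make("toy", config={"K": 32})  # looks up "toy" and instantiates it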
models/denoiser.py ADDED
@@ -0,0 +1,415 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ import torch
4
+ import models
5
+ from torch import nn
6
+ from diffusers.models.attention import Attention, FeedForward
7
+ from models.utils import (
8
+ PEmbeder,
9
+ FinalLayer,
10
+ VisAttnProcessor,
11
+ MyAdaLayerNormZero
12
+ )
13
+
14
+ class RAPCrossAttnBlock(nn.Module):
15
+ def __init__(self, dim, num_layers, num_heads, head_dim, dropout=0.0, img_emb_dims=None):
16
+ super().__init__()
17
+ self.layers = nn.ModuleList([
18
+ Attention(
19
+ query_dim=dim,
20
+ cross_attention_dim=dim,
21
+ heads=num_heads,
22
+ dim_head=head_dim,
23
+ dropout=dropout,
24
+ bias=True,
25
+ cross_attention_norm="layer_norm",
26
+ processor=VisAttnProcessor(),
27
+ )
28
+ for _ in range(num_layers)
29
+ ])
30
+ self.norms = nn.ModuleList([
31
+ nn.LayerNorm(dim) for _ in range(num_layers)
32
+ ])
33
+
34
+ img_emb_layers = []
35
+ for i in range(len(img_emb_dims) - 1):
36
+ img_emb_layers.append(nn.Linear(img_emb_dims[i], img_emb_dims[i + 1]))
37
+ img_emb_layers.append(nn.LeakyReLU(inplace=True))
38
+ img_emb_layers.pop(-1)
39
+ self.img_emb = nn.Sequential(*img_emb_layers)
40
+ self.init_img_emb_weights()
41
+
42
+ def init_img_emb_weights(self):
43
+ for m in self.img_emb.modules():
44
+ if isinstance(m, nn.Linear):
45
+ nn.init.kaiming_normal_(m.weight, mode="fan_in")
46
+ if m.bias is not None:
47
+ nn.init.constant_(m.bias, 0)
48
+
49
+ def forward(self, img_first, img_second):
50
+ """
51
+ Inputs:
52
+ img_first: (B, Np, D)
53
+ img_second: (B, Np, D)
54
+ Output:
55
+ fused_feat: (B, Np, D)
56
+ """
57
+ img_first = self.img_emb(img_first)
58
+ img_second = self.img_emb(img_second)
59
+ fused = img_second
60
+ for norm, attn in zip(self.norms, self.layers):
61
+ normed = norm(fused)
62
+ delta, _ = attn(normed, encoder_hidden_states=img_first, attention_mask=None)
63
+ fused = fused + delta # residual connection
64
+ return fused
65
+
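A shape sketch for the fusion block above (dimensions are assumed to mirror the config: 768-d DINOv2 patch features projected to a 128-d attention width; VisAttnProcessor from models.utils is assumed to return (output, attention_map) as the rest of this file expects):

import torch
from models.denoiser import RAPCrossAttnBlock

block = RAPCrossAttnBlock(dim=128, num_layers=2, num_heads=4, head_dim=32, img_emb_dims=[768, 128])
first = torch.randn(2, 256, 768)   # (B, Np, 768) patch features of the first image
second = torch.randn(2, 256, 768)  # (B, Np, 768) patch features of the second image
fused = block(first, second)       # (B, Np, 128): second-image features attended over the first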
66
+ class Attn_Block(nn.Module):
67
+ def __init__(
68
+ self,
69
+ dim: int,
70
+ num_attention_heads: int,
71
+ attention_head_dim: int,
72
+ dropout=0.0,
73
+ activation_fn: str = "geglu",
74
+ num_embeds_ada_norm: int = None,
75
+ attention_bias: bool = False,
76
+ norm_elementwise_affine: bool = True,
77
+ final_dropout: bool = False,
78
+ class_dropout_prob: float = 0.0, # for classifier-free
79
+ img_emb_dims=None,
80
+
81
+ ):
82
+ super().__init__()
83
+
84
+ self.norm1 = MyAdaLayerNormZero(dim, num_embeds_ada_norm, class_dropout_prob)
85
+ self.norm2 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
86
+ self.norm3 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
87
+ self.norm4 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
88
+ self.norm5 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
89
+ self.norm6 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
90
+
91
+ self.local_attn = Attention(
92
+ query_dim=dim,
93
+ heads=num_attention_heads,
94
+ dim_head=attention_head_dim,
95
+ dropout=dropout,
96
+ bias=attention_bias,
97
+ )
98
+
99
+ self.global_attn = Attention(
100
+ query_dim=dim,
101
+ heads=num_attention_heads,
102
+ dim_head=attention_head_dim,
103
+ dropout=dropout,
104
+ bias=attention_bias,
105
+ )
106
+
107
+ self.graph_attn = Attention(
108
+ query_dim=dim,
109
+ heads=num_attention_heads,
110
+ dim_head=attention_head_dim,
111
+ dropout=dropout,
112
+ bias=attention_bias,
113
+ )
114
+
115
+ self.img_attn = Attention(
116
+ query_dim=dim,
117
+ cross_attention_dim=dim,
118
+ heads=num_attention_heads,
119
+ dim_head=attention_head_dim,
120
+ dropout=dropout,
121
+ bias=attention_bias,
122
+ cross_attention_norm="layer_norm",
123
+ processor=VisAttnProcessor(), # to be removed for release model
124
+ )
125
+
126
+ self.img_attn_second = Attention(
127
+ query_dim=dim,
128
+ cross_attention_dim=dim,
129
+ heads=num_attention_heads,
130
+ dim_head=attention_head_dim,
131
+ dropout=dropout,
132
+ bias=attention_bias,
133
+ cross_attention_norm="layer_norm",
134
+ processor=VisAttnProcessor(), # to be removed for release model
135
+ )
136
+
137
+ self.ff = FeedForward(
138
+ dim,
139
+ dropout=dropout,
140
+ activation_fn=activation_fn,
141
+ final_dropout=final_dropout,
142
+ )
143
+
144
+ # image embedding layers
145
+ layers = []
146
+ for i in range(len(img_emb_dims) - 1):
147
+ layers.append(nn.Linear(img_emb_dims[i], img_emb_dims[i + 1]))
148
+ layers.append(nn.LeakyReLU(inplace=True))
149
+ layers.pop(-1)
150
+ self.img_emb = nn.Sequential(*layers)
151
+ self.init_img_emb_weights()
152
+
153
+ def init_img_emb_weights(self):
154
+ for m in self.img_emb.modules():
155
+ if isinstance(m, nn.Linear):
156
+ nn.init.kaiming_normal_(m.weight, mode="fan_in")
157
+ if m.bias is not None:
158
+ nn.init.constant_(m.bias, 0)
159
+
160
+ def forward(
161
+ self,
162
+ hidden_states,
163
+ img_patches,
164
+ fuse_feat,
165
+ pad_mask,
166
+ attr_mask,
167
+ graph_mask,
168
+ timestep,
169
+ class_labels,
170
+ label_free=False,
171
+ ):
172
+ # image patches embedding
173
+ img_emb = self.img_emb(img_patches)
174
+
175
+ # adaptive normalization, taken timestep and class_labels as input condition
176
+ norm_hidden_states, gate_1, shift_mlp, scale_mlp, gate_mlp, gate_2, gate_3 = (
177
+ self.norm1(
178
+ hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype,
179
+ label_free=label_free
180
+ )
181
+ )
182
+
183
+ # local attribute self-attention
184
+ attr_out = self.local_attn(norm_hidden_states, attention_mask=attr_mask)
185
+ attr_out = gate_1.unsqueeze(1) * attr_out
186
+ hidden_states = hidden_states + attr_out
187
+
188
+ # global attribute self-attention
189
+ norm_hidden_states = self.norm2(hidden_states)
190
+ global_out = self.global_attn(norm_hidden_states, attention_mask=pad_mask)
191
+ global_out = gate_2.unsqueeze(1) * global_out
192
+ hidden_states = hidden_states + global_out
193
+
194
+ # graph relation self-attention
195
+ norm_hidden_states = self.norm3(hidden_states)
196
+ graph_out = self.graph_attn(norm_hidden_states, attention_mask=graph_mask)
197
+ graph_out = gate_3.unsqueeze(1) * graph_out
198
+ hidden_states = hidden_states + graph_out
199
+
200
+ img_first, img_second = img_emb.chunk(2, dim=1)
201
+
202
+ # cross attention with image patches
203
+ norm_hidden_states = self.norm4(hidden_states)
204
+ B, Na, D = norm_hidden_states.shape
205
+ Np = img_first.shape[1] # number of image patches
206
+ mode_num = Na // 32
207
+ reshaped = norm_hidden_states.reshape(B, Na // mode_num, mode_num, D)
208
+ bboxes = reshaped[:, :, 0, :] # (B, K, D)
209
+ # cross attention between bbox attributes and image patches
210
+ bbox_img_out, bbox_cross_attn_map = self.img_attn(
211
+ bboxes,
212
+ encoder_hidden_states=img_first,
213
+ attention_mask=None,
214
+ ) # cross_attn_map: (B, n_head, K, Np)
215
+
216
+ # to reshape the cross_attn_map back to (B, n_head, Na, Np); redundant for other attributes, fix later
217
+ # cross_attn_map_reshape = torch.zeros(size=(B, bbox_cross_attn_map.shape[1], Na // mode_num, mode_num, Np), device=bbox_cross_attn_map.device)
218
+ # cross_attn_map_reshape[:, :, :, 0, :] = bbox_cross_attn_map
219
+ # cross_attn_map = cross_attn_map_reshape.reshape(B, bbox_cross_attn_map.shape[1], Na, Np)
220
+
221
+ # assemble the output of cross attention with bbox attributes and other attributes
222
+ img_out = torch.empty(size=(B, Na // mode_num, mode_num, D), device=hidden_states.device, dtype=hidden_states.dtype)
223
+ img_out[:, :, 0, :] = bbox_img_out
224
+ img_out[:, :, 1:, :] = reshaped[:, :, 1:, :]
225
+ img_out = img_out.reshape(B, Na, D)
226
+ hidden_states = hidden_states + img_out
227
+
228
+ norm_hidden_states = self.norm6(hidden_states)
229
+ B, Na, D = norm_hidden_states.shape
230
+ Np = img_second.shape[1] # number of image patches
231
+ mode_num = Na // 32
232
+ reshaped = norm_hidden_states.reshape(B, Na // mode_num, mode_num, D)
233
+ joints = reshaped # (B, K, 5, D)
234
+ joints = joints.reshape(B, Na // mode_num * 5, D)
235
+ # cross attention between all part-attribute tokens and the fused image features
236
+ joint_img_out, bbox_cross_attn_map = self.img_attn_second(
237
+ joints,
238
+ encoder_hidden_states=fuse_feat,
239
+ attention_mask=None,
240
+ ) # cross_attn_map: (B, n_head, K*5, Np)
241
+
242
+ # to reshape the cross_attn_map back to (B, n_head, Na, Np); redundant for other attributes, fix later
243
+ # cross_attn_map_reshape = torch.zeros(size=(B, bbox_cross_attn_map.shape[1], Na // mode_num, mode_num, Np), device=bbox_cross_attn_map.device)
244
+ # cross_attn_map_reshape[:, :, :, 1:5, :] = bbox_cross_attn_map.reshape(
245
+ # B, bbox_cross_attn_map.shape[1], Na // mode_num, 4, Np
246
+ # )
247
+ # cross_attn_map = cross_attn_map_reshape.reshape(B, bbox_cross_attn_map.shape[1], Na, Np)
248
+
249
+ # assemble the output of cross attention with bbox attributes and other attributes
250
+ img_out = torch.empty(size=(B, Na // mode_num, mode_num, D), device=hidden_states.device, dtype=hidden_states.dtype)
251
+ img_out = joint_img_out.reshape(B, Na // mode_num, 5, D)
252
+ img_out = img_out.reshape(B, Na, D)
253
+ hidden_states = hidden_states + img_out
254
+
255
+ # feed-forward
256
+ norm_hidden_states = self.norm5(hidden_states)
257
+ norm_hidden_states = (
258
+ norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
259
+ )
260
+ ff_output = self.ff(norm_hidden_states)
261
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
262
+ hidden_states = ff_output + hidden_states
263
+
264
+ return hidden_states
265
+
266
+ @models.register("denoiser")
267
+ class Denoiser(nn.Module):
268
+ """
269
+ Denoiser based on CAGE's attribute attention block + our ICA module, with 4 sequential attentions: LA -> GA -> GRA -> ICA
270
+ Different image adapters for each layer.
271
+ The image cross attention uses key-padding masks (object mask, part mask).
272
+ *** The ICA only applies to the bbox attributes, not other attributes such as motion params.***
273
+ """
274
+
275
+ def __init__(self, hparams):
276
+ super().__init__()
277
+ self.hparams = hparams
278
+ self.K = self.hparams.get("K", 32)
279
+
280
+ in_ch = hparams.in_ch
281
+ attn_dim = hparams.attn_dim
282
+ mid_dim = attn_dim // 2
283
+ n_head = hparams.n_head
284
+ head_dim = attn_dim // n_head
285
+ num_embeds_ada_norm = 6 * attn_dim
286
+
287
+ # embedding layers for different node attributes
288
+ self.aabb_emb = nn.Sequential(
289
+ nn.Linear(in_ch, mid_dim),
290
+ nn.ReLU(inplace=True),
291
+ nn.Linear(mid_dim, attn_dim),
292
+ )
293
+ self.jaxis_emb = nn.Sequential(
294
+ nn.Linear(in_ch, mid_dim),
295
+ nn.ReLU(inplace=True),
296
+ nn.Linear(mid_dim, attn_dim),
297
+ )
298
+ self.range_emb = nn.Sequential(
299
+ nn.Linear(in_ch, mid_dim),
300
+ nn.ReLU(inplace=True),
301
+ nn.Linear(mid_dim, attn_dim),
302
+ )
303
+ self.label_emb = nn.Sequential(
304
+ nn.Linear(in_ch, mid_dim),
305
+ nn.ReLU(inplace=True),
306
+ nn.Linear(mid_dim, attn_dim),
307
+ )
308
+ self.jtype_emb = nn.Sequential(
309
+ nn.Linear(in_ch, mid_dim),
310
+ nn.ReLU(inplace=True),
311
+ nn.Linear(mid_dim, attn_dim),
312
+ )
313
+ # self.node_type_emb = nn.Sequential(
314
+ # nn.Linear(in_ch, mid_dim),
315
+ # nn.ReLU(inplace=True),
316
+ # nn.Linear(mid_dim, attn_dim),
317
+ # )
318
+ # positional encoding for nodes and attributes
319
+ self.pe_node = PEmbeder(self.K, attn_dim)
320
+ self.pe_attr = PEmbeder(self.hparams.mode_num, attn_dim)
321
+
322
+ # attention layers
323
+ self.attn_layers = nn.ModuleList(
324
+ [
325
+ Attn_Block(
326
+ dim=attn_dim,
327
+ num_attention_heads=n_head,
328
+ attention_head_dim=head_dim,
329
+ class_dropout_prob=hparams.get("cat_drop_prob", 0.0),
330
+ dropout=hparams.dropout,
331
+ activation_fn="geglu",
332
+ num_embeds_ada_norm=num_embeds_ada_norm,
333
+ attention_bias=False,
334
+ norm_elementwise_affine=True,
335
+ final_dropout=False,
336
+ img_emb_dims=hparams.get("img_emb_dims", None),
337
+ )
338
+ for d in range(hparams.n_layers)
339
+ ]
340
+ )
341
+
342
+ self.image_interaction = RAPCrossAttnBlock(
343
+ dim=attn_dim,
344
+ num_layers=6,
345
+ num_heads=n_head,
346
+ head_dim=head_dim,
347
+ dropout=hparams.dropout,
348
+ img_emb_dims=hparams.get("img_emb_dims", None),
349
+ )
350
+
351
+ self.final_layer = FinalLayer(attn_dim, in_ch)
352
+
353
+ def forward(
354
+ self,
355
+ x,
356
+ cat,
357
+ timesteps,
358
+ feat,
359
+ key_pad_mask=None,
360
+ graph_mask=None,
361
+ attr_mask=None,
362
+ label_free=False,
363
+ ):
364
+ B = x.shape[0]
365
+ x = x.view(B, self.K, 5 * 6)
366
+
367
+ # embedding layers for different attributes
368
+ x_aabb = self.aabb_emb(x[..., :6])
369
+ x_jtype = self.jtype_emb(x[..., 6:12])
370
+ x_jaxis = self.jaxis_emb(x[..., 12:18])
371
+ x_range = self.range_emb(x[..., 18:24])
372
+ x_label = self.label_emb(x[..., 24:30])
373
+ # x_node_type = self.node_type_emb(x[..., 30:36])
374
+
375
+ # concatenate all attribute embeddings
376
+ x_ = torch.cat(
377
+ [x_aabb, x_jtype, x_jaxis, x_range, x_label], dim=2
378
+ ) # (B, K, 5*attn_dim)
379
+ x_ = x_.view(B, self.K * self.hparams.mode_num, self.hparams.attn_dim)
380
+
381
+ # positional encoding for nodes and attributes
382
+ idx_attr = torch.tensor(
383
+ [0, 1, 2, 3, 4], device=x.device, dtype=torch.long
384
+ ).repeat(self.K)
385
+ idx_node = torch.arange(
386
+ self.K, device=x.device, dtype=torch.long
387
+ ).repeat_interleave(self.hparams.mode_num)
388
+ x_ = self.pe_attr(self.pe_node(x_, idx=idx_node), idx=idx_attr)
389
+
390
+
391
+ # total number of image patches across the two input views
392
+ Np = feat.shape[1]
393
+
394
+ img_first, img_second = feat.chunk(2, dim=1)
395
+ fused_img_feat = self.image_interaction(img_first, img_second) # (B, Np, D)
396
+
397
+ # attention layers
398
+ for i, attn_layer in enumerate(self.attn_layers):
399
+ x_ = attn_layer(
400
+ hidden_states=x_,
401
+ img_patches=feat,
402
+ fuse_feat=fused_img_feat,
403
+ timestep=timesteps,
404
+ class_labels=cat,
405
+ pad_mask=key_pad_mask,
406
+ graph_mask=graph_mask,
407
+ attr_mask=attr_mask,
408
+ label_free=label_free,
409
+ )
410
+
411
+ y = self.final_layer(x_, timesteps, cat)
412
+ return {
413
+ 'noise_pred': y,
414
+ 'attn_maps': None,
415
+ }
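As a reading aid, here is a minimal sketch of the per-part attribute packing that Denoiser.forward assumes (K parts, mode_num=5 attribute groups of 6 channels each). The tensor below is synthetic and the per-slice labels are read off the slicing in forward, so treat them as assumptions rather than a specification of the data format.

import torch

B, K, mode_num, ch = 2, 32, 5, 6
x = torch.randn(B, K, mode_num * ch)   # packed per-part attributes, as in x.view(B, K, 5 * 6)
x_aabb  = x[..., 0:6]      # bounding-box channels
x_jtype = x[..., 6:12]     # joint-type channels
x_jaxis = x[..., 12:18]    # joint-axis channels
x_range = x[..., 18:24]    # joint-range channels
x_label = x[..., 24:30]    # semantic-label channels
tokens = torch.stack([x_aabb, x_jtype, x_jaxis, x_range, x_label], dim=2)
tokens = tokens.reshape(B, K * mode_num, ch)   # one token per (part, attribute) pair
print(tokens.shape)                            # torch.Size([2, 160, 6])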
models/utils.py ADDED
@@ -0,0 +1,199 @@
1
+ import torch
2
+ from torch import nn
3
+ from typing import Optional
4
+ from diffusers.models.embeddings import Timesteps, TimestepEmbedding, LabelEmbedding
5
+
6
+ class FinalLayer(nn.Module):
7
+ """
8
+ Final layer of the diffusion model that projects features back to the per-attribute output channels.
9
+ """
10
+ def __init__(self, in_ch, out_ch=None, dropout=0.0):
11
+ super().__init__()
12
+ out_ch = in_ch if out_ch is None else out_ch
13
+ self.linear = nn.Linear(in_ch, out_ch)
14
+ self.norm = AdaLayerNormTC(in_ch, 2 * in_ch, dropout)
15
+
16
+ def forward(self, x, t, cond=None):
17
+ assert cond is not None
18
+ x = self.norm(x, t, cond)
19
+ x = self.linear(x)
20
+ return x
21
+
22
+
23
+ class AdaLayerNormTC(nn.Module):
24
+ """
25
+ Norm layer modified to incorporate timestep and condition embeddings.
26
+ """
27
+
28
+ def __init__(self, embedding_dim, num_embeddings, dropout):
29
+ super().__init__()
30
+ self.emb = CombinedTimestepLabelEmbeddings(
31
+ num_embeddings, embedding_dim, dropout
32
+ )
33
+ self.silu = nn.SiLU()
34
+ self.linear = nn.Linear(embedding_dim, embedding_dim * 2)
35
+ self.norm = nn.LayerNorm(
36
+ embedding_dim, elementwise_affine=False, eps=torch.finfo(torch.float16).eps
37
+ )
38
+
39
+ def forward(self, x, timestep, cond):
40
+ emb = self.linear(self.silu(self.emb(timestep, cond, hidden_dtype=None)))
41
+ scale, shift = torch.chunk(emb, 2, dim=1)
42
+ x = self.norm(x) * (1 + scale[:, None]) + shift[:, None]
43
+ return x
44
+
45
+
46
+ class PEmbeder(nn.Module):
47
+ """
48
+ Positional embedding layer.
49
+ """
50
+ def __init__(self, vocab_size, d_model):
51
+ super().__init__()
52
+ self.embed = nn.Embedding(vocab_size, d_model)
53
+ self._init_embeddings()
54
+
55
+ def _init_embeddings(self):
56
+ nn.init.kaiming_normal_(self.embed.weight, mode="fan_in")
57
+
58
+ def forward(self, x, idx=None):
59
+ if idx is None:
60
+ idx = torch.arange(x.shape[1], device=x.device, dtype=torch.long)
61
+ return x + self.embed(idx)
62
+
63
+ class CombinedTimestepLabelEmbeddings(nn.Module):
64
+ '''Modified from diffusers.models.embeddings.CombinedTimestepLabelEmbeddings'''
65
+ def __init__(self, num_classes, embedding_dim, class_dropout_prob=0.1):
66
+ super().__init__()
67
+
68
+ self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=1)
69
+ self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim)
70
+ self.class_embedder = LabelEmbedding(num_classes, embedding_dim, class_dropout_prob)
71
+
72
+ def forward(self, timestep, class_labels, hidden_dtype=None, label_free=False):
73
+ timesteps_proj = self.time_proj(timestep)
74
+ timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D)
75
+ force_drop_ids = None # training mode
76
+ if label_free: # inference mode, force_drop_ids is set to all ones to be dropped in class_embedder
77
+ force_drop_ids = torch.ones_like(class_labels, dtype=torch.bool, device=class_labels.device)
78
+ class_labels = self.class_embedder(class_labels, force_drop_ids) # (N, D)
79
+ conditioning = timesteps_emb + class_labels # (N, D)
80
+ return conditioning
81
+
82
+
83
+ class MyAdaLayerNormZero(nn.Module):
84
+ """
85
+ Adaptive layer norm zero (adaLN-Zero), borrowed from diffusers.models.attention.AdaLayerNormZero.
86
+ Extended to incorporate scale parameters (gate_2, gate_3) for the intermediate attention layers.
87
+ """
88
+
89
+ def __init__(self, embedding_dim, num_embeddings, class_dropout_prob):
90
+ super().__init__()
91
+
92
+ self.emb = CombinedTimestepLabelEmbeddings(
93
+ num_embeddings, embedding_dim, class_dropout_prob
94
+ )
95
+ self.silu = nn.SiLU()
96
+ self.linear = nn.Linear(embedding_dim, 8 * embedding_dim, bias=True)
97
+ self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6)
98
+
99
+ def forward(self, x, timestep, class_labels, hidden_dtype=None, label_free=False):
100
+ emb_t_cls = self.emb(timestep, class_labels, hidden_dtype=hidden_dtype, label_free=label_free)
101
+ emb = self.linear(self.silu(emb_t_cls))
102
+ (
103
+ shift_msa,
104
+ scale_msa,
105
+ gate_msa,
106
+ shift_mlp,
107
+ scale_mlp,
108
+ gate_mlp,
109
+ gate_2,
110
+ gate_3,
111
+ ) = emb.chunk(8, dim=1)
112
+ x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None]
113
+ return x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_2, gate_3
114
+
115
+
116
+ class VisAttnProcessor:
117
+ r"""
118
+ This code is adapted from diffusers.models.attention_processor.AttnProcessor.
119
+ Used for visualizing the attention maps when testing, NOT for training.
120
+ """
121
+
122
+ def __call__(
123
+ self,
124
+ attn,
125
+ hidden_states,
126
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
127
+ attention_mask: Optional[torch.FloatTensor] = None,
128
+ temb: Optional[torch.FloatTensor] = None,
129
+ *args,
130
+ **kwargs,
131
+ ) -> torch.Tensor:
132
+ # Removed
133
+ # if len(args) > 0 or kwargs.get("scale", None) is not None:
134
+ # deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
135
+ # deprecate("scale", "1.0.0", deprecation_message)
136
+
137
+ residual = hidden_states
138
+
139
+ if attn.spatial_norm is not None:
140
+ hidden_states = attn.spatial_norm(hidden_states, temb)
141
+
142
+ input_ndim = hidden_states.ndim
143
+
144
+ if input_ndim == 4:
145
+ batch_size, channel, height, width = hidden_states.shape
146
+ hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
147
+
148
+ batch_size, sequence_length, _ = (
149
+ hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
150
+ )
151
+ attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
152
+
153
+ if attn.group_norm is not None:
154
+ hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
155
+
156
+ query = attn.to_q(hidden_states)
157
+
158
+ if encoder_hidden_states is None:
159
+ encoder_hidden_states = hidden_states
160
+ elif attn.norm_cross:
161
+ encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
162
+
163
+ key = attn.to_k(encoder_hidden_states)
164
+ value = attn.to_v(encoder_hidden_states)
165
+
166
+ query = attn.head_to_batch_dim(query) # (40, 160, 16)
167
+ key = attn.head_to_batch_dim(key) # (40, 256, 16)
168
+ value = attn.head_to_batch_dim(value) # (40, 256, 16)
169
+
170
+ if attention_mask is not None:
171
+ if attention_mask.dtype == torch.bool:
172
+ attn_mask = torch.zeros_like(attention_mask, dtype=query.dtype, device=query.device)
173
+ attn_mask = attn_mask.masked_fill_(attention_mask.logical_not(), float("-inf"))
174
+ else:
175
+ attn_mask = attention_mask
176
+ assert attn_mask.dtype == query.dtype, f"query and attention_mask must have the same dtype, but got {query.dtype} and {attention_mask.dtype}."
177
+ else:
178
+ attn_mask = None
179
+ attention_probs = attn.get_attention_scores(query, key, attn_mask) # (40, 160, 256)
180
+ hidden_states = torch.bmm(attention_probs, value) # (40, 160, 16)
181
+ hidden_states = attn.batch_to_head_dim(hidden_states)
182
+
183
+ # linear proj
184
+ hidden_states = attn.to_out[0](hidden_states)
185
+ # dropout
186
+ hidden_states = attn.to_out[1](hidden_states)
187
+
188
+ if input_ndim == 4:
189
+ hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
190
+
191
+ if attn.residual_connection:
192
+ hidden_states = hidden_states + residual
193
+
194
+ hidden_states = hidden_states / attn.rescale_output_factor
195
+
196
+ attention_probs = attention_probs.reshape(batch_size, attn.heads, query.shape[1], sequence_length)
197
+
198
+ return hidden_states, attention_probs
199
+
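A small shape-check sketch for the helpers above. The dimensions and class count are illustrative assumptions, and it presumes diffusers is installed since CombinedTimestepLabelEmbeddings wraps its embedding modules.

import torch
from models.utils import PEmbeder, MyAdaLayerNormZero

B, K, mode_num, D = 2, 32, 5, 128              # illustrative sizes
x = torch.randn(B, K * mode_num, D)

# positional encodings over node index and attribute index, mirroring the denoiser's usage
pe_node = PEmbeder(K, D)
pe_attr = PEmbeder(mode_num, D)
idx_attr = torch.arange(mode_num).repeat(K)
idx_node = torch.arange(K).repeat_interleave(mode_num)
x = pe_attr(pe_node(x, idx=idx_node), idx=idx_attr)

# adaLN-Zero conditioning on timestep + category label
norm = MyAdaLayerNormZero(D, num_embeddings=7, class_dropout_prob=0.0)
t = torch.randint(0, 1000, (B,))
labels = torch.randint(0, 7, (B,))
x_norm, gate_1, shift_mlp, scale_mlp, gate_mlp, gate_2, gate_3 = norm(
    x, t, labels, hidden_dtype=x.dtype
)
print(x_norm.shape)                            # torch.Size([2, 160, 128])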
my_utils/__init__.py ADDED
File without changes
my_utils/callbacks.py ADDED
@@ -0,0 +1,36 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ import torch
4
+ from my_utils.misc import dump_config
5
+ from lightning.pytorch.callbacks.callback import Callback
6
+ from lightning.pytorch.utilities.rank_zero import rank_zero_only
7
+
8
+ class ConfigSnapshotCallback(Callback):
9
+ def __init__(self, config):
10
+ super().__init__()
11
+ self.config = config
12
+
13
+ def setup(self, trainer, pl_module, stage) -> None:
14
+ self.savedir = os.path.join(pl_module.hparams.exp_dir, 'config')
15
+
16
+ @rank_zero_only
17
+ def save_config_snapshot(self):
18
+ os.makedirs(self.savedir, exist_ok=True)
19
+ dump_config(os.path.join(self.savedir, 'parsed.yaml'), self.config)
20
+
21
+ def on_fit_start(self, trainer, pl_module):
22
+ self.save_config_snapshot()
23
+
24
+
25
+ class GPUCacheCleanCallback(Callback):
26
+ def on_train_batch_start(self, *args, **kwargs):
27
+ torch.cuda.empty_cache()
28
+
29
+ def on_validation_batch_start(self, *args, **kwargs):
30
+ torch.cuda.empty_cache()
31
+
32
+ def on_test_batch_start(self, *args, **kwargs):
33
+ torch.cuda.empty_cache()
34
+
35
+ def on_predict_batch_start(self, *args, **kwargs):
36
+ torch.cuda.empty_cache()
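A hypothetical way these callbacks might be attached to a Lightning Trainer; the config path and trainer options are illustrative, and ConfigSnapshotCallback expects the LightningModule to expose hparams.exp_dir.

from omegaconf import OmegaConf
import lightning.pytorch as pl
from my_utils.callbacks import ConfigSnapshotCallback, GPUCacheCleanCallback

config = OmegaConf.load("configs/config.yaml")      # illustrative path
trainer = pl.Trainer(
    max_epochs=100,
    callbacks=[ConfigSnapshotCallback(config), GPUCacheCleanCallback()],
)
# trainer.fit(model, datamodule=dm)                 # model / datamodule defined elsewhere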
my_utils/lr_schedulers.py ADDED
@@ -0,0 +1,104 @@
1
+ """
2
+ Code copied from lightning-bolts
3
+ """
4
+ import math
5
+ import warnings
6
+ from typing import List
7
+ from torch.optim import Optimizer
8
+ from torch.optim.lr_scheduler import _LRScheduler
9
+
10
+
11
+ class LinearWarmupCosineAnnealingLR(_LRScheduler):
12
+ """Sets the learning rate of each parameter group to follow a linear warmup schedule between warmup_start_lr and
13
+ base_lr followed by a cosine annealing schedule between base_lr and eta_min.
14
+
15
+ .. warning::
16
+ It is recommended to call :func:`.step()` for :class:`LinearWarmupCosineAnnealingLR`
17
+ after each iteration as calling it after each epoch will keep the starting lr at
18
+ warmup_start_lr for the first epoch which is 0 in most cases.
19
+
20
+ .. warning::
21
+ passing epoch to :func:`.step()` is being deprecated and comes with an EPOCH_DEPRECATION_WARNING.
22
+ It calls the :func:`_get_closed_form_lr()` method for this scheduler instead of
23
+ :func:`get_lr()`. Though this does not change the behavior of the scheduler, when passing
24
+ epoch param to :func:`.step()`, the user should call the :func:`.step()` function before calling
25
+ train and validation methods.
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ optimizer: Optimizer,
31
+ warmup_epochs: int,
32
+ max_epochs: int,
33
+ warmup_start_lr: float = 0.0,
34
+ eta_min: float = 0.0,
35
+ last_epoch: int = -1,
36
+ ) -> None:
37
+ """
38
+ Args:
39
+ optimizer (Optimizer): Wrapped optimizer.
40
+ warmup_epochs (int): Maximum number of iterations for linear warmup
41
+ max_epochs (int): Maximum number of iterations
42
+ warmup_start_lr (float): Learning rate to start the linear warmup. Default: 0.
43
+ eta_min (float): Minimum learning rate. Default: 0.
44
+ last_epoch (int): The index of last epoch. Default: -1.
45
+ """
46
+ self.warmup_epochs = warmup_epochs
47
+ self.max_epochs = max_epochs
48
+ self.warmup_start_lr = warmup_start_lr
49
+ self.eta_min = eta_min
50
+
51
+ super().__init__(optimizer, last_epoch)
52
+
53
+ def get_lr(self) -> List[float]:
54
+ """Compute learning rate using chainable form of the scheduler."""
55
+ if not self._get_lr_called_within_step:
56
+ warnings.warn(
57
+ "To get the last learning rate computed by the scheduler; please use `get_last_lr()`.",
58
+ UserWarning,
59
+ )
60
+
61
+ if self.last_epoch == 0:
62
+ return [self.warmup_start_lr] * len(self.base_lrs)
63
+ if self.last_epoch < self.warmup_epochs:
64
+ return [
65
+ group["lr"] + (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1)
66
+ for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
67
+ ]
68
+ if self.last_epoch == self.warmup_epochs:
69
+ return self.base_lrs
70
+ if (self.last_epoch - 1 - self.max_epochs) % (2 * (self.max_epochs - self.warmup_epochs)) == 0:
71
+ return [
72
+ group["lr"]
73
+ + (base_lr - self.eta_min) * (1 - math.cos(math.pi / (self.max_epochs - self.warmup_epochs))) / 2
74
+ for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
75
+ ]
76
+
77
+ return [
78
+ (1 + math.cos(math.pi * (self.last_epoch - self.warmup_epochs) / (self.max_epochs - self.warmup_epochs)))
79
+ / (
80
+ 1
81
+ + math.cos(
82
+ math.pi * (self.last_epoch - self.warmup_epochs - 1) / (self.max_epochs - self.warmup_epochs)
83
+ )
84
+ )
85
+ * (group["lr"] - self.eta_min)
86
+ + self.eta_min
87
+ for group in self.optimizer.param_groups
88
+ ]
89
+
90
+ def _get_closed_form_lr(self) -> List[float]:
91
+ """Called when epoch is passed as a param to the `step` function of the scheduler."""
92
+ if self.last_epoch < self.warmup_epochs:
93
+ return [
94
+ self.warmup_start_lr + self.last_epoch * (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1)
95
+ for base_lr in self.base_lrs
96
+ ]
97
+
98
+ return [
99
+ self.eta_min
100
+ + 0.5
101
+ * (base_lr - self.eta_min)
102
+ * (1 + math.cos(math.pi * (self.last_epoch - self.warmup_epochs) / (self.max_epochs - self.warmup_epochs)))
103
+ for base_lr in self.base_lrs
104
+ ]
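A minimal usage sketch; as the docstring's warning notes, step() here is called once per "epoch" unit, so warmup_epochs/max_epochs should be given in the same unit. The optimizer and counts below are illustrative.

import torch
from my_utils.lr_schedulers import LinearWarmupCosineAnnealingLR

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = LinearWarmupCosineAnnealingLR(
    optimizer, warmup_epochs=10, max_epochs=100, warmup_start_lr=1e-6, eta_min=1e-6
)
for epoch in range(100):
    # ... run one epoch of optimizer.step() calls ...
    scheduler.step()
    print(epoch, scheduler.get_last_lr()[0])   # linear warmup, then cosine decay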
my_utils/misc.py ADDED
@@ -0,0 +1,35 @@
1
+ from omegaconf import OmegaConf
2
+
3
+
4
+ # ============ Register OmegaConf Resolvers ============= #
5
+ OmegaConf.register_new_resolver('add', lambda a, b: a + b)
6
+ OmegaConf.register_new_resolver('sub', lambda a, b: a - b)
7
+ OmegaConf.register_new_resolver('mul', lambda a, b: a * b)
8
+ OmegaConf.register_new_resolver('div', lambda a, b: a / b)
9
+ # ======================================================= #
10
+
11
+
12
+ def prompt(question):
13
+ inp = input(f"{question} (y/n)").lower().strip()
14
+ if inp and inp == 'y':
15
+ return True
16
+ if inp and inp == 'n':
17
+ return False
18
+ return prompt(question)
19
+
20
+
21
+ def load_config(*yaml_files, cli_args=[]):
22
+ yaml_confs = [OmegaConf.load(f) for f in yaml_files]
23
+ cli_conf = OmegaConf.from_cli(cli_args)
24
+ conf = OmegaConf.merge(*yaml_confs, cli_conf)
25
+ OmegaConf.resolve(conf)
26
+ return conf
27
+
28
+
29
+ def config_to_primitive(config, resolve=True):
30
+ return OmegaConf.to_container(config, resolve=resolve)
31
+
32
+
33
+ def dump_config(path, config):
34
+ with open(path, 'w') as fp:
35
+ OmegaConf.save(config=config, f=fp)
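A hypothetical example of how load_config and the arithmetic resolvers registered above might be used; the YAML file and keys are illustrative, not the project's actual config schema.

from my_utils.misc import load_config, dump_config

# Suppose configs/example.yaml contains:
#   model:
#     attn_dim: 512
#     n_head: 8
#     head_dim: ${div:${model.attn_dim},${model.n_head}}
cfg = load_config("configs/example.yaml", cli_args=["model.n_head=16"])
print(cfg.model.head_dim)     # 32.0 -> the 'div' resolver applied after the CLI override
dump_config("parsed.yaml", cfg)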
my_utils/plot.py ADDED
@@ -0,0 +1,122 @@
1
+ # import os, sys
2
+ # sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ # import matplotlib
4
+ # matplotlib.use('Agg')
5
+ # import numpy as np
6
+ # import networkx as nx
7
+ # from io import BytesIO
8
+ # from PIL import Image, ImageDraw
9
+ # from matplotlib import pyplot as plt
10
+ # from sklearn.decomposition import PCA
11
+ # from my_utils.refs import graph_color_ref
12
+
13
+ # def add_text(text, imgarr):
14
+ # '''
15
+ # Function to add text to image
16
+
17
+ # Args:
18
+ # - text (str): text to add
19
+ # - imgarr (np.array): image array
20
+
21
+ # Returns:
22
+ # - img (np.array): image array with text
23
+ # '''
24
+ # img = Image.fromarray(imgarr)
25
+ # I = ImageDraw.Draw(img)
26
+ # I.text((10, 10), text, fill='black')
27
+ # return np.asarray(img)
28
+
29
+ # def get_color(ref, n_nodes):
30
+ # '''
31
+ # Function to color the nodes
32
+
33
+ # Args:
34
+ # - ref (list): list of color reference
35
+ # - n_nodes (int): number of nodes
36
+
37
+ # Returns:
38
+ # - colors (list): list of colors
39
+ # '''
40
+ # N = len(ref)
41
+ # colors = []
42
+ # for i in range(n_nodes):
43
+ # colors.append(np.array([[int(i) for i in ref[i%N][4:-1].split(',')]]) / 255.)
44
+ # return colors
45
+
46
+
47
+ # def make_grid(images, cols=5):
48
+ # """
49
+ # Arrange list of images into a N x cols grid.
50
+
51
+ # Args:
52
+ # - images (list): List of Numpy arrays representing the images.
53
+ # - cols (int): Number of columns for the grid.
54
+
55
+ # Returns:
56
+ # - grid (numpy array): Numpy array representing the image grid.
57
+ # """
58
+ # # Determine the dimensions of each image
59
+ # img_h, img_w, _ = images[0].shape
60
+ # rows = len(images) // cols
61
+
62
+ # # Initialize a blank canvas
63
+ # grid = np.zeros((rows * img_h, cols * img_w, 3), dtype=images[0].dtype)
64
+
65
+ # # Place each image onto the grid
66
+ # for idx, img in enumerate(images):
67
+ # y = (idx // cols) * img_h
68
+ # x = (idx % cols) * img_w
69
+ # grid[y: y + img_h, x: x + img_w] = img
70
+
71
+ # return grid
72
+
73
+ # def viz_graph(info_dict, res=256):
74
+ # '''
75
+ # Function to plot the directed graph
76
+
77
+ # Args:
78
+ # - info_dict (dict): output json containing the graph information
79
+ # - res (int): resolution of the image
80
+
81
+ # Returns:
82
+ # - img_arr (np.array): image array
83
+ # '''
84
+ # # build tree
85
+ # tree = info_dict['diffuse_tree']
86
+ # edges = []
87
+ # for node in tree:
88
+ # edges += [(node['id'], child) for child in node['children']]
89
+ # G = nx.DiGraph()
90
+ # G.add_edges_from(edges)
91
+
92
+ # # plot tree
93
+ # plt.figure(figsize=(res/100, res/100))
94
+
95
+ # colors = get_color(graph_color_ref, len(tree))
96
+ # pos = nx.nx_agraph.graphviz_layout(G, prog="twopi", args="")
97
+ # node_order = sorted(G.nodes())
98
+ # nx.draw(G, pos, node_color=colors, nodelist=node_order, edge_color='k', with_labels=False)
99
+
100
+ # buf = BytesIO()
101
+ # plt.savefig(buf, format="png", dpi=100)
102
+ # buf.seek(0)
103
+ # img = Image.open(buf)
104
+ # img_arr = np.asarray(img)
105
+ # buf.close()
106
+ # plt.clf()
107
+ # plt.close()
108
+ # return img_arr[:, :, :3]
109
+
110
+ # def viz_patch_feat_pca(feat):
111
+ # pca = PCA(n_components=3)
112
+ # pca.fit(feat)
113
+ # feat_pca = pca.transform(feat)
114
+
115
+ # t = np.array(feat_pca)
116
+ # t_min = t.min(axis=0, keepdims=True)
117
+ # t_max = t.max(axis=0, keepdims=True)
118
+ # normalized_t = (t - t_min) / (t_max - t_min)
119
+
120
+ # array = (normalized_t * 255).astype(np.uint8)
121
+ # img_array = array.reshape(16, 16, 3)
122
+ # return img_array
my_utils/refs.py ADDED
@@ -0,0 +1,122 @@
1
+ # reference of object categories
2
+ cat_ref = {
3
+ "Table": 0,
4
+ "Dishwasher": 1,
5
+ "StorageFurniture": 2,
6
+ "Refrigerator": 3,
7
+ "WashingMachine": 4,
8
+ "Microwave": 5,
9
+ "Oven": 6,
10
+ }
11
+
12
+ data_mode_ref = {
13
+ "aabb_max": 0,
14
+ "aabb_min": 1,
15
+ "joint_type": 2,
16
+ "axis_dir": 3,
17
+ "axis_ori": 4,
18
+ "joint_range": 5,
19
+ "label": 6
20
+ }
21
+
22
+ # reference of semantic labels for each part
23
+ sem_ref = {
24
+ "fwd": {
25
+ "door": 0,
26
+ "drawer": 1,
27
+ "base": 2,
28
+ "handle": 3,
29
+ "wheel": 4,
30
+ "knob": 5,
31
+ "shelf": 6,
32
+ "tray": 7,
33
+ },
34
+ "bwd": {
35
+ 0: "door",
36
+ 1: "drawer",
37
+ 2: "base",
38
+ 3: "handle",
39
+ 4: "wheel",
40
+ 5: "knob",
41
+ 6: "shelf",
42
+ 7: "tray",
43
+ },
44
+ }
45
+
46
+ # reference of joint types for each part
47
+ joint_ref = {
48
+ "fwd": {"fixed": 1, "revolute": 2, "prismatic": 3, "screw": 4, "continuous": 5},
49
+ "bwd": {1: "fixed", 2: "revolute", 3: "prismatic", 4: "screw", 5: "continuous"},
50
+ }
51
+
52
+
53
+ import plotly.express as px
54
+
55
+ # pallette for joint type color
56
+ joint_color_ref = px.colors.qualitative.Set1
57
+ # pallette for graph node color
58
+ # graph_color_ref = px.colors.qualitative.Bold + px.colors.qualitative.Prism
59
+ # graph_color_ref = [
60
+ # "rgb(200, 200, 200)", # 奶橙黄
61
+ # "rgb(255, 196, 200)", # 莓奶粉
62
+ # "rgb(154, 228, 186)", # 牛油果绿
63
+ # "rgb(252, 208, 140)", # 奶橙黄
64
+ # "rgb(217, 189, 250)", # 薄紫
65
+ # "rgb(203, 237, 164)", # 抹茶绿
66
+ # "rgb(188, 229, 235)", # 青蓝灰
67
+ # "rgb(179, 199, 243)", # 雾蓝
68
+ # "rgb(255, 224, 130)", # 淡柠黄
69
+ # "rgb(222, 179, 212)", # 粉紫
70
+ # "rgb(148, 212, 224)", # 冰蓝
71
+ # ]
72
+ graph_color_ref = [
73
+ "rgb(160, 160, 160)", # 奶橙灰 → 深灰白,对比提升
74
+ "rgb(255, 130, 145)", # 莓奶粉 → 更亮更红
75
+ "rgb(80, 200, 150)", # 牛油果绿 → 更深更绿
76
+ "rgb(255, 180, 60)", # 奶橙黄 → 更橙更亮
77
+ "rgb(180, 140, 255)", # 薄紫 → 更强饱和度紫
78
+ "rgb(130, 210, 50)", # 抹茶绿 → 偏亮偏黄的绿
79
+ "rgb(90, 190, 220)", # 青蓝灰 → 加蓝提升对比
80
+ "rgb(100, 150, 255)", # 雾蓝 → 饱和冷蓝
81
+ "rgb(255, 200, 0)", # 淡柠黄 → 纯柠黄
82
+ "rgb(200, 100, 190)", # 粉紫 → 更紫
83
+ "rgb(80, 180, 255)", # 冰蓝 → 更冷更亮的蓝
84
+ "rgb(255, 130, 145)", # 莓奶粉 → 更亮更红
85
+ "rgb(80, 200, 150)", # 牛油果绿 → 更深更绿
86
+ "rgb(255, 180, 60)", # 奶橙黄 → 更橙更亮
87
+ "rgb(180, 140, 255)", # 薄紫 → 更强饱和度紫
88
+ "rgb(130, 210, 50)", # 抹茶绿 → 偏亮偏黄的绿
89
+ "rgb(90, 190, 220)", # 青蓝灰 → 加蓝提升对比
90
+ "rgb(100, 150, 255)", # 雾蓝 → 饱和冷蓝
91
+ "rgb(255, 200, 0)", # 淡柠黄 → 纯柠黄
92
+ "rgb(200, 100, 190)", # 粉紫 → 更紫
93
+ "rgb(80, 180, 255)", # 冰蓝 → 更冷更亮的蓝
94
+ "rgb(255, 130, 145)", # 莓奶粉 → 更亮更红
95
+ "rgb(80, 200, 150)", # 牛油果绿 → 更深更绿
96
+ "rgb(255, 180, 60)", # 奶橙黄 → 更橙更亮
97
+ "rgb(180, 140, 255)", # 薄紫 → 更强饱和度紫
98
+ "rgb(130, 210, 50)", # 抹茶绿 → 偏亮偏黄的绿
99
+ "rgb(90, 190, 220)", # 青蓝灰 → 加蓝提升对比
100
+ "rgb(100, 150, 255)", # 雾蓝 → 饱和冷蓝
101
+ "rgb(255, 200, 0)", # 淡柠黄 → 纯柠黄
102
+ "rgb(200, 100, 190)", # 粉紫 → 更紫
103
+ "rgb(80, 180, 255)", # 冰蓝 → 更冷更亮的蓝
104
+ "rgb(255, 130, 145)", # 莓奶粉 → 更亮更红
105
+ "rgb(80, 200, 150)", # 牛油果绿 → 更深更绿
106
+ "rgb(255, 180, 60)", # 奶橙黄 → 更橙更亮
107
+ "rgb(180, 140, 255)", # 薄紫 → 更强饱和度紫
108
+ "rgb(130, 210, 50)", # 抹茶绿 → 偏亮偏黄的绿
109
+ "rgb(90, 190, 220)", # 青蓝灰 → 加蓝提升对比
110
+ "rgb(100, 150, 255)", # 雾蓝 → 饱和冷蓝
111
+ "rgb(255, 200, 0)", # 淡柠黄 → 纯柠黄
112
+ "rgb(200, 100, 190)", # 粉紫 → 更紫
113
+ "rgb(80, 180, 255)", # 冰蓝 → 更冷更亮的蓝
114
+ ]
115
+ # pallette for semantic label color
116
+ semantic_color_ref = px.colors.qualitative.Vivid_r
117
+ # attention map visulaization color
118
+ attn_color_ref = px.colors.sequential.Viridis
119
+
120
+ from matplotlib.colors import LinearSegmentedColormap
121
+
122
+ cmap_attn = LinearSegmentedColormap.from_list("mycmap", attn_color_ref, N=256)
my_utils/render.py ADDED
@@ -0,0 +1,482 @@
 
1
+ # import os, sys
2
+ # sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
3
+ # import trimesh
4
+ # import pyrender
5
+ # import numpy as np
6
+ # # import open3d as o3d
7
+ # from copy import deepcopy
8
+ # os.environ['PYOPENGL_PLATFORM'] = 'egl'
9
+ # from my_utils.refs import semantic_color_ref, graph_color_ref, joint_color_ref
10
+
11
+ # def get_rotation_axis_angle(k, theta):
12
+ # '''
13
+ # Rotation matrix converter from axis-angle using Rodrigues' rotation formula
14
+
15
+ # Args:
16
+ # k (np.ndarray): 3D unit vector representing the axis to rotate about.
17
+ # theta (float): Angle to rotate with in radians.
18
+
19
+ # Returns:
20
+ # R (np.ndarray): 3x3 rotation matrix.
21
+ # '''
22
+ # if np.linalg.norm(k) == 0.:
23
+ # return np.eye(3)
24
+ # k = k / np.linalg.norm(k)
25
+ # kx, ky, kz = k[0], k[1], k[2]
26
+ # cos, sin = np.cos(theta), np.sin(theta)
27
+ # R = np.zeros((3, 3), dtype=np.float32)
28
+ # R[0, 0] = cos + (kx**2) * (1 - cos)
29
+ # R[0, 1] = kx * ky * (1 - cos) - kz * sin
30
+ # R[0, 2] = kx * kz * (1 - cos) + ky * sin
31
+ # R[1, 0] = kx * ky * (1 - cos) + kz * sin
32
+ # R[1, 1] = cos + (ky**2) * (1 - cos)
33
+ # R[1, 2] = ky * kz * (1 - cos) - kx * sin
34
+ # R[2, 0] = kx * kz * (1 - cos) - ky * sin
35
+ # R[2, 1] = ky * kz * (1 - cos) + kx * sin
36
+ # R[2, 2] = cos + (kz**2) * (1 - cos)
37
+ # return R
38
+
39
+ # def rescale_axis(jtype, axis_d, axis_o, box_center):
40
+ # '''
41
+ # Function to rescale the axis for rendering
42
+
43
+ # Args:
44
+ # - jtype (int): joint type
45
+ # - axis_d (np.array): axis direction
46
+ # - axis_o (np.array): axis origin
47
+ # - box_center (np.array): bounding box center
48
+
49
+ # Returns:
50
+ # - center (np.array): rescaled axis origin
51
+ # - axis_d (np.array): rescaled axis direction
52
+ # '''
53
+ # if jtype == 0 or jtype == 1:
54
+ # return [0., 0., 0.], [0., 0., 0.]
55
+ # if jtype == 3 or jtype == 4:
56
+ # center = box_center
57
+ # else:
58
+ # center = axis_o + np.dot(axis_d, box_center-axis_o) * axis_d
59
+ # return center.tolist(), axis_d.tolist()
60
+
61
+ # # def get_axis_mesh(k, axis_o, bbox_center, joint_type):
62
+ # # '''
63
+ # # Function to get the axis mesh
64
+
65
+ # # Args:
66
+ # # - k (np.array): axis direction
67
+ # # - center (np.array): axis origin
68
+ # # - bbox_center (np.array): bounding box center
69
+ # # - joint_type (int): joint type
70
+ # # '''
71
+ # # if joint_type == 0 or joint_type == 1 or np.linalg.norm(k) == 0. :
72
+ # # return None
73
+
74
+ # # k = k / np.linalg.norm(k)
75
+
76
+ # # if joint_type == 3 or joint_type == 4: # prismatic or screw
77
+ # # axis_o = bbox_center
78
+ # # else: # revolute or continuous
79
+ # # axis_o = axis_o + np.dot(k, bbox_center-axis_o) * k
80
+ # # axis = o3d.geometry.TriangleMesh.create_arrow(cylinder_radius=0.015, cone_radius=0.03, cylinder_height=1.0, cone_height=0.08)
81
+ # # arrow = np.array([0., 0., 1.], dtype=np.float32)
82
+ # # n = np.cross(arrow, k)
83
+ # # rad = np.arccos(np.dot(arrow, k))
84
+ # # R_arrow = get_rotation_axis_angle(n, rad)
85
+ # # axis.rotate(R_arrow, center=(0, 0, 0))
86
+ # # axis.translate(axis_o[:3])
87
+ # # axis.compute_vertex_normals()
88
+ # # vertices = np.asarray(axis.vertices)
89
+ # # faces = np.asarray(axis.triangles)
90
+ # # trimesh_axis = trimesh.Trimesh(vertices=vertices, faces=faces)
91
+ # # # trimesh_axis.visual.vertex_colors = np.array([0, 0, 0, 1.0], dtype=np.float32)
92
+ # # trimesh_axis.visual.vertex_colors = np.repeat(np.array([0, 0, 0, 1.0]), vertices.shape[0], axis=0)
93
+ # # return trimesh_axis
94
+
95
+ # def get_camera_pose(eye, look_at, up):
96
+ # """
97
+ # Compute the 4x4 transformation matrix for a camera pose.
98
+
99
+ # Parameters:
100
+ # eye (np.ndarray): 3D position of the camera.
101
+ # look_at (np.ndarray): 3D point the camera is looking at.
102
+ # up (np.ndarray): Up vector.
103
+
104
+ # Returns:
105
+ # pose (np.ndarray): 4x4 transformation matrix representing the camera pose.
106
+ # """
107
+ # # Compute the forward, right, and new up vectors
108
+ # forward = (look_at - eye)
109
+ # forward = forward / np.linalg.norm(forward)
110
+
111
+ # right = np.cross(forward, up)
112
+ # right = right / np.linalg.norm(right)
113
+
114
+ # new_up = np.cross(right, forward)
115
+ # new_up = new_up / np.linalg.norm(new_up)
116
+
117
+ # # Create rotation matrix
118
+ # pose = np.eye(4)
119
+ # pose[0:3, 0] = right
120
+ # pose[0:3, 1] = new_up
121
+ # pose[0:3, 2] = -forward # Negative because the camera looks along the negative Z axis in its local coordinate
122
+ # pose[0:3, 3] = eye
123
+
124
+ # return pose
125
+
126
+ # def get_rotation_axis_angle_box(axis, angle):
127
+ # axis = axis / np.linalg.norm(axis)
128
+ # return trimesh.transformations.rotation_matrix(angle, axis)
129
+
130
+ # def get_colored_box(center, size, jtype=None, jrange=None, axis_d=None, axis_o=None):
131
+ # '''
132
+ # Create a solid color box and its animated state if joint info is provided
133
+
134
+ # Args:
135
+ # center (np.array): box center (3,)
136
+ # size (np.array): box size (3,)
137
+ # color (list or array): RGBA color, e.g. [255, 0, 0, 255]
138
+ # jtype (int): joint type (2=rot, 3=slide, 4=screw, 5=continuous)
139
+ # jrange (list): joint motion range
140
+ # axis_d (np.array): axis direction (3,)
141
+ # axis_o (np.array): axis origin (3,)
142
+
143
+ # Returns:
144
+ # box: trimesh.Trimesh at rest
145
+ # box_anim: trimesh.Trimesh after transformation
146
+ # '''
147
+ # size = np.clip(size, a_min=0.005, a_max=3.0)
148
+ # center = np.clip(center, a_min=-3.0, a_max=3.0)
149
+
150
+ # # Rest state box
151
+ # box = trimesh.creation.box(extents=size)
152
+ # box.apply_translation(center)
153
+
154
+ # # Animated state (deepcopy + transform)
155
+ # box_anim = deepcopy(box)
156
+
157
+ # if jtype is not None:
158
+ # if jtype == 2: # revolute
159
+ # theta = np.deg2rad(jrange[1])
160
+ # T = trimesh.transformations.translation_matrix(axis_o)
161
+ # R_3 = get_rotation_axis_angle(axis_d, theta)
162
+ # R = np.eye(4, dtype=np.float32)
163
+ # R[:3, :3] = R_3
164
+ # T_inv = trimesh.transformations.translation_matrix(-axis_o)
165
+ # box_anim.apply_transform(T @ R @ T_inv)
166
+
167
+ # elif jtype == 3: # prismatic
168
+ # dist = float(jrange[1])
169
+ # T = trimesh.transformations.translation_matrix(axis_d * dist)
170
+ # box_anim.apply_transform(T)
171
+
172
+ # elif jtype == 4: # screw
173
+ # theta = np.pi / 4
174
+ # dist = float(jrange[1])
175
+ # T1 = trimesh.transformations.translation_matrix(-axis_o)
176
+ # R = get_rotation_axis_angle(axis_d, theta)
177
+ # T2 = trimesh.transformations.translation_matrix(axis_o + axis_d * dist)
178
+ # box_anim.apply_transform(T1 @ R @ T2)
179
+
180
+ # elif jtype == 5: # continuous
181
+ # theta = np.pi / 4
182
+ # T = trimesh.transformations.translation_matrix(-axis_o)
183
+ # R_3 = get_rotation_axis_angle(axis_d, theta)
184
+ # R = np.eye(4, dtype=np.float32)
185
+ # R[:3, :3] = R_3
186
+ # T_inv = trimesh.transformations.translation_matrix(axis_o)
187
+ # box_anim.apply_transform(T @ R @ T_inv)
188
+
189
+ # return box, box_anim
190
+
191
+ # # def get_bbox_mesh_pair(center, size, radius=0.01, jtype=None, jrange=None, axis_d=None, axis_o=None):
192
+ # # '''
193
+ # # Function to get the bounding box mesh pair
194
+
195
+ # # Args:
196
+ # # - center (np.array): bounding box center
197
+ # # - size (np.array): bounding box size
198
+ # # - radius (float): radius of the cylinder
199
+ # # - jtype (int): joint type
200
+ # # - jrange (list): joint range
201
+ # # - axis_d (np.array): axis direction
202
+ # # - axis_o (np.array): axis origin
203
+
204
+ # # Returns:
205
+ # # - trimesh_box (trimesh object): trimesh object for the bbox at resting state
206
+ # # - trimesh_box_anim (trimesh object): trimesh object for the bbox at opening state
207
+ # # '''
208
+
209
+ # # size = np.clip(size, a_max=3, a_min=0.005)
210
+ # # center = np.clip(center, a_max=3, a_min=-3)
211
+
212
+ # # line_box = o3d.geometry.TriangleMesh()
213
+ # # z_cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=size[2])
214
+ # # y_cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=size[1])
215
+ # # R_y = get_rotation_axis_angle(np.array([1., 0., 0.], dtype=np.float32), np.pi / 2)
216
+ # # y_cylinder.rotate(R_y, center=(0, 0, 0))
217
+ # # x_cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=size[0])
218
+ # # R_x = get_rotation_axis_angle(np.array([0., 1., 0.], dtype=np.float32), np.pi / 2)
219
+ # # x_cylinder.rotate(R_x, center=(0, 0, 0))
220
+
221
+
222
+ # # z1 = deepcopy(z_cylinder)
223
+ # # z1.translate(np.array([-size[0] / 2, size[1] / 2, 0.], dtype=np.float32))
224
+ # # line_box += z1.translate(center[:3])
225
+ # # z2 = deepcopy(z_cylinder)
226
+ # # z2.translate(np.array([size[0] / 2, size[1] / 2, 0.], dtype=np.float32))
227
+ # # line_box += z2.translate(center[:3])
228
+ # # z3 = deepcopy(z_cylinder)
229
+ # # z3.translate(np.array([-size[0] / 2, -size[1] / 2, 0.], dtype=np.float32))
230
+ # # line_box += z3.translate(center[:3])
231
+ # # z4 = deepcopy(z_cylinder)
232
+ # # z4.translate(np.array([size[0] / 2, -size[1] / 2, 0.], dtype=np.float32))
233
+ # # line_box += z4.translate(center[:3])
234
+
235
+ # # y1 = deepcopy(y_cylinder)
236
+ # # y1.translate(np.array([-size[0] / 2, 0., size[2] / 2], dtype=np.float32))
237
+ # # line_box += y1.translate(center[:3])
238
+ # # y2 = deepcopy(y_cylinder)
239
+ # # y2.translate(np.array([size[0] / 2, 0., size[2] / 2], dtype=np.float32))
240
+ # # line_box += y2.translate(center[:3])
241
+ # # y3 = deepcopy(y_cylinder)
242
+ # # y3.translate(np.array([-size[0] / 2, 0., -size[2] / 2], dtype=np.float32))
243
+ # # line_box += y3.translate(center[:3])
244
+ # # y4 = deepcopy(y_cylinder)
245
+ # # y4.translate(np.array([size[0] / 2, 0., -size[2] / 2], dtype=np.float32))
246
+ # # line_box += y4.translate(center[:3])
247
+
248
+ # # x1 = deepcopy(x_cylinder)
249
+ # # x1.translate(np.array([0., -size[1] / 2, size[2] / 2], dtype=np.float32))
250
+ # # line_box += x1.translate(center[:3])
251
+ # # x2 = deepcopy(x_cylinder)
252
+ # # x2.translate(np.array([0., size[1] / 2, size[2] / 2], dtype=np.float32))
253
+ # # line_box += x2.translate(center[:3])
254
+ # # x3 = deepcopy(x_cylinder)
255
+ # # x3.translate(np.array([0., -size[1] / 2, -size[2] / 2], dtype=np.float32))
256
+ # # line_box += x3.translate(center[:3])
257
+ # # x4 = deepcopy(x_cylinder)
258
+ # # x4.translate(np.array([0., size[1] / 2, -size[2] / 2]))
259
+ # # line_box += x4.translate(center[:3])
260
+
261
+ # # # transform
262
+ # # line_box_anim = deepcopy(line_box)
263
+ # # if jtype == 2: # revolute
264
+ # # theta = np.deg2rad(jrange[1])
265
+ # # line_box_anim.translate(-axis_o)
266
+ # # R = get_rotation_axis_angle(axis_d, theta)
267
+ # # line_box_anim.rotate(R, center=(0, 0, 0))
268
+ # # line_box_anim.translate(axis_o)
269
+ # # elif jtype == 3: # prismatic
270
+ # # dist = np.array(jrange[1], dtype=np.float32)
271
+ # # line_box_anim.translate(axis_d * dist)
272
+ # # elif jtype == 4: # screw
273
+ # # dist = np.array(jrange[1], dtype=np.float32)
274
+ # # theta = 0.25 * np.pi
275
+ # # R = get_rotation_axis_angle(axis_d, theta)
276
+ # # line_box_anim.translate(-axis_o)
277
+ # # line_box_anim.rotate(R, center=(0, 0, 0))
278
+ # # line_box_anim.translate(axis_o)
279
+ # # line_box_anim.translate(axis_d * dist)
280
+ # # elif jtype == 5: # continuous
281
+ # # theta = 0.25 * np.pi
282
+ # # R = get_rotation_axis_angle(axis_d, theta)
283
+ # # line_box_anim.translate(-axis_o)
284
+ # # line_box_anim.rotate(R, center=(0, 0, 0))
285
+ # # line_box_anim.translate(axis_o)
286
+
287
+ # # vertices = np.asarray(line_box.vertices)
288
+ # # faces = np.asarray(line_box.triangles)
289
+ # # trimesh_box = trimesh.Trimesh(vertices=vertices, faces=faces)
290
+ # # trimesh_box.visual.vertex_colors = np.array([0.0, 1.0, 1.0, 1.0], dtype=np.float32)
291
+
292
+ # # vertices_anim = np.asarray(line_box_anim.vertices)
293
+ # # faces_anim = np.asarray(line_box_anim.triangles)
294
+ # # trimesh_box_anim = trimesh.Trimesh(vertices=vertices_anim, faces=faces_anim)
295
+ # # trimesh_box_anim.visual.vertex_colors = np.array([0.0, 1.0, 1.0, 1.0], dtype=np.float32)
296
+
297
+ # # return trimesh_box, trimesh_box_anim
298
+
299
+
300
+ # def get_color_from_palette(palette, idx):
301
+ # '''
302
+ # Function to get the color from the palette
303
+
304
+ # Args:
305
+ # - palette (list): list of color reference
306
+ # - idx (int): index of the color
307
+
308
+ # Returns:
309
+ # - color (np.array): color in the index of idx
310
+ # '''
311
+ # ref = palette[idx % len(palette)]
312
+ # ref_list = [int(i) for i in ref[4:-1].split(',')]
313
+ # if idx % len(palette) == 0:
314
+ # ref_list.append(120)
315
+ # else:
316
+ # ref_list.append(255)
317
+ # color = np.array([ref_list], dtype=np.float32) / 255.
318
+ # return color
319
+
320
+
321
+
322
+ # def render_anim_parts(aabbs, axiss, resolution=256):
323
+ # '''
324
+ # Function to render the 3D bounding boxes and axes in the scene
325
+
326
+ # Args:
327
+ # aabbs: list of trimesh objects for the bounding box of each part
328
+ # axiss: list of trimesh objects for the axis of each part
329
+ # resolution: resolution of the rendered image
330
+
331
+ # Returns:
332
+ # color_img: rendered image
333
+ # '''
334
+ # n_parts = len(aabbs)
335
+ # # build mesh for each 3D bounding box
336
+ # scene = pyrender.Scene()
337
+ # for i in range(n_parts):
338
+ # scene.add(aabbs[i])
339
+ # if axiss[i] is not None:
340
+ # scene.add(axiss[i])
341
+
342
+ # # Add light to the scene
343
+ # scene.ambient_light = np.full(shape=3, fill_value=1.5, dtype=np.float32)
344
+ # light = pyrender.DirectionalLight(color=np.ones(2), intensity=5.0)
345
+
346
+ # # Add camera to the scene
347
+ # pose = get_camera_pose(eye=np.array([1.5, 1.2, 4.5]), look_at=np.array([0, 0, 0]), up=np.array([0, 1, 0]))
348
+ # camera = pyrender.PerspectiveCamera(yfov=np.pi / 5.0, aspectRatio=1.0)
349
+ # scene.add(light, pose=pose)
350
+ # scene.add(camera, pose=pose)
351
+
352
+ # # Offscreen Rendering
353
+ # offscreen_renderer = pyrender.OffscreenRenderer(resolution, resolution)
354
+
355
+ # # Render the scene
356
+ # color_img, _ = offscreen_renderer.render(scene)
357
+
358
+ # # Cleanup
359
+ # offscreen_renderer.delete()
360
+ # scene.clear()
361
+ # return color_img
362
+
363
+
364
+ # def draw_boxes_axiss_anim(aabbs_0, aabbs_1, axiss, mode='graph', resolution=256, types=None):
365
+ # '''
366
+ # Function to draw the 3D bounding boxes and axes of the two frames
367
+
368
+ # Args:
369
+ # aabbs_0: list of trimesh objects for the bounding box of each part in the resting state
370
+ # aabbs_1: list of trimesh objects for the bounding box of each part in the open state
371
+ # axiss: list of trimesh objects for the axis of each part
372
+ # mode:
373
+ # 'graph' using palette corresponding to graph node,
374
+ # 'jtype' using palette corresponding to joint type,
375
+ # 'semantic' using palette corresponding to semantic label
376
+ # resolution: resolution of the rendered image
377
+ # types: ids corresponding to each joint type or semantic label, if mode is 'jtype' or 'semantic'
378
+ # '''
379
+ # n_parts = len(aabbs_0)
380
+ # ren_aabbs_0 = []
381
+ # ren_aabbs_1 = []
382
+ # ren_axiss = []
383
+ # if mode == 'graph':
384
+ # palette = graph_color_ref
385
+ # # Add meshes to the scene
386
+ # for i in range(n_parts):
387
+ # color = get_color_from_palette(palette, i)
388
+ # aabb_0 = pyrender.Mesh.from_trimesh(aabbs_0[i], smooth=False)
389
+ # aabb_0.primitives[0].color_0 = color.repeat(aabb_0.primitives[0].positions.shape[0], axis=0)
390
+ # ren_aabbs_0.append(aabb_0)
391
+ # aabb_1 = pyrender.Mesh.from_trimesh(aabbs_1[i], smooth=False)
392
+ # aabb_1.primitives[0].color_0 = color.repeat(aabb_1.primitives[0].positions.shape[0], axis=0)
393
+ # ren_aabbs_1.append(aabb_1)
394
+ # if axiss[i] is not None:
395
+ # axis = pyrender.Mesh.from_trimesh(axiss[i], smooth=False)
396
+ # axis.primitives[0].color_0 = color.repeat(axis.primitives[0].positions.shape[0], axis=0)
397
+ # ren_axiss.append(axis)
398
+ # else:
399
+ # ren_axiss.append(None)
400
+ # elif mode == 'jtype' or mode == 'semantic':
401
+ # assert types is not None
402
+ # palette = joint_color_ref if mode == 'jtype' else semantic_color_ref
403
+ # # Add meshes to the scene
404
+ # for i in range(n_parts):
405
+ # color = get_color_from_palette(palette, types[i])
406
+ # aabb_0 = pyrender.Mesh.from_trimesh(aabbs_0[i], smooth=False)
407
+ # aabb_0.primitives[0].color_0 = color.repeat(aabb_0.primitives[0].positions.shape[0], axis=0)
408
+ # ren_aabbs_0.append(aabb_0)
409
+ # aabb_1 = pyrender.Mesh.from_trimesh(aabbs_1[i], smooth=False)
410
+ # aabb_1.primitives[0].color_0 = color.repeat(aabb_1.primitives[0].positions.shape[0], axis=0)
411
+ # ren_aabbs_1.append(aabb_1)
412
+
413
+ # if axiss[i] is not None:
414
+ # axis = pyrender.Mesh.from_trimesh(axiss[i], smooth=False)
415
+ # ren_axiss.append(axis)
416
+ # else:
417
+ # ren_axiss.append(None)
418
+ # else:
419
+ # raise ValueError('mode must be either graph or type')
420
+
421
+ # img0 = render_anim_parts(ren_aabbs_0, ren_axiss, resolution=resolution)
422
+ # img1 = render_anim_parts(ren_aabbs_1, ren_axiss, resolution=resolution)
423
+ # return np.concatenate([img0, img1], axis=1)
424
+
425
+ # def prepare_meshes(info_dict):
426
+ # """
427
+ # Function to prepare the bbox and axis meshes for visualization
428
+
429
+ # Args:
430
+ # - info_dict (dict): output json containing the graph information
431
+ # """
432
+ # from my_utils.refs import joint_ref, sem_ref
433
+ # tree = info_dict["diffuse_tree"]
434
+ # bbox_0, bbox_1, axiss, labels, jtypes = [], [], [], [], []
435
+ # root_id = 0
436
+ # # get root id
437
+ # for node in tree:
438
+ # if node["parent"] == -1:
439
+ # root_id = node["id"]
440
+ # for node in tree:
441
+ # # retrieve info
442
+ # box_cen = np.array(node["aabb"]["center"], dtype=np.float32)
443
+ # box_size = np.array(node["aabb"]["size"], dtype=np.float32)
444
+ # axis_d = np.array(node["joint"]["axis"]["direction"], dtype=np.float32)
445
+ # axis_o = np.array(node["joint"]["axis"]["origin"], dtype=np.float32)
446
+ # jtype = joint_ref["fwd"][node["joint"]["type"]]
447
+ # # construct meshes for bbox in two states (closed and fully open)
448
+ # if node["id"] == root_id or node["parent"] == root_id: # use the joint info directly
449
+ # bb_0, bb_1 = get_colored_box(
450
+ # box_cen,
451
+ # box_size,
452
+ # jtype=jtype,
453
+ # jrange= node["joint"]["range"],
454
+ # axis_d=axis_d,
455
+ # axis_o=axis_o,
456
+ # )
457
+ # else: # use the parent joint info
458
+ # parent_id = node["parent"]
459
+ # bb_0, bb_1 = get_colored_box(
460
+ # box_cen,
461
+ # box_size,
462
+ # jtype=joint_ref["fwd"][tree[parent_id]["joint"]["type"]],
463
+ # jrange=tree[parent_id]["joint"]["range"],
464
+ # axis_d=np.array(tree[parent_id]["joint"]["axis"]["direction"], dtype=np.float32),
465
+ # axis_o=np.array(tree[parent_id]["joint"]["axis"]["origin"], dtype=np.float32),
466
+ # )
467
+ # # construct mesh for joint axis
468
+ # axis_mesh = get_axis_mesh(axis_d, axis_o, box_cen, node["joint"]["type"])
469
+ # # append
470
+ # bbox_0.append(bb_0)
471
+ # bbox_1.append(bb_1)
472
+ # axiss.append(axis_mesh)
473
+ # labels.append(sem_ref["fwd"][node["name"]])
474
+ # jtypes.append(jtype)
475
+
476
+ # return {
477
+ # "bbox_0": bbox_0,
478
+ # "bbox_1": bbox_1,
479
+ # "axiss": axiss,
480
+ # "labels": labels,
481
+ # "jtypes": jtypes,
482
+ # }
my_utils/savermixins.py ADDED
@@ -0,0 +1,55 @@
1
+ import os
2
+ import json
3
+ import torch
4
+ import imageio
5
+ import numpy as np
6
+
7
+ class SaverMixin():
8
+
9
+ @property
10
+ def save_dir(self):
11
+ return self.hparams.save_dir
12
+
13
+ def convert_format(self, data):
14
+ if isinstance(data, np.ndarray):
15
+ return data
16
+ elif isinstance(data, torch.Tensor):
17
+ return data.cpu().numpy()
18
+ elif isinstance(data, list):
19
+ return [self.convert_format(d) for d in data]
20
+ elif isinstance(data, dict):
21
+ return {k: self.convert_format(v) for k, v in data.items()}
22
+ else:
23
+ raise TypeError('Data must be in type numpy.ndarray, torch.Tensor, list or dict, getting', type(data))
24
+
25
+ def get_save_path(self, filename):
26
+ save_path = os.path.join(self.save_dir, filename)
27
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
28
+ return save_path
29
+
30
+ def save_rgb_image(self, filename, img):
31
+ imageio.imwrite(self.get_save_path(filename), img)
32
+
33
+ def save_rgb_video(self, filename, stage='fit', filter=None):
34
+ img_dir = os.path.join(self.logger.log_dir, 'images', stage)
35
+
36
+ writer_graph = imageio.get_writer(os.path.join(img_dir, filename), fps=1)
37
+
38
+ for file in sorted(os.listdir(img_dir)):
39
+ if file.endswith('.png') and 'gt' not in file:
40
+ if filter is not None:
41
+ if filter in file:
42
+ writer_graph.append_data(imageio.imread(os.path.join(img_dir, file)))
43
+ else:
44
+ writer_graph.append_data(imageio.imread(os.path.join(img_dir, file)))
45
+
46
+ writer_graph.close()
47
+
48
+
49
+
50
+ def save_json(self, filename, data):
51
+ save_path = self.get_save_path(filename)
52
+ with open(save_path, 'w') as f:
53
+ json.dump(data, f)
54
+
55
+
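A hypothetical LightningModule using this mixin; save_dir is read from hparams, so it is passed through save_hyperparameters here. Class name, paths, and payloads are illustrative.

import numpy as np
import lightning.pytorch as pl
from my_utils.savermixins import SaverMixin

class DemoModule(pl.LightningModule, SaverMixin):
    def __init__(self, save_dir="results/demo"):
        super().__init__()
        self.save_hyperparameters()          # exposes self.hparams.save_dir for the mixin

    def on_test_end(self):
        img = np.zeros((64, 64, 3), dtype=np.uint8)
        self.save_rgb_image("renders/blank.png", img)        # written under save_dir
        self.save_json("meta.json", {"note": "demo output"})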
objects/__init__.py ADDED
File without changes
objects/dict_utils.py ADDED
@@ -0,0 +1,299 @@
1
+ import numpy as np
2
+ from scipy.optimize import linear_sum_assignment
3
+
4
+
5
+ def get_base_part_idx(obj_dict):
6
+ """
7
+ Get the index of the base part in the object dictionary\n
8
+
9
+ - obj_dict: the object dictionary\n
10
+
11
+ Return:\n
12
+ - base_part_idx: the index of the base part
13
+ """
14
+
15
+ # Adjust for NAP's corner case
16
+ base_part_ids = np.where(
17
+ [part["parent"] == -1 for part in obj_dict["diffuse_tree"]]
18
+ )[0]
19
+ if len(base_part_ids) > 0:
20
+ return base_part_ids[0].item()
21
+ else:
22
+ raise ValueError("No base part found")
23
+
24
+
25
+ def get_bbox_vertices(obj_dict, part_idx):
26
+ """
27
+ Get the 8 vertices of the bounding box\n
28
+ The order of the vertices is the same as the order that pytorch3d.ops.box3d_overlap expects\n
29
+ (This order is not necessary since we are not using pytorch3d.ops.box3d_overlap anymore)\n
30
+
31
+ - bbox_center: the center of the bounding box in the form: [cx, cy, cz]\n
32
+ - bbox_size: the size of the bounding box in the form: [lx, ly, lz]\n
33
+
34
+ Return:\n
35
+ - bbox_vertices: the 8 vertices of the bounding box in the form: [[x0, y0, z0], [x1, y1, z1], ...]
36
+ """
37
+
38
+ part = obj_dict["diffuse_tree"][part_idx]
39
+ bbox_center = np.array(part["aabb"]["center"], dtype=np.float32)
40
+ bbox_size_half = np.array(part["aabb"]["size"], dtype=np.float32) / 2
41
+
42
+ bbox_vertices = np.zeros((8, 3), dtype=np.float32)
43
+
44
+ # Get the 8 vertices of the bounding box in the order that pytorch3d.ops.box3d_overlap expects:
45
+ # 0: (x0, y0, z0) # 1: (x1, y0, z0) # 2: (x1, y1, z0) # 3: (x0, y1, z0)
46
+ # 4: (x0, y0, z1) # 5: (x1, y0, z1) # 6: (x1, y1, z1) # 7: (x0, y1, z1)
47
+ bbox_vertices[0, :] = bbox_center - bbox_size_half
48
+ bbox_vertices[1, :] = bbox_center + np.array(
49
+ [bbox_size_half[0], -bbox_size_half[1], -bbox_size_half[2]], dtype=np.float32
50
+ )
51
+ bbox_vertices[2, :] = bbox_center + np.array(
52
+ [bbox_size_half[0], bbox_size_half[1], -bbox_size_half[2]], dtype=np.float32
53
+ )
54
+ bbox_vertices[3, :] = bbox_center + np.array(
55
+ [-bbox_size_half[0], bbox_size_half[1], -bbox_size_half[2]], dtype=np.float32
56
+ )
57
+ bbox_vertices[4, :] = bbox_center + np.array(
58
+ [-bbox_size_half[0], -bbox_size_half[1], bbox_size_half[2]], dtype=np.float32
59
+ )
60
+ bbox_vertices[5, :] = bbox_center + np.array(
61
+ [bbox_size_half[0], -bbox_size_half[1], bbox_size_half[2]], dtype=np.float32
62
+ )
63
+ bbox_vertices[6, :] = bbox_center + bbox_size_half
64
+ bbox_vertices[7, :] = bbox_center + np.array(
65
+ [-bbox_size_half[0], bbox_size_half[1], bbox_size_half[2]], dtype=np.float32
66
+ )
67
+
68
+ return bbox_vertices
69
+
70
+
71
+ def compute_overall_bbox_size(obj_dict):
72
+ """
73
+ Compute the overall bounding box size of the object\n
74
+
75
+ - obj_dict: the object dictionary\n
76
+
77
+ Return:\n
78
+ - bbox_size: the overall bounding box size in the form: [lx, ly, lz]
79
+ """
80
+
81
+ bbox_min = np.zeros((len(obj_dict["diffuse_tree"]), 3), dtype=np.float32)
82
+ bbox_max = np.zeros((len(obj_dict["diffuse_tree"]), 3), dtype=np.float32)
83
+
84
+ # For each part, compute the bounding box and store the min and max vertices
85
+ for part_idx, part in enumerate(obj_dict["diffuse_tree"]):
86
+ bbox_center = np.array(part["aabb"]["center"], dtype=np.float32)
87
+ bbox_size_half = np.array(part["aabb"]["size"], dtype=np.float32) / 2
88
+ bbox_min[part_idx] = bbox_center - bbox_size_half
89
+ bbox_max[part_idx] = bbox_center + bbox_size_half
90
+
91
+ # Compute the overall bounding box size
92
+ bbox_min = np.min(bbox_min, axis=0)
93
+ bbox_max = np.max(bbox_max, axis=0)
94
+ bbox_size = bbox_max - bbox_min
95
+ return bbox_size
96
+
97
+
98
+ def remove_handles(obj_dict):
99
+ """
100
+ Remove the handles from the object dictionary and adjust the id, parent, and children of the parts\n
101
+
102
+ - obj_dict: the object dictionary\n
103
+
104
+ Return:\n
105
+ - obj_dict: the object dictionary without the handles
106
+ """
107
+
108
+ # Find the indices of the handles
109
+ handle_idxs = np.array(
110
+ [
111
+ i
112
+ for i in range(len(obj_dict["diffuse_tree"]))
113
+ if obj_dict["diffuse_tree"][i]["name"] == "handle"
114
+ and obj_dict["diffuse_tree"][i]["parent"] != -1
115
+ ]
116
+ ) # Added to avoid corner case of NAP where the handle is the base part
117
+
118
+ # Remove the handles from the object dictionary and adjust the id, parent, and children of the parts
119
+ for handle_idx in handle_idxs:
120
+ handle = obj_dict["diffuse_tree"][handle_idx]
121
+ parent_idx = handle["parent"]
122
+ if handle_idx in obj_dict["diffuse_tree"][parent_idx]["children"]:
123
+ obj_dict["diffuse_tree"][parent_idx]["children"].remove(handle_idx)
124
+ obj_dict["diffuse_tree"].pop(handle_idx)
125
+
126
+ # Adjust the id, parent, and children of the parts
127
+ for part in obj_dict["diffuse_tree"]:
128
+ if part["id"] > handle_idx:
129
+ part["id"] -= 1
130
+ if part["parent"] > handle_idx:
131
+ part["parent"] -= 1
132
+ for i in range(len(part["children"])):
133
+ if part["children"][i] > handle_idx:
134
+ part["children"][i] -= 1
135
+
136
+ handle_idxs -= 1
137
+
138
+ return obj_dict
139
+
140
+
141
+ # def normalize_object(obj_dict):
142
+ # """
143
+ # Normalize the object as a whole\n
144
+ # Make the base part to be centered at the origin and have a size of 2\n
145
+
146
+ # obj_dict: the object dictionary
147
+ # """
148
+ # # Find the base part and compute the translation and scaling factors
149
+ # tree = obj_dict["diffuse_tree"]
150
+ # for part in tree:
151
+ # if part["parent"] == -1:
152
+ # translate = -np.array(part["aabb"]["center"], dtype=np.float32)
153
+ # scale = 2.0 / np.array(part["aabb"]["size"], dtype=np.float32)
154
+ # break
155
+
156
+ # for part in tree:
157
+ # part["aabb"]["center"] = (
158
+ # np.array(part["aabb"]["center"], dtype=np.float32) + translate
159
+ # ) * scale
160
+ # part["aabb"]["size"] = np.array(part["aabb"]["size"], dtype=np.float32) * scale
161
+ # if part["joint"]["type"] != "fixed":
162
+ # part["joint"]["axis"]["origin"] = (
163
+ # np.array(part["joint"]["axis"]["origin"], dtype=np.float32) + translate
164
+ # ) * scale
165
+
166
+ def zero_center_object(obj_dict):
167
+ """
168
+ Zero center the object as a whole\n
169
+
170
+ - obj_dict: the object dictionary
171
+ """
172
+
173
+ bbox_min = np.zeros((len(obj_dict["diffuse_tree"]), 3))
174
+ bbox_max = np.zeros((len(obj_dict["diffuse_tree"]), 3))
175
+
176
+ # For each part, compute the bounding box and store the min and max vertices
177
+ for part_idx, part in enumerate(obj_dict["diffuse_tree"]):
178
+ bbox_center = np.array(part["aabb"]["center"])
179
+ bbox_size_half = np.array(part["aabb"]["size"]) / 2
180
+ bbox_min[part_idx] = bbox_center - bbox_size_half
181
+ bbox_max[part_idx] = bbox_center + bbox_size_half
182
+
183
+ # Compute the overall bounding box size
184
+ bbox_min = np.min(bbox_min, axis=0)
185
+ bbox_max = np.max(bbox_max, axis=0)
186
+ bbox_center = (bbox_min + bbox_max) / 2
187
+
188
+ translate = -bbox_center
189
+
190
+ for part in obj_dict["diffuse_tree"]:
191
+ part["aabb"]["center"] = np.array(part["aabb"]["center"]) + translate
192
+ if part["joint"]["type"] != "fixed":
193
+ part["joint"]["axis"]["origin"] = np.array(part["joint"]["axis"]["origin"]) + translate
194
+
195
+
196
+ def rescale_object(obj_dict, scale_factor):
197
+ """
198
+ Rescale the object as a whole\n
199
+
200
+ - obj_dict: the object dictionary\n
201
+ - scale_factor: the scale factor to rescale the object
202
+ """
203
+
204
+ for part in obj_dict["diffuse_tree"]:
205
+ part["aabb"]["center"] = (
206
+ np.array(part["aabb"]["center"], dtype=np.float32) * scale_factor
207
+ )
208
+ part["aabb"]["size"] = (
209
+ np.array(part["aabb"]["size"], dtype=np.float32) * scale_factor
210
+ )
211
+ if part["joint"]["type"] != "fixed":
212
+ part["joint"]["axis"]["origin"] = (
213
+ np.array(part["joint"]["axis"]["origin"], dtype=np.float32)
214
+ * scale_factor
215
+ )
216
+
217
+
218
+ def find_part_mapping(obj1_dict, obj2_dict, use_hungarian=False):
219
+ """
220
+ Find the correspondences from the first object to the second object based on closest bbox centers\n
221
+
222
+ - obj1_dict: the first object dictionary\n
223
+ - obj2_dict: the second object dictionary\n
224
+
225
+ Return:\n
226
+ - mapping: the mapping from the first object to the second object in the form: [[obj_part_idx, distance], ...]
227
+ """
228
+ if use_hungarian:
229
+ return hungarian_matching(obj1_dict, obj2_dict)
230
+
231
+ # Initialize the distances to be +inf
232
+ mapping = np.ones((len(obj1_dict["diffuse_tree"]), 2)) * np.inf
233
+
234
+ # For each part in the first object, find the closest part in the second object based on the bounding box center
235
+ for req_part_idx, req_part in enumerate(obj1_dict["diffuse_tree"]):
236
+ for obj_part_idx, obj_part in enumerate(obj2_dict["diffuse_tree"]):
237
+ distance = np.linalg.norm(
238
+ np.array(req_part["aabb"]["center"])
239
+ - np.array(obj_part["aabb"]["center"])
240
+ )
241
+ if distance < mapping[req_part_idx, 1]:
242
+ mapping[req_part_idx, :] = [obj_part_idx, distance]
243
+
244
+ return mapping
245
+
246
+
247
+ def hungarian_matching(obj1_dict, obj2_dict):
248
+ """
249
+ Find the correspondences from the first object to the second object based on closest bbox centers using Hungarian algorithm\n
250
+
251
+ - obj1_dict: the first object dictionary\n
252
+ - obj2_dict: the second object dictionary\n
253
+
254
+ Return:\n
255
+ - mapping: the mapping from the first object to the second object in the form: [[obj_part_idx, distance], ...]
256
+ """
257
+ INF = 9999999
258
+
259
+ tree1 = obj1_dict["diffuse_tree"]
260
+ tree2 = obj2_dict["diffuse_tree"]
261
+
262
+ n_parts1 = len(tree1)
263
+ n_parts2 = len(tree2)
264
+ n_parts_max = max(n_parts1, n_parts2)
265
+
266
+ # Initialize the cost matrix
267
+ cost_matrix = np.ones((n_parts_max, n_parts_max), dtype=np.float32) * INF
268
+ for i in range(n_parts1):
269
+ for j in range(n_parts2):
270
+ cost_matrix[i, j] = np.linalg.norm(
271
+ np.array(tree1[i]["aabb"]["center"], dtype=np.float32)
272
+ - np.array(tree2[j]["aabb"]["center"], dtype=np.float32)
273
+ )
274
+
275
+ # Find the correspondences using the Hungarian algorithm
276
+ row_ind, col_ind = linear_sum_assignment(cost_matrix)
277
+
278
+ # Valid correspondences are those with all cost less than INF
279
+ valid_correspondences = np.where(cost_matrix[row_ind, col_ind] < INF)[0]
280
+ invalid_correspondences = np.where(np.logical_not(cost_matrix[row_ind, col_ind] < INF))[0]
281
+
282
+ row_i = row_ind[valid_correspondences]
283
+ col_i = col_ind[valid_correspondences]
284
+
285
+ # Construct the mapping
286
+ mapping = np.zeros(
287
+ (n_parts1, 2), dtype=np.float32
288
+ )
289
+ mapping[row_i, 0] = col_i
290
+ mapping[row_i, 1] = cost_matrix[row_i, col_i]
291
+
292
+ # assign the index of the most closely matched part
293
+ if n_parts1 > n_parts2:
294
+ row_j = row_ind[invalid_correspondences]
295
+ col_j = cost_matrix[row_j, :].argmin(axis=1)
296
+ mapping[row_j, 0] = col_j
297
+ mapping[row_j, 1] = cost_matrix[row_j, col_j]
298
+
299
+ return mapping
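
A minimal usage sketch of the helpers above (not part of the committed file): the toy object below is invented for illustration and follows the `diffuse_tree` layout these functions expect (`id`, `parent`, `children`, `aabb.center`, `aabb.size`, `joint`).

from objects.dict_utils import get_base_part_idx, compute_overall_bbox_size, find_part_mapping

toy_obj = {
    "diffuse_tree": [
        {"id": 0, "parent": -1, "children": [1], "name": "base",
         "aabb": {"center": [0.0, 0.0, 0.0], "size": [2.0, 2.0, 2.0]},
         "joint": {"type": "fixed"}},
        {"id": 1, "parent": 0, "children": [], "name": "door",
         "aabb": {"center": [1.0, 0.0, 0.0], "size": [0.1, 2.0, 2.0]},
         "joint": {"type": "revolute",
                   "axis": {"origin": [1.0, -1.0, 0.0], "direction": [0.0, 0.0, 1.0]},
                   "range": [0.0, 90.0]}},
    ]
}

print(get_base_part_idx(toy_obj))                               # 0 (the only part with parent == -1)
print(compute_overall_bbox_size(toy_obj))                       # overall [lx, ly, lz] covering base + door
print(find_part_mapping(toy_obj, toy_obj, use_hungarian=True))  # identity matching with zero cost
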
objects/motions.py ADDED
@@ -0,0 +1,99 @@
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ import quaternion
5
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
+ from objects.dict_utils import get_base_part_idx
7
+
8
+ def transform_all_parts(part_vertices, obj_dict, joint_state,
9
+ rotation_fix_range=True, dry_run=True):
10
+ """
11
+ Transform all parts of the object according to the joint state\n
12
+
13
+ - part_vertices: vertices of the object in rest pose in the form:\n
14
+ - [K_parts, N_vertices, 3]\n
15
+ - obj_dict: the object dictionary\n
16
+ - joint_state: the joint state in the range of [0, 1]\n
17
+ - rotation_fix_range (optional): whether to fix the rotation range to 90 degrees for revolute joints\n
18
+ - dry_run (optional): if True, only return the transformation matrices without changing the vertices\n
19
+
20
+ Return:\n
21
+ - part_transformations: records of the transformations applied to the parts\n
22
+ """
23
+ part_transformations = [[] for _ in range(len(obj_dict["diffuse_tree"]))]
24
+ if joint_state == 0.0:
25
+ return part_transformations
26
+
27
+ # Get a visit order of the parts such that children parts are visited before parents
28
+ part_visit_order = []
29
+ base_idx = get_base_part_idx(obj_dict)
30
+ indices_to_visit = [base_idx]
31
+ while len(indices_to_visit) > 0: # Breadth-first traversal
32
+ current_idx = indices_to_visit.pop(0)
33
+ part_visit_order.append(current_idx)
34
+ # if current_idx == 9:
35
+ # import ipdb
36
+ # ipdb.set_trace()
37
+ indices_to_visit += obj_dict["diffuse_tree"][current_idx]["children"]
38
+ part_visit_order.reverse()
39
+
40
+ # Transform the parts in the visit order - children first, then parents
41
+ for i in part_visit_order:
42
+ part = obj_dict["diffuse_tree"][i]
43
+ joint = part["joint"]
44
+ children_idxs = part["children"]
45
+
46
+ # Store the transformation used to transform the part and its children
47
+ applied_tramsformation_matrix = np.eye(4, dtype=np.float32)
48
+ applied_rotation_axis_origin = np.array([np.nan, np.nan, np.nan], dtype=np.float32)
49
+ applied_transformation_type = "none"
50
+ if joint["type"] == "prismatic":
51
+ # Translate the part and its children
52
+ translation = np.array(joint["axis"]["direction"], dtype=np.float32) * joint["range"][1] * joint_state
53
+
54
+ if not dry_run:
55
+ part_vertices[[i] + children_idxs] += translation
56
+
57
+ # Store the transformation used
58
+ applied_tramsformation_matrix[:3, 3] = translation
59
+ applied_transformation_type = "translation"
60
+
61
+ elif joint["type"] == "revolute" or joint["type"] == "continuous":
62
+ if joint["type"] == "revolute":
63
+ if not rotation_fix_range:
64
+ # Use the full range as specified in the object file
65
+ rotation_radian = np.radians(joint["range"][1] * joint_state)
66
+ else:
67
+ # Fix the rotation range to 90 degrees
68
+ rotation_range_sign = np.sign(joint["range"][1])
69
+ rotation_radian = np.radians(rotation_range_sign * 90 * joint_state)
70
+
71
+ else:
72
+ rotation_radian = np.radians(360 * joint_state)
73
+
74
+ # Prepare the rotation matrix via axis-angle representation and quaternion
75
+ rotation_axis_origin = np.array(joint["axis"]["origin"], dtype=np.float32)
76
+ rotation_axis_direction = np.array(joint["axis"]["direction"], dtype=np.float32) / np.linalg.norm(joint["axis"]["direction"])
77
+ rotation_matrix = quaternion.as_rotation_matrix(quaternion.from_rotation_vector(rotation_radian * rotation_axis_direction))
78
+
79
+ if not dry_run:
80
+ # Rotate the part and its children
81
+ vertices_to_rotate = (part_vertices[[i] + children_idxs] - rotation_axis_origin)
82
+ part_vertices[[i] + children_idxs] = np.matmul(rotation_matrix, vertices_to_rotate.transpose([0, 2, 1])).transpose([0, 2, 1]) + rotation_axis_origin
83
+
84
+ # Store the transformation used
85
+ applied_tramsformation_matrix[:3, :3] = rotation_matrix
86
+ applied_rotation_axis_origin = rotation_axis_origin
87
+ applied_transformation_type = "rotation"
88
+
89
+ # Record the transformation used
90
+ if not applied_transformation_type == "none":
91
+ record = {
92
+ "type": applied_transformation_type,
93
+ "matrix": applied_tramsformation_matrix,
94
+ "rotation_axis_origin": applied_rotation_axis_origin
95
+ }
96
+ for idx in [i] + children_idxs:
97
+ part_transformations[idx].append(record)
98
+
99
+ return part_transformations
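
A quick sketch of driving an articulation with the function above (illustrative only; `toy_obj` is the invented two-part object from the dict_utils example, and the vertex array shape is arbitrary).

import numpy as np
from objects.motions import transform_all_parts

part_vertices = np.zeros((2, 8, 3), dtype=np.float32)   # [K_parts, N_vertices, 3], e.g. bbox corners per part
records = transform_all_parts(part_vertices, toy_obj, joint_state=0.5,
                              rotation_fix_range=True, dry_run=False)
# With dry_run=False the revolute door is rotated 45 degrees in place (half of the fixed 90-degree range);
# records[i] lists the 4x4 transforms applied to part i, with children inheriting their parent's motion.
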
requirements.txt ADDED
@@ -0,0 +1,21 @@
1
+ --extra-index-url https://download.pytorch.org/whl/cu121
2
+
3
+ torch==2.2.2
4
+ torchvision==0.17.2
5
+ pytorch-lightning==2.4.0
6
+ lightning==2.3.3
7
+ matplotlib
8
+ numpy==1.26.4
9
+ gradio==5.34.2
10
+ wandb
11
+ omegaconf
12
+ imageio
13
+ diffusers
14
+ plotly
15
+ pybullet
16
+ pyrender
17
+ trimesh
18
+ numpy-quaternion
19
+ openai
20
+ spaces
21
+ json_repair
retrieval/__init__.py ADDED
File without changes
retrieval/obj_retrieval.py ADDED
@@ -0,0 +1,509 @@
1
+ import os
2
+ import sys
3
+ import random
4
+ import json
5
+ import numpy as np
6
+ from copy import deepcopy
7
+
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+ from metrics.iou_cdist import IoU_cDist
10
+ import networkx as nx
11
+
12
+ all_categories = [
13
+ "Table",
14
+ "StorageFurniture",
15
+ "WashingMachine",
16
+ "Microwave",
17
+ "Dishwasher",
18
+ "Refrigerator",
19
+ "Oven",
20
+ ]
21
+
22
+ all_categories_acd = [
23
+ 'armoire',
24
+ 'bookcase',
25
+ 'chestofdrawers',
26
+ 'hangingcabinet',
27
+ 'kitchencabinet'
28
+ ]
29
+
30
+
31
+ def get_hash(file, key="diffuse_tree", ignore_handles=True, dag=False):
32
+ tree = file[key]
33
+ if dag:
34
+ G = nx.DiGraph()
35
+ else:
36
+ G = nx.Graph()
37
+ for node in tree:
38
+ if ignore_handles and "handle" in node["name"].lower():
39
+ continue
40
+ G.add_node(node["id"])
41
+ if node["parent"] != -1:
42
+ G.add_edge(node["id"], node["parent"])
43
+ hashcode = nx.weisfeiler_lehman_graph_hash(G)
44
+ return hashcode
45
+
46
+
47
+ def _verify_mesh_exists(dir, ply_files, verbose=False):
48
+ """
49
+ Verify that the mesh files exist\n
50
+
51
+ - dir: the directory of the object\n
52
+ - ply_files: the list of mesh files\n
53
+ - verbose (optional): whether to print the progress\n
54
+
55
+ return:\n
56
+ - True if the mesh files exist, False otherwise
57
+ """
58
+
59
+ for ply_file in ply_files:
60
+ if not os.path.exists(os.path.join(dir, ply_file)):
61
+ if verbose:
62
+ print(f" - {os.path.join(dir, ply_file)} does not exist!!!")
63
+ return False
64
+ return True
65
+
66
+
67
+ def _generate_output_part_dicts(
68
+ candidate_dict,
69
+ part_idx,
70
+ candidate_dir,
71
+ requirement_part_bbox_sizes,
72
+ bbox_size_eps=1e-3,
73
+ verbose=False,
74
+ ):
75
+ """
76
+ Generate the output part dictionary for all parts that are fulfilled by the candidate part and computing the scale factor of the parts\n
77
+
78
+ - candidate_dict: the candidate object dictionary\n
79
+ - part_idx: the index of the part in the candidate object\n
80
+ - candidate_dir: the directory of the candidate object\n
81
+ - requirement_part_bbox_sizes: the bounding box sizes of the requirement part in the form: [[lx1, ly1, lz1], [lx2, ly2, lz2], ...]\n
82
+ - bbox_size_eps (optional): the epsilon to avoid zero volume parts\n
83
+ - verbose (optional): whether to print the progress\n
84
+
85
+ Return:\n
86
+ - part_dicts: the output part dictionaries in the form:
87
+ - [{name, dir, files, scale_factor=[sx, sy, sz], z_rotate_90}, ...]
88
+ - z_rotate_90 is True if the part needs to be rotated by 90 degrees around the z-axis
89
+ - [{}, ...] if any of the mesh files do not exist
90
+ """
91
+
92
+ part_dicts = [{} for _ in range(len(requirement_part_bbox_sizes))]
93
+ fixed_portion = {
94
+ "name": candidate_dict["diffuse_tree"][part_idx]["name"],
95
+ "dir": candidate_dir,
96
+ "files": candidate_dict["diffuse_tree"][part_idx]["plys"],
97
+ "z_rotate_90": False,
98
+ }
99
+
100
+ # Verify that the mesh files exist
101
+ if not _verify_mesh_exists(fixed_portion["dir"], fixed_portion["files"], verbose):
102
+ if verbose:
103
+ print(
104
+ f" - ! Found invalid mesh files in {fixed_portion['dir']}, skipping..."
105
+ )
106
+ return part_dicts # List of empty dicts
107
+
108
+ candidate_bbox_size = np.array(
109
+ candidate_dict["diffuse_tree"][part_idx]["aabb"]["size"]
110
+ )
111
+ candidate_bbox_size = np.maximum(
112
+ candidate_bbox_size, bbox_size_eps
113
+ ) # Avoid zero volume parts
114
+
115
+ for i, requirement_part_bbox_size in enumerate(requirement_part_bbox_sizes):
116
+ part_dicts[i] = deepcopy(fixed_portion)
117
+
118
+ # For non-handle parts, compute the scale factor normally
119
+ if fixed_portion["name"] != "handle":
120
+ part_dicts[i]["scale_factor"] = list(
121
+ np.array(requirement_part_bbox_size) / candidate_bbox_size
122
+ )
123
+
124
+ # For handles, need to consider the orientation of the selected handle and the orientation of the requirement handle
125
+ else:
126
+ requirement_handle_is_horizontal = (
127
+ requirement_part_bbox_size[0] > requirement_part_bbox_size[1]
128
+ )
129
+ candidate_handle_is_horizontal = (
130
+ candidate_bbox_size[0] > candidate_bbox_size[1]
131
+ )
132
+
133
+ # If the orientations are different, rotate the requirement handle by 90 degrees around the z-axis before computing the scale factor
134
+ if requirement_handle_is_horizontal != candidate_handle_is_horizontal:
135
+ rotated_requirement_part_bbox_size = [
136
+ requirement_part_bbox_size[1],
137
+ requirement_part_bbox_size[0],
138
+ requirement_part_bbox_size[2],
139
+ ]
140
+ part_dicts[i]["scale_factor"] = list(
141
+ np.array(rotated_requirement_part_bbox_size) / candidate_bbox_size
142
+ )
143
+ part_dicts[i]["z_rotate_90"] = True
144
+
145
+ # If the orientations are the same, compute the scale factor normally
146
+ else:
147
+ part_dicts[i]["scale_factor"] = list(
148
+ np.array(requirement_part_bbox_size) / candidate_bbox_size
149
+ )
150
+
151
+ return part_dicts
152
+
153
+
154
+ def find_obj_candidates(
155
+ requirement_dict,
156
+ dataset_dir,
157
+ hashbook_path,
158
+ num_states=5,
159
+ metric_compare_handles=False,
160
+ metric_iou_include_base=True,
161
+ metric_num_samples=10000,
162
+ keep_top=5,
163
+ gt_file_name="object.json",
164
+ verbose=False,
165
+ ):
166
+ """
167
+ Find the best object candidates for selecting the base part using AID\n
168
+
169
+ - requirement_dict: the object dictionary of the requirement\n
170
+ - dataset_dir: the directory of the dataset to search in\n
171
+ - hashbook_path: the path to the hashbook for filtering candidates\n
172
+ - num_states: the number of states to average the metric over\n
173
+ - metric_transform_plucker (optional): whether to use Plucker coordinates to move parts when computing the metric\n
174
+ - metric_compare_handles (optional): whether to compare handles when computing the metric\n
175
+ - metric_iou_include_base (optional): whether to include the base when computing the IoU\n
176
+ - metric_scale_factor (optional): the scale factor to scale the object before computing the metric\n
177
+ - Scaling up the object makes the sampling more well distributed\n
178
+ - metric_num_samples (optional): the number of samples to use when computing the metric\n
179
+ - keep_top (optional): the number of top candidates to keep\n
180
+ - gt_file_name (optional): the name of the ground truth json file, which describes a candidate object\n
181
+ - verbose (optional): whether to print the progress\n
182
+
183
+ return:\n
184
+ - a list of best object candidates of the form:
185
+ - {"category", "dir", "score"}
186
+ """
187
+ dataset_dir = os.path.abspath(dataset_dir)
188
+
189
+ # Load the hashbook
190
+ with open(hashbook_path, "r") as f:
191
+ hashbook = json.load(f)
192
+
193
+ if 'acd' in hashbook_path:
194
+ all_categories = all_categories_acd
195
+ else:
196
+ all_categories = [
197
+ "Table",
198
+ "StorageFurniture",
199
+ "WashingMachine",
200
+ "Microwave",
201
+ "Dishwasher",
202
+ "Refrigerator",
203
+ "Oven",
204
+ ]
205
+
206
+ # Resolve paths to directories
207
+ category_specified = False
208
+ requirement_category = ""
209
+
210
+ # if the category is specified, only search in that category, otherwise search in all categories
211
+ if "obj_cat" in requirement_dict["meta"]:
212
+ requirement_category = requirement_dict["meta"]["obj_cat"]
213
+ category_specified = True
214
+ if requirement_category == "StroageFurniture":
215
+ requirement_category = "StorageFurniture"
216
+ category_dirs = (
217
+ [os.path.join(dataset_dir, requirement_category)]
218
+ if category_specified
219
+ else [os.path.join(dataset_dir, category) for category in all_categories]
220
+ )
221
+
222
+ # Extract requirement data
223
+ requirement_part_names = []
224
+ requirement_part_bboxes = []
225
+ for part in requirement_dict["diffuse_tree"]:
226
+ requirement_part_names.append(part["name"])
227
+ requirement_part_bboxes.append(
228
+ np.concatenate([part["aabb"]["center"], part["aabb"]["size"]])
229
+ )
230
+
231
+ # Compute hash of the requirement graph
232
+ requirement_graph_hash = get_hash(requirement_dict)
233
+
234
+ # Prefetch list of ids of candidate objects with the same hash
235
+ # import ipdb
236
+ # ipdb.set_trace()
237
+ if category_specified and requirement_graph_hash in hashbook[requirement_category]:
238
+ same_hash_obj_ids = hashbook[requirement_category][requirement_graph_hash]
239
+ else:
240
+ # Use all categories if category is not specified
241
+ same_hash_obj_ids = []
242
+ for category in all_categories:
243
+ if requirement_graph_hash in hashbook[category]:
244
+ same_hash_obj_ids += hashbook[category][requirement_graph_hash]
245
+
246
+ # Iterate through all candidate objects and keep the top k candidates
247
+ best_obj_candidates = []
248
+ for category_dir in category_dirs:
249
+ obj_ids = os.listdir(category_dir)
250
+ for i, obj_id in enumerate(obj_ids):
251
+ if verbose:
252
+ print(
253
+ f"\r - Finding candidates from {category_dir.split('/')[-1]}: {i+1}/{len(obj_ids)}",
254
+ end="",
255
+ )
256
+
257
+ # Load the candidate object
258
+ obj_dir = os.path.join(category_dir, obj_id)
259
+ if os.path.exists(os.path.join(obj_dir, gt_file_name)):
260
+ with open(os.path.join(obj_dir, gt_file_name), "r") as f:
261
+ obj_dict = json.load(f)
262
+ if "diffuse_tree" not in obj_dict: # Rename for compatibility
263
+ obj_dict["diffuse_tree"] = obj_dict.pop("arti_tree")
264
+
265
+ # Compute metric for selecting the base if the hash matches or if there are no objects with the same hash
266
+ if obj_id in same_hash_obj_ids or len(same_hash_obj_ids) == 0:
267
+ scores = IoU_cDist(
268
+ requirement_dict,
269
+ obj_dict,
270
+ num_states=num_states,
271
+ compare_handles=metric_compare_handles,
272
+ iou_include_base=metric_iou_include_base,
273
+ num_samples=metric_num_samples,
274
+ )
275
+ base_score = scores["AS-cDist"]
276
+
277
+ # Add the candidate to the list of best candidates and keep the top k candidates
278
+ best_obj_candidates.append(
279
+ {
280
+ "category": category_dir.split("/")[-1],
281
+ "dir": obj_dir,
282
+ "score": base_score,
283
+ }
284
+ )
285
+ best_obj_candidates = sorted(
286
+ best_obj_candidates, key=lambda x: x["score"]
287
+ )[:keep_top]
288
+ if verbose:
289
+ print()
290
+
291
+ return best_obj_candidates
292
+
293
+
294
+ def pick_and_rescale_parts(
295
+ requirement_dict,
296
+ obj_candidates,
297
+ dataset_dir,
298
+ gt_file_name="object.json",
299
+ verbose=False,
300
+ ):
301
+ """
302
+ Pick and rescale parts from the object candidates
303
+
304
+ - requirement_dict: the object dictionary of the requirement\n
305
+ - obj_candidates: the list of best object candidates for selecting the base part\n
306
+ - dataset_dir: the directory of the dataset to search in\n
307
+ - gt_file_name (optional): the name of the ground truth file, which describes a candidate object\n
308
+ - verbose (optional): whether to print the progress\n
309
+
310
+ return:\n
311
+ - parts_to_render: a list of selected parts for the requirement parts in the form:
312
+ - [{name, dir, files, scale_factor=[sx, sy, sz], z_rotate_90}, ...]
313
+ - z_rotate_90 is True if the part needs to be rotated by 90 degrees around the z-axis
314
+ """
315
+
316
+ # Extract requirement data
317
+ if 'acd' in dataset_dir:
318
+ all_categories = all_categories_acd
319
+ else:
320
+ all_categories = [
321
+ "Table",
322
+ "StorageFurniture",
323
+ "WashingMachine",
324
+ "Microwave",
325
+ "Dishwasher",
326
+ "Refrigerator",
327
+ "Oven",
328
+ ]
329
+ requirement_part_names = []
330
+ requirement_part_bbox_sizes = []
331
+ for part in requirement_dict["diffuse_tree"]:
332
+ if part['name'] == 'wheel':
333
+ part['name'] = 'handle'
334
+ requirement_part_names.append(part["name"])
335
+ requirement_part_bbox_sizes.append(part["aabb"]["size"])
336
+
337
+ # Collect the unique part names and store the indices of the parts with the same name
338
+ unique_requirement_part_names = {}
339
+ for i, part_name in enumerate(requirement_part_names):
340
+ if part_name not in unique_requirement_part_names:
341
+ unique_requirement_part_names[part_name] = [i]
342
+ else:
343
+ unique_requirement_part_names[part_name].append(i)
344
+
345
+ parts_to_render = [{} for _ in range(len(requirement_part_names))]
346
+
347
+ # Iterate through the object candidates selected for the base part first
348
+ for candidate in obj_candidates:
349
+ if all(
350
+ [len(part) > 0 for part in parts_to_render]
351
+ ): # Break if all parts are fulfilled
352
+ break
353
+
354
+ if not os.path.exists(os.path.join(candidate["dir"], gt_file_name)):
355
+ continue
356
+ # Load the candidate object
357
+ with open(os.path.join(candidate["dir"], gt_file_name), "r") as f:
358
+ candidate_dict = json.load(f)
359
+
360
+ # Pick parts from the candidate if the part name matches and the part requirement is not yet fulfilled
361
+ for candidate_part_idx, part in enumerate(candidate_dict["diffuse_tree"]):
362
+ part_needed = part["name"] in unique_requirement_part_names
363
+ if not part_needed:
364
+ continue
365
+
366
+ part_not_fulfilled = any(
367
+ [
368
+ len(parts_to_render[i]) == 0
369
+ for i in unique_requirement_part_names[part["name"]]
370
+ ]
371
+ )
372
+ if not part_not_fulfilled:
373
+ continue
374
+
375
+ # Get the indices of the requirement parts that are fulfilled by this candidate part and their bounding box sizes
376
+ fullfill_part_idxs = unique_requirement_part_names[part["name"]]
377
+ fullfill_part_bbox_sizes = [
378
+ requirement_part_bbox_sizes[i] for i in fullfill_part_idxs
379
+ ]
380
+ # Generate all output part dictionaries at once
381
+ part_dicts = _generate_output_part_dicts(
382
+ candidate_dict,
383
+ candidate_part_idx,
384
+ candidate["dir"],
385
+ fullfill_part_bbox_sizes,
386
+ verbose=verbose,
387
+ )
388
+ # Update the output part dictionaries
389
+ [
390
+ parts_to_render[part_idx].update(part_dicts[part_dict_idx])
391
+ for part_dict_idx, part_idx in enumerate(fullfill_part_idxs)
392
+ ]
393
+
394
+ # If there are still parts that are not fulfilled
395
+ if any([len(part) == 0 for part in parts_to_render]):
396
+ # Collect the remaining part names
397
+ remaining_part_names = list(
398
+ set(
399
+ [
400
+ requirement_part_names[i]
401
+ for i in range(len(requirement_part_names))
402
+ if len(parts_to_render[i]) == 0
403
+ ]
404
+ )
405
+ )
406
+ if verbose:
407
+ print(
408
+ f" - Parts {remaining_part_names} are not fulfilled by the selected candidates, searching in the dataset..."
409
+ )
410
+
411
+ # If the category is specified, only search in that category, otherwise search in all categories
412
+ # requirement_dict["meta"]["obj_cat"] = ""
413
+ requirement_category = requirement_dict["meta"]["obj_cat"]
414
+ if requirement_category == "StroageFurniture":
415
+ requirement_category = "StorageFurniture"
416
+ category_specified = requirement_category != ""
417
+ if category_specified:
418
+ category_dirs = [os.path.join(dataset_dir, requirement_category)]
419
+ else:
420
+ category_dirs = [
421
+ os.path.join(dataset_dir, category) for category in all_categories
422
+ ]
423
+
424
+ # Iterate through all objects
425
+ retry = True # Retry if the category is specified, but some parts are still not fulfilled (See the end of the while loop)
426
+ retry_time = 0
427
+ while retry:
428
+ print(retry_time)
429
+ retry_time += 1
430
+ for category_dir in category_dirs:
431
+ obj_ids = os.listdir(category_dir)
432
+ random.shuffle(obj_ids) # Randomize the order of the objects
433
+ for i, obj_id in enumerate(obj_ids):
434
+ if True:
435
+ print(
436
+ f"- Finding missing parts from {category_dir.split('/')[-1]}: {i+1}/{len(obj_ids)} \n"
437
+ )
438
+
439
+ # Load the candidate object
440
+ obj_dir = os.path.join(category_dir, obj_id)
441
+ if not os.path.exists(os.path.join(obj_dir, gt_file_name)):
442
+ continue
443
+ with open(os.path.join(obj_dir, gt_file_name), "r") as f:
444
+ candidate_dict = json.load(f)
445
+
446
+ # Pick the part from the candidate if the part name matches and the parts that are not fulfilled
447
+ for candidate_part_idx, part in enumerate(
448
+ candidate_dict["diffuse_tree"]
449
+ ):
450
+ part_needed = part["name"] in remaining_part_names
451
+
452
+ if part_needed:
453
+ # Get the indices of the requirement parts that are fulfilled by this candidate part and their bounding box sizes
454
+ fullfill_part_idxs = unique_requirement_part_names[
455
+ part["name"]
456
+ ]
457
+ fullfill_part_bbox_sizes = [
458
+ requirement_part_bbox_sizes[i]
459
+ for i in fullfill_part_idxs
460
+ ]
461
+
462
+ # Generate all output part dictionaries at once
463
+ part_dicts = _generate_output_part_dicts(
464
+ candidate_dict,
465
+ candidate_part_idx,
466
+ obj_dir,
467
+ fullfill_part_bbox_sizes,
468
+ verbose=verbose,
469
+ )
470
+
471
+ # Update the output part dictionaries
472
+ [
473
+ parts_to_render[part_idx].update(
474
+ part_dicts[part_dict_idx]
475
+ )
476
+ for part_dict_idx, part_idx in enumerate(
477
+ fullfill_part_idxs
478
+ )
479
+ ]
480
+
481
+ if all([len(part) > 0 for part in parts_to_render]):
482
+ if verbose:
483
+ print(" -> Found all missing parts")
484
+ break
485
+ if all([len(part) > 0 for part in parts_to_render]):
486
+ retry = False
487
+ break
488
+
489
+ # If the category is specified, but some parts are still not fulfilled, search in all categories
490
+ if category_specified and any([len(part) == 0 for part in parts_to_render]):
491
+ if verbose:
492
+ print(
493
+ f" - Required category is {requirement_category}, but some parts are still not fulfilled, searching in all categories..."
494
+ )
495
+ category_specified = False
496
+ retry = True
497
+ category_dirs = [
498
+ os.path.join(dataset_dir, category)
499
+ for category in all_categories
500
+ if category != requirement_category
501
+ ]
502
+
503
+ # Raise error if there are still parts that are not fulfilled
504
+ if any([len(part) == 0 for part in parts_to_render]):
505
+ raise RuntimeError(
506
+ "Failed to fulfill all requirements, some parts may not exist in the dataset"
507
+ )
508
+
509
+ return parts_to_render
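
An end-to-end sketch of the retrieval entry points above (illustrative; the dataset path is a placeholder and `requirement` stands for a generated object dictionary whose `meta["obj_cat"]` names the category).

from retrieval.obj_retrieval import find_obj_candidates, pick_and_rescale_parts

candidates = find_obj_candidates(
    requirement,
    dataset_dir="./data/PartnetMobility",                       # placeholder dataset root
    hashbook_path="retrieval/retrieval_hash_no_handles.json",
    keep_top=5,
    verbose=True,
)
parts_to_render = pick_and_rescale_parts(
    requirement, candidates, dataset_dir="./data/PartnetMobility", verbose=True
)
# Each entry holds the source mesh directory/files plus a per-axis scale factor
# (and a z_rotate_90 flag for handles) that fits the retrieved part to the required bbox.
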
retrieval/retrieval_hash_acd.json ADDED
@@ -0,0 +1,329 @@
1
+ {
2
+ "armoire": {
3
+ "ff24f9310d003cd7b7894b2d6ec79a03": [
4
+ "B07H8V49M2"
5
+ ],
6
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
7
+ "B07GFDZVYY"
8
+ ],
9
+ "5144181ac27497fdfa9bdb5b8b799630": [
10
+ "B07GFSJ69T",
11
+ "8415f258d5e129e9bb63cab54c7c207e04c0dfed",
12
+ "06764d11dcec69878cc762c36482be5ef2865443",
13
+ "0760a3dd43bd9dd9c0ec1ea4e033c6e121d92df5",
14
+ "12001de4686bf2e4b9c721c93b35a424dd48249f"
15
+ ],
16
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
17
+ "B07GFW9GFX",
18
+ "16b93d86d1a466f5982e60c8d322ddd8f312056e"
19
+ ],
20
+ "c24dd733315066f7c7da3d578f954d8c": [
21
+ "B07H8PS4FZ",
22
+ "cd17a7b8d78ee79bc52015841577a2652f9e0625",
23
+ "3033be75f2ac15885e8f4813dc55e168af09e91f"
24
+ ],
25
+ "02746714ea1f7e20dd4ac8f18d8cdac3": [
26
+ "3bc24e0ea79ade13d4703ca23ece1e4019f9b70b"
27
+ ],
28
+ "dd3473c941b94dd6654e9f89bb51cac9": [
29
+ "128b5f2d072869004b7a218ab674f93f73a66670"
30
+ ],
31
+ "cde6b48ed870286595c1455af7aff8bd": [
32
+ "69340d8678701e72f39cc01890da1b8af3fd603d",
33
+ "24f8284e4bdef4397e5b12dc4f2b74a137d63dbb"
34
+ ],
35
+ "5a8eac0760a558d4174437be478ec0aa": [
36
+ "a88c710a0b90706398a0fd7a9d73123338d04354"
37
+ ],
38
+ "c502b67eb6d91d909ba398fa39bec60c": [
39
+ "28098e6540f2f2fc07f7fc6a00edd6ce371a2618"
40
+ ],
41
+ "00232256ef3ac441f59b36bfc7bd190c": [
42
+ "11f8b552a802c6233a1332713568f05f901b725c"
43
+ ]
44
+ },
45
+ "cabinet": {
46
+ "c502b67eb6d91d909ba398fa39bec60c": [
47
+ "B07D42T6CX"
48
+ ],
49
+ "d25563e624d9195ce94b1f768fdc503d": [
50
+ "B07MGL8651"
51
+ ],
52
+ "2fce5c033589d2dfe24fa67dc6885386": [
53
+ "B07QD6V13M"
54
+ ]
55
+ },
56
+ "nightstand": {
57
+ "5144181ac27497fdfa9bdb5b8b799630": [
58
+ "B072ZK8897",
59
+ "xxxx54c93c4fxabe2x463exafd8xcae01106dc7d",
60
+ "037f34132f162235d80ce46f67c4fa2238d94da0",
61
+ "155c182834f40ebb1d5666d3a72ee828e097097a"
62
+ ],
63
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
64
+ "B072ZMHBKQ",
65
+ "fed2a84682713eabe2b8d0e1e950d891c7442d5f"
66
+ ],
67
+ "5a8eac0760a558d4174437be478ec0aa": [
68
+ "1fa3dec03ab0afb0749373b2c8da8bf77c92e271",
69
+ "265e819d1f027cde8dac05468a70455f62cbf069",
70
+ "4bd1e4ec215403d3239f09517e44568f78da3b40"
71
+ ],
72
+ "cde6b48ed870286595c1455af7aff8bd": [
73
+ "b8bdd9cbc1ca695a206583afdc26f1a4c3987303",
74
+ "0dfabed4818c34cbaa5ef41a3bcaf89177744b2c"
75
+ ]
76
+ },
77
+ "table": {
78
+ "5144181ac27497fdfa9bdb5b8b799630": [
79
+ "B07K6RNQDH",
80
+ "B07JXXR83F"
81
+ ],
82
+ "c24dd733315066f7c7da3d578f954d8c": [
83
+ "B082YNHQLX"
84
+ ],
85
+ "ddc2ef1be48dc58fe68226818824b648": [
86
+ "B075Z93NHP"
87
+ ],
88
+ "ec04032eee6bc67c5fdd5ec6705c3137": [
89
+ "B075Z93NKX"
90
+ ],
91
+ "cde6b48ed870286595c1455af7aff8bd": [
92
+ "B075Z96KQL"
93
+ ]
94
+ },
95
+ "bookcase": {
96
+ "5a8eac0760a558d4174437be478ec0aa": [
97
+ "2c681d7e64e0410d76156f500cd2df798975a25d",
98
+ "36d7feaf2471b67aa638609fe2c2278fba4a15a0"
99
+ ],
100
+ "ff24f9310d003cd7b7894b2d6ec79a03": [
101
+ "a194a188dfcd8e3796529c0263448ba047ce632f"
102
+ ],
103
+ "c502b67eb6d91d909ba398fa39bec60c": [
104
+ "d2f7d40d8e1a56ede42103027842e000d9cacd3e"
105
+ ],
106
+ "ddc2ef1be48dc58fe68226818824b648": [
107
+ "36d90c43b90a526247d61e709f349b3ed54081e2"
108
+ ],
109
+ "1e8c6b47706f002757c3370366001f06": [
110
+ "d5ba163ba97f94c7aa4a4a625eb0547b8894e1a5"
111
+ ],
112
+ "61f645001e86ad8a32357cc828ae33cb": [
113
+ "3b558ce715c88cca63b307f8e0e9b665ca57ef43"
114
+ ],
115
+ "c24dd733315066f7c7da3d578f954d8c": [
116
+ "44641367282a6a9616c91439944d4160a0e6f66a"
117
+ ],
118
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
119
+ "17a8f7c040254439fe8771f66f1c526c71d24904"
120
+ ],
121
+ "0e3c4946251b437ca90e5fe70efdea5b": [
122
+ "9418cb5d4e1b7ff0ebd469e28dfdbaa99bc61f4d"
123
+ ]
124
+ },
125
+ "desk": {
126
+ "ddc2ef1be48dc58fe68226818824b648": [
127
+ "319c007ee7d07ca84c797a512ad4a98c9abc42da",
128
+ "1619e2e6a18d5d374963a3a4280f7a6d76356079"
129
+ ],
130
+ "cde6b48ed870286595c1455af7aff8bd": [
131
+ "7489bcf3f226d315b4627bc442422f44f9eff092",
132
+ "429de8a53ddf94a47ec553147dc8603b803be056"
133
+ ],
134
+ "5a8eac0760a558d4174437be478ec0aa": [
135
+ "3a12fb2ab85d734b4b11bc6f541be9961e7ecd23",
136
+ "03f226dd0e012925a8674564c4de19cb786c9a88"
137
+ ],
138
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
139
+ "b1c0fc607c063010c4b9300c955a0e3e5f7001fc"
140
+ ],
141
+ "02746714ea1f7e20dd4ac8f18d8cdac3": [
142
+ "01be253cbfd14b947e9dbe09d0b1959e97d72122"
143
+ ],
144
+ "69144809aea48cb46eae9c3950f24a15": [
145
+ "d7cec0e53dadbc4291064708c84e3614b79ac3c9"
146
+ ],
147
+ "f38a9419ca785a395579ce42491c830e": [
148
+ "561105c73bf76152a2b32e4f55f80db6a25ac0d4"
149
+ ],
150
+ "d25563e624d9195ce94b1f768fdc503d": [
151
+ "128ed2ced9a101aa0d131fd224012fd52198003f"
152
+ ],
153
+ "17a09dc7b6207f11cd18889788802b88": [
154
+ "6d264e3023b940b0b1d31b77e04d0c845853c1f0"
155
+ ]
156
+ },
157
+ "dishwasher": {
158
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
159
+ "cb3dd8f2c8de396606e0794f6effc921aff7235d",
160
+ "aad01c69d7a27a1740e422f5f64b781816bd86fa"
161
+ ]
162
+ },
163
+ "microwave": {
164
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
165
+ "a630fbac79cdc164c9344a588f72d207b6d25e33",
166
+ "bf840db863fc9c2646b2f8f372e4847b2fd42e34",
167
+ "c3b2adbd3b89bdcd01f1e813bc4d2e06975ec727",
168
+ "68407fbf5c7296351b2c26c2e59510effc87a637",
169
+ "81da9279eae235c3faced51d516e970acdac5e84"
170
+ ],
171
+ "c502b67eb6d91d909ba398fa39bec60c": [
172
+ "9849af0395972ff84e40f5c1a51db8a25e3ef6f7"
173
+ ]
174
+ },
175
+ "oven": {
176
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
177
+ "702d3ab650d34ee1dd236b0df5882573ec70c504"
178
+ ],
179
+ "5144181ac27497fdfa9bdb5b8b799630": [
180
+ "00b0d5e167ae6b42666de010025efad4506563f1",
181
+ "c1bdae17057dfa88d5e3894433642030fd66c7d6",
182
+ "197a447eb68ba32ab44c50948b3af1e63048e174",
183
+ "239c5c38a53badc24ca6950ee78d8f6c115c3074",
184
+ "4a326efb8ab35d8ce823575d4c3b7033e8c3e5e8"
185
+ ],
186
+ "cde6b48ed870286595c1455af7aff8bd": [
187
+ "ef32e2cd6dd99d883f627e238f55ad0766240d44",
188
+ "f8bff67d469223c8e8bf44553834bd5482a96ecc",
189
+ "41efc8ed9c8d433e9ff877f3b9ea1c0eda45479c"
190
+ ]
191
+ },
192
+ "refrigerator": {
193
+ "c502b67eb6d91d909ba398fa39bec60c": [
194
+ "1c7874f93ca418d7edebd150a7422095fd76897a"
195
+ ],
196
+ "5144181ac27497fdfa9bdb5b8b799630": [
197
+ "9449ef7831c43bc0db23aac79ea442aa71d0db11",
198
+ "0bb1cdb98fbdfda5b41abaa39aa7f82321e58b72",
199
+ "1c33bd447d70d4d22116c434d912f1fae78e02b7"
200
+ ],
201
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
202
+ "cae4c60830bba615ff533dc23ffee6e6e5c7d14e"
203
+ ]
204
+ },
205
+ "sink_cabinet": {
206
+ "cde6b48ed870286595c1455af7aff8bd": [
207
+ "56dc6fc7669736b5fd6a85d1b14a01d029beff59"
208
+ ],
209
+ "c502b67eb6d91d909ba398fa39bec60c": [
210
+ "f637f110fbed653b7983d9fc6a6d53795b384461"
211
+ ],
212
+ "5144181ac27497fdfa9bdb5b8b799630": [
213
+ "5f074f91cc2ce2a4d5a62e6cce77c435e5dbf457",
214
+ "6219ef05f4a7b56419749e45a45143df8af44495",
215
+ "027f20642dd34e7914fc4fc4efa70fbb54bcecbb",
216
+ "112dc87e26450400941c6eaff60866bc19badc64"
217
+ ],
218
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
219
+ "76102a94ca13d9cdaf7d5a77262cddd4df9a806c"
220
+ ],
221
+ "c24dd733315066f7c7da3d578f954d8c": [
222
+ "829bf87b541238bb7579a05d18d4f7f1d1f98af1"
223
+ ],
224
+ "ada4be0df4d7d600d6729eb4a621f99a": [
225
+ "f1de6498f43789e1c27150b3ae1f9b5bfc051775"
226
+ ]
227
+ },
228
+ "tv_stand": {
229
+ "0e3c4946251b437ca90e5fe70efdea5b": [
230
+ "a77e6006efcacc637e1c2a49e72232ee0f435e35",
231
+ "212778c0c265e0db358ad7c8c1fa9a4bcfe41bd7"
232
+ ],
233
+ "cde6b48ed870286595c1455af7aff8bd": [
234
+ "ba231dd136e3bc77fb04ade17235b923aa7b2f07"
235
+ ],
236
+ "5144181ac27497fdfa9bdb5b8b799630": [
237
+ "056be15536045e9a6a94b9b93cff62f72d43c326",
238
+ "1aebe7d2500bfbcb0c6ec787a98a2b3701099ed7"
239
+ ],
240
+ "94d192237d5fe1b065910cb51d8ee711": [
241
+ "63b7d75b724e079aab99030084f0eae1b43b7498"
242
+ ]
243
+ },
244
+ "washer": {
245
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
246
+ "6cd2dc2611c27f758c972b4874efad8c8cbd5d29",
247
+ "912447108f21083d877aab6653742fceccc6ce7d",
248
+ "00b285ed70673826ccb6941929a64abfaf5f9239",
249
+ "eccf8ba37c804b9067e675f09eec2c13e951f61c",
250
+ "031024e00569909d466d87df9fead90355ba29e5",
251
+ "faefe63ba896c06920aa6d23b05ab83f3d6d37ea",
252
+ "2c2b2914fc526f6e8bbe65511ecf58bba0027ec0",
253
+ "4bc6f883a374400355bcf95e611a4e8f8b950ed5"
254
+ ]
255
+ },
256
+ "chestofdrawers": {
257
+ "5144181ac27497fdfa9bdb5b8b799630": [
258
+ "762665bc7a958151874c15edfc2711b161376678",
259
+ "d96b2b9537c7c721d5a79b375aefbfacecd04f65"
260
+ ],
261
+ "47259e6c2fba7c74a9b725012e01ebba": [
262
+ "d621c3a39d9291f0943531204d68e705633986c9"
263
+ ],
264
+ "c24dd733315066f7c7da3d578f954d8c": [
265
+ "293c42d04758bffcc63070066170cbb1f09918cc",
266
+ "1c0bbc026e76c09885dc5c6f156a6c3dec605d10"
267
+ ],
268
+ "25bfc0f15836b69b830cf66b5217bed6": [
269
+ "807955fd4dcb59b67789b24f2e7bc167027c870a"
270
+ ],
271
+ "322c8717a498a3e832420518775f8ffc": [
272
+ "2cdb28938dff1f9ab13aee7630cea51f44f60952"
273
+ ],
274
+ "cde6b48ed870286595c1455af7aff8bd": [
275
+ "831e2cff446337af8791038161c7d32d96726b20",
276
+ "93767fc355a1afbfff79d54a25204069e0543d2b",
277
+ "11808e4bfc4534caf787fa15ba07bc2cbee95fdd"
278
+ ],
279
+ "3ab3e03b34bc406737d81bd5db0ee212": [
280
+ "4c067979055ae739365d340a1699036be5c136c7"
281
+ ],
282
+ "c502b67eb6d91d909ba398fa39bec60c": [
283
+ "848c2cc9428329445b3de7dc982469c583b16c1d"
284
+ ],
285
+ "7936dbd6c88dda2542d5509f6078a0a9": [
286
+ "09a0a4a031e33e738214b812f71dd838232df54c"
287
+ ],
288
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
289
+ "609530b858465d9795ac43ba435fdd5f12d95956",
290
+ "652403bb9a0b199ebdebe538a44bc897eab624f6"
291
+ ],
292
+ "ddc2ef1be48dc58fe68226818824b648": [
293
+ "bc59a49ebf99164d5ed88bd6eaff12ec4ed86d0a"
294
+ ]
295
+ },
296
+ "hangingcabinet": {
297
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
298
+ "B07B8NBQQP",
299
+ "dcef0d475b7fdea2530898215feafddac1fe9bcc",
300
+ "bb966a4f853df29b8b37b89e157aed4eb3936aec"
301
+ ],
302
+ "c502b67eb6d91d909ba398fa39bec60c": [
303
+ "be1b0ef31886b4b2b42cf0bf1b7df548917c9943"
304
+ ],
305
+ "5a8eac0760a558d4174437be478ec0aa": [
306
+ "3432a71596b6cd7e944b6f19cf6d713fe17fc8bd",
307
+ "4468c13bc98184bcc403027164ede52b178e5d20",
308
+ "644c2d1505103189b6ae49f5a58b97ed41202149"
309
+ ]
310
+ },
311
+ "kitchencabinet": {
312
+ "c502b67eb6d91d909ba398fa39bec60c": [
313
+ "3a1c604565d5fa1f411f3fb437a816094ae69122",
314
+ "88c87a19b5e883787b5707d90545e25360594822"
315
+ ],
316
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
317
+ "bc3c5c45d5a9126bff85470c7c48d4e2b7ebfd0d"
318
+ ],
319
+ "86f7cf811774c9dc1f8ac7ebefafd51c": [
320
+ "0c42ee5b635c8acdb6bf235ff9420d742a16fd30"
321
+ ],
322
+ "c24dd733315066f7c7da3d578f954d8c": [
323
+ "10444a1165c1745f960eb6183d24dd05b60e781f"
324
+ ],
325
+ "cde6b48ed870286595c1455af7aff8bd": [
326
+ "1a570da5ae0c4d1d51c94be81d7248f155a0dcef"
327
+ ]
328
+ }
329
+ }
retrieval/retrieval_hash_no_handles.json ADDED
@@ -0,0 +1,722 @@
1
+ {
2
+ "Table": {
3
+ "c502b67eb6d91d909ba398fa39bec60c": [
4
+ "25160",
5
+ "32761",
6
+ "28594",
7
+ "25959",
8
+ "34178",
9
+ "23724",
10
+ "21473",
11
+ "26545",
12
+ "26692",
13
+ "27478",
14
+ "19384",
15
+ "26800",
16
+ "25756",
17
+ "23807",
18
+ "26886",
19
+ "31249",
20
+ "21718",
21
+ "23372",
22
+ "32601"
23
+ ],
24
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
25
+ "32566",
26
+ "26525",
27
+ "29557",
28
+ "24644",
29
+ "20453",
30
+ "26806",
31
+ "26670",
32
+ "20279",
33
+ "20411",
34
+ "20555",
35
+ "27189",
36
+ "30869",
37
+ "19855",
38
+ "24931",
39
+ "22339",
40
+ "28668",
41
+ "26073",
42
+ "26652",
43
+ "21467",
44
+ "20985",
45
+ "22241",
46
+ "22508",
47
+ "29921",
48
+ "27044",
49
+ "23511"
50
+ ],
51
+ "5144181ac27497fdfa9bdb5b8b799630": [
52
+ "27619",
53
+ "26875",
54
+ "27267",
55
+ "19825",
56
+ "33914",
57
+ "23472",
58
+ "22692",
59
+ "32174",
60
+ "19179",
61
+ "29133",
62
+ "31601",
63
+ "32086",
64
+ "30238",
65
+ "25308",
66
+ "32746",
67
+ "28164",
68
+ "32932",
69
+ "34617",
70
+ "23782"
71
+ ],
72
+ "dd3473c941b94dd6654e9f89bb51cac9": [
73
+ "30666"
74
+ ],
75
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
76
+ "33930",
77
+ "32052",
78
+ "32213"
79
+ ],
80
+ "8999452509fdfa98335c5ba44ed05498": [
81
+ "26503",
82
+ "20043",
83
+ "29525",
84
+ "26657",
85
+ "30341"
86
+ ],
87
+ "cde6b48ed870286595c1455af7aff8bd": [
88
+ "19836",
89
+ "22301",
90
+ "30663",
91
+ "25144",
92
+ "25493",
93
+ "32354",
94
+ "22433",
95
+ "32324",
96
+ "30857",
97
+ "26387",
98
+ "32259",
99
+ "33457",
100
+ "20745",
101
+ "33116",
102
+ "25913"
103
+ ],
104
+ "0e3c4946251b437ca90e5fe70efdea5b": [
105
+ "19898",
106
+ "26608",
107
+ "22367",
108
+ "24152"
109
+ ],
110
+ "c24dd733315066f7c7da3d578f954d8c": [
111
+ "34610",
112
+ "33810"
113
+ ]
114
+ },
115
+ "Dishwasher": {
116
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
117
+ "12480",
118
+ "12579",
119
+ "12542",
120
+ "12085",
121
+ "12559",
122
+ "12092",
123
+ "12580",
124
+ "12565",
125
+ "12612",
126
+ "11622",
127
+ "12530",
128
+ "12071",
129
+ "12654",
130
+ "12259",
131
+ "12558",
132
+ "11700",
133
+ "12553",
134
+ "12414",
135
+ "12543",
136
+ "12561",
137
+ "12590",
138
+ "12540",
139
+ "12621",
140
+ "12531",
141
+ "12614",
142
+ "12560",
143
+ "12428",
144
+ "12606",
145
+ "12552",
146
+ "12592",
147
+ "12617",
148
+ "12583",
149
+ "12605",
150
+ "12596",
151
+ "11661"
152
+ ],
153
+ "cde6b48ed870286595c1455af7aff8bd": [
154
+ "12349",
155
+ "11826"
156
+ ],
157
+ "c502b67eb6d91d909ba398fa39bec60c": [
158
+ "12065",
159
+ "12484"
160
+ ],
161
+ "5144181ac27497fdfa9bdb5b8b799630": [
162
+ "12597"
163
+ ]
164
+ },
165
+ "WashingMachine": {
166
+ "5144181ac27497fdfa9bdb5b8b799630": [
167
+ "100283",
168
+ "103369",
169
+ "103361",
170
+ "100282",
171
+ "103425",
172
+ "103480",
173
+ "103778"
174
+ ],
175
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
176
+ "103490",
177
+ "103775"
178
+ ],
179
+ "cde6b48ed870286595c1455af7aff8bd": [
180
+ "103452",
181
+ "103518",
182
+ "103521"
183
+ ],
184
+ "c502b67eb6d91d909ba398fa39bec60c": [
185
+ "103508",
186
+ "103776"
187
+ ],
188
+ "c24dd733315066f7c7da3d578f954d8c": [
189
+ "103781",
190
+ "103528"
191
+ ]
192
+ },
193
+ "Microwave": {
194
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
195
+ "7304",
196
+ "7236",
197
+ "7221",
198
+ "7263",
199
+ "7292",
200
+ "7306",
201
+ "7320",
202
+ "7310"
203
+ ],
204
+ "cde6b48ed870286595c1455af7aff8bd": [
205
+ "7273",
206
+ "7167",
207
+ "7296",
208
+ "7349"
209
+ ],
210
+ "5144181ac27497fdfa9bdb5b8b799630": [
211
+ "7366",
212
+ "7119"
213
+ ],
214
+ "c502b67eb6d91d909ba398fa39bec60c": [
215
+ "7128"
216
+ ]
217
+ },
218
+ "Oven": {
219
+ "ddc2ef1be48dc58fe68226818824b648": [
220
+ "101946",
221
+ "101930"
222
+ ],
223
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
224
+ "7130",
225
+ "101971",
226
+ "7290",
227
+ "7138"
228
+ ],
229
+ "c502b67eb6d91d909ba398fa39bec60c": [
230
+ "101773",
231
+ "102055"
232
+ ],
233
+ "5144181ac27497fdfa9bdb5b8b799630": [
234
+ "7220",
235
+ "7187"
236
+ ],
237
+ "0e3c4946251b437ca90e5fe70efdea5b": [
238
+ "101921",
239
+ "101943",
240
+ "101917",
241
+ "101947"
242
+ ],
243
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
244
+ "102018",
245
+ "102044",
246
+ "102060",
247
+ "101931"
248
+ ],
249
+ "538827adb8c90adf1322121db4e66fef": [
250
+ "101924"
251
+ ],
252
+ "c24dd733315066f7c7da3d578f954d8c": [
253
+ "101940",
254
+ "7347",
255
+ "102019",
256
+ "7332",
257
+ "7120",
258
+ "7201",
259
+ "101909",
260
+ "7179"
261
+ ],
262
+ "dd3473c941b94dd6654e9f89bb51cac9": [
263
+ "101808",
264
+ "102001"
265
+ ],
266
+ "ebb9c0168d323bca8e92227bdaa7a788": [
267
+ "101908"
268
+ ]
269
+ },
270
+ "Refrigerator": {
271
+ "5144181ac27497fdfa9bdb5b8b799630": [
272
+ "12066",
273
+ "10036",
274
+ "11712",
275
+ "11178",
276
+ "10867",
277
+ "10900",
278
+ "12059",
279
+ "11299",
280
+ "10620",
281
+ "10685",
282
+ "11846",
283
+ "12043",
284
+ "12248",
285
+ "10347",
286
+ "10489",
287
+ "10751",
288
+ "12036",
289
+ "10143",
290
+ "10655",
291
+ "10612",
292
+ "10068",
293
+ "10638",
294
+ "12050",
295
+ "11231",
296
+ "10586",
297
+ "11550",
298
+ "11304",
299
+ "10627"
300
+ ],
301
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
302
+ "10849",
303
+ "12054",
304
+ "12038",
305
+ "10373",
306
+ "10944",
307
+ "12252",
308
+ "10905",
309
+ "11260",
310
+ "12055",
311
+ "10144",
312
+ "10797",
313
+ "11211",
314
+ "12250",
315
+ "12042",
316
+ "12249"
317
+ ],
318
+ "cde6b48ed870286595c1455af7aff8bd": [
319
+ "11709"
320
+ ]
321
+ },
322
+ "Safe": {
323
+ "5144181ac27497fdfa9bdb5b8b799630": [
324
+ "102423",
325
+ "101583",
326
+ "102318",
327
+ "101612",
328
+ "102301",
329
+ "101591",
330
+ "101594",
331
+ "102380",
332
+ "101613",
333
+ "101599",
334
+ "101619",
335
+ "101605",
336
+ "102389",
337
+ "101363",
338
+ "102387"
339
+ ],
340
+ "cde6b48ed870286595c1455af7aff8bd": [
341
+ "102311",
342
+ "101579",
343
+ "101611",
344
+ "102384",
345
+ "102309",
346
+ "101623",
347
+ "101593",
348
+ "101603",
349
+ "102316",
350
+ "101584",
351
+ "102381"
352
+ ],
353
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
354
+ "101604",
355
+ "101564",
356
+ "102418"
357
+ ]
358
+ },
359
+ "StorageFurniture": {
360
+ "cde6b48ed870286595c1455af7aff8bd": [
361
+ "47711",
362
+ "45841",
363
+ "45427",
364
+ "46653",
365
+ "48740",
366
+ "46544",
367
+ "44962",
368
+ "48517",
369
+ "47296",
370
+ "46230",
371
+ "45213",
372
+ "44853",
373
+ "40453",
374
+ "45632",
375
+ "45949",
376
+ "48855",
377
+ "44781",
378
+ "48797",
379
+ "45132",
380
+ "46981",
381
+ "46440",
382
+ "48491",
383
+ "45661",
384
+ "45503",
385
+ "47853",
386
+ "47252",
387
+ "48010",
388
+ "46879",
389
+ "48876",
390
+ "46537",
391
+ "45622",
392
+ "46641",
393
+ "46334",
394
+ "48253",
395
+ "48063",
396
+ "47233",
397
+ "45940",
398
+ "45290",
399
+ "46874",
400
+ "47438",
401
+ "45135",
402
+ "46130",
403
+ "46123",
404
+ "47088",
405
+ "45756",
406
+ "47944",
407
+ "47178"
408
+ ],
409
+ "3ba4ffe16dfe637510ed1c3676ec6cb0": [
410
+ "47168",
411
+ "45176",
412
+ "45908",
413
+ "45249",
414
+ "46127",
415
+ "45633",
416
+ "45936",
417
+ "47388",
418
+ "45091",
419
+ "45385",
420
+ "48721",
421
+ "45667",
422
+ "41004",
423
+ "41529",
424
+ "45671",
425
+ "45130",
426
+ "45504",
427
+ "45638",
428
+ "45087",
429
+ "45164",
430
+ "48036",
431
+ "47651",
432
+ "45950",
433
+ "45623",
434
+ "46132",
435
+ "45693",
436
+ "45783",
437
+ "45007",
438
+ "47180",
439
+ "45690",
440
+ "48271",
441
+ "45267",
442
+ "46092",
443
+ "45134",
444
+ "47021",
445
+ "46744",
446
+ "47187",
447
+ "48686",
448
+ "46556",
449
+ "46408",
450
+ "46616",
451
+ "46439",
452
+ "45448",
453
+ "47963",
454
+ "45212",
455
+ "45413",
456
+ "45645",
457
+ "38516",
458
+ "45822",
459
+ "47315",
460
+ "45916",
461
+ "45173",
462
+ "47686",
463
+ "45699",
464
+ "45244",
465
+ "46889",
466
+ "46906",
467
+ "46430",
468
+ "45717",
469
+ "45937",
470
+ "45203",
471
+ "45372",
472
+ "47133",
473
+ "46966",
474
+ "45419",
475
+ "45779",
476
+ "45297",
477
+ "46922",
478
+ "46944",
479
+ "46417",
480
+ "45850",
481
+ "45620",
482
+ "46044",
483
+ "45248",
484
+ "48452",
485
+ "47817",
486
+ "45964",
487
+ "46029",
488
+ "46179",
489
+ "46787",
490
+ "41452",
491
+ "45166",
492
+ "48167",
493
+ "48243",
494
+ "45177",
495
+ "45961",
496
+ "47632",
497
+ "46033",
498
+ "47391",
499
+ "47316",
500
+ "41086",
501
+ "45384",
502
+ "45526",
503
+ "45915",
504
+ "47281",
505
+ "45415",
506
+ "45606",
507
+ "47742",
508
+ "46401",
509
+ "45323",
510
+ "45178",
511
+ "47514",
512
+ "46117",
513
+ "46197",
514
+ "48467",
515
+ "46452",
516
+ "48519",
517
+ "45855",
518
+ "47729",
519
+ "46057",
520
+ "45600",
521
+ "35059",
522
+ "45691",
523
+ "45524",
524
+ "48490",
525
+ "47182",
526
+ "45910",
527
+ "48413",
528
+ "45247",
529
+ "48023",
530
+ "48746",
531
+ "47419",
532
+ "45621",
533
+ "45403",
534
+ "46427",
535
+ "45443",
536
+ "45922",
537
+ "47601",
538
+ "45853",
539
+ "45516",
540
+ "46107"
541
+ ],
542
+ "8fae315ff2d1ea185b4ed6d0d092ae0e": [
543
+ "45271",
544
+ "46199",
545
+ "47669",
546
+ "49188",
547
+ "46145",
548
+ "47926",
549
+ "45948",
550
+ "47290",
551
+ "40417",
552
+ "48623",
553
+ "47235",
554
+ "49062",
555
+ "47648",
556
+ "45612"
557
+ ],
558
+ "1e8c6b47706f002757c3370366001f06": [
559
+ "47466"
560
+ ],
561
+ "c502b67eb6d91d909ba398fa39bec60c": [
562
+ "45092",
563
+ "46443",
564
+ "45696",
565
+ "45189",
566
+ "46108",
567
+ "48356",
568
+ "45238",
569
+ "41003",
570
+ "45642",
571
+ "49025",
572
+ "46955",
573
+ "45749",
574
+ "45801",
575
+ "45374",
576
+ "46084",
577
+ "47227",
578
+ "47578",
579
+ "45243",
580
+ "45636",
581
+ "45387",
582
+ "41083",
583
+ "48258",
584
+ "41085",
585
+ "46060",
586
+ "44817",
587
+ "46598",
588
+ "46002",
589
+ "48169",
590
+ "46762",
591
+ "45710",
592
+ "48018",
593
+ "45262",
594
+ "47207",
595
+ "46856",
596
+ "46466",
597
+ "47595",
598
+ "47701",
599
+ "45332",
600
+ "49140",
601
+ "45219",
602
+ "48263",
603
+ "45677",
604
+ "47976",
605
+ "46549",
606
+ "45194",
607
+ "45759",
608
+ "46893",
609
+ "46120",
610
+ "47089"
611
+ ],
612
+ "5144181ac27497fdfa9bdb5b8b799630": [
613
+ "45575",
614
+ "45594",
615
+ "45235",
616
+ "46655",
617
+ "49182",
618
+ "45687",
619
+ "46403",
620
+ "46768",
621
+ "46896",
622
+ "45001",
623
+ "47613",
624
+ "47254",
625
+ "46180",
626
+ "47024",
627
+ "46825",
628
+ "48379",
629
+ "49132",
630
+ "48878",
631
+ "47577",
632
+ "47565",
633
+ "45573",
634
+ "44826",
635
+ "46847",
636
+ "46732",
637
+ "45168",
638
+ "46277",
639
+ "47238",
640
+ "45746",
641
+ "47808",
642
+ "45662",
643
+ "48381",
644
+ "45963",
645
+ "45354",
646
+ "45676",
647
+ "47278",
648
+ "47529",
649
+ "46437",
650
+ "45378",
651
+ "46563",
652
+ "47570",
653
+ "45444",
654
+ "48700",
655
+ "45780",
656
+ "47099",
657
+ "46490",
658
+ "45523",
659
+ "47747",
660
+ "46045",
661
+ "45305",
662
+ "40147",
663
+ "49133",
664
+ "46700",
665
+ "46236",
666
+ "45505",
667
+ "48859",
668
+ "46166",
669
+ "46456",
670
+ "45162",
671
+ "45776",
672
+ "45420",
673
+ "46481",
674
+ "45767",
675
+ "45423",
676
+ "45790",
677
+ "49038",
678
+ "45670",
679
+ "47954",
680
+ "48479",
681
+ "46019",
682
+ "46801",
683
+ "45984",
684
+ "45159",
685
+ "49042",
686
+ "45784",
687
+ "48177",
688
+ "46859",
689
+ "46741",
690
+ "46134",
691
+ "45694",
692
+ "46480",
693
+ "45689",
694
+ "46037",
695
+ "47185",
696
+ "45397",
697
+ "48013",
698
+ "46699",
699
+ "48513",
700
+ "45146",
701
+ "45463",
702
+ "41510",
703
+ "45747"
704
+ ],
705
+ "8999452509fdfa98335c5ba44ed05498": [
706
+ "46172",
707
+ "46014",
708
+ "45261",
709
+ "46109",
710
+ "46380"
711
+ ],
712
+ "0e3c4946251b437ca90e5fe70efdea5b": [
713
+ "48497"
714
+ ],
715
+ "c24dd733315066f7c7da3d578f954d8c": [
716
+ "48051"
717
+ ],
718
+ "87036528afd9c9dd03a6ab72efec0136": [
719
+ "45725"
720
+ ]
721
+ }
722
+ }
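The table above maps what look like Weisfeiler-Lehman hashes of part-connectivity graphs (the same construction used by `get_hash` in `scripts/graph_pred/eval.py` below) to lists of candidate object IDs, nested one level below the top of the file. A minimal lookup sketch under that assumption; the flattening over the outer keys and the toy tree are illustrative only and are not taken from `retrieval/obj_retrieval.py`:

```python
import json
import networkx as nx

def part_graph_hash(diffuse_tree):
    # Directed child -> parent edges; the WL hash depends only on topology, not on ids.
    G = nx.DiGraph()
    for node in diffuse_tree:
        G.add_node(node["id"])
        if node["parent"] != -1:
            G.add_edge(node["id"], node["parent"])
    return nx.weisfeiler_lehman_graph_hash(G)

with open("retrieval/retrieval_hash_no_handles.json") as f:
    hashbook = json.load(f)

# Assumption: every top-level value is a {hash: [object ids]} table; flatten for lookup.
flat = {h: ids for inner in hashbook.values() for h, ids in inner.items()}

toy_tree = [{"id": 0, "parent": -1}, {"id": 1, "parent": 0}]  # a base with one child part
print(flat.get(part_graph_hash(toy_tree), []))
```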
scripts/graph_pred/api.py ADDED
@@ -0,0 +1,210 @@
1
+ import os, sys
2
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
3
+ import re
4
+ import json
5
+ import base64
6
+ import argparse
7
+ from PIL import Image
8
+ from io import BytesIO
9
+ from openai import AzureOpenAI
10
+ from scripts.graph_pred.prompt_workflow_new import messages
11
+ import json_repair
12
+ # Initialize the OpenAI client
13
+
14
+ endpoint = os.environ.get("ENDPOINT")
15
+ api_key = os.environ.get("API_KEY")
16
+ api_version = os.environ.get("API_VERSION")
17
+ model_name = os.environ.get("MODEL_NAME")
18
+ client = AzureOpenAI(
19
+ azure_endpoint=endpoint,
20
+ api_key=api_key,
21
+ api_version=api_version,
22
+ )
23
+
24
+
25
+ def encode_image(image_path: str, center_crop=False):
26
+ """Resize and encode the image as base64"""
27
+ # load the image
28
+ image = Image.open(image_path)
29
+
30
+ # resize the image to 224x224
31
+ if center_crop: # (resize to 256x256 and then center crop to 224x224)
32
+ image = image.resize((256, 256))
33
+ width, height = image.size
34
+ left = (width - 224) / 2
35
+ top = (height - 224) / 2
36
+ right = (width + 224) / 2
37
+ bottom = (height + 224) / 2
38
+ image = image.crop((left, top, right, bottom))
39
+ else:
40
+ image = image.resize((224, 224))
41
+
42
+ # convert the image to bytes
43
+ buffer = BytesIO()
44
+ image.save(buffer, format="PNG")
45
+ buffer.seek(0)
46
+ # encode the image as base64
47
+ encoded_image = base64.b64encode(buffer.read()).decode("utf-8")
48
+ return encoded_image
49
+
50
+ def display_image(image_data):
51
+ """Display the image from the base64 encoded image data"""
52
+ img = Image.open(BytesIO(base64.b64decode(image_data)))
53
+ img.show()
54
+ img.close()
55
+
56
+
57
+ def convert_format(src):
58
+ '''Convert the JSON format from the response to a tree format'''
59
+ def _sort_nodes(tree):
60
+ num_nodes = len(tree)
61
+ sorted_tree = [dict() for _ in range(num_nodes)]
62
+ for node in tree:
63
+ sorted_tree[node["id"]] = node
64
+ return sorted_tree
65
+
66
+ def _traverse(node, parent_id, current_id):
67
+ for key, value in node.items():
68
+ node_id = current_id[0]
69
+ current_id[0] += 1
70
+
71
+ # Create the node
72
+ tree_node = {
73
+ "id": node_id,
74
+ "parent": parent_id,
75
+ "name": key,
76
+ "children": [],
77
+ }
78
+
79
+ # Traverse children if they exist
80
+ if isinstance(value, list):
81
+ for child in value:
82
+ child_id = _traverse(child, node_id, current_id)
83
+ tree_node["children"].append(child_id)
84
+
85
+ # Add this node to the tree
86
+ tree.append(tree_node)
87
+ return node_id
88
+
89
+ tree = []
90
+ current_id = [0]
91
+ _traverse(src, -1, current_id)
92
+ diffuse_tree = _sort_nodes(tree)
93
+ return diffuse_tree
94
+
95
+ def predict_graph_twomode(image_path, first_img_data=None, second_img_data=None, debug=False, center_crop=False):
96
+ '''Predict the part connectivity graph from the image'''
97
+ # Encode the image
98
+ if first_img_data is None or second_img_data is None:
99
+ first_img_data = encode_image(image_path, center_crop)
100
+ second_img_data = encode_image(image_path.replace('close', 'open'), center_crop)
101
+ # if debug:
102
+ # display_image(image_data) # for double checking the image
103
+ # breakpoint()
104
+ new_message = messages.copy()
105
+ new_message.append(
106
+ {
107
+ "role": "user",
108
+ "content": [
109
+ {
110
+ "type": "image_url",
111
+ "image_url": {"url": f"data:image/png;base64,{first_img_data}"},
112
+ },
113
+ {
114
+ "type": "image_url",
115
+ "image_url": {"url": f"data:image/png;base64,{second_img_data}"},
116
+ }
117
+ ],
118
+ },
119
+ )
120
+ # Get the completion from the model
121
+ completion = client.chat.completions.create(
122
+ model=model_name,
123
+ messages=new_message,
124
+ response_format={"type": "text"},
125
+ temperature=1,
126
+ max_tokens=4096,
127
+ top_p=1,
128
+ frequency_penalty=0,
129
+ presence_penalty=0,
130
+ )
131
+ print('processing the response...')
132
+
133
+ # Extract the response
134
+ content = completion.choices[0].message.content
135
+
136
+ src = json.loads(re.search(r"```json\n(.*?)\n```", content, re.DOTALL).group(1))
137
+ print(src)
138
+ # Convert the JSON format to tree format
139
+ diffuse_tree = convert_format(src)
140
+
141
+ return {"diffuse_tree": diffuse_tree, "original_response": content}
142
+
143
+ def save_response(save_path, response):
144
+ '''Save the response to a json file'''
145
+ with open(save_path, "w") as file:
146
+ json.dump(response, file, indent=4)
147
+
148
+
149
+
150
+ def gpt_infer_image_category(image1, image2):
151
+ system_role = "You are a highly knowledgeable assistant specializing in physics, engineering, and object properties."
152
+
153
+ text_prompt = (
154
+ "Given two images of an object, determine its category. "
155
+ "The category must be one of the following: Table, Dishwasher, StorageFurniture, "
156
+ "Refrigerator, WashingMachine, Microwave, Oven. "
157
+ "Output only the category name and nothing else. Do not include any other text."
158
+ )
159
+
160
+ content_user = [
161
+ {
162
+ "type": "text",
163
+ "text": text_prompt,
164
+ },
165
+ {
166
+ "type": "image_url",
167
+ "image_url": {"url": f"data:image/png;base64,{image1}"},
168
+ },
169
+ {
170
+ "type": "image_url",
171
+ "image_url": {"url": f"data:image/png;base64,{image2}"},
172
+ },
173
+ ]
174
+ payload = {
175
+ "messages": [
176
+ {"role": "system", "content": system_role},
177
+ {"role": "user", "content": content_user},
178
+ ],
179
+ "temperature": 0.1,
180
+ "max_tokens": 500,
181
+ "top_p": 0.1,
182
+ "frequency_penalty": 0,
183
+ "presence_penalty": 0,
184
+ "stop": None,
185
+ "model": model_name,
186
+ }
187
+ completion = client.chat.completions.create(**payload)
188
+ response = completion.choices[0].message.content
189
+ json_repair.loads(response) # best-effort parse of the reply; the parsed value is unused and the raw category string is returned
190
+
191
+ return response
192
+
193
+
194
+ if __name__ == "__main__":
195
+ parser = argparse.ArgumentParser(description="Predict the part connectivity graph from an image")
196
+ parser.add_argument("--img_path", type=str, required=True, help="path to the image")
197
+ parser.add_argument("--save_path", type=str, required=True, help="path to save the response")
198
+ parser.add_argument("--center_crop", action="store_true", help="whether to center crop the image to 224x224, otherwise resize to 224x224")
199
+ args = parser.parse_args()
200
+
201
+ try:
202
+ response = predict_graph_twomode(args.img_path, center_crop=args.center_crop)
+ save_response(args.save_path, response)
206
+ except Exception as e:
207
+ with open('openai_err.log', 'a') as f:
208
+ f.write('---------------------------\n')
209
+ f.write(f'{args.img_path}\n')
210
+ f.write(f'{e}\n')
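For reference, this is the shape that `convert_format` produces for the example tree used in the system prompt, traced by hand from the code above (ids are assigned in pre-order and the result is sorted by id):

```python
# Input in the GPT response format:
src = {
    "base": [
        {"door": [{"handle": []}]},
        {"drawer": [{"handle": []}]},
    ]
}

# convert_format(src) returns the flat, id-sorted diffuse_tree:
expected = [
    {"id": 0, "parent": -1, "name": "base",   "children": [1, 3]},
    {"id": 1, "parent": 0,  "name": "door",   "children": [2]},
    {"id": 2, "parent": 1,  "name": "handle", "children": []},
    {"id": 3, "parent": 0,  "name": "drawer", "children": [4]},
    {"id": 4, "parent": 3,  "name": "handle", "children": []},
]
```

Note that the Azure client is constructed at module import time, so ENDPOINT, API_KEY, API_VERSION and MODEL_NAME must be set in the environment before `convert_format` can be imported from this module.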
scripts/graph_pred/eval.py ADDED
@@ -0,0 +1,62 @@
1
+ import os
2
+ import json
3
+ import argparse
4
+ import networkx as nx
5
+ from tqdm import tqdm
6
+
7
+ def get_hash(file, key='diffuse_tree'):
8
+ tree = file[key]
9
+ G = nx.DiGraph()
10
+ for node in tree:
11
+ G.add_node(node['id'])
12
+ if node['parent'] != -1:
13
+ G.add_edge(node['id'], node['parent'])
14
+ hashcode = nx.weisfeiler_lehman_graph_hash(G)
15
+ return hashcode
16
+
17
+ if __name__ == "__main__":
18
+ '''Script to evaluate the accuracy of the generated graphs'''
19
+
20
+ parser = argparse.ArgumentParser()
21
+ parser.add_argument('--exp_dir', type=str, required=True, help='path to the experiment directory')
22
+ parser.add_argument('--gt_data_root', type=str, required=True, help='root directory of the ground-truth data')
23
+ parser.add_argument('--gt_json_name', type=str, default='object.json', help='Path to the ground truth data')
24
+ args = parser.parse_args()
25
+
26
+ assert os.path.exists(args.exp_dir), "The experiment directory does not exist"
27
+ assert os.path.exists(args.gt_data_root), "The ground-truth data root does not exist"
28
+
29
+ exp_dir = args.exp_dir
30
+ gt_data_dir = args.gt_data_root
31
+
32
+ acc = 0
33
+ files = os.listdir(exp_dir)
34
+ files = sorted(files)
35
+ total = len(files)
36
+ wrong_files = []
37
+ for file in tqdm(files):
38
+ tokens = file.split('@')
39
+ gt_dir = f'{gt_data_dir}'
40
+ for token in tokens[:-1]:
41
+ gt_dir = os.path.join(gt_dir, token)
42
+ with open(os.path.join(gt_dir, args.gt_json_name)) as f:
43
+ gt = json.load(f)
44
+ # load json files
45
+ with open(os.path.join(exp_dir, file)) as f:
46
+ pred = json.load(f)
47
+ # get hash for the graph
48
+ pred_hash = get_hash(pred)
49
+ gt_hash = get_hash(gt)
50
+ # compare hash
51
+ if pred_hash == gt_hash:
52
+ acc += 1
53
+ else:
54
+ wrong_files.append(file)
55
+
56
+
57
+ with open(os.path.join(os.path.dirname(exp_dir), f'acc_{os.path.basename(exp_dir)}.json'), 'w') as f:
58
+ json.dump({'acc': acc/total, 'wrong_files': wrong_files}, f, indent=4)
59
+
60
+
61
+
62
+
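The comparison above reduces each part tree to a topology-only fingerprint, so node ids and orderings do not matter. A small standalone illustration (restating `get_hash` so the snippet runs on its own):

```python
import networkx as nx

def get_hash(file, key="diffuse_tree"):
    G = nx.DiGraph()
    for node in file[key]:
        G.add_node(node["id"])
        if node["parent"] != -1:
            G.add_edge(node["id"], node["parent"])
    return nx.weisfeiler_lehman_graph_hash(G)

chain_a = {"diffuse_tree": [{"id": 0, "parent": -1}, {"id": 1, "parent": 0}, {"id": 2, "parent": 1}]}
chain_b = {"diffuse_tree": [{"id": 2, "parent": -1}, {"id": 0, "parent": 2}, {"id": 1, "parent": 0}]}
star    = {"diffuse_tree": [{"id": 0, "parent": -1}, {"id": 1, "parent": 0}, {"id": 2, "parent": 0}]}

assert get_hash(chain_a) == get_hash(chain_b)  # same topology, different ids -> counted as correct
assert get_hash(chain_a) != get_hash(star)     # different topology -> counted as wrong
```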
scripts/graph_pred/prompt_workflow_new.py ADDED
@@ -0,0 +1,363 @@
1
+ import os
2
+ from PIL import Image
3
+ from io import BytesIO
4
+ import base64
5
+
6
+ def encode_image(image_path: str, center_crop=False):
7
+ """Resize and encode the image as base64"""
8
+ # load the image
9
+ image = Image.open(image_path)
10
+
11
+ # resize the image (224x224 when center-cropping, otherwise 512x512)
12
+ if center_crop: # (resize to 256x256 and then center crop to 224x224)
13
+ image = image.resize((256, 256))
14
+ width, height = image.size
15
+ left = (width - 224) / 2
16
+ top = (height - 224) / 2
17
+ right = (width + 224) / 2
18
+ bottom = (height + 224) / 2
19
+ image = image.crop((left, top, right, bottom))
20
+ else:
21
+ image = image.resize((512, 512))
22
+
23
+ # convert the image to bytes
24
+ buffer = BytesIO()
25
+ image.save(buffer, format="PNG")
26
+ buffer.seek(0)
27
+ # encode the image as base64
28
+ encoded_image = base64.b64encode(buffer.read()).decode("utf-8")
29
+ return encoded_image
30
+
31
+ system_prompt = """
32
+ You are an expert in the recognition, structural parsing, and physical‑feasibility validation of articulated objects from image inputs.
33
+
34
+ You will be provided with two rendered images of the same object:
35
+ 1. A closed‑state image (all movable parts in their fully closed positions)
36
+ 2. An open‑state image (all movable parts in their fully opened positions)
37
+
38
+ Your task is to analyze the object's articulated structure and generate a connectivity graph describing the part relationships.
39
+
40
+ You must follow this workflow:
41
+
42
+ 1. Part Detection:
43
+ - Detect candidate parts with their coarse position in the **closed-state image**, with optional assistance from the **open-state image** to resolve ambiguous or occluded parts.
44
+ - Allowed part types: ['base', 'door', 'drawer', 'handle', 'knob', 'tray']
45
+ - Ignore small decorative things directly attached to the base.
46
+ - There must be exactly one "base"; a "tray" is only allowed if the object is a microwave (a microwave does not have to contain a "tray").
47
+
48
+ 2. Step-by-Step Reasoning:
49
+ 1. Part Listing: List all detected parts and their counts (do not infer attachment yet)
50
+ 2. Validation: Enforce structural rules:
51
+ - Exactly one base
52
+ - Each door or drawer may have at most two handles or knobs
53
+ - Every handle/knob must attach to a door or drawer
54
+ - Trays only appear in microwaves
55
+ 3. Attachment Inference: For each non-base part, infer its parent (e.g., "drawer_1 (attached to base)").
56
+ - Use the open-state image to assist in verifying attachments if the closed-state image is ambiguous.
57
+ 4. Connectivity Graph Construction: Output a JSON tree of part relationships. The base is the root.
58
+
59
+ Example output:
60
+ ```json
61
+ {
62
+ "base": [
63
+ { "door": [ { "handle": [] } ] },
64
+ { "drawer": [ { "handle": [] } ] }
65
+ ]
66
+ }
67
+ ```
68
+
69
+ Final Output: You **MUST** output a single JSON tree representing the part connectivity of the object. Use the open-state image to improve the accuracy and completeness of your structural interpretation, but base your parsing primarily on the closed-state image.
70
+ Note: You **MUST** give the final output in JSON format.
71
+ """
72
+
73
+ answer_sys = """
74
+ Great — please upload the two images of the same object:
75
+
76
+ One in the closed state (all movable parts fully closed)
77
+
78
+ One in the open state (all movable parts fully open)
79
+
80
+ Once I receive both, I’ll proceed with:
81
+
82
+ Part detection on the closed image
83
+
84
+ Structured reasoning and validation
85
+
86
+ Attachment inference
87
+
88
+ Final part connectivity graph in JSON format
89
+
90
+ Let me know when you're ready to continue!
91
+ """
92
+
93
+ answer_1 = """
94
+ Great! Let’s go through the articulated object parsing workflow based on the provided **closed-state** and **open-state** images.
95
+
96
+ ---
97
+
98
+ ### **1. Part Detection**
99
+
100
+ From the **closed-state image** (first image), with verification from the **open-state image** (second image):
101
+
102
+ - **Base**: 1 base structure (entire wooden cabinet)
103
+ - **Doors**: 4 doors
104
+ - 2 on the upper section (left & right)
105
+ - 2 on the lower section (left & right)
106
+ - **Drawers**: 4 drawers
107
+ - 3 in a row at the middle
108
+ - 1 larger drawer centered below the middle row
109
+ - **Handles**: 8 handles
110
+ - 1 per drawer
111
+ - 1 per door
112
+
113
+ ---
114
+
115
+ ### **2. Step-by-Step Reasoning**
116
+
117
+ #### 2.1 **Part Listing**
118
+
119
+ - `base`: 1
120
+ - `door`: 4
121
+ - `drawer`: 4
122
+ - `handle`: 8
123
+
124
+ #### 2.2 **Validation**
125
+
126
+ - ✅ Exactly one base
127
+ - ✅ Each door has 1 handle → 4 handles for 4 doors
128
+ - ✅ Each drawer has 1 handle → 4 handles for 4 drawers
129
+ - ✅ No trays → ✓ (not a microwave)
130
+ - ✅ Every handle is attached to a door or drawer (verified using open-state image)
131
+
132
+ All rules are satisfied.
133
+
134
+ #### 2.3 **Attachment Inference**
135
+
136
+ From closed/open-state comparison:
137
+
138
+ - Each **door** is directly attached to the **base**
139
+ - Each **drawer** is mounted to slide out of the **base**
140
+ - Each **handle** is on its respective **door** or **drawer**
141
+
142
+ ---
143
+
144
+ ### **3. Connectivity Graph Construction**
145
+
146
+ Final hierarchical structure:
147
+
148
+ ```json
149
+ {
150
+ "base": [
151
+ { "door_1": [ { "handle_1": [] } ] },
152
+ { "door_2": [ { "handle_2": [] } ] },
153
+ { "door_3": [ { "handle_3": [] } ] },
154
+ { "door_4": [ { "handle_4": [] } ] },
155
+ { "drawer_1": [ { "handle_5": [] } ] },
156
+ { "drawer_2": [ { "handle_6": [] } ] },
157
+ { "drawer_3": [ { "handle_7": [] } ] },
158
+ { "drawer_4": [ { "handle_8": [] } ] }
159
+ ]
160
+ }
161
+ ```
162
+ """
163
+
164
+ answer_2 = """
165
+ Thanks for the new image pair! Let's analyze the articulated structure of the **washing machine** based on the provided closed-state and open-state images.
166
+
167
+ ---
168
+
169
+ ### **1. Part Detection**
170
+
171
+ From the **closed-state image**, validated using the **open-state image**:
172
+
173
+ - **Base**: 1 main body (the washing machine structure)
174
+ - **Door**: 1 circular front-loading door
175
+ - **Handle**: Notice that there is no handle attached to the door
176
+ - **Knob**: 1 control knob on the top of the machine
177
+
178
+ ---
179
+
180
+ ### **2. Step-by-Step Reasoning**
181
+
182
+ #### 2.1 **Part Listing**
183
+
184
+ - `base`: 1
185
+ - `door`: 1
186
+ - `knob`: 1
187
+
188
+ #### 2.2 **Validation**
189
+
190
+ - ✅ Exactly one base
191
+ - ✅ No drawers, trays → Valid
192
+ - ✅ Trays not present (correct: not a microwave)
193
+
194
+ → **All structural constraints satisfied**
195
+
196
+ #### 2.3 **Attachment Inference**
197
+
198
+ - The **door** is front-mounted and clearly opens via a **hinge** on the left side (from viewer's perspective)
199
+ - The **knob** is directly mounted on the **base** (top section)
200
+ ---
201
+
202
+ ### **3. Connectivity Graph Construction**
203
+
204
+ ```json
205
+ {
206
+ "base": [
207
+ { "door": []},
208
+ { "knob": []}
209
+ ]
210
+ }
211
+ ```
212
+ """
213
+
214
+ answer_3 = """
215
+ Thanks! Let’s analyze the articulated structure of this oven using your provided closed and open state images.
216
+
217
+ ---
218
+
219
+ ### ✅ Articulated Object Parsing (Oven)
220
+
221
+ #### **1. Part Detection**
222
+
223
+ From the **closed-state** image (with **open-state** verification):
224
+
225
+ - **base**: 1 (the main oven body)
226
+ - **door**: 1 (the front oven door)
227
+ - **handle**: 1 (long horizontal handle on the door)
228
+
229
+ > 🔍 *The 6 small control knobs on top are **ignored** as instructed.*
230
+
231
+ ---
232
+
233
+ #### **2. Step-by-Step Reasoning**
234
+
235
+ **2.1 Part Listing:**
236
+
237
+ - `base`: 1
238
+ - `door`: 1
239
+ - `handle`: 1
240
+
241
+ **2.2 Validation:**
242
+
243
+ - ✅ Exactly one base
244
+ - ✅ Door has 1 handle
245
+ - ✅ Handle is attached to the door
246
+ - ✅ No trays (✓ valid for an oven)
247
+ - ✅ No knobs or extra handles beyond what's allowed
248
+
249
+ ✔ All constraints are satisfied.
250
+
251
+ **2.3 Attachment Inference:**
252
+
253
+ - The **door** is bottom-hinged and connected to the **base**
254
+ - The **handle** is directly mounted on the door
255
+
256
+ ---
257
+
258
+ #### **3. Final Connectivity Graph**
259
+
260
+ ```json
261
+ {
262
+ "base": [
263
+ { "door": [ { "handle": [] } ] }
264
+ ]
265
+ }
266
+ ```
267
+ """
268
+
269
+ answer_4 = """
270
+ Thanks! Let's now analyze the **wardrobe** based on the closed and open-state images you've provided.
271
+
272
+ ---
273
+
274
+ ### ✅ Articulated Object Parsing – Wardrobe
275
+
276
+ ---
277
+
278
+ ### **1. Part Detection**
279
+
280
+ - **Base**: 1 (entire wardrobe structure)
281
+ - **Doors**: 4 vertical panels (two left, two right)
282
+ - **Drawers**: 4 horizontal drawers at the bottom
283
+ - **Handles**: 8 in total
284
+ - 4 for doors (1 each)
285
+ - 4 for drawers (1 each)
286
+
287
+ ---
288
+
289
+ ### **2. Step-by-Step Reasoning**
290
+
291
+ #### **2.1 Part Listing**
292
+
293
+ - `base`: 1
294
+ - `door`: 4
295
+ - `drawer`: 4
296
+ - `handle`: 8
297
+
298
+ #### **2.2 Validation**
299
+
300
+ - ✅ Exactly one base
301
+ - ✅ Each door has 1 handle → valid
302
+ - ✅ Each drawer has 1 handle → valid
303
+ - ✅ No trays → ✓ valid (not a microwave)
304
+ - ✅ Every handle is attached to a door or drawer (confirmed via open-state image)
305
+
306
+ ✔ All structural rules are satisfied.
307
+
308
+ ---
309
+
310
+ #### **2.3 Attachment Inference**
311
+
312
+ - Each **door** is hinged to the **base**
313
+ - Each **drawer** slides out of the **base**
314
+ - Each **handle** is mounted on one **door** or **drawer**
315
+
316
+ ---
317
+
318
+ ### **3. Final Connectivity Graph**
319
+
320
+ ```json
321
+ {
322
+ "base": [
323
+ { "door_1": [ { "handle_1": [] } ] },
324
+ { "door_2": [ { "handle_2": [] } ] },
325
+ { "door_3": [ { "handle_3": [] } ] },
326
+ { "door_4": [ { "handle_4": [] } ] },
327
+ { "drawer_1": [ { "handle_5": [] } ] },
328
+ { "drawer_2": [ { "handle_6": [] } ] },
329
+ { "drawer_3": [ { "handle_7": [] } ] },
330
+ { "drawer_4": [ { "handle_8": [] } ] }
331
+ ]
332
+ }
333
+ ```
334
+ """
335
+
336
+ answer_all = [answer_sys, answer_1, answer_2, answer_3, answer_4]
337
+ messages = [
338
+ {"role": "user", "content": system_prompt},
339
+ {"role": "assistant", "content": answer_sys}
340
+ ]
341
+
342
+ for i in range(4):
343
+ root_path = './scripts/imgs_reference/'
344
+ close_path = os.path.join(root_path, f'close{i + 1}.png')
345
+ open_path = os.path.join(root_path, f'open{i + 1}.png')
346
+ close_img = encode_image(close_path, center_crop=False)
347
+ open_img = encode_image(open_path, center_crop=False)
348
+ messages.append(
349
+ {
350
+ "role": "user",
351
+ "content": [
352
+ {
353
+ "type": "image_url",
354
+ "image_url": {"url": f"data:image/png;base64,{close_img}"},
355
+ },
356
+ {
357
+ "type": "image_url",
358
+ "image_url": {"url": f"data:image/png;base64,{open_img}"},
359
+ }
360
+ ],
361
+ }
362
+ )
363
+ messages.append({"role": "assistant", "content": answer_all[i + 1]})
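The structural rules spelled out in `system_prompt` (exactly one base, handles and knobs only on doors or drawers, at most two per door or drawer, trays only on microwaves) can also be re-checked programmatically on the converted tree. A hypothetical helper, not part of this commit, assuming the flat, id-sorted diffuse_tree produced by `convert_format`:

```python
def check_graph_rules(diffuse_tree, is_microwave=False):
    """Return a list of rule violations (an empty list means the graph passes)."""
    errors = []
    kinds = [node["name"].split("_")[0] for node in diffuse_tree]
    if kinds.count("base") != 1:
        errors.append("there must be exactly one base")
    for node, kind in zip(diffuse_tree, kinds):
        if kind in ("handle", "knob"):
            parent_kind = kinds[node["parent"]] if node["parent"] >= 0 else None
            if parent_kind not in ("door", "drawer"):
                errors.append(f"{node['name']} must attach to a door or drawer")
        if kind in ("door", "drawer"):
            n_grips = sum(kinds[c] in ("handle", "knob") for c in node["children"])
            if n_grips > 2:
                errors.append(f"{node['name']} has more than two handles/knobs")
        if kind == "tray" and not is_microwave:
            errors.append("trays are only allowed on microwaves")
    return errors
```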
scripts/json2urdf.py ADDED
@@ -0,0 +1,160 @@
1
+ import json
2
+ from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
3
+ from xml.dom.minidom import parseString
4
+ import pybullet as p
5
+ import pybullet_data
6
+ import os
7
+ from PIL import Image # use Pillow to save rendered images
8
+ import numpy as np
9
+ import trimesh # for loading and processing 3D meshes
10
+ import imageio
11
+ import math
12
+
13
+ def degrees_to_radians(degrees):
14
+ """Convert an angle from degrees to radians."""
15
+ return degrees * math.pi / 180.0
16
+
17
+ def ply_to_obj(ply_filename, obj_filename, urdf_filename, part, json_data):
18
+ """Convert a PLY file to OBJ format using trimesh."""
19
+ base_path = '/'.join(urdf_filename.split('/')[:-1])
20
+ mesh = trimesh.load(os.path.join(base_path, ply_filename), force='mesh')
21
+ print(ply_filename, mesh.bounding_box.centroid)
22
+ # mesh.vertices -= (mesh.bounding_box.centroid)
23
+ # find base (parent == -1)
24
+ base_part_id = next((p['id'] for p in json_data['diffuse_tree'] if p['parent'] == -1), None)
25
+ if 'joint' in part.keys():
26
+ mesh.vertices -= (mesh.bounding_box.centroid + part['joint']['axis']['origin'])
27
+ while part['parent'] != base_part_id:
28
+ parent_part = next((p for p in json_data['diffuse_tree'] if p['id'] == part['parent']), None)
29
+ if parent_part is None:
30
+ break
31
+ mesh.vertices -= (parent_part['joint']['axis']['origin'])
32
+ part = parent_part
33
+ else:
34
+ mesh.vertices -= (mesh.bounding_box.centroid)
35
+ mesh.export(os.path.join(base_path, obj_filename))
36
+
37
+ def create_urdf_from_json(json_data, urdf_filename, parent_part=None):
38
+ robot = Element('robot', name='articulate_object')
39
+
40
+ def add_link(parent, part, urdf_filename, json_data, base=False):
41
+ link = SubElement(parent, 'link', name=f"link_{part['id']}")
42
+
43
+ # convert the PLY file to an OBJ file
44
+ ply_path = part['objs'][0]
45
+ obj_path = os.path.splitext(ply_path)[0] + '.obj'
46
+ # if not os.path.exists(obj_path):
47
+ ply_to_obj(ply_path, obj_path, urdf_filename, part, json_data)
48
+
49
+ visual = SubElement(link, 'visual')
50
+ origin = SubElement(visual, 'origin', xyz=" ".join(map(str, part['aabb']['center'])), rpy="0 0 0")
51
+ geometry = SubElement(visual, 'geometry')
52
+ mesh = SubElement(geometry, 'mesh', filename=obj_path) # use the converted OBJ path
53
+
54
+
55
+ def add_joint(parent, child_part, parent_part):
56
+ joint = SubElement(parent, 'joint', name=f"{parent_part['id']}_{child_part['id']}_joint", type=child_part['joint']['type'])
57
+
58
+ # for i in range(3):
59
+ # child_part['joint']['axis']['origin'][i] -= (child_part['aabb']['size'][i])
60
+ origin = SubElement(joint, 'origin', xyz=" ".join(map(str, child_part['joint']['axis']['origin'])), rpy="0 0 0")
61
+ # origin = SubElement(joint, 'origin', xyz=" 0 0 0 ", rpy="0 0 0")
62
+ axis = SubElement(joint, 'axis', xyz=" ".join(map(str, child_part['joint']['axis']['direction'])))
63
+ if child_part['joint']['type'] == 'revolute':
64
+ child_part['joint']['range'][0] = degrees_to_radians(child_part['joint']['range'][0])
65
+ child_part['joint']['range'][1] = degrees_to_radians(child_part['joint']['range'][1])
66
+
67
+ lower, upper = child_part['joint']['range']
68
+ if upper < lower:
69
+ lower, upper = upper, lower
70
+
71
+ limit = SubElement(
72
+ joint, 'limit',
73
+ lower=str(lower),
74
+ upper=str(upper),
75
+ effort="10",
76
+ velocity="1"
77
+ )
78
+ parent_element = SubElement(joint, 'parent', link=f"link_{parent_part['id']}")
79
+ child_element = SubElement(joint, 'child', link=f"link_{child_part['id']}")
80
+
81
+ base_part = json_data['diffuse_tree'][0]
82
+ add_link(robot, base_part, urdf_filename, json_data, base=True)
83
+
84
+ for part in json_data['diffuse_tree'][1:]:
85
+ base_part = next((p for p in json_data['diffuse_tree'] if p['parent'] == -1), None)
86
+ parent_part = next((p for p in json_data['diffuse_tree'] if p['id'] == part['parent']), None)
87
+ add_link(robot, part, urdf_filename, json_data)
88
+ if parent_part:
89
+ add_joint(robot, part, parent_part)
90
+
91
+ xmlstr = parseString(tostring(robot)).toprettyxml(indent=" ")
92
+
93
+ with open(urdf_filename, "w") as f:
94
+ f.write(xmlstr)
95
+
96
+
97
+ def pybullet_render(urdf_path, target_dir, num_frames, distance=3, fov=60):
98
+ physicsClient = p.connect(p.DIRECT)
99
+ p.setAdditionalSearchPath(pybullet_data.getDataPath())
100
+ try:
101
+ robot = p.loadURDF(urdf_path, [0, 0, 0])
102
+ except Exception as e:
103
+ print(e)
104
+ return
105
+ for i in range(-1, p.getNumJoints(robot)):
106
+ rgba = [np.random.uniform(0.2, 1.0), np.random.uniform(0.2, 1.0), np.random.uniform(0.2, 1.0), 1]
107
+ p.changeVisualShape(robot, linkIndex=i, rgbaColor=rgba)
108
+ p.resetBasePositionAndOrientation(robot, [0, 0, 0], [0, 0.7071, 0.7071, 0])
109
+ joint_info = []
110
+ for i in range(p.getNumJoints(robot)):
111
+ info = p.getJointInfo(robot, i)
112
+ if info[2] != p.JOINT_FIXED:
113
+ joint_info.append({
114
+ 'index': info[0],
115
+ 'type': info[2],
116
+ 'name': info[1].decode('utf-8'),
117
+ 'lower_limit': info[8],
118
+ 'upper_limit': info[9],
119
+ 'initial_position': p.getJointState(robot, info[0])[0]
120
+ })
121
+
122
+ joint_positions = {}
123
+ for joint in joint_info:
124
+ start = joint['lower_limit']
125
+ end = joint['upper_limit']
126
+ joint_positions[joint['index']] = np.concatenate((np.linspace(start, end, num_frames), np.linspace(end, start, num_frames)))
127
+
128
+ gif_frames = []
129
+ for frame in range(num_frames*2):
130
+ # import pdb; pdb.set_trace()
131
+ for joint in joint_info:
132
+ p.resetJointState(robot, joint['index'], joint_positions[joint['index']][frame])
133
+ joint_state = p.getJointState(robot, joint['index'])
134
+ p.stepSimulation()
135
+ viewMatrix = p.computeViewMatrixFromYawPitchRoll(
136
+ cameraTargetPosition=[0, 0, 0],
137
+ distance=distance, # use the function argument rather than a hard-coded 3.0
138
+ yaw=-150,
139
+ pitch=-10,
140
+ roll=0,
141
+ upAxisIndex=2
142
+ )
143
+ projectionMatrix=p.computeProjectionMatrixFOV(
144
+ fov=fov, ##60
145
+ aspect=1.0,
146
+ nearVal=0.1, farVal=100)
147
+
148
+ width, height, rgbPixels, depthBuffer, segMask = p.getCameraImage(
149
+ width=1024, height=1024, viewMatrix=viewMatrix,
150
+ projectionMatrix=projectionMatrix,
151
+ renderer=p.ER_BULLET_HARDWARE_OPENGL)
152
+
153
+ #get rgba image
154
+ rgba_image = np.reshape(rgbPixels, (height, width, 4))
155
+ # rgba_image[np.all(rgba_image[:, :, :3] == 255, axis=-1)] = [0, 0, 0, 0]
156
+ gif_frames.append(rgba_image[:, :, :3])
157
+
158
+ p.disconnect()
159
+ imageio.mimsave(f'{target_dir}/animation.gif', gif_frames, fps=8, loop=0)
160
+
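For reference, a minimal example of the `object.json` structure that `create_urdf_from_json` and `ply_to_obj` read above; the field names are taken from the code, while the concrete numbers and mesh paths are placeholders:

```python
example = {
    "diffuse_tree": [
        {   # base part: parent == -1, no joint
            "id": 0, "parent": -1,
            "objs": ["plys/part_0.ply"],
            "aabb": {"center": [0.0, 0.0, 0.0]},
        },
        {   # a revolute door attached to the base
            "id": 1, "parent": 0,
            "objs": ["plys/part_1.ply"],
            "aabb": {"center": [0.0, 0.45, 0.0]},
            "joint": {
                "type": "revolute",
                "axis": {"origin": [0.45, 0.45, 0.0], "direction": [0.0, 0.0, 1.0]},
                "range": [0, 90],  # degrees; add_joint converts revolute limits to radians
            },
        },
    ]
}
# create_urdf_from_json(example, "out/object.urdf")  # expects the referenced .ply files to exist
# pybullet_render("out/object.urdf", "out", num_frames=20)
```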
scripts/mesh_retrieval/retrieve.py ADDED
@@ -0,0 +1,97 @@
1
+ import os
2
+ import sys
3
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
4
+ import json
5
+ import argparse
6
+ import numpy as np
7
+ from retrieval.obj_retrieval import find_obj_candidates, pick_and_rescale_parts
8
+ import trimesh
9
+ import shutil
10
+
11
+ def _retrieve_part_meshes(info_dict, save_dir, gt_data_root):
12
+ mesh_save_dir = os.path.join(save_dir, "plys")
13
+ obj_save_dir = os.path.join(save_dir, "objs")
14
+ os.makedirs(mesh_save_dir, exist_ok=True)
15
+ os.makedirs(obj_save_dir, exist_ok=True)
16
+ print(save_dir)
17
+ if os.path.exists(os.path.join(save_dir, "object.ply")):
18
+ return
19
+ HASHBOOK_PATH = "retrieval/retrieval_hash_no_handles.json"
20
+
21
+ obj_candidates = find_obj_candidates(
22
+ info_dict,
23
+ gt_data_root,
24
+ HASHBOOK_PATH,
25
+ gt_file_name="object.json",
26
+ num_states=5,
27
+ metric_num_samples=4096,
28
+ keep_top=3,
29
+ )
30
+
31
+ retrieved_mesh_specs = pick_and_rescale_parts(
32
+ info_dict, obj_candidates, gt_data_root, gt_file_name="object.json"
33
+ )
34
+
35
+ scene = trimesh.Scene()
36
+ for i, mesh_spec in enumerate(retrieved_mesh_specs):
37
+ part_spec = info_dict["diffuse_tree"][i]
38
+ current_part_meshes = []
39
+ file_paths = []
40
+ for file in mesh_spec["files"]:
41
+ file = os.path.join(mesh_spec["dir"], file).replace("ply", "obj")
42
+ file_paths.append(file)
43
+ m = trimesh.load(file, force="mesh")
44
+ current_part_meshes.append(m)
45
+
46
+ if not current_part_meshes:
47
+ continue
48
+
49
+ bounds = np.array([m.bounds for m in current_part_meshes])
50
+ min_extents = bounds[:, 0, :].min(axis=0)
51
+ max_extents = bounds[:, 1, :].max(axis=0)
52
+ group_centroid = (min_extents + max_extents) / 2.0
53
+
54
+ transformation = trimesh.transformations.compose_matrix(
55
+ scale=mesh_spec["scale_factor"],
56
+ angles=[0, 0, np.radians(90) if mesh_spec["z_rotate_90"] else 0],
57
+ translate=part_spec["aabb"]["center"],
58
+ )
59
+
60
+ part_scene = trimesh.Scene()
61
+ for mesh in current_part_meshes:
62
+ mesh.vertices -= group_centroid
63
+ mesh.apply_transform(transformation)
64
+ part_scene.add_geometry(mesh)
65
+ scene.add_geometry(mesh)
66
+
67
+ obj_path = os.path.join(obj_save_dir, f"part_{i}/part_{i}.obj")
68
+ os.makedirs(os.path.dirname(obj_path), exist_ok=True)
69
+ part_scene.export(obj_path, include_texture=True)
70
+ info_dict["diffuse_tree"][i]["objs"] = [f"objs/part_{i}/part_{i}.obj"]
71
+
72
+ scene.export(os.path.join(save_dir, "object.ply"))
73
+ del mesh, scene
74
+ return info_dict
75
+
76
+ def main(args):
77
+ with open(os.path.join(args.src_dir, args.json_name), "r") as f:
78
+ info_dict = json.load(f)
79
+
80
+ if 'meta' not in info_dict.keys():
81
+ info_dict['meta'] = {
82
+ 'obj_cat': 'StroageFurniture'
83
+ }
84
+
85
+ updated_json = _retrieve_part_meshes(info_dict, args.src_dir, args.gt_data_root)
86
+
87
+ if updated_json is not None:
88
+ with open(os.path.join(args.src_dir, args.json_name), "w") as f:
89
+ json.dump(updated_json, f)
90
+
91
+ if __name__ == '__main__':
92
+ parser = argparse.ArgumentParser()
93
+ parser.add_argument('--src_dir', type=str, required=True, help='path to the directory containing object.json')
94
+ parser.add_argument('--json_name', type=str, default='object.json', help='name of the json file')
95
+ parser.add_argument('--gt_data_root', type=str, default='./', help='path to the ground truth data')
96
+ args = parser.parse_args()
97
+ main(args)
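A typical invocation, either from the command line or by reusing `_retrieve_part_meshes` directly; the paths below are placeholders for an actual result folder and ground-truth root:

```python
# python scripts/mesh_retrieval/retrieve.py --src_dir <result_dir> --json_name object.json --gt_data_root <gt_root>
import json
import os
from scripts.mesh_retrieval.retrieve import _retrieve_part_meshes  # assumes the repo root is on sys.path

src_dir = "results_demo/0"   # placeholder: folder containing object.json
gt_data_root = "./data"      # placeholder: root of the ground-truth meshes

with open(os.path.join(src_dir, "object.json")) as f:
    info_dict = json.load(f)
info_dict.setdefault("meta", {"obj_cat": "StorageFurniture"})  # category label is an assumption

updated = _retrieve_part_meshes(info_dict, src_dir, gt_data_root)
if updated is not None:
    with open(os.path.join(src_dir, "object.json"), "w") as f:
        json.dump(updated, f, indent=2)
```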
scripts/mesh_retrieval/retrieve_gpt.py ADDED
@@ -0,0 +1,29 @@
1
+ import os
2
+ import subprocess
3
+ import argparse
4
+ from tqdm.contrib.concurrent import process_map
5
+ from functools import partial
6
+
7
+ def run_retrieve(src_dir, json_name, data_root):
8
+ fn_call = ['python', 'scripts/mesh_retrieval/retrieve.py', '--src_dir', src_dir, '--json_name', json_name, '--gt_data_root', data_root]
9
+ try:
10
+ subprocess.run(fn_call, check=True, stderr=subprocess.STDOUT)
11
+ except subprocess.CalledProcessError as e:
12
+ print(f'Error from run_retrieve: {src_dir}')
13
+ print(f'Error: {e}')
14
+ return ' '.join(fn_call)
15
+
16
+ if __name__ == '__main__':
17
+ root_path = '/home/users/ruiqi.wu/manipulate_3d_generate/data/gpt_blender/'
18
+ for class_name in os.listdir(root_path):
19
+ if class_name == 'StroageFurniture':
20
+ for model_id in os.listdir(os.path.join(root_path, class_name)):
21
+ json_path = os.path.join(root_path, class_name, model_id, 'object.json')
22
+ object_path = os.path.join(root_path, class_name, model_id, 'object.ply')
23
+ if os.path.exists(json_path):
24
+ if not os.path.exists(object_path):
25
+ print(json_path)
26
+ src_dir = os.path.join(root_path, class_name, model_id)
27
+ json_name = 'object.json'
28
+ data_root = '../singapo'
29
+ run_retrieve(src_dir, json_name, data_root)
scripts/mesh_retrieval/run_retrieve.py ADDED
@@ -0,0 +1,68 @@
1
+ import os
2
+ import subprocess
3
+ import argparse
4
+ from tqdm.contrib.concurrent import process_map
5
+ from functools import partial
6
+
7
+ def run_retrieve(src_dir, json_name, data_root):
8
+ if 'StorageFurniture' not in src_dir and 'Table' not in src_dir:
9
+ data_root = '../acd_data/merged-data'
10
+ fn_call = ['python', 'scripts/mesh_retrieval/retrieve.py', '--src_dir', src_dir, '--json_name', json_name, '--gt_data_root', data_root]
11
+ try:
12
+ subprocess.run(fn_call, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
13
+ except subprocess.CalledProcessError as e:
14
+ print(f'Error from run_retrieve: {src_dir}')
15
+ print(f'Error: {e}')
16
+ return ' '.join(fn_call)
17
+
18
+ if __name__ == '__main__':
19
+ parser = argparse.ArgumentParser()
20
+ parser.add_argument("--src", type=str, required=True, help="path to the experiment folder")
21
+ parser.add_argument("--json_name", type=str, default="object.json", help="name of the json file")
22
+ parser.add_argument("--gt_data_root", type=str, default="../data", help="path to the ground truth data")
23
+ parser.add_argument("--max_workers", type=int, default=6, help="number of images to render for each object")
24
+ args = parser.parse_args()
25
+
26
+ assert os.path.exists(args.src), f"Src path does not exist: {args.src}"
27
+ assert os.path.exists(args.gt_data_root), f"GT data root does not exist: {args.gt_data_root}"
28
+
29
+ exp_path = args.src
30
+ # len_root = len(exp)
31
+ print('----------- Retrieve Part Meshes -----------')
32
+ src_dirs = []
33
+ # exps = os.listdir(root)
34
+ # for exp in exps:
35
+ # exp_path = os.path.join(root, exp)
36
+ for model_id in os.listdir(exp_path):
37
+ model_id_path = os.path.join(exp_path, model_id)
38
+ # print(model_id_path)
39
+ if '.' in model_id:
40
+ continue
41
+ for model_id_id in os.listdir(model_id_path):
42
+ if '.' not in model_id_id:
43
+ model_id_id_path = os.path.join(model_id_path, model_id_id)
44
+ for json_file in os.listdir(model_id_id_path):
45
+ if json_file.endswith(args.json_name):
46
+ if os.path.exists(os.path.join(model_id_id_path, 'object.ply')):
47
+ print(f"Found {model_id_id_path} with object.ply")
48
+ else:
49
+ # run_retrieve(model_id_id_path, json_name=args.json_name, data_root=args.gt_data_root)
50
+ src_dirs.append(model_id_id_path)
51
+ print(len(src_dirs), model_id_id_path)
52
+ # for dirpath, dirname, fnames in os.walk(root):
53
+ # for fname in fnames:
54
+ # if fname.endswith(args.json_name):
55
+ # src_dirs.append(dirpath) # save the relative directory path
56
+ # print(root)
57
+ print(f"Found {len(src_dirs)} jsons to retrieve part meshes")
58
+ # print(src_dirs)
59
+ # import ipdb
60
+ # ipdb.set_trace()
61
+
62
+ # for src_dir in src_dirs:
63
+ # print(src_dir)
64
+ # command = run_retrieve(src_dir, json_name=args.json_name, data_root=args.gt_data_root)
65
+ # command_file = open('retrieve_commands.sh', 'a')
66
+ # command_file.write(command + '\n')
67
+ # command_file.close()
68
+ process_map(partial(run_retrieve, json_name=args.json_name, data_root=args.gt_data_root), src_dirs, max_workers=args.max_workers, chunksize=1)
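The directory walker above assumes results laid out as `<src>/<model_id>/<sample_id>/object.json` and skips folders that already contain `object.ply`. A small dry-run sketch that only lists what would be processed (no subprocesses are spawned; the folder name is a placeholder):

```python
import os

src = "results_demo"  # placeholder experiment folder
pending = []
for model_id in os.listdir(src):
    model_dir = os.path.join(src, model_id)
    if "." in model_id or not os.path.isdir(model_dir):
        continue
    for sample_id in os.listdir(model_dir):
        if "." in sample_id:
            continue
        sample_dir = os.path.join(model_dir, sample_id)
        has_json = os.path.exists(os.path.join(sample_dir, "object.json"))
        has_mesh = os.path.exists(os.path.join(sample_dir, "object.ply"))
        if has_json and not has_mesh:
            pending.append(sample_dir)
print(f"{len(pending)} folders still need mesh retrieval")
```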