import gradio as gr
import numpy as np
from PIL import Image
import trimesh
from transformers_js import import_transformers_js, as_url

transformers = await import_transformers_js()
pipeline = transformers.pipeline
depth_estimator = await pipeline('depth-estimation', 'Xenova/depth-anything-small-hf')


def depthmap_to_glb_trimesh(depth_map, rgb_image, file_path):
    assert depth_map.shape[:2] == rgb_image.shape[:2], "Depth map and RGB image must have the same dimensions"

    # Generate vertices and faces
    vertices = []
    colors = []
    faces = []

    height, width = depth_map.shape
    for y in range(height):
        for x in range(width):
            z = depth_map[y, x]
            vertices.append([x, y, z])
            colors.append(rgb_image[y, x])

    # Create faces (2 triangles per pixel, except for edges)
    for y in range(height - 1):
        for x in range(width - 1):
            top_left = y * width + x
            top_right = top_left + 1
            bottom_left = top_left + width
            bottom_right = bottom_left + 1

            faces.append([top_left, bottom_left, top_right])
            faces.append([top_right, bottom_left, bottom_right])

    # Convert to numpy arrays
    vertices = np.array(vertices, dtype=np.float64)
    colors = np.array(colors, dtype=np.uint8)
    faces = np.array(faces, dtype=np.int32)

    mesh = trimesh.Trimesh(vertices=vertices, faces=faces, vertex_colors=colors, process=False)

    # Export to GLB
    mesh.export(file_path, file_type='glb')


def invert_depth(depth_map):
    max_depth = np.max(depth_map)
    return max_depth - depth_map


def invert_xy(arr):
    return arr[::-1, ::-1]


async def estimate(image_path, depth_scale):
    image = Image.open(image_path)
    image.thumbnail((384, 384))  # Resize the image keeping the aspect ratio

    predictions = await depth_estimator(as_url(image_path))

    depth_image = predictions["depth"].to_pil()

    tensor = predictions["predicted_depth"]
    tensor_data = {
        "dims": tensor.dims,
        "type": tensor.type,
        "size": tensor.size,
    }

    # Construct the 3D model from the depth map and the RGB image
    depth = predictions["predicted_depth"].to_numpy()
    depth = invert_depth(depth)
    depth = invert_xy(depth)
    depth = depth * depth_scale
    # The model outputs the depth map in a different size than the input image,
    # so we resize the depth map to match the original image size.
    depth = np.array(Image.fromarray(depth).resize(image.size))

    image_array = np.asarray(image)
    image_array = invert_xy(image_array)

    glb_file_path = "output.glb"
    depthmap_to_glb_trimesh(depth, image_array, glb_file_path)

    return depth_image, glb_file_path, tensor_data


demo = gr.Interface(
    fn=estimate,
    inputs=[
        gr.Image(type="filepath"),
        gr.Slider(minimum=1, maximum=100, value=10, label="Depth Scale"),
    ],
    outputs=[
        gr.Image(label="Depth Image"),
        gr.Model3D(label="3D Model"),
        gr.JSON(label="Tensor"),
    ],
    examples=[
        ["bread_small.png"],
        ["cats.jpg"],
    ],
)

demo.launch()

# requirements.txt
transformers_js_py
trimesh
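
# A minimal vectorized sketch of the per-pixel loops in depthmap_to_glb_trimesh,
# assuming the same row-major (y-major) vertex layout and the same triangle
# winding. This is not part of the app above; grid_vertices and grid_faces are
# hypothetical helpers that trade the O(H*W) Python loops for NumPy array ops,
# which matters for larger depth maps.
def grid_vertices(depth_map):
    height, width = depth_map.shape
    xs, ys = np.meshgrid(np.arange(width), np.arange(height))
    # One [x, y, z] row per pixel, in the same order as the nested loops above.
    return np.column_stack([xs.ravel(), ys.ravel(), depth_map.ravel()]).astype(np.float64)

def grid_faces(height, width):
    idx = np.arange(height * width).reshape(height, width)
    top_left = idx[:-1, :-1].ravel()  # top-left vertex index of every quad
    top_right = top_left + 1
    bottom_left = top_left + width
    bottom_right = bottom_left + 1
    # Two triangles per quad, matching the winding used in depthmap_to_glb_trimesh.
    return np.concatenate([
        np.stack([top_left, bottom_left, top_right], axis=1),
        np.stack([top_right, bottom_left, bottom_right], axis=1),
    ], axis=0).astype(np.int32)

# Usage sketch: vertices = grid_vertices(depth); faces = grid_faces(*depth.shape);
# colors = rgb_image.reshape(-1, rgb_image.shape[-1]) gives the matching per-vertex colors.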