Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/apps/desktop/src/api/ssh_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use tauri::State;

use bitfun_core::service::remote_ssh::{
SSHConnectionConfig, SSHConnectionResult, SavedConnection, RemoteTreeNode,
SSHConfigLookupResult, SSHConfigEntry,
SSHConfigLookupResult, SSHConfigEntry, ServerInfo,
};
use crate::api::app_state::SSHServiceError;
use crate::AppState;
Expand Down Expand Up @@ -119,6 +119,15 @@ pub async fn ssh_is_connected(
Ok(is_connected)
}

/// Tauri command: fetch server information for the SSH connection `connection_id`.
///
/// Obtains the SSH manager from the shared application state (its error is propagated
/// as the command's `String` error) and returns whatever
/// `resolve_remote_home_if_missing` yields for that connection, which may be `None`.
#[tauri::command]
pub async fn ssh_get_server_info(
    state: State<'_, AppState>,
    connection_id: String,
) -> Result<Option<ServerInfo>, String> {
    let ssh_manager = state.get_ssh_manager_async().await?;
    let server_info = ssh_manager
        .resolve_remote_home_if_missing(&connection_id)
        .await;
    Ok(server_info)
}

#[tauri::command]
pub async fn ssh_get_config(
state: State<'_, AppState>,
Expand Down
70 changes: 70 additions & 0 deletions src/apps/desktop/src/computer_use/desktop_host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,24 @@ end tell"#])
}
}

/// Build the square region, in global logical coordinates, used for raw OCR preview crops
/// centred on `(cx, cy)`.
///
/// The top-left corner is `(cx - half, cy - half)` rounded down to whole units. The side
/// length is `2 * half` (saturating on overflow) and never less than 1, so `half == 0`
/// still produces a 1x1 region. This function always returns `Ok`.
fn ocr_region_square_around_point(
    cx: f64,
    cy: f64,
    half: u32,
) -> BitFunResult<OcrRegionNative> {
    let half_extent = f64::from(half);
    let side = half.saturating_mul(2).max(1);
    let origin_x = (cx - half_extent).floor() as i32;
    let origin_y = (cy - half_extent).floor() as i32;
    Ok(OcrRegionNative {
        x0: origin_x,
        y0: origin_y,
        width: side,
        height: side,
    })
}

/// Capture **raw** display pixels (no pointer/SoM overlay), cropped to `region` intersected with the chosen display.
///
/// `region` and [`DisplayInfo::width`]/[`height`] are **global logical points** (CG / AX). The framebuffer
Expand Down Expand Up @@ -2124,6 +2142,58 @@ impl ComputerUseHost for DesktopComputerUseHost {
.collect())
}

/// Hit-test the platform accessibility tree at the global point `(gx, gy)`.
///
/// On macOS and Windows the platform lookup runs via `tokio::task::spawn_blocking`;
/// a failed join is reported as a tool error. Every other target has no backend here
/// and returns `Ok(None)`.
async fn accessibility_hit_at_global_point(
    &self,
    gx: f64,
    gy: f64,
) -> BitFunResult<Option<bitfun_core::agentic::tools::computer_use_host::OcrAccessibilityHit>>
{
    #[cfg(target_os = "macos")]
    {
        // The macOS lookup returns a plain `Option`, so only the join error needs mapping.
        let lookup = tokio::task::spawn_blocking(move || {
            crate::computer_use::macos_ax_ui::accessibility_hit_at_global_point(gx, gy)
        });
        let found = lookup
            .await
            .map_err(|join_err| BitFunError::tool(join_err.to_string()))?;
        return Ok(found);
    }
    #[cfg(target_os = "windows")]
    {
        // The Windows lookup already yields a `BitFunResult`; strip the join layer and
        // hand its result through unchanged.
        let lookup = tokio::task::spawn_blocking(move || {
            crate::computer_use::windows_ax_ui::accessibility_hit_at_global_point(gx, gy)
        });
        let outcome = lookup
            .await
            .map_err(|join_err| BitFunError::tool(join_err.to_string()))?;
        return outcome;
    }
    #[cfg(not(any(target_os = "macos", target_os = "windows")))]
    {
        // Linux and any other target: nothing to query, so just consume the arguments.
        let _ = (gx, gy);
        Ok(None)
    }
}

/// Capture a raw screen crop around `(gx, gy)` and return the screenshot's bytes.
///
/// The crop is the square produced by `ocr_region_square_around_point` with the given
/// half extent. The capture itself runs via `tokio::task::spawn_blocking`; a failed join
/// becomes a tool error and a capture error is propagated as-is.
/// NOTE(review): the name implies the bytes are JPEG-encoded; that is decided by
/// `screenshot_raw_native_region`, which is not visible here.
async fn ocr_preview_crop_jpeg(
    &self,
    gx: f64,
    gy: f64,
    half_extent_native: u32,
) -> BitFunResult<Vec<u8>> {
    let crop = Self::ocr_region_square_around_point(gx, gy, half_extent_native)?;
    let capture = tokio::task::spawn_blocking(move || Self::screenshot_raw_native_region(crop));
    // First `?` handles the join failure, second one the capture failure.
    let joined = capture
        .await
        .map_err(|join_err| BitFunError::tool(join_err.to_string()))?;
    let screenshot = joined?;
    Ok(screenshot.bytes)
}

/// Return the `last_shot_refinement` value stored in the host state.
///
/// Yields `None` when `lock()` on the state returns an error, or when no refinement
/// is stored.
fn last_screenshot_refinement(&self) -> Option<ComputerUseScreenshotRefinement> {
    let guard = self.state.lock().ok()?;
    guard.last_shot_refinement
}
Expand Down
58 changes: 57 additions & 1 deletion src/apps/desktop/src/computer_use/macos_ax_ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
//! Coordinates match CoreGraphics global space used by [`crate::computer_use::DesktopComputerUseHost`].

use crate::computer_use::ui_locate_common;
use bitfun_core::agentic::tools::computer_use_host::{SomElement, UiElementLocateQuery, UiElementLocateResult};
use bitfun_core::agentic::tools::computer_use_host::{
OcrAccessibilityHit, SomElement, UiElementLocateQuery, UiElementLocateResult,
};
use bitfun_core::util::errors::{BitFunError, BitFunResult};
use core_foundation::array::{CFArray, CFArrayRef};
use core_foundation::base::{CFTypeRef, TCFType};
Expand All @@ -24,6 +26,12 @@ unsafe extern "C" {
attribute: CFStringRef,
value: *mut CFTypeRef,
) -> i32;
/// Accessibility API binding: hit-test at position `(x, y)` starting from `element`,
/// writing the element found to `out_elem`.
///
/// Returns a status code; the caller in this file treats `0` as success and releases
/// any non-null `out_elem` with `ax_release`.
/// NOTE(review): signature presumably mirrors Apple's `AXUIElementCopyElementAtPosition`
/// (float coordinates, `AXError` return) — confirm against the SDK header.
fn AXUIElementCopyElementAtPosition(
element: AXUIElementRef,
x: f32,
y: f32,
out_elem: *mut AXUIElementRef,
) -> i32;
fn AXValueGetType(value: AXValueRef) -> u32;
fn AXValueGetValue(value: AXValueRef, the_type: u32, ptr: *mut c_void) -> bool;
}
Expand Down Expand Up @@ -489,6 +497,54 @@ pub fn enumerate_interactive_elements(max_elements: usize) -> Vec<SomElement> {
results
}

/// Short description (role and title) of the `AXParent` of `elem`.
///
/// Returns `None` when the `AXParent` attribute cannot be copied or is a null element.
/// The copied attribute value is released before returning in every other case.
///
/// # Safety
///
/// NOTE(review): `elem` is presumably required to be a valid accessibility element
/// reference for the duration of the call — confirm against `ax_copy_attr`.
unsafe fn ax_parent_context_line(elem: AXUIElementRef) -> Option<String> {
    let parent_ref = ax_copy_attr(elem, "AXParent")?;
    let parent_elem = parent_ref as AXUIElementRef;
    // Read what is needed (if anything) first, so there is a single release point below.
    let role_and_title = if parent_elem.is_null() {
        None
    } else {
        let (role, title, _ident) = read_role_title_id(parent_elem);
        Some((role, title))
    };
    ax_release(parent_ref);
    let (role, title) = role_and_title?;
    Some(element_short_desc(role.as_deref(), title.as_deref()))
}

/// Hit-test the accessibility element at global screen coordinates `(gx, gy)`
/// (used for OCR `move_to_text` disambiguation).
///
/// Returns `None` when the system-wide element cannot be created, when the position
/// lookup reports a non-zero status, or when no element comes back. Otherwise the hit's
/// role, title, identifier and a short parent description are returned together with a
/// one-line `description` assembled from them.
pub fn accessibility_hit_at_global_point(gx: f64, gy: f64) -> Option<OcrAccessibilityHit> {
    unsafe {
        let system_wide = AXUIElementCreateSystemWide();
        if system_wide.is_null() {
            return None;
        }

        // The position lookup takes `f32` coordinates, so the inputs are narrowed here.
        let mut hit_elem: AXUIElementRef = std::ptr::null();
        let status =
            AXUIElementCopyElementAtPosition(system_wide, gx as f32, gy as f32, &mut hit_elem);
        ax_release(system_wide as CFTypeRef);

        if hit_elem.is_null() {
            return None;
        }
        if status != 0 {
            // An element came back despite the failure status: release it before bailing out.
            ax_release(hit_elem as CFTypeRef);
            return None;
        }

        // Read everything needed from the element before releasing it.
        let (role, title, identifier) = read_role_title_id(hit_elem);
        let parent_context = ax_parent_context_line(hit_elem);
        ax_release(hit_elem as CFTypeRef);

        let description = format!(
            "{} | title={:?} | id={:?} | parent=[{}]",
            role.as_deref().unwrap_or("?"),
            title.as_deref().unwrap_or(""),
            identifier.as_deref().unwrap_or(""),
            parent_context.as_deref().unwrap_or("?"),
        );
        Some(OcrAccessibilityHit {
            role,
            title,
            identifier,
            parent_context,
            description,
        })
    }
}

// ── Raw OCR: frontmost window bounds (separate from agent screenshot pipeline) ─────────────────

/// Bounds of the foreground app's focused or main window in global screen coordinates (same space as pointer / screen capture).
Expand Down
61 changes: 60 additions & 1 deletion src/apps/desktop/src/computer_use/windows_ax_ui.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
//! Windows UI Automation (UIA) tree walk for stable screen coordinates.

use crate::computer_use::ui_locate_common;
use bitfun_core::agentic::tools::computer_use_host::{UiElementLocateQuery, UiElementLocateResult};
use bitfun_core::agentic::tools::computer_use_host::{
OcrAccessibilityHit, UiElementLocateQuery, UiElementLocateResult,
};
use bitfun_core::util::errors::{BitFunError, BitFunResult};
use std::collections::VecDeque;
use windows::Win32::System::Com::{CoCreateInstance, CoInitializeEx, CLSCTX_INPROC_SERVER, COINIT_APARTMENTTHREADED};
use windows::Win32::UI::Accessibility::{CUIAutomation, IUIAutomation, IUIAutomationElement, IUIAutomationTreeWalker};
use windows::Win32::Foundation::POINT;
use windows::Win32::UI::WindowsAndMessaging::GetForegroundWindow;

fn bstr_to_string(b: windows_core::BSTR) -> String {
Expand Down Expand Up @@ -158,3 +161,59 @@ pub fn locate_ui_element_center(query: &UiElementLocateQuery) -> BitFunResult<Ui
}
}
}

/// Hit-test UI Automation at global screen coordinates `(gx, gy)`
/// (used for OCR `move_to_text` disambiguation).
///
/// Coordinates are rounded to the nearest integer before the lookup. Returns `Ok(None)`
/// when `ElementFromPoint` fails, and an error only when the UI Automation object cannot
/// be created. Empty role, name and automation-id strings are reported as `None`.
pub fn accessibility_hit_at_global_point(gx: f64, gy: f64) -> BitFunResult<Option<OcrAccessibilityHit>> {
    // The result of COM initialisation is deliberately ignored.
    unsafe {
        let _ = CoInitializeEx(None, COINIT_APARTMENTTHREADED);
    }
    let uia: IUIAutomation = unsafe {
        CoCreateInstance(&CUIAutomation, None, CLSCTX_INPROC_SERVER).map_err(|err| {
            BitFunError::tool(format!("UI Automation (CoCreateInstance): {}.", err))
        })?
    };

    let screen_pt = POINT {
        x: gx.round() as i32,
        y: gy.round() as i32,
    };
    let Ok(hit) = (unsafe { uia.ElementFromPoint(screen_pt) }) else {
        return Ok(None);
    };

    let title = unsafe { hit.CurrentName() }
        .ok()
        .map(bstr_to_string)
        .unwrap_or_default();
    let identifier = unsafe { hit.CurrentAutomationId() }
        .ok()
        .map(bstr_to_string)
        .unwrap_or_default();
    let role = localized_control_type_string(&hit);

    // Describe the parent in the control view as "<role>: <name>", if there is one.
    let parent_context = match unsafe { uia.ControlViewWalker() } {
        Ok(walker) => {
            let parent_lookup = unsafe { walker.GetParentElement(&hit) };
            match parent_lookup {
                Ok(parent) => {
                    let parent_name = unsafe { parent.CurrentName() }
                        .ok()
                        .map(bstr_to_string)
                        .unwrap_or_default();
                    let parent_role = localized_control_type_string(&parent);
                    let line = format!("{}: {}", parent_role, parent_name);
                    // The line always contains ':', so the only "blank" form is ": "
                    // (both the role and the name empty).
                    if line == ": " {
                        None
                    } else {
                        Some(line)
                    }
                }
                Err(_) => None,
            }
        }
        Err(_) => None,
    };

    let description = format!(
        "role={} name={:?} id={:?} parent={:?}",
        role, title, identifier, parent_context
    );

    // Empty strings carry no information for callers, so they become `None`.
    let non_empty = |text: String| if text.is_empty() { None } else { Some(text) };
    Ok(Some(OcrAccessibilityHit {
        role: non_empty(role),
        title: non_empty(title),
        identifier: non_empty(identifier),
        parent_context,
        description,
    }))
}
1 change: 1 addition & 0 deletions src/apps/desktop/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ pub async fn run() {
api::ssh_api::ssh_disconnect,
api::ssh_api::ssh_disconnect_all,
api::ssh_api::ssh_is_connected,
api::ssh_api::ssh_get_server_info,
api::ssh_api::ssh_get_config,
api::ssh_api::ssh_list_config_hosts,
api::ssh_api::remote_read_file,
Expand Down
Loading
Loading