blob: 1c1ad545995cd92b631e346a42cf62bd6f6ace9e [file] [log] [blame]
Avi Drissman8ba1bad2022-09-13 19:22:361// Copyright 2022 The Chromium Authors
Ramin Halavati26dcce22022-02-23 13:11:142// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Abigail Klein7a63c572024-02-28 20:45:095#include "services/screen_ai/screen_ai_service_impl.h"
Ramin Halavati26dcce22022-02-23 13:11:146
Ramin Halavati26679592025-03-13 07:24:577#include <algorithm>
Ramin Halavati55fdf4d2022-11-07 06:35:188#include <memory>
Ramin Halavati63d9c252024-07-29 06:59:179#include <optional>
Ramin Halavatifd712d8e2022-10-19 05:11:2710#include <utility>
Ramin Halavatib044504d2022-10-24 06:36:3411#include <vector>
Ramin Halavatifd712d8e2022-10-19 05:11:2712
13#include "base/check.h"
Ramin Halavatiad969ef2024-05-21 06:59:4714#include "base/check_is_test.h"
Tom Sepeze9b4dd552024-08-14 22:44:2915#include "base/compiler_specific.h"
Ramin Halavati65f96fb2025-02-24 16:45:1816#include "base/cpu.h"
Ramin Halavati55fdf4d2022-11-07 06:35:1817#include "base/functional/bind.h"
18#include "base/location.h"
Ramin Halavatifafff152022-12-05 09:39:3819#include "base/logging.h"
Kyungjun Lee9d755a02022-11-08 17:57:1120#include "base/metrics/histogram_functions.h"
Ramin Halavati5ff855c2022-04-27 16:30:4821#include "base/process/process.h"
Ramin Halavati65f96fb2025-02-24 16:45:1822#include "base/strings/stringprintf.h"
Ramin Halavati26679592025-03-13 07:24:5723#include "base/system/sys_info.h"
Sean Maher5b9af51f2022-11-21 15:32:4724#include "base/task/single_thread_task_runner.h"
Ramin Halavati55fdf4d2022-11-07 06:35:1825#include "base/task/thread_pool.h"
Ramin Halavati65f96fb2025-02-24 16:45:1826#include "components/crash/core/common/crash_key.h"
Abigail Klein7a63c572024-02-28 20:45:0927#include "services/screen_ai/buildflags/buildflags.h"
Ramin Halavati4c47eb82024-12-03 19:06:4228#include "services/screen_ai/proto/chrome_screen_ai.pb.h"
Abigail Klein7a63c572024-02-28 20:45:0929#include "services/screen_ai/proto/main_content_extractor_proto_convertor.h"
30#include "services/screen_ai/proto/visual_annotator_proto_convertor.h"
Ramin Halavati4c47eb82024-12-03 19:06:4231#include "services/screen_ai/public/cpp/metrics.h"
Abigail Klein7a63c572024-02-28 20:45:0932#include "services/screen_ai/public/cpp/utilities.h"
Ramin Halavatieddadb62022-05-04 17:29:4933#include "ui/accessibility/accessibility_features.h"
Ramin Halavati4f904d0a2024-06-25 15:32:3234#include "ui/accessibility/ax_node.h"
35#include "ui/accessibility/ax_tree.h"
Ramin Halavati84231072022-08-17 08:02:2036#include "ui/accessibility/ax_tree_id.h"
Nektarios Paisiosf73d6972022-06-04 11:32:2537#include "ui/gfx/geometry/rect_f.h"
Ramin Halavati367352a2022-04-14 06:00:2338
Ramin Halavatia9b50a102024-02-07 18:37:2339#if BUILDFLAG(USE_FAKE_SCREEN_AI)
Abigail Klein7a63c572024-02-28 20:45:0940#include "services/screen_ai/screen_ai_library_wrapper_fake.h"
Ramin Halavati767b8fc2024-02-02 06:23:0841#else
Abigail Klein7a63c572024-02-28 20:45:0942#include "services/screen_ai/screen_ai_library_wrapper_impl.h"
Ramin Halavati767b8fc2024-02-02 06:23:0843#endif
44
Ramin Halavati69bf7752022-04-04 09:58:1545namespace screen_ai {
Ramin Halavati26dcce22022-02-23 13:11:1446
Ramin Halavati6d4fc2b2022-06-16 15:33:3547namespace {
48
// Maximum image resolution (in pixels, 2048 x 2048) that OCR service
// processes. Images larger than this threshold are downsampled before
// processing. In this file it is only used to record a metric for images at or
// above this size.
constexpr uint32_t kLargestOcrResolution = 2048 * 2048;
52
Ramin Halavati2acf2362024-08-02 14:22:5353// How often it would be checked that the service is idle and can be shutdown.
Ramin Halavatidc0b2e512024-08-16 17:39:0954constexpr base::TimeDelta kIdleCheckingDelay = base::Minutes(5);
Ramin Halavati2acf2362024-08-02 14:22:5355
Ramin Halavati6ff56412024-08-13 07:15:5456// How long after all clients are disconnected, it is checked if service is
57// idle.
Ramin Halavati2fa45b82024-08-15 16:46:5358constexpr base::TimeDelta kCoolDownTime = base::Seconds(10);
Ramin Halavati6ff56412024-08-13 07:15:5459
Ramin Halavatiad969ef2024-05-21 06:59:4760// These values are persisted to logs. Entries should not be renumbered and
61// numeric values should never be reused.
Ramin Halavati29cc1152024-10-27 17:30:2862// See `screen_ai_service.mojom` for more info.
Ramin Halavatib3fe2fe2025-03-20 16:53:0363// LINT.IfChange(OcrClientType)
// Histogram-facing mirror of `mojom::OcrClientType`. Every entry carries an
// explicit value so that an accidental reordering cannot renumber it.
enum class OcrClientTypeForMetrics {
  kTest = 0,
  kPdfViewer = 1,
  kLocalSearch = 2,
  kCameraApp = 3,
  kNotUsed = 4,  // Can be used for a new client.
  kMediaApp = 5,
  kScreenshotTextDetection = 6,
  kMaxValue = kScreenshotTextDetection
};
Ramin Halavatib3fe2fe2025-03-20 16:53:0374// LINT.ThenChange(//tools/metrics/histograms/metadata/accessibility/enums.xml:OcrClientType)
75
76// These values are persisted to logs. Entries should not be renumbered and
77// numeric values should never be reused.
78// See `screen_ai_service.mojom` for more info.
79// LINT.IfChange(MainContentExtractionClientType)
// Histogram-facing mirror of `mojom::MceClientType`; see the conversion in
// `GetClientType(mojom::MceClientType)`.
enum class MainContentExtractionClientTypeForMetrics {
  kTest = 0,
  kReadingMode = 1,
  kMainNode = 2,
  kMahi = 3,
  kMaxValue = kMahi,
};
87// LINT.ThenChange(//tools/metrics/histograms/metadata/accessibility/enums.xml:MainContentExtractionClientType)
Ramin Halavatiad969ef2024-05-21 06:59:4788
89OcrClientTypeForMetrics GetClientType(mojom::OcrClientType client_type) {
90 switch (client_type) {
91 case mojom::OcrClientType::kTest:
92 CHECK_IS_TEST();
93 return OcrClientTypeForMetrics::kTest;
94 case mojom::OcrClientType::kPdfViewer:
95 return OcrClientTypeForMetrics::kPdfViewer;
96 case mojom::OcrClientType::kLocalSearch:
97 return OcrClientTypeForMetrics::kLocalSearch;
98 case mojom::OcrClientType::kCameraApp:
99 return OcrClientTypeForMetrics::kCameraApp;
Ramin Halavatiad969ef2024-05-21 06:59:47100 case mojom::OcrClientType::kMediaApp:
101 return OcrClientTypeForMetrics::kMediaApp;
Michelle Chen5192d522024-10-22 23:38:22102 case mojom::OcrClientType::kScreenshotTextDetection:
103 return OcrClientTypeForMetrics::kScreenshotTextDetection;
Ramin Halavatiad969ef2024-05-21 06:59:47104 }
105}
106
Ramin Halavatib3fe2fe2025-03-20 16:53:03107MainContentExtractionClientTypeForMetrics GetClientType(
108 mojom::MceClientType client_type) {
109 switch (client_type) {
110 case mojom::MceClientType::kTest:
111 CHECK_IS_TEST();
112 return MainContentExtractionClientTypeForMetrics::kTest;
113 case mojom::MceClientType::kReadingMode:
114 return MainContentExtractionClientTypeForMetrics::kReadingMode;
115 case mojom::MceClientType::kMainNode:
116 return MainContentExtractionClientTypeForMetrics::kMainNode;
117 case mojom::MceClientType::kMahi:
118 return MainContentExtractionClientTypeForMetrics::kMahi;
119 }
120}
121
Ramin Halavati89a36c92023-05-05 15:01:03122ui::AXTreeUpdate ConvertVisualAnnotationToTreeUpdate(
Ramin Halavatidcb09a172024-07-15 08:28:55123 std::optional<chrome_screen_ai::VisualAnnotation>& annotation_proto,
Ramin Halavati89a36c92023-05-05 15:01:03124 const gfx::Rect& image_rect) {
125 if (!annotation_proto) {
126 VLOG(0) << "Screen AI library could not process snapshot or no OCR data.";
127 return ui::AXTreeUpdate();
128 }
129
130 return VisualAnnotationToAXTreeUpdate(*annotation_proto, image_rect);
131}
132
Abigail Klein18673702024-03-05 20:59:06133ui::AXNodeID ComputeMainNode(
134 const ui::AXTree* tree,
135 const std::vector<ui::AXNodeID>& content_node_ids) {
136 ui::AXNode* front = tree->GetFromId(content_node_ids.front());
137 ui::AXNode* back = tree->GetFromId(content_node_ids.back());
138 ui::AXNode* main = front->GetLowestCommonAncestor(*back);
139 return main->id();
140}
141
Ramin Halavati65f96fb2025-02-24 16:45:18142#if !BUILDFLAG(USE_FAKE_SCREEN_AI)
143void SetCPUInstructionSetCrashKey() {
144#if defined(ARCH_CPU_X86_FAMILY)
145 base::CPU();
146 // Report cpu micro architecture in case of crash.
147 static crash_reporter::CrashKeyString<3> cpu_info("intel_micro_architecture");
148 cpu_info.Set(
149 base::StringPrintf("%i", base::CPU().GetIntelMicroArchitecture()));
150#endif
151}
152#endif
153
Ramin Halavati26679592025-03-13 07:24:57154// Return a maximum 11 character string with the signature of available and
155// total memory, both in MB and capped to 99999.
156std::string GetMemoryStatusForCrashKey() {
157 int total_memory = base::SysInfo::AmountOfPhysicalMemoryMB();
158 int available_memory = static_cast<int>(
159 base::SysInfo::AmountOfAvailablePhysicalMemory() / (1024 * 1024));
160
161 // Cap the number of digits for crash report.
162 total_memory = std::min(total_memory, 99999);
163 available_memory = std::min(available_memory, 99999);
164 return base::StringPrintf("%i,%i", available_memory, total_memory);
165}
166
Ramin Halavati26ae6b72022-11-10 06:45:05167} // namespace
168
Ramin Halavati39bab0d2024-01-30 06:05:01169// The library accepts simple pointers to model data retrieval functions, hence
170// callback functions with linked object are not safe to pass.
Ramin Halavati63d9c252024-07-29 06:59:17171// This global variable keeps the pointer the only instance of this class.
172ModelDataHolder* g_model_data_holder_instance = nullptr;
Ramin Halavati8ced6392023-09-25 07:27:50173
Ramin Halavati63d9c252024-07-29 06:59:17174// Keeps the handles of model files, and replies to calls for copying their
175// content.
176class ModelDataHolder {
Ramin Halavati8ced6392023-09-25 07:27:50177 public:
Ramin Halavati63d9c252024-07-29 06:59:17178 ModelDataHolder() {
179 CHECK(!g_model_data_holder_instance);
180 g_model_data_holder_instance = this;
181 }
Ramin Halavati8ced6392023-09-25 07:27:50182
Ramin Halavati63d9c252024-07-29 06:59:17183 ModelDataHolder(const ModelDataHolder&) = delete;
184 ModelDataHolder& operator=(const ModelDataHolder&) = delete;
185
186 ~ModelDataHolder() {
187 CHECK_EQ(g_model_data_holder_instance, this);
188 g_model_data_holder_instance = nullptr;
Ramin Halavati8ced6392023-09-25 07:27:50189 }
190
191 // Returns 0 if file is not found.
192 static uint32_t GetDataSize(const char* relative_file_path) {
Ramin Halavati63d9c252024-07-29 06:59:17193 CHECK(g_model_data_holder_instance);
194 base::File* model_file =
195 g_model_data_holder_instance->GetModelFile(relative_file_path);
196 return model_file ? model_file->GetLength() : 0;
Ramin Halavati8ced6392023-09-25 07:27:50197 }
198
Ramin Halavati63d9c252024-07-29 06:59:17199 // Copies content of the file in `relative_file_path` to `buffer`. Expects
200 // that `buffer_size` would be enough for the entire file content.
Ramin Halavati8ced6392023-09-25 07:27:50201 static void CopyData(const char* relative_file_path,
202 uint32_t buffer_size,
203 char* buffer) {
Ramin Halavati63d9c252024-07-29 06:59:17204 CHECK(g_model_data_holder_instance);
205 base::File* model_file =
206 g_model_data_holder_instance->GetModelFile(relative_file_path);
207 CHECK(model_file);
208
209 int64_t length = model_file->GetLength();
210 CHECK_GE(buffer_size, length);
Tom Sepeze9b4dd552024-08-14 22:44:29211 CHECK_EQ(UNSAFE_TODO(model_file->Read(0, buffer, length)), length);
Ramin Halavati8ced6392023-09-25 07:27:50212 }
213
Ramin Halavati63d9c252024-07-29 06:59:17214 void AddModelFiles(base::flat_map<base::FilePath, base::File> model_files) {
215 for (auto& model_file : model_files) {
216 model_files_[model_file.first.MaybeAsASCII()] =
217 std::move(model_file.second);
Ramin Halavati39bab0d2024-01-30 06:05:01218 }
219 }
Ramin Halavati8ced6392023-09-25 07:27:50220
Ramin Halavati63d9c252024-07-29 06:59:17221 // Returns the file handle for `relative_file_path` if it exists.
222 base::File* GetModelFile(const char* relative_file_path) {
223 if (!base::Contains(model_files_, relative_file_path)) {
224 return nullptr;
225 }
226 return &model_files_[relative_file_path];
227 }
228
Ramin Halavati8ced6392023-09-25 07:27:50229 private:
Ramin Halavati63d9c252024-07-29 06:59:17230 std::map<std::string, base::File> model_files_;
Ramin Halavati8ced6392023-09-25 07:27:50231};
232
Ramin Halavati26ae6b72022-11-10 06:45:05233ScreenAIService::ScreenAIService(
Ramin Halavatic1e4fa92023-05-17 17:22:06234 mojo::PendingReceiver<mojom::ScreenAIServiceFactory> receiver)
235 : factory_receiver_(this, std::move(receiver)),
236 ocr_receiver_(this),
Ramin Halavatiad969ef2024-05-21 06:59:47237 main_content_extraction_receiver_(this) {
Ramin Halavati6ff56412024-08-13 07:15:54238 screen2x_main_content_extractors_.set_disconnect_handler(
239 base::BindRepeating(&ScreenAIService::CheckIdleStateAfterDelay,
240 weak_ptr_factory_.GetWeakPtr()));
241 screen_ai_annotators_.set_disconnect_handler(
242 base::BindRepeating(&ScreenAIService::OcrReceiverDisconnected,
243 weak_ptr_factory_.GetWeakPtr()));
Ramin Halavati63d9c252024-07-29 06:59:17244 model_data_holder_ = std::make_unique<ModelDataHolder>();
Ramin Halavati2acf2362024-08-02 14:22:53245 idle_checking_timer_ = std::make_unique<base::RepeatingTimer>();
246 idle_checking_timer_->Start(FROM_HERE, kIdleCheckingDelay, this,
247 &ScreenAIService::ShutDownIfNoClients);
Ramin Halavatiad969ef2024-05-21 06:59:47248}
Ramin Halavati26ae6b72022-11-10 06:45:05249
250ScreenAIService::~ScreenAIService() = default;
251
Ramin Halavati8ced6392023-09-25 07:27:50252void ScreenAIService::LoadLibrary(const base::FilePath& library_path) {
Abigail Klein7a63c572024-02-28 20:45:09253 // The ScopedBlockingCall in LoadLibrary guarantees that this is not run on
254 // the UI thread.
Ramin Halavatia9b50a102024-02-07 18:37:23255#if BUILDFLAG(USE_FAKE_SCREEN_AI)
Ramin Halavati767b8fc2024-02-02 06:23:08256 library_ = std::make_unique<ScreenAILibraryWrapperFake>();
257#else
258 library_ = std::make_unique<ScreenAILibraryWrapperImpl>();
Ramin Halavati65f96fb2025-02-24 16:45:18259
260 // TODO(crbug.com/381256355): Remove when the library is SSE3 compatible.
261 SetCPUInstructionSetCrashKey();
Ramin Halavati767b8fc2024-02-02 06:23:08262#endif
Ramin Halavati8ced6392023-09-25 07:27:50263
264 bool load_sucessful = library_->Load(library_path);
265 base::UmaHistogramBoolean("Accessibility.ScreenAI.Library.Initialized",
266 load_sucessful);
267
268 if (!load_sucessful) {
269 library_.reset();
270 return;
271 }
272
273 uint32_t version_major;
274 uint32_t version_minor;
275 library_->GetLibraryVersion(version_major, version_minor);
276 VLOG(2) << "Screen AI library version: " << version_major << "."
277 << version_minor;
278
Georg Neis06e387772024-12-25 07:32:22279#if BUILDFLAG(IS_CHROMEOS)
Ramin Halavati8ced6392023-09-25 07:27:50280 library_->SetLogger();
281#endif
282
283 if (features::IsScreenAIDebugModeEnabled()) {
284 library_->EnableDebugMode();
285 }
286
Ramin Halavati63d9c252024-07-29 06:59:17287 library_->SetFileContentFunctions(&ModelDataHolder::GetDataSize,
288 &ModelDataHolder::CopyData);
Ramin Halavati8ced6392023-09-25 07:27:50289}
290
Ramin Halavatic1e4fa92023-05-17 17:22:06291void ScreenAIService::InitializeMainContentExtraction(
Ramin Halavatid139a0f2023-02-28 13:25:28292 const base::FilePath& library_path,
Ramin Halavatifbc62022023-11-10 11:05:07293 base::flat_map<base::FilePath, base::File> model_files,
Ramin Halavatic1e4fa92023-05-17 17:22:06294 mojo::PendingReceiver<mojom::MainContentExtractionService>
295 main_content_extractor_service_receiver,
296 InitializeMainContentExtractionCallback callback) {
297 if (!library_) {
Ramin Halavati8ced6392023-09-25 07:27:50298 LoadLibrary(library_path);
Ramin Halavatic1e4fa92023-05-17 17:22:06299 }
300
301 if (!library_) {
302 std::move(callback).Run(false);
303 base::Process::TerminateCurrentProcessImmediately(-1);
304 }
305
Ramin Halavati63d9c252024-07-29 06:59:17306 model_data_holder_->AddModelFiles(std::move(model_files));
Ramin Halavatib044504d2022-10-24 06:36:34307
Ramin Halavati8ced6392023-09-25 07:27:50308 bool init_successful = library_->InitMainContentExtraction();
Ramin Halavati1d57c2d2023-05-24 05:18:24309 base::UmaHistogramBoolean(
310 "Accessibility.ScreenAI.MainContentExtraction.Initialized",
311 init_successful);
312 if (!init_successful) {
Ramin Halavatic1e4fa92023-05-17 17:22:06313 std::move(callback).Run(false);
Ramin Halavati89a36c92023-05-05 15:01:03314 return;
Ramin Halavatid139a0f2023-02-28 13:25:28315 }
Ramin Halavati89a36c92023-05-05 15:01:03316
Ramin Halavatic1e4fa92023-05-17 17:22:06317 // This interface should be created only once.
318 CHECK(!main_content_extraction_receiver_.is_bound());
319
320 main_content_extraction_receiver_.Bind(
321 std::move(main_content_extractor_service_receiver));
322
323 std::move(callback).Run(true);
Ramin Halavati2acf2362024-08-02 14:22:53324 main_content_extraction_last_used_ = base::TimeTicks::Now();
Ramin Halavatic1e4fa92023-05-17 17:22:06325}
326
327void ScreenAIService::InitializeOCR(
328 const base::FilePath& library_path,
Ramin Halavatifbc62022023-11-10 11:05:07329 base::flat_map<base::FilePath, base::File> model_files,
Ramin Halavatic1e4fa92023-05-17 17:22:06330 mojo::PendingReceiver<mojom::OCRService> ocr_service_receiver,
331 InitializeOCRCallback callback) {
Ramin Halavati26679592025-03-13 07:24:57332 static crash_reporter::CrashKeyString<12> memory_ocr_init(
333 "screen_ai_mem_ocr_init");
334 memory_ocr_init.Set(GetMemoryStatusForCrashKey());
Ramin Halavatic1e4fa92023-05-17 17:22:06335 if (!library_) {
Ramin Halavati8ced6392023-09-25 07:27:50336 LoadLibrary(library_path);
Ramin Halavatic1e4fa92023-05-17 17:22:06337 }
338
339 if (!library_) {
340 std::move(callback).Run(false);
341 base::Process::TerminateCurrentProcessImmediately(-1);
342 }
343
Ramin Halavati63d9c252024-07-29 06:59:17344 model_data_holder_->AddModelFiles(std::move(model_files));
345
Ramin Halavati0e2ae5742023-11-10 05:57:54346 bool init_successful = library_->InitOCR();
Ramin Halavati35d129f2023-06-22 16:37:09347 base::UmaHistogramBoolean("Accessibility.ScreenAI.OCR.Initialized",
Ramin Halavati1d57c2d2023-05-24 05:18:24348 init_successful);
Ramin Halavati480584332023-08-02 06:38:40349
Ramin Halavati1d57c2d2023-05-24 05:18:24350 if (!init_successful) {
Ramin Halavatic1e4fa92023-05-17 17:22:06351 std::move(callback).Run(false);
352 return;
353 }
354
355 // This interface should be created only once.
356 CHECK(!ocr_receiver_.is_bound());
357
358 ocr_receiver_.Bind(std::move(ocr_service_receiver));
359
360 std::move(callback).Run(true);
Ramin Halavati2acf2362024-08-02 14:22:53361 ocr_last_used_ = base::TimeTicks::Now();
Ramin Halavatib044504d2022-10-24 06:36:34362}
Ramin Halavati26dcce22022-02-23 13:11:14363
Ramin Halavatif26d7d42025-03-12 16:04:46364void ScreenAIService::BindShutdownHandler(
365 mojo::PendingRemote<mojom::ScreenAIServiceShutdownHandler>
366 shutdown_handler) {
367 DCHECK(!screen_ai_shutdown_handler_.is_bound());
368 screen_ai_shutdown_handler_.Bind(std::move(shutdown_handler));
369}
370
Ramin Halavati26dcce22022-02-23 13:11:14371void ScreenAIService::BindAnnotator(
372 mojo::PendingReceiver<mojom::ScreenAIAnnotator> annotator) {
373 screen_ai_annotators_.Add(this, std::move(annotator));
374}
375
Ramin Halavatieddadb62022-05-04 17:29:49376void ScreenAIService::BindMainContentExtractor(
377 mojo::PendingReceiver<mojom::Screen2xMainContentExtractor>
378 main_content_extractor) {
Ramin Halavati6ff56412024-08-13 07:15:54379 screen2x_main_content_extractors_.Add(this,
380 std::move(main_content_extractor));
Ramin Halavatieddadb62022-05-04 17:29:49381}
382
Arthur Sonzognic571efb2024-01-26 20:26:18383std::optional<chrome_screen_ai::VisualAnnotation>
Ramin Halavati7716b032024-08-19 14:56:20384ScreenAIService::PerformOcrAndRecordMetrics(const SkBitmap& image) {
Ramin Halavati26679592025-03-13 07:24:57385 static crash_reporter::CrashKeyString<12> memory_perform_ocr(
386 "screen_ai_mem_ocr_perform");
387 memory_perform_ocr.Set(GetMemoryStatusForCrashKey());
388
Ramin Halavati7716b032024-08-19 14:56:20389 CHECK(base::Contains(ocr_client_types_,
390 screen_ai_annotators_.current_receiver()));
Ramin Halavati4805c882025-03-03 18:28:48391 OcrClientTypeForMetrics client_type = GetClientType(
392 ocr_client_types_.find(screen_ai_annotators_.current_receiver())->second);
Ramin Halavatiad969ef2024-05-21 06:59:47393 base::UmaHistogramEnumeration("Accessibility.ScreenAI.OCR.ClientType",
Ramin Halavati4805c882025-03-03 18:28:48394 client_type);
Ramin Halavatiad969ef2024-05-21 06:59:47395
Ramin Halavati2acf2362024-08-02 14:22:53396 ocr_last_used_ = base::TimeTicks::Now();
Ramin Halavati7252cd02023-05-10 07:21:33397 auto result = library_->PerformOcr(image);
Ramin Halavati2acf2362024-08-02 14:22:53398 base::TimeDelta elapsed_time = base::TimeTicks::Now() - ocr_last_used_;
Ramin Halavati2650ebe2023-11-27 19:56:35399 int lines_count = result ? result->lines_size() : 0;
Ramin Halavati3ae33ca2024-01-01 12:58:39400 unsigned image_size = image.width() * image.height();
Ramin Halavati2650ebe2023-11-27 19:56:35401 VLOG(1) << "OCR returned " << lines_count << " lines in " << elapsed_time;
Ramin Halavati7252cd02023-05-10 07:21:33402
Ramin Halavati18aba872024-08-23 14:33:05403 if (!result) {
404 base::UmaHistogramEnumeration(
Ramin Halavati4805c882025-03-03 18:28:48405 "Accessibility.ScreenAI.OCR.Failed.ClientType", client_type);
Ramin Halavati18aba872024-08-23 14:33:05406 }
Ramin Halavati4805c882025-03-03 18:28:48407 if (image_size >= kLargestOcrResolution) {
408 base::UmaHistogramEnumeration(
409 "Accessibility.ScreenAI.OCR.Oversize.ClientType", client_type);
410 }
411
Ramin Halavati81b6d922024-06-07 14:20:34412 base::UmaHistogramBoolean("Accessibility.ScreenAI.OCR.Successful",
413 result.has_value());
Ramin Halavati2650ebe2023-11-27 19:56:35414 base::UmaHistogramCounts100("Accessibility.ScreenAI.OCR.LinesCount",
415 lines_count);
Ramin Halavatia0548b262023-06-27 05:56:47416 base::UmaHistogramCounts10M("Accessibility.ScreenAI.OCR.ImageSize10M",
Ramin Halavati3ae33ca2024-01-01 12:58:39417 image_size);
418 if (image_size < 500 * 500) {
419 base::UmaHistogramTimes("Accessibility.ScreenAI.OCR.Latency.Small",
420 elapsed_time);
421 } else if (image_size < 1000 * 1000) {
422 base::UmaHistogramTimes("Accessibility.ScreenAI.OCR.Latency.Medium",
423 elapsed_time);
424 } else if (image_size < 2000 * 2000) {
425 base::UmaHistogramTimes("Accessibility.ScreenAI.OCR.Latency.Large",
426 elapsed_time);
427 } else {
428 base::UmaHistogramTimes("Accessibility.ScreenAI.OCR.Latency.XLarge",
429 elapsed_time);
430 }
Ramin Halavati2650ebe2023-11-27 19:56:35431
Ramin Halavati7716b032024-08-19 14:56:20432 // MediaApp provides OCR for ChromeOS PDF viewer.
Ramin Halavati4805c882025-03-03 18:28:48433 if (client_type == OcrClientTypeForMetrics::kPdfViewer ||
434 client_type == OcrClientTypeForMetrics::kMediaApp) {
Ramin Halavati2650ebe2023-11-27 19:56:35435 base::UmaHistogramCounts100("Accessibility.ScreenAI.OCR.LinesCount.PDF",
436 lines_count);
437 base::UmaHistogramTimes("Accessibility.ScreenAI.OCR.Time.PDF",
438 elapsed_time);
Ramin Halavati696b8d5c2025-02-28 16:41:58439 base::UmaHistogramCounts10M(
440 lines_count ? "Accessibility.ScreenAI.OCR.ImageSize.PDF.WithText"
441 : "Accessibility.ScreenAI.OCR.ImageSize.PDF.NoText",
442 image_size);
Ramin Halavati4c47eb82024-12-03 19:06:42443
Ramin Halavati01267d12025-01-21 18:11:30444 if (result.has_value()) {
445 std::optional<uint64_t> most_detected_language =
446 GetMostDetectedLanguageInOcrData(*result);
447 if (most_detected_language.has_value()) {
448 base::UmaHistogramSparse(
449 "Accessibility.ScreenAI.OCR.MostDetectedLanguage.PDF",
450 most_detected_language.value());
451 }
Ramin Halavati4c47eb82024-12-03 19:06:42452 }
Ramin Halavati2650ebe2023-11-27 19:56:35453 }
454
Ramin Halavati7252cd02023-05-10 07:21:33455 return result;
456}
457
Ramin Halavatiad969ef2024-05-21 06:59:47458void ScreenAIService::SetClientType(mojom::OcrClientType client_type) {
459 ocr_client_types_[screen_ai_annotators_.current_receiver()] = client_type;
460}
461
Ramin Halavatid0d6231b2025-03-20 05:36:43462void ScreenAIService::SetClientType(mojom::MceClientType client_type) {
463 mce_client_types_[screen2x_main_content_extractors_.current_receiver()] =
464 client_type;
465}
466
Dmitry Grebenyukc49b95ee2023-05-02 05:52:07467void ScreenAIService::PerformOcrAndReturnAnnotation(
468 const SkBitmap& image,
469 PerformOcrAndReturnAnnotationCallback callback) {
Arthur Sonzognic571efb2024-01-26 20:26:18470 std::optional<chrome_screen_ai::VisualAnnotation> annotation_proto =
Ramin Halavati7716b032024-08-19 14:56:20471 PerformOcrAndRecordMetrics(image);
Dmitry Grebenyukc49b95ee2023-05-02 05:52:07472
Ramin Halavati89a36c92023-05-05 15:01:03473 if (annotation_proto) {
474 std::move(callback).Run(ConvertProtoToVisualAnnotation(*annotation_proto));
475 return;
Dmitry Grebenyukc49b95ee2023-05-02 05:52:07476 }
Ramin Halavati89a36c92023-05-05 15:01:03477
478 std::move(callback).Run(mojom::VisualAnnotation::New());
Dmitry Grebenyukc49b95ee2023-05-02 05:52:07479}
480
Kyungjun Lee90461522023-04-25 06:50:09481void ScreenAIService::PerformOcrAndReturnAXTreeUpdate(
Ramin Halavatib446c022023-03-29 15:36:36482 const SkBitmap& image,
Kyungjun Lee90461522023-04-25 06:50:09483 PerformOcrAndReturnAXTreeUpdateCallback callback) {
Arthur Sonzognic571efb2024-01-26 20:26:18484 std::optional<chrome_screen_ai::VisualAnnotation> annotation_proto =
Ramin Halavati7716b032024-08-19 14:56:20485 PerformOcrAndRecordMetrics(image);
Ramin Halavati89a36c92023-05-05 15:01:03486 ui::AXTreeUpdate update = ConvertVisualAnnotationToTreeUpdate(
487 annotation_proto, gfx::Rect(image.width(), image.height()));
Kyungjun Lee90461522023-04-25 06:50:09488
Ramin Halavati89a36c92023-05-05 15:01:03489 // The original caller is always replied to, and an empty AXTreeUpdate tells
490 // that the annotation function was not successful.
491 std::move(callback).Run(update);
Ramin Halavatib446c022023-03-29 15:36:36492}
493
494void ScreenAIService::ExtractMainContent(const ui::AXTreeUpdate& snapshot,
Ramin Halavatib446c022023-03-29 15:36:36495 ExtractMainContentCallback callback) {
Abigail Klein18673702024-03-05 20:59:06496 ui::AXTree tree;
497 std::optional<std::vector<int32_t>> content_node_ids;
Ramin Halavatib3fe2fe2025-03-20 16:53:03498 bool success = ExtractMainContentInternalAndRecordMetrics(snapshot, tree,
499 content_node_ids);
Abigail Klein18673702024-03-05 20:59:06500
501 if (success) {
502 std::move(callback).Run(*content_node_ids);
503 } else {
504 std::move(callback).Run(std::vector<int32_t>());
505 }
506}
507
508void ScreenAIService::ExtractMainNode(const ui::AXTreeUpdate& snapshot,
509 ExtractMainNodeCallback callback) {
510 ui::AXTree tree;
511 std::optional<std::vector<int32_t>> content_node_ids;
Ramin Halavatib3fe2fe2025-03-20 16:53:03512 bool success = ExtractMainContentInternalAndRecordMetrics(snapshot, tree,
513 content_node_ids);
Abigail Klein18673702024-03-05 20:59:06514
515 if (success) {
516 ui::AXNodeID main_node_id = ComputeMainNode(&tree, *content_node_ids);
517 std::move(callback).Run(main_node_id);
518 } else {
519 std::move(callback).Run(ui::kInvalidAXNodeID);
520 }
521}
522
Mark Schillaci79c82812025-03-07 21:46:37523void ScreenAIService::IdentifyMainNode(const ui::AXTreeUpdate& snapshot,
524 IdentifyMainNodeCallback callback) {
525 ui::AXTree tree;
526 std::optional<std::vector<int32_t>> content_node_ids;
Ramin Halavatib3fe2fe2025-03-20 16:53:03527 bool success = ExtractMainContentInternalAndRecordMetrics(snapshot, tree,
528 content_node_ids);
Mark Schillaci79c82812025-03-07 21:46:37529
530 if (success) {
531 ui::AXNodeID main_node_id = ComputeMainNode(&tree, *content_node_ids);
532 std::move(callback).Run(tree.GetAXTreeID(), main_node_id);
533 } else {
534 std::move(callback).Run(ui::AXTreeIDUnknown(), ui::kInvalidAXNodeID);
535 }
536}
537
Ramin Halavatib3fe2fe2025-03-20 16:53:03538bool ScreenAIService::ExtractMainContentInternalAndRecordMetrics(
Abigail Klein18673702024-03-05 20:59:06539 const ui::AXTreeUpdate& snapshot,
540 ui::AXTree& tree,
541 std::optional<std::vector<int32_t>>& content_node_ids) {
Ramin Halavatid0d6231b2025-03-20 05:36:43542 CHECK(base::Contains(mce_client_types_,
543 screen2x_main_content_extractors_.current_receiver()));
544 main_content_extraction_last_used_ = base::TimeTicks::Now();
Ramin Halavatib3fe2fe2025-03-20 16:53:03545 MainContentExtractionClientTypeForMetrics client_type = GetClientType(
546 mce_client_types_[screen2x_main_content_extractors_.current_receiver()]);
547
548 static crash_reporter::CrashKeyString<2> cpu_info(
549 "main_content_extraction_client");
550 cpu_info.Set(base::StringPrintf("%i", static_cast<int>(client_type)));
551
Ramin Halavatib446c022023-03-29 15:36:36552 // Early return if input is empty.
553 if (snapshot.nodes.empty()) {
Ramin Halavatib3fe2fe2025-03-20 16:53:03554 base::UmaHistogramEnumeration(
555 "Accessibility.ScreenAI.MainContentExtraction.Error.SnapshotEmpty",
556 client_type);
Abigail Klein18673702024-03-05 20:59:06557 return false;
Ramin Halavatib33fc0c2022-05-06 09:32:22558 }
Ramin Halavatie2549a92022-08-02 07:43:19559
Abigail Klein18673702024-03-05 20:59:06560 // Deserialize the snapshot and reserialize it to a view hierarchy proto.
Ramin Halavatib3fe2fe2025-03-20 16:53:03561 if (!tree.Unserialize(snapshot)) {
562 base::UmaHistogramEnumeration(
563 "Accessibility.ScreenAI.MainContentExtraction.Error."
564 "SnapshotUnserialize",
565 client_type);
Ramin Halavati62e034d2024-08-02 08:58:22566 return false;
567 }
568
Ramin Halavatib3fe2fe2025-03-20 16:53:03569 std::optional<ViewHierarchyAndTreeSize> converted_snapshot =
570 SnapshotToViewHierarchy(tree);
571 if (!converted_snapshot) {
572 base::UmaHistogramEnumeration(
573 "Accessibility.ScreenAI.MainContentExtraction.Error.SnapshotProto",
574 client_type);
575 return false;
576 }
577
578 base::TimeTicks start_time = base::TimeTicks::Now();
Ramin Halavati62e034d2024-08-02 08:58:22579 content_node_ids =
580 library_->ExtractMainContent(converted_snapshot->serialized_proto);
Ramin Halavatib3fe2fe2025-03-20 16:53:03581 base::TimeDelta elapsed_time = base::TimeTicks::Now() - start_time;
582
583 bool successful =
584 content_node_ids.has_value() && content_node_ids->size() > 0;
Ramin Halavati81b6d922024-06-07 14:20:34585 base::UmaHistogramBoolean(
Ramin Halavatib3fe2fe2025-03-20 16:53:03586 "Accessibility.ScreenAI.MainContentExtraction.Successful2", successful);
587
588 if (!content_node_ids.has_value()) {
589 base::UmaHistogramEnumeration(
590 "Accessibility.ScreenAI.MainContentExtraction.Error.ResultNull",
591 client_type);
592 } else if (content_node_ids->empty()) {
593 base::UmaHistogramEnumeration(
594 "Accessibility.ScreenAI.MainContentExtraction.Error.ResultEmpty",
595 client_type);
596 }
597
598 if (successful) {
599 base::UmaHistogramTimes(
600 "Accessibility.ScreenAI.MainContentExtraction.Latency.Success",
601 elapsed_time);
Ramin Halavati5aad701d2023-05-09 17:23:29602 VLOG(2) << "Screen2x returned " << content_node_ids->size() << " node ids.";
Abigail Klein18673702024-03-05 20:59:06603 return true;
604 } else {
Ramin Halavatib3fe2fe2025-03-20 16:53:03605 base::UmaHistogramTimes(
606 "Accessibility.ScreenAI.MainContentExtraction.Latency.Failure",
607 elapsed_time);
Abigail Klein18673702024-03-05 20:59:06608 VLOG(0) << "Screen2x returned no results.";
609 return false;
Ramin Halavati5aad701d2023-05-09 17:23:29610 }
Abigail Klein18673702024-03-05 20:59:06611}
Ramin Halavati89a36c92023-05-05 15:01:03612
Abigail Klein18673702024-03-05 20:59:06613ui::AXNodeID ScreenAIService::ComputeMainNodeForTesting(
614 const ui::AXTree* tree,
615 const std::vector<ui::AXNodeID>& content_node_ids) {
616 return ComputeMainNode(tree, content_node_ids);
Abigail Kleindfdde352023-01-27 21:03:10617}
618
Ramin Halavati6ff56412024-08-13 07:15:54619void ScreenAIService::OcrReceiverDisconnected() {
Ramin Halavatiad969ef2024-05-21 06:59:47620 auto entry = ocr_client_types_.find(screen_ai_annotators_.current_receiver());
621 if (entry != ocr_client_types_.end()) {
622 ocr_client_types_.erase(entry);
623 }
Ramin Halavati6ff56412024-08-13 07:15:54624
625 CheckIdleStateAfterDelay();
626}
627
628void ScreenAIService::CheckIdleStateAfterDelay() {
629 // Check if service is idle, a little after the client disconnects.
630 base::SequencedTaskRunner::GetCurrentDefault()->PostDelayedTask(
631 FROM_HERE,
632 base::BindOnce(&ScreenAIService::ShutDownIfNoClients,
633 weak_ptr_factory_.GetWeakPtr()),
634 kCoolDownTime);
Ramin Halavatiad969ef2024-05-21 06:59:47635}
636
Ramin Halavati3842e7e2024-07-31 04:39:21637void ScreenAIService::ShutDownIfNoClients() {
Ramin Halavati2acf2362024-08-02 14:22:53638 const base::TimeTicks kIdlenessThreshold =
639 base::TimeTicks::Now() - kIdleCheckingDelay;
Ramin Halavatidc0b2e512024-08-16 17:39:09640 bool ocr_not_needed =
641 !screen_ai_annotators_.size() || ocr_last_used_ < kIdlenessThreshold;
642 bool main_content_extractioncan_not_needed =
643 !screen2x_main_content_extractors_.size() ||
644 main_content_extraction_last_used_ < kIdlenessThreshold;
645
646 if (ocr_not_needed && main_content_extractioncan_not_needed) {
Ramin Halavatif26d7d42025-03-12 16:04:46647 screen_ai_shutdown_handler_->ShuttingDownOnIdle();
Ramin Halavatidc0b2e512024-08-16 17:39:09648 VLOG(2) << "Shutting down since no client or idle.";
649 base::Process::TerminateCurrentProcessImmediately(0);
Ramin Halavati2acf2362024-08-02 14:22:53650 }
Ramin Halavati3842e7e2024-07-31 04:39:21651}
652
Ramin Halavati26dcce22022-02-23 13:11:14653} // namespace screen_ai