diff --git a/pkgs/sdk_triage_bot/bin/triage.dart b/pkgs/sdk_triage_bot/bin/triage.dart index 17968b58..ad46ff35 100644 --- a/pkgs/sdk_triage_bot/bin/triage.dart +++ b/pkgs/sdk_triage_bot/bin/triage.dart @@ -44,7 +44,7 @@ void main(List arguments) async { var issue = results.rest.first; final dryRun = results.flag('dry-run'); - final force = results.flag('force'); + final forceTriage = results.flag('force'); // Accept either an issue number or a url (i.e., // https://github.com/dart-lang/sdk/issues/55816). @@ -69,7 +69,7 @@ void main(List arguments) async { await triage( int.parse(issue), dryRun: dryRun, - force: force, + forceTriage: forceTriage, githubService: githubService, geminiService: geminiService, logger: Logger(), diff --git a/pkgs/sdk_triage_bot/lib/src/gemini.dart b/pkgs/sdk_triage_bot/lib/src/gemini.dart index 7f2b5982..e87f0303 100644 --- a/pkgs/sdk_triage_bot/lib/src/gemini.dart +++ b/pkgs/sdk_triage_bot/lib/src/gemini.dart @@ -6,6 +6,10 @@ import 'package:google_generative_ai/google_generative_ai.dart'; import 'package:http/http.dart' as http; class GeminiService { + // gemini-1.5-pro-latest, gemini-1.5-flash-latest, gemini-1.0-pro-latest + static const String classificationModel = 'models/gemini-1.5-flash-latest'; + static const String summarizationModel = 'models/gemini-1.5-flash-latest'; + final GenerativeModel _summarizeModel; final GenerativeModel _classifyModel; @@ -13,15 +17,15 @@ class GeminiService { required String apiKey, required http.Client httpClient, }) : _summarizeModel = GenerativeModel( - model: 'models/gemini-1.5-flash-latest', + model: summarizationModel, apiKey: apiKey, generationConfig: GenerationConfig(temperature: 0.2), httpClient: httpClient, ), _classifyModel = GenerativeModel( - // TODO(devconcarew): substitute our tuned model + // TODO(devoncarew): substitute our tuned model // model: 'tunedModels/autotune-sdk-triage-tuned-prompt-1l96e2n', - model: 'models/gemini-1.5-flash-latest', + model: classificationModel, apiKey: 
apiKey, generationConfig: GenerationConfig(temperature: 0.2), httpClient: httpClient, @@ -45,6 +49,6 @@ class GeminiService { Future _query(GenerativeModel model, String prompt) async { final response = await model.generateContent([Content.text(prompt)]); - return response.text!.trim(); + return (response.text ?? '').trim(); } } diff --git a/pkgs/sdk_triage_bot/lib/src/github.dart b/pkgs/sdk_triage_bot/lib/src/github.dart index a6a5e081..25bba1ee 100644 --- a/pkgs/sdk_triage_bot/lib/src/github.dart +++ b/pkgs/sdk_triage_bot/lib/src/github.dart @@ -43,10 +43,15 @@ class GithubService { Future fetchIssues( String areaLabel, { + required bool includeClosed, String? cursor, }) async { final result = await _query(QueryOptions( - document: gql(_buildQueryString(areaLabel, cursor: cursor)), + document: gql(_buildQueryString( + areaLabel, + cursor: cursor, + includeClosed: includeClosed, + )), fetchPolicy: FetchPolicy.noCache, parserFn: (data) { final search = data['search'] as Map; @@ -104,41 +109,46 @@ Future> _query(QueryOptions options) { return _client.query(options); } -String _buildQueryString(String areaLabel, {String? cursor}) { - final cursorRef = cursor == null ? null : '"$cursor"'; +String _buildQueryString( + String areaLabel, { + required bool includeClosed, + String? cursor, +}) { + final cursorTerm = cursor == null ? '' : 'after: "$cursor"'; + final isOpen = includeClosed ? '' : 'is:open'; return '''{ - search( - query: "repo:dart-lang/sdk is:issue is:open label:$areaLabel" - type: ISSUE - first: 100, - after: $cursorRef - ) { - edges { - node { - ... on Issue { - title - number - state - bodyText - labels(first: 10) { - edges { - node { - name + search( + query: "repo:dart-lang/sdk is:issue $isOpen label:$areaLabel" + type: ISSUE + first: 100 + $cursorTerm + ) { + edges { + node { + ... 
on Issue { + title + number + state + bodyText + labels(first: 10) { + edges { + node { + name + } } } } } } + pageInfo { + endCursor + startCursor + hasNextPage + hasPreviousPage + } } - pageInfo { - endCursor - startCursor - hasNextPage - hasPreviousPage - } - } -}'''; + }'''; } final GraphQLClient _client = _initGraphQLClient(); @@ -158,4 +168,17 @@ extension IssueExtension on Issue { /// /// Note that the original text for the issue is returned in the `body` field. bool get hasComments => commentsCount > 0; + + /// Returns whether this issue has already been triaged. + /// + /// Generally, this means the issue has had an `area-` label applied to + /// it, has had `needs-info` applied to it, or was closed. + bool get alreadyTriaged { + if (isClosed) return true; + + return labels.any((label) { + final name = label.name; + return name == 'needs-info' || name.startsWith('area-'); + }); + } } diff --git a/pkgs/sdk_triage_bot/lib/src/prompts.dart b/pkgs/sdk_triage_bot/lib/src/prompts.dart index e363949a..0afa2517 100644 --- a/pkgs/sdk_triage_bot/lib/src/prompts.dart +++ b/pkgs/sdk_triage_bot/lib/src/prompts.dart @@ -28,10 +28,11 @@ area-infrastructure: Use area-infrastructure for SDK infrastructure issues, like area-intellij: Tracking issues for the Dart IntelliJ plugin. area-language: Dart language related items (some items might be better tracked at github.com/dart-lang/language). area-meta: Cross-cutting, high-level issues (for tracking many other implementation issues, ...). +area-native-interop: Used for native interop related issues, including FFI. area-pkg: Used for miscellaneous pkg/ packages not associated with specific area- teams. area-sdk: Use area-sdk for general purpose SDK issues (packaging, distribution, …). area-test: Cross-cutting test issues (use area- labels for specific failures; not used for package:test). -area-vm: Use area-vm for VM related issues, including code coverage, FFI, and the AOT and JIT backends. 
+area-vm: Use area-vm for VM related issues, including code coverage, and the AOT and JIT backends. area-web: Use area-web for Dart web related issues, including the DDC and dart2js compilers and JS interop. Don't make up a new area. diff --git a/pkgs/sdk_triage_bot/lib/triage.dart b/pkgs/sdk_triage_bot/lib/triage.dart index 94f1fa9c..9529e98a 100644 --- a/pkgs/sdk_triage_bot/lib/triage.dart +++ b/pkgs/sdk_triage_bot/lib/triage.dart @@ -17,7 +17,7 @@ final sdkSlug = RepositorySlug('dart-lang', 'sdk'); Future triage( int issueNumber, { bool dryRun = false, - bool force = false, + bool forceTriage = false, required GithubService githubService, required GeminiService geminiService, required Logger logger, @@ -63,21 +63,22 @@ ${trimmedBody(comment.body ?? '')} } // decide if we should triage - final alreadyTriaged = labels.any((l) => l.startsWith('area-')); - if (alreadyTriaged && !force) { - logger.log('Exiting (issue is already triaged).'); - return; + if (!forceTriage) { + if (issue.alreadyTriaged) { + logger.log('Exiting (issue is already triaged).'); + return; + } } // ask for the summary var bodyTrimmed = trimmedBody(issue.body); String summary; try { - // Failures here can include things like gemini safety issues, ... summary = await geminiService.summarize( summarizeIssuePrompt(title: issue.title, body: bodyTrimmed), ); } on GenerativeAIException catch (e) { + // Failures here can include things like gemini safety issues, ... stderr.writeln('gemini: $e'); exit(1); } @@ -88,21 +89,21 @@ ${trimmedBody(comment.body ?? '')} logger.log(''); // ask for the 'area-' classification - List classification; + List newLabels; try { - // Failures here can include things like gemini safety issues, ... 
- classification = await geminiService.classify( + newLabels = await geminiService.classify( assignAreaPrompt( title: issue.title, body: bodyTrimmed, lastComment: lastComment), ); } on GenerativeAIException catch (e) { + // Failures here can include things like gemini safety issues, ... stderr.writeln('gemini: $e'); exit(1); } logger.log('## gemini classification'); logger.log(''); - logger.log(classification.toString()); + logger.log(newLabels.toString()); logger.log(''); if (dryRun) { @@ -113,7 +114,7 @@ ${trimmedBody(comment.body ?? '')} // perform changes logger.log('## github comment'); logger.log(''); - logger.log('labels: $classification'); + logger.log('labels: $newLabels'); logger.log(''); logger.log(summary); @@ -122,17 +123,16 @@ ${trimmedBody(comment.body ?? '')} // create github comment await githubService.createComment(sdkSlug, issueNumber, comment); - final allLabels = await githubService.getAllLabels(sdkSlug); - var newLabels = filterExistingLabels(allLabels, classification); - if (newLabels.any((l) => l.startsWith('area-'))) { - newLabels.add('triage-automation'); + final allRepoLabels = (await githubService.getAllLabels(sdkSlug)).toSet(); + final labelAdditions = newLabels.toSet().intersection(allRepoLabels).toList() + ..sort(); + if (labelAdditions.isNotEmpty) { + labelAdditions.add('triage-automation'); } - // remove any duplicates - newLabels = newLabels.toSet().toList(); // apply github labels if (newLabels.isNotEmpty) { - await githubService.addLabelsToIssue(sdkSlug, issueNumber, newLabels); + await githubService.addLabelsToIssue(sdkSlug, issueNumber, labelAdditions); } logger.log(''); diff --git a/pkgs/sdk_triage_bot/test/triage_test.dart b/pkgs/sdk_triage_bot/test/triage_test.dart index 67a57017..c07f737f 100644 --- a/pkgs/sdk_triage_bot/test/triage_test.dart +++ b/pkgs/sdk_triage_bot/test/triage_test.dart @@ -63,7 +63,7 @@ void main() { await triage( mockIssueNumber, - force: true, + forceTriage: true, githubService: githubService, geminiService: 
geminiService, logger: TestLogger(), diff --git a/pkgs/sdk_triage_bot/tool/bench.dart b/pkgs/sdk_triage_bot/tool/bench.dart new file mode 100644 index 00000000..62d597ab --- /dev/null +++ b/pkgs/sdk_triage_bot/tool/bench.dart @@ -0,0 +1,121 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +// This script benchmarks the issues listed in tool/bench.md using the current +// issue triage prompt and writes the results back into bench.md. + +import 'dart:io'; + +import 'package:github/github.dart'; +import 'package:google_generative_ai/google_generative_ai.dart'; +import 'package:http/http.dart' as http; +import 'package:sdk_triage_bot/src/common.dart'; +import 'package:sdk_triage_bot/src/gemini.dart'; +import 'package:sdk_triage_bot/src/github.dart'; +import 'package:sdk_triage_bot/src/prompts.dart'; + +final sdkSlug = RepositorySlug('dart-lang', 'sdk'); + +void main(List args) async { + print('Running benchmark against current prompt...'); + print(''); + + final client = http.Client(); + + final github = GitHub( + auth: Authentication.withToken(githubToken), + client: client, + ); + final githubService = GithubService(github: github); + final geminiService = GeminiService( + apiKey: geminiKey, + httpClient: client, + ); + + // read issues + final benchmarkFile = File('tool/bench.md'); + final lines = benchmarkFile + .readAsLinesSync() + .where((l) => l.startsWith('| #')) + .toList(); + + final expectations = lines.map(ClassificationResults.parseFrom).toList(); + var predicted = 0; + + print('${expectations.length} issues read.'); + print(''); + + for (var expectation in expectations) { + final issue = + await githubService.fetchIssue(sdkSlug, expectation.issueNumber); + final bodyTrimmed = trimmedBody(issue.body); + + print('#${issue.number}'); + + try { + final labels = await 
geminiService.classify( + assignAreaPrompt(title: issue.title, body: bodyTrimmed), + ); + if (expectation.satisfiedBy(labels)) { + predicted++; + } else { + var title = issue.title.length > 100 + ? '${issue.title.substring(0, 100)}...' + : issue.title; + print(' "$title"'); + print(' labeled: ${expectation.expectedLabels.join(', ')}'); + print(' prediction: ${labels.join(', ')}'); + } + } on GenerativeAIException catch (e) { + // Failures here can include things like gemini safety issues, ... + stderr.writeln('gemini: $e'); + } + } + + final result = predicted * 100.0 / expectations.length; + final today = DateTime.now().toIso8601String().substring(0, 10); + final percent = result.toStringAsFixed(1); + final model = GeminiService.classificationModel.split('/')[1]; + + benchmarkFile.writeAsStringSync( + '$today: $percent% using $model\n', + mode: FileMode.append, + ); + + print(''); + print('$today: $percent% using $model'); + + client.close(); +} + +class ClassificationResults { + final int issueNumber; + final List expectedLabels; + + ClassificationResults({ + required this.issueNumber, + required this.expectedLabels, + }); + + static ClassificationResults parseFrom(String line) { + // | #56366 | `area-dart2wasm`, `type-enhancement` | + final sections = line.split('|').skip(1).take(2).toList(); + final number = sections[0].trim(); + final labels = sections[1].trim(); + + return ClassificationResults( + issueNumber: int.parse(number.substring(1)), + expectedLabels: labels.split(',').map((label) { + label = label.trim(); + return label.substring(1, label.length - 1); + }).toList(), + ); + } + + bool satisfiedBy(List labels) { + final filtered = labels.where((l) => !l.startsWith('type-')).toSet(); + final expected = expectedLabels.where((l) => !l.startsWith('type-')); + return expected.every(filtered.contains); + } +} diff --git a/pkgs/sdk_triage_bot/tool/bench.md b/pkgs/sdk_triage_bot/tool/bench.md new file mode 100644 index 00000000..c5d189a5 --- /dev/null +++ 
b/pkgs/sdk_triage_bot/tool/bench.md @@ -0,0 +1,110 @@ +## What's this? + +This is a benchmark of existing, previously triaged issues. We use it to +measure changes to our triage classification prompt - to see if changes +materially improve the classification performance. + +## General issues + +| Issue | Expected | +| --- | --- | +| #56366 | `area-dart2wasm`, `type-enhancement` | +| #56365 | `area-core-library`, `type-enhancement` | +| #56364 | `area-front-end`, `type-bug` | +| #56363 | `area-core-library`, `type-enhancement` | +| #56362 | `area-vm`, `type-bug` | +| #56361 | `area-analyzer` | +| #56360 | `area-core-library` | +| #56358 | `area-web` | +| #56355 | `area-analyzer`, `type-bug`, `type-performance` | +| #56354 | `area-web`, `type-bug` | +| #56353 | `area-dart2wasm` | +| #56350 | `area-analyzer`, `type-enhancement` | +| #56348 | `area-intellij` | +| #56347 | `area-dart-cli`, `type-bug` | +| #56346 | `area-pkg`, `pkg-json`, `type-enhancement` | +| #56345 | `area-analyzer`, `type-enhancement` | +| #56344 | `area-language`, `type-question` | +| #56343 | `area-analyzer`, `type-enhancement` | +| #56342 | `area-analyzer`, `type-enhancement` | +| #56341 | `area-analyzer` | +| #56340 | `area-vm` | +| #56339 | `area-core-library` | +| #56337 | `area-language`, `type-enhancement` | +| #56330 | `area-native-interop`, `type-bug` | +| #56329 | `area-language`, `type-enhancement` | +| #56327 | `area-web`, `type-question` | +| #56325 | `area-web`, `type-question` | +| #56324 | `area-dart-cli` | +| #56323 | `area-core-library`, `type-question` | +| #56322 | `area-web`, `type-bug` | +| #56321 | `area-dart2wasm`, `type-bug` | +| #56319 | `area-vm`, `type-enhancement` | +| #56317 | `area-core-library`, `type-enhancement` | +| #56316 | `area-web` | +| #56315 | `area-web` | +| #56314 | `area-web`, `type-bug` | +| #56308 | `area-vm` | +| #56306 | `area-vm`, `type-bug` | +| #56305 | `area-front-end`, `type-bug`, `type-question` | +| #56304 | `area-core-library`, 
`type-enhancement` | +| #56303 | `area-dart-cli`, `type-bug` | +| #56302 | `area-vm` | +| #56297 | `area-language`, `type-enhancement` | +| #56284 | `area-core-library`, `type-enhancement` | +| #56283 | `area-dart2wasm` | +| #56256 | `area-front-end`, `type-bug` | +| #56254 | `area-pkg`, `pkg-vm-service`, `type-bug` | +| #56246 | `area-intellij` | +| #56240 | `area-intellij` | +| #56229 | `area-infrastructure` | +| #56227 | `area-native-interop` | +| #56220 | `area-infrastructure`, `type-code-health` | +| #56217 | `area-meta` | +| #56216 | `area-intellij` | +| #56214 | `area-native-interop` | +| #56208 | `area-google3`, `type-enhancement` | +| #56207 | `area-google3` | +| #56206 | `area-google3`, `type-enhancement` | +| #56196 | `area-meta`, `type-enhancement` | +| #56195 | `area-test`, `type-enhancement` | +| #56193 | `area-front-end`, `type-enhancement` | +| #56192 | `area-test`, `type-enhancement` | +| #56189 | `area-meta`, `type-enhancement` | +| #56188 | `area-meta`, `type-enhancement` | +| #56178 | `area-pkg`, `pkg-dtd`, `type-enhancement` | +| #56177 | `area-pkg`, `pkg-dtd`, `type-enhancement` | +| #56168 | `area-google3` | +| #56161 | `area-build`, `type-enhancement` | +| #56156 | `area-test` | +| #56154 | `area-front-end`, `type-enhancement` | +| #56153 | `area-test`, `type-enhancement` | +| #56138 | `area-front-end`, `type-bug` | +| #56122 | `area-front-end`, `area-language` | +| #56107 | `area-front-end`, `type-bug` | +| #56081 | `area-sdk`, `type-enhancement`, `type-infra` | +| #56058 | `area-native-interop` | +| #56008 | `area-infrastructure` | +| #55990 | `area-infrastructure` | +| #55727 | `area-google3` | +| #55669 | `area-build` | +| #55385 | `area-sdk` | +| #54306 | `area-build` | +| #53635 | `area-build`, `area-vm` | + +## Needs info issues + +We need more information from the user before we can triage these issues. 
+ +| Issue | Expected | Notes | +| --- | --- | --- | +| #55291 | `needs-info` | unmodified 'create an issue' template | +| #55144 | `needs-info` | | +| #54595 | `needs-info` | | +| #54134 | `needs-info` | unmodified 'create an issue' template | +| #56348 | `needs-info` | 'Analyzer Feedback from IntelliJ' | +| #56246 | `needs-info` | 'Analyzer Feedback from IntelliJ' | +| #56349 | `needs-info` | Or, potentially `should-close` | + +## Results +2024-08-27: 55.6% using gemini-1.5-flash-latest diff --git a/pkgs/sdk_triage_bot/tool/create_tuning_data.dart b/pkgs/sdk_triage_bot/tool/create_tuning_data.dart index 05ac6e95..8950eaee 100644 --- a/pkgs/sdk_triage_bot/tool/create_tuning_data.dart +++ b/pkgs/sdk_triage_bot/tool/create_tuning_data.dart @@ -17,25 +17,27 @@ import 'package:sdk_triage_bot/src/prompts.dart'; // - make sure we have at least 10 items from each area const Map areaSampleCount = { - 'area-vm': 100, 'area-analyzer': 100, - 'area-web': 100, 'area-core-library': 100, 'area-front-end': 100, + 'area-vm': 100, + 'area-web': 100, // - 'area-language': 50, + 'area-dart-cli': 50, 'area-infrastructure': 50, + 'area-language': 50, 'area-test': 50, - 'area-dart-cli': 50, // - 'area-meta': 25, 'area-dart2wasm': 25, + 'area-meta': 25, + 'area-pkg': 25, // - 'area-sdk': 10, - 'area-intellij': 10, - 'area-tools': 10, 'area-build': 10, 'area-google3': 10, + 'area-intellij': 10, + 'area-native-interop': 10, + 'area-sdk': 10, + 'area-tools': 10, }; void main(List args) async { @@ -84,8 +86,12 @@ void main(List args) async { exit(0); } -Future> downloadIssues(String areaLabel, int count) async { - var result = await fetchIssues(areaLabel); +Future> downloadIssues( + String areaLabel, + int count, { + bool includeClosed = false, +}) async { + var result = await fetchIssues(areaLabel, includeClosed: includeClosed); final issues = []; @@ -101,7 +107,11 @@ Future> downloadIssues(String areaLabel, int count) async { if (!result.hasNext) { break; } else { - result = await 
fetchIssues(areaLabel, cursor: result.cursor); + result = await fetchIssues( + areaLabel, + includeClosed: includeClosed, + cursor: result.cursor, + ); } } @@ -150,6 +160,19 @@ extension on Issue { return '[$number] "$shortTitle": ${filteredLabels.join(', ')}'; } + + // ignore: unused_element + String get markdownDesc { + final filteredLabels = labels.map((l) => l.name).where((label) { + return label.startsWith('area-') || + label.startsWith('type-') || + label == 'needs-info'; + }).toList() + ..sort(); + + final descriptions = filteredLabels.map((l) => '`$l`').toList(); + return '| #$number | ${descriptions.join(', ')} |'; + } } String csvEncode(String str) {