From d6fb9545b6d6a6c215cb1dec0bf54ac8da108818 Mon Sep 17 00:00:00 2001 From: Corey McKrill <916023+coreymckrill@users.noreply.github.com> Date: Fri, 9 Aug 2019 16:36:27 -0700 Subject: [PATCH] Meetup Client: Refactor to stop using v2 API endpoints (#209) Meetup will apparently shut down all of the v2 API endpoints on 2019-08-15. Our client was still using the v2 `events` endpoint, so this refactor changes that to get as close as possible to the same dataset using v3 endpoints. It's not a perfect match, though, and the v3 API is not friendly towards our use case of pulling events from a large number of different groups all at once. Client changes: * Refactor the `get_events` method * Remove unused signing method * Add a request header for accepting json content type * Adjust the throttle mechanism to be a bit more conservative * Sanitize group slugs before using them in API endpoint requests This also refactors the Meetup Events report in the WordCamp Reports plugin, since the refactored `get_events` method isn't a drop-in replacement. --- .../utilities/class-meetup-client.php | 84 +++++++++--------- .../classes/report/class-meetup-events.php | 88 ++++++++++++++----- .../views/html/meetup-events.php | 4 + 3 files changed, 114 insertions(+), 62 deletions(-) diff --git a/public_html/wp-content/mu-plugins/utilities/class-meetup-client.php b/public_html/wp-content/mu-plugins/utilities/class-meetup-client.php index 89a9215da8..130a24f900 100644 --- a/public_html/wp-content/mu-plugins/utilities/class-meetup-client.php +++ b/public_html/wp-content/mu-plugins/utilities/class-meetup-client.php @@ -177,20 +177,6 @@ protected function send_total_count_request( $request_url ) { return $count; } - /** - * Sign a request URL with our API key. - * - * @param string $request_url - * - * @return string - */ - protected function sign_request_url( $request_url ) { - return add_query_arg( array( - 'sign' => true, - 'key' => $this->api_key, - ), $request_url ); - } - /** * Generate headers to use in a request. * @@ -201,6 +187,7 @@ protected function get_request_args() { return array( 'headers' => array( + 'Accept' => 'application/json', 'Authorization' => "Bearer $oauth_token", ), ); @@ -247,6 +234,8 @@ protected function get_next_url( $response ) { * Check the rate limit status in an API response and delay further execution if necessary. * * @param array $headers + * + * @return void */ protected static function throttle( $response ) { $headers = wp_remote_retrieve_headers( $response ); @@ -258,11 +247,17 @@ protected static function throttle( $response ) { $remaining = absint( $headers['x-ratelimit-remaining'] ); $period = absint( $headers['x-ratelimit-reset'] ); - // Pause more frequently than we need to, and for longer, just to be safe. - if ( $remaining > 2 ) { + /** + * Don't throttle if we have sufficient requests remaining. + * + * We don't let this number get to 0, though, because there are scenarios where multiple processes are using + * the API at the same time, and there's no way for them to be aware of each other. + */ + if ( $remaining > 3 ) { return; } + // Pause for longer than we need to, just to be safe. if ( $period < 2 ) { $period = 2; } @@ -336,48 +331,56 @@ public function get_groups( array $args = array() ) { /** * Retrieve data about events associated with a set of groups. * - * This automatically breaks up requests into chunks of 50 groups to avoid overloading the API. + * Because of the way that the Meetup API v3 endpoints are structured, we unfortunately have to make one request + * (or more, if there's pagination) for each group that we want events for. When there are hundreds of groups, and + * we are throttling to make sure we don't get rate-limited, this process can literally take several minutes. + * + * So, when building the array for the $group_slugs parameter, it's important to filter out groups that you know + * will not provide relevant results. For example, if you want all events during a date range in the past, you can + * filter out groups that didn't join the chapter program until after your date range. + * + * Note that when using date/time related parameters in the $args array, unlike other endpoints and fields in the + * Meetup API which use an epoch timestamp in milliseconds, this one requires a date/time string formatted in + * ISO 8601, without the timezone part. Because consistency is overrated. * - * @param array $group_ids The IDs of the groups to get events for. - * @param array $args Optional. Additional request parameters. - * See https://www.meetup.com/meetup_api/docs/2/events/. + * @param array $group_slugs The URL slugs of each group to retrieve events for. Also known as `urlname`. + * @param array $args Optional. Additional request parameters. + * See https://www.meetup.com/meetup_api/docs/:urlname/events/#list * * @return array|WP_Error */ - public function get_events( array $group_ids, array $args = array() ) { - $url_base = $this->api_base . '2/events'; - $group_chunks = array_chunk( $group_ids, 50, true ); // Meetup API sometimes throws an error with chunk size larger than 50. - $events = array(); + public function get_events( array $group_slugs, array $args = array() ) { + $events = array(); - foreach ( $group_chunks as $chunk ) { - $query_args = array_merge( array( - 'group_id' => implode( ',', $chunk ), - ), $args ); - - $request_url = add_query_arg( $query_args, $url_base ); + if ( $this->debug ) { + $chunked = array_chunk( $group_slugs, 10 ); + $group_slugs = $chunked[0]; + } - $data = $this->send_paginated_request( $request_url ); + foreach ( $group_slugs as $group_slug ) { + $response = $this->get_group_events( $group_slug, $args ); - if ( is_wp_error( $data ) ) { - return $data; + if ( is_wp_error( $response ) ) { + return $response; } - $events = array_merge( $events, $data ); + $events = array_merge( $events, $response ); } return $events; } /** - * Retrieve data about the group. Calls https://www.meetup.com/meetup_api/docs/:urlname/#get + * Retrieve details about a group. * * @param string $group_slug The slug/urlname of a group. * @param array $args Optional. Additional request parameters. + * See https://www.meetup.com/meetup_api/docs/:urlname/#get * * @return array|WP_Error */ public function get_group_details( $group_slug, $args = array() ) { - $request_url = $this->api_base . "$group_slug"; + $request_url = $this->api_base . sanitize_key( $group_slug ); if ( ! empty( $args ) ) { $request_url = add_query_arg( $args, $request_url ); @@ -387,15 +390,16 @@ public function get_group_details( $group_slug, $args = array() ) { } /** - * Retrieve group members. Calls https://www.meetup.com/meetup_api/docs/:urlname/members/#list + * Retrieve details about group members. * * @param string $group_slug The slug/urlname of a group. * @param array $args Optional. Additional request parameters. + * See https://www.meetup.com/meetup_api/docs/:urlname/members/#list * * @return array|WP_Error */ public function get_group_members( $group_slug, $args = array() ) { - $request_url = $this->api_base . "$group_slug/members"; + $request_url = $this->api_base . sanitize_key( $group_slug ) . '/members'; if ( ! empty( $args ) ) { $request_url = add_query_arg( $args, $request_url ); @@ -409,12 +413,12 @@ public function get_group_members( $group_slug, $args = array() ) { * * @param string $group_slug The slug/urlname of a group. * @param array $args Optional. Additional request parameters. - * See https://www.meetup.com/meetup_api/docs/:urlname/events/. + * See https://www.meetup.com/meetup_api/docs/:urlname/events/#list * * @return array|WP_Error */ public function get_group_events( $group_slug, array $args = array() ) { - $request_url = $this->api_base . "$group_slug/events"; + $request_url = $this->api_base . sanitize_key( $group_slug ) . '/events'; if ( ! empty( $args ) ) { $request_url = add_query_arg( $args, $request_url ); diff --git a/public_html/wp-content/plugins/wordcamp-reports/classes/report/class-meetup-events.php b/public_html/wp-content/plugins/wordcamp-reports/classes/report/class-meetup-events.php index cb8b60c59b..2bb57d84ab 100644 --- a/public_html/wp-content/plugins/wordcamp-reports/classes/report/class-meetup-events.php +++ b/public_html/wp-content/plugins/wordcamp-reports/classes/report/class-meetup-events.php @@ -9,7 +9,7 @@ defined( 'WPINC' ) || die(); use Exception; -use DateTime, DateInterval; +use DateTime, DateTimeInterface, DateInterval; use WP_Error; use function WordCamp\Reports\get_views_dir_path; use function WordCamp\Reports\Validation\validate_date_range; @@ -50,6 +50,15 @@ class Meetup_Events extends Base { */ public static $methodology = ' Retrieve data about events in the Chapter program from the Meetup.com API. + + Note that this requires one or more requests to the API for every group in the Chapter program, so running this report may literally take 5-10 minutes. + + Known issues: + + '; /** @@ -82,10 +91,11 @@ class Meetup_Events extends Base { */ protected $public_data_fields = [ 'id' => '', - 'event_url' => '', + 'link' => '', 'name' => '', 'description' => '', 'time' => 0, + 'status' => '', 'group' => '', 'city' => '', 'l10n_country' => '', @@ -157,41 +167,54 @@ public function get_data() { return $data; } + // @todo Maybe find a way to run this without having to hack the ini. + ini_set( 'memory_limit', '900M' ); + ini_set( 'max_execution_time', 500 ); + $meetup = new Meetup_Client(); - $groups = $meetup->get_groups(); + + $groups = $meetup->get_groups( array( + // Don't include groups that joined the chapter program later than the date range. + 'pro_join_date_max' => $this->range->end->getTimestamp() * 1000, + // Don't include groups whose last event was before the start of the date range. + 'last_event_min' => $this->range->start->getTimestamp() * 1000, + ) ); if ( is_wp_error( $groups ) ) { $this->error->add( $groups->get_error_code(), $groups->get_error_message() ); return array(); } - $group_ids = wp_list_pluck( $groups, 'id' ); - $groups = array_combine( $group_ids, $groups ); + $group_slugs = wp_list_pluck( $groups, 'urlname' ); + $groups = array_combine( $group_slugs, $groups ); - $events = $meetup->get_events( $group_ids, array( + /** + * @todo This should probably be converted into a foreach loop that runs the `get_group_events` method + * separately for each group. That way we can modify the start/end date parameters individually for + * the case where the group had events before it joined the chapter program and some number of those + * are included within the report date range. (See Known Issues in the report methodology). + */ + $events = $meetup->get_events( $group_slugs, array( 'status' => 'upcoming,past', - 'time' => sprintf( - '%d,%d', - $this->range->start->getTimestamp() * 1000, - $this->range->end->getTimestamp() * 1000 - ), + 'no_earlier_than' => $this->get_timezoneless_iso8601_format( $this->range->start ), + 'no_later_than' => $this->get_timezoneless_iso8601_format( $this->range->end ), ) ); $data = []; - $relevant_keys = array_fill_keys( [ 'id', 'event_url', 'name', 'description', 'time', 'group', 'city', 'l10n_country', 'latitude', 'longitude' ], '' ); + $relevant_keys = $this->public_data_fields; foreach ( $events as $event ) { - $group_id = $event['group']['id']; - $event = wp_parse_args( $event, $relevant_keys ); + $group_slug = $event['group']['urlname']; + $event = wp_parse_args( $event, $relevant_keys ); $event['description'] = isset( $event['description'] ) ? trim( $event['description'] ) : ''; $event['time'] = absint( $event['time'] ) / 1000; // Convert to seconds. - $event['group'] = isset( $event['group']['name'] ) ? $event['group']['name'] : $groups[ $group_id ]['name']; - $event['city'] = isset( $event['venue']['city'] ) ? $event['venue']['city'] : $groups[ $group_id ]['city']; - $event['l10n_country'] = isset( $event['venue']['localized_country_name'] ) ? $event['venue']['localized_country_name'] : $groups[ $group_id ]['country']; - $event['latitude'] = ! empty( $event['venue']['lat'] ) ? $event['venue']['lat'] : $groups[ $group_id ]['lat']; - $event['longitude'] = ! empty( $event['venue']['lon'] ) ? $event['venue']['lon'] : $groups[ $group_id ]['lon']; + $event['group'] = isset( $event['group']['name'] ) ? $event['group']['name'] : $groups[ $group_slug ]['name']; + $event['city'] = isset( $event['venue']['city'] ) ? $event['venue']['city'] : $groups[ $group_slug ]['city']; + $event['l10n_country'] = isset( $event['venue']['localized_country_name'] ) ? $event['venue']['localized_country_name'] : $groups[ $group_slug ]['country']; + $event['latitude'] = ! empty( $event['venue']['lat'] ) ? $event['venue']['lat'] : $groups[ $group_slug ]['lat']; + $event['longitude'] = ! empty( $event['venue']['lon'] ) ? $event['venue']['lon'] : $groups[ $group_slug ]['lon']; $data[] = array_intersect_key( $event, $relevant_keys ); } @@ -310,13 +333,31 @@ public function compile_report_data( array $data ) { $compiled_data['groups_with_events'] = count( $compiled_data['total_events_by_group'] ); $meetup = new Meetup_Client(); - $total_groups = absint( $meetup->get_result_count( 'pro/wordpress/groups' ) ); + $compiled_data['total_groups'] = absint( $meetup->get_result_count( 'pro/wordpress/groups', array( + // Don't include groups that joined the chapter program later than the date range. + 'pro_join_date_max' => $this->range->end->getTimestamp() * 1000, + ) ) ); - $compiled_data['groups_with_no_events'] = $total_groups - $compiled_data['groups_with_events']; + $compiled_data['groups_with_no_events'] = $compiled_data['total_groups'] - $compiled_data['groups_with_events']; return $compiled_data; } + /** + * Format a date into a valid ISO 8601 string, and then strip off the timezone. + * + * This is the required format for Meetup's v3 events endpoint. + * + * @param DateTimeInterface $date + * + * @return bool|string + */ + protected function get_timezoneless_iso8601_format( DateTimeInterface $date ) { + $real_iso8601 = $date->format( 'c' ); + + return substr( $real_iso8601, 0, strpos( $real_iso8601, '+' ) ); + } + /** * Sort the events by the given field. * @@ -424,6 +465,7 @@ public static function render_admin_page() { ) { $options = array( 'earliest_start' => new DateTime( '2015-01-01' ), // Chapter program started in 2015. + 'max_interval' => new DateInterval( 'P1Y' ), 'search_query' => $search_query, 'search_fields' => self::get_search_fields(), ); @@ -472,6 +514,7 @@ public static function export_to_file() { $options = array( 'earliest_start' => new DateTime( '2015-01-01' ), // Chapter program started in 2015. + 'max_interval' => new DateInterval( 'P1Y' ), 'search_query' => $search_query, 'search_fields' => self::get_search_fields(), ); @@ -486,7 +529,7 @@ public static function export_to_file() { $filename[] = $report->range->start->format( 'Y-m-d' ); $filename[] = $report->range->end->format( 'Y-m-d' ); - $headers = [ 'Event ID', 'Event URL', 'Event Name', 'Description', 'Date', 'Group Name', 'City', 'Country (localized)', 'Latitude', 'Longitude' ]; + $headers = [ 'Event ID', 'Event URL', 'Event Name', 'Description', 'Date', 'Event Status', 'Group Name', 'City', 'Country (localized)', 'Latitude', 'Longitude' ]; $data = $report->get_data(); @@ -567,6 +610,7 @@ public static function render_public_page() { $options = array( 'earliest_start' => new DateTime( '2015-01-01' ), // Chapter program started in 2015. + 'max_interval' => new DateInterval( 'P1Y' ), ); $report = new self( $range->start, $range->end, $options ); diff --git a/public_html/wp-content/plugins/wordcamp-reports/views/html/meetup-events.php b/public_html/wp-content/plugins/wordcamp-reports/views/html/meetup-events.php index 5248d6461a..83cc425185 100644 --- a/public_html/wp-content/plugins/wordcamp-reports/views/html/meetup-events.php +++ b/public_html/wp-content/plugins/wordcamp-reports/views/html/meetup-events.php @@ -56,6 +56,10 @@

By group

+ + + +
Total groups as of format( 'M jS, Y' ) ); ?>
Groups with at least one event during the date range