Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/configurable api base #1760

Merged
merged 7 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,048 changes: 1,279 additions & 1,769 deletions js/sdk/pnpm-lock.yaml

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions js/sdk/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,60 @@ export type WrappedServerStatsResponse = ResultsWrapper<ServerStats>;
export type WrappedTokenResponse = ResultsWrapper<TokenResponse>;
export type WrappedUserResponse = ResultsWrapper<User>;
export type WrappedUsersResponse = PaginatedResultsWrapper<User[]>;



/**
 * Minimal envelope for an R2R API response: the payload travels under
 * `results`.
 */
export interface R2RResults<T> {
  /** The response payload. */
  results: T;
  // Other fields (e.g. "info", "status") may potentially accompany the
  // payload; they are not modelled here.
}

/**
 * Results envelope for paginated endpoints: the payload from
 * {@link R2RResults} plus a `meta` object carrying `total_entries`.
 */
export interface PaginatedR2RResult<T> extends R2RResults<T> {
  /** Pagination metadata accompanying the payload. */
  meta: {
    /** Value of the `total_entries` field in the response metadata. */
    total_entries: number;
  };
}

// ---------------------------
// API Key Models
// ---------------------------

/**
 * An API key in its complete form, i.e. including the private `api_key`
 * secret, which is returned only once, at creation time.
 */
export interface ApiKey {
  key_id: string;
  public_key: string;
  /** Private key; only returned in the creation response. */
  api_key: string;
  /** Optional name of the key. */
  name?: string;
}

/**
 * An API key without its private `api_key` secret. Typically the shape used
 * when listing a user's existing keys.
 */
export interface ApiKeyNoPriv {
  key_id: string;
  public_key: string;
  /** Optional name of the key. */
  name?: string;
  /** Typed as a string here; use `Date` instead if your code auto-parses it. */
  updated_at: string;
}

/**
 * Response envelope for a single newly created API key: `results` holds the
 * full {@link ApiKey}, including its private `api_key`.
 */
export type WrappedAPIKeyResponse = R2RResults<ApiKey>;

/**
 * Response envelope for listing existing API keys: `results` holds
 * {@link ApiKeyNoPriv} entries, i.e. no private keys.
 *
 * NOTE(review): the other list responses in this file are built on
 * `PaginatedResultsWrapper`; confirm that the API-key listing really reports
 * its count under `meta.total_entries`, as `PaginatedR2RResult` declares.
 */
export type WrappedAPIKeysResponse = PaginatedR2RResult<ApiKeyNoPriv[]>;
43 changes: 43 additions & 0 deletions js/sdk/src/v3/clients/users.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { feature } from "../../feature";
import { r2rClient } from "../../r2rClient";
import {
WrappedAPIKeyResponse,
WrappedAPIKeysResponse,
WrappedBooleanResponse,
WrappedGenericMessageResponse,
WrappedCollectionsResponse,
Expand Down Expand Up @@ -458,4 +460,45 @@ export class UsersClient {
downloadBlob(blob, options.filename);
}
}

/**
 * Create a new API key for a user by POSTing to `users/{id}/api-keys`.
 *
 * Only superusers or the user themselves may create an API key.
 * @param options.id ID of the user for whom to create an API key
 * @returns WrappedAPIKeyResponse
 */
@feature("users.createApiKey")
async createApiKey(options: { id: string }): Promise<WrappedAPIKeyResponse> {
  const { id } = options;
  const endpoint = `users/${id}/api-keys`;
  return this.client.makeRequest("POST", endpoint);
}

/**
 * List the API keys of a user by GETting `users/{id}/api-keys`.
 *
 * Only superusers or the user themselves may list the API keys.
 * @param options.id ID of the user whose API keys to list
 * @returns WrappedAPIKeysResponse
 */
@feature("users.listApiKeys")
async listApiKeys(options: { id: string }): Promise<WrappedAPIKeysResponse> {
  const { id } = options;
  const endpoint = `users/${id}/api-keys`;
  return this.client.makeRequest("GET", endpoint);
}

/**
 * Delete one API key of a user by sending DELETE to
 * `users/{id}/api-keys/{keyId}`.
 *
 * Only superusers or the user themselves may delete the API key.
 * @param options.id ID of the user
 * @param options.keyId ID of the API key to delete
 * @returns WrappedBooleanResponse
 */
@feature("users.deleteApiKey")
async deleteApiKey(options: {
  id: string;
  keyId: string;
}): Promise<WrappedBooleanResponse> {
  const { id, keyId } = options;
  const endpoint = `users/${id}/api-keys/${keyId}`;
  return this.client.makeRequest("DELETE", endpoint);
}

}
2 changes: 1 addition & 1 deletion py/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ COPY pyproject.toml /app/py/pyproject.toml

# Install dependencies
RUN poetry config virtualenvs.create false \
&& poetry install --extras "core ingestion-bundle" --no-dev --no-root \
&& poetry install --extras "core ingestion-bundle" --no-root \
&& pip install --no-cache-dir gunicorn uvicorn

# Create the final image
Expand Down
38 changes: 24 additions & 14 deletions py/cli/command_group.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
import types
from functools import wraps
from pathlib import Path
Expand Down Expand Up @@ -142,18 +143,32 @@ def exit(self, code: int = 0) -> Never:
raise SystemExit(code)


def initialize_client(base_url: str) -> R2RAsyncClient:
def initialize_client() -> R2RAsyncClient:
"""Initialize R2R client with API key from config if available."""
client = R2RAsyncClient()

try:
config = load_config()
if api_key := config.get("api_key"):
client.set_api_key(api_key)
if not client.api_key:
console.print(
"[yellow]Warning: API key not properly set in client[/yellow]"
)

env_api_base = os.getenv("R2R_API_BASE")
config_api_base = config.get("api_base")
if env_api_base:
api_base = env_api_base
elif config_api_base:
api_base = config_api_base
else:
api_base = "https://cloud.sciphi.ai"
client.set_base_url(api_base)

env_api_key = os.getenv("R2R_API_KEY")
config_api_key = config.get("api_key")
if env_api_key:
api_key = env_api_key
elif config_api_key:
api_key = config_api_key
else:
api_key = None
client.set_api_key(api_key)

except Exception as e:
console.print(
Expand All @@ -165,12 +180,7 @@ def initialize_client(base_url: str) -> R2RAsyncClient:


@click.group(cls=CustomGroup)
@click.option(
"--base-url",
default="https://cloud.sciphi.ai",
help="Base URL for the API",
)
@pass_context
async def cli(ctx: click.Context, base_url: str) -> None:
async def cli(ctx: click.Context) -> None:
"""R2R CLI for all core operations."""
ctx.obj = initialize_client(base_url)
ctx.obj = initialize_client()
1 change: 0 additions & 1 deletion py/cli/commands/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ async def search(ctx: click.Context, query, **kwargs):

client: R2RAsyncClient = ctx.obj

print("client.base_url = ", client.base_url)
try:
with timer():
results = await client.retrieval.search(
Expand Down
20 changes: 20 additions & 0 deletions py/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,26 @@ async def set_api_key(ctx, api_key: str):
console.print("[red]Failed to set API key:[/red]", str(e))


# Commands for Setting / Retrieving Base URL
#
@cli.command("set-api-base", short_help="Set your R2R API base URL")
@click.argument("base_url", required=True, type=str)
@click.pass_context
async def set_api_base(ctx, base_url: str):
    """
    Store your R2R API base URL locally so you don’t have to pass it on every command.
    Example usage:
    r2r set-api-base https://api.example.com
    """
    # Load the local config, record the URL under "api_base" and write the
    # config back. Any failure along the way is reported on the console
    # instead of being raised.
    try:
        stored_settings = load_config()
        stored_settings["api_base"] = base_url
        save_config(stored_settings)
        console.print("[green]API base URL set successfully![/green]")
    except Exception as err:
        console.print("[red]Failed to set API base:[/red]", str(err))


@cli.command("get-api", short_help="Get your stored R2R API key")
@click.pass_context
async def get_api(ctx):
Expand Down
39 changes: 39 additions & 0 deletions py/core/base/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,45 @@ class AppConfig(R2RSerializable):
default_max_documents_per_user: Optional[int] = 100
default_max_chunks_per_user: Optional[int] = 100_000
default_max_collections_per_user: Optional[int] = 10
default_max_upload_size: int = 2_000_000 # e.g. ~2 MB

# File extension to max-size mapping
# These are examples; adjust sizes as needed.
max_upload_size_by_type: dict[str, int] = {
# Common text-based formats
"txt": 2_000_000,
"md": 2_000_000,
"tsv": 2_000_000,
"csv": 5_000_000,
"xml": 2_000_000,
"html": 5_000_000,
# Office docs
"doc": 10_000_000,
"docx": 10_000_000,
"ppt": 20_000_000,
"pptx": 20_000_000,
"xls": 10_000_000,
"xlsx": 10_000_000,
"odt": 5_000_000,
# PDFs can expand quite a bit when converted to text
"pdf": 30_000_000,
# E-mail
"eml": 5_000_000,
"msg": 5_000_000,
"p7s": 5_000_000,
# Images
"bmp": 5_000_000,
"heic": 5_000_000,
"jpeg": 5_000_000,
"jpg": 5_000_000,
"png": 5_000_000,
"tiff": 5_000_000,
# Others
"epub": 10_000_000,
"rtf": 5_000_000,
"rst": 5_000_000,
"org": 5_000_000,
}

@classmethod
def create(cls, *args, **kwargs):
Expand Down
16 changes: 16 additions & 0 deletions py/core/main/api/v3/documents_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,22 @@ async def create_document(
if file:
file_data = await self._process_file(file)
content_length = len(file_data["content"])
file_ext = file.filename.split(".")[
-1
] # e.g. "pdf", "txt"
max_allowed_size = await self.services.management.get_max_upload_size_by_type(
user_id=auth_user.id, file_type_or_ext=file_ext
)

if content_length > max_allowed_size:
raise R2RException(
status_code=413, # HTTP 413: Payload Too Large
message=(
f"File size exceeds maximum of {max_allowed_size} bytes "
f"for extension '{file_ext}'."
),
)

file_content = BytesIO(
base64.b64decode(file_data["content"])
)
Expand Down
68 changes: 68 additions & 0 deletions py/core/main/services/management_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,10 +841,78 @@ async def delete_conversation(
)

async def get_user_max_documents(self, user_id: UUID) -> int | None:
# Fetch the user to see if they have any overrides stored
user = await self.providers.database.users_handler.get_user_by_id(
user_id
)
if user.limits_overrides and "max_documents" in user.limits_overrides:
return user.limits_overrides["max_documents"]
return self.config.app.default_max_documents_per_user

async def get_user_max_chunks(self, user_id: UUID) -> int | None:
user = await self.providers.database.users_handler.get_user_by_id(
user_id
)
if user.limits_overrides and "max_chunks" in user.limits_overrides:
return user.limits_overrides["max_chunks"]
return self.config.app.default_max_chunks_per_user

async def get_user_max_collections(self, user_id: UUID) -> int | None:
user = await self.providers.database.users_handler.get_user_by_id(
user_id
)
if (
user.limits_overrides
and "max_collections" in user.limits_overrides
):
return user.limits_overrides["max_collections"]
return self.config.app.default_max_collections_per_user

async def get_max_upload_size_by_type(
    self, user_id: UUID, file_type_or_ext: str
) -> int:
    """
    Return the maximum allowed upload size (in bytes) for a user and file extension.

    Resolution order, first match wins:
      1. the user's ``limits_overrides["max_file_size_by_type"][ext]``
      2. the user's ``limits_overrides["max_file_size"]``
      3. the system config ``app.max_upload_size_by_type[ext]``
      4. the system config ``app.default_max_upload_size``

    User-level overrides that are present but null are treated as unset, so
    the lookup falls through to the next level instead of failing or
    returning ``None``.

    Example of the user-level overrides consulted here:

    ```json
    {
        "limits_overrides": {
            "max_file_size": 20000000,
            "max_file_size_by_type": {
                "pdf": 50000000,
                "docx": 30000000
            }
        }
    }
    ```

    Args:
        user_id: ID of the user whose overrides are looked up.
        file_type_or_ext: File extension such as ``"pdf"`` or ``".PDF"``;
            case and leading dots are ignored.

    Returns:
        The applicable size limit in bytes.
    """
    # 1. Normalize the extension (".PDF" -> "pdf").
    ext = file_type_or_ext.lower().lstrip(".")

    # 2. Fetch the user to see whether any overrides are stored.
    user = await self.providers.database.users_handler.get_user_by_id(
        user_id
    )
    user_overrides = user.limits_overrides or {}

    # 3. Per-extension user override. `or {}` guards against the key being
    #    stored with a null value, which would make the lookup raise.
    user_file_type_limits = user_overrides.get("max_file_size_by_type") or {}
    per_type_limit = user_file_type_limits.get(ext)
    if per_type_limit is not None:
        return per_type_limit

    # 4. Blanket user override; a null value is ignored so that the
    #    declared `int` return type holds.
    blanket_limit = user_overrides.get("max_file_size")
    if blanket_limit is not None:
        return blanket_limit

    # 5. Nothing at user level: consult the per-extension system config.
    system_type_limits = self.config.app.max_upload_size_by_type
    if ext in system_type_limits:
        return system_type_limits[ext]

    # 6. Otherwise, return the global default.
    return self.config.app.default_max_upload_size
Loading
Loading