import { ApolloError } from '@apollo/client'

import { InputTooLongError } from 'modules/ai/chat/errors'
import { ChatCompletionGenerateResult } from 'modules/ai/prompt/ChatCompletionPrompt'
import {
  CanRunChatCompletionDocument,
  CanRunChatCompletionQuery,
  ChatCompletionDocument,
  ChatCompletionInput,
  ChatCompletionMutationResult,
  ChatCompletionProvider,
  FormattedGraphQLError,
  getApolloClient,
} from 'modules/api'
import { getCurrentWorkspaceId } from 'modules/user/context'

// TODO(jordan): replace with the GraphQL-generated ChatCompletionInputParams
/**
 * Optional parameters that can accompany a chat completion request.
 * Every field may be omitted.
 */
export type ChatCompletionParams = {
  provider?: ChatCompletionProvider
  temperature?: number
  maxTokens?: number
  model?: string
  stop?: string[]
}

/**
 * Per-call transport options for API helpers in this module.
 * Both fields are optional.
 */
export type APIOptions = Partial<{
  /** Milliseconds to wait for a single attempt before it is rejected as timed out. */
  timeout: number
  /** How many times a failed attempt may be retried. */
  retries: number
}>

/** Retry count used when the caller does not supply `retries`. */
const DEFAULT_RETRIES = 0
/** Timeout in milliseconds used when the caller does not supply `timeout`. */
const DEFAULT_TIMEOUT = 60 * 1000
/** Tokens held back as a buffer for the user/system specification. */
const SPEC_TOKEN_BUFFER = 16
/** Token limit exposed to callers: 4096 minus the specification buffer. */
export const GPT_TOKEN_LIMIT = 4096 - SPEC_TOKEN_BUFFER

/**
 * Decides whether a failed request is worth retrying.
 *
 * An error is treated as non-retryable when any of its GraphQL error codes
 * is `BAD_USER_INPUT`; every other error (including `undefined`) is
 * considered retryable.
 *
 * @param error - The error raised by the request, if any.
 * @returns `false` if the error carries a non-retryable code, `true` otherwise.
 */
export const isRetryableError = (error?: ApolloError) => {
  const nonRetryableCodes = ['BAD_USER_INPUT']
  const hasNonRetryableCode = extractGraphQLErrorCodes(error).some((code) =>
    nonRetryableCodes.includes(code)
  )
  return !hasNonRetryableCode
}

/**
 * Collects the `code` of every GraphQL error attached to an error.
 *
 * @param error - The error to inspect; may be omitted.
 * @returns One entry per GraphQL error, in order. Empty when `error` is
 *   missing or carries no `graphQLErrors`.
 */
export const extractGraphQLErrorCodes = (error?: ApolloError) => {
  const rawErrors = error?.graphQLErrors || []
  // The errors are read through the project's FormattedGraphQLError shape,
  // which exposes `code` directly on each entry.
  const formattedErrors = rawErrors as unknown as FormattedGraphQLError[]
  return formattedErrors.map(({ code }) => code)
}

/**
 * Runs `fn` once, rejecting with `Error("Request timeout (<timeout>ms)")` if
 * it has not settled within `timeout` milliseconds.
 *
 * The timer is always cleared once the race settles, so a fast `fn` does not
 * leave a pending timer behind.
 */
const runWithTimeout = async <T>(
  fn: () => Promise<T>,
  timeout: number
): Promise<T> => {
  let timer: ReturnType<typeof setTimeout> | undefined
  try {
    return await Promise.race([
      fn(),
      new Promise<never>((_, reject) => {
        timer = setTimeout(
          () => reject(new Error(`Request timeout (${timeout}ms)`)),
          timeout
        )
      }),
    ])
  } finally {
    // clearTimeout(undefined) is a no-op, which covers fn() throwing
    // synchronously before the timer was created.
    clearTimeout(timer)
  }
}

/**
 * Runs `fn` with a per-attempt timeout and retries failures with
 * exponential backoff.
 *
 * A failed attempt is retried only when all of the following hold:
 * - `retries` is greater than 0,
 * - the failure is not a timeout (message starts with "Request timeout"),
 * - `isRetryableError` accepts the error.
 *
 * Before each retry the function waits `backoff` milliseconds; the wait
 * doubles on every subsequent retry.
 *
 * @param fn - Produces the promise for one attempt; called again on retry.
 * @param retries - Remaining number of retries allowed.
 * @param backoff - Milliseconds to wait before the next retry (default 1000).
 * @param timeout - Milliseconds allowed for each attempt.
 * @returns The value `fn` resolved with.
 * @throws The last error (or rejection value) once no retry is permitted.
 */
const retryWithBackoff = async <T>({
  fn,
  retries,
  backoff = 1000,
  timeout,
}: {
  fn: () => Promise<T>
  retries: number
  backoff?: number
  timeout: number
}): Promise<T> => {
  try {
    return await runWithTimeout(fn, timeout)
  } catch (error) {
    // The rejection value is not guaranteed to be an Error (it may even be
    // null/undefined), so read `message` defensively.
    const message = (error as { message?: unknown } | null | undefined)
      ?.message
    const isTimeoutError =
      typeof message === 'string' && message.startsWith('Request timeout')
    if (isTimeoutError) {
      console.debug(`[AI] Request timeout. Not retrying`)
    }
    // Only consult isRetryableError when a retry is actually possible.
    if (retries > 0 && !isTimeoutError && isRetryableError(error)) {
      await new Promise((resolve) => setTimeout(resolve, backoff))
      return retryWithBackoff({
        fn,
        retries: retries - 1,
        backoff: backoff * 2,
        timeout,
      })
    }
    throw error
  }
}

/**
 * Resolves which gpt-3.5 model a generated prompt should run against and
 * returns the prompt with that model written into its params.
 *
 * The candidates offered to `determineValidModelToRun` are `gpt-3.5-turbo`
 * and `gpt-3.5-turbo-16k`.
 *
 * @param input - The generated prompt (messages plus params).
 * @returns A copy of `input` whose `params.model` is the chosen model.
 * @throws Whatever `determineValidModelToRun` throws.
 */
export const determineGpt35ModelToRun = async (
  input: ChatCompletionGenerateResult
): Promise<
  ChatCompletionGenerateResult & {
    params: ChatCompletionGenerateResult['params'] & {
      model: ChatCompletionModel
    }
  }
> => {
  const { messages, params } = input
  // 'gpt-3.5-turbo-1106' is left out for now.
  // TODO: enable this once it's on Azure
  const candidateModels: ChatCompletionModel[] = [
    'gpt-3.5-turbo',
    'gpt-3.5-turbo-16k',
  ]

  const model = await determineValidModelToRun({
    messages,
    maxTokens: params.maxTokens ?? null,
    models: candidateModels,
  })

  return { ...input, params: { ...params, model } }
}

/**
 * Asks the server which of the given models can run a chat completion with
 * these messages and `maxTokens`, and returns the first one reported as
 * runnable.
 *
 * The check is done server side because the client does not know the full
 * size of the prompt: substitution most often happens on the server for
 * BackendChatCompletionPrompt.
 *
 * @param messages - The chat messages that would be sent.
 * @param maxTokens - Requested completion token cap, or `null` for none.
 * @param models - Candidate models to check.
 * @returns The first model in the server response flagged as runnable.
 * @throws Error when the server returns no entries.
 * @throws InputTooLongError when no candidate model can run the input.
 */
export const determineValidModelToRun = async ({
  messages,
  maxTokens,
  models,
}: {
  messages: ChatCompletionInput['messages']
  maxTokens: number | null
  models: ChatCompletionModel[]
}): Promise<ChatCompletionModel> => {
  const client = getApolloClient()
  const result = await client.query<CanRunChatCompletionQuery>({
    query: CanRunChatCompletionDocument,
    returnPartialData: false,
    variables: { input: { messages, maxTokens, models } },
  })

  const candidates = result.data?.canRunChatCompletion
  if (!candidates || candidates.length === 0) {
    throw Error(`No response received`)
  }

  const firstRunnable = candidates.find((candidate) =>
    Boolean(candidate.canRun)
  )
  const model = firstRunnable?.model
  if (!model) {
    throw new InputTooLongError(
      `Input too long for models: ${JSON.stringify(models)}`
    )
  }

  return model as ChatCompletionModel
}

/**
 * Sends a chat completion mutation for the current workspace and returns
 * the mutation's `data`.
 *
 * The request goes through `retryWithBackoff`, using `options.timeout` and
 * `options.retries` and falling back to `DEFAULT_TIMEOUT` and
 * `DEFAULT_RETRIES` when either is not provided.
 *
 * @param input - The chat completion input; `workspaceId` is added from
 *   `getCurrentWorkspaceId()`.
 * @param options - Optional timeout and retry settings.
 * @returns The mutation result data.
 * @throws Error("No response received") when the mutation returns no
 *   `chatCompletion` entries, plus any error surfaced by the request or by
 *   `retryWithBackoff`.
 */
export const fetchChatCompletion = async (
  input: ChatCompletionInput,
  options: APIOptions = { timeout: DEFAULT_TIMEOUT, retries: DEFAULT_RETRIES }
): Promise<ChatCompletionMutationResult['data']> => {
  console.debug('[AI][fetchChatCompletion]', input.messages, input.params)

  // One attempt: issue the mutation and validate that it produced a result.
  const runMutation = () => {
    const client = getApolloClient()
    const variables = {
      input: { ...input, workspaceId: getCurrentWorkspaceId() },
    }
    return client
      .mutate({ mutation: ChatCompletionDocument, variables })
      .then(({ data }: ChatCompletionMutationResult) => {
        const completion = data?.chatCompletion
        if (!completion || completion.length === 0) {
          throw Error(`No response received`)
        }
        console.debug('[AI] Got chat completion', completion)
        return data
      })
  }

  const { timeout, retries } = options
  return retryWithBackoff({
    fn: runMutation,
    timeout: timeout ?? DEFAULT_TIMEOUT,
    retries: retries ?? DEFAULT_RETRIES,
  })
}

/** Model identifiers accepted by the chat completion helpers in this module. */
export type ChatCompletionModel =
  // gpt-3.5 family
  | 'gpt-3.5-turbo'
  | 'gpt-3.5-turbo-16k'
  | 'gpt-3.5-turbo-1106'
  // gpt-4 family
  | 'gpt-4'
  | 'gpt-4-32k'
  | 'gpt-4-1106-preview'
