from jupyterquiz import display_quiz
import json

# Colour overrides shared by every quiz in this module; the dict is handed to
# display_quiz through its ``colors`` keyword argument.
COLORS_QUIZ = {
    '--jq-multiple-choice-bg': '#345995',
    '--jq-mc-button-bg': '#AAA',
    '--jq-mc-button-border': '#e0e0e0e0',
    '--jq-mc-button-inset-shadow': '#555555',
    '--jq-many-choice-bg': '#e26d5a',
    # Alternate shade left in the file for reference: '#861657'
    '--jq-numeric-bg': '#5bc0eb',
    '--jq-numeric-input-bg': '#c0c0c0',
    '--jq-numeric-input-label': '#101010',
    '--jq-numeric-input-shadow': '#999999',
    '--jq-incorrect-color': '#d55',
    '--jq-correct-color': '#87a878',
    '--jq-text-color': '#fafafa',
}

def quiz_data_exploration_1():
    """Display the first data-exploration quiz.

    The two questions are stored as an embedded JSON document, parsed with
    ``json.loads`` and passed to ``display_quiz`` together with the shared
    ``COLORS_QUIZ`` colour overrides. Returns ``None``.
    """
    raw_questions = """
    [
      {
        "question": "Would preprocessing always be the same for each algorithm when working with tabular data?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "True",
            "correct": false,
            "feedback": "Incorrect. Different algorithms may require different preprocessing steps for optimal performance."
          },
          {
            "answer": "False",
            "correct": true,
            "feedback": "Correct! Preprocessing can vary depending on the algorithm. For example, tree-based models might not require feature scaling, while neural networks typically do."
          }
        ]
      },
      {
        "question": "Which of the following issues might be present in a tabular dataset? (Select all that apply)",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "Distribution shift between train and test sets",
            "correct": true,
            "feedback": "Correct! Distribution shift is a common issue in real-world datasets, especially when data is collected over time."
          },
          {
            "answer": "Multicollinearity among features / Duplicated features",
            "correct": true,
            "feedback": "Correct! Multicollinearity, or high correlation between features, is often present in tabular datasets."
          },
          {
            "answer": "Class imbalance",
            "correct": true,
            "feedback": "Correct! Class imbalance is a common issue, especially in binary classification problems."
          },
          {
            "answer": "Missing values",
            "correct": true,
            "feedback": "Correct! Missing values are very common in real-world datasets and need to be handled appropriately."
          },
          {
            "answer": "Outliers",
            "correct": true,
            "feedback": "Correct!"
          },
          {
            "answer": "Zero-variance features",
            "correct": true,
            "feedback": "Correct!"
          }
        ]
      }
    ]
    """

    # Decode first, then display, so the parsed structure has a name.
    questions = json.loads(raw_questions)
    display_quiz(questions, colors=COLORS_QUIZ)

def quiz_data_exploration_2():
    """Display the second data-exploration quiz.

    A single question about the issues found in the explored dataset is
    stored as embedded JSON, parsed with ``json.loads`` and passed to
    ``display_quiz`` with the shared ``COLORS_QUIZ`` colour overrides.
    Returns ``None``.
    """
    raw_questions = """
    [
      {
        "question": "Which of the following issues were present in the dataset we explored? (Select all that apply)",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "Duplicated features",
            "correct": true,
            "feedback": "Correct! We found some features that were exactly the same, indicating duplication."
          },
          {
            "answer": "Zero-variance features",
            "correct": true,
            "feedback": "Correct! We identified features that had the same value for all samples, which provide no useful information."
          },
          {
            "answer": "Different scales across features",
            "correct": true,
            "feedback": "Correct! We observed that features had different scales, which is why we applied scaling."
          },
          {
            "answer": "Outliers",
            "correct": true,
            "feedback": "Correct! We detected outliers in some of the features, which can impact model performance."
          },
          {
            "answer": "Missing values",
            "correct": false,
            "feedback": "Incorrect. While missing values are common in many datasets, we didn't explicitly mention or handle this issue in our exploration."
          },
          {
            "answer": "Non-numeric features",
            "correct": false,
            "feedback": "Incorrect. We didn't encounter or discuss non-numeric features in our dataset exploration."
          },
          {
            "answer": "Time series data",
            "correct": false,
            "feedback": "Incorrect. Our dataset was not described as time series data, and we didn't apply any time series-specific analyses."
          }
        ]
      }
    ]
    """

    # Decode first, then display, so the parsed structure has a name.
    questions = json.loads(raw_questions)
    display_quiz(questions, colors=COLORS_QUIZ)

def quiz_data_exploration_3():
    """Display the third data-exploration quiz.

    Two questions on feature scaling and on fitting the scaler are stored as
    embedded JSON, parsed with ``json.loads`` and passed to ``display_quiz``
    with the shared ``COLORS_QUIZ`` colour overrides. Returns ``None``.
    """
    raw_questions = """
    [
      {
        "question": "Why is feature scaling important when working with neural networks for tabular data?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "It ensures all features contribute equally to the model initially",
            "correct": true,
            "feedback": "Correct! Scaling ensures that all features start on a level playing field, allowing the model to learn appropriate weights for each feature."
          },
          {
            "answer": "It speeds up the training process",
            "correct": true,
            "feedback": "Also correct! Scaled features can lead to faster convergence during gradient descent."
          },
          {
            "answer": "It eliminates the need for regularization",
            "correct": false,
            "feedback": "Incorrect. While scaling can help, it doesn't eliminate the need for regularization, which serves a different purpose."
          },
          {
            "answer": "It automatically removes outliers from the dataset",
            "correct": false,
            "feedback": "Incorrect. Scaling doesn't remove outliers; it just changes the scale of the data."
          }
        ]
      },
      {
        "question": "When preprocessing data for a machine learning model, why do we fit the scaler on the training data and then transform both training and test data?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "To prevent data leakage from the test set",
            "correct": true,
            "feedback": "Correct! This approach ensures that no information from the test set influences the preprocessing step."
          },
          {
            "answer": "To save computational time",
            "correct": false,
            "feedback": "While this might save some time, it's not the primary reason."
          },
          {
            "answer": "To introduce bias into the model",
            "correct": false,
            "feedback": "Incorrect. We actually want to avoid introducing bias."
          },
          {
            "answer": "It doesn't matter; we could fit on either set",
            "correct": false,
            "feedback": "Incorrect. The choice of which set to fit on is important for maintaining the integrity of your model evaluation."
          }
        ]
      }
    ]
    """

    # Decode first, then display, so the parsed structure has a name.
    questions = json.loads(raw_questions)
    display_quiz(questions, colors=COLORS_QUIZ)

def quiz_mlp():
    """Display the quiz about MLPs for tabular data.

    One question is stored as embedded JSON, parsed with ``json.loads`` and
    passed to ``display_quiz`` with the shared ``COLORS_QUIZ`` colour
    overrides. Returns ``None``.
    """
    raw_questions = """
    [
      {
        "question": "Which of the following statements about MLPs for tabular data are true?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "MLPs can automatically handle categorical variables without preprocessing",
            "correct": false,
            "feedback": "Incorrect. MLPs typically require categorical variables to be encoded (e.g., one-hot encoding) before being input into the network."
          },
          {
            "answer": "MLPs are particularly well-suited for capturing non-linear relationships in tabular data",
            "correct": true,
            "feedback": "Correct! The non-linear activation functions in MLPs allow them to model complex, non-linear relationships in the data."
          },
          {
            "answer": "MLPs always outperform traditional machine learning models on tabular data",
            "correct": false,
            "feedback": "Incorrect. While MLPs can be powerful, traditional models like gradient boosting machines often perform very well on tabular data and may outperform MLPs in some cases."
          },
          {
            "answer": "MLPs for tabular data typically require deeper architectures compared to those used for image data",
            "correct": false,
            "feedback": "Incorrect. MLPs for tabular data often use shallower architectures compared to those used for image data, as tabular data typically has lower-dimensional feature spaces."
          }
        ]
      }
    ]
    """

    # Decode first, then display, so the parsed structure has a name.
    questions = json.loads(raw_questions)
    display_quiz(questions, colors=COLORS_QUIZ)

def quiz_dcn():
    """Display the quiz about the Deep Cross Network.

    One question is stored as embedded JSON, parsed with ``json.loads`` and
    passed to ``display_quiz`` with the shared ``COLORS_QUIZ`` colour
    overrides. Returns ``None``.
    """
    raw_questions = """
    [
      {
        "question": "What is the main advantage of the Deep Cross Network over a standard MLP for tabular data?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "It explicitly models feature interactions",
            "correct": true,
            "feedback": "Correct! The cross layers in DCN are designed to capture feature interactions efficiently."
          },
          {
            "answer": "It always requires less training data",
            "correct": false,
            "feedback": "Maybe! The data requirements depend on the problem complexity but are also influenced by model architecture, especially number of parameters."
          },
          {
            "answer": "It always converges faster",
            "correct": false,
            "feedback": "Maybe! Convergence speed depends on various factors, not just the model architecture."
          },
          {
            "answer": "It eliminates the need for feature engineering",
            "correct": false,
            "feedback": "While DCN can capture some feature interactions automatically, it doesn't completely eliminate the need for feature engineering in all cases."
          }
        ]
    }
    ]
    """

    # Decode first, then display, so the parsed structure has a name.
    questions = json.loads(raw_questions)
    display_quiz(questions, colors=COLORS_QUIZ)

def quiz_evaluation():
    """Display the model-evaluation quiz.

    Three questions on cross-validation and test-set performance are stored
    as embedded JSON, parsed with ``json.loads`` and passed to
    ``display_quiz`` with the shared ``COLORS_QUIZ`` colour overrides.
    Returns ``None``.
    """
    raw_questions = """
    [
      {
        "question": "What does stratified cross-validation do in classification tasks?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "Preserves the proportion of classes across each fold",
            "correct": true,
            "feedback": "Correct. Stratification ensures that each fold has a similar class distribution to the overall dataset."
          },
          {
            "answer": "Ensures temporal consistency in time series data",
            "correct": false,
            "feedback": "Incorrect. Stratification addresses class imbalance, not temporal order."
          },
          {
            "answer": "Reduces computational complexity",
            "correct": false,
            "feedback": "Incorrect. Stratification doesn’t affect computational cost directly."
          },
          {
            "answer": "Improves model interpretability",
            "correct": false,
            "feedback": "Incorrect. Stratification relates to sampling strategy, not interpretability."
          }
        ]
      },
      {
        "question": "What is a key reason for using time series cross-validation in financial applications?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "It respects the chronological order of data to avoid data leakage",
            "correct": true,
            "feedback": "Correct. Time series cross-validation prevents lookahead bias by preserving temporal order."
          },
          {
            "answer": "It improves accuracy compared to random sampling",
            "correct": false,
            "feedback": "Incorrect. It may give lower accuracy but provides a more realistic estimate."
          },
          {
            "answer": "It allows shuffling of data across time",
            "correct": false,
            "feedback": "Incorrect. Data should not be shuffled in time series validation."
          },
          {
            "answer": "It removes the need for hyperparameter tuning",
            "correct": false,
            "feedback": "Incorrect. Hyperparameter tuning remains necessary."
          }
        ]
      },
      {
        "question": "How should test set performance typically compare to cross-validation performance?",
        "type": "multiple_choice",
        "answers": [
          {
            "answer": "It can be higher, lower, or similar depending on data variability and overfitting",
            "correct": true,
            "feedback": "Correct. Variability, distribution shift, and model stability all influence test performance."
          },
          {
            "answer": "It should always be higher",
            "correct": false,
            "feedback": "Incorrect. That would indicate overfitting to validation folds."
          },
          {
            "answer": "It should always be lower",
            "correct": false,
            "feedback": "Incorrect. It may be lower due to overfitting, but this is not always the case."
          },
          {
            "answer": "It should be exactly the same",
            "correct": false,
            "feedback": "Incorrect. Performance always varies between data splits."
          }
        ]
      }
    ]
    """

    # Decode first, then display, so the parsed structure has a name.
    questions = json.loads(raw_questions)
    display_quiz(questions, colors=COLORS_QUIZ)