###import statements
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB ###Naive bayes classifier for text data
from sklearn.svm import SVC # Corrected from SVMClassifier
from sklearn.pipeline import Pipeline

"""
Let us say that we have two documents, A and B, containing the statements

'The car is driven on the road.' and 'The truck is driven on the highway.', respectively.

TF-IDF (Term Frequency-Inverse Document Frequency) is a statistical measure used in Natural Language Processing (NLP) and information retrieval to evaluate how important a word is to a specific document within a larger collection (corpus).
"""

# Labelled example phrases, keyed by the intent they should map to.
training_data = {
    "FIR": [
        "What is FIR?",
        "How to file FIR?",
        "Police complaint procedure",
        "First Information Report meaning",
    ],
    "Bail": [
        "What is bail?",
        "How to get bail?",
        "Bail process in India",
        "Temporary release from jail",
    ],
    "IPC": [
        "What is IPC?",
        "Indian Penal Code details",
        "Criminal law sections",
        "IPC full form",
    ],
    "RTI": [
        "What is RTI?",
        "How to file RTI?",
        "Right to information process",
        "Government information request",
    ],
    "Cyber Crime": [
        "What is cyber crime?",
        "Online fraud complaint",
        "Hacking case",
        "Internet crime laws",
    ],
}

# Flatten the mapping into two parallel lists: X holds the phrases and
# y holds the intent label for the phrase at the same index.
X = []
y = []

for label, examples in training_data.items():
    X.extend(examples)
    y.extend([label] * len(examples))

# Build the intent classifier and train it on the labelled phrases.
# Step 1 turns each phrase into TF-IDF features; step 2 is a support-vector
# classifier with scikit-learn's default settings. MultinomialNB (imported
# at the top of the file) can be swapped in for the "classifier" step.
# Pipeline.fit returns the fitted pipeline, so construction and training
# are chained into a single expression.
model = Pipeline(
    steps=[
        ("vectorizer", TfidfVectorizer()),
        ("classifier", SVC()),
    ]
).fit(X, y)

# Canned answer text for each intent label the assistant can reply with.
# Long answers are wrapped using implicit string concatenation; the
# resulting values are single strings.
responses = {
    "FIR": (
        "FIR stands for First Information Report. "
        "It is filed when police receive information about a cognizable offence."
    ),
    "Bail2": "Bail is given by magistrate",
    "Bail": (
        "Bail is the temporary release of an accused person awaiting trial, "
        "sometimes with conditions."
    ),
    "IPC": (
        "IPC stands for Indian Penal Code. "
        "It defines crimes and punishments in India."
    ),
    "RTI": (
        "RTI stands for Right to Information. "
        "It allows citizens to request information from public authorities."
    ),
    "Cyber Crime": (
        "Cyber crime refers to crimes "
        "committed using computers or the internet."
    ),
}

# Chat loop: read a question from the console, classify it with the trained
# model, and print the canned response for the predicted intent. The loop
# ends when the user types 'exit' or the input stream is closed/interrupted.
print("⚖ Welcome to AI Legal Assistant")
print("Type 'exit' to quit\n")

while True:
    try:
        # Strip surrounding whitespace so inputs such as "exit " or " exit"
        # are still recognised as the quit command.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed (e.g. piped input ran out) or the user pressed
        # Ctrl-C: leave the loop with the normal farewell, not a traceback.
        print("\nAssistant: Stay informed about your legal rights. Goodbye!")
        break

    if not user_input:
        # A blank line carries no words to classify; ask again rather than
        # printing whatever intent the model happens to return for it.
        print("Assistant: Please type a legal question, or 'exit' to quit.")
        continue

    if user_input.lower() == "exit":
        print("Assistant: Stay informed about your legal rights. Goodbye!")
        break

    # predict() takes a list of documents and returns one label per document.
    predicted_intent = model.predict([user_input])[0]

    # Fall back to a generic message if an intent has no response entry,
    # so a mismatch between the two tables cannot raise KeyError mid-chat.
    print(
        "Assistant:",
        responses.get(
            predicted_intent,
            "Sorry, I don't have information on that topic yet.",
        ),
    )

# Example session:
#
# ⚖ Welcome to AI Legal Assistant
# Type 'exit' to quit
#
# You: fir
# Assistant: FIR stands for First Information Report. It is filed when police receive information about a cognizable offence.
# You: ipc
# Assistant: IPC stands for Indian Penal Code. It defines crimes and punishments in India.
# You: rti
# Assistant: RTI stands for Right to Information. It allows citizens to request information from public authorities.
# You: exit
# Assistant: Stay informed about your legal rights. Goodbye!