# Setup notes: build llama.cpp with CUDA 11.0 for a compute-capability-3.5 (Kepler) GPU.
# Test models (GGUF files):
#   DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
#   Phi-3.5-mini-instruct-IQ2_M.gguf
#   tinyllama-1.1b-chat-v1.0.Q4_0.gguf

# --- System packages ---
# gcc-9/g++-9 are needed because CUDA 11.0's nvcc accepts host GCC only up to version 9.
sudo apt update
sudo apt upgrade -y
sudo apt install -y build-essential cmake git curl wget libcurl4-openssl-dev pkg-config gcc-9 g++-9

# --- Python 3.6 from the deadsnakes PPA (matches the cp36 PyTorch wheels below) ---
sudo apt install -y software-properties-common
sudo add-apt-repository -y ppa:deadsnakes/ppa
sudo apt install -y python3.6 python3.6-venv python3.6-dev

# --- PyTorch 1.7.1 + CUDA 11.0 wheels for Python 3.6 ---
# -O saves the files with a literal '+' instead of the URL-encoded %2B.
wget -O torch-1.7.1+cu110-cp36-cp36m-linux_x86_64.whl https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp36-cp36m-linux_x86_64.whl
wget -O torchvision-0.8.2+cu110-cp36-cp36m-linux_x86_64.whl https://download.pytorch.org/whl/cu110/torchvision-0.8.2%2Bcu110-cp36-cp36m-linux_x86_64.whl

# --- NVIDIA driver ---
# A reboot is typically required after the driver install before nvidia-smi works.
sudo ubuntu-drivers autoinstall
sleep 10

# --- CUDA 11.0 toolkit (toolkit only; the driver comes from ubuntu-drivers above) ---
CUDA_RUN_FILE="cuda_11.0.3_450.51.06_linux.run"
wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/$CUDA_RUN_FILE
chmod +x $CUDA_RUN_FILE
sudo ./$CUDA_RUN_FILE --silent --toolkit
echo 'export PATH=/usr/local/cuda-11.0/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc

# Verify the driver and the toolkit.
nvidia-smi
nvcc --version

# --- Python virtual environment ---
# Create the venv with python3.6 so it matches the cp36 wheels downloaded above;
# the PyTorch cu110 index has no builds newer than 1.7.1, so install the local wheels.
python3.6 -m venv llama_venv
source llama_venv/bin/activate
pip install --upgrade pip
pip install ./torch-1.7.1+cu110-cp36-cp36m-linux_x86_64.whl ./torchvision-0.8.2+cu110-cp36-cp36m-linux_x86_64.whl
pip install torchaudio==0.7.2   # the torchaudio release paired with torch 1.7.1

# --- Build llama.cpp from source ---
cd ~
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
rm -rf build
mkdir build
cd build

# EDIT FILE ggml/src/ggml-cuda/fattn-common.cuh before configuring: add this
# guard near the top so __builtin_assume becomes a no-op (older nvcc does not
# provide that builtin):
#
#   #ifdef __CUDACC__
#   #ifndef __builtin_assume
#   #define __builtin_assume(x) // empty: ignore it for CUDA compiler
#   #endif
#   #endif

# CMAKE_CUDA_ARCHITECTURES=35 targets compute capability 3.5 (Kepler). If nvcc
# rejects the default system compiler, also pass
# -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_CUDA_HOST_COMPILER=g++-9.
cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=35 ..
make -j$(nproc)

# --- OR: download a prebuilt release instead of building ---
# (the generic ubuntu-x64 build does not include CUDA support)
wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip

# --- Model download ---
# -O strips the ?download=true query string from the saved filename.
wget -O tinyllama-1.1b-chat-v1.0.Q4_0.gguf 'https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true'
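
# Sanity check for the PyTorch install (not part of the original steps; a
# minimal sketch using only standard torch APIs): confirm the venv's torch
# build was compiled against cu110 and can actually see the GPU.
python -c 'import torch; print(torch.__version__, torch.version.cuda); print(torch.cuda.is_available() and torch.cuda.get_device_name(0))'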
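
# Applying the fattn-common.cuh guard non-interactively: a sketch, assuming a
# fresh llama.cpp checkout. It simply prepends the five preprocessor lines shown
# above to the file. Run it from ~/llama.cpp before the cmake step.
printf '%s\n' \
  '#ifdef __CUDACC__' \
  '#ifndef __builtin_assume' \
  '#define __builtin_assume(x) // empty: ignore it for CUDA compiler' \
  '#endif' \
  '#endif' | cat - ggml/src/ggml-cuda/fattn-common.cuh > /tmp/fattn-common.cuh \
  && mv /tmp/fattn-common.cuh ggml/src/ggml-cuda/fattn-common.cuh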
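
# Smoke test: a sketch, assuming the CMake build above was used (binaries land
# in build/bin/) and the .gguf was downloaded into the build directory; adjust
# both paths to your layout. -ngl offloads layers to the GPU, -n caps the
# number of generated tokens.
./bin/llama-cli -m ./tinyllama-1.1b-chat-v1.0.Q4_0.gguf -p "Hello" -n 64 -ngl 99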