# GGUF models:
#DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
#Phi-3.5-mini-instruct-IQ2_M.gguf
#tinyllama-1.1b-chat-v1.0.Q4_0.gguf

sudo apt update
sudo apt upgrade -y
sudo apt install -y build-essential cmake git curl wget libcurl4-openssl-dev pkg-config gcc-9 g++-9
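# gcc-9/g++-9 above are included because CUDA 11.0's nvcc refuses host compilers
# newer than GCC 9. Hedged sketch (not part of the original script): point later
# builds at the older toolchain through the standard CC/CXX/CUDAHOSTCXX variables.
export CC=gcc-9 CXX=g++-9 CUDAHOSTCXX=g++-9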
sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.6 python3.6-venv python3.6-dev
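# Optional check (an addition, not in the original): confirm the deadsnakes
# interpreter is actually on PATH before anything relies on it.
python3.6 --version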
wget https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp36-cp36m-linux_x86_64.whl
wget https://download.pytorch.org/whl/cu110/torchvision-0.8.2%2Bcu110-cp36-cp36m-linux_x86_64.whl
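# The two wheels above target CPython 3.6 (cp36). A commented-out sketch of
# installing them into a Python 3.6 environment; the globs are there because wget
# may keep the URL's percent-encoding (%2B) in the saved file names:
# python3.6 -m pip install ./torch-1.7.1*cu110*cp36*.whl ./torchvision-0.8.2*cu110*cp36*.whl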
sudo ubuntu-drivers autoinstall
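# The driver picked by ubuntu-drivers normally only loads after a reboot, so the
# nvidia-smi call further down may fail on the first pass. Hedged suggestion, not
# in the original script: reboot here and re-run the remaining steps afterwards.
# sudo reboot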
sleep 10

CUDA_RUN_FILE="cuda_11.0.3_450.51.06_linux.run"
wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/$CUDA_RUN_FILE
chmod +x $CUDA_RUN_FILE
sudo ./$CUDA_RUN_FILE --silent --toolkit

echo 'export PATH=/usr/local/cuda-11.0/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc
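# Ubuntu's default ~/.bashrc returns early for non-interactive shells, so sourcing
# it from a script may not pick up the lines appended above. As a safety net,
# export the CUDA paths directly in the current shell as well:
export PATH=/usr/local/cuda-11.0/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH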
nvidia-smi
nvcc --version

python3 -m venv llama_venv
source llama_venv/bin/activate
pip install --upgrade pip
pip install torch==1.12.1+cu110 torchvision==0.13.1+cu110 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu110
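# Quick sanity check that this interpreter sees the GPU (a sketch; note that if the
# pinned 1.12.1+cu110 build is not published on the cu110 index, the 1.7.1 wheels
# downloaded earlier are the fallback):
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"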
cd ~
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
rm -rf build
mkdir build
cd build

# EDIT FILE: ggml/src/ggml-cuda/fattn-common.
# Before configuring, add the guard below near the top of that header; it defines
# __builtin_assume as an empty macro so the CUDA compiler simply ignores it:
#
#ifdef __CUDACC__
#ifndef __builtin_assume
#define __builtin_assume(x) // empty: ignore it for CUDA compiler
#endif
#endif
#
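# Hedged sketch of applying that guard automatically. The full header name is an
# assumption (the note above truncates it as "fattn-common."), and GUARD_FILE is a
# variable introduced here for illustration; adjust the path to your checkout:
GUARD_FILE="../ggml/src/ggml-cuda/fattn-common.cuh"
if [ -f "$GUARD_FILE" ] && ! grep -q '__builtin_assume(x)' "$GUARD_FILE"; then
    printf '%s\n' '#ifdef __CUDACC__' '#ifndef __builtin_assume' \
        '#define __builtin_assume(x) // empty: ignore it for CUDA compiler' \
        '#endif' '#endif' | cat - "$GUARD_FILE" > "$GUARD_FILE.tmp" \
        && mv "$GUARD_FILE.tmp" "$GUARD_FILE"
fi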
cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=35 ..
make -j$(nproc)
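# Hedged check that the CMake build produced the CLI binary (CMake builds place the
# executables under build/bin):
ls bin/llama-cli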
# OR: skip the source build above and grab a prebuilt binary release instead:
wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip
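# For the prebuilt route, unpack the archive (a sketch; assumes the unzip package
# is installed, add it via apt if it is missing):
unzip -o llama-b6148-bin-ubuntu-x64.zip -d llama-prebuilt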
# Download the TinyLlama GGUF (use -O so the ?download=true query string does not
# end up in the saved file name):
wget -O tinyllama-1.1b-chat-v1.0.Q4_0.gguf 'https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true'
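# Hedged usage sketch: run the downloaded model with the binary built above. The
# paths assume the CMake route with build/ as the current directory; adjust the
# binary path if you used the prebuilt zip instead:
./bin/llama-cli -m tinyllama-1.1b-chat-v1.0.Q4_0.gguf -ngl 99 -n 64 -p "Hello from TinyLlama"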