{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":689773665,"defaultBranch":"main","name":"llamafile","ownerLogin":"Mozilla-Ocho","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-09-10T21:12:32.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/117940224?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715871344.0","currentOid":""},"activityList":{"items":[{"before":"3a0cf4e1b43f1d082aa206fb056bcffd0367630c","after":"49cc13ca2bb381164abcfca2a86782062fda2c7a","ref":"refs/heads/main","pushedAt":"2024-05-17T09:43:58.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Updated README with instructions to load models from third-party apps (#417)","shortMessageHtmlLink":"Updated README with instructions to load models from third-party apps ("}},{"before":"d5f614c9d7d1efdf6d40a8812d7f148f41aa1072","after":"3a0cf4e1b43f1d082aa206fb056bcffd0367630c","ref":"refs/heads/main","pushedAt":"2024-05-16T23:54:16.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix typo in README.md (#407)\n\nSee also #376","shortMessageHtmlLink":"Fix typo in README.md (#407)"}},{"before":"c660d38da613969a7c9be0d69cc56a10f878209b","after":null,"ref":"refs/heads/add-embedding-models","pushedAt":"2024-05-16T14:55:44.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"k8si","name":"Kate Silverstein","path":"/k8si","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3207674?s=80&v=4"}},{"before":"966dd23477f934ad41f2e7840eb1f3e0d7fa7caa","after":"d5f614c9d7d1efdf6d40a8812d7f148f41aa1072","ref":"refs/heads/main","pushedAt":"2024-05-16T14:55:39.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"k8si","name":"Kate Silverstein","path":"/k8si","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3207674?s=80&v=4"},"commit":{"message":"Merge pull request #422 from Mozilla-Ocho/add-embedding-models\n\nadd text embedding models to 'other example llamafiles' table","shortMessageHtmlLink":"Merge pull request #422 from Mozilla-Ocho/add-embedding-models"}},{"before":null,"after":"c660d38da613969a7c9be0d69cc56a10f878209b","ref":"refs/heads/add-embedding-models","pushedAt":"2024-05-16T14:39:16.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"k8si","name":"Kate Silverstein","path":"/k8si","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3207674?s=80&v=4"},"commit":{"message":"add text embedding models to 'other example llamafiles' table","shortMessageHtmlLink":"add text embedding models to 'other example llamafiles' table"}},{"before":"a86e7ce6bf19a976957429945389add858ce60eb","after":"966dd23477f934ad41f2e7840eb1f3e0d7fa7caa","ref":"refs/heads/main","pushedAt":"2024-05-14T05:27:15.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"stlhood","name":"Stephen Hood","path":"/stlhood","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42821?s=80&v=4"},"commit":{"message":"Update HF links to point to new Mozilla account","shortMessageHtmlLink":"Update HF links to point to new Mozilla account"}},{"before":"d4099feff737f3be9a4bed017f1315a4d2cb773a","after":"a86e7ce6bf19a976957429945389add858ce60eb","ref":"refs/heads/main","pushedAt":"2024-05-13T04:33:35.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add Script To Upgrade llamafile Archives (#412)","shortMessageHtmlLink":"Add Script To Upgrade llamafile Archives (#412)"}},{"before":"d10eb328b82ce198db7805ed20748b76f467a6ec","after":"d4099feff737f3be9a4bed017f1315a4d2cb773a","ref":"refs/heads/main","pushedAt":"2024-05-12T03:04:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Update another logo","shortMessageHtmlLink":"Update another logo"}},{"before":"eaa756d59344f668cd3f6a762cf147930cacbdd2","after":"d10eb328b82ce198db7805ed20748b76f467a6ec","ref":"refs/heads/main","pushedAt":"2024-05-12T02:45:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Redraw tinyBLAS ANSI art logo","shortMessageHtmlLink":"Redraw tinyBLAS ANSI art logo"}},{"before":"30cdd9c0813e1dd3af3fd4b42e491368e5cc0ea3","after":"eaa756d59344f668cd3f6a762cf147930cacbdd2","ref":"refs/heads/main","pushedAt":"2024-05-10T19:15:13.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Faster AVX2 matrix multiplications for legacy quants (#405)\n\n* Matrix multiplications for legacy quants\r\n* Very slightly faster Q5 dequantization\r\n* Restore faster AVX512VNNI+AVX512VL performance","shortMessageHtmlLink":"Faster AVX2 matrix multiplications for legacy quants (#405)"}},{"before":"ae34574adb497f3bedc6f01c4a15eca34c0f9f69","after":"30cdd9c0813e1dd3af3fd4b42e491368e5cc0ea3","ref":"refs/heads/main","pushedAt":"2024-05-10T09:29:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Release llamafile v0.8.4","shortMessageHtmlLink":"Release llamafile v0.8.4"}},{"before":"b5c6df6e9e428ea56fe0969da33d8c164e1311f0","after":"ae34574adb497f3bedc6f01c4a15eca34c0f9f69","ref":"refs/heads/main","pushedAt":"2024-05-10T09:03:42.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Release llamafile v0.8.3","shortMessageHtmlLink":"Release llamafile v0.8.3"}},{"before":"4ee1e398273d63d5a6a9554d89eeabb784568f36","after":"b5c6df6e9e428ea56fe0969da33d8c164e1311f0","ref":"refs/heads/main","pushedAt":"2024-05-10T01:11:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Make vectorized expf() handle underflow\n\nOur optimized exponent function implementation now handles the underflow\ncondition by creating subnormal numbers, rather than flushing it to zero","shortMessageHtmlLink":"Make vectorized expf() handle underflow"}},{"before":"564d9fb8cf2f6e382a107b7d40f6c588c55f9538","after":"4ee1e398273d63d5a6a9554d89eeabb784568f36","ref":"refs/heads/main","pushedAt":"2024-05-09T21:42:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Release llamafile v0.8.2\n\n- Upgrade to cosmocc 3.3.6\n- Remove warnings from cuda build\n- Fix bug in llamafile_trapping_enabled\n- Refactor the new vectorized expf() code\n- iqk_mul_mat() only needs codegen for AVX2\n- Be less gung ho about the -ngl flag in README\n- Restore shell scriptabiilty fix for new tokenizer\n- Suppress divide by zero errors llama_print_timings()\n- Cut back on tinyBLAS CPU multiple output type kernels\n- Cut back NVIDIA fat binary releases to -arch=all-major\n- Remove GA (won't rely on slow broken irregular cloud dev tools)\n- Cut flash_attn_ext from release binaries (use --recompile to have it)","shortMessageHtmlLink":"Release llamafile v0.8.2"}},{"before":"94d094018c3e223c7d7579992dc153f710fdf9ef","after":"564d9fb8cf2f6e382a107b7d40f6c588c55f9538","ref":"refs/heads/main","pushedAt":"2024-05-08T11:57:57.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Clarify 'man' as manual in README (#376)","shortMessageHtmlLink":"Clarify 'man' as manual in README (#376)"}},{"before":"0e2845aacd1922a6c0fe1c336af3da80ca1f26a8","after":"94d094018c3e223c7d7579992dc153f710fdf9ef","ref":"refs/heads/main","pushedAt":"2024-05-08T05:31:14.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Sync with upstream llama.cpp project\n\nThis improves tokenization for Command-R, Refact, Olmo, and StarCoder.","shortMessageHtmlLink":"Sync with upstream llama.cpp project"}},{"before":"a2d159e45c2054069669764d27d760ab35957b20","after":"0e2845aacd1922a6c0fe1c336af3da80ca1f26a8","ref":"refs/heads/main","pushedAt":"2024-05-08T04:48:49.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix issues with server send_embeddings function\n\nFixes #404","shortMessageHtmlLink":"Fix issues with server send_embeddings function"}},{"before":"aa8c01a6e11343efb1424d67b293b7f13bc86cbc","after":"a2d159e45c2054069669764d27d760ab35957b20","ref":"refs/heads/main","pushedAt":"2024-05-07T10:55:54.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix server multimodal statistics (#392)","shortMessageHtmlLink":"Fix server multimodal statistics (#392)"}},{"before":"e6532f7c977aec66a73fffcc5d9a8cd48eef67bb","after":"aa8c01a6e11343efb1424d67b293b7f13bc86cbc","ref":"refs/heads/main","pushedAt":"2024-05-07T06:22:32.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Revert moondream vision language model support\n\nThis broke the server's LLaVA support in a non-obvious way.\n\nSee ggerganov/llama.cpp#6899\nSee ggerganov/llama.cpp#7060","shortMessageHtmlLink":"Revert moondream vision language model support"}},{"before":"911d58f7c754b1f7d224919313804dc76d20f6ae","after":"e6532f7c977aec66a73fffcc5d9a8cd48eef67bb","ref":"refs/heads/main","pushedAt":"2024-05-07T02:58:11.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Faster AVX2 prompt processing for k-quants and IQ4_XS (#394)","shortMessageHtmlLink":"Faster AVX2 prompt processing for k-quants and IQ4_XS (#394)"}},{"before":"eecbf89c18ee404d2968ed89e73870c25143fe39","after":"911d58f7c754b1f7d224919313804dc76d20f6ae","ref":"refs/heads/main","pushedAt":"2024-05-06T18:29:02.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix vim modelines (#351)\n\nAccomplished via:\r\n\r\n rg 'ft=c\\+\\+' -l0 | xargs -0 sed -i '' 's/ft=c++/ft=cpp/'","shortMessageHtmlLink":"Fix vim modelines (#351)"}},{"before":"790029485fdd3a81284efdcae1c0483a4d39a6a6","after":"eecbf89c18ee404d2968ed89e73870c25143fe39","ref":"refs/heads/main","pushedAt":"2024-05-06T18:26:26.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"More conservative strong/em markdown matcher (#352)\n\nBoth matchers are now constrained so that they will only be transformed\r\nif the left-hand characters are the start of the text or preceeded by\r\nwhitespace, and the right-hand characters are the end of the text or\r\nfollowed by whitespace or a punctuation mark.\r\n\r\nThis misses some cases, e.g. emphasis inside of parantheticals, but has\r\nbetter behavior with e.g. underscores in function names outside of code\r\nblocks.\r\n\r\nFixes #317.","shortMessageHtmlLink":"More conservative strong/em markdown matcher (#352)"}},{"before":"bbae0f6aaf35952e710664218450d205b2eb0633","after":"790029485fdd3a81284efdcae1c0483a4d39a6a6","ref":"refs/heads/main","pushedAt":"2024-05-04T03:06:05.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add special tokens to server llama_decode() inputs\n\nThe llamafile server /embedding endpoint was returning embeddings that\nwere very inconsistent with llama.cpp. This is due to changes upstream\nwith tokenization. The upstream project was adding special tokens like\n[\"[CLS]\", \" apples\", \" are\", \" red\", \" .\", \"[SEP]\"] before running the\noperation. We're now handling things more similar to upstream although\nthe llama.cpp server has diverged so much since removing LLaVA support\nthat they're very different pieces of software at this point.\n\nFixes #391","shortMessageHtmlLink":"Add special tokens to server llama_decode() inputs"}},{"before":"62381a7d344c6e6493196d82cddd6d2172eeec02","after":"bbae0f6aaf35952e710664218450d205b2eb0633","ref":"refs/heads/main","pushedAt":"2024-05-03T15:53:24.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add --precise and --fast flags","shortMessageHtmlLink":"Add --precise and --fast flags"}},{"before":"89c189e9f8212c45621254bce0599e4b49568a4d","after":"62381a7d344c6e6493196d82cddd6d2172eeec02","ref":"refs/heads/main","pushedAt":"2024-05-03T15:51:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add --precise and --fast flags","shortMessageHtmlLink":"Add --precise and --fast flags"}},{"before":"2b4da9804d6ace8554a2060d316fa3fc50a07df1","after":"89c189e9f8212c45621254bce0599e4b49568a4d","ref":"refs/heads/main","pushedAt":"2024-05-03T05:23:39.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Speed up prediction on CPUs with many cores\n\nThis change adds an if statement to the GGML synchronization code that\ncauses significantly fewer memory barriers to be used. The syncthreads\nfunction has also been introduced so that GGML_OP_MUL_MAT can add it's\nbarrier for initialization on its own. That's important, since if tiny\nBLAS doesn't need matrix B quantized, then the barrier can be skipped.\n\nThis change clamps the thread count to 20 maximum after the prefill is\ncompleted. Charting thread count for numerous models on a Threadripper\nreveals that twenty threads is consistently the optimal for prediction\n\nCompared to the blog post https://justine.lol/matmul/#professional the\ntoken generation speed for TinyLLaMA 1.1B has increased, from 52 to 98\ntokens per second. Prompt token per second is up to 2000. With Mistral\n7b the gains are more modest, going from 17 to 21 tok / sec","shortMessageHtmlLink":"Speed up prediction on CPUs with many cores"}},{"before":"c9d7393f82b9b4958932edea53a11d7a82a41198","after":"2b4da9804d6ace8554a2060d316fa3fc50a07df1","ref":"refs/heads/main","pushedAt":"2024-05-02T07:13:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Disable an unintended integrity check","shortMessageHtmlLink":"Disable an unintended integrity check"}},{"before":"0bdea60a92b18461ff71433af888fe831bc5c3ed","after":"c9d7393f82b9b4958932edea53a11d7a82a41198","ref":"refs/heads/main","pushedAt":"2024-05-02T05:50:10.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Make GGML vector ops go faster across hardware","shortMessageHtmlLink":"Make GGML vector ops go faster across hardware"}},{"before":"2af3b88fee64ca69d68a66d9246567ff713ae762","after":"0bdea60a92b18461ff71433af888fe831bc5c3ed","ref":"refs/heads/main","pushedAt":"2024-05-01T05:08:10.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Upgrade to latest llama.cpp code\n\n- There's a new super cool flash attention feature (pass -fa flag)\n\n- llama.cpp is now able to ask tinyBLAS to use an F16 output type,\n which should help it reduce overall memory requirements.\n\n- llama.cpp will now tell tinyBLAS when it wants a higher precision\n word size, which is useful for models like Phi-2 and Phi-3, where\n using ARM FP16 arithmetic might not be a good idea.\n\n- We're using a new strategy for synchronizing ggml-quants.c, where\n instead of doing runtime dispatching by hand, it is now done with\n generated code. This is good news since it means many quants that\n couldn't be optimized for modern machines before (e.g. IQ quants)\n will now go much faster on AVX2 and AVX512 microprocessors.","shortMessageHtmlLink":"Upgrade to latest llama.cpp code"}},{"before":"9cf7363553832ffb35a586f5c485659f1b2bd88c","after":"2af3b88fee64ca69d68a66d9246567ff713ae762","ref":"refs/heads/main","pushedAt":"2024-04-30T21:32:06.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add Kahan summation to tinyBLAS Q8/Q4 on ARM","shortMessageHtmlLink":"Add Kahan summation to tinyBLAS Q8/Q4 on ARM"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAETMBSWQA","startCursor":null,"endCursor":null}},"title":"Activity · Mozilla-Ocho/llamafile"}