mirror of https://git.suyu.dev/suyu/suyu
Merge pull request #329 from bunnei/shader-gen-part-1
OpenGL shader generation part 1 (merge-requests/60/head)
commit
fdca7b5f7a
@ -0,0 +1,340 @@
|
||||
// Copyright (c) 2011 Google, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
||||
//
|
||||
// This file provides CityHash64() and related functions.
|
||||
//
|
||||
// It's probably possible to create even faster hash functions by
|
||||
// writing a program that systematically explores some of the space of
|
||||
// possible hash functions, by using SIMD instructions, or by
|
||||
// compromising on hash quality.
|
||||
|
||||
#include <algorithm>
|
||||
#include <string.h> // for memcpy and memset
|
||||
#include "cityhash.h"
|
||||
#include "common/swap.h"
|
||||
|
||||
// #include "config.h"
|
||||
// Build-time switches that would otherwise come from config.h (include disabled above).
// GCC-compatible compilers provide __builtin_expect, which the LIKELY macro relies on.
#if defined(__GNUC__)
#define HAVE_BUILTIN_EXPECT 1
#endif
// COMMON_BIG_ENDIAN is presumably provided by common/swap.h -- TODO confirm.
#if defined(COMMON_BIG_ENDIAN)
#define WORDS_BIGENDIAN 1
#endif

using namespace std;

// Short fixed-width integer aliases used throughout this file.
using uint8 = uint8_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
|
||||
|
||||
namespace Common {
|
||||
|
||||
// Copies 8 bytes starting at p into a uint64. memcpy is used so that p does not
// need any particular alignment; the bytes are taken as they appear in memory.
static uint64 UNALIGNED_LOAD64(const char* p) {
    uint64 word;
    memcpy(&word, p, sizeof(uint64));
    return word;
}
|
||||
|
||||
// Copies 4 bytes starting at p into a uint32. memcpy is used so that p does not
// need any particular alignment; the bytes are taken as they appear in memory.
static uint32 UNALIGNED_LOAD32(const char* p) {
    uint32 word;
    memcpy(&word, p, sizeof(uint32));
    return word;
}
|
||||
|
||||
// Byte-order normalisation applied to freshly loaded words: when WORDS_BIGENDIAN
// is defined the value goes through swap32/swap64, otherwise it is used unchanged.
#if defined(WORDS_BIGENDIAN)
#define uint32_in_expected_order(x) (swap32(x))
#define uint64_in_expected_order(x) (swap64(x))
#else
#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)
#endif
|
||||
|
||||
// Branch hint: LIKELY(x) evaluates to the truth value of x. With
// HAVE_BUILTIN_EXPECT set it additionally goes through __builtin_expect.
// An existing definition of LIKELY is left untouched.
#ifndef LIKELY
#if HAVE_BUILTIN_EXPECT
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#else
#define LIKELY(x) (x)
#endif
#endif
|
||||
|
||||
// Loads 8 bytes from p (no alignment requirement) and applies the byte-order
// normalisation macro to the result.
static uint64 Fetch64(const char* p) {
    const uint64 raw = UNALIGNED_LOAD64(p);
    return uint64_in_expected_order(raw);
}
|
||||
|
||||
// Loads 4 bytes from p (no alignment requirement) and applies the byte-order
// normalisation macro to the result.
static uint32 Fetch32(const char* p) {
    const uint32 raw = UNALIGNED_LOAD32(p);
    return uint32_in_expected_order(raw);
}
|
||||
|
||||
// Some primes between 2^63 and 2^64 for various uses.
|
||||
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
|
||||
static const uint64 k1 = 0xb492b66fbe98f273ULL;
|
||||
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
|
||||
|
||||
// Bitwise right rotate. Normally this will compile to a single
|
||||
// instruction, especially if the shift is a manifest constant.
|
||||
static uint64 Rotate(uint64 val, int shift) {
|
||||
// Avoid shifting by 64: doing so yields an undefined result.
|
||||
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
|
||||
}
|
||||
|
||||
// XORs val with itself shifted right by 47 bits, folding the top 17 bits
// into the low bits.
static uint64 ShiftMix(uint64 val) {
    const uint64 folded = val >> 47;
    return val ^ folded;
}
|
||||
|
||||
// Hashes two 64-bit words down to one by packing them into a uint128
// (u as first element, v as second) and delegating to Hash128to64.
static uint64 HashLen16(uint64 u, uint64 v) {
    const uint128 packed(u, v);
    return Hash128to64(packed);
}
|
||||
|
||||
// Hashes two 64-bit words down to one using a caller-supplied multiplier.
// Same multiply / xor-shift-47 sequence as Hash128to64, with `mul` in place of
// that function's fixed constant.
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
    // Murmur-inspired hashing.
    uint64 first = (u ^ v) * mul;
    first ^= first >> 47;
    uint64 second = (v ^ first) * mul;
    second ^= second >> 47;
    return second * mul;
}
|
||||
|
||||
// Hashes a short input. Used by this file for len <= 16; the input is read
// with the widest loads that fit (single bytes, two 32-bit loads, or two
// 64-bit loads), where the two loads may overlap.
static uint64 HashLen0to16(const char* s, size_t len) {
    // Empty input hashes to a fixed constant.
    if (len == 0) {
        return k2;
    }

    // 1..3 bytes: combine the first, middle and last byte with the length.
    if (len < 4) {
        const uint8 first = s[0];
        const uint8 middle = s[len >> 1];
        const uint8 last = s[len - 1];
        const uint32 y = static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
        const uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(last) << 2);
        return ShiftMix(y * k2 ^ z * k0) * k2;
    }

    const uint64 mul = k2 + len * 2;

    // 4..7 bytes: one 32-bit load from the start and one from the end.
    if (len < 8) {
        const uint64 head = Fetch32(s);
        return HashLen16(len + (head << 3), Fetch32(s + len - 4), mul);
    }

    // 8 bytes or more: one 64-bit load from the start and one from the end.
    const uint64 head = Fetch64(s) + k2;
    const uint64 tail = Fetch64(s + len - 8);
    const uint64 lo = Rotate(tail, 37) * mul + head;
    const uint64 hi = (Rotate(head, 25) + tail) * mul;
    return HashLen16(lo, hi, mul);
}
|
||||
|
||||
// This probably works well for 16-byte strings as well, but it may be overkill
|
||||
// in that case.
|
||||
static uint64 HashLen17to32(const char* s, size_t len) {
|
||||
uint64 mul = k2 + len * 2;
|
||||
uint64 a = Fetch64(s) * k1;
|
||||
uint64 b = Fetch64(s + 8);
|
||||
uint64 c = Fetch64(s + len - 8) * mul;
|
||||
uint64 d = Fetch64(s + len - 16) * k2;
|
||||
return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul);
|
||||
}
|
||||
|
||||
// Return a 16-byte hash for 48 bytes. Quick and dirty.
|
||||
// Callers do best to use "random-looking" values for a and b.
|
||||
static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
|
||||
uint64 b) {
|
||||
a += w;
|
||||
b = Rotate(b + a + z, 21);
|
||||
uint64 c = a;
|
||||
a += x;
|
||||
a += y;
|
||||
b += Rotate(a, 44);
|
||||
return make_pair(a + z, b + c);
|
||||
}
|
||||
|
||||
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
|
||||
static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
|
||||
return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a,
|
||||
b);
|
||||
}
|
||||
|
||||
// Return an 8-byte hash for 33 to 64 bytes.
|
||||
static uint64 HashLen33to64(const char* s, size_t len) {
|
||||
uint64 mul = k2 + len * 2;
|
||||
uint64 a = Fetch64(s) * k2;
|
||||
uint64 b = Fetch64(s + 8);
|
||||
uint64 c = Fetch64(s + len - 24);
|
||||
uint64 d = Fetch64(s + len - 32);
|
||||
uint64 e = Fetch64(s + 16) * k2;
|
||||
uint64 f = Fetch64(s + 24) * 9;
|
||||
uint64 g = Fetch64(s + len - 8);
|
||||
uint64 h = Fetch64(s + len - 16) * mul;
|
||||
uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
|
||||
uint64 v = ((a + g) ^ d) + f + 1;
|
||||
uint64 w = swap64((u + v) * mul) + h;
|
||||
uint64 x = Rotate(e + f, 42) + c;
|
||||
uint64 y = (swap64((v + w) * mul) + g) * mul;
|
||||
uint64 z = e + f + c;
|
||||
a = swap64((x + z) * mul + y) + b;
|
||||
b = ShiftMix((z + a) * mul + d + h) * mul;
|
||||
return b + x;
|
||||
}
|
||||
|
||||
// Hash function for a byte array: returns a 64-bit hash of s[0 .. len-1].
// Inputs of up to 64 bytes are dispatched to a length-specific helper; longer
// inputs are consumed in 64-byte chunks.
uint64 CityHash64(const char* s, size_t len) {
    if (len <= 16) {
        return HashLen0to16(s, len);
    }
    if (len <= 32) {
        return HashLen17to32(s, len);
    }
    if (len <= 64) {
        return HashLen33to64(s, len);
    }

    // For strings over 64 bytes we hash the end first, and then as we
    // loop we keep 56 bytes of state: v, w, x, y, and z.
    const char* const end = s + len;
    uint64 x = Fetch64(end - 40);
    uint64 y = Fetch64(end - 16) + Fetch64(end - 56);
    uint64 z = HashLen16(Fetch64(end - 48) + len, Fetch64(end - 24));
    pair<uint64, uint64> v = WeakHashLen32WithSeeds(end - 64, len, z);
    pair<uint64, uint64> w = WeakHashLen32WithSeeds(end - 32, y + k1, x);
    x = x * k1 + Fetch64(s);

    // Round (len - 1) down to a multiple of 64 and process that many bytes in
    // 64-byte chunks. Because len > 64 here, at least one chunk is processed.
    size_t remaining = (len - 1) & ~static_cast<size_t>(63);
    const char* chunk = s;
    do {
        x = Rotate(x + y + v.first + Fetch64(chunk + 8), 37) * k1;
        y = Rotate(y + v.second + Fetch64(chunk + 48), 42) * k1;
        x ^= w.second;
        y += v.first + Fetch64(chunk + 40);
        z = Rotate(z + w.first, 33) * k1;
        v = WeakHashLen32WithSeeds(chunk, v.second * k1, x + w.first);
        w = WeakHashLen32WithSeeds(chunk + 32, z + w.second, y + Fetch64(chunk + 16));
        std::swap(z, x);
        chunk += 64;
        remaining -= 64;
    } while (remaining != 0);

    // Collapse the 56 bytes of state into the final 64-bit result.
    const uint64 lo = HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z;
    const uint64 hi = HashLen16(v.second, w.second) + x;
    return HashLen16(lo, hi);
}
|
||||
|
||||
// Hash function for a byte array with a 64-bit seed mixed into the result.
// Equivalent to CityHash64WithSeeds with k2 as the first seed.
uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
    const uint64 first_seed = k2;
    return CityHash64WithSeeds(s, len, first_seed, seed);
}
|
||||
|
||||
// Hash function for a byte array with two 64-bit seeds mixed into the result:
// seed0 is subtracted from the unseeded hash and the difference is then
// combined with seed1 via HashLen16.
uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
    const uint64 unseeded = CityHash64(s, len);
    return HashLen16(unseeded - seed0, seed1);
}
|
||||
|
||||
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
|
||||
// of any length representable in signed long. Based on City and Murmur.
|
||||
static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
|
||||
uint64 a = Uint128Low64(seed);
|
||||
uint64 b = Uint128High64(seed);
|
||||
uint64 c = 0;
|
||||
uint64 d = 0;
|
||||
signed long l = static_cast<long>(len) - 16;
|
||||
if (l <= 0) { // len <= 16
|
||||
a = ShiftMix(a * k1) * k1;
|
||||
c = b * k1 + HashLen0to16(s, len);
|
||||
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
|
||||
} else { // len > 16
|
||||
c = HashLen16(Fetch64(s + len - 8) + k1, a);
|
||||
d = HashLen16(b + len, c + Fetch64(s + len - 16));
|
||||
a += d;
|
||||
do {
|
||||
a ^= ShiftMix(Fetch64(s) * k1) * k1;
|
||||
a *= k1;
|
||||
b ^= a;
|
||||
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
|
||||
c *= k1;
|
||||
d ^= c;
|
||||
s += 16;
|
||||
l -= 16;
|
||||
} while (l > 0);
|
||||
}
|
||||
a = HashLen16(a, c);
|
||||
b = HashLen16(d, b);
|
||||
return uint128(a ^ b, HashLen16(b, a));
|
||||
}
|
||||
|
||||
// Hash function for a byte array, with a 128-bit seed also hashed into the
// result. Inputs shorter than 128 bytes are delegated to CityMurmur; longer
// inputs are consumed 128 bytes per outer iteration, followed by up to four
// 32-byte chunks taken from the end of the input.
uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
    if (len < 128) {
        return CityMurmur(s, len, seed);
    }

    // We expect len >= 128 to be the common case. Keep 56 bytes of state:
    // v, w, x, y, and z.
    uint64 x = Uint128Low64(seed);
    uint64 y = Uint128High64(seed);
    uint64 z = len * k1;
    const uint64 v_first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
    pair<uint64, uint64> v(v_first, Rotate(v_first, 42) * k1 + Fetch64(s + 8));
    pair<uint64, uint64> w(Rotate(y + z, 35) * k1 + x, Rotate(x + Fetch64(s + 88), 53) * k1);

    // Same 64-byte round as the inner loop of CityHash64(), executed twice per
    // outer iteration so that 128 bytes are consumed each time around.
    do {
        for (int round = 0; round < 2; ++round) {
            x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
            y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
            x ^= w.second;
            y += v.first + Fetch64(s + 40);
            z = Rotate(z + w.first, 33) * k1;
            v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
            w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
            std::swap(z, x);
            s += 64;
        }
        len -= 128;
    } while (LIKELY(len >= 128));

    x += Rotate(v.first + z, 49) * k0;
    y = y * k0 + Rotate(w.second, 37);
    z = z * k0 + Rotate(w.first, 27);
    w.first *= 9;
    v.first *= k0;

    // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
    // The last chunk may start before s, i.e. inside bytes that were already
    // consumed by the loop above.
    size_t tail_done = 0;
    while (tail_done < len) {
        tail_done += 32;
        const char* const chunk = s + len - tail_done;
        y = Rotate(x + y, 42) * k0 + v.second;
        w.first += Fetch64(chunk + 16);
        x = x * k0 + w.first;
        z += w.second + Fetch64(chunk);
        w.second += v.first;
        v = WeakHashLen32WithSeeds(chunk, v.first + z, v.second);
        v.first *= k0;
    }

    // At this point our 56 bytes of state should contain more than
    // enough information for a strong 128-bit hash. We use two
    // different 56-byte-to-8-byte hashes to get a 16-byte final result.
    x = HashLen16(x, v.first);
    y = HashLen16(y + z, w.first);
    const uint64 low = HashLen16(x + v.second, w.second) + y;
    const uint64 high = HashLen16(x + w.second, y + v.second);
    return uint128(low, high);
}
|
||||
|
||||
// Hash function for a byte array, returning a 128-bit hash.
// With at least 16 bytes of input, the first 16 bytes are turned into the seed
// and only the remainder is hashed; shorter inputs use the fixed seed (k0, k1).
uint128 CityHash128(const char* s, size_t len) {
    if (len >= 16) {
        const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
        return CityHash128WithSeed(s + 16, len - 16, seed);
    }
    return CityHash128WithSeed(s, len, uint128(k0, k1));
}
|
||||
|
||||
} // namespace Common
|
@ -0,0 +1,110 @@
|
||||
// Copyright (c) 2011 Google, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
||||
//
|
||||
// http://code.google.com/p/cityhash/
|
||||
//
|
||||
// This file provides a few functions for hashing strings. All of them are
|
||||
// high-quality functions in the sense that they pass standard tests such
|
||||
// as Austin Appleby's SMHasher. They are also fast.
|
||||
//
|
||||
// For 64-bit x86 code, on short strings, we don't know of anything faster than
|
||||
// CityHash64 that is of comparable quality. We believe our nearest competitor
|
||||
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
|
||||
// tables and most other hashing (excluding cryptography).
|
||||
//
|
||||
// For 64-bit x86 code, on long strings, the picture is more complicated.
|
||||
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
|
||||
// CityHashCrc128 appears to be faster than all competitors of comparable
|
||||
// quality. CityHash128 is also good but not quite as fast. We believe our
|
||||
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
|
||||
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
|
||||
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
|
||||
// Note that CityHashCrc128 is declared in citycrc.h.
|
||||
//
|
||||
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
|
||||
// is of comparable quality. We believe our nearest competitor is Murmur3A.
|
||||
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
|
||||
//
|
||||
// Functions in the CityHash family are not suitable for cryptography.
|
||||
//
|
||||
// Please see CityHash's README file for more details on our performance
|
||||
// measurements and so on.
|
||||
//
|
||||
// WARNING: This code has been only lightly tested on big-endian platforms!
|
||||
// It is known to work well on little-endian platforms that have a small penalty
|
||||
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
|
||||
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
|
||||
// bug reports are welcome.
|
||||
//
|
||||
// By the way, for some hash functions, given strings a and b, the hash
|
||||
// of a+b is easily derived from the hashes of a and b. This property
|
||||
// doesn't hold for any hash functions in this file.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h> // for size_t.
|
||||
|
||||
namespace Common {

// 128-bit value stored as a pair: `first` is the low 64 bits and `second` the
// high 64 bits (see the accessors below).
using uint128 = std::pair<uint64_t, uint64_t>;

// Returns the low 64 bits of x.
inline uint64_t Uint128Low64(const uint128& x) {
    return x.first;
}

// Returns the high 64 bits of x.
inline uint64_t Uint128High64(const uint128& x) {
    return x.second;
}

// Hash function for a byte array.
uint64_t CityHash64(const char* buf, size_t len);

// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);

// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);

// Hash function for a byte array.
uint128 CityHash128(const char* s, size_t len);

// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);

// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
inline uint64_t Hash128to64(const uint128& x) {
    // Murmur-inspired hashing: two rounds of multiply followed by xor-shift by
    // 47, then a final multiply.
    const uint64_t kMul = 0x9ddfea08eb382d69ULL;
    const uint64_t low = Uint128Low64(x);
    const uint64_t high = Uint128High64(x);

    uint64_t a = (low ^ high) * kMul;
    a ^= (a >> 47);
    uint64_t b = (high ^ a) * kMul;
    b ^= (b >> 47);
    return b * kMul;
}

} // namespace Common
|
@ -1,141 +0,0 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
|
||||
// the conversion here
|
||||
static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
|
||||
return p[i];
|
||||
}
|
||||
|
||||
// Finalization mix - force all bits of a hash block to avalanche
|
||||
static FORCE_INLINE u64 fmix64(u64 k) {
|
||||
k ^= k >> 33;
|
||||
k *= 0xff51afd7ed558ccdllu;
|
||||
k ^= k >> 33;
|
||||
k *= 0xc4ceb9fe1a85ec53llu;
|
||||
k ^= k >> 33;
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
|
||||
// platforms (MurmurHash3_x64_128). It was taken from:
|
||||
// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
|
||||
void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
|
||||
const u8* data = (const u8*)key;
|
||||
const size_t nblocks = len / 16;
|
||||
|
||||
u64 h1 = seed;
|
||||
u64 h2 = seed;
|
||||
|
||||
const u64 c1 = 0x87c37b91114253d5llu;
|
||||
const u64 c2 = 0x4cf5ad432745937fllu;
|
||||
|
||||
// Body
|
||||
|
||||
const u64* blocks = (const u64*)(data);
|
||||
|
||||
for (size_t i = 0; i < nblocks; i++) {
|
||||
u64 k1 = getblock64(blocks, i * 2 + 0);
|
||||
u64 k2 = getblock64(blocks, i * 2 + 1);
|
||||
|
||||
k1 *= c1;
|
||||
k1 = _rotl64(k1, 31);
|
||||
k1 *= c2;
|
||||
h1 ^= k1;
|
||||
|
||||
h1 = _rotl64(h1, 27);
|
||||
h1 += h2;
|
||||
h1 = h1 * 5 + 0x52dce729;
|
||||
|
||||
k2 *= c2;
|
||||
k2 = _rotl64(k2, 33);
|
||||
k2 *= c1;
|
||||
h2 ^= k2;
|
||||
|
||||
h2 = _rotl64(h2, 31);
|
||||
h2 += h1;
|
||||
h2 = h2 * 5 + 0x38495ab5;
|
||||
}
|
||||
|
||||
// Tail
|
||||
|
||||
const u8* tail = (const u8*)(data + nblocks * 16);
|
||||
|
||||
u64 k1 = 0;
|
||||
u64 k2 = 0;
|
||||
|
||||
switch (len & 15) {
|
||||
case 15:
|
||||
k2 ^= ((u64)tail[14]) << 48;
|
||||
case 14:
|
||||
k2 ^= ((u64)tail[13]) << 40;
|
||||
case 13:
|
||||
k2 ^= ((u64)tail[12]) << 32;
|
||||
case 12:
|
||||
k2 ^= ((u64)tail[11]) << 24;
|
||||
case 11:
|
||||
k2 ^= ((u64)tail[10]) << 16;
|
||||
case 10:
|
||||
k2 ^= ((u64)tail[9]) << 8;
|
||||
case 9:
|
||||
k2 ^= ((u64)tail[8]) << 0;
|
||||
k2 *= c2;
|
||||
k2 = _rotl64(k2, 33);
|
||||
k2 *= c1;
|
||||
h2 ^= k2;
|
||||
|
||||
case 8:
|
||||
k1 ^= ((u64)tail[7]) << 56;
|
||||
case 7:
|
||||
k1 ^= ((u64)tail[6]) << 48;
|
||||
case 6:
|
||||
k1 ^= ((u64)tail[5]) << 40;
|
||||
case 5:
|
||||
k1 ^= ((u64)tail[4]) << 32;
|
||||
case 4:
|
||||
k1 ^= ((u64)tail[3]) << 24;
|
||||
case 3:
|
||||
k1 ^= ((u64)tail[2]) << 16;
|
||||
case 2:
|
||||
k1 ^= ((u64)tail[1]) << 8;
|
||||
case 1:
|
||||
k1 ^= ((u64)tail[0]) << 0;
|
||||
k1 *= c1;
|
||||
k1 = _rotl64(k1, 31);
|
||||
k1 *= c2;
|
||||
h1 ^= k1;
|
||||
};
|
||||
|
||||
// Finalization
|
||||
|
||||
h1 ^= len;
|
||||
h2 ^= len;
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
h1 = fmix64(h1);
|
||||
h2 = fmix64(h2);
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
((u64*)out)[0] = h1;
|
||||
((u64*)out)[1] = h2;
|
||||
}
|
||||
|
||||
} // namespace Common
|
@ -0,0 +1,327 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "common/bit_field.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Shader {
|
||||
|
||||
struct Register {
|
||||
Register() = default;
|
||||
|
||||
constexpr Register(u64 value) : value(value) {}
|
||||
|
||||
constexpr u64 GetIndex() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
constexpr operator u64() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr u64 operator-(const T& oth) const {
|
||||
return value - oth;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr u64 operator&(const T& oth) const {
|
||||
return value & oth;
|
||||
}
|
||||
|
||||
constexpr u64 operator&(const Register& oth) const {
|
||||
return value & oth.value;
|
||||
}
|
||||
|
||||
constexpr u64 operator~() const {
|
||||
return ~value;
|
||||
}
|
||||
|
||||
private:
|
||||
u64 value;
|
||||
};
|
||||
|
||||
union Attribute {
|
||||
Attribute() = default;
|
||||
|
||||
constexpr Attribute(u64 value) : value(value) {}
|
||||
|
||||
enum class Index : u64 {
|
||||
Position = 7,
|
||||
Attribute_0 = 8,
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<22, 2, u64> element;
|
||||
BitField<24, 6, Index> index;
|
||||
BitField<47, 3, u64> size;
|
||||
} fmt20;
|
||||
|
||||
union {
|
||||
BitField<30, 2, u64> element;
|
||||
BitField<32, 6, Index> index;
|
||||
} fmt28;
|
||||
|
||||
BitField<39, 8, u64> reg;
|
||||
u64 value;
|
||||
};
|
||||
|
||||
union Uniform {
|
||||
BitField<20, 14, u64> offset;
|
||||
BitField<34, 5, u64> index;
|
||||
};
|
||||
|
||||
union OpCode {
|
||||
enum class Id : u64 {
|
||||
TEXS = 0x6C,
|
||||
IPA = 0xE0,
|
||||
FFMA_IMM = 0x65,
|
||||
FFMA_CR = 0x93,
|
||||
FFMA_RC = 0xA3,
|
||||
FFMA_RR = 0xB3,
|
||||
|
||||
FADD_C = 0x98B,
|
||||
FMUL_C = 0x98D,
|
||||
MUFU = 0xA10,
|
||||
FADD_R = 0xB8B,
|
||||
FMUL_R = 0xB8D,
|
||||
LD_A = 0x1DFB,
|
||||
ST_A = 0x1DFE,
|
||||
|
||||
FSETP_R = 0x5BB,
|
||||
FSETP_C = 0x4BB,
|
||||
EXIT = 0xE30,
|
||||
KIL = 0xE33,
|
||||
|
||||
FMUL_IMM = 0x70D,
|
||||
FMUL_IMM_x = 0x72D,
|
||||
FADD_IMM = 0x70B,
|
||||
FADD_IMM_x = 0x72B,
|
||||
};
|
||||
|
||||
enum class Type {
|
||||
Trivial,
|
||||
Arithmetic,
|
||||
Ffma,
|
||||
Flow,
|
||||
Memory,
|
||||
Unknown,
|
||||
};
|
||||
|
||||
struct Info {
|
||||
Type type;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
OpCode() = default;
|
||||
|
||||
constexpr OpCode(Id value) : value(static_cast<u64>(value)) {}
|
||||
|
||||
constexpr OpCode(u64 value) : value{value} {}
|
||||
|
||||
constexpr Id EffectiveOpCode() const {
|
||||
switch (op1) {
|
||||
case Id::TEXS:
|
||||
return op1;
|
||||
}
|
||||
|
||||
switch (op2) {
|
||||
case Id::IPA:
|
||||
return op2;
|
||||
}
|
||||
|
||||
switch (op3) {
|
||||
case Id::FFMA_IMM:
|
||||
case Id::FFMA_CR:
|
||||
case Id::FFMA_RC:
|
||||
case Id::FFMA_RR:
|
||||
return op3;
|
||||
}
|
||||
|
||||
switch (op4) {
|
||||
case Id::EXIT:
|
||||
case Id::FSETP_R:
|
||||
case Id::FSETP_C:
|
||||
case Id::KIL:
|
||||
return op4;
|
||||
}
|
||||
|
||||
switch (op5) {
|
||||
case Id::MUFU:
|
||||
case Id::LD_A:
|
||||
case Id::ST_A:
|
||||
case Id::FADD_R:
|
||||
case Id::FADD_C:
|
||||
case Id::FMUL_R:
|
||||
case Id::FMUL_C:
|
||||
return op5;
|
||||
|
||||
case Id::FMUL_IMM:
|
||||
case Id::FMUL_IMM_x:
|
||||
return Id::FMUL_IMM;
|
||||
|
||||
case Id::FADD_IMM:
|
||||
case Id::FADD_IMM_x:
|
||||
return Id::FADD_IMM;
|
||||
}
|
||||
|
||||
return static_cast<Id>(value);
|
||||
}
|
||||
|
||||
static const Info& GetInfo(const OpCode& opcode) {
|
||||
static const std::map<Id, Info> info_table{BuildInfoTable()};
|
||||
const auto& search{info_table.find(opcode.EffectiveOpCode())};
|
||||
if (search != info_table.end()) {
|
||||
return search->second;
|
||||
}
|
||||
|
||||
static const Info unknown{Type::Unknown, "UNK"};
|
||||
return unknown;
|
||||
}
|
||||
|
||||
constexpr operator Id() const {
|
||||
return static_cast<Id>(value);
|
||||
}
|
||||
|
||||
constexpr OpCode operator<<(size_t bits) const {
|
||||
return value << bits;
|
||||
}
|
||||
|
||||
constexpr OpCode operator>>(size_t bits) const {
|
||||
return value >> bits;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr u64 operator-(const T& oth) const {
|
||||
return value - oth;
|
||||
}
|
||||
|
||||
constexpr u64 operator&(const OpCode& oth) const {
|
||||
return value & oth.value;
|
||||
}
|
||||
|
||||
constexpr u64 operator~() const {
|
||||
return ~value;
|
||||
}
|
||||
|
||||
static std::map<Id, Info> BuildInfoTable() {
|
||||
std::map<Id, Info> info_table;
|
||||
info_table[Id::TEXS] = {Type::Memory, "texs"};
|
||||
info_table[Id::LD_A] = {Type::Memory, "ld_a"};
|
||||
info_table[Id::ST_A] = {Type::Memory, "st_a"};
|
||||
info_table[Id::MUFU] = {Type::Arithmetic, "mufu"};
|
||||
info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"};
|
||||
info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"};
|
||||
info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"};
|
||||
info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"};
|
||||
info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"};
|
||||
info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"};
|
||||
info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"};
|
||||
info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
|
||||
info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
|
||||
info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
|
||||
info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
|
||||
info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
|
||||
info_table[Id::EXIT] = {Type::Trivial, "exit"};
|
||||
info_table[Id::IPA] = {Type::Trivial, "ipa"};
|
||||
info_table[Id::KIL] = {Type::Flow, "kil"};
|
||||
return info_table;
|
||||
}
|
||||
|
||||
BitField<57, 7, Id> op1;
|
||||
BitField<56, 8, Id> op2;
|
||||
BitField<55, 9, Id> op3;
|
||||
BitField<52, 12, Id> op4;
|
||||
BitField<51, 13, Id> op5;
|
||||
u64 value;
|
||||
};
|
||||
static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
|
||||
|
||||
} // namespace Shader
|
||||
} // namespace Tegra
|
||||
|
||||
namespace std {
|
||||
|
||||
// TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330.
|
||||
template <>
|
||||
struct make_unsigned<Tegra::Shader::Attribute> {
|
||||
using type = Tegra::Shader::Attribute;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct make_unsigned<Tegra::Shader::Register> {
|
||||
using type = Tegra::Shader::Register;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct make_unsigned<Tegra::Shader::OpCode> {
|
||||
using type = Tegra::Shader::OpCode;
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace Tegra {
|
||||
namespace Shader {
|
||||
|
||||
/// Special values of an instruction's predicate field (Instruction::pred).
enum class Pred : u64 {
    UnusedIndex = 0x7,
    NeverExecute = 0xf,
};
|
||||
|
||||
/// Sub-operation selector stored in Instruction::sub_op.
/// NOTE(review): presumably selects the function computed by MUFU -- confirm against
/// the decoder.
enum class SubOp : u64 {
    Cos = 0x0,
    Sin = 0x1,
    Ex2 = 0x2,
    Lg2 = 0x3,
    Rcp = 0x4,
    Rsq = 0x5,
};
|
||||
|
||||
union Instruction {
|
||||
Instruction& operator=(const Instruction& instr) {
|
||||
hex = instr.hex;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OpCode opcode;
|
||||
BitField<0, 8, Register> gpr0;
|
||||
BitField<8, 8, Register> gpr8;
|
||||
BitField<16, 4, Pred> pred;
|
||||
BitField<20, 8, Register> gpr20;
|
||||
BitField<20, 7, SubOp> sub_op;
|
||||
BitField<28, 8, Register> gpr28;
|
||||
BitField<36, 13, u64> imm36;
|
||||
BitField<39, 8, Register> gpr39;
|
||||
|
||||
union {
|
||||
BitField<45, 1, u64> negate_b;
|
||||
BitField<46, 1, u64> abs_a;
|
||||
BitField<48, 1, u64> negate_a;
|
||||
BitField<49, 1, u64> abs_b;
|
||||
BitField<50, 1, u64> abs_d;
|
||||
} alu;
|
||||
|
||||
union {
|
||||
BitField<48, 1, u64> negate_b;
|
||||
BitField<49, 1, u64> negate_c;
|
||||
} ffma;
|
||||
|
||||
BitField<60, 1, u64> is_b_gpr;
|
||||
BitField<59, 1, u64> is_c_gpr;
|
||||
|
||||
Attribute attribute;
|
||||
Uniform uniform;
|
||||
|
||||
u64 hex;
|
||||
};
|
||||
static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
|
||||
static_assert(std::is_standard_layout<Instruction>::value,
|
||||
"Structure does not have standard layout");
|
||||
|
||||
} // namespace Shader
|
||||
} // namespace Tegra
|
@ -0,0 +1,65 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
|
||||
namespace GLShader {
|
||||
|
||||
namespace Impl {
|
||||
/// Binds the uniform block called `name` in program `shader` to the binding point given
/// by `binding`. Does nothing when the program has no such block. Asserts that the
/// block's data size reported by GL equals `expected_size`.
void SetShaderUniformBlockBinding(GLuint shader, const char* name,
                                  Maxwell3D::Regs::ShaderStage binding, size_t expected_size) {
    const GLuint block_index = glGetUniformBlockIndex(shader, name);
    if (block_index == GL_INVALID_INDEX) {
        return;
    }

    GLint ub_size = 0;
    glGetActiveUniformBlockiv(shader, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
    ASSERT_MSG(ub_size == expected_size,
               "Uniform block size did not match! Got %d, expected %zu",
               static_cast<int>(ub_size), expected_size);
    glUniformBlockBinding(shader, block_index, static_cast<GLuint>(binding));
}
|
||||
|
||||
/**
 * Binds the per-stage configuration uniform blocks ("vs_config", "gs_config", "fs_config") of
 * program `shader` to the binding point of their respective shader stage. Every block is expected
 * to be sizeof(MaxwellUniformData) bytes large.
 */
void SetShaderUniformBlockBindings(GLuint shader) {
    struct StageBlock {
        const char* name;
        Maxwell3D::Regs::ShaderStage stage;
    };
    const StageBlock stage_blocks[] = {
        {"vs_config", Maxwell3D::Regs::ShaderStage::Vertex},
        {"gs_config", Maxwell3D::Regs::ShaderStage::Geometry},
        {"fs_config", Maxwell3D::Regs::ShaderStage::Fragment},
    };

    for (const StageBlock& block : stage_blocks) {
        SetShaderUniformBlockBinding(shader, block.name, block.stage, sizeof(MaxwellUniformData));
    }
}
|
||||
|
||||
/**
 * Points every "tex[N]" sampler uniform found in program `shader` at the texture unit returned by
 * TextureUnits::MaxwellTexture(N). Samplers that are not present in the program are skipped.
 * The previously bound shader program is restored before returning.
 */
void SetShaderSamplerBindings(GLuint shader) {
    // glUniform1i operates on the program that is currently in use, so make `shader` current
    // for the duration of this function and remember what was bound before.
    OpenGLState state = OpenGLState::GetCurState();
    const GLuint previous_program = std::exchange(state.draw.shader_program, shader);
    state.Apply();

    // Set the texture samplers to correspond to different texture units
    for (u32 unit = 0; unit < NumTextureSamplers; ++unit) {
        const std::string sampler_name = "tex[" + std::to_string(unit) + "]";
        const GLint location = glGetUniformLocation(shader, sampler_name.c_str());
        if (location == -1) {
            // This sampler is not an active uniform of the program
            continue;
        }
        glUniform1i(location, TextureUnits::MaxwellTexture(unit).id);
    }

    // Put the previous program back
    state.draw.shader_program = previous_program;
    state.Apply();
}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
/**
 * Fills `const_buffers` with the contents of the constant buffers of the given shader stage.
 * For every entry in `shader_stage.const_buffers`, its address is translated with the GPU memory
 * manager and sizeof(ConstBuffer) bytes are read from emulated memory into the matching slot.
 * @param shader_stage Shader stage state holding the constant buffer descriptors to read
 */
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
    // Reach the System singleton through the class itself instead of through a temporary
    // System object, which would be constructed and destroyed on every call.
    const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;

    for (size_t index = 0; index < shader_stage.const_buffers.size(); ++index) {
        const auto& const_buffer = shader_stage.const_buffers[index];
        const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address);
        // Only the leading sizeof(ConstBuffer) bytes of each buffer are mirrored
        Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer));
    }
}
|
||||
|
||||
} // namespace GLShader
|
@ -0,0 +1,151 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <glad/glad.h>
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace GLShader {
|
||||
|
||||
/// Number of OpenGL texture samplers that can be used in the fragment shader
// Declared `inline` so that all translation units including this header share one definition.
inline constexpr size_t NumTextureSamplers = 32;
|
||||
|
||||
using Tegra::Engines::Maxwell3D;
|
||||
|
||||
namespace Impl {
|
||||
void SetShaderUniformBlockBindings(GLuint shader);
|
||||
void SetShaderSamplerBindings(GLuint shader);
|
||||
} // namespace Impl
|
||||
|
||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||
// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
|
||||
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
// Not following that rule will cause problems on some AMD drivers.
|
||||
struct MaxwellUniformData {
|
||||
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
|
||||
|
||||
using ConstBuffer = std::array<GLvec4, 4>;
|
||||
alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers;
|
||||
};
|
||||
static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect");
|
||||
static_assert(sizeof(MaxwellUniformData) < 16384,
|
||||
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
class OGLShaderStage {
|
||||
public:
|
||||
OGLShaderStage() = default;
|
||||
|
||||
void Create(const char* source, GLenum type) {
|
||||
OGLShader shader;
|
||||
shader.Create(source, type);
|
||||
program.Create(true, shader.handle);
|
||||
Impl::SetShaderUniformBlockBindings(program.handle);
|
||||
Impl::SetShaderSamplerBindings(program.handle);
|
||||
}
|
||||
GLuint GetHandle() const {
|
||||
return program.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLProgram program;
|
||||
};
|
||||
|
||||
// TODO(wwylele): beautify this doc
|
||||
// This is a shader cache designed for translating PICA shader to GLSL shader.
|
||||
// The double cache is needed because diffent KeyConfigType, which includes a hash of the code
|
||||
// region (including its leftover unused code) can generate the same GLSL code.
|
||||
template <typename KeyConfigType,
|
||||
std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType>
|
||||
class ShaderCache {
|
||||
public:
|
||||
ShaderCache() = default;
|
||||
|
||||
GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) {
|
||||
auto map_it = shader_map.find(key);
|
||||
if (map_it == shader_map.end()) {
|
||||
std::string program = CodeGenerator(setup, key);
|
||||
|
||||
auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{});
|
||||
OGLShaderStage& cached_shader = iter->second;
|
||||
if (new_shader) {
|
||||
cached_shader.Create(program.c_str(), ShaderType);
|
||||
}
|
||||
shader_map[key] = &cached_shader;
|
||||
return cached_shader.GetHandle();
|
||||
} else {
|
||||
return map_it->second->GetHandle();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
|
||||
std::unordered_map<std::string, OGLShaderStage> shader_cache;
|
||||
};
|
||||
|
||||
using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
|
||||
|
||||
using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
|
||||
|
||||
class ProgramManager {
|
||||
public:
|
||||
ProgramManager() {
|
||||
pipeline.Create();
|
||||
}
|
||||
|
||||
void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) {
|
||||
current.vs = vertex_shaders.Get(config, setup);
|
||||
}
|
||||
|
||||
void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) {
|
||||
current.fs = fragment_shaders.Get(config, setup);
|
||||
}
|
||||
|
||||
void UseTrivialGeometryShader() {
|
||||
current.gs = 0;
|
||||
}
|
||||
|
||||
void ApplyTo(OpenGLState& state) {
|
||||
// Workaround for AMD bug
|
||||
glUseProgramStages(pipeline.handle,
|
||||
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
|
||||
0);
|
||||
|
||||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs);
|
||||
glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs);
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs);
|
||||
state.draw.shader_program = 0;
|
||||
state.draw.program_pipeline = pipeline.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
struct ShaderTuple {
|
||||
GLuint vs = 0, gs = 0, fs = 0;
|
||||
bool operator==(const ShaderTuple& rhs) const {
|
||||
return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
|
||||
}
|
||||
struct Hash {
|
||||
std::size_t operator()(const ShaderTuple& tuple) const {
|
||||
std::size_t hash = 0;
|
||||
boost::hash_combine(hash, tuple.vs);
|
||||
boost::hash_combine(hash, tuple.gs);
|
||||
boost::hash_combine(hash, tuple.fs);
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
};
|
||||
ShaderTuple current;
|
||||
VertexShaders vertex_shaders;
|
||||
FragmentShaders fragment_shaders;
|
||||
|
||||
std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
|
||||
OGLPipeline pipeline;
|
||||
};
|
||||
|
||||
} // namespace GLShader
|
Loading…
Reference in New Issue